csi_images.csi_scans

Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.

  1"""
  2Contains the Scan class, which holds important metadata from a scan. This metadata
  3can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan
  4object can also be loaded from a .czi file or a .txt file.
  5"""
  6
  7import os
  8import enum
  9import datetime
 10import zoneinfo
 11import typing
 12
 13import aicspylibczi
 14
 15import yaml
 16import json
 17
 18
 19class Scan(yaml.YAMLObject):
 20    """
 21    Class that composes a whole scan's metadata. Contains some universal data,
 22    plus lists for channels and ROIs.
 23
 24    .. include:: ../docs/csi_images/coordinate_systems.md
 25    """
 26
    # Tag under which PyYAML serializes and recognizes Scan objects.
    # NOTE(review): this prefix ("csi_utils.scans") differs from the one used by the
    # nested Channel/ROI tags ("csi_utils.csi_scans"); presumably historical --
    # confirm before changing, since previously written .yaml files carry this tag.
    yaml_tag = "csi_utils.scans.Scan"

    class Type(enum.Enum):
        # Scanner types referenced by the lookup tables below
        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Default metadata file name, keyed by scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # strptime/strftime patterns for timestamps, keyed by scanner type
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 53
 54    class Channel(yaml.YAMLObject):
 55        """
 56        Class that comprises a channel; we usually have multiple (2-5) per scan.
 57        Contains three fields:
 58        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 59        - exposure_ms: the exposure time to capture a frame in milliseconds
 60        - intensity: the light intensity used OR the gain applied to the channel
 61        """
 62
 63        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 64
 65        def __init__(
 66            self,
 67            name: str = "",
 68            exposure_ms: float = -1.0,
 69            intensity: float = -1.0,
 70        ):
 71            self.name = name
 72            self.exposure_ms = exposure_ms
 73            self.intensity = intensity
 74
 75        def __repr__(self):
 76            return yaml.dump(self, sort_keys=False)
 77
 78        def __eq__(self, other):
 79            return self.__repr__() == other.__repr__()
 80
 81    class ROI(yaml.YAMLObject):
 82        """
 83        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 84        """
 85
 86        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 87
 88        def __init__(
 89            self,
 90            origin_x_um: int = -1,
 91            origin_y_um: int = -1,
 92            width_um: int = -1,
 93            height_um: int = -1,
 94            tile_rows: int = -1,
 95            tile_cols: int = -1,
 96            focus_points=None,
 97        ):
 98            if focus_points is None:
 99                focus_points = []
100            self.origin_x_um = origin_x_um
101            self.origin_y_um = origin_y_um
102            self.width_um = width_um
103            self.height_um = height_um
104            self.tile_rows = tile_rows
105            self.tile_cols = tile_cols
106            self.focus_points = focus_points
107
108        def __repr__(self):
109            return yaml.dump(self, sort_keys=False)
110
111        def __eq__(self, other):
112            return self.__repr__() == other.__repr__()
113
114        def similar(self, other):
115            return (
116                self.origin_y_um == other.origin_y_um
117                and self.origin_x_um == other.origin_x_um
118                and self.width_um == other.width_um
119                and self.height_um == other.height_um
120                and self.tile_rows == other.tile_rows
121                and self.tile_cols == other.tile_cols
122            )
123
124    def __init__(
125        self,
126        slide_id: str = "",
127        path: str = "",
128        start_date: str = "",
129        end_date: str = "",
130        scan_time_s: int = -1,
131        scanner_id: str = "",
132        tray: int = -1,
133        slot: int = -1,
134        camera: str = "",
135        objective: str = "",
136        pixel_size_um: float = -1.0,
137        tile_width_px: int = -1,
138        tile_height_px: int = -1,
139        tile_overlap_proportion: int = -1,
140        channels=None,
141        roi=None,
142    ):
143        if roi is None:
144            roi = []
145        if channels is None:
146            channels = []
147        self.slide_id = slide_id
148        self.path = path
149        self.start_date = start_date
150        self.end_date = end_date
151        self.scan_time_s = scan_time_s
152        self.scanner_id = scanner_id
153        self.tray = tray
154        self.slot = slot
155        self.camera = camera
156        self.objective = objective
157        self.pixel_size_um = pixel_size_um
158        self.tile_width_px = tile_width_px
159        self.tile_height_px = tile_height_px
160        self.tile_overlap_proportion = tile_overlap_proportion
161        self.channels = channels
162        self.roi = roi
163
164    def __repr__(self):
165        return yaml.dump(self, sort_keys=False)
166
167    def __eq__(self, other):
168        return self.__repr__() == other.__repr__()
169
170    def has_same_profile(self, other):
171        return (
172            self.camera == other.camera
173            and self.objective == other.objective
174            and self.pixel_size_um == other.pixel_size_um
175            and self.tile_width_px == other.tile_width_px
176            and self.tile_height_px == other.tile_height_px
177            and self.tile_overlap_proportion == other.tile_overlap_proportion
178            and self.channels == other.channels
179            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
180        )
181
182    def get_channel_names(self) -> list[str]:
183        """
184        Get the channel names in the scan's channel order.
185        :return: a list of channel names.
186        """
187        return [channel.name for channel in self.channels]
188
189    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
190        """
191        Given a list of channel names, return the corresponding indices in the scan's
192        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
193        actual AlexaFluor names (AF555, AF647, AF488).
194        If a list entry is None, it will return -1 for that entry.
195        :param channel_names: a list of channel names.
196        :return: a list of channel indices.
197        """
198        # Get the scan's channel name list
199        scan_channel_names = self.get_channel_names()
200
201        channel_indices = []
202        for name in channel_names:
203            # Convert any BZScanner channel names to the actual channel names
204            if name in self.BZSCANNER_CHANNEL_MAP:
205                name = self.BZSCANNER_CHANNEL_MAP[name]
206
207            # Append the corresponding index if possible
208            if name is None:
209                channel_indices.append(-1)
210            elif name in scan_channel_names:
211                channel_indices.append(scan_channel_names.index(name))
212            else:
213                raise ValueError(
214                    f"Channel name {name} not found in scan channels {scan_channel_names}"
215                )
216        return channel_indices
217
218    def save_yaml(self, output_path: str):
219        """
220        Write the Scan object to a .yaml file.
221        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
222        :return: nothing; will raise an error on failure
223        """
224        # Create necessary folders
225        output_path = os.path.abspath(output_path)
226        if os.path.splitext(output_path)[1] == ".yaml":
227            os.makedirs(os.path.dirname(output_path), exist_ok=True)
228        else:
229            os.makedirs(output_path, exist_ok=True)
230            # Add the standard metadata file name to the path if needed
231            output_path = os.path.join(
232                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
233            )
234
235        # Populate the file
236        with open(output_path, "w") as file:
237            yaml.dump(self, stream=file, sort_keys=False)
238
239    @classmethod
240    def load_yaml(cls, input_path: str) -> typing.Self:
241        """
242        Load a Scan object from a .yaml file.
243        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
244        :return: a Scan object
245        """
246        input_path = os.path.abspath(input_path)
247        if os.path.isdir(input_path):
248            input_path = os.path.join(
249                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
250            )
251        with open(input_path, "r") as file:
252            metadata_obj = yaml.load(file, Loader=yaml.Loader)
253        return metadata_obj
254
255    def to_dict(self) -> dict:
256        # Dump to json; then add indents and a top-level key
257        channels_json = json.dumps(
258            self.channels, default=lambda x: x.__dict__, indent=2
259        )
260        channels_json = "  ".join(channels_json.splitlines(True))
261        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
262
263        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
264        roi_json = "  ".join(roi_json.splitlines(True))
265        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
266
267        return {
268            "slide_id": self.slide_id,
269            "path": self.path,
270            "start_date": self.start_date,
271            "end_date": self.end_date,
272            "scan_time_s": self.scan_time_s,
273            "scanner_id": self.scanner_id,
274            "tray": self.tray,
275            "slot": self.slot,
276            "camera": self.camera,
277            "objective": self.objective,
278            "pixel_size_um": self.pixel_size_um,
279            "tile_width_px": self.tile_width_px,
280            "tile_height_px": self.tile_height_px,
281            "tile_overlap_proportion": self.tile_overlap_proportion,
282            "channels": channels_json,
283            "roi": roi_json,
284        }
285
286    @classmethod
287    def from_dict(cls, scan_dict) -> typing.Self:
288        local_timezone = zoneinfo.ZoneInfo("localtime")
289        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
290        result = cls(
291            slide_id=scan_dict["slide_id"],
292            path=scan_dict["path"],
293            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
294            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
295            scan_time_s=int(dt),
296            scanner_id=scan_dict["scanner_id"],
297            tray=scan_dict["tray"],
298            slot=scan_dict["slot"],
299            camera=scan_dict["camera"],
300            objective=scan_dict["objective"],
301            pixel_size_um=scan_dict["pixel_size"],
302            tile_width_px=scan_dict["tile_width"],
303            tile_height_px=scan_dict["tile_height"],
304            tile_overlap_proportion=scan_dict["tile_overlap"],
305        )
306        for channel_json in scan_dict["channels"]["data"]:
307            result.channels.append(
308                cls.Channel(
309                    name=channel_json["name"],
310                    exposure_ms=channel_json["exposure_ms"],
311                    intensity=channel_json["intensity"],
312                )
313            )
314        for roi_json in scan_dict["roi"]["data"]:
315            result.roi.append(
316                cls.ROI(
317                    origin_x_um=roi_json["origin_x_um"],
318                    origin_y_um=roi_json["origin_y_um"],
319                    width_um=roi_json["width_um"],
320                    height_um=roi_json["height_um"],
321                    tile_rows=roi_json["tile_rows"],
322                    tile_cols=roi_json["tile_cols"],
323                    focus_points=roi_json["focus_points"],
324                )
325            )
326        return result
327
328    @classmethod
329    def load_czi(cls, input_path: str) -> typing.Self:
330        """
331        :param input_path: the path to the .czi file
332        :return: a Scan object
333        """
334        # Normalize paths
335        input_path = os.path.abspath(input_path)
336
337        # Read in metadata as XML elements
338        metadata_xml = aicspylibczi.CziFile(input_path).meta
339        # Read in shape metadata from binary
340        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
341
342        # Populate metadata
343        scan = cls()
344
345        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
346        scan.slide_id = scan.slide_id.strip().upper()
347        # Map the raw scanner ID (service ID) to our IDs
348        scan.scanner_id = cls.SCANNER_IDS[
349            metadata_xml.find(".//Microscope/UserDefinedName").text
350        ]
351
352        # Extract start and finish datetimes
353        date = metadata_xml.find(".//Document/CreationDate").text
354        # Strip out sub-second precision
355        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
356        date_as_datetime = datetime.datetime.strptime(
357            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
358        )
359        scan.start_date = date_as_datetime.strftime(
360            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
361        )
362        scan.scan_time_s = round(
363            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
364        )
365        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
366        scan.end_date = date_as_datetime.strftime(
367            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
368        )
369
370        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
371        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
372
373        # Get camera and magnifying info
374        scan.camera = (
375            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
376        )["Name"]
377        magnification = metadata_xml.find(
378            ".//Objectives/Objective/NominalMagnification"
379        )
380        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
381        scan.objective = f"{magnification.text}x-{aperture.text}"
382        scan.pixel_size_um = (
383            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
384        )
385        # Round off the pixel size to nanometers; might not be optimal, but this
386        # gets rounded when we send it to the database anyways (to 7 places)
387        scan.pixel_size_um = round(scan.pixel_size_um, 3)
388
389        # Get tile information
390        # Note: X Y is untested, could be flipped. I always forget. Just don't use
391        # non-square frames and we're all good.
392        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
393        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
394
395        scan.tile_width_px = rois_shape[0]["X"][1]
396        scan.tile_height_px = rois_shape[0]["Y"][1]
397        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
398
399        # Extract channels and create Channel objects from them
400        channel_indices = []
401        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
402            channel_indices.append(int(channel.attrib["Id"][-1]))
403            intensity_xml = channel.find(".//Intensity")
404            if intensity_xml is None:
405                intensity = 0
406            else:
407                intensity = float(intensity_xml.text[:-2]) * 1e-2
408            scan.channels.append(
409                cls.Channel(
410                    name=channel.attrib["Name"].upper(),
411                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
412                    intensity=intensity,
413                )
414            )
415        # Make sure the channels are sorted
416        scan.channels = [
417            channel for _, channel in sorted(zip(channel_indices, scan.channels))
418        ]
419        # Verify that the shape corresponds to the channels
420        for roi in rois_shape:
421            if roi["C"][1] != len(scan.channels):
422                raise ValueError(
423                    f"Number of channels {len(scan.channels)} "
424                    f"is not the same as the number of channels in an ROI: "
425                    f"{roi['C'][1]}"
426                )
427
428        # Get the real ROI limits; the metadata is not always correct
429        limits_xml = metadata_xml.findall(".//AllowedScanArea")
430        limits = [
431            round(float(limits_xml[0].find("Center").text.split(",")[0])),
432            round(float(limits_xml[0].find("Center").text.split(",")[1])),
433            round(float(limits_xml[0].find("Size").text.split(",")[0])),
434            round(float(limits_xml[0].find("Size").text.split(",")[1])),
435        ]
436        # Convert to top-left and bottom-right
437        limits = [
438            round(limits[0] - limits[2] / 2),
439            round(limits[1] - limits[3] / 2),
440            round(limits[0] + limits[2] / 2),
441            round(limits[1] + limits[3] / 2),
442        ]
443
444        # Extract ROIs and create ROI objects from them
445        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
446        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
447        if len(rois_xml_metadata) != len(rois_shape):
448            raise ValueError(
449                f"Metadata and binary data from {input_path} "
450                f"do not match in number of ROIs"
451            )
452        # We need both to determine the number of rows/columns because the XML lies
453        roi_indices = []
454        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
455            name = roi_xml.attrib["Name"]
456            # Determine the index of this scene
457            scene_index = -1
458            for scene in scenes_xml_metadata:
459                if scene.attrib["Name"] == name:
460                    scene_index = int(scene.attrib["Index"])
461                    break
462            if scene_index == -1:
463                raise ValueError(f"ROI {name} does not correspond to any scenes")
464            else:
465                roi_indices.append(scene_index)
466            # Extract other metadata
467            roi_limits = [
468                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
469                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
470                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
471                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
472            ]
473            # Convert to top-left and bottom-right
474            roi_limits = [
475                round(roi_limits[0] - roi_limits[2] / 2),
476                round(roi_limits[1] - roi_limits[3] / 2),
477                round(roi_limits[0] + roi_limits[2] / 2),
478                round(roi_limits[1] + roi_limits[3] / 2),
479            ]
480            # Bound the ROI to the actual scan limits
481            roi_limits = [
482                max(roi_limits[0], limits[0]),
483                max(roi_limits[1], limits[1]),
484                min(roi_limits[2], limits[2]),
485                min(roi_limits[3], limits[3]),
486            ]
487
488            tile_rows = int(roi_xml.find("Rows").text)
489            # Current best way of reliably extracting; <Columns> entry can be wrong
490            if (roi_shape["M"][1] % tile_rows) != 0:
491                raise ValueError(
492                    f"The number of tiles {roi_shape['M'][1]} is not "
493                    f"divisible by the tile rows {tile_rows}; metadata "
494                    f"must be messed up. Thanks Zeiss"
495                )
496            else:
497                tile_cols = int(roi_shape["M"][1] / tile_rows)
498            # Support points are actually the relevant focus points for this ROI
499            focus_points = []
500            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
501                focus_points.append(
502                    [
503                        int(float(focus_point.find("X").text)),
504                        int(float(focus_point.find("Y").text)),
505                        int(float(focus_point.find("Z").text)),
506                    ]
507                )
508            # Strip all sub-micron precision, it does not matter
509            scan.roi.append(
510                cls.ROI(
511                    origin_x_um=roi_limits[0],
512                    origin_y_um=roi_limits[1],
513                    width_um=roi_limits[2] - roi_limits[0],
514                    height_um=roi_limits[3] - roi_limits[1],
515                    tile_rows=tile_rows,
516                    tile_cols=tile_cols,
517                    focus_points=focus_points,
518                )
519            )
520        # Sort based on the scene indices
521        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
522
523        return scan
524
525    @classmethod
526    def load_txt(cls, input_path: str) -> typing.Self:
527        """
528        Loads a Scan object from a .txt file, which originates from the BZScanner.
529        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
530        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
531        :return: a Scan object
532        """
533        # Set paths
534        input_path = os.path.abspath(input_path)
535        if os.path.isdir(input_path):
536            input_path = os.path.join(
537                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
538            )
539
540        # Read in metadata as a dict
541        with open(input_path, "r") as file:
542            metadata_contents = file.read()
543            # Read each line, splitting on the = sign
544            metadata_dict = {}
545            for line in metadata_contents.splitlines():
546                key, value = line.split("=")
547                metadata_dict[key] = value
548
549        # Populate metadata
550        scan = cls()
551
552        scan.slide_id = metadata_dict["SLIDEID"]
553        scan.slide_id = scan.slide_id.strip().upper()
554
555        scan.path = metadata_dict["SLIDEDIR"]
556
557        # Extract start and finish datetimes
558        date = metadata_dict["DATE"]
559        date_as_datetime = datetime.datetime.strptime(
560            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
561        )
562        date_as_datetime = date_as_datetime.astimezone(
563            zoneinfo.ZoneInfo("America/Los_Angeles")
564        )  # Hardcoded because BZScanners are here
565        scan.start_date = date_as_datetime.strftime(
566            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
567        )
568        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
569        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
570        scan.end_date = date_as_datetime.strftime(
571            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
572        )
573
574        # Map the raw scanner ID (service ID) to our IDs
575        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
576        scan.tray = 0  # only one tray in a BZScanner
577        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
578
579        # Get camera and magnifying info
580        scan.camera = ""
581        magnification = 10
582        aperture = 0  # TODO: find the actual aperture
583        scan.objective = f"{magnification}x-{aperture}"
584        scan.pixel_size_um = 0.591  # Estimated from image metadata
585
586        # Get tile information
587        scan.tile_width_px = 1362  # Known from image metadata
588        scan.tile_height_px = 1004  # Known from image metadata
589        scan.tile_overlap_proportion = 0
590
591        # Extract channels and create Channel objects from them
592        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
593            channel_settings = metadata_dict[channel].split(",")
594            if channel_settings[0] == "0":
595                continue
596            scan.channels.append(
597                cls.Channel(
598                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
599                    exposure_ms=float(channel_settings[1]),
600                    intensity=float(channel_settings[2]),
601                )
602            )
603
604        # Get focus points
605        focus_points = []
606        for i in range(33):
607            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
608            if focus_point[0] == "0":
609                break
610            focus_points.append(
611                [
612                    int(float(focus_point[1])),
613                    int(float(focus_point[2])),
614                    int(float(focus_point[3])),
615                ]
616            )
617
618        # In the BZScanner, the slide is vertical instead of horizontal
619        # We put in nominal values for the ROI, which is oriented vertically as well
620        tile_rows = 96
621        tile_cols = 24
622        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
623        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
624        origin_x_um = 2500 + round((20000 - roi_width) / 2)
625        origin_y_um = 2500 + round((58000 - roi_height) / 2)
626        scan.roi.append(
627            cls.ROI(
628                origin_x_um=origin_x_um,
629                origin_y_um=origin_y_um,
630                width_um=roi_width,
631                height_um=roi_height,
632                tile_rows=tile_rows,
633                tile_cols=tile_cols,
634                focus_points=focus_points,
635            )
636        )
637        return scan
638
639    @classmethod
640    def load_from_folder(cls, input_path: str) -> typing.Self:
641        """
642        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
643        Prefers scan.yaml if both exist.
644        :param input_path: /path/to/folder
645        :return: a Scan object
646        """
647        input_path = os.path.abspath(input_path)
648        if os.path.isfile(
649            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
650        ):
651            return cls.load_yaml(input_path)
652        elif os.path.isfile(
653            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
654        ):
655            return cls.load_txt(input_path)
656        else:
657            raise ValueError(
658                f"No scan metadata files "
659                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
660                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
661                f"{input_path}"
662            )
663        pass
664
665    @classmethod
666    def make_placeholder(
667        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
668    ) -> typing.Self:
669        """
670        Make a placeholder Scan object with only basic required information filled in.
671        :param slide_id: the slide ID
672        :param n_tile: the number of this tile, which will become the number of
673                       tiles in the scan
674        :param n_roi: the number of ROIs in the scan
675        :return: a Scan object
676        """
677        # Sanitize inputs here
678        slide_id = str(slide_id).strip().upper()
679        n_tile = int(n_tile)
680        n_roi = int(n_roi)
681        # Generate the object
682        scan = cls()
683        scan.slide_id = slide_id
684        scan.roi = [cls.ROI() for _ in range(n_roi)]
685        scan.roi[0].tile_cols = n_tile
686        return scan
class Scan(yaml.YAMLObject):
 20class Scan(yaml.YAMLObject):
 21    """
 22    Class that composes a whole scan's metadata. Contains some universal data,
 23    plus lists for channels and ROIs.
 24
 25    .. include:: ../docs/csi_images/coordinate_systems.md
 26    """
 27
    # Tag used by PyYAML to identify serialized Scan objects.
    # NOTE(review): the nested Channel and ROI classes use the prefix
    # "csi_utils.csi_scans." while this tag uses "csi_utils.scans."; presumably
    # kept as-is so existing .yaml files still load -- confirm before changing.
    yaml_tag = "csi_utils.scans.Scan"

    class Type(enum.Enum):
        # Scanner types; used as keys of the lookup tables below
        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Name of the metadata file looked for / written, per scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # strptime/strftime formats used when parsing and writing scan dates
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 54
    class Channel(yaml.YAMLObject):
        """
        Class that comprises a channel; we usually have multiple (2-5) per scan.
        Contains three fields:
        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
        - exposure_ms: the exposure time to capture a frame in milliseconds
        - intensity: the light intensity used OR the gain applied to the channel

        Unset fields default to "" (name) or -1.0 (numeric fields).
        """

        yaml_tag = "csi_utils.csi_scans.Scan.Channel"

        def __init__(
            self,
            name: str = "",
            exposure_ms: float = -1.0,
            intensity: float = -1.0,
        ):
            # Keep this assignment order: the YAML dump is written with
            # sort_keys=False, so the order here is the order of the keys
            self.name = name
            self.exposure_ms = exposure_ms
            self.intensity = intensity

        def __repr__(self):
            # The representation is the YAML dump of the object, keys unsorted
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            # Two objects are equal when their representations are identical
            return self.__repr__() == other.__repr__()
 81
    class ROI(yaml.YAMLObject):
        """
        Class that comprises an ROI; we usually have 1, but may have more in a scan.
        Holds the ROI origin and size in micrometers, the number of tile rows and
        columns, and a list of focus points. Unset numeric fields default to -1.
        """

        yaml_tag = "csi_utils.csi_scans.Scan.ROI"

        def __init__(
            self,
            origin_x_um: int = -1,
            origin_y_um: int = -1,
            width_um: int = -1,
            height_um: int = -1,
            tile_rows: int = -1,
            tile_cols: int = -1,
            focus_points: list | None = None,
        ):
            # None stands in for "no focus points" so that instances never share
            # one mutable default list
            if focus_points is None:
                focus_points = []
            # Keep this assignment order: the YAML dump is written with
            # sort_keys=False, so the order here is the order of the keys
            self.origin_x_um = origin_x_um
            self.origin_y_um = origin_y_um
            self.width_um = width_um
            self.height_um = height_um
            self.tile_rows = tile_rows
            self.tile_cols = tile_cols
            self.focus_points = focus_points

        def __repr__(self):
            # The representation is the YAML dump of the object, keys unsorted
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            # Two objects are equal when their representations are identical
            return self.__repr__() == other.__repr__()

        def similar(self, other):
            """
            Compare position, size and tile layout with another ROI.
            Focus points are not part of the comparison.
            :param other: the ROI to compare against
            :return: True if all compared fields are equal
            """
            return (
                self.origin_y_um == other.origin_y_um
                and self.origin_x_um == other.origin_x_um
                and self.width_um == other.width_um
                and self.height_um == other.height_um
                and self.tile_rows == other.tile_rows
                and self.tile_cols == other.tile_cols
            )
124
    def __init__(
        self,
        slide_id: str = "",
        path: str = "",
        start_date: str = "",
        end_date: str = "",
        scan_time_s: int = -1,
        scanner_id: str = "",
        tray: int = -1,
        slot: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_overlap_proportion: float = -1,
        channels: list | None = None,
        roi: list | None = None,
    ):
        """
        Create a Scan; every field has a placeholder default ("" for strings,
        -1 for numbers, an empty list for channels and roi).
        :param channels: list of Channel objects; None means an empty list
        :param roi: list of ROI objects; None means an empty list
        """
        # None stands in for "empty" so that instances never share one mutable
        # default list
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        # Keep this assignment order: the YAML dump is written with
        # sort_keys=False, so the order here is the order of the keys
        self.slide_id = slide_id
        self.path = path
        self.start_date = start_date
        self.end_date = end_date
        self.scan_time_s = scan_time_s
        self.scanner_id = scanner_id
        self.tray = tray
        self.slot = slot
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi
164
165    def __repr__(self):
166        return yaml.dump(self, sort_keys=False)
167
168    def __eq__(self, other):
169        return self.__repr__() == other.__repr__()
170
171    def has_same_profile(self, other):
172        return (
173            self.camera == other.camera
174            and self.objective == other.objective
175            and self.pixel_size_um == other.pixel_size_um
176            and self.tile_width_px == other.tile_width_px
177            and self.tile_height_px == other.tile_height_px
178            and self.tile_overlap_proportion == other.tile_overlap_proportion
179            and self.channels == other.channels
180            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
181        )
182
183    def get_channel_names(self) -> list[str]:
184        """
185        Get the channel names in the scan's channel order.
186        :return: a list of channel names.
187        """
188        return [channel.name for channel in self.channels]
189
190    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
191        """
192        Given a list of channel names, return the corresponding indices in the scan's
193        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
194        actual AlexaFluor names (AF555, AF647, AF488).
195        If a list entry is None, it will return -1 for that entry.
196        :param channel_names: a list of channel names.
197        :return: a list of channel indices.
198        """
199        # Get the scan's channel name list
200        scan_channel_names = self.get_channel_names()
201
202        channel_indices = []
203        for name in channel_names:
204            # Convert any BZScanner channel names to the actual channel names
205            if name in self.BZSCANNER_CHANNEL_MAP:
206                name = self.BZSCANNER_CHANNEL_MAP[name]
207
208            # Append the corresponding index if possible
209            if name is None:
210                channel_indices.append(-1)
211            elif name in scan_channel_names:
212                channel_indices.append(scan_channel_names.index(name))
213            else:
214                raise ValueError(
215                    f"Channel name {name} not found in scan channels {scan_channel_names}"
216                )
217        return channel_indices
218
219    def save_yaml(self, output_path: str):
220        """
221        Write the Scan object to a .yaml file.
222        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
223        :return: nothing; will raise an error on failure
224        """
225        # Create necessary folders
226        output_path = os.path.abspath(output_path)
227        if os.path.splitext(output_path)[1] == ".yaml":
228            os.makedirs(os.path.dirname(output_path), exist_ok=True)
229        else:
230            os.makedirs(output_path, exist_ok=True)
231            # Add the standard metadata file name to the path if needed
232            output_path = os.path.join(
233                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
234            )
235
236        # Populate the file
237        with open(output_path, "w") as file:
238            yaml.dump(self, stream=file, sort_keys=False)
239
240    @classmethod
241    def load_yaml(cls, input_path: str) -> typing.Self:
242        """
243        Load a Scan object from a .yaml file.
244        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
245        :return: a Scan object
246        """
247        input_path = os.path.abspath(input_path)
248        if os.path.isdir(input_path):
249            input_path = os.path.join(
250                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
251            )
252        with open(input_path, "r") as file:
253            metadata_obj = yaml.load(file, Loader=yaml.Loader)
254        return metadata_obj
255
256    def to_dict(self) -> dict:
257        # Dump to json; then add indents and a top-level key
258        channels_json = json.dumps(
259            self.channels, default=lambda x: x.__dict__, indent=2
260        )
261        channels_json = "  ".join(channels_json.splitlines(True))
262        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
263
264        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
265        roi_json = "  ".join(roi_json.splitlines(True))
266        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
267
268        return {
269            "slide_id": self.slide_id,
270            "path": self.path,
271            "start_date": self.start_date,
272            "end_date": self.end_date,
273            "scan_time_s": self.scan_time_s,
274            "scanner_id": self.scanner_id,
275            "tray": self.tray,
276            "slot": self.slot,
277            "camera": self.camera,
278            "objective": self.objective,
279            "pixel_size_um": self.pixel_size_um,
280            "tile_width_px": self.tile_width_px,
281            "tile_height_px": self.tile_height_px,
282            "tile_overlap_proportion": self.tile_overlap_proportion,
283            "channels": channels_json,
284            "roi": roi_json,
285        }
286
287    @classmethod
288    def from_dict(cls, scan_dict) -> typing.Self:
289        local_timezone = zoneinfo.ZoneInfo("localtime")
290        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
291        result = cls(
292            slide_id=scan_dict["slide_id"],
293            path=scan_dict["path"],
294            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
295            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
296            scan_time_s=int(dt),
297            scanner_id=scan_dict["scanner_id"],
298            tray=scan_dict["tray"],
299            slot=scan_dict["slot"],
300            camera=scan_dict["camera"],
301            objective=scan_dict["objective"],
302            pixel_size_um=scan_dict["pixel_size"],
303            tile_width_px=scan_dict["tile_width"],
304            tile_height_px=scan_dict["tile_height"],
305            tile_overlap_proportion=scan_dict["tile_overlap"],
306        )
307        for channel_json in scan_dict["channels"]["data"]:
308            result.channels.append(
309                cls.Channel(
310                    name=channel_json["name"],
311                    exposure_ms=channel_json["exposure_ms"],
312                    intensity=channel_json["intensity"],
313                )
314            )
315        for roi_json in scan_dict["roi"]["data"]:
316            result.roi.append(
317                cls.ROI(
318                    origin_x_um=roi_json["origin_x_um"],
319                    origin_y_um=roi_json["origin_y_um"],
320                    width_um=roi_json["width_um"],
321                    height_um=roi_json["height_um"],
322                    tile_rows=roi_json["tile_rows"],
323                    tile_cols=roi_json["tile_cols"],
324                    focus_points=roi_json["focus_points"],
325                )
326            )
327        return result
328
329    @classmethod
330    def load_czi(cls, input_path: str) -> typing.Self:
331        """
332        :param input_path: the path to the .czi file
333        :return: a Scan object
334        """
335        # Normalize paths
336        input_path = os.path.abspath(input_path)
337
338        # Read in metadata as XML elements
339        metadata_xml = aicspylibczi.CziFile(input_path).meta
340        # Read in shape metadata from binary
341        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
342
343        # Populate metadata
344        scan = cls()
345
346        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
347        scan.slide_id = scan.slide_id.strip().upper()
348        # Map the raw scanner ID (service ID) to our IDs
349        scan.scanner_id = cls.SCANNER_IDS[
350            metadata_xml.find(".//Microscope/UserDefinedName").text
351        ]
352
353        # Extract start and finish datetimes
354        date = metadata_xml.find(".//Document/CreationDate").text
355        # Strip out sub-second precision
356        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
357        date_as_datetime = datetime.datetime.strptime(
358            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
359        )
360        scan.start_date = date_as_datetime.strftime(
361            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
362        )
363        scan.scan_time_s = round(
364            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
365        )
366        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
367        scan.end_date = date_as_datetime.strftime(
368            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
369        )
370
371        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
372        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
373
374        # Get camera and magnifying info
375        scan.camera = (
376            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
377        )["Name"]
378        magnification = metadata_xml.find(
379            ".//Objectives/Objective/NominalMagnification"
380        )
381        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
382        scan.objective = f"{magnification.text}x-{aperture.text}"
383        scan.pixel_size_um = (
384            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
385        )
386        # Round off the pixel size to nanometers; might not be optimal, but this
387        # gets rounded when we send it to the database anyways (to 7 places)
388        scan.pixel_size_um = round(scan.pixel_size_um, 3)
389
390        # Get tile information
391        # Note: X Y is untested, could be flipped. I always forget. Just don't use
392        # non-square frames and we're all good.
393        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
394        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
395
396        scan.tile_width_px = rois_shape[0]["X"][1]
397        scan.tile_height_px = rois_shape[0]["Y"][1]
398        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
399
400        # Extract channels and create Channel objects from them
401        channel_indices = []
402        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
403            channel_indices.append(int(channel.attrib["Id"][-1]))
404            intensity_xml = channel.find(".//Intensity")
405            if intensity_xml is None:
406                intensity = 0
407            else:
408                intensity = float(intensity_xml.text[:-2]) * 1e-2
409            scan.channels.append(
410                cls.Channel(
411                    name=channel.attrib["Name"].upper(),
412                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
413                    intensity=intensity,
414                )
415            )
416        # Make sure the channels are sorted
417        scan.channels = [
418            channel for _, channel in sorted(zip(channel_indices, scan.channels))
419        ]
420        # Verify that the shape corresponds to the channels
421        for roi in rois_shape:
422            if roi["C"][1] != len(scan.channels):
423                raise ValueError(
424                    f"Number of channels {len(scan.channels)} "
425                    f"is not the same as the number of channels in an ROI: "
426                    f"{roi['C'][1]}"
427                )
428
429        # Get the real ROI limits; the metadata is not always correct
430        limits_xml = metadata_xml.findall(".//AllowedScanArea")
431        limits = [
432            round(float(limits_xml[0].find("Center").text.split(",")[0])),
433            round(float(limits_xml[0].find("Center").text.split(",")[1])),
434            round(float(limits_xml[0].find("Size").text.split(",")[0])),
435            round(float(limits_xml[0].find("Size").text.split(",")[1])),
436        ]
437        # Convert to top-left and bottom-right
438        limits = [
439            round(limits[0] - limits[2] / 2),
440            round(limits[1] - limits[3] / 2),
441            round(limits[0] + limits[2] / 2),
442            round(limits[1] + limits[3] / 2),
443        ]
444
445        # Extract ROIs and create ROI objects from them
446        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
447        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
448        if len(rois_xml_metadata) != len(rois_shape):
449            raise ValueError(
450                f"Metadata and binary data from {input_path} "
451                f"do not match in number of ROIs"
452            )
453        # We need both to determine the number of rows/columns because the XML lies
454        roi_indices = []
455        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
456            name = roi_xml.attrib["Name"]
457            # Determine the index of this scene
458            scene_index = -1
459            for scene in scenes_xml_metadata:
460                if scene.attrib["Name"] == name:
461                    scene_index = int(scene.attrib["Index"])
462                    break
463            if scene_index == -1:
464                raise ValueError(f"ROI {name} does not correspond to any scenes")
465            else:
466                roi_indices.append(scene_index)
467            # Extract other metadata
468            roi_limits = [
469                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
470                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
471                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
472                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
473            ]
474            # Convert to top-left and bottom-right
475            roi_limits = [
476                round(roi_limits[0] - roi_limits[2] / 2),
477                round(roi_limits[1] - roi_limits[3] / 2),
478                round(roi_limits[0] + roi_limits[2] / 2),
479                round(roi_limits[1] + roi_limits[3] / 2),
480            ]
481            # Bound the ROI to the actual scan limits
482            roi_limits = [
483                max(roi_limits[0], limits[0]),
484                max(roi_limits[1], limits[1]),
485                min(roi_limits[2], limits[2]),
486                min(roi_limits[3], limits[3]),
487            ]
488
489            tile_rows = int(roi_xml.find("Rows").text)
490            # Current best way of reliably extracting; <Columns> entry can be wrong
491            if (roi_shape["M"][1] % tile_rows) != 0:
492                raise ValueError(
493                    f"The number of tiles {roi_shape['M'][1]} is not "
494                    f"divisible by the tile rows {tile_rows}; metadata "
495                    f"must be messed up. Thanks Zeiss"
496                )
497            else:
498                tile_cols = int(roi_shape["M"][1] / tile_rows)
499            # Support points are actually the relevant focus points for this ROI
500            focus_points = []
501            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
502                focus_points.append(
503                    [
504                        int(float(focus_point.find("X").text)),
505                        int(float(focus_point.find("Y").text)),
506                        int(float(focus_point.find("Z").text)),
507                    ]
508                )
509            # Strip all sub-micron precision, it does not matter
510            scan.roi.append(
511                cls.ROI(
512                    origin_x_um=roi_limits[0],
513                    origin_y_um=roi_limits[1],
514                    width_um=roi_limits[2] - roi_limits[0],
515                    height_um=roi_limits[3] - roi_limits[1],
516                    tile_rows=tile_rows,
517                    tile_cols=tile_cols,
518                    focus_points=focus_points,
519                )
520            )
521        # Sort based on the scene indices
522        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
523
524        return scan
525
526    @classmethod
527    def load_txt(cls, input_path: str) -> typing.Self:
528        """
529        Loads a Scan object from a .txt file, which originates from the BZScanner.
530        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
531        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
532        :return: a Scan object
533        """
534        # Set paths
535        input_path = os.path.abspath(input_path)
536        if os.path.isdir(input_path):
537            input_path = os.path.join(
538                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
539            )
540
541        # Read in metadata as a dict
542        with open(input_path, "r") as file:
543            metadata_contents = file.read()
544            # Read each line, splitting on the = sign
545            metadata_dict = {}
546            for line in metadata_contents.splitlines():
547                key, value = line.split("=")
548                metadata_dict[key] = value
549
550        # Populate metadata
551        scan = cls()
552
553        scan.slide_id = metadata_dict["SLIDEID"]
554        scan.slide_id = scan.slide_id.strip().upper()
555
556        scan.path = metadata_dict["SLIDEDIR"]
557
558        # Extract start and finish datetimes
559        date = metadata_dict["DATE"]
560        date_as_datetime = datetime.datetime.strptime(
561            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
562        )
563        date_as_datetime = date_as_datetime.astimezone(
564            zoneinfo.ZoneInfo("America/Los_Angeles")
565        )  # Hardcoded because BZScanners are here
566        scan.start_date = date_as_datetime.strftime(
567            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
568        )
569        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
570        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
571        scan.end_date = date_as_datetime.strftime(
572            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
573        )
574
575        # Map the raw scanner ID (service ID) to our IDs
576        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
577        scan.tray = 0  # only one tray in a BZScanner
578        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
579
580        # Get camera and magnifying info
581        scan.camera = ""
582        magnification = 10
583        aperture = 0  # TODO: find the actual aperture
584        scan.objective = f"{magnification}x-{aperture}"
585        scan.pixel_size_um = 0.591  # Estimated from image metadata
586
587        # Get tile information
588        scan.tile_width_px = 1362  # Known from image metadata
589        scan.tile_height_px = 1004  # Known from image metadata
590        scan.tile_overlap_proportion = 0
591
592        # Extract channels and create Channel objects from them
593        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
594            channel_settings = metadata_dict[channel].split(",")
595            if channel_settings[0] == "0":
596                continue
597            scan.channels.append(
598                cls.Channel(
599                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
600                    exposure_ms=float(channel_settings[1]),
601                    intensity=float(channel_settings[2]),
602                )
603            )
604
605        # Get focus points
606        focus_points = []
607        for i in range(33):
608            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
609            if focus_point[0] == "0":
610                break
611            focus_points.append(
612                [
613                    int(float(focus_point[1])),
614                    int(float(focus_point[2])),
615                    int(float(focus_point[3])),
616                ]
617            )
618
619        # In the BZScanner, the slide is vertical instead of horizontal
620        # We put in nominal values for the ROI, which is oriented vertically as well
621        tile_rows = 96
622        tile_cols = 24
623        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
624        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
625        origin_x_um = 2500 + round((20000 - roi_width) / 2)
626        origin_y_um = 2500 + round((58000 - roi_height) / 2)
627        scan.roi.append(
628            cls.ROI(
629                origin_x_um=origin_x_um,
630                origin_y_um=origin_y_um,
631                width_um=roi_width,
632                height_um=roi_height,
633                tile_rows=tile_rows,
634                tile_cols=tile_cols,
635                focus_points=focus_points,
636            )
637        )
638        return scan
639
640    @classmethod
641    def load_from_folder(cls, input_path: str) -> typing.Self:
642        """
643        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
644        Prefers scan.yaml if both exist.
645        :param input_path: /path/to/folder
646        :return: a Scan object
647        """
648        input_path = os.path.abspath(input_path)
649        if os.path.isfile(
650            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
651        ):
652            return cls.load_yaml(input_path)
653        elif os.path.isfile(
654            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
655        ):
656            return cls.load_txt(input_path)
657        else:
658            raise ValueError(
659                f"No scan metadata files "
660                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
661                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
662                f"{input_path}"
663            )
664        pass
665
666    @classmethod
667    def make_placeholder(
668        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
669    ) -> typing.Self:
670        """
671        Make a placeholder Scan object with only basic required information filled in.
672        :param slide_id: the slide ID
673        :param n_tile: the number of this tile, which will become the number of
674                       tiles in the scan
675        :param n_roi: the number of ROIs in the scan
676        :return: a Scan object
677        """
678        # Sanitize inputs here
679        slide_id = str(slide_id).strip().upper()
680        n_tile = int(n_tile)
681        n_roi = int(n_roi)
682        # Generate the object
683        scan = cls()
684        scan.slide_id = slide_id
685        scan.roi = [cls.ROI() for _ in range(n_roi)]
686        scan.roi[0].tile_cols = n_tile
687        return scan

Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.

Scans -> scan-level coordinate frames. Each scan maintains its own scan-level coordinate frames based on the scanner it was scanned with.

Slide-level coordinate frame (common, agreed-upon frame of reference for a slide).

Picture of the slide coordinate system, which assumes a slide placed horizontally
with the label on the left. The x-axis points to the right, and the y-axis points
down. The origin is at the top left corner. Key positions, such as the origin of the
slide's active area at (14500, 2500) micrometers and the bottom-right corner at
(72500, 22500) micrometers are displayed.

Scan( slide_id: str = '', path: str = '', start_date: str = '', end_date: str = '', scan_time_s: int = -1, scanner_id: str = '', tray: int = -1, slot: int = -1, camera: str = '', objective: str = '', pixel_size_um: float = -1.0, tile_width_px: int = -1, tile_height_px: int = -1, tile_overlap_proportion: int = -1, channels=None, roi=None)
    def __init__(
        self,
        slide_id: str = "",
        path: str = "",
        start_date: str = "",
        end_date: str = "",
        scan_time_s: int = -1,
        scanner_id: str = "",
        tray: int = -1,
        slot: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_overlap_proportion: float = -1,
        channels: list | None = None,
        roi: list | None = None,
    ):
        """
        Create a Scan; every field has a placeholder default ("" for strings,
        -1 for numbers, an empty list for channels and roi).
        :param channels: list of Channel objects; None means an empty list
        :param roi: list of ROI objects; None means an empty list
        """
        # None stands in for "empty" so that instances never share one mutable
        # default list
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        # Keep this assignment order: the YAML dump is written with
        # sort_keys=False, so the order here is the order of the keys
        self.slide_id = slide_id
        self.path = path
        self.start_date = start_date
        self.end_date = end_date
        self.scan_time_s = scan_time_s
        self.scanner_id = scanner_id
        self.tray = tray
        self.slot = slot
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi
yaml_tag = 'csi_utils.scans.Scan'
SCANNER_IDS = {'4661000426': 'axioscan7_0'}

Axioscan 7 scanner IDs (service number), mapped to our scanner IDs

METADATA_FILE_NAME = {<Type.AXIOSCAN7: 'axioscan7'>: 'scan.yaml', <Type.BZSCANNER: 'bzscanner'>: 'slideinfo.txt'}
DATETIME_FORMAT = {<Type.AXIOSCAN7: 'axioscan7'>: '%Y-%m-%dT%H:%M:%S%z', <Type.BZSCANNER: 'bzscanner'>: '%a %b %d %H:%M:%S %Y'}
BZSCANNER_CHANNEL_MAP = {'DAPI': 'DAPI', 'TRITC': 'AF555', 'CY5': 'AF647', 'BF': 'BRIGHT', 'FITC': 'AF488'}
slide_id
path
start_date
end_date
scan_time_s
scanner_id
tray
slot
camera
objective
pixel_size_um
tile_width_px
tile_height_px
tile_overlap_proportion
channels
roi
def has_same_profile(self, other):
171    def has_same_profile(self, other):
172        return (
173            self.camera == other.camera
174            and self.objective == other.objective
175            and self.pixel_size_um == other.pixel_size_um
176            and self.tile_width_px == other.tile_width_px
177            and self.tile_height_px == other.tile_height_px
178            and self.tile_overlap_proportion == other.tile_overlap_proportion
179            and self.channels == other.channels
180            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
181        )
def get_channel_names(self) -> list[str]:
183    def get_channel_names(self) -> list[str]:
184        """
185        Get the channel names in the scan's channel order.
186        :return: a list of channel names.
187        """
188        return [channel.name for channel in self.channels]

Get the channel names in the scan's channel order.

Returns

a list of channel names.

def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
190    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
191        """
192        Given a list of channel names, return the corresponding indices in the scan's
193        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
194        actual AlexaFluor names (AF555, AF647, AF488).
195        If a list entry is None, it will return -1 for that entry.
196        :param channel_names: a list of channel names.
197        :return: a list of channel indices.
198        """
199        # Get the scan's channel name list
200        scan_channel_names = self.get_channel_names()
201
202        channel_indices = []
203        for name in channel_names:
204            # Convert any BZScanner channel names to the actual channel names
205            if name in self.BZSCANNER_CHANNEL_MAP:
206                name = self.BZSCANNER_CHANNEL_MAP[name]
207
208            # Append the corresponding index if possible
209            if name is None:
210                channel_indices.append(-1)
211            elif name in scan_channel_names:
212                channel_indices.append(scan_channel_names.index(name))
213            else:
214                raise ValueError(
215                    f"Channel name {name} not found in scan channels {scan_channel_names}"
216                )
217        return channel_indices

Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is None, it will return -1 for that entry.

Parameters
  • channel_names: a list of channel names.
Returns

a list of channel indices.

def save_yaml(self, output_path: str):
219    def save_yaml(self, output_path: str):
220        """
221        Write the Scan object to a .yaml file.
222        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
223        :return: nothing; will raise an error on failure
224        """
225        # Create necessary folders
226        output_path = os.path.abspath(output_path)
227        if os.path.splitext(output_path)[1] == ".yaml":
228            os.makedirs(os.path.dirname(output_path), exist_ok=True)
229        else:
230            os.makedirs(output_path, exist_ok=True)
231            # Add the standard metadata file name to the path if needed
232            output_path = os.path.join(
233                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
234            )
235
236        # Populate the file
237        with open(output_path, "w") as file:
238            yaml.dump(self, stream=file, sort_keys=False)

Write the Scan object to a .yaml file.

Parameters
  • output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns

nothing; will raise an error on failure

@classmethod
def load_yaml(cls, input_path: str) -> Self:
240    @classmethod
241    def load_yaml(cls, input_path: str) -> typing.Self:
242        """
243        Load a Scan object from a .yaml file.
244        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
245        :return: a Scan object
246        """
247        input_path = os.path.abspath(input_path)
248        if os.path.isdir(input_path):
249            input_path = os.path.join(
250                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
251            )
252        with open(input_path, "r") as file:
253            metadata_obj = yaml.load(file, Loader=yaml.Loader)
254        return metadata_obj

Load a Scan object from a .yaml file.

Parameters
  • input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns

a Scan object

def to_dict(self) -> dict:
256    def to_dict(self) -> dict:
257        # Dump to json; then add indents and a top-level key
258        channels_json = json.dumps(
259            self.channels, default=lambda x: x.__dict__, indent=2
260        )
261        channels_json = "  ".join(channels_json.splitlines(True))
262        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
263
264        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
265        roi_json = "  ".join(roi_json.splitlines(True))
266        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
267
268        return {
269            "slide_id": self.slide_id,
270            "path": self.path,
271            "start_date": self.start_date,
272            "end_date": self.end_date,
273            "scan_time_s": self.scan_time_s,
274            "scanner_id": self.scanner_id,
275            "tray": self.tray,
276            "slot": self.slot,
277            "camera": self.camera,
278            "objective": self.objective,
279            "pixel_size_um": self.pixel_size_um,
280            "tile_width_px": self.tile_width_px,
281            "tile_height_px": self.tile_height_px,
282            "tile_overlap_proportion": self.tile_overlap_proportion,
283            "channels": channels_json,
284            "roi": roi_json,
285        }
@classmethod
def from_dict(cls, scan_dict) -> Self:
287    @classmethod
288    def from_dict(cls, scan_dict) -> typing.Self:
289        local_timezone = zoneinfo.ZoneInfo("localtime")
290        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
291        result = cls(
292            slide_id=scan_dict["slide_id"],
293            path=scan_dict["path"],
294            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
295            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
296            scan_time_s=int(dt),
297            scanner_id=scan_dict["scanner_id"],
298            tray=scan_dict["tray"],
299            slot=scan_dict["slot"],
300            camera=scan_dict["camera"],
301            objective=scan_dict["objective"],
302            pixel_size_um=scan_dict["pixel_size"],
303            tile_width_px=scan_dict["tile_width"],
304            tile_height_px=scan_dict["tile_height"],
305            tile_overlap_proportion=scan_dict["tile_overlap"],
306        )
307        for channel_json in scan_dict["channels"]["data"]:
308            result.channels.append(
309                cls.Channel(
310                    name=channel_json["name"],
311                    exposure_ms=channel_json["exposure_ms"],
312                    intensity=channel_json["intensity"],
313                )
314            )
315        for roi_json in scan_dict["roi"]["data"]:
316            result.roi.append(
317                cls.ROI(
318                    origin_x_um=roi_json["origin_x_um"],
319                    origin_y_um=roi_json["origin_y_um"],
320                    width_um=roi_json["width_um"],
321                    height_um=roi_json["height_um"],
322                    tile_rows=roi_json["tile_rows"],
323                    tile_cols=roi_json["tile_cols"],
324                    focus_points=roi_json["focus_points"],
325                )
326            )
327        return result
@classmethod
def load_czi(cls, input_path: str) -> Self:
329    @classmethod
330    def load_czi(cls, input_path: str) -> typing.Self:
331        """
332        :param input_path: the path to the .czi file
333        :return: a Scan object
334        """
335        # Normalize paths
336        input_path = os.path.abspath(input_path)
337
338        # Read in metadata as XML elements
339        metadata_xml = aicspylibczi.CziFile(input_path).meta
340        # Read in shape metadata from binary
341        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
342
343        # Populate metadata
344        scan = cls()
345
346        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
347        scan.slide_id = scan.slide_id.strip().upper()
348        # Map the raw scanner ID (service ID) to our IDs
349        scan.scanner_id = cls.SCANNER_IDS[
350            metadata_xml.find(".//Microscope/UserDefinedName").text
351        ]
352
353        # Extract start and finish datetimes
354        date = metadata_xml.find(".//Document/CreationDate").text
355        # Strip out sub-second precision
356        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
357        date_as_datetime = datetime.datetime.strptime(
358            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
359        )
360        scan.start_date = date_as_datetime.strftime(
361            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
362        )
363        scan.scan_time_s = round(
364            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
365        )
366        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
367        scan.end_date = date_as_datetime.strftime(
368            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
369        )
370
371        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
372        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
373
374        # Get camera and magnifying info
375        scan.camera = (
376            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
377        )["Name"]
378        magnification = metadata_xml.find(
379            ".//Objectives/Objective/NominalMagnification"
380        )
381        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
382        scan.objective = f"{magnification.text}x-{aperture.text}"
383        scan.pixel_size_um = (
384            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
385        )
386        # Round off the pixel size to nanometers; might not be optimal, but this
387        # gets rounded when we send it to the database anyways (to 7 places)
388        scan.pixel_size_um = round(scan.pixel_size_um, 3)
389
390        # Get tile information
391        # Note: X Y is untested, could be flipped. I always forget. Just don't use
392        # non-square frames and we're all good.
393        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
394        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
395
396        scan.tile_width_px = rois_shape[0]["X"][1]
397        scan.tile_height_px = rois_shape[0]["Y"][1]
398        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
399
400        # Extract channels and create Channel objects from them
401        channel_indices = []
402        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
403            channel_indices.append(int(channel.attrib["Id"][-1]))
404            intensity_xml = channel.find(".//Intensity")
405            if intensity_xml is None:
406                intensity = 0
407            else:
408                intensity = float(intensity_xml.text[:-2]) * 1e-2
409            scan.channels.append(
410                cls.Channel(
411                    name=channel.attrib["Name"].upper(),
412                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
413                    intensity=intensity,
414                )
415            )
416        # Make sure the channels are sorted
417        scan.channels = [
418            channel for _, channel in sorted(zip(channel_indices, scan.channels))
419        ]
420        # Verify that the shape corresponds to the channels
421        for roi in rois_shape:
422            if roi["C"][1] != len(scan.channels):
423                raise ValueError(
424                    f"Number of channels {len(scan.channels)} "
425                    f"is not the same as the number of channels in an ROI: "
426                    f"{roi['C'][1]}"
427                )
428
429        # Get the real ROI limits; the metadata is not always correct
430        limits_xml = metadata_xml.findall(".//AllowedScanArea")
431        limits = [
432            round(float(limits_xml[0].find("Center").text.split(",")[0])),
433            round(float(limits_xml[0].find("Center").text.split(",")[1])),
434            round(float(limits_xml[0].find("Size").text.split(",")[0])),
435            round(float(limits_xml[0].find("Size").text.split(",")[1])),
436        ]
437        # Convert to top-left and bottom-right
438        limits = [
439            round(limits[0] - limits[2] / 2),
440            round(limits[1] - limits[3] / 2),
441            round(limits[0] + limits[2] / 2),
442            round(limits[1] + limits[3] / 2),
443        ]
444
445        # Extract ROIs and create ROI objects from them
446        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
447        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
448        if len(rois_xml_metadata) != len(rois_shape):
449            raise ValueError(
450                f"Metadata and binary data from {input_path} "
451                f"do not match in number of ROIs"
452            )
453        # We need both to determine the number of rows/columns because the XML lies
454        roi_indices = []
455        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
456            name = roi_xml.attrib["Name"]
457            # Determine the index of this scene
458            scene_index = -1
459            for scene in scenes_xml_metadata:
460                if scene.attrib["Name"] == name:
461                    scene_index = int(scene.attrib["Index"])
462                    break
463            if scene_index == -1:
464                raise ValueError(f"ROI {name} does not correspond to any scenes")
465            else:
466                roi_indices.append(scene_index)
467            # Extract other metadata
468            roi_limits = [
469                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
470                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
471                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
472                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
473            ]
474            # Convert to top-left and bottom-right
475            roi_limits = [
476                round(roi_limits[0] - roi_limits[2] / 2),
477                round(roi_limits[1] - roi_limits[3] / 2),
478                round(roi_limits[0] + roi_limits[2] / 2),
479                round(roi_limits[1] + roi_limits[3] / 2),
480            ]
481            # Bound the ROI to the actual scan limits
482            roi_limits = [
483                max(roi_limits[0], limits[0]),
484                max(roi_limits[1], limits[1]),
485                min(roi_limits[2], limits[2]),
486                min(roi_limits[3], limits[3]),
487            ]
488
489            tile_rows = int(roi_xml.find("Rows").text)
490            # Current best way of reliably extracting; <Columns> entry can be wrong
491            if (roi_shape["M"][1] % tile_rows) != 0:
492                raise ValueError(
493                    f"The number of tiles {roi_shape['M'][1]} is not "
494                    f"divisible by the tile rows {tile_rows}; metadata "
495                    f"must be messed up. Thanks Zeiss"
496                )
497            else:
498                tile_cols = int(roi_shape["M"][1] / tile_rows)
499            # Support points are actually the relevant focus points for this ROI
500            focus_points = []
501            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
502                focus_points.append(
503                    [
504                        int(float(focus_point.find("X").text)),
505                        int(float(focus_point.find("Y").text)),
506                        int(float(focus_point.find("Z").text)),
507                    ]
508                )
509            # Strip all sub-micron precision, it does not matter
510            scan.roi.append(
511                cls.ROI(
512                    origin_x_um=roi_limits[0],
513                    origin_y_um=roi_limits[1],
514                    width_um=roi_limits[2] - roi_limits[0],
515                    height_um=roi_limits[3] - roi_limits[1],
516                    tile_rows=tile_rows,
517                    tile_cols=tile_cols,
518                    focus_points=focus_points,
519                )
520            )
521        # Sort based on the scene indices
522        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
523
524        return scan
Parameters
  • input_path: the path to the .czi file
Returns

a Scan object

@classmethod
def load_txt(cls, input_path: str) -> Self:
526    @classmethod
527    def load_txt(cls, input_path: str) -> typing.Self:
528        """
529        Loads a Scan object from a .txt file, which originates from the BZScanner.
530        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
531        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
532        :return: a Scan object
533        """
534        # Set paths
535        input_path = os.path.abspath(input_path)
536        if os.path.isdir(input_path):
537            input_path = os.path.join(
538                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
539            )
540
541        # Read in metadata as a dict
542        with open(input_path, "r") as file:
543            metadata_contents = file.read()
544            # Read each line, splitting on the = sign
545            metadata_dict = {}
546            for line in metadata_contents.splitlines():
547                key, value = line.split("=")
548                metadata_dict[key] = value
549
550        # Populate metadata
551        scan = cls()
552
553        scan.slide_id = metadata_dict["SLIDEID"]
554        scan.slide_id = scan.slide_id.strip().upper()
555
556        scan.path = metadata_dict["SLIDEDIR"]
557
558        # Extract start and finish datetimes
559        date = metadata_dict["DATE"]
560        date_as_datetime = datetime.datetime.strptime(
561            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
562        )
563        date_as_datetime = date_as_datetime.astimezone(
564            zoneinfo.ZoneInfo("America/Los_Angeles")
565        )  # Hardcoded because BZScanners are here
566        scan.start_date = date_as_datetime.strftime(
567            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
568        )
569        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
570        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
571        scan.end_date = date_as_datetime.strftime(
572            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
573        )
574
575        # Map the raw scanner ID (service ID) to our IDs
576        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
577        scan.tray = 0  # only one tray in a BZScanner
578        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
579
580        # Get camera and magnifying info
581        scan.camera = ""
582        magnification = 10
583        aperture = 0  # TODO: find the actual aperture
584        scan.objective = f"{magnification}x-{aperture}"
585        scan.pixel_size_um = 0.591  # Estimated from image metadata
586
587        # Get tile information
588        scan.tile_width_px = 1362  # Known from image metadata
589        scan.tile_height_px = 1004  # Known from image metadata
590        scan.tile_overlap_proportion = 0
591
592        # Extract channels and create Channel objects from them
593        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
594            channel_settings = metadata_dict[channel].split(",")
595            if channel_settings[0] == "0":
596                continue
597            scan.channels.append(
598                cls.Channel(
599                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
600                    exposure_ms=float(channel_settings[1]),
601                    intensity=float(channel_settings[2]),
602                )
603            )
604
605        # Get focus points
606        focus_points = []
607        for i in range(33):
608            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
609            if focus_point[0] == "0":
610                break
611            focus_points.append(
612                [
613                    int(float(focus_point[1])),
614                    int(float(focus_point[2])),
615                    int(float(focus_point[3])),
616                ]
617            )
618
619        # In the BZScanner, the slide is vertical instead of horizontal
620        # We put in nominal values for the ROI, which is oriented vertically as well
621        tile_rows = 96
622        tile_cols = 24
623        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
624        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
625        origin_x_um = 2500 + round((20000 - roi_width) / 2)
626        origin_y_um = 2500 + round((58000 - roi_height) / 2)
627        scan.roi.append(
628            cls.ROI(
629                origin_x_um=origin_x_um,
630                origin_y_um=origin_y_um,
631                width_um=roi_width,
632                height_um=roi_height,
633                tile_rows=tile_rows,
634                tile_cols=tile_cols,
635                focus_points=focus_points,
636            )
637        )
638        return scan

Loads a Scan object from a .txt file, which originates from the BZScanner. Some metadata from the slideinfo.txt file is missing or adjusted to fit.

Parameters
  • input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
Returns

a Scan object

@classmethod
def load_from_folder(cls, input_path: str) -> Self:
640    @classmethod
641    def load_from_folder(cls, input_path: str) -> typing.Self:
642        """
643        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
644        Prefers scan.yaml if both exist.
645        :param input_path: /path/to/folder
646        :return: a Scan object
647        """
648        input_path = os.path.abspath(input_path)
649        if os.path.isfile(
650            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
651        ):
652            return cls.load_yaml(input_path)
653        elif os.path.isfile(
654            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
655        ):
656            return cls.load_txt(input_path)
657        else:
658            raise ValueError(
659                f"No scan metadata files "
660                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
661                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
662                f"{input_path}"
663            )
664        pass

Load a Scan object from a folder that contains scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.

Parameters
  • input_path: /path/to/folder
Returns

a Scan object

@classmethod
def make_placeholder(cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0) -> Self:
666    @classmethod
667    def make_placeholder(
668        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
669    ) -> typing.Self:
670        """
671        Make a placeholder Scan object with only basic required information filled in.
672        :param slide_id: the slide ID
673        :param n_tile: the number of this tile, which will become the number of
674                       tiles in the scan
675        :param n_roi: the number of ROIs in the scan
676        :return: a Scan object
677        """
678        # Sanitize inputs here
679        slide_id = str(slide_id).strip().upper()
680        n_tile = int(n_tile)
681        n_roi = int(n_roi)
682        # Generate the object
683        scan = cls()
684        scan.slide_id = slide_id
685        scan.roi = [cls.ROI() for _ in range(n_roi)]
686        scan.roi[0].tile_cols = n_tile
687        return scan

Make a placeholder Scan object with only basic required information filled in.

Parameters
  • slide_id: the slide ID
  • n_tile: the tile count for the scan; it is stored as the number of tile columns of the first ROI
  • n_roi: the number of ROIs in the scan
Returns

a Scan object

class Scan.Type(enum.Enum):
    class Type(enum.Enum):
        """
        Scanner types that scan metadata can come from. Members are used as keys
        of METADATA_FILE_NAME and DATETIME_FORMAT.
        """

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"
BZSCANNER = <Type.BZSCANNER: 'bzscanner'>
AXIOSCAN7 = <Type.AXIOSCAN7: 'axioscan7'>
class Scan.Channel(yaml.YAMLObject):
55    class Channel(yaml.YAMLObject):
56        """
57        Class that comprises a channel; we usually have multiple (2-5) per scan.
58        Contains three fields:
59        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
60        - exposure_ms: the exposure time to capture a frame in milliseconds
61        - intensity: the light intensity used OR the gain applied to the channel
62        """
63
64        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
65
66        def __init__(
67            self,
68            name: str = "",
69            exposure_ms: float = -1.0,
70            intensity: float = -1.0,
71        ):
72            self.name = name
73            self.exposure_ms = exposure_ms
74            self.intensity = intensity
75
76        def __repr__(self):
77            return yaml.dump(self, sort_keys=False)
78
79        def __eq__(self, other):
80            return self.__repr__() == other.__repr__()

Class that comprises a channel; we usually have multiple (2-5) per scan. Contains three fields:

  • name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
  • exposure_ms: the exposure time to capture a frame in milliseconds
  • intensity: the light intensity used OR the gain applied to the channel
Scan.Channel(name: str = '', exposure_ms: float = -1.0, intensity: float = -1.0)
66        def __init__(
67            self,
68            name: str = "",
69            exposure_ms: float = -1.0,
70            intensity: float = -1.0,
71        ):
72            self.name = name
73            self.exposure_ms = exposure_ms
74            self.intensity = intensity
yaml_tag = 'csi_utils.csi_scans.Scan.Channel'
name
exposure_ms
intensity
class Scan.ROI(yaml.YAMLObject):
 82    class ROI(yaml.YAMLObject):
 83        """
 84        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 85        """
 86
 87        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 88
 89        def __init__(
 90            self,
 91            origin_x_um: int = -1,
 92            origin_y_um: int = -1,
 93            width_um: int = -1,
 94            height_um: int = -1,
 95            tile_rows: int = -1,
 96            tile_cols: int = -1,
 97            focus_points=None,
 98        ):
 99            if focus_points is None:
100                focus_points = []
101            self.origin_x_um = origin_x_um
102            self.origin_y_um = origin_y_um
103            self.width_um = width_um
104            self.height_um = height_um
105            self.tile_rows = tile_rows
106            self.tile_cols = tile_cols
107            self.focus_points = focus_points
108
109        def __repr__(self):
110            return yaml.dump(self, sort_keys=False)
111
112        def __eq__(self, other):
113            return self.__repr__() == other.__repr__()
114
115        def similar(self, other):
116            return (
117                self.origin_y_um == other.origin_y_um
118                and self.origin_x_um == other.origin_x_um
119                and self.width_um == other.width_um
120                and self.height_um == other.height_um
121                and self.tile_rows == other.tile_rows
122                and self.tile_cols == other.tile_cols
123            )

Class that comprises an ROI; we usually have 1, but may have more in a scan.

Scan.ROI( origin_x_um: int = -1, origin_y_um: int = -1, width_um: int = -1, height_um: int = -1, tile_rows: int = -1, tile_cols: int = -1, focus_points=None)
 89        def __init__(
 90            self,
 91            origin_x_um: int = -1,
 92            origin_y_um: int = -1,
 93            width_um: int = -1,
 94            height_um: int = -1,
 95            tile_rows: int = -1,
 96            tile_cols: int = -1,
 97            focus_points=None,
 98        ):
 99            if focus_points is None:
100                focus_points = []
101            self.origin_x_um = origin_x_um
102            self.origin_y_um = origin_y_um
103            self.width_um = width_um
104            self.height_um = height_um
105            self.tile_rows = tile_rows
106            self.tile_cols = tile_cols
107            self.focus_points = focus_points
yaml_tag = 'csi_utils.csi_scans.Scan.ROI'
origin_x_um
origin_y_um
width_um
height_um
tile_rows
tile_cols
focus_points
def similar(self, other):
115        def similar(self, other):
116            return (
117                self.origin_y_um == other.origin_y_um
118                and self.origin_x_um == other.origin_x_um
119                and self.width_um == other.width_um
120                and self.height_um == other.height_um
121                and self.tile_rows == other.tile_rows
122                and self.tile_cols == other.tile_cols
123            )