csi_images.csi_events

Contains the Event class, which represents a single event in a scan. The Event class optionally holds metadata and features. Lists of events with similar metadata or features can be combined into DataFrames for analysis.

The Event class holds the position of the event within its frame, which can be converted to a position in either the scanner or the slide coordinate frame. See the csi_images.csi_scans documentation page for more information on the coordinate systems.

  1"""
  2Contains the Event class, which represents a single event in a scan.
  3The Event class optionally holds metadata and features. Lists of events with
  4similar metadata or features can be combined into DataFrames for analysis.
  5
  6The Event class holds the position of the event in the frame, which can be converted
  7to the position in the scanner or slide coordinate positions. See the
  8csi_utils.csi_scans documentation page for more information on the coordinate systems.
  9"""
 10
 11import math
 12import os.path
 13import typing
 14
 15import numpy as np
 16import pandas as pd
 17
 18from csi_images.csi_scans import Scan
 19from csi_images.csi_tiles import Tile
 20from csi_images.csi_frames import Frame
 21
 22
 23class Event:
 24    """
 25    A class that represents a single event in a scan, making it easy to evaluate
 26    singular events. Required metadata is exposed as attributes, and optional
 27    metadata and features are stored as DataFrames.
 28    """
 29
 30    # 2D homogenous transformation matrices
 31    # Translations (final column) are in micrometers (um)
 32    SCAN_TO_SLIDE_TRANSFORM = {
 33        Scan.Type.AXIOSCAN7: np.array(
 34            [
 35                [1, 0, 75000],
 36                [0, 1, 0],
 37                [0, 0, 1],
 38            ]
 39        ),
 40        # BZScanner coordinates are a special kind of messed up:
 41        # - The slide is upside-down.
 42        # - The slide is oriented vertically, with the barcode at the bottom.
 43        # - Tiles are numbered from the top-right
 44        Scan.Type.BZSCANNER: np.array(
 45            [
 46                [0, -1, 75000],
 47                [-1, 0, 25000],
 48                [0, 0, 1],
 49            ]
 50        ),
 51    }
 52    """
 53    Homogeneous transformation matrices for converting between scanner and slide
 54    coordinates. The matrices are 3x3, with the final column representing the
 55    translation in micrometers (um). For more information, see 
 56    [affine transformations](https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations).
 57    
 58    Transformations are nominal, and accuracy is not guaranteed; this is due to 
 59    imperfections in slides and alignment in the scanners. 
 60    """
 61
 62    def __init__(
 63        self,
 64        scan: Scan,
 65        tile: Tile,
 66        x: int,
 67        y: int,
 68        size: int = 12,  # End-to-end size in pixels
 69        metadata: pd.Series = None,
 70        features: pd.Series = None,
 71    ):
 72        self.scan = scan
 73        self.tile = tile
 74        self.x = x
 75        self.y = y
 76        self.size = size
 77        self.metadata = metadata
 78        self.features = features
 79
 80    def __repr__(self) -> str:
 81        return f"{self.scan.slide_id}-{self.tile.n}-{self.x}-{self.y}"
 82
 83    def __eq__(self, other) -> bool:
 84        return self.__repr__() == other.__repr__()
 85
 86    def __lt__(self, other):
 87        return self.__repr__() < other.__repr__()
 88
 89    def get_scan_position(self) -> tuple[float, float]:
 90        """
 91        Get the position of the event in the scanner's coordinate frame.
 92        :return: the scan position of the event in micrometers (um).
 93        """
 94        # Get overall pixel position
 95        pixel_x = self.x + (self.scan.tile_width_px * self.tile.x)
 96        pixel_y = self.y + (self.scan.tile_height_px * self.tile.y)
 97        # Convert to micrometers
 98        x_um = pixel_x * self.scan.pixel_size_um
 99        y_um = pixel_y * self.scan.pixel_size_um
100        # Add the scan's origin in the scanner frame
101        x_um += self.scan.roi[self.tile.n_roi].origin_x_um
102        y_um += self.scan.roi[self.tile.n_roi].origin_y_um
103        return x_um, y_um
104
105    def get_slide_position(self) -> tuple[float, float]:
106        """
107        Get the slide position of the event in micrometers (um).
108        :return: the slide position of the event.
109        """
110        # Turn scan_position into a 3x1 vector
111        scan_position = self.get_scan_position()
112        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])
113
114        # Multiply by the appropriate homogeneous matrix
115        if self.scan.scanner_id.startswith(self.scan.Type.AXIOSCAN7.value):
116            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.AXIOSCAN7]
117        elif self.scan.scanner_id.startswith(self.scan.Type.BZSCANNER.value):
118            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.BZSCANNER]
119        else:
120            raise ValueError(f"Scanner type {self.scan.scanner_id} not supported.")
121        slide_position = np.matmul(transform, scan_position)
122        return float(slide_position[0][0]), float(slide_position[1][0])
123
124    def crop_images(
125        self, images: list[np.ndarray], crop_size: int = 50, in_pixels: bool = True
126    ) -> list[np.ndarray]:
127        """
128        Get the event crops from the frame images. Called "get" because it does not
129        need to extract anything; it is very quick for extracting multiple events from
130        the same tile.
131        Use this if you're interested in many events.
132        :param images: the frame images.
133        :param crop_size: the square size of the image crop to get for this event.
134        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
135        :return: image_size x image_size crops of the event in the provided frames. If
136        the event is too close to the edge, the crop will be smaller and not centered.
137        """
138        # Convert a crop size in micrometers to pixels
139        if not in_pixels:
140            crop_size = round(crop_size / self.scan.pixel_size_um)
141        # Find the crop bounds
142        bounds = [
143            self.x - crop_size // 2,
144            self.y - crop_size // 2,
145            self.x + math.ceil(crop_size / 2),
146            self.y + math.ceil(crop_size / 2),
147        ]
148        # Determine how much the bounds violate the image size
149        displacements = [
150            max(0, -bounds[0]),
151            max(0, -bounds[1]),
152            max(0, bounds[2] - images[0].shape[1]),
153            max(0, bounds[3] - images[0].shape[0]),
154        ]
155        # Cap off the bounds
156        bounds = [
157            max(0, bounds[0]),
158            max(0, bounds[1]),
159            min(images[0].shape[1], bounds[2]),
160            min(images[0].shape[0], bounds[3]),
161        ]
162
163        # Crop the images
164        cropped_images = []
165        for image in images:
166            # Create a blank image of the right size
167            cropped_image = np.zeros((crop_size, crop_size), dtype=image.dtype)
168
169            # Insert the cropped image into the blank image, leaving a black buffer
170            # around the edges if the crop would go beyond the original image bounds
171            cropped_image[
172                displacements[1] : crop_size - displacements[3],
173                displacements[0] : crop_size - displacements[2],
174            ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]]
175            cropped_images.append(cropped_image)
176        return cropped_images
177
178    def extract_images(
179        self, crop_size: int = 50, in_pixels: bool = True
180    ) -> list[np.ndarray]:
181        """
182        Extract the images from the scan and tile, reading from the file. Called
183        "extract" because it must read and extract the images from file, which is slow.
184        Use this if you're interested in only a few events, as it is inefficient when
185        reading multiple events from the same tile.
186        :param crop_size: the square size of the image crop to get for this event.
187        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
188        :return: a list of cropped images from the scan in the order of the channels.
189        """
190        frames = Frame.get_frames(self.tile)
191        images = [frame.get_image() for frame in frames]
192        return self.crop_images(images, crop_size, in_pixels)
193
194    @classmethod
195    def extract_images_for_list(
196        cls,
197        events: list[typing.Self],
198        crop_size: int | list[int] = None,
199        in_pixels: bool = True,
200    ) -> list[list[np.ndarray]]:
201        """
202        Get the images for a list of events, ensuring that there is no wasteful reading
203        of the same tile multiple times. This function is more efficient than calling
204        extract_event_images for each event.
205        TODO: test this function
206        :param events: the events to extract images for.
207        :param crop_size: the square size of the image crop to get for this event.
208                          Defaults to twice the size of the event.
209        :param in_pixels: whether the crop size is in pixels or micrometers.
210                          Defaults to pixels, and is ignored if crop_size is None.
211        :return: a list of lists of cropped images for each event.
212        """
213        if len(events) == 0:
214            return []
215
216        # Populate a crop size if none provided
217        if crop_size is None:
218            crop_size = [4 * event.size for event in events]
219            in_pixels = True
220        # Propagate a constant crop size
221        elif isinstance(crop_size, int):
222            crop_size = [crop_size] * len(events)
223
224        # Sort the events by tile; use a shallow copy to avoid modifying the original
225        order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
226
227        # Allocate the list to size
228        images = [None] * len(events)
229        last_tile = None
230        frame_images = None  # Holds large numpy arrays, so expensive to compare
231        # Iterate through in sorted order
232        for i in order:
233            if last_tile != events[i].tile:
234                # Gather the frame images, preserving them for the next event
235                frames = Frame.get_frames(events[i].tile)
236                frame_images = [frame.get_image() for frame in frames]
237
238                last_tile = events[i].tile
239            # Use the frame images to crop the event images
240            # Preserve the original order using order[i]
241            images[i] = events[i].crop_images(frame_images, crop_size[i], in_pixels)
242        return images
243
244
class EventArray:
    """
    A class that holds a large number of events' data, making it easy to analyze and
    manipulate many events at once. A more separated version of the Event class.
    """

    INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y", "size"]

    def __init__(
        self,
        info: pd.DataFrame = None,
        metadata: pd.DataFrame = None,
        features: pd.DataFrame = None,
    ):
        """
        :param info: DataFrame with exactly the columns listed in INFO_COLUMNS.
        :param metadata: optional DataFrame with one row per row of info.
        :param features: optional DataFrame with one row per row of info.
        :raises ValueError: if info has the wrong columns, or if metadata or
        features are given without info or with a different number of rows.
        """
        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y", "size"
        if info is not None and (
            not all(col in info.columns for col in self.INFO_COLUMNS)
            or len(info.columns) != len(self.INFO_COLUMNS)
        ):
            raise ValueError(
                "EventArray.info must have columns 'slide_id', 'tile', 'roi', 'x', 'y', 'size'"
            )
        # All DataFrames must all have the same number of rows
        if metadata is not None and (info is None or len(info) != len(metadata)):
            raise ValueError(
                "If EventArray.metadata is not None, it should match rows with .info"
            )
        if features is not None and (info is None or len(info) != len(features)):
            raise ValueError(
                "If EventArray.features is not None, it should match rows with .info"
            )
        self.info = info
        self.metadata = metadata
        self.features = features

    def __len__(self) -> int:
        # Convenience method to get the number of events
        return 0 if self.info is None else len(self.info)

    @staticmethod
    def _frames_equal(left, right) -> bool:
        # Two absent tables are equal; an absent table never equals a present one
        if left is None or right is None:
            return left is None and right is None
        return bool(left.equals(right))

    def __eq__(self, other) -> bool:
        # Defer to the other operand instead of failing on a missing attribute
        if not isinstance(other, EventArray):
            return NotImplemented
        return all(
            self._frames_equal(getattr(self, name), getattr(other, name))
            for name in ("info", "metadata", "features")
        )

    def add_metadata(self, new_metadata: pd.DataFrame) -> None:
        """
        Add metadata to the EventArray.
        :param new_metadata: the metadata to add, with one row per event.
        :raises ValueError: if the number of rows differs from the number of events.
        """
        # Validate in every case; a column-wise concat of mismatched tables would
        # otherwise silently pad rows with NaN
        if len(self) != len(new_metadata):
            raise ValueError("New metadata does not match length of existing info")
        if self.metadata is None:
            self.metadata = new_metadata
        else:
            # Add the new metadata columns to the existing metadata
            self.metadata = pd.concat([self.metadata, new_metadata], axis=1)

    def add_features(self, new_features: pd.DataFrame) -> None:
        """
        Add features to the EventArray.
        :param new_features: the features to add, with one row per event.
        :raises ValueError: if the number of rows differs from the number of events.
        """
        if len(self) != len(new_features):
            raise ValueError("New features do not match length of existing info")
        if self.features is None:
            self.features = new_features
        else:
            # Add the new feature columns to the existing features
            self.features = pd.concat([self.features, new_features], axis=1)

    @classmethod
    def from_list(cls, events: list[typing.Self]) -> typing.Self:
        """
        Combine EventArrays in a list into a single EventArray.
        :param events: the EventArrays to combine.
        :return: one EventArray holding the rows of all inputs, re-indexed from 0.
        """
        # Gather whichever tables are present; empty EventArrays contribute nothing
        all_info = [ea.info for ea in events if ea.info is not None]
        all_metadata = [ea.metadata for ea in events if ea.metadata is not None]
        all_features = [ea.features for ea in events if ea.features is not None]
        if len(all_info) == 0:
            return cls()

        def stack(frames):
            # No contributing tables means the combined table is absent
            return pd.concat(frames, ignore_index=True) if frames else None

        return cls(stack(all_info), stack(all_metadata), stack(all_features))

    @staticmethod
    def _stack_series(series_list: list, label: str) -> pd.DataFrame | None:
        # Combine per-event Series into one table, or None if events carry none.
        first = series_list[0]
        for series in series_list:
            if type(series) is not type(first):
                raise ValueError(f"All {label} must be the same type.")
            if series is not None and series.shape != first.shape:
                raise ValueError(f"All {label} must be the same shape.")
        if first is None:
            return None
        # Drop the Series names so the rows line up with info's 0..n-1 index
        return pd.DataFrame(series_list).reset_index(drop=True)

    @classmethod
    def from_events(cls, events: list[Event]) -> typing.Self:
        """
        Create an EventArray from a list of events.
        :param events: the events to gather into the EventArray.
        :return: the new EventArray; empty if events is None or empty.
        :raises ValueError: if the events' metadata (or features) differ in type
        or shape.
        """
        # Return an empty array if we were passed nothing
        if events is None or len(events) == 0:
            return cls()
        # Otherwise, grab the info
        info = pd.DataFrame(
            {
                "slide_id": [event.scan.slide_id for event in events],
                "tile": [event.tile.n for event in events],
                "roi": [event.tile.n_roi for event in events],
                "x": [event.x for event in events],
                "y": [event.y for event in events],
                "size": [event.size for event in events],
            }
        )
        metadata = cls._stack_series([event.metadata for event in events], "metadata")
        features = cls._stack_series([event.features for event in events], "features")
        return cls(info=info, metadata=metadata, features=features)

    def to_events(
        self,
        scans: list[Scan],
        ignore_missing_scans=True,
        ignore_metadata=False,
        ignore_features=False,
    ) -> list[Event]:
        """
        Get the events in the EventArray as a list of events.
        :param scans: the scans that the events belong to. Pass an empty list if you
                      don't care about scan metadata.
        :param ignore_missing_scans: whether to create placeholder scans for events
                      without scans; if False, a missing scan raises a ValueError.
        :param ignore_metadata: whether to ignore metadata or not
        :param ignore_features: whether to ignore features or not
        :return: one Event per row of info; empty if the EventArray is empty.
        """
        if self.info is None:
            return []
        # Index scans by slide ID; setdefault keeps the first scan for each ID
        scan_by_slide = {}
        for candidate in scans:
            scan_by_slide.setdefault(candidate.slide_id, candidate)
        # Absent tables simply yield events without metadata/features
        use_metadata = not ignore_metadata and self.metadata is not None
        use_features = not ignore_features and self.features is not None

        # Walk the rows by position so a non-default index cannot misalign them
        columns = [self.info[name].to_numpy() for name in self.INFO_COLUMNS]
        events = []
        for i, (slide_id, tile_n, roi_n, x, y, size) in enumerate(zip(*columns)):
            # Determine the associated scan
            scan = scan_by_slide.get(slide_id)
            if scan is None:
                if not ignore_missing_scans:
                    raise ValueError(f"Scan {slide_id} not found for event {i}.")
                # Create a placeholder scan if the scan is missing
                scan = Scan.make_placeholder(slide_id, tile_n, roi_n)
            # Add to the list
            events.append(
                Event(
                    scan,
                    Tile(scan, tile_n, roi_n),
                    x,
                    y,
                    size=size,
                    metadata=self.metadata.iloc[i] if use_metadata else None,
                    features=self.features.iloc[i] if use_features else None,
                )
            )
        return events

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert all the data in the EventArray to a single DataFrame.
        :return: a DataFrame with all the data in the EventArray; columns are
        prefixed with "info_", "metadata_", or "features_". Empty if the
        EventArray is empty.
        """
        if self.info is None:
            return pd.DataFrame()
        # Copy each present table and prepend its prefix to the column names
        parts = []
        for prefix, table in (
            ("info_", self.info),
            ("metadata_", self.metadata),
            ("features_", self.features),
        ):
            if table is None:
                continue
            table = table.copy()
            table.columns = [f"{prefix}{col}" for col in table.columns]
            parts.append(table)
        if len(parts) == 1:
            return parts[0]
        return pd.concat(parts, axis=1)

    @staticmethod
    def _extract_prefixed(df: pd.DataFrame, prefix: str) -> pd.DataFrame | None:
        # Pull out the columns starting with prefix, with the prefix stripped.
        columns = [col for col in df.columns if col.startswith(prefix)]
        table = df[columns].copy()
        # Only strip the leading prefix; the same text later in a name is kept
        table.columns = [col.removeprefix(prefix) for col in columns]
        return None if table.size == 0 else table

    @classmethod
    def from_dataframe(cls, df) -> typing.Self:
        """
        From a single, special DataFrame, create an EventArray.
        :param df: a DataFrame whose columns are prefixed with "info_",
        "metadata_", or "features_", as produced by to_dataframe().
        :return: an EventArray with the data from the DataFrame.
        """
        # Split the columns into info, metadata, and features and strip prefix
        return cls(
            info=cls._extract_prefixed(df, "info_"),
            metadata=cls._extract_prefixed(df, "metadata_"),
            features=cls._extract_prefixed(df, "features_"),
        )

    def save_csv(self, output_path: str) -> bool:
        """
        Save the events to an CSV file, including metadata and features.
        :param output_path: the path of the CSV file to write.
        :return: whether the output file exists after writing.
        """
        self.to_dataframe().to_csv(output_path, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_csv(cls, input_path: str) -> typing.Self:
        """
        Load the events from an CSV file, including metadata and features.
        :param input_path: the path of a CSV file written by save_csv().
        :return: the EventArray stored in the file.
        """
        return cls.from_dataframe(pd.read_csv(input_path))

    def save_hdf5(self, output_path: str) -> bool:
        """
        Save the events to an HDF5 file, including metadata and features.
        Uses the pandas-provided HDF5 functions for ease, and external compatibility,
        though these files are slightly harder to view in HDFView or similar.
        :param output_path: the path of the HDF5 file to write; an existing file
        is replaced.
        :return: whether the output file exists after writing.
        """
        # Write mode, so tables left in an existing file cannot leak into this save
        with pd.HDFStore(output_path, mode="w") as store:
            # Store the dataframes in the HDF5 file
            if self.info is not None:
                store.put("info", self.info, index=False)
            if self.metadata is not None:
                store.put("metadata", self.metadata, index=False)
            if self.features is not None:
                store.put("features", self.features, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_hdf5(cls, input_path: str) -> typing.Self:
        """
        Load the events from an HDF5 file, including metadata and features.
        :param input_path: the path of an HDF5 file written by save_hdf5().
        :return: the EventArray stored in the file.
        """
        # Read-only, so loading never creates or modifies the file
        with pd.HDFStore(input_path, mode="r") as store:
            # Load the dataframes from the HDF5 file
            info = store.get("info") if "info" in store else None
            metadata = store.get("metadata") if "metadata" in store else None
            features = store.get("features") if "features" in store else None
        return cls(info=info, metadata=metadata, features=features)
class Event:
 24class Event:
 25    """
 26    A class that represents a single event in a scan, making it easy to evaluate
 27    singular events. Required metadata is exposed as attributes, and optional
 28    metadata and features are stored as DataFrames.
 29    """
 30
 31    # 2D homogenous transformation matrices
 32    # Translations (final column) are in micrometers (um)
 33    SCAN_TO_SLIDE_TRANSFORM = {
 34        Scan.Type.AXIOSCAN7: np.array(
 35            [
 36                [1, 0, 75000],
 37                [0, 1, 0],
 38                [0, 0, 1],
 39            ]
 40        ),
 41        # BZScanner coordinates are a special kind of messed up:
 42        # - The slide is upside-down.
 43        # - The slide is oriented vertically, with the barcode at the bottom.
 44        # - Tiles are numbered from the top-right
 45        Scan.Type.BZSCANNER: np.array(
 46            [
 47                [0, -1, 75000],
 48                [-1, 0, 25000],
 49                [0, 0, 1],
 50            ]
 51        ),
 52    }
 53    """
 54    Homogeneous transformation matrices for converting between scanner and slide
 55    coordinates. The matrices are 3x3, with the final column representing the
 56    translation in micrometers (um). For more information, see 
 57    [affine transformations](https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations).
 58    
 59    Transformations are nominal, and accuracy is not guaranteed; this is due to 
 60    imperfections in slides and alignment in the scanners. 
 61    """
 62
 63    def __init__(
 64        self,
 65        scan: Scan,
 66        tile: Tile,
 67        x: int,
 68        y: int,
 69        size: int = 12,  # End-to-end size in pixels
 70        metadata: pd.Series = None,
 71        features: pd.Series = None,
 72    ):
 73        self.scan = scan
 74        self.tile = tile
 75        self.x = x
 76        self.y = y
 77        self.size = size
 78        self.metadata = metadata
 79        self.features = features
 80
 81    def __repr__(self) -> str:
 82        return f"{self.scan.slide_id}-{self.tile.n}-{self.x}-{self.y}"
 83
 84    def __eq__(self, other) -> bool:
 85        return self.__repr__() == other.__repr__()
 86
 87    def __lt__(self, other):
 88        return self.__repr__() < other.__repr__()
 89
 90    def get_scan_position(self) -> tuple[float, float]:
 91        """
 92        Get the position of the event in the scanner's coordinate frame.
 93        :return: the scan position of the event in micrometers (um).
 94        """
 95        # Get overall pixel position
 96        pixel_x = self.x + (self.scan.tile_width_px * self.tile.x)
 97        pixel_y = self.y + (self.scan.tile_height_px * self.tile.y)
 98        # Convert to micrometers
 99        x_um = pixel_x * self.scan.pixel_size_um
100        y_um = pixel_y * self.scan.pixel_size_um
101        # Add the scan's origin in the scanner frame
102        x_um += self.scan.roi[self.tile.n_roi].origin_x_um
103        y_um += self.scan.roi[self.tile.n_roi].origin_y_um
104        return x_um, y_um
105
106    def get_slide_position(self) -> tuple[float, float]:
107        """
108        Get the slide position of the event in micrometers (um).
109        :return: the slide position of the event.
110        """
111        # Turn scan_position into a 3x1 vector
112        scan_position = self.get_scan_position()
113        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])
114
115        # Multiply by the appropriate homogeneous matrix
116        if self.scan.scanner_id.startswith(self.scan.Type.AXIOSCAN7.value):
117            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.AXIOSCAN7]
118        elif self.scan.scanner_id.startswith(self.scan.Type.BZSCANNER.value):
119            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.BZSCANNER]
120        else:
121            raise ValueError(f"Scanner type {self.scan.scanner_id} not supported.")
122        slide_position = np.matmul(transform, scan_position)
123        return float(slide_position[0][0]), float(slide_position[1][0])
124
125    def crop_images(
126        self, images: list[np.ndarray], crop_size: int = 50, in_pixels: bool = True
127    ) -> list[np.ndarray]:
128        """
129        Get the event crops from the frame images. Called "get" because it does not
130        need to extract anything; it is very quick for extracting multiple events from
131        the same tile.
132        Use this if you're interested in many events.
133        :param images: the frame images.
134        :param crop_size: the square size of the image crop to get for this event.
135        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
136        :return: image_size x image_size crops of the event in the provided frames. If
137        the event is too close to the edge, the crop will be smaller and not centered.
138        """
139        # Convert a crop size in micrometers to pixels
140        if not in_pixels:
141            crop_size = round(crop_size / self.scan.pixel_size_um)
142        # Find the crop bounds
143        bounds = [
144            self.x - crop_size // 2,
145            self.y - crop_size // 2,
146            self.x + math.ceil(crop_size / 2),
147            self.y + math.ceil(crop_size / 2),
148        ]
149        # Determine how much the bounds violate the image size
150        displacements = [
151            max(0, -bounds[0]),
152            max(0, -bounds[1]),
153            max(0, bounds[2] - images[0].shape[1]),
154            max(0, bounds[3] - images[0].shape[0]),
155        ]
156        # Cap off the bounds
157        bounds = [
158            max(0, bounds[0]),
159            max(0, bounds[1]),
160            min(images[0].shape[1], bounds[2]),
161            min(images[0].shape[0], bounds[3]),
162        ]
163
164        # Crop the images
165        cropped_images = []
166        for image in images:
167            # Create a blank image of the right size
168            cropped_image = np.zeros((crop_size, crop_size), dtype=image.dtype)
169
170            # Insert the cropped image into the blank image, leaving a black buffer
171            # around the edges if the crop would go beyond the original image bounds
172            cropped_image[
173                displacements[1] : crop_size - displacements[3],
174                displacements[0] : crop_size - displacements[2],
175            ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]]
176            cropped_images.append(cropped_image)
177        return cropped_images
178
179    def extract_images(
180        self, crop_size: int = 50, in_pixels: bool = True
181    ) -> list[np.ndarray]:
182        """
183        Extract the images from the scan and tile, reading from the file. Called
184        "extract" because it must read and extract the images from file, which is slow.
185        Use this if you're interested in only a few events, as it is inefficient when
186        reading multiple events from the same tile.
187        :param crop_size: the square size of the image crop to get for this event.
188        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
189        :return: a list of cropped images from the scan in the order of the channels.
190        """
191        frames = Frame.get_frames(self.tile)
192        images = [frame.get_image() for frame in frames]
193        return self.crop_images(images, crop_size, in_pixels)
194
195    @classmethod
196    def extract_images_for_list(
197        cls,
198        events: list[typing.Self],
199        crop_size: int | list[int] = None,
200        in_pixels: bool = True,
201    ) -> list[list[np.ndarray]]:
202        """
203        Get the images for a list of events, ensuring that there is no wasteful reading
204        of the same tile multiple times. This function is more efficient than calling
205        extract_event_images for each event.
206        TODO: test this function
207        :param events: the events to extract images for.
208        :param crop_size: the square size of the image crop to get for this event.
209                          Defaults to twice the size of the event.
210        :param in_pixels: whether the crop size is in pixels or micrometers.
211                          Defaults to pixels, and is ignored if crop_size is None.
212        :return: a list of lists of cropped images for each event.
213        """
214        if len(events) == 0:
215            return []
216
217        # Populate a crop size if none provided
218        if crop_size is None:
219            crop_size = [4 * event.size for event in events]
220            in_pixels = True
221        # Propagate a constant crop size
222        elif isinstance(crop_size, int):
223            crop_size = [crop_size] * len(events)
224
225        # Sort the events by tile; use a shallow copy to avoid modifying the original
226        order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
227
228        # Allocate the list to size
229        images = [None] * len(events)
230        last_tile = None
231        frame_images = None  # Holds large numpy arrays, so expensive to compare
232        # Iterate through in sorted order
233        for i in order:
234            if last_tile != events[i].tile:
235                # Gather the frame images, preserving them for the next event
236                frames = Frame.get_frames(events[i].tile)
237                frame_images = [frame.get_image() for frame in frames]
238
239                last_tile = events[i].tile
240            # Use the frame images to crop the event images
241            # Preserve the original order using order[i]
242            images[i] = events[i].crop_images(frame_images, crop_size[i], in_pixels)
243        return images

A class that represents a single event in a scan, making it easy to evaluate singular events. Required metadata is exposed as attributes, and optional metadata and features are stored as DataFrames.

Event( scan: csi_images.csi_scans.Scan, tile: csi_images.csi_tiles.Tile, x: int, y: int, size: int = 12, metadata: pandas.core.series.Series = None, features: pandas.core.series.Series = None)
63    def __init__(
64        self,
65        scan: Scan,
66        tile: Tile,
67        x: int,
68        y: int,
69        size: int = 12,  # End-to-end size in pixels
70        metadata: pd.Series = None,
71        features: pd.Series = None,
72    ):
73        self.scan = scan
74        self.tile = tile
75        self.x = x
76        self.y = y
77        self.size = size
78        self.metadata = metadata
79        self.features = features
SCAN_TO_SLIDE_TRANSFORM = {<Type.AXIOSCAN7: 'axioscan7'>: array([[ 1, 0, 75000], [ 0, 1, 0], [ 0, 0, 1]]), <Type.BZSCANNER: 'bzscanner'>: array([[ 0, -1, 75000], [ -1, 0, 25000], [ 0, 0, 1]])}

Homogeneous transformation matrices for converting between scanner and slide coordinates. The matrices are 3x3, with the final column representing the translation in micrometers (um). For more information, see affine transformations.

Transformations are nominal, and accuracy is not guaranteed; this is due to imperfections in slides and alignment in the scanners.

scan
tile
x
y
size
metadata
features
def get_scan_position(self) -> tuple[float, float]:
 90    def get_scan_position(self) -> tuple[float, float]:
 91        """
 92        Get the position of the event in the scanner's coordinate frame.
 93        :return: the scan position of the event in micrometers (um).
 94        """
 95        # Get overall pixel position
 96        pixel_x = self.x + (self.scan.tile_width_px * self.tile.x)
 97        pixel_y = self.y + (self.scan.tile_height_px * self.tile.y)
 98        # Convert to micrometers
 99        x_um = pixel_x * self.scan.pixel_size_um
100        y_um = pixel_y * self.scan.pixel_size_um
101        # Add the scan's origin in the scanner frame
102        x_um += self.scan.roi[self.tile.n_roi].origin_x_um
103        y_um += self.scan.roi[self.tile.n_roi].origin_y_um
104        return x_um, y_um

Get the position of the event in the scanner's coordinate frame.

Returns

the scan position of the event in micrometers (um).

def get_slide_position(self) -> tuple[float, float]:
106    def get_slide_position(self) -> tuple[float, float]:
107        """
108        Get the slide position of the event in micrometers (um).
109        :return: the slide position of the event.
110        """
111        # Turn scan_position into a 3x1 vector
112        scan_position = self.get_scan_position()
113        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])
114
115        # Multiply by the appropriate homogeneous matrix
116        if self.scan.scanner_id.startswith(self.scan.Type.AXIOSCAN7.value):
117            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.AXIOSCAN7]
118        elif self.scan.scanner_id.startswith(self.scan.Type.BZSCANNER.value):
119            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.BZSCANNER]
120        else:
121            raise ValueError(f"Scanner type {self.scan.scanner_id} not supported.")
122        slide_position = np.matmul(transform, scan_position)
123        return float(slide_position[0][0]), float(slide_position[1][0])

Get the slide position of the event in micrometers (um).

Returns

the slide position of the event.

def crop_images( self, images: list[numpy.ndarray], crop_size: int = 50, in_pixels: bool = True) -> list[numpy.ndarray]:
125    def crop_images(
126        self, images: list[np.ndarray], crop_size: int = 50, in_pixels: bool = True
127    ) -> list[np.ndarray]:
128        """
129        Get the event crops from the frame images. Called "get" because it does not
130        need to extract anything; it is very quick for extracting multiple events from
131        the same tile.
132        Use this if you're interested in many events.
133        :param images: the frame images.
134        :param crop_size: the square size of the image crop to get for this event.
135        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
136        :return: image_size x image_size crops of the event in the provided frames. If
137        the event is too close to the edge, the crop will be smaller and not centered.
138        """
139        # Convert a crop size in micrometers to pixels
140        if not in_pixels:
141            crop_size = round(crop_size / self.scan.pixel_size_um)
142        # Find the crop bounds
143        bounds = [
144            self.x - crop_size // 2,
145            self.y - crop_size // 2,
146            self.x + math.ceil(crop_size / 2),
147            self.y + math.ceil(crop_size / 2),
148        ]
149        # Determine how much the bounds violate the image size
150        displacements = [
151            max(0, -bounds[0]),
152            max(0, -bounds[1]),
153            max(0, bounds[2] - images[0].shape[1]),
154            max(0, bounds[3] - images[0].shape[0]),
155        ]
156        # Cap off the bounds
157        bounds = [
158            max(0, bounds[0]),
159            max(0, bounds[1]),
160            min(images[0].shape[1], bounds[2]),
161            min(images[0].shape[0], bounds[3]),
162        ]
163
164        # Crop the images
165        cropped_images = []
166        for image in images:
167            # Create a blank image of the right size
168            cropped_image = np.zeros((crop_size, crop_size), dtype=image.dtype)
169
170            # Insert the cropped image into the blank image, leaving a black buffer
171            # around the edges if the crop would go beyond the original image bounds
172            cropped_image[
173                displacements[1] : crop_size - displacements[3],
174                displacements[0] : crop_size - displacements[2],
175            ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]]
176            cropped_images.append(cropped_image)
177        return cropped_images

Get the event crops from the frame images. Unlike extract_images, this does not read anything from file, so it is very quick for cropping multiple events from the same tile. Use this if you're interested in many events.

Parameters
  • images: the frame images.
  • crop_size: the square size of the image crop to get for this event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
Returns

crop_size x crop_size crops of the event in the provided frames. If the event is too close to the edge, the part of the crop that falls outside the image is filled with zeros.

def extract_images(self, crop_size: int = 50, in_pixels: bool = True) -> list[numpy.ndarray]:
179    def extract_images(
180        self, crop_size: int = 50, in_pixels: bool = True
181    ) -> list[np.ndarray]:
182        """
183        Extract the images from the scan and tile, reading from the file. Called
184        "extract" because it must read and extract the images from file, which is slow.
185        Use this if you're interested in only a few events, as it is inefficient when
186        reading multiple events from the same tile.
187        :param crop_size: the square size of the image crop to get for this event.
188        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
189        :return: a list of cropped images from the scan in the order of the channels.
190        """
191        frames = Frame.get_frames(self.tile)
192        images = [frame.get_image() for frame in frames]
193        return self.crop_images(images, crop_size, in_pixels)

Extract the images from the scan and tile, reading from the file. Called "extract" because it must read and extract the images from file, which is slow. Use this if you're interested in only a few events, as it is inefficient when reading multiple events from the same tile.

Parameters
  • crop_size: the square size of the image crop to get for this event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
Returns

a list of cropped images from the scan in the order of the channels.

@classmethod
def extract_images_for_list( cls, events: list[typing.Self], crop_size: int | list[int] = None, in_pixels: bool = True) -> list[list[numpy.ndarray]]:
195    @classmethod
196    def extract_images_for_list(
197        cls,
198        events: list[typing.Self],
199        crop_size: int | list[int] = None,
200        in_pixels: bool = True,
201    ) -> list[list[np.ndarray]]:
202        """
203        Get the images for a list of events, ensuring that there is no wasteful reading
204        of the same tile multiple times. This function is more efficient than calling
205        extract_event_images for each event.
206        TODO: test this function
207        :param events: the events to extract images for.
208        :param crop_size: the square size of the image crop to get for this event.
209                          Defaults to twice the size of the event.
210        :param in_pixels: whether the crop size is in pixels or micrometers.
211                          Defaults to pixels, and is ignored if crop_size is None.
212        :return: a list of lists of cropped images for each event.
213        """
214        if len(events) == 0:
215            return []
216
217        # Populate a crop size if none provided
218        if crop_size is None:
219            crop_size = [4 * event.size for event in events]
220            in_pixels = True
221        # Propagate a constant crop size
222        elif isinstance(crop_size, int):
223            crop_size = [crop_size] * len(events)
224
225        # Sort the events by tile; use a shallow copy to avoid modifying the original
226        order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
227
228        # Allocate the list to size
229        images = [None] * len(events)
230        last_tile = None
231        frame_images = None  # Holds large numpy arrays, so expensive to compare
232        # Iterate through in sorted order
233        for i in order:
234            if last_tile != events[i].tile:
235                # Gather the frame images, preserving them for the next event
236                frames = Frame.get_frames(events[i].tile)
237                frame_images = [frame.get_image() for frame in frames]
238
239                last_tile = events[i].tile
240            # Use the frame images to crop the event images
241            # Preserve the original order using order[i]
242            images[i] = events[i].crop_images(frame_images, crop_size[i], in_pixels)
243        return images

Get the images for a list of events, ensuring that there is no wasteful reading of the same tile multiple times. This function is more efficient than calling extract_images for each event. TODO: test this function

Parameters
  • events: the events to extract images for.
  • crop_size: the square size of the image crop to get for this event. Defaults to four times the size of the event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels, and is ignored if crop_size is None.
Returns

a list of lists of cropped images for each event.

class EventArray:
class EventArray:
    """
    A class that holds a large number of events' data, making it easy to analyze and
    manipulate many events at once. A more separated version of the Event class.

    The data lives in up to three DataFrames with one row per event: info (exactly
    the columns in INFO_COLUMNS), metadata, and features. Any of them may be None.
    """

    INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y", "size"]

    def __init__(
        self,
        info: pd.DataFrame = None,
        metadata: pd.DataFrame = None,
        features: pd.DataFrame = None,
    ):
        """
        Validate and store the DataFrames.
        :param info: DataFrame with exactly the columns in INFO_COLUMNS, or None.
        :param metadata: optional DataFrame with one row per row of info.
        :param features: optional DataFrame with one row per row of info.
        :raises ValueError: if info has the wrong columns, or if metadata/features
                            are given without info or with a different row count.
        """
        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y", "size"
        if info is not None and (
            not all(col in info.columns for col in self.INFO_COLUMNS)
            or len(info.columns) != len(self.INFO_COLUMNS)
        ):
            raise ValueError(
                "EventArray.info must have columns 'slide_id', 'tile', 'roi', 'x', 'y', 'size'"
            )
        # All DataFrames must all have the same number of rows
        if metadata is not None and (info is None or len(info) != len(metadata)):
            raise ValueError(
                "If EventArray.metadata is not None, it should match rows with .info"
            )
        if features is not None and (info is None or len(info) != len(features)):
            raise ValueError(
                "If EventArray.features is not None, it should match rows with .info"
            )
        self.info = info
        self.metadata = metadata
        self.features = features

    def __len__(self) -> int:
        # Convenience method to get the number of events
        if self.info is None:
            return 0
        return len(self.info)

    @staticmethod
    def _frames_equal(left, right) -> bool:
        # Two optional DataFrames match if both are None or both are equal DataFrames
        if left is None or right is None:
            return left is None and right is None
        return (
            isinstance(left, pd.DataFrame)
            and isinstance(right, pd.DataFrame)
            and left.equals(right)
        )

    def __eq__(self, other):
        # Let Python fall back to its default comparison for unrelated types
        # instead of failing on a missing attribute
        if not isinstance(other, EventArray):
            return NotImplemented
        return (
            self._frames_equal(self.info, other.info)
            and self._frames_equal(self.metadata, other.metadata)
            and self._frames_equal(self.features, other.features)
        )

    def add_metadata(self, new_metadata: pd.DataFrame) -> None:
        """
        Add metadata to the EventArray, either as the initial metadata or as extra
        columns next to the existing metadata.
        :param new_metadata: the metadata to add; must have one row per event.
        :raises ValueError: if the row count does not match the number of events.
        """
        # Validate in both cases; a mismatched concat would add NaN-padded rows
        if len(self) != len(new_metadata):
            raise ValueError("New metadata does not match length of existing info")
        if self.metadata is None:
            self.metadata = new_metadata
        else:
            # Add the new metadata columns to the existing metadata
            self.metadata = pd.concat([self.metadata, new_metadata], axis=1)

    def add_features(self, new_features: pd.DataFrame) -> None:
        """
        Add features to the EventArray, either as the initial features or as extra
        columns next to the existing features.
        :param new_features: the features to add; must have one row per event.
        :raises ValueError: if the row count does not match the number of events.
        """
        # Validate in both cases; a mismatched concat would add NaN-padded rows
        if len(self) != len(new_features):
            raise ValueError("New features do not match length of existing info")
        if self.features is None:
            self.features = new_features
        else:
            # Add the new feature columns to the existing features
            self.features = pd.concat([self.features, new_features], axis=1)

    @classmethod
    def from_list(cls, events: list[typing.Self]) -> typing.Self:
        """
        Combine EventArrays in a list into a single EventArray.
        :param events: the EventArrays to combine, in order.
        :return: a new EventArray with the rows of all inputs; empty if none of
                 the inputs has info.
        """
        all_info = []
        all_metadata = []
        all_features = []
        for event_array in events:
            # Skip empty EventArrays and missing optional parts
            if event_array.info is not None:
                all_info.append(event_array.info)
            if event_array.metadata is not None:
                all_metadata.append(event_array.metadata)
            if event_array.features is not None:
                all_features.append(event_array.features)
        if len(all_info) == 0:
            return cls()
        # The constructor rejects the result if only some inputs carried
        # metadata/features, since the row counts then no longer match info
        return cls(
            pd.concat(all_info, ignore_index=True),
            pd.concat(all_metadata, ignore_index=True) if all_metadata else None,
            pd.concat(all_features, ignore_index=True) if all_features else None,
        )

    @staticmethod
    def _stack_series(series_list: list, label: str):
        # Stack per-event Series (or all-None entries) into one DataFrame, or None
        first = series_list[0]
        for series in series_list:
            if type(series) is not type(first):
                raise ValueError(f"All {label} must be the same type.")
            if series is not None and series.shape != first.shape:
                raise ValueError(f"All {label} must be the same shape.")
        if first is None:
            return None
        # Rows would otherwise be labeled with the Series names; renumber them
        # so they line up with the info rows
        return pd.DataFrame(series_list).reset_index(drop=True)

    @classmethod
    def from_events(cls, events: list[Event]) -> typing.Self:
        """
        Create an EventArray from a list of events.
        :param events: the events to convert; None or an empty list gives an
                       empty EventArray.
        :return: a new EventArray with one row per event.
        :raises ValueError: if the events' metadata (or features) differ in type
                            or shape.
        """
        # Return an empty array if we were passed nothing
        if events is None or len(events) == 0:
            return cls()
        # Otherwise, grab the info
        info = pd.DataFrame(
            {
                "slide_id": [event.scan.slide_id for event in events],
                "tile": [event.tile.n for event in events],
                "roi": [event.tile.n_roi for event in events],
                "x": [event.x for event in events],
                "y": [event.y for event in events],
                "size": [event.size for event in events],
            }
        )
        metadata = cls._stack_series([event.metadata for event in events], "metadata")
        features = cls._stack_series([event.features for event in events], "features")
        return cls(info=info, metadata=metadata, features=features)

    def to_events(
        self,
        scans: list[Scan],
        ignore_missing_scans=True,
        ignore_metadata=False,
        ignore_features=False,
    ) -> list[Event]:
        """
        Get the events in the EventArray as a list of events.
        :param scans: the scans that the events belong to. Pass an empty list if you
                      don't care about scan metadata.
        :param ignore_missing_scans: whether to create placeholder scans for events
                                     without a matching scan; if False, a missing
                                     scan raises ValueError.
        :param ignore_metadata: whether to ignore metadata or not
        :param ignore_features: whether to ignore features or not
        :return: one Event per row; an empty list if the EventArray is empty.
        :raises ValueError: if a scan is missing and ignore_missing_scans is False.
        """
        if self.info is None:
            return []
        # Missing metadata/features are treated the same as ignored ones
        use_metadata = not ignore_metadata and self.metadata is not None
        use_features = not ignore_features and self.features is not None

        events = []
        # Index by position so a non-default DataFrame index cannot break lookups
        for i in range(len(self.info)):
            slide_id = self.info["slide_id"].iloc[i]
            tile_n = self.info["tile"].iloc[i]
            roi_n = self.info["roi"].iloc[i]
            # Determine the associated scan: the first one with a matching slide ID
            scan = next((s for s in scans if s.slide_id == slide_id), None)
            if scan is None:
                if not ignore_missing_scans:
                    raise ValueError(f"Scan {slide_id} not found for event {i}.")
                # Create a placeholder scan if the scan is missing
                scan = Scan.make_placeholder(slide_id, tile_n, roi_n)
            events.append(
                Event(
                    scan,
                    Tile(scan, tile_n, roi_n),
                    self.info["x"].iloc[i],
                    self.info["y"].iloc[i],
                    size=self.info["size"].iloc[i],
                    metadata=self.metadata.iloc[i] if use_metadata else None,
                    features=self.features.iloc[i] if use_features else None,
                )
            )
        return events

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert all the data in the EventArray to a single DataFrame. Columns are
        prefixed with "info_", "metadata_", or "features_" according to their origin.
        :return: a DataFrame with all the data in the EventArray; an empty
                 DataFrame if the EventArray is empty.
        """
        if self.info is None:
            return pd.DataFrame()
        parts = []
        for prefix, frame in (
            ("info_", self.info),
            ("metadata_", self.metadata),
            ("features_", self.features),
        ):
            if frame is None:
                continue
            # Work on a copy so the stored DataFrame keeps its column names
            renamed = frame.copy()
            renamed.columns = [f"{prefix}{col}" for col in renamed.columns]
            parts.append(renamed)
        return pd.concat(parts, axis=1)

    @staticmethod
    def _take_prefixed(df: pd.DataFrame, prefix: str):
        # Copy the columns starting with prefix, strip it, and return None if empty
        part = df[[col for col in df.columns if col.startswith(prefix)]].copy()
        # Strip only the leading prefix; later occurrences belong to the name
        part.columns = [col.removeprefix(prefix) for col in part.columns]
        return None if part.size == 0 else part

    @classmethod
    def from_dataframe(cls, df) -> typing.Self:
        """
        From a single, special DataFrame, create an EventArray. Columns must be
        prefixed with "info_", "metadata_", or "features_", as written by
        to_dataframe; other columns are dropped.
        :param df: the combined DataFrame.
        :return: an EventArray with the data split back into info, metadata,
                 and features.
        """
        return cls(
            info=cls._take_prefixed(df, "info_"),
            metadata=cls._take_prefixed(df, "metadata_"),
            features=cls._take_prefixed(df, "features_"),
        )

    def save_csv(self, output_path: str) -> bool:
        """
        Save the events to an CSV file, including metadata and features.
        :param output_path: the path of the CSV file to write.
        :return: whether the output file exists after writing.
        """
        self.to_dataframe().to_csv(output_path, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_csv(cls, input_path: str) -> typing.Self:
        """
        Load the events from an CSV file, including metadata and features.
        :param input_path: the path of a CSV file in the to_dataframe layout.
        :return: the loaded EventArray.
        """
        # Load the CSV file
        df = pd.read_csv(input_path)
        return cls.from_dataframe(df)

    def save_hdf5(self, output_path: str) -> bool:
        """
        Save the events to an HDF5 file, including metadata and features.
        Uses the pandas-provided HDF5 functions for ease, and external compatibility,
        though these files are slightly harder to view in HDFView or similar.
        :param output_path: the path of the HDF5 file to write; an existing file
                            is replaced.
        :return: whether the output file exists after writing.
        """
        # Open in write mode so keys left over from an earlier save to the same
        # path (e.g. old features) cannot be picked up by a later load
        with pd.HDFStore(output_path, mode="w") as store:
            # Store the dataframes in the HDF5 file
            if self.info is not None:
                store.put("info", self.info, index=False)
            if self.metadata is not None:
                store.put("metadata", self.metadata, index=False)
            if self.features is not None:
                store.put("features", self.features, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_hdf5(cls, input_path: str) -> typing.Self:
        """
        Load the events from an HDF5 file, including metadata and features.
        :param input_path: the path of an HDF5 file with "info", "metadata",
                           and/or "features" keys, as written by save_hdf5.
        :return: the loaded EventArray; parts missing from the file are None.
        """
        # Open the input_path as an HDF5 file
        with pd.HDFStore(input_path) as store:
            # Load the dataframes from the HDF5 file
            info = store.get("info") if "info" in store else None
            metadata = store.get("metadata") if "metadata" in store else None
            features = store.get("features") if "features" in store else None
        return cls(info=info, metadata=metadata, features=features)

A class that holds a large number of events' data, making it easy to analyze and manipulate many events at once. A more separated version of the Event class.

EventArray( info: pandas.core.frame.DataFrame = None, metadata: pandas.core.frame.DataFrame = None, features: pandas.core.frame.DataFrame = None)
254    def __init__(
255        self,
256        info: pd.DataFrame = None,
257        metadata: pd.DataFrame = None,
258        features: pd.DataFrame = None,
259    ):
260        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y", "size"
261        if info is not None and (
262            not all(col in info.columns for col in self.INFO_COLUMNS)
263            or len(info.columns) != 6
264        ):
265            raise ValueError(
266                "EventArray.info must have columns 'slide_id', 'tile', 'roi', 'x', 'y', 'size'"
267            )
268        # All DataFrames must all have the same number of rows
269        if metadata is not None and (info is None or len(info) != len(metadata)):
270            raise ValueError(
271                "If EventArray.metadata is not None, it should match rows with .info"
272            )
273        if features is not None and (info is None or len(info) != len(features)):
274            raise ValueError(
275                "If EventArray.features is not None, it should match rows with .info"
276            )
277        self.info = info
278        self.metadata = metadata
279        self.features = features
INFO_COLUMNS = ['slide_id', 'tile', 'roi', 'x', 'y', 'size']
info
metadata
features
def add_metadata(self, new_metadata: pandas.core.frame.DataFrame) -> None:
328    def add_metadata(self, new_metadata: pd.DataFrame) -> None:
329        """
330        Add metadata to the EventArray.
331        :param new_metadata: the metadata to add.
332        """
333        if self.metadata is None:
334            if len(self) != len(new_metadata):
335                raise ValueError("New metadata does not match length of existing info")
336            self.metadata = new_metadata
337        else:
338            # Add the new metadata columns to the existing metadata
339            self.metadata = pd.concat([self.metadata, new_metadata], axis=1)

Add metadata to the EventArray.

Parameters
  • new_metadata: the metadata to add.
def add_features(self, new_features: pandas.core.frame.DataFrame) -> None:
341    def add_features(self, new_features: pd.DataFrame) -> None:
342        """
343        Add features to the EventArray.
344        :param new_features: the metadata to add.
345        """
346        if self.features is None:
347            if len(self) != len(new_features):
348                raise ValueError("New metadata does not match length of existing info")
349            self.features = new_features
350        else:
351            # Add the new metadata columns to the existing metadata
352            self.features = pd.concat([self.features, new_features], axis=1)

Add features to the EventArray.

Parameters
  • new_features: the features to add.
@classmethod
def from_list(cls, events: list[typing.Self]) -> Self:
354    @classmethod
355    def from_list(cls, events: list[typing.Self]) -> typing.Self:
356        """
357        Combine EventArrays in a list into a single EventArray.
358        :param events: the new list of events.
359        """
360        all_info = []
361        all_metadata = []
362        all_features = []
363        for event_array in events:
364            # Skip empty EventArrays
365            if event_array.info is not None:
366                all_info.append(event_array.info)
367            if event_array.metadata is not None:
368                all_metadata.append(event_array.metadata)
369            if event_array.features is not None:
370                all_features.append(event_array.features)
371        if len(all_info) == 0:
372            return EventArray()
373        else:
374            all_info = pd.concat(all_info, ignore_index=True)
375        if len(all_metadata) == 0:
376            all_metadata = None
377        else:
378            all_metadata = pd.concat(all_metadata, ignore_index=True)
379        if len(all_features) == 0:
380            all_features = None
381        else:
382            all_features = pd.concat(all_features, ignore_index=True)
383
384        return EventArray(all_info, all_metadata, all_features)

Combine EventArrays in a list into a single EventArray.

Parameters
  • events: the list of EventArrays to combine.
@classmethod
def from_events(cls, events: list[Event]) -> Self:
386    @classmethod
387    def from_events(cls, events: list[Event]) -> typing.Self:
388        """
389        Set the events in the EventArray to a new list of events.
390        :param events: the new list of events.
391        """
392        # Return an empty array if we were passed nothing
393        if events is None or len(events) == 0:
394            return EventArray()
395        # Otherwise, grab the info
396        info = pd.DataFrame(
397            {
398                "slide_id": [event.scan.slide_id for event in events],
399                "tile": [event.tile.n for event in events],
400                "roi": [event.tile.n_roi for event in events],
401                "x": [event.x for event in events],
402                "y": [event.y for event in events],
403                "size": [event.size for event in events],
404            }
405        )
406        metadata_list = [event.metadata for event in events]
407        # Iterate through and ensure that all metadata is the same shape
408        for metadata in metadata_list:
409            if type(metadata) != type(metadata_list[0]):
410                raise ValueError("All metadata must be the same type.")
411            if metadata is not None and metadata.shape != metadata_list[0].shape:
412                raise ValueError("All metadata must be the same shape.")
413        if metadata_list[0] is None:
414            metadata = None
415        else:
416            metadata = pd.DataFrame(metadata_list)
417        features_list = [event.features for event in events]
418        # Iterate through and ensure that all features are the same shape
419        for features in features_list:
420            if type(features) != type(features_list[0]):
421                raise ValueError("All features must be the same type.")
422            if features is not None and features.shape != features_list[0].shape:
423                raise ValueError("All features must be the same shape.")
424        if features_list[0] is None:
425            features = None
426        else:
427            features = pd.DataFrame(features_list)
428        return EventArray(info=info, metadata=metadata, features=features)

Create a new EventArray from a list of events.

Parameters
  • events: the new list of events.
def to_events( self, scans: list[csi_images.csi_scans.Scan], ignore_missing_scans=True, ignore_metadata=False, ignore_features=False) -> list[Event]:
430    def to_events(
431        self,
432        scans: list[Scan],
433        ignore_missing_scans=True,
434        ignore_metadata=False,
435        ignore_features=False,
436    ) -> list[Event]:
437        """
438        Get the events in the EventArray as a list of events.
439        :param scans: the scans that the events belong to. Pass an empty list if you
440                      don't care about scan metadata.
441        :param ignore_missing_scans: whether to create blank scans for events without scans.
442        :param ignore_metadata: whether to ignore metadata or not
443        :param ignore_features: whether to ignore features or not
444        :return:
445        """
446        events = []
447        for i in range(len(self.info)):
448            # Determine the associated scan
449            scan = None
450            for s in scans:
451                if s.slide_id == self.info["slide_id"][i]:
452                    scan = s
453                    break
454            if scan is None:
455                if ignore_missing_scans:
456                    # Create a placeholder scan if the scan is missing
457                    scan = Scan.make_placeholder(
458                        self.info["slide_id"][i],
459                        self.info["tile"][i],
460                        self.info["roi"][i],
461                    )
462                else:
463                    raise ValueError(
464                        f"Scan {self.info['slide_id'][i]} not found for event {i}."
465                    )
466            # Add to the list
467            events.append(
468                Event(
469                    scan,
470                    Tile(scan, self.info["tile"][i], self.info["roi"][i]),
471                    self.info["x"][i],
472                    self.info["y"][i],
473                    size=self.info["size"][i],
474                    metadata=None if ignore_metadata else self.metadata.loc[i],
475                    features=None if ignore_features else self.features.loc[i],
476                )
477            )
478        return events

Get the events in the EventArray as a list of events.

Parameters
  • scans: the scans that the events belong to. Pass an empty list if you don't care about scan metadata.
  • ignore_missing_scans: whether to create blank scans for events without scans.
  • ignore_metadata: whether to ignore metadata or not
  • ignore_features: whether to ignore features or not
Returns
def to_dataframe(self) -> pandas.core.frame.DataFrame:
480    def to_dataframe(self) -> pd.DataFrame:
481        """
482        Convert all the data in the EventArray to a single DataFrame.
483        :return: a DataFrame with all the data in the EventArray.
484        """
485        # Make a copy of the info DataFrame and prepend "info_" to the column names
486        output = self.info.copy()
487        output.columns = [f"info_{col}" for col in output.columns]
488        # Combine with the metadata and prepend "metadata_" to the column names
489        if self.metadata is not None:
490            metadata = self.metadata.copy()
491            metadata.columns = [f"metadata_{col}" for col in metadata.columns]
492            output = pd.concat([output, metadata], axis=1)
493        # Combine with the features and prepend "features_" to the column names
494        if self.features is not None:
495            features = self.features.copy()
496            features.columns = [f"features_{col}" for col in features.columns]
497            output = pd.concat([output, features], axis=1)
498        return output

Convert all the data in the EventArray to a single DataFrame.

Returns

a DataFrame with all the data in the EventArray.

@classmethod
def from_dataframe(cls, df) -> Self:
500    @classmethod
501    def from_dataframe(cls, df) -> typing.Self:
502        """
503        From a single, special DataFrame, create an EventArray.
504        :return: a DataFrame with all the data in the EventArray.
505        """
506        # Split the columns into info, metadata, and features and strip prefix
507        info = df[[col for col in df.columns if col.startswith("info_")]].copy()
508        info.columns = [col.replace("info_", "") for col in info.columns]
509        if info.size == 0:
510            info = None
511        metadata = df[[col for col in df.columns if col.startswith("metadata_")]].copy()
512        metadata.columns = [col.replace("metadata_", "") for col in metadata.columns]
513        if metadata.size == 0:
514            metadata = None
515        features = df[[col for col in df.columns if col.startswith("features_")]].copy()
516        features.columns = [col.replace("features_", "") for col in features.columns]
517        if features.size == 0:
518            features = None
519        return cls(info=info, metadata=metadata, features=features)

From a single, special DataFrame, create an EventArray.

Returns

an EventArray built from the prefixed columns of the DataFrame.

def save_csv(self, output_path: str) -> bool:
521    def save_csv(self, output_path: str) -> bool:
522        """
523        Save the events to an CSV file, including metadata and features.
524        :param output_path:
525        :return:
526        """
527        self.to_dataframe().to_csv(output_path, index=False)
528        return os.path.exists(output_path)

Save the events to a CSV file, including metadata and features.

Parameters
  • output_path:
Returns
@classmethod
def load_csv(cls, input_path: str) -> Self:
530    @classmethod
531    def load_csv(cls, input_path: str) -> typing.Self:
532        """
533        Load the events from an CSV file, including metadata and features.
534        :param input_path:
535        :return:
536        """
537        # Load the CSV file
538        df = pd.read_csv(input_path)
539        return cls.from_dataframe(df)

Load the events from a CSV file, including metadata and features.

Parameters
  • input_path:
Returns
def save_hdf5(self, output_path: str) -> bool:
541    def save_hdf5(self, output_path: str) -> bool:
542        """
543        Save the events to an HDF5 file, including metadata and features.
544        Uses the pandas-provided HDF5 functions for ease, and external compatibility,
545        though these files are slightly harder to view in HDFView or similar.
546        :param output_path:
547        :return:
548        """
549        # Open the output_path as an HDF5 file
550        with pd.HDFStore(output_path) as store:
551            # Store the dataframes in the HDF5 file
552            if self.info is not None:
553                store.put("info", self.info, index=False)
554            if self.metadata is not None:
555                store.put("metadata", self.metadata, index=False)
556            if self.features is not None:
557                store.put("features", self.features, index=False)
558        return os.path.exists(output_path)

Save the events to an HDF5 file, including metadata and features. Uses the pandas-provided HDF5 functions for ease, and external compatibility, though these files are slightly harder to view in HDFView or similar.

Parameters
  • output_path:
Returns
@classmethod
def load_hdf5(cls, input_path: str) -> Self:
560    @classmethod
561    def load_hdf5(cls, input_path: str) -> typing.Self:
562        """
563        Load the events from an HDF5 file, including metadata and features.
564        :param input_path:
565        :return:
566        """
567        # Open the input_path as an HDF5 file
568        with pd.HDFStore(input_path) as store:
569            # Load the dataframes from the HDF5 file
570            info = store.get("info") if "info" in store else None
571            metadata = store.get("metadata") if "metadata" in store else None
572            features = store.get("features") if "features" in store else None
573        return cls(info=info, metadata=metadata, features=features)

Load the events from an HDF5 file, including metadata and features.

Parameters
  • input_path:
Returns