csi_images.csi_events
Contains the Event class, which represents a single event in a scan. The Event class optionally holds metadata and features. Lists of events with similar metadata or features can be combined into DataFrames for analysis.
The Event class holds the position of the event in the frame, which can be converted to the position in the scanner or slide coordinate systems. See the csi_images.csi_scans documentation page for more information on the coordinate systems.
"""
Contains the Event class, which represents a single event in a scan.
The Event class optionally holds metadata and features. Lists of events with
similar metadata or features can be combined into DataFrames for analysis.

The Event class holds the position of the event in the frame, which can be converted
to the position in the scanner or slide coordinate systems. See the
csi_images.csi_scans documentation page for more information on the coordinate systems.
"""

import math
import os.path
import typing

import numpy as np
import pandas as pd

from csi_images.csi_scans import Scan
from csi_images.csi_tiles import Tile
from csi_images.csi_frames import Frame


class Event:
    """
    A class that represents a single event in a scan, making it easy to evaluate
    singular events. Required metadata is exposed as attributes, and optional
    metadata and features are stored as pandas Series.
    """

    # 2D homogeneous transformation matrices
    # Translations (final column) are in micrometers (um)
    SCAN_TO_SLIDE_TRANSFORM = {
        Scan.Type.AXIOSCAN7: np.array(
            [
                [1, 0, 75000],
                [0, 1, 0],
                [0, 0, 1],
            ]
        ),
        # BZScanner coordinates are a special kind of messed up:
        # - The slide is upside-down.
        # - The slide is oriented vertically, with the barcode at the bottom.
        # - Tiles are numbered from the top-right
        Scan.Type.BZSCANNER: np.array(
            [
                [0, -1, 75000],
                [-1, 0, 25000],
                [0, 0, 1],
            ]
        ),
    }
    """
    Homogeneous transformation matrices for converting between scanner and slide
    coordinates. The matrices are 3x3, with the final column representing the
    translation in micrometers (um). For more information, see
    [affine transformations](https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations).

    Transformations are nominal, and accuracy is not guaranteed; this is due to
    imperfections in slides and alignment in the scanners.
    """

    def __init__(
        self,
        scan: Scan,
        tile: Tile,
        x: int,
        y: int,
        size: int = 12,  # End-to-end size in pixels
        metadata: pd.Series = None,
        features: pd.Series = None,
    ):
        self.scan = scan
        self.tile = tile
        self.x = x
        self.y = y
        self.size = size
        self.metadata = metadata
        self.features = features

    def __repr__(self) -> str:
        return f"{self.scan.slide_id}-{self.tile.n}-{self.x}-{self.y}"

    def __eq__(self, other) -> bool:
        # Events are identified by slide, tile number, and position only
        return self.__repr__() == other.__repr__()

    def __lt__(self, other):
        # String ordering of the identifier; used to group events by tile
        return self.__repr__() < other.__repr__()

    def get_scan_position(self) -> tuple[float, float]:
        """
        Get the position of the event in the scanner's coordinate frame.
        :return: the scan position of the event in micrometers (um).
        """
        # Get overall pixel position
        pixel_x = self.x + (self.scan.tile_width_px * self.tile.x)
        pixel_y = self.y + (self.scan.tile_height_px * self.tile.y)
        # Convert to micrometers
        x_um = pixel_x * self.scan.pixel_size_um
        y_um = pixel_y * self.scan.pixel_size_um
        # Add the scan's origin in the scanner frame
        x_um += self.scan.roi[self.tile.n_roi].origin_x_um
        y_um += self.scan.roi[self.tile.n_roi].origin_y_um
        return x_um, y_um

    def get_slide_position(self) -> tuple[float, float]:
        """
        Get the slide position of the event in micrometers (um).
        :return: the slide position of the event.
        :raises ValueError: if the scan's scanner_id does not start with a
            supported scanner type.
        """
        # Turn scan_position into a 3x1 vector
        scan_position = self.get_scan_position()
        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])

        # Multiply by the appropriate homogeneous matrix
        if self.scan.scanner_id.startswith(self.scan.Type.AXIOSCAN7.value):
            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.AXIOSCAN7]
        elif self.scan.scanner_id.startswith(self.scan.Type.BZSCANNER.value):
            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.BZSCANNER]
        else:
            raise ValueError(f"Scanner type {self.scan.scanner_id} not supported.")
        slide_position = np.matmul(transform, scan_position)
        return float(slide_position[0][0]), float(slide_position[1][0])

    def crop_images(
        self, images: list[np.ndarray], crop_size: int = 50, in_pixels: bool = True
    ) -> list[np.ndarray]:
        """
        Crop this event out of frame images that are already in memory. Nothing is
        read from file here, so it is very quick for cropping multiple events from
        the same tile.
        Use this if you're interested in many events.
        :param images: the frame images. Crop bounds are computed from the first
            image, so all images are expected to share its height and width.
        :param crop_size: the square size of the image crop to get for this event.
        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
        :return: crop_size x crop_size (in pixels) crops of the event, one per
            image. Any part of a crop that falls outside of the image is zero-filled.
        """
        # Nothing to crop; also avoids indexing into an empty list below
        if len(images) == 0:
            return []
        # Convert a crop size in micrometers to pixels
        if not in_pixels:
            crop_size = round(crop_size / self.scan.pixel_size_um)
        height, width = images[0].shape[:2]
        # Crop window in tile pixel coordinates; it may extend past the image.
        # crop_size // 2 + ceil(crop_size / 2) == crop_size, so the window is
        # always exactly crop_size wide.
        left = self.x - crop_size // 2
        top = self.y - crop_size // 2
        right = left + crop_size
        bottom = top + crop_size
        # The part of the window that actually overlaps the image
        src_left, src_top = max(0, left), max(0, top)
        src_right, src_bottom = min(width, right), min(height, bottom)
        has_overlap = src_left < src_right and src_top < src_bottom

        cropped_images = []
        for image in images:
            # Blank crop; trailing axes (e.g. color channels) are preserved
            cropped_image = np.zeros(
                (crop_size, crop_size) + image.shape[2:], dtype=image.dtype
            )
            # Paste the overlapping region, leaving a black buffer wherever the
            # crop window goes beyond the original image bounds
            if has_overlap:
                cropped_image[
                    src_top - top : src_bottom - top,
                    src_left - left : src_right - left,
                ] = image[src_top:src_bottom, src_left:src_right]
            cropped_images.append(cropped_image)
        return cropped_images

    def extract_images(
        self, crop_size: int = 50, in_pixels: bool = True
    ) -> list[np.ndarray]:
        """
        Extract the images from the scan and tile, reading from the file. Called
        "extract" because it must read and extract the images from file, which is slow.
        Use this if you're interested in only a few events, as it is inefficient when
        reading multiple events from the same tile.
        :param crop_size: the square size of the image crop to get for this event.
        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
        :return: a list of cropped images from the scan in the order of the channels.
        """
        frames = Frame.get_frames(self.tile)
        images = [frame.get_image() for frame in frames]
        return self.crop_images(images, crop_size, in_pixels)

    @classmethod
    def extract_images_for_list(
        cls,
        events: list[typing.Self],
        crop_size: int | list[int] = None,
        in_pixels: bool = True,
    ) -> list[list[np.ndarray]]:
        """
        Get the images for a list of events, ensuring that there is no wasteful reading
        of the same tile multiple times. This function is more efficient than calling
        extract_images for each event.
        TODO: test this function
        :param events: the events to extract images for.
        :param crop_size: the square size of the image crop to get for each event;
            either one value for all events or one value per event.
            Defaults to four times the size of each event.
        :param in_pixels: whether the crop size is in pixels or micrometers.
            Defaults to pixels, and is ignored if crop_size is None.
        :return: a list of lists of cropped images for each event, in the same
            order as the input events.
        """
        if len(events) == 0:
            return []

        # Populate a crop size if none provided
        if crop_size is None:
            crop_sizes = [4 * event.size for event in events]
            in_pixels = True
        # Propagate a constant crop size; np.ndim also recognizes numpy scalars
        # and floats (micrometer sizes), which isinstance(..., int) misses
        elif np.ndim(crop_size) == 0:
            crop_sizes = [crop_size] * len(events)
        else:
            crop_sizes = crop_size

        # Visit the events grouped by tile; sorting indices leaves the input untouched
        order = sorted(range(len(events)), key=lambda i: repr(events[i]))

        # Allocate the list to size
        images = [None] * len(events)
        last_tile = None
        frame_images = None  # Holds large numpy arrays, so expensive to compare
        # Iterate through in sorted order
        for i in order:
            if last_tile != events[i].tile:
                # Gather the frame images, preserving them for the next event
                frames = Frame.get_frames(events[i].tile)
                frame_images = [frame.get_image() for frame in frames]

            last_tile = events[i].tile
            # Writing to index i restores the original order of the events
            images[i] = events[i].crop_images(frame_images, crop_sizes[i], in_pixels)
        return images


class EventArray:
    """
    A class that holds a large number of events' data, making it easy to analyze and
    manipulate many events at once. A more separated version of the Event class.
    """

    INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y", "size"]

    def __init__(
        self,
        info: pd.DataFrame = None,
        metadata: pd.DataFrame = None,
        features: pd.DataFrame = None,
    ):
        """
        :param info: one row per event with exactly the INFO_COLUMNS columns.
        :param metadata: optional; must have the same number of rows as info.
        :param features: optional; must have the same number of rows as info.
        :raises ValueError: if info has the wrong columns or the row counts differ.
        """
        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y", "size"
        if info is not None and (
            not all(col in info.columns for col in self.INFO_COLUMNS)
            or len(info.columns) != len(self.INFO_COLUMNS)
        ):
            raise ValueError(
                "EventArray.info must have columns 'slide_id', 'tile', 'roi', 'x', 'y', 'size'"
            )
        # All DataFrames must all have the same number of rows
        if metadata is not None and (info is None or len(info) != len(metadata)):
            raise ValueError(
                "If EventArray.metadata is not None, it should match rows with .info"
            )
        if features is not None and (info is None or len(info) != len(features)):
            raise ValueError(
                "If EventArray.features is not None, it should match rows with .info"
            )
        self.info = info
        self.metadata = metadata
        self.features = features

    def __len__(self) -> int:
        # Convenience method to get the number of events
        return 0 if self.info is None else len(self.info)

    @staticmethod
    def _frames_equal(left, right) -> bool:
        # Two slots match when both are None or both are equal DataFrames
        if left is None or right is None:
            return left is None and right is None
        return (
            isinstance(left, pd.DataFrame)
            and isinstance(right, pd.DataFrame)
            and left.equals(right)
        )

    def __eq__(self, other):
        # Let Python handle comparisons with unrelated types
        if not isinstance(other, EventArray):
            return NotImplemented
        return (
            self._frames_equal(self.info, other.info)
            and self._frames_equal(self.metadata, other.metadata)
            and self._frames_equal(self.features, other.features)
        )

    def add_metadata(self, new_metadata: pd.DataFrame) -> None:
        """
        Add metadata to the EventArray.
        :param new_metadata: the metadata to add; one row per event.
        :raises ValueError: if the number of rows does not match the number of events.
        """
        # Checked even when metadata already exists: a column-wise concat of
        # mismatched lengths would pad with NaN rows and desynchronize from .info
        if len(self) != len(new_metadata):
            raise ValueError("New metadata does not match length of existing info")
        if self.metadata is None:
            self.metadata = new_metadata
        else:
            # Add the new metadata columns to the existing metadata
            self.metadata = pd.concat([self.metadata, new_metadata], axis=1)

    def add_features(self, new_features: pd.DataFrame) -> None:
        """
        Add features to the EventArray.
        :param new_features: the features to add; one row per event.
        :raises ValueError: if the number of rows does not match the number of events.
        """
        if len(self) != len(new_features):
            raise ValueError("New features do not match length of existing info")
        if self.features is None:
            self.features = new_features
        else:
            # Add the new feature columns to the existing features
            self.features = pd.concat([self.features, new_features], axis=1)

    @classmethod
    def from_list(cls, events: list[typing.Self]) -> typing.Self:
        """
        Combine EventArrays in a list into a single EventArray.
        :param events: the EventArrays to combine.
        :return: one EventArray with the rows of all non-empty inputs, re-indexed.
        """
        all_info = []
        all_metadata = []
        all_features = []
        for event_array in events:
            # Skip empty EventArrays
            if event_array.info is None:
                continue
            all_info.append(event_array.info)
            if event_array.metadata is not None:
                all_metadata.append(event_array.metadata)
            if event_array.features is not None:
                all_features.append(event_array.features)
        if len(all_info) == 0:
            return cls()

        info = pd.concat(all_info, ignore_index=True)
        metadata = pd.concat(all_metadata, ignore_index=True) if all_metadata else None
        features = pd.concat(all_features, ignore_index=True) if all_features else None
        return cls(info, metadata, features)

    @staticmethod
    def _series_to_frame(rows: list, label: str):
        # Stack per-event Series (all None, or all the same shape) into a DataFrame
        first = rows[0]
        for row in rows:
            if type(row) is not type(first):
                raise ValueError(f"All {label} must be the same type.")
            if row is not None and row.shape != first.shape:
                raise ValueError(f"All {label} must be the same shape.")
        return None if first is None else pd.DataFrame(rows)

    @classmethod
    def from_events(cls, events: list[Event]) -> typing.Self:
        """
        Create an EventArray from a list of events.
        :param events: the events to gather.
        :return: an EventArray with one row per event; empty if no events were given.
        :raises ValueError: if the events' metadata (or features) differ in type or shape.
        """
        # Return an empty array if we were passed nothing
        if events is None or len(events) == 0:
            return cls()
        # Otherwise, grab the info
        info = pd.DataFrame(
            {
                "slide_id": [event.scan.slide_id for event in events],
                "tile": [event.tile.n for event in events],
                "roi": [event.tile.n_roi for event in events],
                "x": [event.x for event in events],
                "y": [event.y for event in events],
                "size": [event.size for event in events],
            }
        )
        metadata = cls._series_to_frame(
            [event.metadata for event in events], "metadata"
        )
        features = cls._series_to_frame(
            [event.features for event in events], "features"
        )
        return cls(info=info, metadata=metadata, features=features)

    def to_events(
        self,
        scans: list[Scan],
        ignore_missing_scans=True,
        ignore_metadata=False,
        ignore_features=False,
    ) -> list[Event]:
        """
        Get the events in the EventArray as a list of events.
        :param scans: the scans that the events belong to. Pass an empty list if you
            don't care about scan metadata.
        :param ignore_missing_scans: whether to create blank scans for events without scans.
        :param ignore_metadata: whether to ignore metadata or not
        :param ignore_features: whether to ignore features or not
        :return: one Event per row of .info, in row order.
        :raises ValueError: if a scan is missing and ignore_missing_scans is False.
        """
        if self.info is None:
            return []
        # The first scan listed for a slide_id wins, like a front-to-back search
        scans_by_id = {}
        for scan in scans:
            scans_by_id.setdefault(scan.slide_id, scan)
        # Absent metadata/features are passed on as None rather than dereferenced
        use_metadata = not ignore_metadata and self.metadata is not None
        use_features = not ignore_features and self.features is not None

        events = []
        for i in range(len(self.info)):
            slide_id = self.info["slide_id"][i]
            tile_n = self.info["tile"][i]
            roi_n = self.info["roi"][i]
            # Determine the associated scan
            scan = scans_by_id.get(slide_id)
            if scan is None:
                if not ignore_missing_scans:
                    raise ValueError(f"Scan {slide_id} not found for event {i}.")
                # Create a placeholder scan if the scan is missing
                scan = Scan.make_placeholder(slide_id, tile_n, roi_n)
            # Add to the list
            events.append(
                Event(
                    scan,
                    Tile(scan, tile_n, roi_n),
                    self.info["x"][i],
                    self.info["y"][i],
                    size=self.info["size"][i],
                    metadata=self.metadata.loc[i] if use_metadata else None,
                    features=self.features.loc[i] if use_features else None,
                )
            )
        return events

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert all the data in the EventArray to a single DataFrame.
        :return: a DataFrame with all the data in the EventArray; columns are
            prefixed with "info_", "metadata_", or "features_". Empty if the
            EventArray is empty.
        """
        if self.info is None:
            return pd.DataFrame()
        # Make a copy of the info DataFrame and prepend "info_" to the column names
        output = self.info.copy()
        output.columns = [f"info_{col}" for col in output.columns]
        # Combine with the metadata and prepend "metadata_" to the column names
        if self.metadata is not None:
            metadata = self.metadata.copy()
            metadata.columns = [f"metadata_{col}" for col in metadata.columns]
            output = pd.concat([output, metadata], axis=1)
        # Combine with the features and prepend "features_" to the column names
        if self.features is not None:
            features = self.features.copy()
            features.columns = [f"features_{col}" for col in features.columns]
            output = pd.concat([output, features], axis=1)
        return output

    @classmethod
    def from_dataframe(cls, df) -> typing.Self:
        """
        From a single, special DataFrame, create an EventArray.
        :param df: a DataFrame whose columns are prefixed with "info_",
            "metadata_", or "features_", as produced by to_dataframe().
        :return: an EventArray with the data split back into its three parts.
        """

        def split(prefix: str):
            # Select the prefixed columns and strip only the leading prefix;
            # str.replace would also strip occurrences inside the column name
            columns = [col for col in df.columns if col.startswith(prefix)]
            part = df[columns].copy()
            part.columns = [col.removeprefix(prefix) for col in columns]
            return None if part.size == 0 else part

        return cls(
            info=split("info_"),
            metadata=split("metadata_"),
            features=split("features_"),
        )

    def save_csv(self, output_path: str) -> bool:
        """
        Save the events to a CSV file, including metadata and features.
        :param output_path: the path of the CSV file to write.
        :return: whether the output file exists after writing.
        """
        self.to_dataframe().to_csv(output_path, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_csv(cls, input_path: str) -> typing.Self:
        """
        Load the events from a CSV file, including metadata and features.
        :param input_path: the path of a CSV file written by save_csv().
        :return: the loaded EventArray.
        """
        # Load the CSV file
        df = pd.read_csv(input_path)
        return cls.from_dataframe(df)

    def save_hdf5(self, output_path: str) -> bool:
        """
        Save the events to an HDF5 file, including metadata and features.
        Uses the pandas-provided HDF5 functions for ease, and external compatibility,
        though these files are slightly harder to view in HDFView or similar.
        :param output_path: the path of the HDF5 file to write; an existing file
            is replaced.
        :return: whether the output file exists after writing.
        """
        # Open in write mode: the default append mode would keep stale
        # "metadata"/"features" keys from a previous save to the same path
        with pd.HDFStore(output_path, mode="w") as store:
            # Store the dataframes in the HDF5 file
            if self.info is not None:
                store.put("info", self.info, index=False)
            if self.metadata is not None:
                store.put("metadata", self.metadata, index=False)
            if self.features is not None:
                store.put("features", self.features, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_hdf5(cls, input_path: str) -> typing.Self:
        """
        Load the events from an HDF5 file, including metadata and features.
        :param input_path: the path of an HDF5 file written by save_hdf5().
        :return: the loaded EventArray.
        """
        # Read-only: the default append mode would create a missing file
        with pd.HDFStore(input_path, mode="r") as store:
            # Load the dataframes from the HDF5 file
            info = store.get("info") if "info" in store else None
            metadata = store.get("metadata") if "metadata" in store else None
            features = store.get("features") if "features" in store else None
        return cls(info=info, metadata=metadata, features=features)
class Event:
    """
    A class that represents a single event in a scan, making it easy to evaluate
    singular events. Required metadata is exposed as attributes, and optional
    metadata and features are stored as pandas Series.
    """

    # 2D homogeneous transformation matrices
    # Translations (final column) are in micrometers (um)
    SCAN_TO_SLIDE_TRANSFORM = {
        Scan.Type.AXIOSCAN7: np.array(
            [
                [1, 0, 75000],
                [0, 1, 0],
                [0, 0, 1],
            ]
        ),
        # BZScanner coordinates are a special kind of messed up:
        # - The slide is upside-down.
        # - The slide is oriented vertically, with the barcode at the bottom.
        # - Tiles are numbered from the top-right
        Scan.Type.BZSCANNER: np.array(
            [
                [0, -1, 75000],
                [-1, 0, 25000],
                [0, 0, 1],
            ]
        ),
    }
    """
    Homogeneous transformation matrices for converting between scanner and slide
    coordinates. The matrices are 3x3, with the final column representing the
    translation in micrometers (um). For more information, see
    [affine transformations](https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations).

    Transformations are nominal, and accuracy is not guaranteed; this is due to
    imperfections in slides and alignment in the scanners.
    """

    def __init__(
        self,
        scan: Scan,
        tile: Tile,
        x: int,
        y: int,
        size: int = 12,  # End-to-end size in pixels
        metadata: pd.Series = None,
        features: pd.Series = None,
    ):
        self.scan = scan
        self.tile = tile
        self.x = x
        self.y = y
        self.size = size
        self.metadata = metadata
        self.features = features

    def __repr__(self) -> str:
        return f"{self.scan.slide_id}-{self.tile.n}-{self.x}-{self.y}"

    def __eq__(self, other) -> bool:
        # Events are identified by slide, tile number, and position only
        return self.__repr__() == other.__repr__()

    def __lt__(self, other):
        # String ordering of the identifier; used to group events by tile
        return self.__repr__() < other.__repr__()

    def get_scan_position(self) -> tuple[float, float]:
        """
        Get the position of the event in the scanner's coordinate frame.
        :return: the scan position of the event in micrometers (um).
        """
        # Get overall pixel position
        pixel_x = self.x + (self.scan.tile_width_px * self.tile.x)
        pixel_y = self.y + (self.scan.tile_height_px * self.tile.y)
        # Convert to micrometers
        x_um = pixel_x * self.scan.pixel_size_um
        y_um = pixel_y * self.scan.pixel_size_um
        # Add the scan's origin in the scanner frame
        x_um += self.scan.roi[self.tile.n_roi].origin_x_um
        y_um += self.scan.roi[self.tile.n_roi].origin_y_um
        return x_um, y_um

    def get_slide_position(self) -> tuple[float, float]:
        """
        Get the slide position of the event in micrometers (um).
        :return: the slide position of the event.
        :raises ValueError: if the scan's scanner_id does not start with a
            supported scanner type.
        """
        # Turn scan_position into a 3x1 vector
        scan_position = self.get_scan_position()
        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])

        # Multiply by the appropriate homogeneous matrix
        if self.scan.scanner_id.startswith(self.scan.Type.AXIOSCAN7.value):
            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.AXIOSCAN7]
        elif self.scan.scanner_id.startswith(self.scan.Type.BZSCANNER.value):
            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.scan.Type.BZSCANNER]
        else:
            raise ValueError(f"Scanner type {self.scan.scanner_id} not supported.")
        slide_position = np.matmul(transform, scan_position)
        return float(slide_position[0][0]), float(slide_position[1][0])

    def crop_images(
        self, images: list[np.ndarray], crop_size: int = 50, in_pixels: bool = True
    ) -> list[np.ndarray]:
        """
        Crop this event out of frame images that are already in memory. Nothing is
        read from file here, so it is very quick for cropping multiple events from
        the same tile.
        Use this if you're interested in many events.
        :param images: the frame images. Crop bounds are computed from the first
            image, so all images are expected to share its height and width.
        :param crop_size: the square size of the image crop to get for this event.
        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
        :return: crop_size x crop_size (in pixels) crops of the event, one per
            image. Any part of a crop that falls outside of the image is zero-filled.
        """
        # Nothing to crop; also avoids indexing into an empty list below
        if len(images) == 0:
            return []
        # Convert a crop size in micrometers to pixels
        if not in_pixels:
            crop_size = round(crop_size / self.scan.pixel_size_um)
        height, width = images[0].shape[:2]
        # Crop window in tile pixel coordinates; it may extend past the image.
        # crop_size // 2 + ceil(crop_size / 2) == crop_size, so the window is
        # always exactly crop_size wide.
        left = self.x - crop_size // 2
        top = self.y - crop_size // 2
        right = left + crop_size
        bottom = top + crop_size
        # The part of the window that actually overlaps the image
        src_left, src_top = max(0, left), max(0, top)
        src_right, src_bottom = min(width, right), min(height, bottom)
        has_overlap = src_left < src_right and src_top < src_bottom

        cropped_images = []
        for image in images:
            # Blank crop; trailing axes (e.g. color channels) are preserved
            cropped_image = np.zeros(
                (crop_size, crop_size) + image.shape[2:], dtype=image.dtype
            )
            # Paste the overlapping region, leaving a black buffer wherever the
            # crop window goes beyond the original image bounds
            if has_overlap:
                cropped_image[
                    src_top - top : src_bottom - top,
                    src_left - left : src_right - left,
                ] = image[src_top:src_bottom, src_left:src_right]
            cropped_images.append(cropped_image)
        return cropped_images

    def extract_images(
        self, crop_size: int = 50, in_pixels: bool = True
    ) -> list[np.ndarray]:
        """
        Extract the images from the scan and tile, reading from the file. Called
        "extract" because it must read and extract the images from file, which is slow.
        Use this if you're interested in only a few events, as it is inefficient when
        reading multiple events from the same tile.
        :param crop_size: the square size of the image crop to get for this event.
        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
        :return: a list of cropped images from the scan in the order of the channels.
        """
        frames = Frame.get_frames(self.tile)
        images = [frame.get_image() for frame in frames]
        return self.crop_images(images, crop_size, in_pixels)

    @classmethod
    def extract_images_for_list(
        cls,
        events: list[typing.Self],
        crop_size: int | list[int] = None,
        in_pixels: bool = True,
    ) -> list[list[np.ndarray]]:
        """
        Get the images for a list of events, ensuring that there is no wasteful reading
        of the same tile multiple times. This function is more efficient than calling
        extract_images for each event.
        TODO: test this function
        :param events: the events to extract images for.
        :param crop_size: the square size of the image crop to get for each event;
            either one value for all events or one value per event.
            Defaults to four times the size of each event.
        :param in_pixels: whether the crop size is in pixels or micrometers.
            Defaults to pixels, and is ignored if crop_size is None.
        :return: a list of lists of cropped images for each event, in the same
            order as the input events.
        """
        if len(events) == 0:
            return []

        # Populate a crop size if none provided
        if crop_size is None:
            crop_sizes = [4 * event.size for event in events]
            in_pixels = True
        # Propagate a constant crop size; np.ndim also recognizes numpy scalars
        # and floats (micrometer sizes), which isinstance(..., int) misses
        elif np.ndim(crop_size) == 0:
            crop_sizes = [crop_size] * len(events)
        else:
            crop_sizes = crop_size

        # Visit the events grouped by tile; sorting indices leaves the input untouched
        order = sorted(range(len(events)), key=lambda i: repr(events[i]))

        # Allocate the list to size
        images = [None] * len(events)
        last_tile = None
        frame_images = None  # Holds large numpy arrays, so expensive to compare
        # Iterate through in sorted order
        for i in order:
            if last_tile != events[i].tile:
                # Gather the frame images, preserving them for the next event
                frames = Frame.get_frames(events[i].tile)
                frame_images = [frame.get_image() for frame in frames]

            last_tile = events[i].tile
            # Writing to index i restores the original order of the events
            images[i] = events[i].crop_images(frame_images, crop_sizes[i], in_pixels)
        return images
A class that represents a single event in a scan, making it easy to evaluate singular events. Required metadata is exposed as attributes, and optional metadata and features are stored as pandas Series.
def __init__(
    self,
    scan: Scan,
    tile: Tile,
    x: int,
    y: int,
    size: int = 12,  # End-to-end size in pixels
    metadata: pd.Series = None,
    features: pd.Series = None,
):
    """
    Store the identity, position, and optional data of a single event.
    :param scan: the scan that the event belongs to.
    :param tile: the tile of the scan that the event belongs to.
    :param x: the x position of the event.
    :param y: the y position of the event.
    :param size: the end-to-end size of the event in pixels.
    :param metadata: optional metadata for the event.
    :param features: optional features for the event.
    """
    # Where the event lives
    self.scan, self.tile = scan, tile
    # Position and extent of the event
    self.x, self.y = x, y
    self.size = size
    # Optional per-event data; left as None when not provided
    self.metadata, self.features = metadata, features
Homogeneous transformation matrices for converting between scanner and slide coordinates. The matrices are 3x3, with the final column representing the translation in micrometers (um). For more information, see affine transformations.
Transformations are nominal, and accuracy is not guaranteed; this is due to imperfections in slides and alignment in the scanners.
def get_scan_position(self) -> tuple[float, float]:
    """
    Locate the event in the scanner's coordinate frame.
    :return: the (x, y) scan position of the event in micrometers (um).
    """
    scan, tile = self.scan, self.tile
    # Overall pixel position (tile offset plus position in the tile), scaled
    # from pixels to micrometers
    x_um = (self.x + (scan.tile_width_px * tile.x)) * scan.pixel_size_um
    y_um = (self.y + (scan.tile_height_px * tile.y)) * scan.pixel_size_um
    # Shift by the origin of the event's ROI in the scanner frame
    roi = scan.roi[tile.n_roi]
    return x_um + roi.origin_x_um, y_um + roi.origin_y_um
Get the position of the event in the scanner's coordinate frame.
Returns
the scan position of the event in micrometers (um).
def get_slide_position(self) -> tuple[float, float]:
    """
    Convert the event's scan position into the slide's coordinate frame.
    :return: the (x, y) slide position of the event in micrometers (um).
    :raises ValueError: if the scan's scanner_id starts with neither the
        AXIOSCAN7 nor the BZSCANNER type value.
    """
    # Homogeneous 3x1 column vector of the scan position
    x_um, y_um = self.get_scan_position()
    column = np.array([[x_um], [y_um], [1]])

    # Pick the scanner type whose value prefixes the scanner ID
    scanner_types = self.scan.Type
    if self.scan.scanner_id.startswith(scanner_types.AXIOSCAN7.value):
        scanner_type = scanner_types.AXIOSCAN7
    elif self.scan.scanner_id.startswith(scanner_types.BZSCANNER.value):
        scanner_type = scanner_types.BZSCANNER
    else:
        raise ValueError(f"Scanner type {self.scan.scanner_id} not supported.")

    # Apply that scanner's scan-to-slide transformation
    slide_column = np.matmul(self.SCAN_TO_SLIDE_TRANSFORM[scanner_type], column)
    return float(slide_column[0][0]), float(slide_column[1][0])
Get the slide position of the event in micrometers (um).
Returns
the slide position of the event.
def crop_images(
    self, images: list[np.ndarray], crop_size: int = 50, in_pixels: bool = True
) -> list[np.ndarray]:
    """
    Crop this event out of frame images that are already in memory. Nothing is
    read from file here, so it is very quick for cropping multiple events from
    the same tile.
    Use this if you're interested in many events.
    :param images: the frame images. Crop bounds are computed from the first
        image, so all images are expected to share its height and width.
    :param crop_size: the square size of the image crop to get for this event.
    :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
    :return: crop_size x crop_size (in pixels) crops of the event, one per
        image. Any part of a crop that falls outside of the image is zero-filled.
    """
    # Nothing to crop; also avoids indexing into an empty list below
    if len(images) == 0:
        return []
    # Convert a crop size in micrometers to pixels
    if not in_pixels:
        crop_size = round(crop_size / self.scan.pixel_size_um)
    height, width = images[0].shape[:2]
    # Crop window in tile pixel coordinates; it may extend past the image.
    # crop_size // 2 + ceil(crop_size / 2) == crop_size, so the window is
    # always exactly crop_size wide.
    left = self.x - crop_size // 2
    top = self.y - crop_size // 2
    right = left + crop_size
    bottom = top + crop_size
    # The part of the window that actually overlaps the image
    src_left, src_top = max(0, left), max(0, top)
    src_right, src_bottom = min(width, right), min(height, bottom)
    # False when the window lies entirely outside of the image; slicing with
    # the resulting negative/inverted bounds would select the wrong pixels
    has_overlap = src_left < src_right and src_top < src_bottom

    cropped_images = []
    for image in images:
        # Blank crop; trailing axes (e.g. color channels) are preserved
        cropped_image = np.zeros(
            (crop_size, crop_size) + image.shape[2:], dtype=image.dtype
        )
        # Paste the overlapping region, leaving a black buffer wherever the
        # crop window goes beyond the original image bounds
        if has_overlap:
            cropped_image[
                src_top - top : src_bottom - top,
                src_left - left : src_right - left,
            ] = image[src_top:src_bottom, src_left:src_right]
        cropped_images.append(cropped_image)
    return cropped_images
Crop the event out of frame images that are already loaded. Nothing needs to be read from file, so it is very quick for getting multiple events from the same tile. Use this if you're interested in many events.
Parameters
- images: the frame images.
- crop_size: the square size of the image crop to get for this event.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
Returns
crop_size x crop_size crops of the event in the provided frames. If the event is too close to the edge, the part of the crop that falls outside the frame is filled with zeros (black).
def extract_images(
    self, crop_size: int = 50, in_pixels: bool = True
) -> list[np.ndarray]:
    """
    Read this event's tile from file and crop the event out of every frame.
    Reading the frame images is the slow part, so use this when you only need a
    few events; it re-reads the tile on every call and is therefore inefficient
    for multiple events from the same tile.
    :param crop_size: the square size of the image crop to get for this event.
    :param in_pixels: whether the crop size is in pixels or micrometers.
        Defaults to pixels.
    :return: a list of cropped images, one per frame of the tile, in the order
        the frames are returned by Frame.get_frames.
    """
    tile_images = []
    for frame in Frame.get_frames(self.tile):
        tile_images.append(frame.get_image())
    # Cropping itself is delegated so both code paths share the same logic
    return self.crop_images(tile_images, crop_size, in_pixels)
Extract the images from the scan and tile, reading from the file. Called "extract" because it must read and extract the images from file, which is slow. Use this if you're interested in only a few events, as it is inefficient when reading multiple events from the same tile.
Parameters
- crop_size: the square size of the image crop to get for this event.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
Returns
a list of cropped images from the scan in the order of the channels.
195 @classmethod 196 def extract_images_for_list( 197 cls, 198 events: list[typing.Self], 199 crop_size: int | list[int] = None, 200 in_pixels: bool = True, 201 ) -> list[list[np.ndarray]]: 202 """ 203 Get the images for a list of events, ensuring that there is no wasteful reading 204 of the same tile multiple times. This function is more efficient than calling 205 extract_event_images for each event. 206 TODO: test this function 207 :param events: the events to extract images for. 208 :param crop_size: the square size of the image crop to get for this event. 209 Defaults to twice the size of the event. 210 :param in_pixels: whether the crop size is in pixels or micrometers. 211 Defaults to pixels, and is ignored if crop_size is None. 212 :return: a list of lists of cropped images for each event. 213 """ 214 if len(events) == 0: 215 return [] 216 217 # Populate a crop size if none provided 218 if crop_size is None: 219 crop_size = [4 * event.size for event in events] 220 in_pixels = True 221 # Propagate a constant crop size 222 elif isinstance(crop_size, int): 223 crop_size = [crop_size] * len(events) 224 225 # Sort the events by tile; use a shallow copy to avoid modifying the original 226 order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__())) 227 228 # Allocate the list to size 229 images = [None] * len(events) 230 last_tile = None 231 frame_images = None # Holds large numpy arrays, so expensive to compare 232 # Iterate through in sorted order 233 for i in order: 234 if last_tile != events[i].tile: 235 # Gather the frame images, preserving them for the next event 236 frames = Frame.get_frames(events[i].tile) 237 frame_images = [frame.get_image() for frame in frames] 238 239 last_tile = events[i].tile 240 # Use the frame images to crop the event images 241 # Preserve the original order using order[i] 242 images[i] = events[i].crop_images(frame_images, crop_size[i], in_pixels) 243 return images
Get the images for a list of events, ensuring that there is no wasteful reading of the same tile multiple times. This function is more efficient than calling extract_images for each event. TODO: test this function
Parameters
- events: the events to extract images for.
- crop_size: the square size of the image crop to get for each event, either one value for all events or a list with one value per event. Defaults to four times the size of the event.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels, and is ignored if crop_size is None.
Returns
a list of lists of cropped images for each event.
246class EventArray: 247 """ 248 A class that holds a large number of events' data, making it easy to analyze and 249 manipulate many events at once. A more separated version of the Event class. 250 """ 251 252 INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y", "size"] 253 254 def __init__( 255 self, 256 info: pd.DataFrame = None, 257 metadata: pd.DataFrame = None, 258 features: pd.DataFrame = None, 259 ): 260 # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y", "size" 261 if info is not None and ( 262 not all(col in info.columns for col in self.INFO_COLUMNS) 263 or len(info.columns) != 6 264 ): 265 raise ValueError( 266 "EventArray.info must have columns 'slide_id', 'tile', 'roi', 'x', 'y', 'size'" 267 ) 268 # All DataFrames must all have the same number of rows 269 if metadata is not None and (info is None or len(info) != len(metadata)): 270 raise ValueError( 271 "If EventArray.metadata is not None, it should match rows with .info" 272 ) 273 if features is not None and (info is None or len(info) != len(features)): 274 raise ValueError( 275 "If EventArray.features is not None, it should match rows with .info" 276 ) 277 self.info = info 278 self.metadata = metadata 279 self.features = features 280 281 def __len__(self) -> int: 282 # Convenience method to get the number of events 283 if self.info is None: 284 return 0 285 else: 286 return len(self.info) 287 288 def __eq__(self, other): 289 is_equal = True 290 # Parse all possibilities for info 291 if isinstance(self.info, pd.DataFrame): 292 if isinstance(other.info, pd.DataFrame): 293 is_equal = self.info.equals(other.info) 294 if not is_equal: 295 return False 296 else: 297 return False 298 elif self.info is None: 299 if other.info is not None: 300 return False 301 302 # Parse all possibilities for metadata 303 if isinstance(self.metadata, pd.DataFrame): 304 if isinstance(other.metadata, pd.DataFrame): 305 is_equal = self.metadata.equals(other.metadata) 306 if not is_equal: 307 return 
False 308 else: 309 return False 310 elif self.metadata is None: 311 if other.metadata is not None: 312 return False 313 314 # Parse all possibilities for features 315 if isinstance(self.features, pd.DataFrame): 316 if isinstance(other.features, pd.DataFrame): 317 is_equal = self.features.equals(other.features) 318 if not is_equal: 319 return False 320 else: 321 return False 322 elif self.features is None: 323 if other.features is not None: 324 return False 325 326 return is_equal 327 328 def add_metadata(self, new_metadata: pd.DataFrame) -> None: 329 """ 330 Add metadata to the EventArray. 331 :param new_metadata: the metadata to add. 332 """ 333 if self.metadata is None: 334 if len(self) != len(new_metadata): 335 raise ValueError("New metadata does not match length of existing info") 336 self.metadata = new_metadata 337 else: 338 # Add the new metadata columns to the existing metadata 339 self.metadata = pd.concat([self.metadata, new_metadata], axis=1) 340 341 def add_features(self, new_features: pd.DataFrame) -> None: 342 """ 343 Add features to the EventArray. 344 :param new_features: the metadata to add. 345 """ 346 if self.features is None: 347 if len(self) != len(new_features): 348 raise ValueError("New metadata does not match length of existing info") 349 self.features = new_features 350 else: 351 # Add the new metadata columns to the existing metadata 352 self.features = pd.concat([self.features, new_features], axis=1) 353 354 @classmethod 355 def from_list(cls, events: list[typing.Self]) -> typing.Self: 356 """ 357 Combine EventArrays in a list into a single EventArray. 358 :param events: the new list of events. 
359 """ 360 all_info = [] 361 all_metadata = [] 362 all_features = [] 363 for event_array in events: 364 # Skip empty EventArrays 365 if event_array.info is not None: 366 all_info.append(event_array.info) 367 if event_array.metadata is not None: 368 all_metadata.append(event_array.metadata) 369 if event_array.features is not None: 370 all_features.append(event_array.features) 371 if len(all_info) == 0: 372 return EventArray() 373 else: 374 all_info = pd.concat(all_info, ignore_index=True) 375 if len(all_metadata) == 0: 376 all_metadata = None 377 else: 378 all_metadata = pd.concat(all_metadata, ignore_index=True) 379 if len(all_features) == 0: 380 all_features = None 381 else: 382 all_features = pd.concat(all_features, ignore_index=True) 383 384 return EventArray(all_info, all_metadata, all_features) 385 386 @classmethod 387 def from_events(cls, events: list[Event]) -> typing.Self: 388 """ 389 Set the events in the EventArray to a new list of events. 390 :param events: the new list of events. 
391 """ 392 # Return an empty array if we were passed nothing 393 if events is None or len(events) == 0: 394 return EventArray() 395 # Otherwise, grab the info 396 info = pd.DataFrame( 397 { 398 "slide_id": [event.scan.slide_id for event in events], 399 "tile": [event.tile.n for event in events], 400 "roi": [event.tile.n_roi for event in events], 401 "x": [event.x for event in events], 402 "y": [event.y for event in events], 403 "size": [event.size for event in events], 404 } 405 ) 406 metadata_list = [event.metadata for event in events] 407 # Iterate through and ensure that all metadata is the same shape 408 for metadata in metadata_list: 409 if type(metadata) != type(metadata_list[0]): 410 raise ValueError("All metadata must be the same type.") 411 if metadata is not None and metadata.shape != metadata_list[0].shape: 412 raise ValueError("All metadata must be the same shape.") 413 if metadata_list[0] is None: 414 metadata = None 415 else: 416 metadata = pd.DataFrame(metadata_list) 417 features_list = [event.features for event in events] 418 # Iterate through and ensure that all features are the same shape 419 for features in features_list: 420 if type(features) != type(features_list[0]): 421 raise ValueError("All features must be the same type.") 422 if features is not None and features.shape != features_list[0].shape: 423 raise ValueError("All features must be the same shape.") 424 if features_list[0] is None: 425 features = None 426 else: 427 features = pd.DataFrame(features_list) 428 return EventArray(info=info, metadata=metadata, features=features) 429 430 def to_events( 431 self, 432 scans: list[Scan], 433 ignore_missing_scans=True, 434 ignore_metadata=False, 435 ignore_features=False, 436 ) -> list[Event]: 437 """ 438 Get the events in the EventArray as a list of events. 439 :param scans: the scans that the events belong to. Pass an empty list if you 440 don't care about scan metadata. 
441 :param ignore_missing_scans: whether to create blank scans for events without scans. 442 :param ignore_metadata: whether to ignore metadata or not 443 :param ignore_features: whether to ignore features or not 444 :return: 445 """ 446 events = [] 447 for i in range(len(self.info)): 448 # Determine the associated scan 449 scan = None 450 for s in scans: 451 if s.slide_id == self.info["slide_id"][i]: 452 scan = s 453 break 454 if scan is None: 455 if ignore_missing_scans: 456 # Create a placeholder scan if the scan is missing 457 scan = Scan.make_placeholder( 458 self.info["slide_id"][i], 459 self.info["tile"][i], 460 self.info["roi"][i], 461 ) 462 else: 463 raise ValueError( 464 f"Scan {self.info['slide_id'][i]} not found for event {i}." 465 ) 466 # Add to the list 467 events.append( 468 Event( 469 scan, 470 Tile(scan, self.info["tile"][i], self.info["roi"][i]), 471 self.info["x"][i], 472 self.info["y"][i], 473 size=self.info["size"][i], 474 metadata=None if ignore_metadata else self.metadata.loc[i], 475 features=None if ignore_features else self.features.loc[i], 476 ) 477 ) 478 return events 479 480 def to_dataframe(self) -> pd.DataFrame: 481 """ 482 Convert all the data in the EventArray to a single DataFrame. 483 :return: a DataFrame with all the data in the EventArray. 
484 """ 485 # Make a copy of the info DataFrame and prepend "info_" to the column names 486 output = self.info.copy() 487 output.columns = [f"info_{col}" for col in output.columns] 488 # Combine with the metadata and prepend "metadata_" to the column names 489 if self.metadata is not None: 490 metadata = self.metadata.copy() 491 metadata.columns = [f"metadata_{col}" for col in metadata.columns] 492 output = pd.concat([output, metadata], axis=1) 493 # Combine with the features and prepend "features_" to the column names 494 if self.features is not None: 495 features = self.features.copy() 496 features.columns = [f"features_{col}" for col in features.columns] 497 output = pd.concat([output, features], axis=1) 498 return output 499 500 @classmethod 501 def from_dataframe(cls, df) -> typing.Self: 502 """ 503 From a single, special DataFrame, create an EventArray. 504 :return: a DataFrame with all the data in the EventArray. 505 """ 506 # Split the columns into info, metadata, and features and strip prefix 507 info = df[[col for col in df.columns if col.startswith("info_")]].copy() 508 info.columns = [col.replace("info_", "") for col in info.columns] 509 if info.size == 0: 510 info = None 511 metadata = df[[col for col in df.columns if col.startswith("metadata_")]].copy() 512 metadata.columns = [col.replace("metadata_", "") for col in metadata.columns] 513 if metadata.size == 0: 514 metadata = None 515 features = df[[col for col in df.columns if col.startswith("features_")]].copy() 516 features.columns = [col.replace("features_", "") for col in features.columns] 517 if features.size == 0: 518 features = None 519 return cls(info=info, metadata=metadata, features=features) 520 521 def save_csv(self, output_path: str) -> bool: 522 """ 523 Save the events to an CSV file, including metadata and features. 
524 :param output_path: 525 :return: 526 """ 527 self.to_dataframe().to_csv(output_path, index=False) 528 return os.path.exists(output_path) 529 530 @classmethod 531 def load_csv(cls, input_path: str) -> typing.Self: 532 """ 533 Load the events from an CSV file, including metadata and features. 534 :param input_path: 535 :return: 536 """ 537 # Load the CSV file 538 df = pd.read_csv(input_path) 539 return cls.from_dataframe(df) 540 541 def save_hdf5(self, output_path: str) -> bool: 542 """ 543 Save the events to an HDF5 file, including metadata and features. 544 Uses the pandas-provided HDF5 functions for ease, and external compatibility, 545 though these files are slightly harder to view in HDFView or similar. 546 :param output_path: 547 :return: 548 """ 549 # Open the output_path as an HDF5 file 550 with pd.HDFStore(output_path) as store: 551 # Store the dataframes in the HDF5 file 552 if self.info is not None: 553 store.put("info", self.info, index=False) 554 if self.metadata is not None: 555 store.put("metadata", self.metadata, index=False) 556 if self.features is not None: 557 store.put("features", self.features, index=False) 558 return os.path.exists(output_path) 559 560 @classmethod 561 def load_hdf5(cls, input_path: str) -> typing.Self: 562 """ 563 Load the events from an HDF5 file, including metadata and features. 564 :param input_path: 565 :return: 566 """ 567 # Open the input_path as an HDF5 file 568 with pd.HDFStore(input_path) as store: 569 # Load the dataframes from the HDF5 file 570 info = store.get("info") if "info" in store else None 571 metadata = store.get("metadata") if "metadata" in store else None 572 features = store.get("features") if "features" in store else None 573 return cls(info=info, metadata=metadata, features=features)
A class that holds a large number of events' data, making it easy to analyze and manipulate many events at once. A more separated version of the Event class.
def __init__(
    self,
    info: pd.DataFrame = None,
    metadata: pd.DataFrame = None,
    features: pd.DataFrame = None,
):
    """
    Validate and store the three DataFrames of the EventArray.
    :param info: DataFrame with the columns "slide_id", "tile", "roi", "x", "y",
        "size" (and no others), or None.
    :param metadata: optional DataFrame; needs info and the same number of rows.
    :param features: optional DataFrame; needs info and the same number of rows.
    :raises ValueError: if any of the above requirements is violated.
    """
    if info is not None:
        has_required = all(name in info.columns for name in self.INFO_COLUMNS)
        # Six columns that include all required names means no extras
        if not has_required or len(info.columns) != 6:
            raise ValueError(
                "EventArray.info must have columns 'slide_id', 'tile', 'roi', 'x', 'y', 'size'"
            )

    # Optional tables must come with info and agree with it on row count
    optional_tables = (
        (
            metadata,
            "If EventArray.metadata is not None, it should match rows with .info",
        ),
        (
            features,
            "If EventArray.features is not None, it should match rows with .info",
        ),
    )
    for table, message in optional_tables:
        if table is None:
            continue
        if info is None or len(info) != len(table):
            raise ValueError(message)

    self.info = info
    self.metadata = metadata
    self.features = features
def add_metadata(self, new_metadata: pd.DataFrame) -> None:
    """
    Add metadata columns to the EventArray.
    :param new_metadata: the metadata to add; one row per event.
    :raises ValueError: if the number of rows differs from the number of events,
        or if the new rows cannot be lined up with the existing metadata.
    """
    # Validate up front, whether or not metadata already exists
    if len(self) != len(new_metadata):
        raise ValueError("New metadata does not match length of existing info")
    if self.metadata is None:
        self.metadata = new_metadata
        return
    # Add the new metadata columns to the existing metadata. concat aligns on the
    # index, so differing labels would silently add NaN-padded rows.
    combined = pd.concat([self.metadata, new_metadata], axis=1)
    if len(combined) != len(self):
        raise ValueError(
            "New metadata index does not match the existing metadata index"
        )
    self.metadata = combined
Add metadata to the EventArray.
Parameters
- new_metadata: the metadata to add.
def add_features(self, new_features: pd.DataFrame) -> None:
    """
    Add feature columns to the EventArray.
    :param new_features: the features to add; one row per event.
    :raises ValueError: if the number of rows differs from the number of events,
        or if the new rows cannot be lined up with the existing features.
    """
    # Validate up front, whether or not features already exist
    if len(self) != len(new_features):
        raise ValueError("New features do not match length of existing info")
    if self.features is None:
        self.features = new_features
        return
    # Add the new feature columns to the existing features. concat aligns on the
    # index, so differing labels would silently add NaN-padded rows.
    combined = pd.concat([self.features, new_features], axis=1)
    if len(combined) != len(self):
        raise ValueError(
            "New features index does not match the existing features index"
        )
    self.features = combined
Add features to the EventArray.
Parameters
- new_features: the features to add.
354 @classmethod 355 def from_list(cls, events: list[typing.Self]) -> typing.Self: 356 """ 357 Combine EventArrays in a list into a single EventArray. 358 :param events: the new list of events. 359 """ 360 all_info = [] 361 all_metadata = [] 362 all_features = [] 363 for event_array in events: 364 # Skip empty EventArrays 365 if event_array.info is not None: 366 all_info.append(event_array.info) 367 if event_array.metadata is not None: 368 all_metadata.append(event_array.metadata) 369 if event_array.features is not None: 370 all_features.append(event_array.features) 371 if len(all_info) == 0: 372 return EventArray() 373 else: 374 all_info = pd.concat(all_info, ignore_index=True) 375 if len(all_metadata) == 0: 376 all_metadata = None 377 else: 378 all_metadata = pd.concat(all_metadata, ignore_index=True) 379 if len(all_features) == 0: 380 all_features = None 381 else: 382 all_features = pd.concat(all_features, ignore_index=True) 383 384 return EventArray(all_info, all_metadata, all_features)
Combine EventArrays in a list into a single EventArray.
Parameters
- events: the list of EventArrays to combine.
386 @classmethod 387 def from_events(cls, events: list[Event]) -> typing.Self: 388 """ 389 Set the events in the EventArray to a new list of events. 390 :param events: the new list of events. 391 """ 392 # Return an empty array if we were passed nothing 393 if events is None or len(events) == 0: 394 return EventArray() 395 # Otherwise, grab the info 396 info = pd.DataFrame( 397 { 398 "slide_id": [event.scan.slide_id for event in events], 399 "tile": [event.tile.n for event in events], 400 "roi": [event.tile.n_roi for event in events], 401 "x": [event.x for event in events], 402 "y": [event.y for event in events], 403 "size": [event.size for event in events], 404 } 405 ) 406 metadata_list = [event.metadata for event in events] 407 # Iterate through and ensure that all metadata is the same shape 408 for metadata in metadata_list: 409 if type(metadata) != type(metadata_list[0]): 410 raise ValueError("All metadata must be the same type.") 411 if metadata is not None and metadata.shape != metadata_list[0].shape: 412 raise ValueError("All metadata must be the same shape.") 413 if metadata_list[0] is None: 414 metadata = None 415 else: 416 metadata = pd.DataFrame(metadata_list) 417 features_list = [event.features for event in events] 418 # Iterate through and ensure that all features are the same shape 419 for features in features_list: 420 if type(features) != type(features_list[0]): 421 raise ValueError("All features must be the same type.") 422 if features is not None and features.shape != features_list[0].shape: 423 raise ValueError("All features must be the same shape.") 424 if features_list[0] is None: 425 features = None 426 else: 427 features = pd.DataFrame(features_list) 428 return EventArray(info=info, metadata=metadata, features=features)
Create a new EventArray from a list of events.
Parameters
- events: the new list of events.
def to_events(
    self,
    scans: "list[Scan]",
    ignore_missing_scans=True,
    ignore_metadata=False,
    ignore_features=False,
) -> "list[Event]":
    """
    Get the events in the EventArray as a list of events.
    :param scans: the scans that the events belong to. Pass an empty list if you
        don't care about scan metadata.
    :param ignore_missing_scans: whether to create placeholder scans for events
        without scans; if False, a missing scan raises ValueError.
    :param ignore_metadata: whether to ignore metadata or not
    :param ignore_features: whether to ignore features or not
    :return: one Event per row, in row order; an empty list if the array is empty.
    """
    if self.info is None:
        return []
    # Arrays without metadata/features simply yield events without them
    use_metadata = not ignore_metadata and self.metadata is not None
    use_features = not ignore_features and self.features is not None

    events = []
    for i in range(len(self.info)):
        # Positional access so a non-default index on info still works
        slide_id = self.info["slide_id"].iloc[i]
        tile_n = self.info["tile"].iloc[i]
        roi_n = self.info["roi"].iloc[i]
        # Determine the associated scan (first match wins)
        scan = next((s for s in scans if s.slide_id == slide_id), None)
        if scan is None:
            if not ignore_missing_scans:
                raise ValueError(f"Scan {slide_id} not found for event {i}.")
            # Create a placeholder scan if the scan is missing
            scan = Scan.make_placeholder(slide_id, tile_n, roi_n)
        # Add to the list
        events.append(
            Event(
                scan,
                Tile(scan, tile_n, roi_n),
                self.info["x"].iloc[i],
                self.info["y"].iloc[i],
                size=self.info["size"].iloc[i],
                metadata=self.metadata.iloc[i] if use_metadata else None,
                features=self.features.iloc[i] if use_features else None,
            )
        )
    return events
Get the events in the EventArray as a list of events.
Parameters
- scans: the scans that the events belong to. Pass an empty list if you don't care about scan metadata.
- ignore_missing_scans: whether to create blank scans for events without scans.
- ignore_metadata: whether to ignore metadata or not
- ignore_features: whether to ignore features or not
Returns
def to_dataframe(self) -> pd.DataFrame:
    """
    Convert all the data in the EventArray to a single DataFrame.
    :return: a DataFrame with the info, metadata and features columns, prefixed
        with "info_", "metadata_" and "features_" respectively. An empty
        DataFrame if the EventArray has no info.
    """
    if self.info is None:
        return pd.DataFrame()
    parts = []
    for prefix, frame in (
        ("info", self.info),
        ("metadata", self.metadata),
        ("features", self.features),
    ):
        if frame is None:
            continue
        # Work on a copy so the stored DataFrames keep their column names
        part = frame.copy()
        part.columns = [f"{prefix}_{col}" for col in part.columns]
        # Rows correspond by position; share info's index so that concat cannot
        # misalign them (and pad with NaN) when the labels differ
        part.index = self.info.index
        parts.append(part)
    return pd.concat(parts, axis=1)
Convert all the data in the EventArray to a single DataFrame.
Returns
a DataFrame with all the data in the EventArray.
500 @classmethod 501 def from_dataframe(cls, df) -> typing.Self: 502 """ 503 From a single, special DataFrame, create an EventArray. 504 :return: a DataFrame with all the data in the EventArray. 505 """ 506 # Split the columns into info, metadata, and features and strip prefix 507 info = df[[col for col in df.columns if col.startswith("info_")]].copy() 508 info.columns = [col.replace("info_", "") for col in info.columns] 509 if info.size == 0: 510 info = None 511 metadata = df[[col for col in df.columns if col.startswith("metadata_")]].copy() 512 metadata.columns = [col.replace("metadata_", "") for col in metadata.columns] 513 if metadata.size == 0: 514 metadata = None 515 features = df[[col for col in df.columns if col.startswith("features_")]].copy() 516 features.columns = [col.replace("features_", "") for col in features.columns] 517 if features.size == 0: 518 features = None 519 return cls(info=info, metadata=metadata, features=features)
From a single, special DataFrame, create an EventArray.
Returns
an EventArray with all the data in the DataFrame.
def save_csv(self, output_path: str) -> bool:
    """
    Write the events, including metadata and features, to a CSV file.
    :param output_path: the path of the CSV file to write.
    :return: whether a file exists at output_path after writing.
    """
    combined = self.to_dataframe()
    # The row index carries no information, so it is left out of the file
    combined.to_csv(output_path, index=False)
    return os.path.exists(output_path)
Save the events to a CSV file, including metadata and features.
Parameters
- output_path: the path of the CSV file to write.
Returns
530 @classmethod 531 def load_csv(cls, input_path: str) -> typing.Self: 532 """ 533 Load the events from an CSV file, including metadata and features. 534 :param input_path: 535 :return: 536 """ 537 # Load the CSV file 538 df = pd.read_csv(input_path) 539 return cls.from_dataframe(df)
Load the events from a CSV file, including metadata and features.
Parameters
- input_path: the path of the CSV file to read.
Returns
def save_hdf5(self, output_path: str) -> bool:
    """
    Save the events to an HDF5 file, including metadata and features.
    Uses the pandas-provided HDF5 functions for ease, and external compatibility,
    though these files are slightly harder to view in HDFView or similar.
    :param output_path: the path of the HDF5 file to write or update.
    :return: whether a file exists at output_path after writing.
    """
    # Open the output_path as an HDF5 file (pandas opens it for appending)
    with pd.HDFStore(output_path) as store:
        for key, frame in (
            ("info", self.info),
            ("metadata", self.metadata),
            ("features", self.features),
        ):
            if frame is not None:
                # Store the dataframe in the HDF5 file
                store.put(key, frame, index=False)
            elif key in store:
                # An earlier save to the same file may have left this table
                # behind; remove it so loading cannot mix old and new data
                store.remove(key)
    return os.path.exists(output_path)
Save the events to an HDF5 file, including metadata and features. Uses the pandas-provided HDF5 functions for ease, and external compatibility, though these files are slightly harder to view in HDFView or similar.
Parameters
- output_path: the path of the HDF5 file to write.
Returns
@classmethod
def load_hdf5(cls, input_path: str) -> typing.Self:
    """
    Load the events from an HDF5 file, including metadata and features.
    :param input_path: the path of an HDF5 file holding "info" and optionally
        "metadata" and "features" tables.
    :return: an EventArray built from the tables found in the file.
    """
    # Open read-only: the default append mode would create an empty file for a
    # mistyped path and requires write access just to read
    with pd.HDFStore(input_path, mode="r") as store:
        # Load the dataframes from the HDF5 file
        info = store.get("info") if "info" in store else None
        metadata = store.get("metadata") if "metadata" in store else None
        features = store.get("features") if "features" in store else None
    return cls(info=info, metadata=metadata, features=features)
Load the events from an HDF5 file, including metadata and features.
Parameters
- input_path: the path of the HDF5 file to read.