taulu
Taulu - segment tables from images
Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).
To use this package, you first need to make an annotation of the headers in your table images. The idea is that these headers will be similar across your full set of images, and they will be used as a starting point for the search algorithm that finds the table grid.
Here is an example python script of how to use Taulu:
from taulu import Taulu
import os
def setup():
    """Annotate the left and right table headers of the example image.

    Calls `Taulu.annotate` once per header, writing the results to
    `table_00_header_left.png` and `table_00_header_right.png`.
    """
    source_image = "../data/table_00.png"
    # one annotation for the left header, one for the right
    header_outputs = (
        ("LEFT", "table_00_header_left.png"),
        ("RIGHT", "table_00_header_right.png"),
    )
    for side, output_path in header_outputs:
        print(f"Annotating the {side} header...")
        Taulu.annotate(source_image, output_path)
def main():
    """Segment the example table image and display the detected cells."""
    image_path = "../data/table_00.png"
    header_annotations = ("table_00_header_left.png", "table_00_header_right.png")

    segmenter = Taulu(header_annotations)
    table = segmenter.segment_table(
        image_path, cell_height_factor=0.8, debug_view=True
    )
    table.show_cells(image_path)
if __name__ == "__main__":
    # The header annotations only need to be created once; reuse them when
    # both files already exist.
    header_files = ("table_00_header_left.png", "table_00_header_right.png")
    if not all(os.path.exists(path) for path in header_files):
        setup()
    main()
If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu).
"""
Taulu - *segment tables from images*

Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).

To use this package, you first need to make an annotation of the headers in your table images.
The idea is that these headers will be similar across your full set of images, and they will be
used as a starting point for the search algorithm that finds the table grid.

Here is an example python script of how to use Taulu:
```python
from taulu import Taulu
import os


def setup():
    # create an Annotation file of the headers in the image
    # (one for the left header, one for the right)
    # and store them in the examples directory
    print("Annotating the LEFT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")

    print("Annotating the RIGHT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")


def main():
    taulu = Taulu(("table_00_header_left.png", "table_00_header_right.png"))
    table = taulu.segment_table("../data/table_00.png", cell_height_factor=0.8, debug_view=True)

    table.show_cells("../data/table_00.png")


if __name__ == "__main__":
    if os.path.exists("table_00_header_left.png") and os.path.exists(
        "table_00_header_right.png"
    ):
        main()
    else:
        setup()
        main()

```

If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu)
"""

from .grid import GridDetector, TableGrid
from .header_aligner import HeaderAligner
from .header_template import HeaderTemplate
from .table_indexer import TableIndexer
from .split import Split
from .taulu import Taulu

# Submodules that are excluded from the generated pdoc documentation.
__pdoc__ = {
    module_name: False
    for module_name in (
        "constants",
        "main",
        "decorators",
        "error",
        "types",
        "img_util",
    )
}

# Public API of the package.
__all__ = [
    "GridDetector",
    "TableGrid",
    "HeaderAligner",
    "HeaderTemplate",
    "TableIndexer",
    "Split",
    "Taulu",
]

# The gpu submodule is optional: export it only when it can be imported.
try:
    from . import gpu
except ImportError:
    pass
else:
    __all__.append("gpu")
class GridDetector:
    """
    Detects table grid intersections using morphological filtering and template matching.

    This detector implements a multi-stage pipeline:

    1. **Binarization**: Sauvola adaptive thresholding to handle varying lighting
    2. **Morphological operations**: Dilation to connect broken rule segments
    3. **Cross-kernel matching**: Template matching with a cross-shaped kernel to find
       rule intersections where horizontal and vertical lines meet
    4. **Grid growing**: Iterative point detection starting from a known seed point

    The cross-kernel is designed to match the specific geometry of your table rules.
    It should be sized so that after morphology, it aligns with actual corner shapes.

    ## Tuning Guidelines

    - **kernel_size**: Increase if you need more selectivity (fewer false positives)
    - **cross_width/height**: Should match rule thickness after morphology
    - **morph_size**: Increase to connect more broken lines, but this thickens rules
    - **sauvola_k**: Increase to threshold more aggressively (remove noise)
    - **search_region**: Increase for documents with more warping/distortion
    - **distance_penalty**: Increase to prefer corners closer to expected positions

    ## Visual Debugging

    Set `visual=True` in methods to see intermediate results and tune parameters.
    """

    def __init__(
        self,
        kernel_size: int = 21,
        cross_width: int = 6,
        cross_height: Optional[int] = None,
        morph_size: Optional[int] = None,
        sauvola_k: float = 0.04,
        sauvola_window: int = 15,
        scale: float = 1.0,
        search_region: int = 40,
        distance_penalty: float = 0.4,
        min_rows: int = 5,
        grow_threshold: float = 0.3,
        look_distance: int = 4,
    ):
        """
        Args:
            kernel_size (int): the size of the cross kernel (must be odd);
                a larger kernel size often means that more penalty is applied, often leading
                to more sparse results
            cross_width (int): the width of one of the edges in the cross filter, should be
                roughly equal to the width of the rules in the image after morphology is applied
            cross_height (int | None): useful if the horizontal rules and vertical rules
                have different sizes. Defaults to `cross_width`
            morph_size (int | None): the size of the morphology operators that are applied before
                the cross kernel. 'bridges the gaps' of broken-up lines. Defaults to `cross_width`
            sauvola_k (float): threshold parameter for sauvola thresholding
            sauvola_window (int): window_size parameter for sauvola thresholding
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            search_region (int): area in which to search for a new max value in `find_nearest` etc.
            distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
            grow_threshold (float): the threshold for accepting a new point when growing the table
            look_distance (int): how many points away to look when calculating the median slope

        Raises:
            ValueError: if any of the validated parameters is out of range
        """
        self._validate_parameters(
            kernel_size,
            cross_width,
            cross_height,
            morph_size,
            search_region,
            sauvola_k,
            sauvola_window,
            distance_penalty,
        )
        # scale is used as a resize factor and as a divisor when mapping
        # results back, so zero or negative values can never work
        if scale <= 0:
            raise ValueError("scale must be positive")

        self._kernel_size = kernel_size
        self._cross_width = cross_width
        self._cross_height = cross_width if cross_height is None else cross_height
        self._morph_size = morph_size if morph_size is not None else cross_width
        self._search_region = search_region
        self._sauvola_k = sauvola_k
        self._sauvola_window = sauvola_window
        self._distance_penalty = distance_penalty
        self._scale = scale
        self._min_rows = min_rows
        self._grow_threshold = grow_threshold
        self._look_distance = look_distance

        self._cross_kernel = self._create_cross_kernel()

    def _validate_parameters(
        self,
        kernel_size: int,
        cross_width: int,
        cross_height: Optional[int],
        morph_size: Optional[int],
        search_region: int,
        sauvola_k: float,
        sauvola_window: int,
        distance_penalty: float,
    ) -> None:
        """
        Validate initialization parameters.

        Raises:
            ValueError: if `kernel_size` is even, if a size parameter is not
                positive, if `distance_penalty` is outside [0, 1] or if
                `sauvola_k` is not positive
        """
        if kernel_size % 2 == 0:
            raise ValueError("kernel_size must be odd")
        if (
            kernel_size <= 0
            or cross_width <= 0
            or search_region <= 0
            or sauvola_window <= 0
        ):
            raise ValueError("Size parameters must be positive")
        if cross_height is not None and cross_height <= 0:
            raise ValueError("cross_height must be positive")
        if morph_size is not None and morph_size <= 0:
            raise ValueError("morph_size must be positive")
        if not 0 <= distance_penalty <= 1:
            raise ValueError("distance_penalty must be in [0, 1]")
        if sauvola_k <= 0:
            raise ValueError("sauvola_k must be positive")

    def _create_gaussian_weights(self, region_size: int) -> NDArray:
        """
        Create a square 2D Gaussian weight mask for a search window.

        The mask is largest at the centre of the window. Its width is derived
        from `distance_penalty`: at a normalised distance of 1 from the centre
        (the middle of each edge) the weight equals `1 - distance_penalty`.
        A penalty of 0 yields an all-ones mask (no weighting).

        Args:
            region_size (int): side length of the square mask, in pixels

        Returns:
            NDArray: float32 weight mask of shape (region_size, region_size)
        """
        if self._distance_penalty == 0:
            return np.ones((region_size, region_size), dtype=np.float32)

        # normalised coordinates in [-1, 1] along both axes
        axis = np.linspace(-1, 1, region_size)
        xv, yv = np.meshgrid(axis, axis)
        dist_squared = xv**2 + yv**2

        # Prevent log(0) when distance_penalty is 1
        if self._distance_penalty >= 0.999:
            sigma = 0.1  # Small sigma for very sharp peak
        else:
            # solves exp(-1 / (2 * sigma^2)) == 1 - distance_penalty
            sigma = np.sqrt(-1 / (2 * np.log(1 - self._distance_penalty)))

        weights = np.exp(-dist_squared / (2 * sigma**2))

        return weights.astype(np.float32)

    def _create_cross_kernel(self) -> NDArray:
        """
        Build the cross-shaped template used for intersection matching.

        Returns:
            NDArray: uint8 array of shape (kernel_size, kernel_size) that is 0
                everywhere except for a centred horizontal bar (thickness
                `cross_height`) and a centred vertical bar (thickness
                `cross_width`), both set to 255
        """
        kernel = np.zeros((self._kernel_size, self._kernel_size), dtype=np.uint8)
        center = self._kernel_size // 2

        # Create horizontal bar
        h_start = max(0, center - self._cross_height // 2)
        h_end = min(self._kernel_size, center + (self._cross_height + 1) // 2)
        kernel[h_start:h_end, :] = 255

        # Create vertical bar
        v_start = max(0, center - self._cross_width // 2)
        v_end = min(self._kernel_size, center + (self._cross_width + 1) // 2)
        kernel[:, v_start:v_end] = 255

        return kernel

    def _apply_morphology(self, binary: MatLike) -> MatLike:
        """
        Dilate the binary image horizontally and then vertically.

        Both passes use a 1-pixel-thick rectangular structuring element of
        length `morph_size`.
        """
        kernel_hor = cv.getStructuringElement(cv.MORPH_RECT, (self._morph_size, 1))
        kernel_ver = cv.getStructuringElement(cv.MORPH_RECT, (1, self._morph_size))

        # Apply dilation
        dilated = cv.dilate(binary, kernel_hor, iterations=1)
        dilated = cv.dilate(dilated, kernel_ver, iterations=1)

        return dilated

    def _apply_cross_matching(self, img: MatLike) -> MatLike:
        """
        Apply cross kernel template matching.

        The image is zero-padded by half the kernel size on every side before
        matching. The squared-difference response is inverted (so that good
        matches are bright) and min-max normalised to the 0-255 range.

        Returns:
            MatLike: uint8 response image
        """
        pad_y = self._cross_kernel.shape[0] // 2
        pad_x = self._cross_kernel.shape[1] // 2

        padded = cv.copyMakeBorder(
            img, pad_y, pad_y, pad_x, pad_x, borderType=cv.BORDER_CONSTANT, value=0
        )

        filtered = cv.matchTemplate(padded, self._cross_kernel, cv.TM_SQDIFF_NORMED)
        # Invert and normalize to 0-255 range
        filtered = cv.normalize(1.0 - filtered, None, 0, 255, cv.NORM_MINMAX)
        return filtered.astype(np.uint8)

    def apply(self, img: MatLike, visual: bool = False) -> MatLike:
        """
        Apply the grid detection filter to the input image.

        Args:
            img (MatLike): the input image
            visual (bool): whether to show intermediate steps

        Returns:
            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

        Raises:
            ValueError: if `img` is None or empty
        """

        if img is None or img.size == 0:
            raise ValueError("Input image is empty or None")

        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)

        if visual:
            imu.show(binary, title="thresholded")

        binary = self._apply_morphology(binary)

        if visual:
            imu.show(binary, title="dilated")

        filtered = self._apply_cross_matching(binary)

        return filtered

    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: Optional[int] = None
    ) -> Tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `search_region`

        Returns:
            tuple[tuple[int, int], float]: the best matching point (x, y) in
                image coordinates and its weighted score divided by 255.
                When the search window does not overlap the image, `point`
                itself is returned with a score of 0.0

        Raises:
            ValueError: if `filtered` is None or empty
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_region
        x, y = point

        # Calculate crop boundaries
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Weight the crop with the part of the Gaussian mask it lines up with.
        # A full-size crop uses the whole mask (offsets are 0); a crop that was
        # clipped by the image border uses the centred sub-window of the mask.
        # NOTE(review): for a clipped crop the mask is therefore centred on the
        # crop rather than on `point` - confirm this is intended.
        offset_y = (region_size - cropped.shape[0]) // 2
        offset_x = (region_size - cropped.shape[1]) // 2
        weights = self._create_gaussian_weights(region_size)[
            offset_y : offset_y + cropped.shape[0],
            offset_x : offset_x + cropped.shape[1],
        ]
        weighted = cropped.astype(np.float32) * weights

        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence

    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        left_top: Point,
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        window: str = WINDOW,
        goals_width: Optional[int] = None,
        filtered: Optional[MatLike | PathLike[str]] = None,
    ) -> "TableGrid":
        """
        Parse the image to a `TableGrid` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike | PathLike[str]): the input image of a table, or a path to it
            left_top (tuple[int, int]): the starting point of the algorithm
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int] | int): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): the width of the goal region when searching for the next point.
                Not used by this method itself; accepted for backward compatibility
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch

        Returns:
            a TableGrid object

        Raises:
            ValueError: if `cell_widths` is empty, or if `img` / `filtered` is
                given as a path that cannot be read as an image
        """

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        if not isinstance(img, np.ndarray):
            img_path = os.fspath(img)
            img = cv.imread(img_path)
            # cv.imread signals failure by returning None instead of raising
            if img is None:
                raise ValueError(f"Could not read image from {img_path!r}")

        if filtered is None:
            filtered = self.apply(img, visual)
        elif not isinstance(filtered, np.ndarray):
            filtered_path = os.fspath(filtered)
            filtered = cv.imread(filtered_path)
            if filtered is None:
                raise ValueError(
                    f"Could not read filtered image from {filtered_path!r}"
                )

        # find_nearest works on a single-channel image
        filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)

        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # the seed point is refined at full resolution, in a wider window
        left_top, confidence = self.find_nearest(
            filtered, left_top, int(self._search_region * 3)
        )

        if confidence < 0.1:
            logger.warning(
                f"Low confidence for the starting point: {confidence} at {left_top}"
            )

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        left_top = (int(left_top[0] * self._scale), int(left_top[1] * self._scale))
        # Keep the scaled region local: storing it back on self would shrink
        # (or grow) the detector's search region a little more on every call.
        search_region = int(self._search_region * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        table_grower = TableGrower(
            img_gray,
            filtered_gray,
            cell_widths,  # pyright: ignore
            cell_heights,  # pyright: ignore
            left_top,
            search_region,
            self._distance_penalty,
            self._look_distance,
            self._grow_threshold,
            self._min_rows,
        )

        def show_grower_progress(wait: bool = False):
            """Draw the corners and edge candidates found so far and show them."""
            img_orig = np.copy(img)
            for corner_row in table_grower.get_all_corners():
                for corner in corner_row:
                    if corner is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corner],
                            color=(0, 0, 255),
                            thickness=30,
                        )

            for point, score in table_grower.get_edge_points():
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._grow_threshold

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                # give up when nothing new was found for a long time
                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # move the threshold back up towards its original value
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    # nothing could be extrapolated: relax the threshold
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            table_grower.grow_table(img_gray, filtered_gray)

        table_grower.smooth_grid()
        corners = table_grower.get_all_corners()
        column_count = len(corners[0]) if corners else 0
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {column_count} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for y, corner_row in enumerate(corners):
                for x, corner in enumerate(corner_row):
                    if corner is not None:
                        corners[y][x] = (
                            int(corner[0] / self._scale),  # pyright:ignore
                            int(corner[1] / self._scale),  # pyright:ignore
                        )

        return TableGrid(corners)  # pyright: ignore

    @log_calls(level=logging.DEBUG, include_return=True)
    def _build_table_row(
        self,
        gray: MatLike,
        filtered: MatLike,
        start_point: Point,
        cell_widths: List[int],
        row_idx: int,
        goals_width: int,
        previous_row_points: Optional[List[Point]] = None,
        visual: bool = False,
    ) -> List[Point]:
        """
        Build a single row of table points.

        Starting at `start_point`, one point is searched for every entry of
        `cell_widths` with `_find_next_column_point`.

        Returns:
            List[Point]: the row including `start_point`, or an empty list
                when one of the points could not be found
        """
        row = [start_point]
        current = start_point

        for col_idx, width in enumerate(cell_widths):
            next_point = self._find_next_column_point(
                gray,
                filtered,
                current,
                width,
                goals_width,
                visual,
                previous_row_points,
                col_idx,
            )
            if next_point is None:
                logger.warning(
                    f"Could not find point for row {row_idx}, col {col_idx + 1}"
                )
                return []  # Return empty list to signal failure
            row.append(next_point)
            current = next_point

        return row

    def _clamp_point_to_img(self, point: Point, img: MatLike) -> Point:
        """Clamp a point to be within the image bounds."""
        x = max(0, min(point[0], img.shape[1] - 1))
        y = max(0, min(point[1], img.shape[0] - 1))
        return (x, y)

    @log_calls(level=logging.DEBUG, include_return=True)
    def _find_next_column_point(
        self,
        gray: MatLike,
        filtered: MatLike,
        current: Point,
        width: int,
        goals_width: int,
        visual: bool = False,
        previous_row_points: Optional[List[Point]] = None,
        current_col_idx: int = 0,
    ) -> Optional[Point]:
        """
        Find the next point in the current row.

        A path is traced rightwards from `current` towards a set of goal
        points, and the corner match nearest to the end of that path is
        returned.

        Returns:
            Optional[Point]: the next point, or None when path finding fails
        """

        if previous_row_points is not None and current_col_idx + 1 < len(
            previous_row_points
        ):
            # grow an astar path downwards from the previous row point that is
            # above and to the right of current
            # and ensure all points are within image bounds
            bottom_right = [
                self._clamp_point_to_img(
                    (
                        current[0] + width - goals_width // 2 + x,
                        current[1] + goals_width,
                    ),
                    gray,
                )
                for x in range(goals_width)
            ]
            # the downward path itself becomes the goal set for the
            # rightward search below
            goals = self._astar(
                gray, previous_row_points[current_col_idx + 1], bottom_right, "down"
            )

            if goals is None:
                logger.warning(
                    f"A* failed to find path going downwards from previous row's point at idx {current_col_idx + 1}"
                )
                return None
        else:
            # no usable previous row: aim for a vertical strip of points one
            # cell width to the right of current
            goals = [
                self._clamp_point_to_img(
                    (current[0] + width, current[1] - goals_width // 2 + y), gray
                )
                for y in range(goals_width)
            ]

        path = self._astar(gray, current, goals, "right")

        if path is None:
            logger.warning(
                f"A* failed to find path going rightward from {current} to goals"
            )
            return None

        next_point, _ = self.find_nearest(filtered, path[-1], self._search_region)

        # show the point and the search region on the image for debugging
        if visual:
            self._visualize_path_finding(
                goals + path,
                current,
                next_point,
                current,
                path[-1],
                self._search_region,
            )

        return next_point

    @log_calls(level=logging.DEBUG, include_return=True)
    def _find_next_row_start(
        self,
        gray: MatLike,
        filtered: MatLike,
        top_point: Point,
        row_idx: int,
        cell_heights: List[int],
        goals_width: int,
        visual: bool = False,
    ) -> Optional[Point]:
        """
        Find the starting point of the next row.

        The expected row height is `cell_heights[row_idx]`, falling back to
        the last entry once `row_idx` runs past the list.

        Returns:
            Optional[Point]: the start of the next row, or None when the next
                row would fall within 10 pixels of the bottom of the image or
                when path finding fails
        """
        if row_idx < len(cell_heights):
            row_height = cell_heights[row_idx]
        else:
            row_height = cell_heights[-1]

        if top_point[1] + row_height >= filtered.shape[0] - 10:  # Near bottom
            return None

        # horizontal strip of goal points one row height below top_point
        goals = [
            (top_point[0] - goals_width // 2 + x, top_point[1] + row_height)
            for x in range(goals_width)
        ]

        path = self._astar(gray, top_point, goals, "down")
        if path is None:
            return None

        next_point, _ = self.find_nearest(
            filtered, path[-1], region=self._search_region * 3 // 2
        )

        if visual:
            self._visualize_path_finding(
                path, top_point, next_point, top_point, path[-1], self._search_region
            )

        return next_point

    def _visualize_grid(self, img: MatLike, points: List[List[Point]]) -> None:
        """Visualize the detected grid points."""
        all_points = [point for row in points for point in row]
        drawn = imu.draw_points(img, all_points)
        imu.show(drawn, wait=True)

    def _visualize_path_finding(
        self,
        path: List[Point],
        current: Point,
        next_point: Point,
        previous_row_target: Optional[Point] = None,
        region_center: Optional[Point] = None,
        region_size: Optional[int] = None,
    ) -> None:
        """
        Visualize the path finding process for debugging.

        Pops the current debug image from `imu`, draws the path, the points
        and (optionally) the search region on it, and pushes it back. The
        image is only shown once every 10 calls, tracked by the module-level
        `show_time` counter.
        """
        global show_time

        screen = imu.pop()

        # if gray, convert to BGR
        if len(screen.shape) == 2 or screen.shape[2] == 1:
            debug_img = cv.cvtColor(screen, cv.COLOR_GRAY2BGR)
        else:
            debug_img = cast(MatLike, screen)

        debug_img = imu.draw_points(debug_img, path, color=(200, 200, 0), thickness=2)
        debug_img = imu.draw_points(
            debug_img, [current], color=(0, 255, 0), thickness=3
        )
        debug_img = imu.draw_points(
            debug_img, [next_point], color=(0, 0, 255), thickness=2
        )

        # Draw previous row target if available
        if previous_row_target is not None:
            debug_img = imu.draw_points(
                debug_img, [previous_row_target], color=(255, 0, 255), thickness=2
            )

        # Draw search region if available
        if region_center is not None and region_size is not None:
            top_left = (
                max(0, region_center[0] - region_size // 2),
                max(0, region_center[1] - region_size // 2),
            )
            bottom_right = (
                min(debug_img.shape[1], region_center[0] + region_size // 2),
                min(debug_img.shape[0], region_center[1] + region_size // 2),
            )
            cv.rectangle(
                debug_img,
                top_left,
                bottom_right,
                color=(255, 0, 0),
                thickness=2,
                lineType=cv.LINE_AA,
            )

        imu.push(debug_img)

        # only refresh the window on every 10th call
        show_time += 1
        if show_time % 10 != 1:
            return

        imu.show(debug_img, title="Next column point", wait=False)

    @log_calls(level=logging.DEBUG, include_return=True)
    def _astar(
        self,
        img: np.ndarray,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        direction: str,
    ) -> Optional[List[Point]]:
        """
        Find the best path between the start point and one of the goal points on the image

        The search runs on a crop of the image around the start and goal
        points (30 pixel margin) and is delegated to `rust_astar`. When
        `scale` is not 1.0 the image and points are scaled down first and the
        resulting path is scaled back up.

        Args:
            img (np.ndarray): the image to search on
            start (tuple[int, int]): start point (x, y)
            goals (list[tuple[int, int]]): candidate end points (x, y)
            direction (str): passed through to `rust_astar`
                (callers in this class use "right" and "down")

        Returns:
            Optional[List[Point]]: the path in image coordinates, or None when
                there are no goals, the crop is empty or no path was found
        """

        if not goals:
            return None

        if self._scale != 1.0:
            img = cv.resize(img, None, fx=self._scale, fy=self._scale)
            start = (int(start[0] * self._scale), int(start[1] * self._scale))
            goals = [(int(g[0] * self._scale), int(g[1] * self._scale)) for g in goals]

        # calculate bounding box with margin
        all_points = goals + [start]
        xs = [p[0] for p in all_points]
        ys = [p[1] for p in all_points]

        margin = 30
        top_left = (max(0, min(xs) - margin), max(0, min(ys) - margin))
        bottom_right = (
            min(img.shape[1], max(xs) + margin),
            min(img.shape[0], max(ys) + margin),
        )

        # check bounds
        if (
            top_left[0] >= bottom_right[0]
            or top_left[1] >= bottom_right[1]
            or top_left[0] >= img.shape[1]
            or top_left[1] >= img.shape[0]
        ):
            return None

        # transform coordinates to cropped image
        start_local = (start[0] - top_left[0], start[1] - top_left[1])
        goals_local = [(g[0] - top_left[0], g[1] - top_left[1]) for g in goals]

        cropped = img[top_left[1] : bottom_right[1], top_left[0] : bottom_right[0]]

        if cropped.size == 0:
            return None

        path = rust_astar(cropped, start_local, goals_local, direction)

        if path is None:
            return None

        if self._scale != 1.0:
            path = [(int(p[0] / self._scale), int(p[1] / self._scale)) for p in path]
            top_left = (int(top_left[0] / self._scale), int(top_left[1] / self._scale))

        # back from crop coordinates to image coordinates
        return [(p[0] + top_left[0], p[1] + top_left[1]) for p in path]
Detects table grid intersections using morphological filtering and template matching.
This detector implements a multi-stage pipeline:
- Binarization: Sauvola adaptive thresholding to handle varying lighting
- Morphological operations: Dilation to connect broken rule segments
- Cross-kernel matching: Template matching with a cross-shaped kernel to find rule intersections where horizontal and vertical lines meet
- Grid growing: Iterative point detection starting from a known seed point
The cross-kernel is designed to match the specific geometry of your table rules. It should be sized so that after morphology, it aligns with actual corner shapes.
Tuning Guidelines
- kernel_size: Increase if you need more selectivity (fewer false positives)
- cross_width/height: Should match rule thickness after morphology
- morph_size: Increase to connect more broken lines, but this thickens rules
- sauvola_k: Increase to threshold more aggressively (remove noise)
- search_region: Increase for documents with more warping/distortion
- distance_penalty: Increase to prefer corners closer to expected positions
Visual Debugging
Set visual=True in methods to see intermediate results and tune parameters.
def __init__(
    self,
    kernel_size: int = 21,
    cross_width: int = 6,
    cross_height: Optional[int] = None,
    morph_size: Optional[int] = None,
    sauvola_k: float = 0.04,
    sauvola_window: int = 15,
    scale: float = 1.0,
    search_region: int = 40,
    distance_penalty: float = 0.4,
    min_rows: int = 5,
    grow_threshold: float = 0.3,
    look_distance: int = 4,
):
    """
    Args:
        kernel_size (int): the size of the cross kernel;
            a larger kernel size often means that more penalty is applied, often leading
            to more sparse results
        cross_width (int): the width of one of the edges in the cross filter, should be
            roughly equal to the width of the rules in the image after morphology is applied
        cross_height (int | None): useful if the horizontal rules and vertical rules
            have different sizes. Defaults to `cross_width`
        morph_size (int | None): the size of the morphology operators that are applied before
            the cross kernel. 'bridges the gaps' of broken-up lines. Defaults to `cross_width`
        sauvola_k (float): threshold parameter for sauvola thresholding
        sauvola_window (int): window_size parameter for sauvola thresholding
        scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
        search_region (int): area in which to search for a new max value in `find_nearest` etc.
        distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
        min_rows (int): minimum number of rows to find before stopping the table finding algorithm
        grow_threshold (float): the threshold for accepting a new point when growing the table
        look_distance (int): how many points away to look when calculating the median slope

    Raises:
        ValueError: if `scale` is not positive, or if `_validate_parameters`
            rejects one of the other arguments
    """
    self._validate_parameters(
        kernel_size,
        cross_width,
        cross_height,
        morph_size,
        search_region,
        sauvola_k,
        sauvola_window,
        distance_penalty,
    )
    # scale is not covered by _validate_parameters; it is a resize factor,
    # so zero or negative values can never work
    if scale <= 0:
        raise ValueError("scale must be positive")

    self._kernel_size = kernel_size
    self._cross_width = cross_width
    # both optional sizes fall back to cross_width
    self._cross_height = cross_width if cross_height is None else cross_height
    self._morph_size = morph_size if morph_size is not None else cross_width
    self._search_region = search_region
    self._sauvola_k = sauvola_k
    self._sauvola_window = sauvola_window
    self._distance_penalty = distance_penalty
    self._scale = scale
    self._min_rows = min_rows
    self._grow_threshold = grow_threshold
    self._look_distance = look_distance

    # built last: it reads the kernel and cross sizes stored above
    self._cross_kernel = self._create_cross_kernel()
Arguments:
- kernel_size (int): the size of the cross kernel. A larger kernel size often means that more penalty is applied, often leading to sparser results
- cross_width (int): the width of one of the edges in the cross filter, should be roughly equal to the width of the rules in the image after morphology is applied
- cross_height (int | None): useful if the horizontal rules and vertical rules have different sizes
- morph_size (int | None): the size of the morphology operators that are applied before the cross kernel. 'bridges the gaps' of broken-up lines
- sauvola_k (float): threshold parameter for sauvola thresholding
- sauvola_window (int): window_size parameter for sauvola thresholding
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- search_region (int): area in which to search for a new max value in `find_nearest` etc.
- distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
- min_rows (int): minimum number of rows to find before stopping the table finding algorithm
- grow_threshold (float): the threshold for accepting a new point when growing the table
- look_distance (int): how many points away to look when calculating the median slope
def apply(self, img: MatLike, visual: bool = False) -> MatLike:
    """
    Run the grid detection filter on the input image.

    The image is thresholded with sauvola, passed through `_apply_morphology`
    and finally through `_apply_cross_matching`.

    Args:
        img (MatLike): the input image
        visual (bool): whether to show intermediate steps

    Returns:
        MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

    Raises:
        ValueError: if `img` is None or empty
    """
    if img is None or img.size == 0:
        raise ValueError("Input image is empty or None")

    thresholded = imu.sauvola(
        img, k=self._sauvola_k, window_size=self._sauvola_window
    )
    if visual:
        imu.show(thresholded, title="thresholded")

    morphed = self._apply_morphology(thresholded)
    if visual:
        imu.show(morphed, title="dilated")

    return self._apply_cross_matching(morphed)
Apply the grid detection filter to the input image.
Arguments:
- img (MatLike): the input image
- visual (bool): whether to show intermediate steps
Returns:
MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
@log_calls(level=logging.DEBUG, include_return=True)
def find_nearest(
    self, filtered: MatLike, point: Point, region: Optional[int] = None
) -> Tuple[Point, float]:
    """
    Find the nearest 'corner match' in the image, along with its score [0,1]

    A square window of `region` pixels around `point` is clipped to the image.
    Each pixel in it is multiplied by the matching entry of the weight mask from
    `_create_gaussian_weights`, and the position of the highest weighted value
    is returned. The mask stays aligned with the unclipped window, so its
    centre coincides with `point` even when the window is cut off by an image
    border.

    Args:
        filtered (MatLike): the filtered image (obtained through `apply`)
        point (tuple[int, int]): the approximate target point (x, y)
        region (None | int): alternative value for the search region,
            overriding the `__init__` parameter `search_region`

    Returns:
        tuple[Point, float]: the best point (x, y) in image coordinates and its
        weighted value divided by 255. `(point, 0.0)` is returned when the
        window does not overlap the image.

    Raises:
        ValueError: if `filtered` is None or empty
    """

    if filtered is None or filtered.size == 0:
        raise ValueError("Filtered image is empty or None")

    region_size = region if region is not None else self._search_region
    x, y = point

    # Top-left corner of the ideal (unclipped) window around the point
    window_x = x - region_size // 2
    window_y = y - region_size // 2

    # Intersection of the ideal window with the image
    crop_x = max(0, window_x)
    crop_y = max(0, window_y)
    crop_x_end = min(filtered.shape[1], window_x + region_size)
    crop_y_end = min(filtered.shape[0], window_y + region_size)

    if crop_x_end <= crop_x or crop_y_end <= crop_y:
        logger.warning(f"Point {point} is outside image bounds")
        return point, 0.0

    cropped = filtered[crop_y:crop_y_end, crop_x:crop_x_end]

    # Take the part of the weight mask that corresponds to the clipped window.
    # NOTE(review): assumes `_create_gaussian_weights(n)` returns an (n, n) array - confirm
    weights = self._create_gaussian_weights(region_size)[
        crop_y - window_y : crop_y_end - window_y,
        crop_x - window_x : crop_x_end - window_x,
    ]
    weighted = cropped.astype(np.float32) * weights

    best_y, best_x = np.unravel_index(np.argmax(weighted), weighted.shape)

    result_point = (
        int(crop_x + best_x),
        int(crop_y + best_y),
    )
    result_confidence = float(weighted[best_y, best_x]) / 255.0

    return result_point, result_confidence
Find the nearest 'corner match' in the image, along with its score [0,1]
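Arguments:
- filtered (MatLike): the filtered image (obtained through `apply`)
- point (tuple[int, int]): the approximate target point (x, y)
- region (None | int): alternative value for the search region, overriding the `__init__` parameter `search_region`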
def find_table_points(
    self,
    img: MatLike | PathLike[str],
    left_top: Point,
    cell_widths: list[int],
    cell_heights: list[int] | int,
    visual: bool = False,
    window: str = WINDOW,
    goals_width: Optional[int] = None,
    filtered: Optional[MatLike | PathLike[str]] = None,
) -> "TableGrid":
    """
    Parse the image to a `TableGrid` structure that holds all of the
    intersections between horizontal and vertical rules, starting near the `left_top` point

    The starting point is first refined with `find_nearest` on the full-size
    filtered image. Image, filtered image, cell sizes, starting point and
    search region are then scaled by the detector's `scale`, the grid is grown
    with a `TableGrower`, and the resulting corners are scaled back.

    Args:
        img (MatLike | PathLike[str]): the input image of a table, or a path to it
        left_top (tuple[int, int]): the starting point of the algorithm
        cell_widths (list[int]): the expected widths of the cells (based on a header template)
        cell_heights (list[int] | int): the expected height of the rows of data.
            The last value from this list is used until the image has no more vertical space.
        visual (bool): whether to show intermediate steps
        window (str): the name of the OpenCV window to use for visualization
        goals_width (int | None): accepted for backward compatibility; it is not
            used inside this method
        filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
            calculating the filtered image from scratch

    Returns:
        a TableGrid object

    Raises:
        ValueError: if `cell_widths` is empty, or if an image path cannot be read
    """

    if not cell_widths:
        raise ValueError("cell_widths must contain at least one value")

    if not isinstance(img, np.ndarray):
        img_path = os.fspath(img)
        img = cv.imread(img_path)
        # cv.imread signals failure by returning None instead of raising
        if img is None:
            raise ValueError(f"Could not read image: {img_path}")

    if filtered is None:
        filtered = self.apply(img, visual)
    else:
        if not isinstance(filtered, np.ndarray):
            filtered_path = os.fspath(filtered)
            filtered = cv.imread(filtered_path)
            if filtered is None:
                raise ValueError(f"Could not read image: {filtered_path}")

        filtered = ensure_gray(filtered)

    if visual:
        imu.show(filtered, window=window)

    if isinstance(cell_heights, int):
        cell_heights = [cell_heights]

    # Refine the starting point on the unscaled image, with a wider search window
    left_top, confidence = self.find_nearest(
        filtered, left_top, int(self._search_region * 3)
    )

    if confidence < 0.1:
        logger.warning(
            f"Low confidence for the starting point: {confidence} at {left_top}"
        )

    # resize all parameters according to scale
    img = cv.resize(img, None, fx=self._scale, fy=self._scale)

    if visual:
        imu.push(img)

    filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
    cell_widths = [int(w * self._scale) for w in cell_widths]
    cell_heights = [int(h * self._scale) for h in cell_heights]
    left_top = (int(left_top[0] * self._scale), int(left_top[1] * self._scale))
    # Keep the scaled value local: writing it back to self._search_region would
    # shrink (or grow) the configured search region a bit more on every call.
    search_region = int(self._search_region * self._scale)

    img_gray = ensure_gray(img)
    filtered_gray = ensure_gray(filtered)

    table_grower = TableGrower(
        img_gray,
        filtered_gray,
        cell_widths,  # pyright: ignore
        cell_heights,  # pyright: ignore
        left_top,
        search_region,
        self._distance_penalty,
        self._look_distance,
        self._grow_threshold,
        self._min_rows,
    )

    def show_grower_progress(wait: bool = False):
        """Draw the corners found so far and the current edge points on a copy of the image"""
        img_orig = np.copy(img)
        for corner_row in table_grower.get_all_corners():
            for corner in corner_row:
                if corner is not None:
                    img_orig = imu.draw_points(
                        img_orig,
                        [corner],
                        color=(0, 0, 255),
                        thickness=30,
                    )

        for point, score in table_grower.get_edge_points():
            color = (100, int(clamp(score * 255, 0, 255)), 100)
            imu.draw_point(img_orig, point, color=color, thickness=20)

        imu.show(img_orig, wait=wait)

    if visual:
        threshold = self._grow_threshold

        # python implementation of rust loops, for visualization purposes
        # note this is a LOT slower
        # NOTE(review): the nesting below was reconstructed from a listing without
        # indentation - confirm against the repository source
        while table_grower.grow_point(img_gray, filtered_gray) is not None:
            show_grower_progress()

        show_grower_progress(True)

        original_threshold = threshold

        loops_without_change = 0

        while not table_grower.is_table_complete():
            loops_without_change += 1

            if loops_without_change > 50:
                break

            if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                show_grower_progress()

                loops_without_change = 0

                grown = False
                while table_grower.grow_point(img_gray, filtered_gray) is not None:
                    show_grower_progress()
                    grown = True
                    # Move the threshold back up, never above its initial value
                    threshold = min(0.1 + 0.9 * threshold, original_threshold)
                    table_grower.set_threshold(threshold)

                if not grown:
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

            else:
                threshold *= 0.9
                table_grower.set_threshold(threshold)

                if table_grower.grow_point(img_gray, filtered_gray) is not None:
                    show_grower_progress()
                    loops_without_change = 0

    else:
        table_grower.grow_table(img_gray, filtered_gray)

    table_grower.smooth_grid()
    corners = table_grower.get_all_corners()
    n_cols = len(corners[0]) if corners else 0
    logger.info(
        f"Table growth complete, found {len(corners)} rows and {n_cols} columns"
    )
    # rescale corners back to original size
    if self._scale != 1.0:
        corners = [
            [
                None
                if corner is None
                else (int(corner[0] / self._scale), int(corner[1] / self._scale))
                for corner in corner_row
            ]
            for corner_row in corners
        ]

    return TableGrid(corners)  # pyright: ignore
Parse the image to a TableGrid structure that holds all of the
intersections between horizontal and vertical rules, starting near the left_top point
Arguments:
- img (MatLike): the input image of a table
- left_top (tuple[int, int]): the starting point of the algorithm
- cell_widths (list[int]): the expected widths of the cells (based on a header template)
- cell_heights (list[int]): the expected height of the rows of data. The last value from this list is used until the image has no more vertical space.
- visual (bool): whether to show intermediate steps
- window (str): the name of the OpenCV window to use for visualization
- goals_width (int | None): the width of the goal region when searching for the next point. If None, defaults to 1.5 * search_region
- filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of calculating the filtered image from scratch
Returns:
a TableGrid object
class TableGrid(TableIndexer):
    """
    A data class that allows segmenting the image into cells

    The grid is stored as rows of corner points. When the grid was merged from
    a left and a right part (see `from_split`), `_right_offset` holds the number
    of cell columns of the left part; the pair of point columns at that index
    does not bound a cell and is skipped when indexing.
    """

    _right_offset: int | None = None

    def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None):
        """
        Args:
            points: a 2D list of intersections between hor. and vert. rules
            right_offset: cell-column index at which the right part of a merged
                grid starts, or None for a grid that was not merged
        """
        self._points = points
        self._right_offset = right_offset

    @property
    def points(self) -> list[list[Point]]:
        """The corner points of the grid, one list per horizontal rule"""
        return self._points

    def row(self, i: int) -> list[Point]:
        """Return the `i`-th row of corner points"""
        assert 0 <= i and i < len(self._points)
        return self._points[i]

    @property
    def cols(self) -> int:
        """The number of cell columns (the gap of a merged grid is not counted)"""
        if self._right_offset is not None:
            return len(self.row(0)) - 2
        else:
            return len(self.row(0)) - 1

    @property
    def rows(self) -> int:
        """The number of cell rows"""
        return len(self._points) - 1

    @staticmethod
    def from_split(
        split_grids: Split["TableGrid"], offsets: Split[Point]
    ) -> "TableGrid":
        """
        Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image

        Only the rows that both parts have in common are kept.

        Args:
            split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
            offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
        """

        def offset_points(points, offset):
            return [
                [(p[0] + offset[0], p[1] + offset[1]) for p in row] for row in points
            ]

        split_points = split_grids.apply(
            lambda grid, offset: offset_points(grid.points, offset), offsets
        )

        rows = min(split_grids.left.rows, split_grids.right.rows)

        # rows + 1 horizontal rules bound `rows` rows of cells
        points = [
            [*split_points.left[row], *split_points.right[row]]
            for row in range(rows + 1)
        ]

        return TableGrid(points, split_grids.left.cols)

    def save(self, path: str | Path):
        """Write the points and the right offset to `path` as JSON"""
        with open(path, "w") as f:
            json.dump({"points": self.points, "right_offset": self._right_offset}, f)

    @staticmethod
    def from_saved(path: str | Path) -> "TableGrid":
        """Load a grid that was written with `save`"""
        with open(path, "r") as f:
            data = json.load(f)

        right_offset = data.get("right_offset", None)
        # JSON has no tuples: turn the [x, y] lists back into (x, y) points
        points = [[(p[0], p[1]) for p in row] for row in data["points"]]
        return TableGrid(points, right_offset)

    def add_left_col(self, width: int):
        """Prepend a point to every row, `width` pixels left of its first point"""
        for row in self._points:
            first = row[0]
            new_first = (first[0] - width, first[1])
            row.insert(0, new_first)

    def add_top_row(self, height: int):
        """Prepend a row of points, `height` pixels above the current first row"""
        new_row = [(point[0], point[1] - height) for point in self._points[0]]

        self._points.insert(0, new_row)

    def _surrounds(self, rect: list[Point], point: tuple[float, float]) -> bool:
        """
        Whether `point` (x, y) lies within the quadrilateral `rect`

        Args:
            rect: the corners in the order left-top, right-top, right-bottom, left-bottom
            point: the location to test, as (x, y)
        """
        lt, rt, rb, lb = rect
        x, y = point

        top = _Rule(*lt, *rt)
        if top._y_at_x(x) > y:
            return False

        right = _Rule(*rt, *rb)
        if right._x_at_y(y) < x:
            return False

        bottom = _Rule(*lb, *rb)
        if bottom._y_at_x(x) < y:
            return False

        left = _Rule(*lb, *lt)
        if left._x_at_y(y) > x:
            return False

        return True

    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Returns the coordinate (row, col) of the cell that contains the given position

        Args:
            point (tuple[float, float]): a location in the input image

        Returns:
            tuple[int, int]: the cell index (row, col) that contains the given point,
            or (-1, -1) if no cell contains it
        """
        for r in range(len(self._points) - 1):
            offset = 0
            for c in range(len(self.row(0)) - 1):
                if self._right_offset is not None and c == self._right_offset:
                    # this pair of point columns is skipped; later cell
                    # indices are shifted back by one
                    offset = -1
                    continue

                if self._surrounds(
                    [
                        self._points[r][c],
                        self._points[r][c + 1],
                        self._points[r + 1][c + 1],
                        self._points[r + 1][c],
                    ],
                    point,
                ):
                    return (r, c + offset)

        return (-1, -1)

    def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
        """
        Returns the polygon (as used in e.g. opencv) that encloses the cell at the given cell position

        Args:
            cell (tuple[int, int]): the cell index (row, col)

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y)
        """
        r, c = cell

        self._check_row_idx(r)
        self._check_col_idx(c)

        if self._right_offset is not None and c >= self._right_offset:
            c = c + 1

        return (
            self._points[r][c],
            self._points[r][c + 1],
            self._points[r + 1][c + 1],
            self._points[r + 1][c],
        )

    def region(
        self, start: tuple[int, int], end: tuple[int, int]
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding box for the rectangular region that goes from start to end

        Args:
            start (tuple[int, int]): the (row, col) of the first cell of the region
            end (tuple[int, int]): the (row, col) of the last cell of the region (inclusive)

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y)
        """
        r0, c0 = start
        r1, c1 = end

        self._check_row_idx(r0)
        self._check_row_idx(r1)
        self._check_col_idx(c0)
        self._check_col_idx(c1)

        if self._right_offset is not None and c0 >= self._right_offset:
            c0 = c0 + 1

        if self._right_offset is not None and c1 >= self._right_offset:
            c1 = c1 + 1

        lt = self._points[r0][c0]
        rt = self._points[r0][c1 + 1]
        rb = self._points[r1 + 1][c1 + 1]
        lb = self._points[r1 + 1][c0]

        return lt, rt, rb, lb

    def visualize_points(self, img: MatLike):
        """
        Draw the detected table points on the image for visual verification

        Every row of points gets its own hue. The circles are drawn on `img` itself.
        """
        import colorsys

        def clr(index, total_steps):
            hue = index / total_steps  # Normalized hue between 0 and 1
            r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
            return int(r * 255), int(g * 255), int(b * 255)

        for i, row in enumerate(self._points):
            for p in row:
                cv.circle(img, p, 4, clr(i, len(self._points)), -1)

        imu.show(img)

    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """
        Split the row into regions of continuous text

        For every column, a crop around the rule on the left of the cell is
        scored with `imu.text_presence_score`. A score that is not above -0.10
        closes the running region and starts a new one at that column.

        Args:
            img (MatLike): the image that the grid belongs to
            row (int): the index of the row to split
            margin_x (int): number of pixels added left and right of the rule
            margin_y (int): number of pixels added above and below the rule
                (negative values shrink the crop)

        Returns:
            list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans
            ((row, start col), (row, end col))
        """

        def vertical_rule_crop(row: int, col: int):
            self._check_col_idx(col)
            self._check_row_idx(row)

            if self._right_offset is not None and col >= self._right_offset:
                col = col + 1

            top = self._points[row][col]
            bottom = self._points[row + 1][col]

            left = int(min(top[0], bottom[0]))
            right = int(max(top[0], bottom[0]))

            # Clamp at 0: a negative slice bound would be interpreted by numpy
            # as counting from the end of the axis and select the wrong pixels
            y0 = max(0, int(top[1]) - margin_y)
            y1 = max(0, int(bottom[1]) + margin_y)
            x0 = max(0, left - margin_x)
            x1 = max(0, right + margin_x)

            return img[y0:y1, x0:x1]

        result = []

        start = None
        for col in range(self.cols):
            crop = vertical_rule_crop(row, col)
            text_over_score = imu.text_presence_score(crop)
            text_over = text_over_score > -0.10

            if not text_over:
                if start is not None:
                    result.append(((row, start), (row, col - 1)))
                start = col

        if start is not None:
            result.append(((row, start), (row, self.cols - 1)))

        return result
A data class that allows segmenting the image into cells
def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None):
    """
    Store the grid points and the optional right offset.

    Args:
        points: a 2D list of intersections between hor. and vert. rules
        right_offset: stored as `_right_offset`; None when not given
    """
    self._points, self._right_offset = points, right_offset
Arguments:
- points: a 2D list of intersections between hor. and vert. rules
@staticmethod
def from_split(
    split_grids: Split["TableGrid"], offsets: Split[Point]
) -> "TableGrid":
    """
    Merge the ``TableGrid`` objects of the left and right table part into a single
    grid that is able to segment the original (non-cropped) image

    Args:
        split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
        offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
    """

    def shifted(grid, offset):
        # move every point of the grid back into the coordinates of the full image
        return [
            [(pt[0] + offset[0], pt[1] + offset[1]) for pt in grid_row]
            for grid_row in grid.points
        ]

    moved = split_grids.apply(shifted, offsets)

    # only the rows that both parts have in common are merged
    shared_rows = min(split_grids.left.rows, split_grids.right.rows)

    merged = []
    for idx in range(shared_rows + 1):
        merged.append([*moved.left[idx], *moved.right[idx]])

    return TableGrid(merged, split_grids.left.cols)
Convert two TableGrid objects into one, that is able to segment the original (non-cropped) image
Arguments:
- split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
- offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
@staticmethod
def from_saved(path: str | Path) -> "TableGrid":
    """
    Build a `TableGrid` from a JSON file with a "points" entry and an
    optional "right_offset" entry.
    """
    with open(path, "r") as f:
        data = json.load(f)

    offset = data.get("right_offset", None)

    # JSON stores the points as [x, y] lists; convert them to (x, y) tuples
    rows = []
    for stored_row in data["points"]:
        rows.append([(p[0], p[1]) for p in stored_row])

    return TableGrid(rows, offset)
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Returns the coordinate (row, col) of the cell that contains the given position

    Args:
        point (tuple[float, float]): a location in the input image

    Returns:
        tuple[int, int]: the cell index (row, col) that contains the given point,
        or (-1, -1) when no cell contains it
    """
    grid = self._points

    for row_idx, (upper, lower) in enumerate(zip(grid, grid[1:])):
        shift = 0
        for col_idx in range(len(self.row(0)) - 1):
            skipped = self._right_offset is not None and col_idx == self._right_offset
            if skipped:
                # cells after the skipped pair of point columns are numbered one lower
                shift = -1
                continue

            corners = [
                upper[col_idx],
                upper[col_idx + 1],
                lower[col_idx + 1],
                lower[col_idx],
            ]
            if self._surrounds(corners, point):
                return (row_idx, col_idx + shift)

    return (-1, -1)
Returns the coordinate (row, col) of the cell that contains the given position
Arguments:
- point (tuple[float, float]): a location in the input image
Returns:
tuple[int, int]: the cell index (row, col) that contains the given point
def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
    """
    Returns the polygon (as used in e.g. opencv) that encloses the cell at the given cell position

    Args:
        cell (tuple[int, int]): the cell index (row, col)

    Returns:
        4 points: lt, rt, rb, lb, in format (x, y)
    """
    row_idx, col_idx = cell

    self._check_row_idx(row_idx)
    self._check_col_idx(col_idx)

    split_at = self._right_offset
    if split_at is not None and col_idx >= split_at:
        # skip the pair of point columns that does not bound a cell
        col_idx += 1

    upper = self._points[row_idx]
    lower = self._points[row_idx + 1]

    return (upper[col_idx], upper[col_idx + 1], lower[col_idx + 1], lower[col_idx])
Returns the polygon (as used in e.g. OpenCV) that encloses the cell at the given cell position
def region(
    self, start: tuple[int, int], end: tuple[int, int]
) -> tuple[Point, Point, Point, Point]:
    """
    Get the bounding box for the rectangular region that goes from start to end

    Args:
        start (tuple[int, int]): the (row, col) of the first cell of the region
        end (tuple[int, int]): the (row, col) of the last cell of the region (inclusive)

    Returns:
        4 points: lt, rt, rb, lb, in format (x, y)
    """
    (first_row, first_col), (last_row, last_col) = start, end

    self._check_row_idx(first_row)
    self._check_row_idx(last_row)
    self._check_col_idx(first_col)
    self._check_col_idx(last_col)

    split_at = self._right_offset
    if split_at is not None:
        # columns at or past the offset sit one point column further to the right
        if first_col >= split_at:
            first_col += 1
        if last_col >= split_at:
            last_col += 1

    top_rule = self._points[first_row]
    left_top = top_rule[first_col]
    right_top = top_rule[last_col + 1]

    bottom_rule = self._points[last_row + 1]
    right_bottom = bottom_rule[last_col + 1]
    left_bottom = bottom_rule[first_col]

    return left_top, right_top, right_bottom, left_bottom
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
def visualize_points(self, img: MatLike):
    """
    Draw the detected table points on the image for visual verification

    Each row of points is drawn in its own hue, directly on `img`, after which
    the image is shown.
    """
    import colorsys

    row_count = len(self._points)

    for row_idx, point_row in enumerate(self._points):
        # hue is the row index normalized to [0, 1)
        red, green, blue = colorsys.hsv_to_rgb(row_idx / row_count, 1.0, 1.0)
        row_color = (int(red * 255), int(green * 255), int(blue * 255))

        for pt in point_row:
            cv.circle(img, pt, 4, row_color, -1)

    imu.show(img)
Draw the detected table points on the image for visual verification
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Split the row into regions of continuous text

    For every column, a crop around the rule on the left of the cell is
    scored with `imu.text_presence_score`. A score that is not above -0.10
    closes the running region and starts a new one at that column.

    Args:
        img (MatLike): the image that the grid belongs to
        row (int): the index of the row to split
        margin_x (int): number of pixels added left and right of the rule
        margin_y (int): number of pixels added above and below the rule
            (negative values shrink the crop)

    Returns:
        list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans
        ((row, start col), (row, end col))
    """

    def vertical_rule_crop(row: int, col: int):
        self._check_col_idx(col)
        self._check_row_idx(row)

        if self._right_offset is not None and col >= self._right_offset:
            col = col + 1

        top = self._points[row][col]
        bottom = self._points[row + 1][col]

        left = int(min(top[0], bottom[0]))
        right = int(max(top[0], bottom[0]))

        # Clamp at 0: a negative slice bound would be interpreted by numpy
        # as counting from the end of the axis and select the wrong pixels
        y0 = max(0, int(top[1]) - margin_y)
        y1 = max(0, int(bottom[1]) + margin_y)
        x0 = max(0, left - margin_x)
        x1 = max(0, right + margin_x)

        return img[y0:y1, x0:x1]

    result = []

    start = None
    for col in range(self.cols):
        crop = vertical_rule_crop(row, col)
        text_over_score = imu.text_presence_score(crop)
        text_over = text_over_score > -0.10

        if not text_over:
            if start is not None:
                result.append(((row, start), (row, col - 1)))
            start = col

    if start is not None:
        result.append(((row, start), (row, self.cols - 1)))

    return result
Split the row into regions of continuous text
Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans ((row, start col), (row, end col))
class HeaderAligner:
    """
    Aligns table header templates to subject images using feature-based registration.

    This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and
    matching to compute a homography transformation that maps points from a header
    template image to their corresponding locations in full table images.

    ## How it Works

    1. **Feature Detection**: Extracts ORB keypoints from both template and subject
    2. **Feature Matching**: Finds correspondences using Hamming distance
    3. **Filtering**: Keeps top matches and prunes based on spatial consistency
    4. **Homography Estimation**: Computes perspective transform using RANSAC

    The computed homography can then transform any point from template space to
    image space, allowing you to locate table structures based on your annotation.

    ## Preprocessing Options

    - Set `k` parameter to apply Sauvola thresholding before feature detection.
      This can improve matching on documents with variable lighting.
    - Set `k=None` to use raw images (only the last channel returned by `cv.split`
      is kept, i.e. the red channel for BGR images)

    ## Tuning Guidelines

    - **max_features**: Increase if matching fails on complex templates
    - **match_fraction**: Decrease if you get many incorrect matches
    - **max_dist**: Increase for documents with more warping/distortion
    - **scale**: Decrease (<1.0) to speed up on high-resolution images

    Args:
        template (MatLike | PathLike[str] | str | None): Header template image or path.
            This should contain a clear, representative view of the table header.
        max_features (int): Maximum ORB features to detect. More features = slower
            but potentially more robust matching.
        patch_size (int): ORB patch size for feature extraction.
        match_fraction (float): Fraction [0, 1] of matches to keep after sorting by
            quality. Higher = more matches but potentially more outliers.
        scale (float): Image downscaling factor (0, 1] for processing speed.
        max_dist (float): Maximum allowed distance (relative to image size) between
            matched keypoints. Filters out spatially inconsistent matches.
        k (float | None): Sauvola threshold parameter for preprocessing. If None,
            no thresholding is applied. Typical range: 0.03-0.15.
    """

    def __init__(
        self,
        template: None | MatLike | PathLike[str] | str = None,
        max_features: int = 25_000,
        patch_size: int = 31,
        match_fraction: float = 0.6,
        scale: float = 1.0,
        max_dist: float = 1.00,
        k: float | None = 0.05,
    ):
        """
        Args:
            template (MatLike | PathLike[str] | str | None): (path of) template image, with the
                table template clearly visible. When None, the template has to be set
                through the `template` property before aligning
            max_features (int): maximal number of features that will be extracted by ORB
            patch_size (int): for ORB feature extractor
            match_fraction (float): best fraction of matches that are kept
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            max_dist (float): maximum distance (relative to image size) of matched features.
                Increase this value if the warping between image and template needs to be more aggressive
            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done

        Raises:
            TauluException: if `scale` is not in (0, 1], or the template path cannot be read
        """

        self._k = k
        if scale > 1.0:
            raise TauluException(
                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
            )
        if scale <= 0:
            raise TauluException("Use 0 < scale <= 1.0")

        self._scale = scale
        self._max_features = max_features
        self._patch_size = patch_size
        self._match_fraction = match_fraction
        self._max_dist = max_dist

        self._template: None | MatLike = None
        self._template_orig: None | MatLike = None
        if template is not None:
            self._template = self._scale_img(self._load(template))
            self._preprocess_template()

    @staticmethod
    def _load(source: MatLike | PathLike[str] | str) -> MatLike:
        """Return `source` as an image, reading it from disk when it is a path"""
        if isinstance(source, (str, PathLike)):
            path = fspath(source)
            image = cv.imread(path)
            # cv.imread signals failure by returning None instead of raising
            if image is None:
                raise TauluException(f"Could not read image: {path}")
            return image

        return cast(MatLike, source)

    def _scale_img(self, img: MatLike) -> MatLike:
        """Resize `img` by the configured scale factor"""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=self._scale, fy=self._scale)

    def _unscale_img(self, img: MatLike) -> MatLike:
        """Undo `_scale_img`"""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=1 / self._scale, fy=1 / self._scale)

    def _unscale_homography(self, h: np.ndarray) -> np.ndarray:
        """Convert a homography between scaled images to one between the full-size images"""
        if self._scale == 1.0:
            return h

        # full size -> scaled, apply h, scaled -> full size
        scale_matrix = np.diag([self._scale, self._scale, 1.0])
        inv_scale_matrix = np.diag([1.0 / self._scale, 1.0 / self._scale, 1.0])
        return inv_scale_matrix @ h @ scale_matrix

    @property
    def template(self):
        """The template image that subject images are aligned to"""
        return self._template

    @template.setter
    def template(self, value: MatLike | str):
        """Set the template image as a path or an image"""

        # TODO: check if the image has the right properties (dimensions etc.)
        # Scale here as well, so that the template matches the scaled subject
        # images in the same way as a template passed to __init__
        self._template = self._scale_img(self._load(value))

        self._preprocess_template()

    def _preprocess_template(self):
        """Store a grayscale copy of the template and preprocess the template itself"""
        self._template_orig = cv.cvtColor(self._template, cv.COLOR_BGR2GRAY)
        if self._k is not None:
            self._template = imu.sauvola(self._template, self._k)
            self._template = cv.bitwise_not(self._template)
        else:
            _, _, self._template = cv.split(self._template)

    def _preprocess_image(self, img: MatLike):
        """Apply the same preprocessing to a subject image as was applied to the template"""
        if self._template_orig is None:
            raise TauluException("process the template first")

        if self._k is not None:
            img = imu.sauvola(img, self._k)
            img = cv.bitwise_not(img)
        else:
            _, _, img = cv.split(img)

        return img

    @log_calls(level=logging.DEBUG, include_return=True)
    def _find_transform_of_template_on(
        self, im: MatLike, visual: bool = False, window: str = WINDOW
    ):
        """
        Estimate the homography that maps the template onto the (preprocessed) image `im`

        Args:
            im (MatLike): the preprocessed subject image
            visual (bool): whether to show the ten best matches
            window (str): the name of the OpenCV window to use for visualization
        """
        im = self._scale_img(im)
        # Detect ORB features and compute descriptors.
        orb = cv.ORB_create(
            self._max_features,  # type:ignore
            patchSize=self._patch_size,
        )
        keypoints_im, descriptors_im = orb.detectAndCompute(im, None)
        keypoints_tg, descriptors_tg = orb.detectAndCompute(self._template, None)

        # Match features
        matcher = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
        matches = matcher.match(descriptors_im, descriptors_tg)

        # Sort matches by score
        matches = sorted(matches, key=lambda x: x.distance)

        # Remove not so good matches
        num_good_matches = int(len(matches) * self._match_fraction)
        matches = matches[:num_good_matches]

        if visual:
            final_img_filtered = cv.drawMatches(
                im,
                keypoints_im,
                self._template,
                keypoints_tg,
                matches[:10],
                None,  # type:ignore
                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
            )
            imu.show(final_img_filtered, title="matches", window=window)

        # Extract location of good matches
        points1 = np.array(
            [keypoints_tg[match.trainIdx].pt for match in matches], dtype=np.float32
        ).reshape(-1, 2)
        points2 = np.array(
            [keypoints_im[match.queryIdx].pt for match in matches], dtype=np.float32
        ).reshape(-1, 2)

        # Prune reference points based upon distance between
        # key points. This assumes a fairly good alignment to start with
        # due to the protocol used (location of the sheets)
        refdist = np.abs(points1 - points2)

        # Keypoints are (x, y): x is bounded by the width (shape[1]),
        # y by the height (shape[0])
        mask_x = refdist[:, 0] < (im.shape[1] * self._max_dist)
        mask_y = refdist[:, 1] < (im.shape[0] * self._max_dist)
        mask = mask_x & mask_y
        points1 = points1[mask]
        points2 = points2[mask]

        # Find homography
        h, _ = cv.findHomography(points1, points2, cv.RANSAC)

        return self._unscale_homography(h)

    def view_alignment(self, img: MatLike, h: NDArray):
        """
        Show the alignment of the template on the given image
        by transforming it using the supplied transformation matrix `h`
        and visualising both on different channels

        Args:
            img (MatLike): the image on which the template is transformed
            h (NDArray): the transformation matrix
        """

        im = imu.ensure_gray(img)
        header = imu.ensure_gray(self._unscale_img(self._template))
        height, width = im.shape

        header_warped = cv.warpPerspective(header, h, (width, height))

        merged = np.full((height, width, 3), 255, dtype=np.uint8)

        merged[..., 1] = im
        merged[..., 2] = header_warped

        return imu.show(merged)

    @log_calls(level=logging.DEBUG, include_return=True)
    def align(
        self, img: MatLike | str, visual: bool = False, window: str = WINDOW
    ) -> NDArray:
        """
        Calculates a homogeneous transformation matrix that maps pixels of
        the template to the given image

        Args:
            img (MatLike | str): the subject image, or a path to it
            visual (bool): whether to show the matches and the resulting alignment
            window (str): the name of the OpenCV window to use for visualization

        Returns:
            NDArray: the transformation matrix

        Raises:
            TauluException: if no template has been set, or the image path cannot be read
        """

        logger.info("Aligning header with supplied table image")

        img = self._preprocess_image(self._load(img))

        h = self._find_transform_of_template_on(img, visual, window)

        if visual:
            self.view_alignment(img, h)

        return h

    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
        """
        Transform the given point (in template-space) using the transformation h
        (obtained through the `align` method)

        Args:
            h (NDArray): transformation matrix of shape (3, 3)
            point (Iterable[int]): the to-be-transformed point, should conform to (x, y)

        Returns:
            tuple[int, int]: the transformed point (x, y), truncated to integers
        """

        coords = tuple(point)
        homogeneous = np.array([coords[0], coords[1], 1.0])

        transformed = np.dot(h, homogeneous)
        # back from homogeneous to image coordinates
        transformed = transformed / transformed[2]

        return int(transformed[0]), int(transformed[1])
Aligns table header templates to subject images using feature-based registration.
This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and matching to compute a homography transformation that maps points from a header template image to their corresponding locations in full table images.
How it Works
- Feature Detection: Extracts ORB keypoints from both template and subject
- Feature Matching: Finds correspondences using Hamming distance
- Filtering: Keeps top matches and prunes based on spatial consistency
- Homography Estimation: Computes perspective transform using RANSAC
The computed homography can then transform any point from template space to image space, allowing you to locate table structures based on your annotation.
Preprocessing Options
- Set the k parameter to apply Sauvola thresholding before feature detection. This can improve matching on documents with variable lighting.
- Set k=None to skip thresholding and use the raw image (only the last channel returned by cv.split is kept, i.e. the red channel of a BGR image)
Tuning Guidelines
- max_features: Increase if matching fails on complex templates
- match_fraction: Decrease if you get many incorrect matches
- max_dist: Increase for documents with more warping/distortion
- scale: Decrease (<1.0) to speed up on high-resolution images
Arguments:
- template (MatLike | PathLike[str] | str | None): Header template image or path. This should contain a clear, representative view of the table header.
- max_features (int): Maximum ORB features to detect. More features = slower but potentially more robust matching.
- patch_size (int): ORB patch size for feature extraction.
- match_fraction (float): Fraction [0, 1] of matches to keep after sorting by quality. Higher = more matches but potentially more outliers.
- scale (float): Image downscaling factor (0, 1] for processing speed.
- max_dist (float): Maximum allowed distance (relative to image size) between matched keypoints. Filters out spatially inconsistent matches.
- k (float | None): Sauvola threshold parameter for preprocessing. If None, no thresholding is applied. Typical range: 0.03-0.15.
def __init__(
    self,
    template: None | MatLike | PathLike[str] | str = None,
    max_features: int = 25_000,
    patch_size: int = 31,
    match_fraction: float = 0.6,
    scale: float = 1.0,
    max_dist: float = 1.00,
    k: float | None = 0.05,
):
    """
    Args:
        template (MatLike | PathLike[str] | str): (path of) template image, with the table template clearly visible
        max_features (int): maximal number of features that will be extracted by ORB
        patch_size (int): for ORB feature extractor
        match_fraction (float): best fraction of matches that are kept
        scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
        max_dist (float): maximum distance (relative to image size) of matched features.
            Increase this value if the warping between image and template needs to be more aggressive
        k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done

    Raises:
        TauluException: if the template path cannot be read as an image, or if
            `scale` is not in the range 0 < scale <= 1.0
    """

    # isinstance also accepts pathlib.Path and other os.PathLike implementations,
    # which an exact `type(...) is PathLike` comparison never matches
    if isinstance(template, (str, PathLike)):
        path = fspath(template)
        template = cv.imread(path)
        # cv.imread signals failure by returning None instead of raising
        if template is None:
            raise TauluException(f"could not read template image: {path}")

    self._k = k
    if scale > 1.0:
        raise TauluException(
            "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
        )
    if scale <= 0:
        raise TauluException("Use 0 < scale <= 1.0")

    self._scale = scale
    self._template = self._scale_img(cast(MatLike, template))
    self._template_orig: None | MatLike = None
    self._preprocess_template()
    self._max_features = max_features
    self._patch_size = patch_size
    self._match_fraction = match_fraction
    self._max_dist = max_dist
Arguments:
- template (MatLike | str): (path of) template image, with the table template clearly visible
- max_features (int): maximal number of features that will be extracted by ORB
- patch_size (int): for ORB feature extractor
- match_fraction (float): best fraction of matches that are kept
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- max_dist (float): maximum distance (relative to image size) of matched features. Increase this value if the warping between image and template needs to be more aggressive
- k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
@property
def template(self):
    """Return the template image, as currently stored, that subject images are aligned to."""
    return self._template
The template image that subject images are aligned to
def view_alignment(self, img: MatLike, h: NDArray):
    """
    Visualise how well the template lines up with `img`.

    The template is warped with `h` and overlaid on the image, each occupying
    its own colour channel of the displayed picture.

    Args:
        img (MatLike): the image on which the template is transformed
        h (NDArray): the transformation matrix
    """

    subject = imu.ensure_gray(img)
    template_gray = imu.ensure_gray(self._unscale_img(self._template))
    rows, cols = subject.shape

    # warpPerspective expects the output size as (width, height)
    warped_template = cv.warpPerspective(template_gray, h, (cols, rows))

    # channel 0 stays at 255; channels 1 and 2 carry the image and the template
    overlay = np.full((rows, cols, 3), 255, dtype=np.uint8)
    overlay[..., 1] = subject
    overlay[..., 2] = warped_template

    return imu.show(overlay)
Show the alignment of the template on the given image
by transforming it using the supplied transformation matrix h
and visualising both on different channels
Arguments:
- img (MatLike): the image on which the template is transformed
- h (NDArray): the transformation matrix
@log_calls(level=logging.DEBUG, include_return=True)
def align(
    self, img: MatLike | str, visual: bool = False, window: str = WINDOW
) -> NDArray:
    """
    Calculates a homogeneous transformation matrix that maps pixels of
    the template to the given image

    Args:
        img (MatLike | str): the subject image, or the path to it
        visual (bool): if True, show the matches and the resulting alignment
        window (str): name of the window used when `visual` is True

    Raises:
        TauluException: if `img` is a path that cannot be read as an image
    """

    logger.info("Aligning header with supplied table image")

    if isinstance(img, str):
        path = img
        img = cv.imread(path)
        # cv.imread signals failure by returning None instead of raising
        if img is None:
            raise TauluException(f"could not read image: {path}")
    img = cast(MatLike, img)

    img = self._preprocess_image(img)

    h = self._find_transform_of_template_on(img, visual, window)

    if visual:
        self.view_alignment(img, h)

    return h
Calculates a homogeneous transformation matrix that maps pixels of the template to the given image
def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
    """
    Transform the given point (in template-space) using the transformation h
    (obtained through the `align` method)

    Args:
        h (NDArray): transformation matrix of shape (3, 3)
        point (Iterable[int]): the to-be-transformed point, should conform to (x, y)

    Returns:
        tuple[int, int]: the transformed (x, y), truncated to integers
    """

    # unpack instead of indexing so any iterable works; extra items are ignored
    x, y, *_ = point

    # float homogeneous coordinates keep the division valid for an integer `h`
    transformed = np.dot(h, np.array([x, y, 1.0]))
    transformed = transformed / transformed[2]

    return int(transformed[0]), int(transformed[1])
Transform the given point (in template-space) using the transformation h
(obtained through the align method)
Arguments:
- h (NDArray): transformation matrix of shape (3, 3)
- point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
class HeaderTemplate(TableIndexer):
    """
    A table header described by its annotated rules (lines), with methods for
    mapping between template coordinates and header cells.
    """

    def __init__(self, rules: Iterable[Iterable[int]]):
        """
        A HeaderTemplate is a collection of rules of a table. This class implements methods
        for finding cell positions in a table image, given the template the image adheres to.

        Args:
            rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
        """

        super().__init__()
        self._rules = [_Rule(*rule) for rule in rules]
        # horizontal rules ordered by y, vertical rules ordered by x
        self._h_rules = sorted(
            [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y
        )
        self._v_rules = sorted(
            [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x
        )

    @log_calls(level=logging.DEBUG)
    def save(self, path: PathLike[str]):
        """
        Save the HeaderTemplate to the given path, as a json
        """

        data = {"rules": [r.to_dict() for r in self._rules]}

        with open(path, "w") as f:
            json.dump(data, f)

    @staticmethod
    @log_calls(level=logging.DEBUG)
    def from_saved(path: PathLike[str]) -> "HeaderTemplate":
        """
        Load a HeaderTemplate from a json file with a "rules" list of
        {"x0", "y0", "x1", "y1"} objects
        """
        with open(path, "r") as f:
            data = json.load(f)
            rules = data["rules"]
            rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules]

            return HeaderTemplate(rules)

    @property
    def cols(self) -> int:
        """Number of columns: one less than the number of vertical rules"""
        return len(self._v_rules) - 1

    @property
    def rows(self) -> int:
        """Number of rows: one less than the number of horizontal rules"""
        return len(self._h_rules) - 1

    @staticmethod
    @log_calls(level=logging.DEBUG)
    def annotate_image(
        template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
    ) -> "HeaderTemplate":
        """
        Utility method that allows users to create a template from a template image.

        The user is asked to click to annotate lines (two clicks per line).
        A right click discards a pending first click and removes the last line.

        Args:
            template: the image on which to annotate the header lines
            crop (str | None): if str, crop the template image first, then do the annotation.
                The cropped image will be stored at the supplied path
            margin (int): margin to add around the cropping of the header

        Raises:
            TauluException: if `template` is a path that cannot be read as an image
        """

        if isinstance(template, str):
            path = template
            template = cv.imread(path)
            # cv.imread signals failure by returning None instead of raising
            if template is None:
                raise TauluException(f"could not read template image: {path}")
        template = cast(MatLike, template)

        if crop is not None:
            cropped = HeaderTemplate._crop(template, margin)
            cv.imwrite(os.fspath(crop), cropped)
            template = cropped

        start_point = None
        lines: list[list[int]] = []

        # Working copy that receives the drawn lines; `template` itself stays
        # clean so the overlay can be rebuilt from scratch after an undo.
        anno_template = np.copy(template)

        def get_point(event, x, y, flags, params):
            nonlocal lines, start_point, anno_template
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                if start_point is None:
                    # first click of a line: remember it and wait for the second
                    start_point = (x, y)
                    return

                lines.append([start_point[0], start_point[1], x, y])
                cv.line(  # type:ignore
                    anno_template,  # type:ignore
                    start_point,
                    (x, y),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )
                cv.imshow(constants.WINDOW, anno_template)  # type:ignore
                start_point = None
            elif event == cv.EVENT_RBUTTONDOWN:
                start_point = None

                # remove the last annotation
                lines = lines[:-1]

                # redraw the remaining lines on a fresh copy of the clean image
                anno_template = np.copy(template)

                for line in lines:
                    cv.line(
                        anno_template,
                        (line[0], line[1]),
                        (line[2], line[3]),
                        (0, 255, 0),
                        2,
                        cv.LINE_AA,
                    )

                cv.imshow(constants.WINDOW, anno_template)

        print(ANNO_HELP)

        imu.show(anno_template, get_point, title="annotate the header")

        return HeaderTemplate(lines)

    @staticmethod
    @log_calls(level=logging.DEBUG, include_return=True)
    def _crop(template: MatLike, margin: int = 10) -> MatLike:
        """
        Crop the image to contain only the annotations, such that it can be used as the header image in the taulu workflow.

        The user clicks the four corners of the table; a right click removes the
        last corner. The result is the bounding box of those corners, extended by
        `margin` pixels on each side.
        """

        points = []
        anno_template = np.copy(template)

        def get_point(event, x, y, flags, params):
            nonlocal points, anno_template
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                point = (x, y)

                cv.circle(  # type:ignore
                    anno_template,  # type:ignore
                    (x, y),
                    4,
                    (0, 255, 0),
                    2,
                )
                cv.imshow(constants.WINDOW, anno_template)  # type:ignore

                points.append(point)
            elif event == cv.EVENT_RBUTTONDOWN:
                # remove the last annotation
                points = points[:-1]

                # redraw the remaining corners on a fresh copy of the clean image
                anno_template = np.copy(template)

                for p in points:
                    cv.circle(
                        anno_template,
                        p,
                        4,
                        (0, 255, 0),
                        2,
                    )

                cv.imshow(constants.WINDOW, anno_template)

        print(CROP_HELP)

        imu.show(anno_template, get_point, title="crop the header")

        assert len(points) == 4, (
            "you need to annotate the four corners of the table in order to crop it"
        )

        # crop the image to contain all of the points (just crop rectangularly, x, y, w, h)
        points_np = np.array(points)

        # Find bounding box
        x_min = np.min(points_np[:, 0])
        y_min = np.min(points_np[:, 1])
        x_max = np.max(points_np[:, 0])
        y_max = np.max(points_np[:, 1])

        # Compute width and height
        width = x_max - x_min
        height = y_max - y_min

        # Ensure integers and within image boundaries
        x_min = max(int(x_min), 0)
        y_min = max(int(y_min), 0)
        width = int(width)
        height = int(height)

        # A negative slice start would count from the far edge of the image,
        # so clamp the margin-extended origin at 0
        top = max(y_min - margin, 0)
        left = max(x_min - margin, 0)

        # Crop the image
        cropped = template[
            top : y_min + height + margin,
            left : x_min + width + margin,
        ]

        return cropped

    @staticmethod
    def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
        """
        Create a HeaderTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.

        Only polylines with exactly two points are used; other rows are skipped.

        Args:
            annotation (str): the path of the annotation csv file
        """

        rules = []
        # newline="" lets the csv module handle line endings inside quoted fields
        with open(annotation, "r", newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                shape_attributes = json.loads(row["region_shape_attributes"])
                # rows without a region carry no "name" key
                if shape_attributes.get("name") == "polyline":
                    x_points = shape_attributes["all_points_x"]
                    y_points = shape_attributes["all_points_y"]
                    if len(x_points) == 2 and len(y_points) == 2:
                        rules.append(
                            [x_points[0], y_points[0], x_points[1], y_points[1]]
                        )

        return HeaderTemplate(rules)

    def cell_width(self, i: int) -> int:
        """Width of column `i`: the x distance between its two vertical rules"""
        self._check_col_idx(i)
        return int(self._v_rules[i + 1]._x - self._v_rules[i]._x)

    def cell_widths(self, start: int = 0) -> list[int]:
        """Widths of all columns from `start` onwards"""
        return [self.cell_width(i) for i in range(start, self.cols)]

    def cell_height(self, header_factor: float = 0.8) -> int:
        """Distance between the first two horizontal rules, scaled by `header_factor`"""
        return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)

    def cell_heights(self, header_factors: list[float] | float) -> list[int]:
        """
        Distance between the first two horizontal rules, scaled by each factor

        Args:
            header_factors (list[float] | float): one factor or a list of factors
        """
        # accept ints as scalars too (a bare `float` check rejects e.g. 1)
        if isinstance(header_factors, (int, float)):
            header_factors = [header_factors]
        header_factors = cast(list, header_factors)
        return [
            int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors
        ]

    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
        """
        Returns the intersection of the index[0]th horizontal rule and the
        index[1]th vertical rule
        """

        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
        assert ints is not None
        return ints

    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Get the cell index (row, col) that corresponds with the point (x, y) in the template image

        Args:
            point (tuple[float, float]): the coordinates in the template image

        Returns:
            tuple[int, int]: (row, col), or (-1, -1) if the point lies in no cell
        """

        x, y = point

        row = -1
        col = -1

        for i in range(self.rows):
            y0 = self._h_rules[i]._y_at_x(x)
            y1 = self._h_rules[i + 1]._y_at_x(x)
            if min(y0, y1) <= y <= max(y0, y1):
                row = i
                break

        for i in range(self.cols):
            x0 = self._v_rules[i]._x_at_y(y)
            x1 = self._v_rules[i + 1]._x_at_y(y)
            if min(x0, x1) <= x <= max(x0, x1):
                col = i
                break

        if row == -1 or col == -1:
            return (-1, -1)

        return (row, col)

    def cell_polygon(
        self, cell: tuple[int, int]
    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
        """
        Return points (x,y) that make up a polygon around the requested cell
        (top left, top right, bottom right, bottom left)

        Raises:
            TauluException: if an index is out of range or the surrounding rules
                do not all intersect
        """

        row, col = cell

        self._check_col_idx(col)
        self._check_row_idx(row)

        top_rule = self._h_rules[row]
        bottom_rule = self._h_rules[row + 1]
        left_rule = self._v_rules[col]
        right_rule = self._v_rules[col + 1]

        # Calculate corner points using intersections
        top_left = top_rule.intersection(left_rule)
        top_right = top_rule.intersection(right_rule)
        bottom_left = bottom_rule.intersection(left_rule)
        bottom_right = bottom_rule.intersection(right_rule)

        if not all(
            [
                point is not None
                for point in [top_left, top_right, bottom_left, bottom_right]
            ]
        ):
            raise TauluException("the lines around this cell do not intersect")

        return top_left, top_right, bottom_right, bottom_left  # type:ignore

    def region(
        self, start: tuple[int, int], end: tuple[int, int]
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding polygon of the region spanning the cells `start` to `end`

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y), truncated to integers

        Raises:
            TauluException: if an index is out of range or the surrounding rules
                do not all intersect
        """
        self._check_row_idx(start[0])
        self._check_row_idx(end[0])
        self._check_col_idx(start[1])
        self._check_col_idx(end[1])

        # the rules that surround this region
        top_rule = self._h_rules[start[0]]
        bottom_rule = self._h_rules[end[0] + 1]
        left_rule = self._v_rules[start[1]]
        right_rule = self._v_rules[end[1] + 1]

        # four points that will be the bounding polygon of the result,
        # which needs to be rectified
        top_left = top_rule.intersection(left_rule)
        top_right = top_rule.intersection(right_rule)
        bottom_left = bottom_rule.intersection(left_rule)
        bottom_right = bottom_rule.intersection(right_rule)

        if (
            top_left is None
            or top_right is None
            or bottom_left is None
            or bottom_right is None
        ):
            raise TauluException("the lines around this row do not intersect properly")

        def to_point(pnt) -> Point:
            return (int(pnt[0]), int(pnt[1]))

        return (
            to_point(top_left),
            to_point(top_right),
            to_point(bottom_right),
            to_point(bottom_left),
        )

    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """Not supported for a HeaderTemplate: always raises TauluException"""
        raise TauluException("text_regions should not be called on a HeaderTemplate")
Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.
def __init__(self, rules: Iterable[Iterable[int]]):
    """
    Build a header template from its rules (lines).

    The rules are split into horizontal ones, ordered by `_y`, and vertical
    ones, ordered by `_x`, which the cell lookup methods rely on.

    Args:
        rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
    """

    super().__init__()
    self._rules = [_Rule(*coords) for coords in rules]

    horizontal = [r for r in self._rules if r._is_horizontal()]
    self._h_rules = sorted(horizontal, key=lambda r: r._y)

    vertical = [r for r in self._rules if r._is_vertical()]
    self._v_rules = sorted(vertical, key=lambda r: r._x)
A HeaderTemplate is a collection of rules of a table. This class implements methods for finding cell positions in a table image, given the template the image adheres to.
Arguments:
- rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
@log_calls(level=logging.DEBUG)
def save(self, path: PathLike[str]):
    """
    Write the rules of this HeaderTemplate to `path` as json,
    under a single "rules" key
    """

    serialised_rules = [rule.to_dict() for rule in self._rules]
    payload = {"rules": serialised_rules}

    with open(path, "w") as handle:
        json.dump(payload, handle)
Save the HeaderTemplate to the given path, as a json
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image(
    template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
) -> "HeaderTemplate":
    """
    Utility method that allows users to create a template from a template image.

    The user is asked to click to annotate lines (two clicks per line).
    A right click discards a pending first click and removes the last line.

    Args:
        template: the image on which to annotate the header lines
        crop (str | None): if str, crop the template image first, then do the annotation.
            The cropped image will be stored at the supplied path
        margin (int): margin to add around the cropping of the header

    Raises:
        TauluException: if `template` is a path that cannot be read as an image
    """

    if isinstance(template, str):
        path = template
        template = cv.imread(path)
        # cv.imread signals failure by returning None instead of raising
        if template is None:
            raise TauluException(f"could not read template image: {path}")
    template = cast(MatLike, template)

    if crop is not None:
        cropped = HeaderTemplate._crop(template, margin)
        cv.imwrite(os.fspath(crop), cropped)
        template = cropped

    start_point = None
    lines: list[list[int]] = []

    # Working copy that receives the drawn lines; `template` itself stays
    # clean so the overlay can be rebuilt from scratch after an undo.
    anno_template = np.copy(template)

    def get_point(event, x, y, flags, params):
        nonlocal lines, start_point, anno_template
        _ = flags
        _ = params
        if event == cv.EVENT_LBUTTONDOWN:
            if start_point is None:
                # first click of a line: remember it and wait for the second
                start_point = (x, y)
                return

            lines.append([start_point[0], start_point[1], x, y])
            cv.line(  # type:ignore
                anno_template,  # type:ignore
                start_point,
                (x, y),
                (0, 255, 0),
                2,
                cv.LINE_AA,
            )
            cv.imshow(constants.WINDOW, anno_template)  # type:ignore
            start_point = None
        elif event == cv.EVENT_RBUTTONDOWN:
            start_point = None

            # remove the last annotation
            lines = lines[:-1]

            # redraw the remaining lines on a fresh copy of the clean image
            anno_template = np.copy(template)

            for line in lines:
                cv.line(
                    anno_template,
                    (line[0], line[1]),
                    (line[2], line[3]),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )

            cv.imshow(constants.WINDOW, anno_template)

    print(ANNO_HELP)

    imu.show(anno_template, get_point, title="annotate the header")

    return HeaderTemplate(lines)
Utility method that allows users to create a template from a template image.
The user is asked to click to annotate lines (two clicks per line).
Arguments:
- template: the image on which to annotate the header lines
- crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
- margin (int): margin to add around the cropping of the header
@staticmethod
def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
    """
    Create a HeaderTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.

    Only polylines with exactly two points are used; other rows are skipped.

    Args:
        annotation (str): the path of the annotation csv file
    """

    rules = []
    # newline="" lets the csv module handle line endings inside quoted fields
    with open(annotation, "r", newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            shape_attributes = json.loads(row["region_shape_attributes"])
            # rows without a region carry no "name" key
            if shape_attributes.get("name") != "polyline":
                continue

            x_points = shape_attributes["all_points_x"]
            y_points = shape_attributes["all_points_y"]
            if len(x_points) == 2 and len(y_points) == 2:
                rules.append([x_points[0], y_points[0], x_points[1], y_points[1]])

    return HeaderTemplate(rules)
Create a HeaderTemplate from annotations made in vgg, using the polylines tool.
Arguments:
- annotation (str): the path of the annotation csv file
393 def cell_heights(self, header_factors: list[float] | float) -> list[int]: 394 if isinstance(header_factors, float): 395 header_factors = [header_factors] 396 header_factors = cast(list, header_factors) 397 return [ 398 int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors 399 ]
def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
    """
    Return the point where the index[0]th horizontal rule crosses the
    index[1]th vertical rule
    """

    horizontal = self._h_rules[index[0]]
    vertical = self._v_rules[index[1]]

    crossing = horizontal.intersection(vertical)
    assert crossing is not None
    return crossing
Returns the intersection of the index[0]th horizontal rule and the index[1]th vertical rule
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Look up which cell of the template contains the point (x, y)

    A point lying exactly on a rule belongs to the first cell that matches.

    Args:
        point (tuple[float, float]): the coordinates in the template image

    Returns:
        tuple[int, int]: (row, col), or (-1, -1) if the point lies in no cell
    """

    x, y = point

    def lies_between(bound_a, bound_b, value):
        return min(bound_a, bound_b) <= value <= max(bound_a, bound_b)

    # first pair of consecutive horizontal rules that encloses y at this x
    row = next(
        (
            i
            for i in range(self.rows)
            if lies_between(
                self._h_rules[i]._y_at_x(x), self._h_rules[i + 1]._y_at_x(x), y
            )
        ),
        -1,
    )

    # first pair of consecutive vertical rules that encloses x at this y
    col = next(
        (
            i
            for i in range(self.cols)
            if lies_between(
                self._v_rules[i]._x_at_y(y), self._v_rules[i + 1]._x_at_y(y), x
            )
        ),
        -1,
    )

    if row < 0 or col < 0:
        return (-1, -1)

    return (row, col)
Get the cell index (row, col) that corresponds with the point (x, y) in the template image
Arguments:
- point (tuple[float, float]): the coordinates in the template image
Returns:
tuple[int, int]: (row, col)
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """
    Return the corner points (x,y) of the requested cell, ordered
    top left, top right, bottom right, bottom left

    Raises:
        TauluException: if the rules around the cell do not all intersect
    """

    row, col = cell

    self._check_col_idx(col)
    self._check_row_idx(row)

    upper, lower = self._h_rules[row], self._h_rules[row + 1]
    left, right = self._v_rules[col], self._v_rules[col + 1]

    # each corner is where a bounding horizontal rule meets a vertical one
    top_left = upper.intersection(left)
    top_right = upper.intersection(right)
    bottom_left = lower.intersection(left)
    bottom_right = lower.intersection(right)

    corners = (top_left, top_right, bottom_right, bottom_left)
    if any(corner is None for corner in corners):
        raise TauluException("the lines around this cell do not intersect")

    return corners  # type:ignore
Return points (x,y) that make up a polygon around the requested cell (top left, top right, bottom right, bottom left)
def region(
    self, start: tuple[int, int], end: tuple[int, int]
) -> tuple[Point, Point, Point, Point]:
    """
    Get the bounding polygon of the region spanning the cells `start` to `end`

    Returns:
        4 points: lt, rt, rb, lb, in format (x, y), truncated to integers

    Raises:
        TauluException: if the rules around the region do not all intersect
    """
    self._check_row_idx(start[0])
    self._check_row_idx(end[0])
    self._check_col_idx(start[1])
    self._check_col_idx(end[1])

    # rules bounding the region: above the first row, below the last row,
    # left of the first column and right of the last column
    upper = self._h_rules[start[0]]
    lower = self._h_rules[end[0] + 1]
    left = self._v_rules[start[1]]
    right = self._v_rules[end[1] + 1]

    top_left = upper.intersection(left)
    top_right = upper.intersection(right)
    bottom_left = lower.intersection(left)
    bottom_right = lower.intersection(right)

    corners = (top_left, top_right, bottom_right, bottom_left)
    if any(corner is None for corner in corners):
        raise TauluException("the lines around this row do not intersect properly")

    return tuple(  # type:ignore
        (int(corner[0]), int(corner[1])) for corner in corners
    )
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """Not supported for a HeaderTemplate: always raises TauluException"""
    raise TauluException("text_regions should not be called on a HeaderTemplate")
Split the row into regions of continuous text
Returns list[tuple[int, int]]: a list of spans (start col, end col)
class TableIndexer(ABC):
    """
    Subclasses implement methods for going from a pixel in the input image to a table cell index,
    and cropping an image to the given table cell index.
    """

    def __init__(self):
        self._col_offset = 0

    @property
    def col_offset(self) -> int:
        """The stored column offset (0 by default)"""
        return self._col_offset

    @col_offset.setter
    def col_offset(self, value: int):
        assert value >= 0
        self._col_offset = value

    @property
    @abstractmethod
    def cols(self) -> int:
        """Number of columns"""
        pass

    @property
    @abstractmethod
    def rows(self) -> int:
        """Number of rows"""
        pass

    def cells(self) -> Generator[tuple[int, int], None, None]:
        """Yield every cell index (row, col), row by row"""
        for row in range(self.rows):
            for col in range(self.cols):
                yield (row, col)

    def _check_row_idx(self, row: int):
        """Raise a TauluException unless 0 <= row < self.rows"""
        if row < 0:
            raise TauluException("row number needs to be positive or zero")
        if row >= self.rows:
            raise TauluException(f"row number too high: {row} >= {self.rows}")

    def _check_col_idx(self, col: int):
        """Raise a TauluException unless 0 <= col < self.cols"""
        if col < 0:
            raise TauluException("col number needs to be positive or zero")
        if col >= self.cols:
            raise TauluException(f"col number too high: {col} >= {self.cols}")

    @abstractmethod
    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Returns the coordinate (row, col) of the cell that contains the given position

        Args:
            point (tuple[float, float]): a location in the input image

        Returns:
            tuple[int, int]: the cell index (row, col) that contains the given point
        """
        pass

    @abstractmethod
    def cell_polygon(
        self, cell: tuple[int, int]
    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
        """returns the polygon (used in e.g. opencv) that encloses the cell at the given cell position"""
        pass

    def _highlight_cell(
        self,
        image: MatLike,
        cell: tuple[int, int],
        color: tuple[int, int, int] = (0, 0, 255),
        thickness: int = 2,
    ):
        """Draw the outline of `cell` and its index onto `image` (in place)"""
        polygon = self.cell_polygon(cell)
        points = np.int32(list(polygon))  # type:ignore
        cv.polylines(image, [points], True, color, thickness, cv.LINE_AA)  # type:ignore
        # label placed relative to the fourth polygon point
        cv.putText(
            image,
            str(cell),
            (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
            cv.FONT_HERSHEY_PLAIN,
            2.0,
            (255, 255, 255),
            2,
        )

    def highlight_all_cells(
        self,
        image: MatLike,
        color: tuple[int, int, int] = (0, 0, 255),
        thickness: int = 1,
    ) -> MatLike:
        """Return a copy of `image` with every cell outlined and labelled"""
        img = np.copy(image)

        for cell in self.cells():
            self._highlight_cell(img, cell, color, thickness)

        return img

    def select_one_cell(
        self,
        image: MatLike,
        window: str = WINDOW,
        color: tuple[int, int, int] = (255, 0, 0),
        thickness: int = 2,
    ) -> tuple[int, int] | None:
        """
        Show `image` and let the user click a cell

        Returns:
            the last clicked cell index (row, col), or None if no cell was clicked
        """
        clicked = None

        def click_event(event, x, y, flags, params):
            nonlocal clicked

            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                cell = self.cell((x, y))
                if cell[0] >= 0:
                    clicked = cell
                else:
                    return
                # copy only when something is drawn, not on every mouse event;
                # a fresh copy means only the latest selection is highlighted
                img = np.copy(image)
                self._highlight_cell(img, cell, color, thickness)
                cv.imshow(window, img)

        imu.show(image, click_event=click_event, title="select one cell", window=window)

        return clicked

    def show_cells(
        self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
    ) -> list[tuple[int, int]]:
        """
        Show `image` and highlight every cell the user clicks

        Args:
            image: the image, or the path to it
            window (str): name of the window to show the image in

        Returns:
            list[tuple[int, int]]: the clicked cell indices, in click order

        Raises:
            TauluException: if `image` is a path that cannot be read as an image
        """
        if not isinstance(image, np.ndarray):
            path = os.fspath(image)
            image = cv.imread(path)
            # cv.imread signals failure by returning None instead of raising
            if image is None:
                raise TauluException(f"could not read image: {path}")

        img = np.copy(image)

        cells = []

        def click_event(event, x, y, flags, params):
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                cell = self.cell((x, y))
                if cell[0] >= 0:
                    cells.append(cell)
                else:
                    return
                self._highlight_cell(img, cell)
                cv.imshow(window, img)

        imu.show(
            img,
            click_event=click_event,
            title="click to highlight cells",
            window=window,
        )

        return cells

    @abstractmethod
    def region(
        self,
        start: tuple[int, int],
        end: tuple[int, int],
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding box for the rectangular region that goes from start to end

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y)
        """
        pass

    def crop_region(
        self,
        image: MatLike,
        start: tuple[int, int],
        end: tuple[int, int],
        margin: int = 0,
        margin_top: int | None = None,
        margin_bottom: int | None = None,
        margin_left: int | None = None,
        margin_right: int | None = None,
        margin_y: int | None = None,
        margin_x: int | None = None,
    ) -> MatLike:
        """Crop the input image to a rectangular region with the start and end cells as extremes"""

        region = self.region(start, end)

        # apply margins according to priority:
        # margin_top > margin_y > margin (etc.)
        lt, rt, rb, lb = _apply_margin(
            *region,
            margin=margin,
            margin_top=margin_top,
            margin_bottom=margin_bottom,
            margin_left=margin_left,
            margin_right=margin_right,
            margin_y=margin_y,
            margin_x=margin_x,
        )

        # output size: average of the two opposite edge lengths
        w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
        h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2

        # crop by doing a perspective transform to the desired quad
        src_pts = np.array([lt, rt, rb, lb], dtype="float32")
        dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32")
        M = cv.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv.warpPerspective(image, M, (int(w), int(h)))  # type:ignore

        return warped

    @abstractmethod
    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """
        Split the row into regions of continuous text

        Returns:
            list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans, each a
                pair of tuples (presumably start and end cell; confirm against
                the implementing subclasses)
        """

        pass

    def crop_cell(self, image, cell: tuple[int, int], margin: int = 0) -> MatLike:
        """Crop `image` to a single cell, with `margin` applied on all sides"""
        return self.crop_region(image, cell, cell, margin)
Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.
@abstractmethod
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Returns the coordinate (row, col) of the cell that contains the given position

    Args:
        point (tuple[float, float]): a location in the input image; the
            interactive helpers of this class pass it as (x, y)

    Returns:
        tuple[int, int]: the cell index (row, col) that contains the given point

    Note:
        NOTE(review): `select_one_cell` and `show_cells` ignore a result whose
        row index is negative, so implementations presumably return a negative
        row when the point lies outside the table - confirm in subclasses.
    """
    pass
Returns the coordinate (row, col) of the cell that contains the given position
Arguments:
- point (tuple[float, float]): a location in the input image
Returns:
tuple[int, int]: the cell index (row, col) that contains the given point
@abstractmethod
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """
    Returns the polygon (used in e.g. opencv) that encloses the cell at the
    given cell position

    Args:
        cell (tuple[int, int]): the cell index

    Returns:
        four (x, y) corner points of the cell.
        NOTE(review): presumably ordered lt, rt, rb, lb like `region` - confirm
        in the implementing subclass.
    """
    pass
Returns the polygon (used in e.g. opencv) that encloses the cell at the given cell position
def select_one_cell(
    self,
    image: MatLike,
    window: str = WINDOW,
    color: tuple[int, int, int] = (255, 0, 0),
    thickness: int = 2,
) -> tuple[int, int] | None:
    """
    Show the image and let the user pick a single cell by left-clicking it.

    Every valid click replaces the previous selection and redraws the window
    with only the newly selected cell highlighted.

    Args:
        image (MatLike): the image to display; it is never modified
        window (str): name of the OpenCV window to draw in
        color (tuple[int, int, int]): colour of the highlight outline
        thickness (int): line thickness of the highlight outline

    Returns:
        tuple[int, int] | None: the index of the last selected cell, or None
        when no click landed on a cell before the viewer was closed
    """
    clicked = None

    def click_event(event, x, y, flags, params):
        nonlocal clicked

        # Only left clicks matter. Returning early avoids copying the whole
        # image on every mouse-move event, which the callback also receives.
        if event != cv.EVENT_LBUTTONDOWN:
            return

        cell = self.cell((x, y))
        if cell[0] < 0:
            # a negative row index means the click did not land on a cell
            return

        clicked = cell

        # draw on a fresh copy so only the latest selection is highlighted
        img = np.copy(image)
        self._highlight_cell(img, cell, color, thickness)
        cv.imshow(window, img)

    imu.show(image, click_event=click_event, title="select one cell", window=window)

    return clicked
def show_cells(
    self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
) -> list[tuple[int, int]]:
    """
    Show the image and highlight every cell the user left-clicks.

    Highlights accumulate on a private copy of the image, so the caller's
    array is never modified.

    Args:
        image (MatLike | os.PathLike[str] | str): the image to display, either
            as an array or as a path that is read with OpenCV
        window (str): name of the OpenCV window to draw in

    Returns:
        list[tuple[int, int]]: the indices of all clicked cells, in click order

    Raises:
        ValueError: if `image` is a path that OpenCV cannot read
    """
    if not isinstance(image, np.ndarray):
        path = os.fspath(image)
        image = cv.imread(path)
        if image is None:
            # cv.imread signals failure by returning None instead of raising
            raise ValueError(f"Could not read an image from {path!r}")

    img = np.copy(image)

    cells = []

    def click_event(event, x, y, flags, params):
        if event != cv.EVENT_LBUTTONDOWN:
            return

        cell = self.cell((x, y))
        if cell[0] < 0:
            # a negative row index means the click did not land on a cell
            return

        cells.append(cell)
        self._highlight_cell(img, cell)
        cv.imshow(window, img)

    imu.show(
        img,
        click_event=click_event,
        title="click to highlight cells",
        window=window,
    )

    return cells
@abstractmethod
def region(
    self,
    start: tuple[int, int],
    end: tuple[int, int],
) -> tuple[Point, Point, Point, Point]:
    """
    Get the bounding box for the rectangular region that goes from start to end

    Args:
        start (tuple[int, int]): index of the first cell of the region
        end (tuple[int, int]): index of the last cell of the region; `crop_cell`
            passes the same cell for both to get a single-cell region

    Returns:
        4 points: lt, rt, rb, lb, in format (x, y)
    """
    pass
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
def crop_region(
    self,
    image: MatLike,
    start: tuple[int, int],
    end: tuple[int, int],
    margin: int = 0,
    margin_top: int | None = None,
    margin_bottom: int | None = None,
    margin_left: int | None = None,
    margin_right: int | None = None,
    margin_y: int | None = None,
    margin_x: int | None = None,
) -> MatLike:
    """
    Cut the region spanned by the `start` and `end` cells out of the image.

    The (possibly skewed) quadrilateral of the region is rectified into an
    axis-aligned image by a perspective transform.

    Args:
        image (MatLike): the image to crop from
        start (tuple[int, int]): first cell of the region
        end (tuple[int, int]): last cell of the region
        margin, margin_x, margin_y, margin_top, margin_bottom, margin_left,
        margin_right: margins forwarded to `_apply_margin`; the more specific
            value wins (margin_top > margin_y > margin, etc.)

    Returns:
        MatLike: the rectified crop
    """
    corners = _apply_margin(
        *self.region(start, end),
        margin=margin,
        margin_top=margin_top,
        margin_bottom=margin_bottom,
        margin_left=margin_left,
        margin_right=margin_right,
        margin_y=margin_y,
        margin_x=margin_x,
    )
    lt, rt, rb, lb = corners

    # output size: mean of the two opposite edge lengths along each axis
    width = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
    height = (rb[1] - rt[1] + lb[1] - lt[1]) / 2

    # map the source quad onto an upright rectangle of that size
    source_quad = np.array([lt, rt, rb, lb], dtype="float32")
    target_quad = np.array(
        [[0, 0], [width, 0], [width, height], [0, height]], dtype="float32"
    )
    transform = cv.getPerspectiveTransform(source_quad, target_quad)

    return cv.warpPerspective(image, transform, (int(width), int(height)))  # type:ignore
Crop the input image to a rectangular region with the start and end cells as extremes
@abstractmethod
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Split the row into regions of continuous text

    Args:
        img (MatLike): the table image
        row (int): index of the row to analyse
        margin_x (int): horizontal margin; exact use is up to the implementation
        margin_y (int): vertical margin; exact use is up to the implementation

    Returns:
        list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans, each
        given as a (start cell, end cell) pair of cell indices
    """

    pass
Split the row into regions of continuous text
Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans, each given as a (start cell, end cell) pair
15class Split(Generic[T]): 16 """ 17 Container for paired left/right data with convenient manipulation methods. 18 19 The Split class is designed for working with table images that span two pages 20 or have distinct left and right sections. It allows you to: 21 - Store related data for both sides 22 - Apply functions to both sides simultaneously 23 - Access attributes/methods of contained objects transparently 24 25 Examples: 26 >>> # Create a split with different parameters for each side 27 >>> thresholds = Split(0.25, 0.30) 28 >>> 29 >>> # Apply a function to both sides 30 >>> images = Split(left_img, right_img) 31 >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5))) 32 >>> 33 >>> # Use with different parameters per side 34 >>> results = images.apply( 35 ... lambda img, k: sauvola_threshold(img, k), 36 ... k=thresholds # k.left used for left img, k.right for right 37 ... ) 38 >>> 39 >>> # Access methods of contained objects directly 40 >>> templates = Split(template_left, template_right) 41 >>> widths = templates.cell_widths(0) # Calls on both templates 42 43 Type Parameters: 44 T: The type of objects stored in left and right 45 """ 46 47 def __init__(self, left: T | None = None, right: T | None = None): 48 """ 49 Initialize a Split container. 50 51 Args: 52 left: Data for the left side 53 right: Data for the right side 54 55 Note: 56 Both can initially be None. Use the `append` method or set 57 properties directly to populate. 
58 """ 59 self._left = left 60 self._right = right 61 62 @property 63 def left(self) -> T: 64 assert self._left is not None 65 return self._left 66 67 @left.setter 68 def left(self, value: T): 69 self._left = value 70 71 @property 72 def right(self) -> T: 73 assert self._right is not None 74 return self._right 75 76 @right.setter 77 def right(self, value: T): 78 self._right = value 79 80 def append(self, value: T): 81 if self._left is None: 82 self._left = value 83 else: 84 self._right = value 85 86 def __repr__(self) -> str: 87 return f"left: {self._left}, right: {self._right}" 88 89 def __iter__(self): 90 assert self._left is not None 91 assert self._right is not None 92 return iter((self._left, self._right)) 93 94 def __getitem__(self, index: bool) -> T: 95 assert self._left is not None 96 assert self._right is not None 97 if int(index) == 0: 98 return self._left 99 else: 100 return self._right 101 102 def apply( 103 self, 104 funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]", 105 *args, 106 **kwargs, 107 ) -> "Split[V]": 108 if not isinstance(funcs, Split): 109 funcs = Split(funcs, funcs) 110 111 def get_arg(side: str, arg): 112 if isinstance(arg, Split): 113 return getattr(arg, side) 114 return arg 115 116 def call(side: str): 117 func = getattr(funcs, side) 118 target = getattr(self, side) 119 120 side_args = [get_arg(side, arg) for arg in args] 121 side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()} 122 123 return func(target, *side_args, **side_kwargs) 124 125 return Split(call("left"), call("right")) 126 127 def __getattr__(self, attr_name: str): 128 if attr_name in self.__dict__: 129 return getattr(self, attr_name) 130 131 def wrapper(*args, **kwargs): 132 return self.apply( 133 Split( 134 getattr(self.left.__class__, attr_name), 135 getattr(self.right.__class__, attr_name), 136 ), 137 *args, 138 **kwargs, 139 ) 140 141 return wrapper
Container for paired left/right data with convenient manipulation methods.
The Split class is designed for working with table images that span two pages or have distinct left and right sections. It allows you to:
- Store related data for both sides
- Apply functions to both sides simultaneously
- Access attributes/methods of contained objects transparently
Examples:
>>> # Create a split with different parameters for each side >>> thresholds = Split(0.25, 0.30) >>> >>> # Apply a function to both sides >>> images = Split(left_img, right_img) >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5))) >>> >>> # Use with different parameters per side >>> results = images.apply( ... lambda img, k: sauvola_threshold(img, k), ... k=thresholds # k.left used for left img, k.right for right ... ) >>> >>> # Access methods of contained objects directly >>> templates = Split(template_left, template_right) >>> widths = templates.cell_widths(0) # Calls on both templates
Type Parameters:
T: The type of objects stored in left and right
47 def __init__(self, left: T | None = None, right: T | None = None): 48 """ 49 Initialize a Split container. 50 51 Args: 52 left: Data for the left side 53 right: Data for the right side 54 55 Note: 56 Both can initially be None. Use the `append` method or set 57 properties directly to populate. 58 """ 59 self._left = left 60 self._right = right
Initialize a Split container.
Arguments:
- left: Data for the left side
- right: Data for the right side
Note:
Both can initially be None. Use the
`append` method or set properties directly to populate.
102 def apply( 103 self, 104 funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]", 105 *args, 106 **kwargs, 107 ) -> "Split[V]": 108 if not isinstance(funcs, Split): 109 funcs = Split(funcs, funcs) 110 111 def get_arg(side: str, arg): 112 if isinstance(arg, Split): 113 return getattr(arg, side) 114 return arg 115 116 def call(side: str): 117 func = getattr(funcs, side) 118 target = getattr(self, side) 119 120 side_args = [get_arg(side, arg) for arg in args] 121 side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()} 122 123 return func(target, *side_args, **side_kwargs) 124 125 return Split(call("left"), call("right"))
class Taulu:
    """
    High-level API for table segmentation from images.

    Taulu provides a simplified interface that orchestrates header alignment,
    grid detection, and table segmentation into a single workflow. It's designed
    to hide complexity while still allowing fine-tuned control through parameters.

    ## Workflow Overview

    1. **Header Template Creation**: Use `Taulu.annotate()` to create annotated
       header images that define your table structure
    2. **Initialization**: Create a Taulu instance with your header(s) and parameters
    3. **Segmentation**: Call `segment_table()` on your table images to get a
       `TableGrid` object containing all detected cell boundaries

    ## Single vs Split Tables

    Taulu supports two modes:

    - **Single header**: For tables that fit on one page or have consistent structure
    - **Split header**: For tables that span two pages (left/right) with potentially
      different parameters for each side

    Use `Split[T]` objects to provide different parameters for left and right sides.

    ## Parameter Tuning Strategy

    If segmentation fails or is inaccurate:

    1. **Visual debugging**: Set `debug_view=True` in `segment_table()` to see
       intermediate results
    2. **Adjust thresholding**: Modify `sauvola_k` to change binarization sensitivity
       - Increase to remove more noise (more aggressive)
       - Decrease to preserve faint lines
    3. **Tune cross-kernel**: Adjust `cross_width`, `cross_height`, `kernel_size`
       to match your rule thickness after morphology
    4. **Morphology**: Increase `morph_size` to connect broken lines, but be aware
       this also thickens lines (requiring larger cross_width)
    5. **Search parameters**: Increase `search_region` for warped documents,
       adjust `distance_penalty` to control how strictly positions are enforced
    6. **Growth parameters**: Lower `grow_threshold` if the algorithm stops too early,
       increase `look_distance` for better extrapolation

    Examples:
        Basic usage with a single header:

        >>> from taulu import Taulu
        >>>
        >>> # First, create annotated header (one-time setup)
        >>> Taulu.annotate("table_image.png", "header.png")
        >>> # This creates header.png and header.json
        >>>
        >>> # Initialize Taulu with the header
        >>> taulu = Taulu(
        ...     header_image_path="header.png",
        ...     cell_height_factor=0.8,  # Rows are 80% of header height
        ...     sauvola_k=0.25,
        ...     search_region=60,
        ...     cross_width=10
        ... )
        >>>
        >>> # Segment a table image
        >>> grid = taulu.segment_table("table_page_01.png")
        >>>
        >>> # Use the grid to extract cells
        >>> import cv2
        >>> img = cv2.imread("table_page_01.png")
        >>> cell_image = grid.crop_cell(img, (0, 0))  # First cell

        Using split headers for two-page tables:

        >>> from taulu import Taulu, Split
        >>>
        >>> # Annotate both headers
        >>> Taulu.annotate("scan_01.png", "header_left.png")
        >>> Taulu.annotate("scan_01.png", "header_right.png")
        >>>
        >>> # Use different parameters for each side
        >>> taulu = Taulu(
        ...     header_image_path=Split("header_left.png", "header_right.png"),
        ...     cell_height_factor=Split([0.8, 0.9], [0.75]),  # Different row heights
        ...     sauvola_k=Split(0.25, 0.30),  # Different thresholds
        ...     cross_width=10  # Same for both sides
        ... )
        >>>
        >>> # Segment returns a unified grid
        >>> grid = taulu.segment_table("scan_01.png")

        Debug visualization to tune parameters:

        >>> taulu = Taulu("header.png", sauvola_k=0.15)
        >>>
        >>> # Opens windows showing each processing step
        >>> # Press 'n' to advance, 'q' to quit
        >>> grid = taulu.segment_table("table.png", debug_view=True)
        >>>
        >>> # Adjust parameters based on what you see:
        >>> # - If binarization is too noisy: increase sauvola_k
        >>> # - If lines are broken after morphology: increase morph_size
        >>> # - If filtered image has "undefined" corners: adjust cross_width to match line thickness (after morphology)
        >>> # - If corners are missed during search: decrease grow_threshold or increase search_region


    Attributes:
        _header (MatLike | Split[MatLike]): Loaded header image(s)
        _aligner (HeaderAligner | Split[HeaderAligner]): Header alignment engine(s)
        _template (HeaderTemplate | Split[HeaderTemplate]): Parsed header structure(s)
        _grid_detector (GridDetector | Split[GridDetector]): Grid detection engine(s)
        _cell_heights (list[int] | Split[list[int]]): Computed cell heights in pixels

    Raises:
        TauluException: If header files don't exist, annotation is missing, or
            Split parameters are used incorrectly with single headers

    See Also:
        - `TableGrid`: The result object with methods for accessing cells
        - `Split`: Container for paired left/right parameters
        - `GridDetector`: Lower-level grid detection (for advanced usage)
        - `HeaderAligner`: Lower-level header alignment (for advanced usage)
    """

    def __init__(
        self,
        header_image_path: PathLike[str] | str | Split[PathLike[str] | str],
        cell_height_factor: float | list[float] | Split[float | list[float]] = [1.0],
        header_anno_path: PathLike[str]
        | str
        | Split[PathLike[str] | str]
        | None = None,
        sauvola_k: float | Split[float] = 0.25,
        search_region: int | Split[int] = 60,
        distance_penalty: float | Split[float] = 0.4,
        cross_width: int | Split[int] = 10,
        morph_size: int | Split[int] = 4,
        kernel_size: int | Split[int] = 41,
        processing_scale: float | Split[float] = 1.0,
        min_rows: int | Split[int] = 5,
        look_distance: int | Split[int] = 3,
        grow_threshold: float | Split[float] = 0.3,
    ):
        """
        Args:
            header_image_path:
                Path to the header template image(s). The header should be a cropped
                image showing a clear view of the table's first row. An annotation
                file (.json) must exist alongside the image, created via `Taulu.annotate()`.
                For split tables, provide a `Split` containing left and right header paths.
                A plain `(left, right)` tuple is accepted as shorthand for a `Split`.

            cell_height_factor:
                Height of data rows relative to header height. For example, if your
                header is 100px tall and data rows are 80px tall, use 0.8.

                - **float**: All rows have the same height
                - **list[float]**: Different heights for different rows. The last value
                  is repeated for any additional rows beyond the list length. Useful when
                  the first data row is taller than subsequent rows.
                - **Split**: Different height factors for left and right sides

                Default: [1.0]

            header_anno_path (PathLike[str] | str | Split[PathLike[str] | str] | None):
                Optional explicit path to header annotation JSON file(s). If None,
                looks for a .json file with the same name as `header_image_path`.
                For split tables this must be a `Split` (or a `(left, right)` tuple).
                Default: None

            sauvola_k (float | Split[float]):
                Threshold sensitivity for Sauvola adaptive binarization (0.0-1.0).
                Controls how aggressively the algorithm converts the image to binary.

                - **Lower values** (0.04-0.15): Preserve faint lines, more noise
                - **Higher values** (0.20-0.35): Remove noise, may lose faint lines

                Start with 0.25 and adjust based on your image quality.
                Default: 0.25

            search_region (int | Split[int]):
                Size in pixels of the square region to search for the next corner point.
                The algorithm estimates where a corner should be, then searches within
                this region for the best match.

                - **Smaller values** (20-40): Faster, requires well-aligned tables
                - **Larger values** (60-100): More robust to warping and distortion

                Default: 60

            distance_penalty (float | Split[float]):
                Weight factor [0, 1] for penalizing corners far from expected position.
                Uses Gaussian weighting within the search region.

                - **0.0**: No penalty, any position in search region is equally valid
                - **0.5**: Moderate preference for positions near the expected location
                - **1.0**: Strong preference, only accepts positions very close to expected

                Default: 0.4

            cross_width (int | Split[int]):
                Width in pixels of the cross-shaped kernel used to detect intersections.
                Should approximately match the thickness of your table rules AFTER
                morphological dilation.

                **Tuning**: Look at the dilated image in debug_view. The cross_width
                should match the thickness of the black lines you see.
                Default: 10

            morph_size (int | Split[int]):
                Size of morphological structuring element for dilation. Controls how
                much gap-bridging occurs to connect broken line segments.

                - **Smaller values** (2-4): Minimal connection, preserves thin lines
                - **Larger values** (6-10): Connects larger gaps, but thickens lines

                Note: Increasing this requires increasing `cross_width` proportionally.
                Default: 4

            kernel_size (int | Split[int]):
                Size of the cross-shaped kernel (must be odd). Larger kernels are more
                selective, reducing false positives but potentially missing valid corners.

                - **Smaller values** (21-31): More sensitive, finds more candidates
                - **Larger values** (41-61): More selective, fewer false positives

                Default: 41

            processing_scale (float | Split[float]):
                Image downscaling factor (0, 1] for processing speed. Processing is done
                on scaled images, then results are scaled back to original size.

                - **1.0**: Full resolution (slowest, most accurate)
                - **0.5-0.75**: Good balance for high-res scans (2x-4x speedup)
                - **0.25-0.5**: Fast processing for very large images

                Default: 1.0

            min_rows (int | Split[int]):
                Minimum number of rows required before the algorithm considers the
                table complete. Prevents stopping too early on tables with initial
                low-confidence detections.
                Default: 5

            look_distance (int | Split[int]):
                Number of adjacent rows/columns to examine when extrapolating missing
                corners using polynomial regression. Higher values provide more context
                but may smooth over legitimate variations.

                - **2-3**: Good for consistent grids
                - **4-6**: Better for grids with some irregularity

                Default: 3

            grow_threshold (float | Split[float]):
                Initial minimum confidence [0, 1] required to accept a detected corner
                during the growing phase. The algorithm may adaptively lower this
                threshold if growth stalls.

                - **Higher values** (0.5-0.8): Stricter, fewer errors but may miss valid corners
                - **Lower values** (0.2-0.4): More permissive, finds more corners but more errors

                Default: 0.3

        Raises:
            TauluException: If a header image or annotation file is missing or
                unreadable, if only one of `header_image_path` / `header_anno_path`
                is a `Split`, or if `Split` parameters are combined with a single header
        """
        # a plain (left, right) tuple is treated as shorthand for a Split
        header_image_path = self._pair_to_split(header_image_path)
        header_anno_path = self._pair_to_split(header_anno_path)

        # copy list factors so that neither the shared default list nor the
        # caller's list is aliased by this instance
        if isinstance(cell_height_factor, list):
            cell_height_factor = list(cell_height_factor)

        self._processing_scale = processing_scale
        self._cell_height_factor = cell_height_factor

        # keyword arguments for GridDetector; values may still be Split objects
        detector_params = {
            "kernel_size": kernel_size,
            "cross_width": cross_width,
            "morph_size": morph_size,
            "search_region": search_region,
            "sauvola_k": sauvola_k,
            "distance_penalty": distance_penalty,
            "scale": processing_scale,
            "min_rows": min_rows,
            "look_distance": look_distance,
            "grow_threshold": grow_threshold,
        }

        if isinstance(header_image_path, Split) or isinstance(header_anno_path, Split):
            # both path arguments are dereferenced per side below, so a mix of
            # Split and non-Split has to be rejected up front
            if not isinstance(header_image_path, Split):
                raise TauluException(
                    "header_anno_path is a Split, so header_image_path must be a Split too"
                )
            if header_anno_path is not None and not isinstance(header_anno_path, Split):
                raise TauluException(
                    "header_image_path is a Split, so header_anno_path must be a Split (or None) too"
                )

            header = Split(Path(header_image_path.left), Path(header_image_path.right))

            if not exists(header.left.with_suffix(".png")) or not exists(
                header.right.with_suffix(".png")
            ):
                raise TauluException(
                    "The header images you provided do not exist (or they aren't .png files)"
                )

            if header_anno_path is None:
                # default: annotation JSON next to each header image
                anno = Split(
                    header.left.with_suffix(".json"),
                    header.right.with_suffix(".json"),
                )
                if not exists(anno.left) or not exists(anno.right):
                    raise TauluException(
                        "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
                    )
            else:
                anno = header_anno_path
                if not exists(anno.left) or not exists(anno.right):
                    raise TauluException(
                        "The header annotation files you provided do not exist (or they aren't .json files)"
                    )

            self._template = Split(
                HeaderTemplate.from_saved(anno.left),
                HeaderTemplate.from_saved(anno.right),
            )

            self._header = Split(
                self._read_header(header.left), self._read_header(header.right)
            )

            self._aligner = Split(
                HeaderAligner(
                    self._header.left, scale=get_param(processing_scale, "left")
                ),
                HeaderAligner(
                    self._header.right, scale=get_param(processing_scale, "right")
                ),
            )

            self._cell_heights = Split(
                self._template.left.cell_heights(get_param(cell_height_factor, "left")),
                self._template.right.cell_heights(
                    get_param(cell_height_factor, "right")
                ),
            )

            # one GridDetector per side, each with that side's parameter values
            self._grid_detector = Split(
                GridDetector(
                    **{k: get_param(v, "left") for k, v in detector_params.items()}
                ),
                GridDetector(
                    **{k: get_param(v, "right") for k, v in detector_params.items()}
                ),
            )

        else:
            # For single header, parameters should not be Split objects.
            # Checked before touching any file so misuse is reported as such.
            if any(
                isinstance(param, Split)
                for param in (*detector_params.values(), cell_height_factor)
            ):
                raise TauluException(
                    "Split parameters can only be used with split headers (tuple header_path)"
                )

            header_path = Path(header_image_path)

            if header_anno_path is None:
                anno_path = header_path.with_suffix(".json")
                if not exists(anno_path):
                    raise TauluException(
                        "You need to annotate the header of your table first\n\nsee the Taulu.annotate method"
                    )
            else:
                # an explicitly given annotation file takes precedence
                anno_path = header_anno_path
                if not exists(anno_path):
                    raise TauluException(
                        "The header annotation file you provided does not exist"
                    )

            self._header = self._read_header(header_path)
            # same scale as the grid detector, mirroring the split branch
            self._aligner = HeaderAligner(self._header, scale=processing_scale)
            self._template = HeaderTemplate.from_saved(anno_path)

            self._cell_heights = self._template.cell_heights(cell_height_factor)

            self._grid_detector = GridDetector(**detector_params)

    @staticmethod
    def _pair_to_split(value):
        """Turn a (left, right) tuple into a Split; return anything else unchanged."""
        if isinstance(value, tuple) and len(value) == 2:
            return Split(value[0], value[1])
        return value

    @staticmethod
    def _read_header(path):
        """Read a header image, raising TauluException when OpenCV cannot load it."""
        image = cv2.imread(os.fspath(path))
        if image is None:
            # cv2.imread reports failure by returning None instead of raising
            raise TauluException(f"Could not read the header image {path}")
        return image

    @staticmethod
    def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str):
        """
        Interactive tool to create header annotations for table segmentation.

        This method guides you through a two-step annotation process:

        1. **Crop the header**: Click four corners to define the header region
        2. **Annotate lines**: Click pairs of points to define each vertical and
           horizontal line in the header

        The annotations are saved as:
        - A cropped header image (.png) at `output_path`
        - A JSON file (.json) containing line coordinates

        ## Annotation Guidelines

        **Which lines to annotate:**
        - All vertical lines that extend into the table body (column separators)
        - The top horizontal line of the header
        - The bottom horizontal line of the header (top of data rows)

        **Order doesn't matter** - annotate lines in any order that's convenient.

        **To annotate a line:**
        1. Click once at one endpoint
        2. Click again at the other endpoint
        3. A green line appears showing your annotation

        **To undo:**
        - Right-click anywhere to remove the last line you drew

        **When finished:**
        - Press 'n' to save and exit
        - Press 'q' to quit without saving

        Args:
            image_path (PathLike[str] | str): Path to a table image containing
                a clear view of the header. This can be a full table image.
            output_path (PathLike[str] | str): Where to save the cropped header
                image. The annotation JSON will be saved with the same name but
                .json extension.

        Raises:
            TauluException: If image_path doesn't exist or output_path is a directory

        Examples:
            Annotate a single header:

            >>> from taulu import Taulu
            >>> Taulu.annotate("scan_page_01.png", "header.png")
            # Interactive window opens
            # After annotation: creates header.png and header.json

            Annotate left and right headers for a split table:

            >>> Taulu.annotate("scan_page_01.png", "header_left.png")
            >>> Taulu.annotate("scan_page_01.png", "header_right.png")
            # Creates header_left.{png,json} and header_right.{png,json}

        Notes:
            - The header image doesn't need to be perfectly cropped initially -
              the tool will help you crop it precisely
            - Annotation accuracy is important: misaligned lines will cause
              segmentation errors
            - You can re-run this method to update annotations if needed
        """

        if not exists(image_path):
            raise TauluException(f"Image path {image_path} does not exist")

        if os.path.isdir(output_path):
            raise TauluException("Output path should be a file")

        output_path = Path(output_path)

        # the cropped header and its annotation share a stem: <name>.png / <name>.json
        template = HeaderTemplate.annotate_image(
            os.fspath(image_path), crop=output_path.with_suffix(".png")
        )

        template.save(output_path.with_suffix(".json"))

    def segment_table(
        self,
        image: MatLike | PathLike[str] | str,
        filtered: Optional[MatLike | PathLike[str] | str] = None,
        debug_view: bool = False,
    ) -> TableGrid:
        """
        Segment a table image into a grid of cells.

        This is the main entry point for the taulu package. It orchestrates:

        1. **Header alignment**: Locates the table by matching the header template
           to the image using feature-based registration (ORB features + homography)
        2. **Grid detection**: Applies morphological filtering and cross-correlation
           to find corner intersections
        3. **Grid growing**: Iteratively detects corners row-by-row and column-by-column,
           starting from the aligned header position
        4. **Extrapolation**: Fills in any missing corners using polynomial regression
           based on neighboring detected points
        5. **Smoothing**: Refines corner positions for consistency

        ## Performance Notes

        Processing time depends on:
        - Image resolution (use `processing_scale < 1.0` for large images)
        - Table complexity (more rows/columns = longer processing)
        - Parameter settings

        ## Troubleshooting

        **If segmentation fails (returns incomplete grid):**
        1. Enable `debug_view=True` to see where it stops
        2. Check if header alignment is correct (first debug image)
        3. Verify cross-correlation shows bright spots at corners
        4. Adjust `grow_threshold` (lower if stopping too early)
        5. Increase `search_region` if corners are far from expected positions

        **If segmentation is inaccurate (corners in wrong positions):**
        1. Check binarization quality (adjust `sauvola_k`)
        2. Verify cross-kernel size matches line thickness (adjust `cross_width`)
        3. Ensure morphology isn't over-connecting (reduce `morph_size`)
        4. Increase `distance_penalty` to enforce expected positions more strictly

        Args:
            image (MatLike | PathLike[str] | str): Table image to segment.
                Can be a file path or a numpy array (BGR or grayscale).

            filtered (MatLike | PathLike[str] | str | None): Optional pre-filtered
                binary image to use instead of computing it internally.
                Must be the same size as `image`. If provided, parameters related
                to filtering (e.g. `sauvola_k`, `morph_size`) are ignored.

                **GPU acceleration**: Use trained CNN model for corner detection:

                >>> from taulu.gpu import DeepConvNet, apply_kernel_to_image_tiled
                >>> model = DeepConvNet.load("model.pth")
                >>> filtered = apply_kernel_to_image_tiled(model, image)
                >>> grid = taulu.segment_table(image, filtered=filtered)

                Default: None

            debug_view (bool): If True, opens OpenCV windows showing intermediate
                processing steps:
                - Header alignment overlay
                - Binarized image
                - After morphological operations
                - Cross-correlation result
                - Growing progress (corner-by-corner)

                **Controls:**
                - Press 'n' to advance to next step
                - Press 'q' to quit immediately

                Useful for parameter tuning and understanding failures.
                Default: False

        Returns:
            TableGrid: A grid structure containing detected corner positions with
                methods for:

                **Position queries:**
                - `cell(point)`: Get (row, col) at pixel coordinates (x, y)
                - `cell_polygon(cell)`: Get 4 corners of a cell as (lt, rt, rb, lb)
                - `region(start, end)`: Get bounding box for a cell range

                **Image extraction:**
                - `crop_cell(img, cell, margin=0)`: Extract single cell with optional margin
                - `crop_region(img, start, end, margin=0)`: Extract rectangular region

                **Visualization:**
                - `show_cells(img)`: Interactive cell viewer (click to highlight)
                - `highlight_all_cells(img)`: Draw all cell boundaries
                - `visualize_points(img)`: Show detected corner points

                **Analysis:**
                - `text_regions(img, row)`: Find continuous text regions in a row
                - `cells()`: Generator yielding all (row, col) indices

                **Persistence:**
                - `save(path)`: Save grid to JSON file
                - `TableGrid.from_saved(path)`: Load grid from JSON

                **Properties:**
                - `rows`: Number of data rows (header not included)
                - `cols`: Number of columns
                - `points`: Raw list of detected corner coordinates

        Raises:
            TauluException: If image cannot be loaded, header alignment fails,
                or grid detection produces no results

        Examples:
            Basic segmentation:

            >>> from taulu import Taulu
            >>> import cv2
            >>>
            >>> taulu = Taulu("header.png")
            >>> grid = taulu.segment_table("table_page_01.png")
            >>>
            >>> print(f"Detected {grid.rows} rows and {grid.cols} columns")
            >>>
            >>> # Extract first cell
            >>> img = cv2.imread("table_page_01.png")
            >>> cell_img = grid.crop_cell(img, (0, 0))
            >>> cv2.imwrite("cell_0_0.png", cell_img)

            Debug mode for parameter tuning:

            >>> grid = taulu.segment_table("table_page_01.png", debug_view=True)
            # Windows open showing each step
            # Adjust parameters based on what you see

            Process multiple images with the same header:

            >>> taulu = Taulu("header.png", sauvola_k=0.25)
            >>>
            >>> for i in range(1, 11):
            ...     img_path = f"table_page_{i:02d}.png"
            ...     grid = taulu.segment_table(img_path)
            ...     grid.save(f"grid_{i:02d}.json")
            ...     print(f"Page {i}: {grid.rows} rows detected")

            Extract all cells from a table:

            >>> img = cv2.imread("table.png")
            >>> grid = taulu.segment_table("table.png")
            >>>
            >>> for row, col in grid.cells():
            ...     cell_img = grid.crop_cell(img, (row, col), margin=5)
            ...     cv2.imwrite(f"cell_{row}_{col}.png", cell_img)

            Find text regions for OCR:

            >>> for row in range(grid.rows):
            ...     text_regions = grid.text_regions(img, row)
            ...     for start_cell, end_cell in text_regions:
            ...         # Extract region spanning multiple cells
            ...         region_img = grid.crop_region(img, start_cell, end_cell)
            ...         # Run OCR on region_img...

        See Also:
            - `TableGrid`: Complete documentation of the returned object
            - `GridDetector.find_table_points()`: Lower-level grid detection
            - `HeaderAligner.align()`: Lower-level header alignment
        """

        if not isinstance(image, MatLike):
            image_path = image
            image = cv2.imread(os.fspath(image_path))
            if image is None:
                # cv2.imread reports failure by returning None instead of raising
                raise TauluException(f"Could not load the image {image_path}")

        now = perf_counter()
        h = self._aligner.align(image, visual=debug_view)
        align_time = perf_counter() - now
        logger.info(f"Header alignment took {align_time:.2f} seconds")

        def as_int_point(point):
            return (int(point[0]), int(point[1]))

        # find the starting point for the table grid algorithm
        left_top_template = self._template.intersection((1, 0))
        if isinstance(left_top_template, Split):
            left_top_template = Split(
                as_int_point(left_top_template.left),
                as_int_point(left_top_template.right),
            )
        else:
            left_top_template = as_int_point(left_top_template)

        # map the template corner into the coordinates of the table image
        left_top_table = self._aligner.template_to_img(h, left_top_template)

        now = perf_counter()
        table = self._grid_detector.find_table_points(
            image,
            left_top_table,
            self._template.cell_widths(0),
            self._cell_heights,
            visual=debug_view,
            filtered=filtered,
        )
        grid_time = perf_counter() - now
        logger.info(f"Grid detection took {grid_time:.2f} seconds")

        # split mode yields one grid per side; merge them into a single grid
        if isinstance(table, Split):
            table = TableGrid.from_split(table, (0, 0))

        return table
High-level API for table segmentation from images.
Taulu provides a simplified interface that orchestrates header alignment, grid detection, and table segmentation into a single workflow. It's designed to hide complexity while still allowing fine-tuned control through parameters.
Workflow Overview
- Header Template Creation: Use `Taulu.annotate()` to create annotated header images that define your table structure
- Initialization: Create a Taulu instance with your header(s) and parameters
- Segmentation: Call `segment_table()` on your table images to get a `TableGrid` object containing all detected cell boundaries
Single vs Split Tables
Taulu supports two modes:
- Single header: For tables that fit on one page or have consistent structure
- Split header: For tables that span two pages (left/right) with potentially different parameters for each side
Use Split[T] objects to provide different parameters for left and right sides.
Parameter Tuning Strategy
If segmentation fails or is inaccurate:
- Visual debugging: Set `debug_view=True` in `segment_table()` to see intermediate results
- Adjust thresholding: Modify `sauvola_k` to change binarization sensitivity
  - Increase to remove more noise (more aggressive)
  - Decrease to preserve faint lines
- Tune cross-kernel: Adjust `cross_width`, `cross_height`, `kernel_size` to match your rule thickness after morphology
- Morphology: Increase `morph_size` to connect broken lines, but be aware this also thickens lines (requiring a larger `cross_width`)
- Search parameters: Increase `search_region` for warped documents, adjust `distance_penalty` to control how strictly positions are enforced
- Growth parameters: Lower `grow_threshold` if the algorithm stops too early, increase `look_distance` for better extrapolation
Examples:
Basic usage with a single header:
>>> from taulu import Taulu
>>>
>>> # First, create annotated header (one-time setup)
>>> Taulu.annotate("table_image.png", "header.png")
>>> # This creates header.png and header.json
>>>
>>> # Initialize Taulu with the header
>>> taulu = Taulu(
...     header_image_path="header.png",
...     cell_height_factor=0.8,  # Rows are 80% of header height
...     sauvola_k=0.25,
...     search_region=60,
...     cross_width=10
... )
>>>
>>> # Segment a table image
>>> grid = taulu.segment_table("table_page_01.png")
>>>
>>> # Use the grid to extract cells
>>> import cv2
>>> img = cv2.imread("table_page_01.png")
>>> cell_image = grid.crop_cell(img, (0, 0))  # First cell

Using split headers for two-page tables:

>>> from taulu import Taulu, Split
>>>
>>> # Annotate both headers
>>> Taulu.annotate("scan_01.png", "header_left.png")
>>> Taulu.annotate("scan_01.png", "header_right.png")
>>>
>>> # Use different parameters for each side
>>> taulu = Taulu(
...     header_image_path=Split("header_left.png", "header_right.png"),
...     cell_height_factor=Split([0.8, 0.9], [0.75]),  # Different row heights
...     sauvola_k=Split(0.25, 0.30),  # Different thresholds
...     cross_width=10  # Same for both sides
... )
>>>
>>> # Segment returns a unified grid
>>> grid = taulu.segment_table("scan_01.png")

Debug visualization to tune parameters:

>>> taulu = Taulu("header.png", sauvola_k=0.15)
>>>
>>> # Opens windows showing each processing step
>>> # Press 'n' to advance, 'q' to quit
>>> grid = taulu.segment_table("table.png", debug_view=True)
>>>
>>> # Adjust parameters based on what you see:
>>> # - If binarization is too noisy: increase sauvola_k
>>> # - If lines are broken after morphology: increase morph_size
>>> # - If filtered image has "undefined" corners: adjust cross_width to match line thickness (after morphology)
>>> # - If corners are missed during search: decrease grow_threshold or increase search_region
Attributes:
- _header (MatLike | Split[MatLike]): Loaded header image(s)
- _aligner (HeaderAligner | Split[HeaderAligner]): Header alignment engine(s)
- _template (HeaderTemplate | Split[HeaderTemplate]): Parsed header structure(s)
- _grid_detector (GridDetector | Split[GridDetector]): Grid detection engine(s)
- _cell_heights (list[int] | Split[list[int]]): Computed cell heights in pixels
Raises:
- TauluException: If header files don't exist, annotation is missing, or Split parameters are used incorrectly with single headers
See Also:
- `TableGrid`: The result object with methods for accessing cells
- `Split`: Container for paired left/right parameters
- `GridDetector`: Lower-level grid detection (for advanced usage)
- `HeaderAligner`: Lower-level header alignment (for advanced usage)
def __init__(
    self,
    header_image_path: PathLike[str] | str | Split[PathLike[str] | str],
    cell_height_factor: float | list[float] | Split[float | list[float]] = [1.0],
    header_anno_path: PathLike[str]
    | str
    | Split[PathLike[str] | str]
    | None = None,
    sauvola_k: float | Split[float] = 0.25,
    search_region: int | Split[int] = 60,
    distance_penalty: float | Split[float] = 0.4,
    cross_width: int | Split[int] = 10,
    morph_size: int | Split[int] = 4,
    kernel_size: int | Split[int] = 41,
    processing_scale: float | Split[float] = 1.0,
    min_rows: int | Split[int] = 5,
    look_distance: int | Split[int] = 3,
    grow_threshold: float | Split[float] = 0.3,
):
    """
    Load the header image(s) and annotation(s) and build the aligner and
    grid detector used by `segment_table()`.

    Args:
        header_image_path:
            Path to the header template image(s). The header should be a cropped
            image showing a clear view of the table's first row. An annotation
            file (.json) must exist alongside the image, created via `Taulu.annotate()`.
            For split tables, provide a `Split` containing left and right header paths.

        cell_height_factor:
            Height of data rows relative to header height. For example, if your
            header is 100px tall and data rows are 80px tall, use 0.8.

            - **float**: All rows have the same height
            - **list[float]**: Different heights for different rows. The last value
              is repeated for any additional rows beyond the list length. Useful when
              the first data row is taller than subsequent rows.
            - **Split**: Different height factors for left and right sides

            Default: [1.0]

        header_anno_path (PathLike[str] | str | Split[PathLike[str] | str] | None):
            Optional explicit path to header annotation JSON file(s). If None,
            looks for a .json file with the same name as `header_image_path`.
            Must be a `Split` (or None) when `header_image_path` is a `Split`,
            and a single path (or None) otherwise.
            Default: None

        sauvola_k (float | Split[float]):
            Threshold sensitivity for Sauvola adaptive binarization (0.0-1.0).
            Controls how aggressively the algorithm converts the image to binary.

            - **Lower values** (0.04-0.15): Preserve faint lines, more noise
            - **Higher values** (0.20-0.35): Remove noise, may lose faint lines

            Start with 0.25 and adjust based on your image quality.
            Default: 0.25

        search_region (int | Split[int]):
            Size in pixels of the square region to search for the next corner point.
            The algorithm estimates where a corner should be, then searches within
            this region for the best match.

            - **Smaller values** (20-40): Faster, requires well-aligned tables
            - **Larger values** (60-100): More robust to warping and distortion

            Default: 60

        distance_penalty (float | Split[float]):
            Weight factor [0, 1] for penalizing corners far from expected position.
            Uses Gaussian weighting within the search region.

            - **0.0**: No penalty, any position in search region is equally valid
            - **0.5**: Moderate preference for positions near the expected location
            - **1.0**: Strong preference, only accepts positions very close to expected

            Default: 0.4

        cross_width (int | Split[int]):
            Width in pixels of the cross-shaped kernel used to detect intersections.
            Should approximately match the thickness of your table rules AFTER
            morphological dilation.

            **Tuning**: Look at the dilated image in debug_view. The cross_width
            should match the thickness of the black lines you see.
            Default: 10

        morph_size (int | Split[int]):
            Size of morphological structuring element for dilation. Controls how
            much gap-bridging occurs to connect broken line segments.

            - **Smaller values** (2-4): Minimal connection, preserves thin lines
            - **Larger values** (6-10): Connects larger gaps, but thickens lines

            Note: Increasing this requires increasing `cross_width` proportionally.
            Default: 4

        kernel_size (int | Split[int]):
            Size of the cross-shaped kernel (must be odd). Larger kernels are more
            selective, reducing false positives but potentially missing valid corners.

            - **Smaller values** (21-31): More sensitive, finds more candidates
            - **Larger values** (41-61): More selective, fewer false positives

            Default: 41

        processing_scale (float | Split[float]):
            Image downscaling factor (0, 1] for processing speed. Processing is done
            on scaled images, then results are scaled back to original size.

            - **1.0**: Full resolution (slowest, most accurate)
            - **0.5-0.75**: Good balance for high-res scans (2x-4x speedup)
            - **0.25-0.5**: Fast processing for very large images

            Default: 1.0

        min_rows (int | Split[int]):
            Minimum number of rows required before the algorithm considers the
            table complete. Prevents stopping too early on tables with initial
            low-confidence detections.
            Default: 5

        look_distance (int | Split[int]):
            Number of adjacent rows/columns to examine when extrapolating missing
            corners using polynomial regression. Higher values provide more context
            but may smooth over legitimate variations.

            - **2-3**: Good for consistent grids
            - **4-6**: Better for grids with some irregularity

            Default: 3

        grow_threshold (float | Split[float]):
            Initial minimum confidence [0, 1] required to accept a detected corner
            during the growing phase. The algorithm may adaptively lower this
            threshold if growth stalls.

            - **Higher values** (0.5-0.8): Stricter, fewer errors but may miss valid corners
            - **Lower values** (0.2-0.4): More permissive, finds more corners but more errors

            Default: 0.3

    Raises:
        TauluException: If a header image or annotation file does not exist,
            if `header_image_path` and `header_anno_path` are not both single
            or both `Split`, or if `Split` parameters are combined with a
            single header.
    """
    # The signature default is one shared list object; keep a private copy
    # so nothing downstream can mutate it across instances.
    if isinstance(cell_height_factor, list):
        cell_height_factor = list(cell_height_factor)

    self._processing_scale = processing_scale
    self._cell_height_factor = cell_height_factor

    image_is_split = isinstance(header_image_path, Split)
    anno_is_split = isinstance(header_anno_path, Split)

    # Reject mixed single/Split header arguments up front instead of
    # failing later with an AttributeError on `.left` / `.right`.
    if anno_is_split and not image_is_split:
        raise TauluException(
            "A Split header_anno_path can only be used with a Split header_image_path"
        )
    if image_is_split and header_anno_path is not None and not anno_is_split:
        raise TauluException(
            "header_anno_path must be a Split (or None) when header_image_path is a Split"
        )

    def build_detector(side=None):
        """Create a GridDetector; `side` selects the half of Split parameters."""

        def pick(param):
            return param if side is None else get_param(param, side)

        return GridDetector(
            kernel_size=pick(kernel_size),
            cross_width=pick(cross_width),
            morph_size=pick(morph_size),
            search_region=pick(search_region),
            sauvola_k=pick(sauvola_k),
            distance_penalty=pick(distance_penalty),
            scale=pick(self._processing_scale),
            min_rows=pick(min_rows),
            look_distance=pick(look_distance),
            grow_threshold=pick(grow_threshold),
        )

    if image_is_split:
        header = Split(Path(header_image_path.left), Path(header_image_path.right))

        if not exists(header.left.with_suffix(".png")) or not exists(
            header.right.with_suffix(".png")
        ):
            raise TauluException(
                "The header images you provided do not exist (or they aren't .png files)"
            )

        if header_anno_path is None:
            anno_left = header.left.with_suffix(".json")
            anno_right = header.right.with_suffix(".json")
            missing_message = "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
        else:
            anno_left = header_anno_path.left
            anno_right = header_anno_path.right
            missing_message = "The header annotation files you provided do not exist (or they aren't .json files)"

        if not exists(anno_left) or not exists(anno_right):
            raise TauluException(missing_message)

        template_left = HeaderTemplate.from_saved(anno_left)
        template_right = HeaderTemplate.from_saved(anno_right)

        self._header = Split(
            cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right))
        )

        self._aligner = Split(
            HeaderAligner(
                self._header.left, scale=get_param(self._processing_scale, "left")
            ),
            HeaderAligner(
                self._header.right, scale=get_param(self._processing_scale, "right")
            ),
        )

        self._template = Split(template_left, template_right)

        self._cell_heights = Split(
            self._template.left.cell_heights(get_param(cell_height_factor, "left")),
            self._template.right.cell_heights(
                get_param(cell_height_factor, "right")
            ),
        )

        # Left and right detectors may use different parameters.
        self._grid_detector = Split(build_detector("left"), build_detector("right"))

    else:
        # For a single header, parameters must not be Split objects. Check
        # this before touching the filesystem or building any component.
        if any(
            isinstance(param, Split)
            for param in [
                sauvola_k,
                search_region,
                distance_penalty,
                cross_width,
                morph_size,
                kernel_size,
                processing_scale,
                min_rows,
                look_distance,
                grow_threshold,
                cell_height_factor,
            ]
        ):
            raise TauluException(
                "Split parameters can only be used with split headers (tuple header_path)"
            )

        header_image_path = Path(header_image_path)
        if not exists(header_image_path):
            raise TauluException(
                f"The header image you provided does not exist: {header_image_path}"
            )

        # Honour an explicit annotation path; otherwise fall back to the
        # .json file next to the header image.
        if header_anno_path is None:
            anno_path = header_image_path.with_suffix(".json")
            missing_message = "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
        else:
            anno_path = header_anno_path
            missing_message = "The header annotation files you provided do not exist (or they aren't .json files)"

        if not exists(anno_path):
            raise TauluException(missing_message)

        self._header = cv2.imread(os.fspath(header_image_path))
        # Pass the processing scale to the aligner as well, matching the
        # split-header branch and the GridDetector.
        self._aligner = HeaderAligner(self._header, scale=self._processing_scale)
        self._template = HeaderTemplate.from_saved(anno_path)

        self._cell_heights = self._template.cell_heights(self._cell_height_factor)

        self._grid_detector = build_detector()
Arguments:
- header_image_path: Path to the header template image(s). The header should be a cropped image showing a clear view of the table's first row. An annotation file (.json) must exist alongside the image, created via `Taulu.annotate()`. For split tables, provide a `Split` containing left and right header paths.
- cell_height_factor: Height of data rows relative to header height. For example, if your header is 100px tall and data rows are 80px tall, use 0.8.
- float: All rows have the same height
- list[float]: Different heights for different rows. The last value is repeated for any additional rows beyond the list length. Useful when the first data row is taller than subsequent rows.
- Split: Different height factors for left and right sides
Default: [1.0]
- header_anno_path (PathLike[str] | str | Split[PathLike[str] | str] | None): Optional explicit path to header annotation JSON file(s). If None, looks for a .json file with the same name as `header_image_path`. Default: None
- sauvola_k (float | Split[float]): Threshold sensitivity for Sauvola adaptive binarization (0.0-1.0). Controls how aggressively the algorithm converts the image to binary.
- Lower values (0.04-0.15): Preserve faint lines, more noise
- Higher values (0.20-0.35): Remove noise, may lose faint lines
Start with 0.25 and adjust based on your image quality. Default: 0.25
search_region (int | Split[int]): Size in pixels of the square region to search for the next corner point. The algorithm estimates where a corner should be, then searches within this region for the best match.
- Smaller values (20-40): Faster, requires well-aligned tables
- Larger values (60-100): More robust to warping and distortion
Default: 60
distance_penalty (float | Split[float]): Weight factor [0, 1] for penalizing corners far from expected position. Uses Gaussian weighting within the search region.
- 0.0: No penalty, any position in search region is equally valid
- 0.5: Moderate preference for positions near the expected location
- 1.0: Strong preference, only accepts positions very close to expected
Default: 0.4
cross_width (int | Split[int]): Width in pixels of the cross-shaped kernel used to detect intersections. Should approximately match the thickness of your table rules AFTER morphological dilation.
Tuning: Look at the dilated image in debug_view. The cross_width should match the thickness of the black lines you see. Default: 10
morph_size (int | Split[int]): Size of morphological structuring element for dilation. Controls how much gap-bridging occurs to connect broken line segments.
- Smaller values (2-4): Minimal connection, preserves thin lines
- Larger values (6-10): Connects larger gaps, but thickens lines
Note: Increasing this requires increasing `cross_width` proportionally. Default: 4
- kernel_size (int | Split[int]): Size of the cross-shaped kernel (must be odd). Larger kernels are more selective, reducing false positives but potentially missing valid corners.
- Smaller values (21-31): More sensitive, finds more candidates
- Larger values (41-61): More selective, fewer false positives
Default: 41
processing_scale (float | Split[float]): Image downscaling factor (0, 1] for processing speed. Processing is done on scaled images, then results are scaled back to original size.
- 1.0: Full resolution (slowest, most accurate)
- 0.5-0.75: Good balance for high-res scans (2x-4x speedup)
- 0.25-0.5: Fast processing for very large images
Default: 1.0
- min_rows (int | Split[int]): Minimum number of rows required before the algorithm considers the table complete. Prevents stopping too early on tables with initial low-confidence detections. Default: 5
look_distance (int | Split[int]): Number of adjacent rows/columns to examine when extrapolating missing corners using polynomial regression. Higher values provide more context but may smooth over legitimate variations.
- 2-3: Good for consistent grids
- 4-6: Better for grids with some irregularity
Default: 3
grow_threshold (float | Split[float]): Initial minimum confidence [0, 1] required to accept a detected corner during the growing phase. The algorithm may adaptively lower this threshold if growth stalls.
- Higher values (0.5-0.8): Stricter, fewer errors but may miss valid corners
- Lower values (0.2-0.4): More permissive, finds more corners but more errors
Default: 0.3
@staticmethod
def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str):
    """
    Interactively create the header annotation needed for table segmentation.

    The annotation happens in two steps:

    1. **Crop the header**: Click four corners to define the header region
    2. **Annotate lines**: Click pairs of points to define each vertical and
       horizontal line in the header

    Two files are written, both derived from `output_path`:

    - The cropped header image, with a `.png` suffix
    - The line annotation, with a `.json` suffix

    ## Annotation Guidelines

    **Which lines to annotate:**

    - All vertical lines that extend into the table body (column separators)
    - The top horizontal line of the header
    - The bottom horizontal line of the header (top of data rows)

    **Order doesn't matter** - annotate lines in any order that's convenient.

    **To annotate a line:**

    1. Click once at one endpoint
    2. Click again at the other endpoint
    3. A green line appears showing your annotation

    **To undo:**

    - Right-click anywhere to remove the last line you drew

    **When finished:**

    - Press 'n' to save and exit
    - Press 'q' to quit without saving

    Args:
        image_path (PathLike[str] | str): Path to a table image containing
            a clear view of the header. This can be a full table image.
        output_path (PathLike[str] | str): Where to save the cropped header
            image. The annotation JSON is saved under the same name with a
            .json extension.

    Raises:
        TauluException: If image_path doesn't exist or output_path is a directory

    Examples:
        Annotate a single header:

        >>> from taulu import Taulu
        >>> Taulu.annotate("scan_page_01.png", "header.png")
        >>> # Interactive window opens
        >>> # After annotation: creates header.png and header.json

        Annotate left and right headers for a split table:

        >>> Taulu.annotate("scan_page_01.png", "header_left.png")
        >>> Taulu.annotate("scan_page_01.png", "header_right.png")
        >>> # Creates header_left.{png,json} and header_right.{png,json}

    Notes:
        - The header image doesn't need to be perfectly cropped initially -
          the tool will help you crop it precisely
        - Annotation accuracy is important: misaligned lines will cause
          segmentation errors
        - You can re-run this method to update annotations if needed
    """
    # Validate both paths before any interactive window is opened.
    if not exists(image_path):
        raise TauluException(f"Image path {image_path} does not exist")
    if os.path.isdir(output_path):
        raise TauluException("Output path should be a file")

    # Both output files share the stem of `output_path`.
    target = Path(output_path)
    cropped_header_file = target.with_suffix(".png")
    annotation_file = target.with_suffix(".json")

    header_template = HeaderTemplate.annotate_image(
        os.fspath(image_path), crop=cropped_header_file
    )
    header_template.save(annotation_file)
Interactive tool to create header annotations for table segmentation.
This method guides you through a two-step annotation process:
- Crop the header: Click four corners to define the header region
- Annotate lines: Click pairs of points to define each vertical and horizontal line in the header
The annotations are saved as:
- A cropped header image (.png) at `output_path`
- A JSON file (.json) containing line coordinates
Annotation Guidelines
Which lines to annotate:
- All vertical lines that extend into the table body (column separators)
- The top horizontal line of the header
- The bottom horizontal line of the header (top of data rows)
Order doesn't matter - annotate lines in any order that's convenient.
To annotate a line:
- Click once at one endpoint
- Click again at the other endpoint
- A green line appears showing your annotation
To undo:
- Right-click anywhere to remove the last line you drew
When finished:
- Press 'n' to save and exit
- Press 'q' to quit without saving
Arguments:
- image_path (PathLike[str] | str): Path to a table image containing a clear view of the header. This can be a full table image.
- output_path (PathLike[str] | str): Where to save the cropped header image. The annotation JSON will be saved with the same name but .json extension.
Raises:
- TauluException: If image_path doesn't exist or output_path is a directory
Examples:
Annotate a single header:
>>> from taulu import Taulu
>>> Taulu.annotate("scan_page_01.png", "header.png")
>>> # Interactive window opens
>>> # After annotation: creates header.png and header.json
Annotate left and right headers for a split table:
>>> Taulu.annotate("scan_page_01.png", "header_left.png")
>>> Taulu.annotate("scan_page_01.png", "header_right.png")
>>> # Creates header_left.{png,json} and header_right.{png,json}
Notes:
- The header image doesn't need to be perfectly cropped initially - the tool will help you crop it precisely
- Annotation accuracy is important: misaligned lines will cause segmentation errors
- You can re-run this method to update annotations if needed
def segment_table(
    self,
    image: MatLike | PathLike[str] | str,
    filtered: Optional[MatLike | PathLike[str] | str] = None,
    debug_view: bool = False,
) -> TableGrid:
    """
    Segment a table image into a grid of cells.

    This is the main entry point for the taulu package. It orchestrates:

    1. **Header alignment**: Locates the table by matching the header template
       to the image using feature-based registration (ORB features + homography)
    2. **Grid detection**: Applies morphological filtering and cross-correlation
       to find corner intersections
    3. **Grid growing**: Iteratively detects corners row-by-row and column-by-column,
       starting from the aligned header position
    4. **Extrapolation**: Fills in any missing corners using polynomial regression
       based on neighboring detected points
    5. **Smoothing**: Refines corner positions for consistency

    ## Performance Notes

    Processing time depends on:

    - Image resolution (use `processing_scale < 1.0` for large images)
    - Table complexity (more rows/columns = longer processing)
    - Parameter settings

    ## Troubleshooting

    **If segmentation fails (returns incomplete grid):**

    1. Enable `debug_view=True` to see where it stops
    2. Check if header alignment is correct (first debug image)
    3. Verify cross-correlation shows bright spots at corners
    4. Adjust `grow_threshold` (lower if stopping too early)
    5. Increase `search_region` if corners are far from expected positions

    **If segmentation is inaccurate (corners in wrong positions):**

    1. Check binarization quality (adjust `sauvola_k`)
    2. Verify cross-kernel size matches line thickness (adjust `cross_width`)
    3. Ensure morphology isn't over-connecting (reduce `morph_size`)
    4. Increase `distance_penalty` to enforce expected positions more strictly

    Args:
        image (MatLike | PathLike[str] | str): Table image to segment.
            Can be a file path or a numpy array (BGR or grayscale).

        filtered (MatLike | PathLike[str] | str | None): Optional pre-filtered
            binary image to use instead of computing it internally.
            Must be the same size as `image`. If provided, parameters related
            to filtering (e.g. `sauvola_k`, `morph_size`) are ignored.
            It is forwarded unchanged to the grid detector.

            **GPU acceleration**: Use trained CNN model for corner detection:

            >>> from taulu.gpu import DeepConvNet, apply_kernel_to_image_tiled
            >>> model = DeepConvNet.load("model.pth")
            >>> filtered = apply_kernel_to_image_tiled(model, image)
            >>> grid = taulu.segment_table(image, filtered=filtered)

            Default: None

        debug_view (bool): If True, opens OpenCV windows showing intermediate
            processing steps:

            - Header alignment overlay
            - Binarized image
            - After morphological operations
            - Cross-correlation result
            - Growing progress (corner-by-corner)

            **Controls:**

            - Press 'n' to advance to next step
            - Press 'q' to quit immediately

            Useful for parameter tuning and understanding failures.
            Default: False

    Returns:
        TableGrid: A grid structure containing detected corner positions with
            methods for:

            **Position queries:**

            - `cell(point)`: Get (row, col) at pixel coordinates (x, y)
            - `cell_polygon(cell)`: Get 4 corners of a cell as (lt, rt, rb, lb)
            - `region(start, end)`: Get bounding box for a cell range

            **Image extraction:**

            - `crop_cell(img, cell, margin=0)`: Extract single cell with optional margin
            - `crop_region(img, start, end, margin=0)`: Extract rectangular region

            **Visualization:**

            - `show_cells(img)`: Interactive cell viewer (click to highlight)
            - `highlight_all_cells(img)`: Draw all cell boundaries
            - `visualize_points(img)`: Show detected corner points

            **Analysis:**

            - `text_regions(img, row)`: Find continuous text regions in a row
            - `cells()`: Generator yielding all (row, col) indices

            **Persistence:**

            - `save(path)`: Save grid to JSON file
            - `TableGrid.from_saved(path)`: Load grid from JSON

            **Properties:**

            - `rows`: Number of data rows (header not included)
            - `cols`: Number of columns
            - `points`: Raw list of detected corner coordinates

    Raises:
        TauluException: If image cannot be loaded, header alignment fails,
            or grid detection produces no results

    Examples:
        Basic segmentation:

        >>> from taulu import Taulu
        >>> import cv2
        >>>
        >>> taulu = Taulu("header.png")
        >>> grid = taulu.segment_table("table_page_01.png")
        >>>
        >>> print(f"Detected {grid.rows} rows and {grid.cols} columns")
        >>>
        >>> # Extract first cell
        >>> img = cv2.imread("table_page_01.png")
        >>> cell_img = grid.crop_cell(img, (0, 0))
        >>> cv2.imwrite("cell_0_0.png", cell_img)

        Debug mode for parameter tuning:

        >>> grid = taulu.segment_table("table_page_01.png", debug_view=True)
        >>> # Windows open showing each step
        >>> # Adjust parameters based on what you see

        Process multiple images with the same header:

        >>> taulu = Taulu("header.png", sauvola_k=0.25)
        >>>
        >>> for i in range(1, 11):
        ...     img_path = f"table_page_{i:02d}.png"
        ...     grid = taulu.segment_table(img_path)
        ...     grid.save(f"grid_{i:02d}.json")
        ...     print(f"Page {i}: {grid.rows} rows detected")

        Extract all cells from a table:

        >>> img = cv2.imread("table.png")
        >>> grid = taulu.segment_table("table.png")
        >>>
        >>> for row, col in grid.cells():
        ...     cell_img = grid.crop_cell(img, (row, col), margin=5)
        ...     cv2.imwrite(f"cell_{row}_{col}.png", cell_img)

        Find text regions for OCR:

        >>> for row in range(grid.rows):
        ...     text_regions = grid.text_regions(img, row)
        ...     for start_cell, end_cell in text_regions:
        ...         # Extract region spanning multiple cells
        ...         region_img = grid.crop_region(img, start_cell, end_cell)
        ...         # Run OCR on region_img...

    See Also:
        - `TableGrid`: Complete documentation of the returned object
        - `GridDetector.find_table_points()`: Lower-level grid detection
        - `HeaderAligner.align()`: Lower-level header alignment
    """

    def to_int_point(point):
        """Truncate an (x, y) pair to integer pixel coordinates."""
        return (int(point[0]), int(point[1]))

    if not isinstance(image, MatLike):
        image_path = os.fspath(image)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear error instead of deep inside the aligner.
        if image is None:
            raise TauluException(f"Could not load image from {image_path}")

    now = perf_counter()
    h = self._aligner.align(image, visual=debug_view)
    align_time = perf_counter() - now
    logger.info("Header alignment took %.2f seconds", align_time)

    # find the starting point for the table grid algorithm
    left_top_template = self._template.intersection((1, 0))
    if isinstance(left_top_template, Split):
        left_top_template = Split(
            to_int_point(left_top_template.left),
            to_int_point(left_top_template.right),
        )
    else:
        left_top_template = to_int_point(left_top_template)

    left_top_table = self._aligner.template_to_img(h, left_top_template)

    now = perf_counter()
    table = self._grid_detector.find_table_points(
        image,
        left_top_table,
        self._template.cell_widths(0),
        self._cell_heights,
        visual=debug_view,
        filtered=filtered,
    )
    grid_time = perf_counter() - now
    logger.info("Grid detection took %.2f seconds", grid_time)

    # Split headers produce one result per side; merge them into one grid.
    if isinstance(table, Split):
        table = TableGrid.from_split(table, (0, 0))

    return table
Segment a table image into a grid of cells.
This is the main entry point for the taulu package. It orchestrates:
- Header alignment: Locates the table by matching the header template to the image using feature-based registration (ORB features + homography)
- Grid detection: Applies morphological filtering and cross-correlation to find corner intersections
- Grid growing: Iteratively detects corners row-by-row and column-by-column, starting from the aligned header position
- Extrapolation: Fills in any missing corners using polynomial regression based on neighboring detected points
- Smoothing: Refines corner positions for consistency
Performance Notes
Processing time depends on:
- Image resolution (use `processing_scale < 1.0` for large images)
- Table complexity (more rows/columns = longer processing)
- Parameter settings
Troubleshooting
If segmentation fails (returns incomplete grid):
- Enable `debug_view=True` to see where it stops
- Check if header alignment is correct (first debug image)
- Verify cross-correlation shows bright spots at corners
- Adjust `grow_threshold` (lower if stopping too early)
- Increase `search_region` if corners are far from expected positions
If segmentation is inaccurate (corners in wrong positions):
- Check binarization quality (adjust `sauvola_k`)
- Verify cross-kernel size matches line thickness (adjust `cross_width`)
- Ensure morphology isn't over-connecting (reduce `morph_size`)
- Increase `distance_penalty` to enforce expected positions more strictly
Arguments:
- image (MatLike | PathLike[str] | str): Table image to segment. Can be a file path or a numpy array (BGR or grayscale).
- filtered (MatLike | PathLike[str] | str | None): Optional pre-filtered binary image to use instead of computing it internally. Must be the same size as `image`. If provided, parameters related to filtering (e.g. `sauvola_k`, `morph_size`) are ignored.

    GPU acceleration: Use a trained CNN model for corner detection:

        >>> from taulu.gpu import DeepConvNet, apply_kernel_to_image_tiled
        >>> model = DeepConvNet.load("model.pth")
        >>> filtered = apply_kernel_to_image_tiled(model, image)
        >>> grid = taulu.segment_table(image, filtered=filtered)

    Default: None

- debug_view (bool): If True, opens OpenCV windows showing intermediate processing steps:
- Header alignment overlay
- Binarized image
- After morphological operations
- Cross-correlation result
- Growing progress (corner-by-corner)
Controls:
- Press 'n' to advance to next step
- Press 'q' to quit immediately
Useful for parameter tuning and understanding failures. Default: False
Returns:
TableGrid: A grid structure containing detected corner positions with methods for:

**Position queries:**

- `cell(point)`: Get (row, col) at pixel coordinates (x, y)
- `cell_polygon(cell)`: Get 4 corners of a cell as (lt, rt, rb, lb)
- `region(start, end)`: Get bounding box for a cell range

**Image extraction:**

- `crop_cell(img, cell, margin=0)`: Extract single cell with optional margin
- `crop_region(img, start, end, margin=0)`: Extract rectangular region

**Visualization:**

- `show_cells(img)`: Interactive cell viewer (click to highlight)
- `highlight_all_cells(img)`: Draw all cell boundaries
- `visualize_points(img)`: Show detected corner points

**Analysis:**

- `text_regions(img, row)`: Find continuous text regions in a row
- `cells()`: Generator yielding all (row, col) indices

**Persistence:**

- `save(path)`: Save grid to JSON file
- `TableGrid.from_saved(path)`: Load grid from JSON

**Properties:**

- `rows`: Number of data rows (header not included)
- `cols`: Number of columns
- `points`: Raw list of detected corner coordinates

Raises:
- TauluException: If image cannot be loaded, header alignment fails, or grid detection produces no results
Examples:
Basic segmentation:

    >>> from taulu import Taulu
    >>> import cv2
    >>>
    >>> taulu = Taulu("header.png")
    >>> grid = taulu.segment_table("table_page_01.png")
    >>>
    >>> print(f"Detected {grid.rows} rows and {grid.cols} columns")
    >>>
    >>> # Extract first cell
    >>> img = cv2.imread("table_page_01.png")
    >>> cell_img = grid.crop_cell(img, (0, 0))
    >>> cv2.imwrite("cell_0_0.png", cell_img)

Debug mode for parameter tuning:

    >>> grid = taulu.segment_table("table_page_01.png", debug_view=True)
    >>> # Windows open showing each step
    >>> # Adjust parameters based on what you see

Process multiple images with the same header:

    >>> taulu = Taulu("header.png", sauvola_k=0.25)
    >>>
    >>> for i in range(1, 11):
    ...     img_path = f"table_page_{i:02d}.png"
    ...     grid = taulu.segment_table(img_path)
    ...     grid.save(f"grid_{i:02d}.json")
    ...     print(f"Page {i}: {grid.rows} rows detected")

Extract all cells from a table:

    >>> img = cv2.imread("table.png")
    >>> grid = taulu.segment_table("table.png")
    >>>
    >>> for row, col in grid.cells():
    ...     cell_img = grid.crop_cell(img, (row, col), margin=5)
    ...     cv2.imwrite(f"cell_{row}_{col}.png", cell_img)

Find text regions for OCR:

    >>> for row in range(grid.rows):
    ...     text_regions = grid.text_regions(img, row)
    ...     for start_cell, end_cell in text_regions:
    ...         # Extract region spanning multiple cells
    ...         region_img = grid.crop_region(img, start_cell, end_cell)
    ...         # Run OCR on region_img...

See Also:
- `TableGrid`: Complete documentation of the returned object
- `GridDetector.find_table_points()`: Lower-level grid detection
- `HeaderAligner.align()`: Lower-level header alignment