taulu
Taulu - segment tables from images
Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).
To use this package, you first need to make an annotation of the headers in your table images. The idea is that these headers will be similar across your full set of images, and they will be used as a starting point for the search algorithm that finds the table grid.
Here is an example python script of how to use Taulu:
from taulu import Taulu
import os

# Annotation files produced by setup() and consumed by main().
HEADER_LEFT = "table_00_header_left.png"
HEADER_RIGHT = "table_00_header_right.png"
TABLE_IMAGE = "../data/table_00.png"


def setup():
    """Create the header annotations (one for the left header, one for the right)."""
    # create an Annotation file of the headers in the image
    # (one for the left header, one for the right)
    # and store them in the examples directory
    print("Annotating the LEFT header...")
    Taulu.annotate(TABLE_IMAGE, HEADER_LEFT)

    print("Annotating the RIGHT header...")
    Taulu.annotate(TABLE_IMAGE, HEADER_RIGHT)


def main():
    """Segment the example table using the two header annotations and show its cells."""
    taulu = Taulu((HEADER_LEFT, HEADER_RIGHT))
    table = taulu.segment_table(TABLE_IMAGE, cell_height_factor=0.8, debug_view=True)

    table.show_cells(TABLE_IMAGE)


if __name__ == "__main__":
    # The annotations only have to be made once; reuse them when both exist.
    if not (os.path.exists(HEADER_LEFT) and os.path.exists(HEADER_RIGHT)):
        setup()
    main()
If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu)
"""
Taulu - *segment tables from images*

Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).

To use this package, you first need to make an annotation of the headers in your table images.
The idea is that these headers will be similar across your full set of images, and they will be
used as a starting point for the search algorithm that finds the table grid.

Here is an example python script of how to use Taulu:
```python
from taulu import Taulu
import os


def setup():
    # create an Annotation file of the headers in the image
    # (one for the left header, one for the right)
    # and store them in the examples directory
    print("Annotating the LEFT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")

    print("Annotating the RIGHT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")


def main():
    taulu = Taulu(("table_00_header_left.png", "table_00_header_right.png"))
    table = taulu.segment_table("../data/table_00.png", cell_height_factor=0.8, debug_view=True)

    table.show_cells("../data/table_00.png")


if __name__ == "__main__":
    if os.path.exists("table_00_header_left.png") and os.path.exists(
        "table_00_header_right.png"
    ):
        main()
    else:
        setup()
        main()

```

If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu)
"""

from .grid import GridDetector, TableGrid
from .header_aligner import HeaderAligner
from .header_template import HeaderTemplate
from .table_indexer import TableIndexer
from .split import Split
from .taulu import Taulu

# Submodules mapped to False here are excluded from the generated documentation.
__pdoc__ = dict.fromkeys(
    ("constants", "main", "decorators", "error", "types", "img_util"),
    False,
)

# Names re-exported as the public API of the package.
__all__ = [
    "GridDetector",
    "TableGrid",
    "HeaderAligner",
    "HeaderTemplate",
    "TableIndexer",
    "Split",
    "Taulu",
]

# The `gpu` submodule is only exported when importing it succeeds.
try:
    from . import gpu
except ImportError:
    pass
else:
    __all__.append("gpu")
class GridDetector:
    """
    Detects table grid intersections using morphological filtering and template matching.

    This detector implements a multi-stage pipeline:

    1. **Binarization**: Sauvola adaptive thresholding to handle varying lighting
    2. **Morphological operations**: Dilation to connect broken rule segments
    3. **Cross-kernel matching**: Template matching with a cross-shaped kernel to find
       rule intersections where horizontal and vertical lines meet
    4. **Grid growing**: Iterative point detection starting from a known seed point

    The cross-kernel is designed to match the specific geometry of your table rules.
    It should be sized so that after morphology, it aligns with actual corner shapes.

    ## Tuning Guidelines

    - **kernel_size**: Increase if you need more selectivity (fewer false positives)
    - **cross_width/height**: Should match rule thickness after morphology
    - **morph_size**: Increase to connect more broken lines, but this thickens rules
    - **sauvola_k**: Increase to threshold more aggressively (remove noise)
    - **search_region**: Increase for documents with more warping/distortion
    - **distance_penalty**: Increase to prefer corners closer to expected positions

    ## Visual Debugging

    Set `visual=True` in methods to see intermediate results and tune parameters.
    """

    def __init__(
        self,
        kernel_size: int = 21,
        cross_width: int = 6,
        cross_height: Optional[int] = None,
        morph_size: Optional[int] = None,
        sauvola_k: float = 0.04,
        sauvola_window: int = 15,
        scale: float = 1.0,
        search_region: int = 40,
        distance_penalty: float = 0.4,
        min_rows: int = 5,
        grow_threshold: float = 0.3,
        look_distance: int = 4,
    ):
        """
        Args:
            kernel_size (int): the size of the cross kernel (must be odd);
                a larger kernel size often means that more penalty is applied, often leading
                to more sparse results
            cross_width (int): the width of one of the edges in the cross filter, should be
                roughly equal to the width of the rules in the image after morphology is applied
            cross_height (int | None): useful if the horizontal rules and vertical rules
                have different sizes; defaults to `cross_width`
            morph_size (int | None): the size of the morphology operators that are applied before
                the cross kernel. 'bridges the gaps' of broken-up lines; defaults to `cross_width`
            sauvola_k (float): threshold parameter for sauvola thresholding
            sauvola_window (int): window_size parameter for sauvola thresholding
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            search_region (int): area in which to search for a new max value in `find_nearest` etc.
            distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
            grow_threshold (float): the threshold for accepting a new point when growing the table
            look_distance (int): how many points away to look when calculating the median slope

        Raises:
            ValueError: if any of the validated parameters is out of range
        """
        self._validate_parameters(
            kernel_size,
            cross_width,
            cross_height,
            morph_size,
            search_region,
            sauvola_k,
            sauvola_window,
            distance_penalty,
        )
        # `scale` is passed to cv.resize and used as a divisor when rescaling
        # the detected corners, so zero or negative values can never work.
        if scale <= 0:
            raise ValueError("scale must be positive")

        self._kernel_size = kernel_size
        self._cross_width = cross_width
        self._cross_height = cross_width if cross_height is None else cross_height
        self._morph_size = morph_size if morph_size is not None else cross_width
        self._search_region = search_region
        self._sauvola_k = sauvola_k
        self._sauvola_window = sauvola_window
        self._distance_penalty = distance_penalty
        self._scale = scale
        self._min_rows = min_rows
        self._grow_threshold = grow_threshold
        self._look_distance = look_distance

        self._cross_kernel = self._create_cross_kernel()

    def _validate_parameters(
        self,
        kernel_size: int,
        cross_width: int,
        cross_height: Optional[int],
        morph_size: Optional[int],
        search_region: int,
        sauvola_k: float,
        sauvola_window: int,
        distance_penalty: float,
    ) -> None:
        """
        Validate initialization parameters.

        Raises:
            ValueError: if `kernel_size` is even, if a size parameter is not positive,
                if `distance_penalty` is outside [0, 1] or if `sauvola_k` is not positive
        """
        if kernel_size % 2 == 0:
            raise ValueError("kernel_size must be odd")
        if (
            kernel_size <= 0
            or cross_width <= 0
            or search_region <= 0
            or sauvola_window <= 0
        ):
            raise ValueError("Size parameters must be positive")
        if cross_height is not None and cross_height <= 0:
            raise ValueError("cross_height must be positive")
        if morph_size is not None and morph_size <= 0:
            raise ValueError("morph_size must be positive")
        if not 0 <= distance_penalty <= 1:
            raise ValueError("distance_penalty must be in [0, 1]")
        if sauvola_k <= 0:
            raise ValueError("sauvola_k must be positive")

    def _create_gaussian_weights(self, region_size: int) -> NDArray:
        """
        Create a square 2D Gaussian weight mask.

        The mask is defined on the normalized coordinates [-1, 1] x [-1, 1]. Its
        sigma is chosen such that the weight at normalized distance 1 from the
        center equals `1 - distance_penalty`.

        Args:
            region_size (int): side length of the (square) mask, in pixels

        Returns:
            NDArray: float32 mask of shape (region_size, region_size);
                all ones when `distance_penalty` is 0
        """
        if self._distance_penalty == 0:
            return np.ones((region_size, region_size), dtype=np.float32)

        y = np.linspace(-1, 1, region_size)
        x = np.linspace(-1, 1, region_size)
        xv, yv = np.meshgrid(x, y)
        dist_squared = xv**2 + yv**2

        # Prevent log(0) when distance_penalty is 1
        if self._distance_penalty >= 0.999:
            sigma = 0.1  # Small sigma for very sharp peak
        else:
            sigma = np.sqrt(-1 / (2 * np.log(1 - self._distance_penalty)))

        weights = np.exp(-dist_squared / (2 * sigma**2))

        return weights.astype(np.float32)

    def _create_cross_kernel(self) -> NDArray:
        """
        Build the cross-shaped template used for matching rule intersections.

        Returns:
            NDArray: uint8 array of shape (kernel_size, kernel_size) that is 255 on a
                horizontal bar of thickness `cross_height` and a vertical bar of
                thickness `cross_width` (both through the center), 0 elsewhere
        """
        kernel = np.zeros((self._kernel_size, self._kernel_size), dtype=np.uint8)
        center = self._kernel_size // 2

        # Create horizontal bar
        h_start = max(0, center - self._cross_height // 2)
        h_end = min(self._kernel_size, center + (self._cross_height + 1) // 2)
        kernel[h_start:h_end, :] = 255

        # Create vertical bar
        v_start = max(0, center - self._cross_width // 2)
        v_end = min(self._kernel_size, center + (self._cross_width + 1) // 2)
        kernel[:, v_start:v_end] = 255

        return kernel

    def _apply_morphology(self, binary: MatLike) -> MatLike:
        """Dilate `binary` horizontally, then vertically, with 1D kernels of length `morph_size`."""
        kernel_hor = cv.getStructuringElement(cv.MORPH_RECT, (self._morph_size, 1))
        kernel_ver = cv.getStructuringElement(cv.MORPH_RECT, (1, self._morph_size))

        # Apply dilation
        dilated = cv.dilate(binary, kernel_hor, iterations=1)
        dilated = cv.dilate(dilated, kernel_ver, iterations=1)

        return dilated

    def _apply_cross_matching(self, img: MatLike) -> MatLike:
        """
        Apply cross kernel template matching.

        The image is zero-padded by half the kernel size on every side so that the
        result has the same size as `img`.

        Returns:
            MatLike: uint8 image in which higher values mean a better match
        """
        pad_y = self._cross_kernel.shape[0] // 2
        pad_x = self._cross_kernel.shape[1] // 2

        padded = cv.copyMakeBorder(
            img, pad_y, pad_y, pad_x, pad_x, borderType=cv.BORDER_CONSTANT, value=0
        )

        filtered = cv.matchTemplate(padded, self._cross_kernel, cv.TM_SQDIFF_NORMED)
        # Invert (low squared difference = good match) and normalize to 0-255 range
        filtered = cv.normalize(1.0 - filtered, None, 0, 255, cv.NORM_MINMAX)
        return filtered.astype(np.uint8)

    def apply(self, img: MatLike, visual: bool = False) -> MatLike:
        """
        Apply the grid detection filter to the input image.

        Args:
            img (MatLike): the input image
            visual (bool): whether to show intermediate steps

        Returns:
            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

        Raises:
            ValueError: if `img` is None or empty
        """

        if img is None or img.size == 0:
            raise ValueError("Input image is empty or None")

        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)

        if visual:
            imu.show(binary, title="thresholded")

        binary = self._apply_morphology(binary)

        if visual:
            imu.show(binary, title="dilated")

        filtered = self._apply_cross_matching(binary)

        return filtered

    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: Optional[int] = None
    ) -> Tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        A square window of side `region` centered on `point` is searched for the
        pixel with the highest Gaussian-weighted value. The part of the window
        that falls outside the image is ignored, but the weights stay centered
        on `point`.

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `search_region`

        Returns:
            tuple[Point, float]: the best matching point (x, y) and its weighted
                score divided by 255; `(point, 0.0)` when the search window does
                not overlap the image

        Raises:
            ValueError: if `filtered` is None or empty
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_region
        x, y = point

        # Origin of the search window *before* clipping to the image. Keeping it
        # unclamped lets us cut the matching part out of the weight mask, so the
        # Gaussian peak stays on `point` even next to an image border.
        win_x = x - region_size // 2
        win_y = y - region_size // 2

        x0 = max(0, win_x)
        y0 = max(0, win_y)
        x1 = min(filtered.shape[1], win_x + region_size)
        y1 = min(filtered.shape[0], win_y + region_size)

        if x1 <= x0 or y1 <= y0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[y0:y1, x0:x1]

        weights = self._create_gaussian_weights(region_size)[
            y0 - win_y : y1 - win_y,
            x0 - win_x : x1 - win_x,
        ]
        weighted = cropped.astype(np.float32) * weights

        best_y, best_x = np.unravel_index(np.argmax(weighted), weighted.shape)

        result_point = (
            int(x0 + best_x),
            int(y0 + best_y),
        )
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence

    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        left_top: Point,
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        window: str = WINDOW,
        goals_width: Optional[int] = None,
        filtered: Optional[MatLike | PathLike[str]] = None,
        smooth: bool = False,
    ) -> "TableGrid":
        """
        Parse the image to a `TableGrid` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike | PathLike[str]): the input image of a table, or a path to it
            left_top (tuple[int, int]): the starting point of the algorithm
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int] | int): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): accepted for backward compatibility;
                it is not used by this method
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch
            smooth (bool): if True, smooth the grid after detection, using local heuristics

        Returns:
            a TableGrid object

        Raises:
            ValueError: if `cell_widths` is empty or an image path cannot be read
        """

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        if not isinstance(img, np.ndarray):
            img_path = os.fspath(img)
            img = cv.imread(img_path)
            # cv.imread signals failure by returning None instead of raising
            if img is None:
                raise ValueError(f"Could not read image from {img_path!r}")

        if filtered is None:
            filtered = self.apply(img, visual)
        elif not isinstance(filtered, np.ndarray):
            filtered_path = os.fspath(filtered)
            filtered = cv.imread(filtered_path)
            if filtered is None:
                raise ValueError(f"Could not read image from {filtered_path!r}")

        filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)

        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # The seed point is refined in a window three times the normal search region
        left_top, confidence = self.find_nearest(
            filtered, left_top, int(self._search_region * 3)
        )

        if confidence < 0.1:
            logger.warning(
                f"Low confidence for the starting point: {confidence} at {left_top}"
            )

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        left_top = (int(left_top[0] * self._scale), int(left_top[1] * self._scale))
        search_region = int(self._search_region * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        table_grower = TableGrower(
            img_gray,
            filtered_gray,
            cell_widths,  # pyright: ignore
            cell_heights,  # pyright: ignore
            left_top,
            search_region,
            self._distance_penalty,
            self._look_distance,
            self._grow_threshold,
            self._min_rows,
        )

        def show_grower_progress(wait: bool = False):
            """Draw the corners found so far and the current edge points."""
            img_orig = np.copy(img)
            for row in table_grower.get_all_corners():
                for corner in row:
                    if corner is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corner],
                            color=(0, 0, 255),
                            thickness=30,
                        )

            edge = table_grower.get_edge_points()

            for point, score in edge:
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._grow_threshold

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # move the threshold back towards (at most) its original value
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    # nothing could be extrapolated: relax the threshold and retry
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            table_grower.grow_table(img_gray, filtered_gray)

        if smooth:
            table_grower.smooth_grid()
        corners = table_grower.get_all_corners()
        column_count = len(corners[0]) if corners else 0
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {column_count} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for row in corners:
                for x, corner in enumerate(row):
                    if corner is not None:
                        row[x] = (
                            int(corner[0] / self._scale),  # pyright:ignore
                            int(corner[1] / self._scale),  # pyright:ignore
                        )

        return TableGrid(corners)  # pyright: ignore

    def _visualize_grid(self, img: MatLike, points: List[List[Point]]) -> None:
        """Visualize the detected grid points."""
        all_points = [point for row in points for point in row]
        drawn = imu.draw_points(img, all_points)
        imu.show(drawn, wait=True)

    def _visualize_path_finding(
        self,
        path: List[Point],
        current: Point,
        next_point: Point,
        previous_row_target: Optional[Point] = None,
        region_center: Optional[Point] = None,
        region_size: Optional[int] = None,
    ) -> None:
        """
        Visualize the path finding process for debugging.

        Pops the current debug image from `imu`, draws on it and pushes it back.
        The image is only shown once per 10 calls (counted in the module-level
        `show_time`).
        """
        global show_time

        screen = imu.pop()

        # if gray, convert to BGR
        if len(screen.shape) == 2 or screen.shape[2] == 1:
            debug_img = cv.cvtColor(screen, cv.COLOR_GRAY2BGR)
        else:
            debug_img = cast(MatLike, screen)

        debug_img = imu.draw_points(debug_img, path, color=(200, 200, 0), thickness=2)
        debug_img = imu.draw_points(
            debug_img, [current], color=(0, 255, 0), thickness=3
        )
        debug_img = imu.draw_points(
            debug_img, [next_point], color=(0, 0, 255), thickness=2
        )

        # Draw previous row target if available
        if previous_row_target is not None:
            debug_img = imu.draw_points(
                debug_img, [previous_row_target], color=(255, 0, 255), thickness=2
            )

        # Draw search region if available
        if region_center is not None and region_size is not None:
            top_left = (
                max(0, region_center[0] - region_size // 2),
                max(0, region_center[1] - region_size // 2),
            )
            bottom_right = (
                min(debug_img.shape[1], region_center[0] + region_size // 2),
                min(debug_img.shape[0], region_center[1] + region_size // 2),
            )
            cv.rectangle(
                debug_img,
                top_left,
                bottom_right,
                color=(255, 0, 0),
                thickness=2,
                lineType=cv.LINE_AA,
            )

        imu.push(debug_img)

        show_time += 1
        if show_time % 10 != 1:
            return

        imu.show(debug_img, title="Next column point", wait=False)

    @log_calls(level=logging.DEBUG, include_return=True)
    def _astar(
        self,
        img: np.ndarray,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        direction: str,
    ) -> Optional[List[Point]]:
        """
        Find the best path between the start point and one of the goal points on the image

        The search runs on a crop around `start` and `goals` (with a 30 pixel
        margin), at the detector's `scale`; the returned path is expressed in
        the coordinates of the unscaled `img`.

        Returns:
            the path as a list of points, or None when there are no goals, the
            crop is empty or no path was found
        """

        if not goals:
            return None

        if self._scale != 1.0:
            img = cv.resize(img, None, fx=self._scale, fy=self._scale)
            start = (int(start[0] * self._scale), int(start[1] * self._scale))
            goals = [(int(g[0] * self._scale), int(g[1] * self._scale)) for g in goals]

        # calculate bounding box with margin
        all_points = goals + [start]
        xs = [p[0] for p in all_points]
        ys = [p[1] for p in all_points]

        margin = 30
        top_left = (max(0, min(xs) - margin), max(0, min(ys) - margin))
        bottom_right = (
            min(img.shape[1], max(xs) + margin),
            min(img.shape[0], max(ys) + margin),
        )

        # check bounds
        if (
            top_left[0] >= bottom_right[0]
            or top_left[1] >= bottom_right[1]
            or top_left[0] >= img.shape[1]
            or top_left[1] >= img.shape[0]
        ):
            return None

        # transform coordinates to cropped image
        start_local = (start[0] - top_left[0], start[1] - top_left[1])
        goals_local = [(g[0] - top_left[0], g[1] - top_left[1]) for g in goals]

        cropped = img[top_left[1] : bottom_right[1], top_left[0] : bottom_right[0]]

        if cropped.size == 0:
            return None

        path = rust_astar(cropped, start_local, goals_local, direction)

        if path is None:
            return None

        if self._scale != 1.0:
            path = [(int(p[0] / self._scale), int(p[1] / self._scale)) for p in path]
            top_left = (int(top_left[0] / self._scale), int(top_left[1] / self._scale))

        return [(p[0] + top_left[0], p[1] + top_left[1]) for p in path]
Detects table grid intersections using morphological filtering and template matching.
This detector implements a multi-stage pipeline:
- Binarization: Sauvola adaptive thresholding to handle varying lighting
- Morphological operations: Dilation to connect broken rule segments
- Cross-kernel matching: Template matching with a cross-shaped kernel to find rule intersections where horizontal and vertical lines meet
- Grid growing: Iterative point detection starting from a known seed point
The cross-kernel is designed to match the specific geometry of your table rules. It should be sized so that after morphology, it aligns with actual corner shapes.
Tuning Guidelines
- kernel_size: Increase if you need more selectivity (fewer false positives)
- cross_width/height: Should match rule thickness after morphology
- morph_size: Increase to connect more broken lines, but this thickens rules
- sauvola_k: Increase to threshold more aggressively (remove noise)
- search_region: Increase for documents with more warping/distortion
- distance_penalty: Increase to prefer corners closer to expected positions
Visual Debugging
Set visual=True in methods to see intermediate results and tune parameters.
def __init__(
    self,
    kernel_size: int = 21,
    cross_width: int = 6,
    cross_height: Optional[int] = None,
    morph_size: Optional[int] = None,
    sauvola_k: float = 0.04,
    sauvola_window: int = 15,
    scale: float = 1.0,
    search_region: int = 40,
    distance_penalty: float = 0.4,
    min_rows: int = 5,
    grow_threshold: float = 0.3,
    look_distance: int = 4,
):
    """
    Args:
        kernel_size (int): the size of the cross kernel;
            a larger kernel size often means that more penalty is applied, often leading
            to more sparse results
        cross_width (int): the width of one of the edges in the cross filter, should be
            roughly equal to the width of the rules in the image after morphology is applied
        cross_height (int | None): useful if the horizontal rules and vertical rules
            have different sizes; defaults to `cross_width`
        morph_size (int | None): the size of the morphology operators that are applied before
            the cross kernel. 'bridges the gaps' of broken-up lines; defaults to `cross_width`
        sauvola_k (float): threshold parameter for sauvola thresholding
        sauvola_window (int): window_size parameter for sauvola thresholding
        scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
        search_region (int): area in which to search for a new max value in `find_nearest` etc.
        distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
        min_rows (int): minimum number of rows to find before stopping the table finding algorithm
        grow_threshold (float): the threshold for accepting a new point when growing the table
        look_distance (int): how many points away to look when calculating the median slope

    Raises:
        ValueError: if `scale` is not positive, or if `_validate_parameters`
            rejects one of the other parameters
    """
    self._validate_parameters(
        kernel_size,
        cross_width,
        cross_height,
        morph_size,
        search_region,
        sauvola_k,
        sauvola_window,
        distance_penalty,
    )
    # `scale` is later used as a resize factor and as a divisor, so a zero or
    # negative value can never work; fail here instead of deep inside detection.
    if scale <= 0:
        raise ValueError("scale must be positive")

    self._kernel_size = kernel_size
    self._cross_width = cross_width
    self._cross_height = cross_width if cross_height is None else cross_height
    self._morph_size = morph_size if morph_size is not None else cross_width
    self._search_region = search_region
    self._sauvola_k = sauvola_k
    self._sauvola_window = sauvola_window
    self._distance_penalty = distance_penalty
    self._scale = scale
    self._min_rows = min_rows
    self._grow_threshold = grow_threshold
    self._look_distance = look_distance

    self._cross_kernel = self._create_cross_kernel()
Arguments:
- kernel_size (int): the size of the cross kernel; a larger kernel size often means that more penalty is applied, often leading to more sparse results
- cross_width (int): the width of one of the edges in the cross filter, should be roughly equal to the width of the rules in the image after morphology is applied
- cross_height (int | None): useful if the horizontal rules and vertical rules have different sizes
- morph_size (int | None): the size of the morphology operators that are applied before the cross kernel. 'bridges the gaps' of broken-up lines
- sauvola_k (float): threshold parameter for sauvola thresholding
- sauvola_window (int): window_size parameter for sauvola thresholding
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- search_region (int): area in which to search for a new max value in find_nearest etc.
- distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
- min_rows (int): minimum number of rows to find before stopping the table finding algorithm
- grow_threshold (float): the threshold for accepting a new point when growing the table
- look_distance (int): how many points away to look when calculating the median slope
def apply(self, img: MatLike, visual: bool = False) -> MatLike:
    """
    Run the grid detection filter on an image.

    The image is thresholded with `imu.sauvola`, passed through
    `_apply_morphology` and finally through `_apply_cross_matching`.

    Args:
        img (MatLike): the input image
        visual (bool): whether to show intermediate steps

    Returns:
        MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

    Raises:
        ValueError: if `img` is None or empty
    """
    if img is None or img.size == 0:
        raise ValueError("Input image is empty or None")

    thresholded = imu.sauvola(
        img, k=self._sauvola_k, window_size=self._sauvola_window
    )
    if visual:
        imu.show(thresholded, title="thresholded")

    dilated = self._apply_morphology(thresholded)
    if visual:
        imu.show(dilated, title="dilated")

    return self._apply_cross_matching(dilated)
Apply the grid detection filter to the input image.
Arguments:
- img (MatLike): the input image
- visual (bool): whether to show intermediate steps
Returns:
MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
@log_calls(level=logging.DEBUG, include_return=True)
def find_nearest(
    self, filtered: MatLike, point: Point, region: Optional[int] = None
) -> Tuple[Point, float]:
    """
    Find the nearest 'corner match' in the image, along with its score [0,1]

    A square window of side `region` centered on `point` is searched for the
    pixel with the highest Gaussian-weighted value. The part of the window
    that falls outside the image is ignored, but the weights stay centered
    on `point`.

    Args:
        filtered (MatLike): the filtered image (obtained through `apply`)
        point (tuple[int, int]): the approximate target point (x, y)
        region (None | int): alternative value for search region,
            overwriting the `__init__` parameter `search_region`

    Returns:
        tuple[Point, float]: the best matching point (x, y) and its weighted
            score divided by 255; `(point, 0.0)` when the search window does
            not overlap the image

    Raises:
        ValueError: if `filtered` is None or empty
    """
    if filtered is None or filtered.size == 0:
        raise ValueError("Filtered image is empty or None")

    region_size = region if region is not None else self._search_region
    x, y = point

    # Origin of the search window *before* clipping to the image. Keeping it
    # unclamped lets us cut the matching part out of the weight mask, so the
    # Gaussian peak stays on `point` even next to an image border.
    win_x = x - region_size // 2
    win_y = y - region_size // 2

    x0 = max(0, win_x)
    y0 = max(0, win_y)
    x1 = min(filtered.shape[1], win_x + region_size)
    y1 = min(filtered.shape[0], win_y + region_size)

    if x1 <= x0 or y1 <= y0:
        logger.warning(f"Point {point} is outside image bounds")
        return point, 0.0

    cropped = filtered[y0:y1, x0:x1]

    weights = self._create_gaussian_weights(region_size)[
        y0 - win_y : y1 - win_y,
        x0 - win_x : x1 - win_x,
    ]
    weighted = cropped.astype(np.float32) * weights

    best_y, best_x = np.unravel_index(np.argmax(weighted), weighted.shape)

    result_point = (
        int(x0 + best_x),
        int(y0 + best_y),
    )
    result_confidence = float(weighted[best_y, best_x]) / 255.0

    return result_point, result_confidence
Find the nearest 'corner match' in the image, along with its score [0,1]
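Arguments:
- filtered (MatLike): the filtered image (obtained through `apply`)
- point (tuple[int, int]): the approximate target point (x, y)
- region (None | int): alternative value for search region, overwriting the `__init__` parameter `region`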
def find_table_points(
    self,
    img: MatLike | PathLike[str],
    left_top: Point,
    cell_widths: list[int],
    cell_heights: list[int] | int,
    visual: bool = False,
    window: str = WINDOW,
    goals_width: Optional[int] = None,
    filtered: Optional[MatLike | PathLike[str]] = None,
    smooth: bool = False
) -> "TableGrid":
    """
    Parse the image to a `TableGrid` structure that holds all of the
    intersections between horizontal and vertical rules, starting near the `left_top` point

    Args:
        img (MatLike | PathLike[str]): the input image of a table, or a path to it
        left_top (tuple[int, int]): the starting point of the algorithm
        cell_widths (list[int]): the expected widths of the cells (based on a header template)
        cell_heights (list[int] | int): the expected height of the rows of data.
            A single int is treated as a one-element list.
            The last value from this list is used until the image has no more vertical space.
        visual (bool): whether to show intermediate steps
        window (str): the name of the OpenCV window to use for visualization
        goals_width (int | None): the width of the goal region when searching for the next point.
            If None, defaults to 1.5 * search_region.
            NOTE(review): the value is not used further in this method.
        filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
            calculating the filtered image from scratch
        smooth (bool): if True, smooth the grid after detection, using local heuristics

    Returns:
        a TableGrid object

    Raises:
        ValueError: if `cell_widths` is empty, or if `img` / `filtered` is a path
            that OpenCV cannot read
    """

    if goals_width is None:
        goals_width = self._search_region * 3 // 2

    if not cell_widths:
        raise ValueError("cell_widths must contain at least one value")

    if not isinstance(img, np.ndarray):
        img_path = os.fspath(img)
        img = cv.imread(img_path)
        # cv.imread signals failure by returning None instead of raising
        if img is None:
            raise ValueError(f"Could not read image from {img_path}")

    if filtered is None:
        filtered = self.apply(img, visual)
    else:
        if not isinstance(filtered, np.ndarray):
            filtered_path = os.fspath(filtered)
            filtered = cv.imread(filtered_path)
            if filtered is None:
                raise ValueError(
                    f"Could not read filtered image from {filtered_path}"
                )

        filtered = ensure_gray(filtered)

    if visual:
        imu.show(filtered, window=window)

    if isinstance(cell_heights, int):
        cell_heights = [cell_heights]

    # Snap the starting point to the best match, using a search window that is
    # three times wider than the regular search region
    left_top, confidence = self.find_nearest(
        filtered, left_top, int(self._search_region * 3)
    )

    if confidence < 0.1:
        logger.warning(
            f"Low confidence for the starting point: {confidence} at {left_top}"
        )

    # resize all parameters according to scale
    img = cv.resize(img, None, fx=self._scale, fy=self._scale)

    if visual:
        imu.push(img)

    filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
    cell_widths = [int(w * self._scale) for w in cell_widths]
    cell_heights = [int(h * self._scale) for h in cell_heights]
    left_top = (int(left_top[0] * self._scale), int(left_top[1] * self._scale))
    search_region = int(self._search_region * self._scale)

    img_gray = ensure_gray(img)
    filtered_gray = ensure_gray(filtered)

    table_grower = TableGrower(
        img_gray,
        filtered_gray,
        cell_widths,  # pyright: ignore
        cell_heights,  # pyright: ignore
        left_top,
        search_region,
        self._distance_penalty,
        self._look_distance,
        self._grow_threshold,
        self._min_rows,
    )

    def show_grower_progress(wait: bool = False):
        """Draw the corners found so far and the current edge candidates."""
        img_orig = np.copy(img)
        corners = table_grower.get_all_corners()
        for y in range(len(corners)):
            for x in range(len(corners[y])):
                if corners[y][x] is not None:
                    img_orig = imu.draw_points(
                        img_orig,
                        [corners[y][x]],
                        color=(0, 0, 255),
                        thickness=30,
                    )

        edge = table_grower.get_edge_points()

        for point, score in edge:
            # the green channel encodes the candidate's score
            color = (100, int(clamp(score * 255, 0, 255)), 100)
            imu.draw_point(img_orig, point, color=color, thickness=20)

        imu.show(img_orig, wait=wait)

    if visual:
        threshold = self._grow_threshold

        # python implementation of rust loops, for visualization purposes
        # note this is a LOT slower
        while table_grower.grow_point(img_gray, filtered_gray) is not None:
            show_grower_progress()

        show_grower_progress(True)

        original_threshold = threshold

        loops_without_change = 0

        while not table_grower.is_table_complete():
            loops_without_change += 1

            # give up when nothing changed for 50 consecutive iterations
            if loops_without_change > 50:
                break

            if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                show_grower_progress()

                loops_without_change = 0

                grown = False
                while table_grower.grow_point(img_gray, filtered_gray) is not None:
                    show_grower_progress()
                    grown = True
                    # successful growth moves the threshold back up,
                    # but never above its initial value
                    threshold = min(0.1 + 0.9 * threshold, original_threshold)
                    table_grower.set_threshold(threshold)

                if not grown:
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

            else:
                # nothing could be extrapolated: relax the threshold and retry
                threshold *= 0.9
                table_grower.set_threshold(threshold)

                if table_grower.grow_point(img_gray, filtered_gray) is not None:
                    show_grower_progress()
                    loops_without_change = 0

    else:
        table_grower.grow_table(img_gray, filtered_gray)

    if smooth:
        table_grower.smooth_grid()
    corners = table_grower.get_all_corners()

    # guard against an empty grid: corners[0] would raise IndexError
    n_cols = len(corners[0]) if corners else 0
    logger.info(
        f"Table growth complete, found {len(corners)} rows and {n_cols} columns"
    )
    # rescale corners back to original size
    if self._scale != 1.0:
        for y in range(len(corners)):
            for x in range(len(corners[y])):
                if corners[y][x] is not None:
                    corners[y][x] = (
                        int(corners[y][x][0] / self._scale),  # pyright:ignore
                        int(corners[y][x][1] / self._scale),  # pyright:ignore
                    )

    return TableGrid(corners)  # pyright: ignore
Parse the image to a TableGrid structure that holds all of the
intersections between horizontal and vertical rules, starting near the left_top point
Arguments:
- img (MatLike): the input image of a table
- left_top (tuple[int, int]): the starting point of the algorithm
- cell_widths (list[int]): the expected widths of the cells (based on a header template)
- cell_heights (list[int]): the expected height of the rows of data. The last value from this list is used until the image has no more vertical space.
- visual (bool): whether to show intermediate steps
- window (str): the name of the OpenCV window to use for visualization
- goals_width (int | None): the width of the goal region when searching for the next point. If None, defaults to 1.5 * search_region
- filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of calculating the filtered image from scratch
- smooth (bool): if True, smooth the grid after detection, using local heuristics
Returns:
a TableGrid object
class TableGrid(TableIndexer):
    """
    A data class that allows segmenting the image into cells

    The grid is stored as a 2D list of corner points (x, y): `points[r][c]` is
    the intersection of horizontal rule `r` with vertical rule `c`.

    When the grid was built from two halves (see `from_split`), `_right_offset`
    holds the point-column index at which the right half starts being offset
    by one: the "cell" between the last point of the left half and the first
    point of the right half is not a real table cell and is skipped by all
    indexing methods.
    """

    _right_offset: int | None = None

    def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None):
        """
        Args:
            points: a 2D list of intersections between hor. and vert. rules
            right_offset: index of the gap column between a left and right
                half of the table, or None for a single contiguous grid
        """
        self._points = points
        self._right_offset = right_offset

    @property
    def points(self) -> list[list[Point]]:
        """The 2D list of corner points (not copied)."""
        return self._points

    def row(self, i: int) -> list[Point]:
        """Return the points on horizontal rule `i`."""
        assert 0 <= i and i < len(self._points)
        return self._points[i]

    @property
    def cols(self) -> int:
        """Number of table columns (the gap column of a split grid is not counted)."""
        if self._right_offset is not None:
            return len(self.row(0)) - 2
        else:
            return len(self.row(0)) - 1

    @property
    def rows(self) -> int:
        """Number of table rows (one less than the number of horizontal rules)."""
        return len(self._points) - 1

    @staticmethod
    def from_split(
        split_grids: Split["TableGrid"], offsets: Split[Point]
    ) -> "TableGrid":
        """
        Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image

        Only the rows that exist in both halves are kept.

        Args:
            split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
            offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
        """

        def offset_points(points, offset):
            return [
                [(p[0] + offset[0], p[1] + offset[1]) for p in row] for row in points
            ]

        split_points = split_grids.apply(
            lambda grid, offset: offset_points(grid.points, offset), offsets
        )

        points = []

        rows = min(split_grids.left.rows, split_grids.right.rows)

        # rows + 1 horizontal rules delimit `rows` table rows
        for row in range(rows + 1):
            row_points = []

            row_points.extend(split_points.left[row])
            row_points.extend(split_points.right[row])

            points.append(row_points)

        # the gap column sits right after the last column of the left half
        table_grid = TableGrid(points, split_grids.left.cols)

        return table_grid

    def save(self, path: str | Path):
        """Write the points and the right offset to `path` as JSON."""
        with open(path, "w") as f:
            json.dump({"points": self.points, "right_offset": self._right_offset}, f)

    @staticmethod
    def from_saved(path: str | Path) -> "TableGrid":
        """Load a grid that was written by `save`."""
        with open(path, "r") as f:
            data = json.load(f)

        right_offset = data.get("right_offset", None)
        # JSON stores the points as lists; convert them back to tuples
        points = [[(p[0], p[1]) for p in row] for row in data["points"]]
        return TableGrid(points, right_offset)

    def add_left_col(self, width: int):
        """Prepend a column of points `width` pixels left of the first column (in place)."""
        for row in self._points:
            first = row[0]
            new_first = (first[0] - width, first[1])
            row.insert(0, new_first)

    def add_top_row(self, height: int):
        """Prepend a row of points `height` pixels above the first row (in place)."""
        new_row = []
        for point in self._points[0]:
            new_row.append((point[0], point[1] - height))

        self.points.insert(0, new_row)

    def _surrounds(self, rect: list[Point], point: tuple[float, float]) -> bool:
        """
        Check whether `point` (x, y) lies within the quadrilateral `rect`,
        given as [left-top, right-top, right-bottom, left-bottom]
        """
        lt, rt, rb, lb = rect
        x, y = point

        top = _Rule(*lt, *rt)
        if top._y_at_x(x) > y:
            return False

        right = _Rule(*rt, *rb)
        if right._x_at_y(y) < x:
            return False

        bottom = _Rule(*lb, *rb)
        if bottom._y_at_x(x) < y:
            return False

        left = _Rule(*lb, *lt)
        if left._x_at_y(y) > x:
            return False

        return True

    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Returns the coordinate (row, col) of the cell that contains the given position

        Args:
            point (tuple[float, float]): a location (x, y) in the input image

        Returns:
            tuple[int, int]: the cell index (row, col) that contains the given point,
            or (-1, -1) if no cell contains it
        """
        for r in range(len(self._points) - 1):
            offset = 0
            for c in range(len(self.row(0)) - 1):
                if self._right_offset is not None and c == self._right_offset:
                    # skip the gap between both halves; every column after it
                    # has a table index one lower than its point index
                    offset = -1
                    continue

                if self._surrounds(
                    [
                        self._points[r][c],
                        self._points[r][c + 1],
                        self._points[r + 1][c + 1],
                        self._points[r + 1][c],
                    ],
                    point,
                ):
                    return (r, c + offset)

        return (-1, -1)

    def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
        """
        Returns the polygon (lt, rt, rb, lb) that encloses the cell at the
        given (row, col) position
        """
        r, c = cell

        self._check_row_idx(r)
        self._check_col_idx(c)

        if self._right_offset is not None and c >= self._right_offset:
            c = c + 1

        return (
            self._points[r][c],
            self._points[r][c + 1],
            self._points[r + 1][c + 1],
            self._points[r + 1][c],
        )

    def region(
        self, start: tuple[int, int], end: tuple[int, int]
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding polygon for the rectangular region of cells that goes
        from `start` to `end` (both (row, col), inclusive)

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y)
        """
        r0, c0 = start
        r1, c1 = end

        self._check_row_idx(r0)
        self._check_row_idx(r1)
        self._check_col_idx(c0)
        self._check_col_idx(c1)

        if self._right_offset is not None and c0 >= self._right_offset:
            c0 = c0 + 1

        if self._right_offset is not None and c1 >= self._right_offset:
            c1 = c1 + 1

        lt = self._points[r0][c0]
        rt = self._points[r0][c1 + 1]
        rb = self._points[r1 + 1][c1 + 1]
        lb = self._points[r1 + 1][c0]

        return lt, rt, rb, lb

    def visualize_points(self, img: MatLike):
        """
        Draw the detected table points on the image for visual verification

        The circles are drawn onto `img` itself; every row gets its own hue.
        """
        import colorsys

        def clr(index, total_steps):
            hue = index / total_steps  # Normalized hue between 0 and 1
            r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
            return int(r * 255), int(g * 255), int(b * 255)

        for i, row in enumerate(self._points):
            for p in row:
                cv.circle(img, p, 4, clr(i, len(self._points)), -1)

        imu.show(img)

    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """
        Split the row into regions of continuous text

        For every column, the image around the column's left vertical rule is
        scored with `imu.text_presence_score`. A rule without text over it
        starts a new region.

        Args:
            img (MatLike): the image the grid was detected on
            row (int): the table row to analyse
            margin_x (int): horizontal margin added around each vertical rule
            margin_y (int): vertical margin added to the rule segment
                (negative values shrink it)

        Returns:
            a list of spans ((row, start col), (row, end col)), both inclusive
        """

        def vertical_rule_crop(row: int, col: int):
            self._check_col_idx(col)
            self._check_row_idx(row)

            if self._right_offset is not None and col >= self._right_offset:
                col = col + 1

            top = self._points[row][col]
            bottom = self._points[row + 1][col]

            left = int(min(top[0], bottom[0]))
            right = int(max(top[0], bottom[0]))

            # Clamp at 0: a negative slice bound would be interpreted by numpy
            # as counting from the far edge, yielding a wrong (often empty) crop
            # for rules close to the image border.
            y0 = max(0, int(top[1]) - margin_y)
            y1 = max(0, int(bottom[1]) + margin_y)
            x0 = max(0, left - margin_x)
            x1 = max(0, right + margin_x)

            return img[y0:y1, x0:x1]

        result = []

        start = None
        for col in range(self.cols):
            crop = vertical_rule_crop(row, col)
            text_over_score = imu.text_presence_score(crop)
            text_over = text_over_score > -0.10

            if not text_over:
                if start is not None:
                    result.append(((row, start), (row, col - 1)))
                start = col

        if start is not None:
            result.append(((row, start), (row, self.cols - 1)))

        return result
A data class that allows segmenting the image into cells
def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None):
    """
    Store the corner points of the grid.

    Args:
        points: a 2D list of intersections between hor. and vert. rules
        right_offset: stored as `_right_offset`; None when not given
    """
    self._points, self._right_offset = points, right_offset
Arguments:
- points: a 2D list of intersections between hor. and vert. rules
@staticmethod
def from_split(
    split_grids: Split["TableGrid"], offsets: Split[Point]
) -> "TableGrid":
    """
    Merge the grids of the left and right table halves into a single grid
    expressed in the coordinates of the original (non-cropped) image.

    Args:
        split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
        offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened

    Returns:
        a TableGrid whose rows are the left points followed by the right
        points, limited to the rows present in both halves
    """

    def shift(grid, offset):
        # translate every point of the half back into full-image coordinates
        return [
            [(p[0] + offset[0], p[1] + offset[1]) for p in line]
            for line in grid.points
        ]

    shifted = split_grids.apply(shift, offsets)

    # `rows` table rows are delimited by `rows + 1` horizontal rules
    rule_count = min(split_grids.left.rows, split_grids.right.rows) + 1

    merged = [[*shifted.left[i], *shifted.right[i]] for i in range(rule_count)]

    return TableGrid(merged, split_grids.left.cols)
Convert two TableGrid objects into one, that is able to segment the original (non-cropped) image
Arguments:
- split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
- offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
@staticmethod
def from_saved(path: str | Path) -> "TableGrid":
    """
    Load a grid from a JSON file with a "points" entry and an optional
    "right_offset" entry.
    """
    with open(path, "r") as f:
        data = json.load(f)

    gap = data.get("right_offset", None)
    # JSON has no tuples: turn each [x, y] list back into an (x, y) tuple
    grid = []
    for stored_row in data["points"]:
        grid.append([(p[0], p[1]) for p in stored_row])

    return TableGrid(grid, gap)
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Find the cell that contains the given image position.

    Args:
        point (tuple[float, float]): a location in the input image

    Returns:
        tuple[int, int]: the cell index (row, col) that contains the point,
        or (-1, -1) when no cell surrounds it
    """
    for r, (upper, lower) in enumerate(zip(self._points, self._points[1:])):
        skipped = 0
        for c in range(len(self.row(0)) - 1):
            if self._right_offset is not None and c == self._right_offset:
                # this point column is skipped; later columns report an
                # index that is one lower than their point index
                skipped = 1
                continue

            quad = [upper[c], upper[c + 1], lower[c + 1], lower[c]]
            if self._surrounds(quad, point):
                return (r, c - skipped)

    return (-1, -1)
Returns the coordinate (row, col) of the cell that contains the given position
Arguments:
- point (tuple[float, float]): a location in the input image
Returns:
tuple[int, int]: the cell index (row, col) that contains the given point
def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
    """
    Return the four corners (left-top, right-top, right-bottom, left-bottom)
    of the cell at the given (row, col) position.
    """
    row_idx, col_idx = cell

    self._check_row_idx(row_idx)
    self._check_col_idx(col_idx)

    gap = self._right_offset
    if gap is not None and col_idx >= gap:
        # columns at or past the offset are stored one point further right
        col_idx += 1

    upper = self._points[row_idx]
    lower = self._points[row_idx + 1]

    return (
        upper[col_idx],
        upper[col_idx + 1],
        lower[col_idx + 1],
        lower[col_idx],
    )
Returns the polygon (as used in e.g. OpenCV) that encloses the cell at the given cell position, as four points: left-top, right-top, right-bottom, left-bottom
def region(
    self, start: tuple[int, int], end: tuple[int, int]
) -> tuple[Point, Point, Point, Point]:
    """
    Get the corner points of the block of cells spanning from `start` to
    `end`, both given as (row, col) and both inclusive.

    Returns:
        4 points: lt, rt, rb, lb, in format (x, y)
    """
    (first_row, first_col), (last_row, last_col) = start, end

    for row_idx in (first_row, last_row):
        self._check_row_idx(row_idx)
    for col_idx in (first_col, last_col):
        self._check_col_idx(col_idx)

    gap = self._right_offset
    if gap is not None:
        # columns at or past the offset are stored one point further right
        if first_col >= gap:
            first_col += 1
        if last_col >= gap:
            last_col += 1

    upper = self._points[first_row]
    lower = self._points[last_row + 1]

    return (
        upper[first_col],
        upper[last_col + 1],
        lower[last_col + 1],
        lower[first_col],
    )
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
def visualize_points(self, img: MatLike):
    """
    Show the grid points on top of `img` so the detection can be checked by eye.

    Filled circles are drawn directly onto `img`; every row of points gets
    its own hue.
    """
    import colorsys

    row_count = len(self._points)

    for idx, line in enumerate(self._points):
        # spread the hues evenly over the rows
        red, green, blue = colorsys.hsv_to_rgb(idx / row_count, 1.0, 1.0)
        colour = (int(red * 255), int(green * 255), int(blue * 255))

        for corner in line:
            cv.circle(img, corner, 4, colour, -1)

    imu.show(img)
Draw the detected table points on the image for visual verification
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Split the row into regions of continuous text

    For every column, the image around the column's left vertical rule is
    scored with `imu.text_presence_score`. A rule without text over it
    starts a new region.

    Args:
        img (MatLike): the image the grid was detected on
        row (int): the table row to analyse
        margin_x (int): horizontal margin added around each vertical rule
        margin_y (int): vertical margin added to the rule segment
            (negative values shrink it)

    Returns:
        a list of spans ((row, start col), (row, end col)), both inclusive
    """

    def vertical_rule_crop(row: int, col: int):
        self._check_col_idx(col)
        self._check_row_idx(row)

        if self._right_offset is not None and col >= self._right_offset:
            col = col + 1

        top = self._points[row][col]
        bottom = self._points[row + 1][col]

        left = int(min(top[0], bottom[0]))
        right = int(max(top[0], bottom[0]))

        # Clamp at 0: a negative slice bound would be interpreted by numpy
        # as counting from the far edge, yielding a wrong (often empty) crop
        # for rules close to the image border.
        y0 = max(0, int(top[1]) - margin_y)
        y1 = max(0, int(bottom[1]) + margin_y)
        x0 = max(0, left - margin_x)
        x1 = max(0, right + margin_x)

        return img[y0:y1, x0:x1]

    result = []

    start = None
    for col in range(self.cols):
        crop = vertical_rule_crop(row, col)
        text_over_score = imu.text_presence_score(crop)
        text_over = text_over_score > -0.10

        if not text_over:
            # a clean rule closes the running region and opens a new one
            if start is not None:
                result.append(((row, start), (row, col - 1)))
            start = col

    if start is not None:
        result.append(((row, start), (row, self.cols - 1)))

    return result
Split the row into regions of continuous text
Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans ((row, start col), (row, end col)), both ends inclusive
class HeaderAligner:
    """
    Aligns table header templates to subject images using feature-based registration.

    This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and
    matching to compute a homography transformation that maps points from a header
    template image to their corresponding locations in full table images.

    ## How it Works

    1. **Feature Detection**: Extracts ORB keypoints from both template and subject
    2. **Feature Matching**: Finds correspondences using Hamming distance
    3. **Filtering**: Keeps top matches and prunes based on spatial consistency
    4. **Homography Estimation**: Computes perspective transform using RANSAC

    The computed homography can then transform any point from template space to
    image space, allowing you to locate table structures based on your annotation.

    ## Preprocessing Options

    - Set `k` parameter to apply Sauvola thresholding before feature detection.
      This can improve matching on documents with variable lighting.
    - Set `k=None` to use raw images (only the third channel is kept, which is
      the red channel for BGR images as loaded by OpenCV)

    ## Tuning Guidelines

    - **max_features**: Increase if matching fails on complex templates
    - **match_fraction**: Decrease if you get many incorrect matches
    - **max_dist**: Increase for documents with more warping/distortion
    - **scale**: Decrease (<1.0) to speed up on high-resolution images

    Args:
        template (MatLike | PathLike[str] | str | None): Header template image or path.
            This should contain a clear, representative view of the table header.
            If None, the template has to be set through the `template` property
            before aligning.
        max_features (int): Maximum ORB features to detect. More features = slower
            but potentially more robust matching.
        patch_size (int): ORB patch size for feature extraction.
        match_fraction (float): Fraction [0, 1] of matches to keep after sorting by
            quality. Higher = more matches but potentially more outliers.
        scale (float): Image downscaling factor (0, 1] for processing speed.
        max_dist (float): Maximum allowed distance (relative to image size) between
            matched keypoints. Filters out spatially inconsistent matches.
        k (float | None): Sauvola threshold parameter for preprocessing. If None,
            no thresholding is applied. Typical range: 0.03-0.15.
    """

    def __init__(
        self,
        template: None | MatLike | PathLike[str] | str = None,
        max_features: int = 25_000,
        patch_size: int = 31,
        match_fraction: float = 0.6,
        scale: float = 1.0,
        max_dist: float = 1.00,
        k: float | None = 0.05,
    ):
        """
        Args:
            template (MatLike | PathLike[str] | str | None): (path of) template image,
                with the table template clearly visible
            max_features (int): maximal number of features that will be extracted by ORB
            patch_size (int): for ORB feature extractor
            match_fraction (float): best fraction of matches that are kept
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            max_dist (float): maximum distance (relative to image size) of matched features.
                Increase this value if the warping between image and template needs to be more aggressive
            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done

        Raises:
            TauluException: if `scale` is not in (0, 1], or the template path cannot be read
        """

        self._k = k
        if scale > 1.0:
            raise TauluException(
                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
            )
        if scale <= 0:
            raise TauluException("Use 0 < scale <= 1.0")

        self._scale = scale
        self._max_features = max_features
        self._patch_size = patch_size
        self._match_fraction = match_fraction
        self._max_dist = max_dist

        self._template_orig: None | MatLike = None
        self._template = None

        if template is not None:
            self._template = self._scale_img(self._load(template))
            self._preprocess_template()

    @staticmethod
    def _load(source) -> MatLike:
        """Return `source` as an image, reading it from disk when it is a path."""
        # isinstance also covers pathlib.Path, which `type(x) is PathLike` never did
        if isinstance(source, (str, PathLike)):
            path = fspath(source)
            loaded = cv.imread(path)
            # cv.imread signals failure by returning None instead of raising
            if loaded is None:
                raise TauluException(f"Could not read image from {path}")
            return loaded

        return cast(MatLike, source)

    def _scale_img(self, img: MatLike) -> MatLike:
        """Resize `img` by the configured scale factor."""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=self._scale, fy=self._scale)

    def _unscale_img(self, img: MatLike) -> MatLike:
        """Undo `_scale_img`."""
        if self._scale == 1.0:
            return img

        return cv.resize(img, None, fx=1 / self._scale, fy=1 / self._scale)

    def _unscale_homography(self, h: np.ndarray) -> np.ndarray:
        """
        Convert a homography between scaled images into one between
        full-resolution images: scale down, apply `h`, scale back up.
        """
        if self._scale == 1.0:
            return h

        scale_matrix = np.diag([self._scale, self._scale, 1.0])
        inv_scale_matrix = np.diag([1.0 / self._scale, 1.0 / self._scale, 1.0])
        return inv_scale_matrix @ h @ scale_matrix

    @property
    def template(self):
        """The template image that subject images are aligned to"""
        return self._template

    @template.setter
    def template(self, value: MatLike | str):
        """Set the template image as a path or an image"""

        # TODO: check if the image has the right properties (dimensions etc.)
        # The template is stored scaled, exactly like in `__init__`:
        # `_unscale_homography` and `view_alignment` rely on that.
        self._template = self._scale_img(self._load(value))

        self._preprocess_template()

    def _preprocess_template(self):
        """Keep a grayscale copy of the template and prepare it for feature detection."""
        self._template_orig = cv.cvtColor(self._template, cv.COLOR_BGR2GRAY)
        if self._k is not None:
            self._template = imu.sauvola(self._template, self._k)
            self._template = cv.bitwise_not(self._template)
        else:
            # keep only the third channel
            _, _, self._template = cv.split(self._template)

    def _preprocess_image(self, img: MatLike):
        """Apply the same preprocessing to a subject image as was applied to the template."""
        if self._template_orig is None:
            raise TauluException("process the template first")

        if self._k is not None:
            img = imu.sauvola(img, self._k)
            img = cv.bitwise_not(img)
        else:
            _, _, img = cv.split(img)

        return img

    @log_calls(level=logging.DEBUG, include_return=True)
    def _find_transform_of_template_on(
        self, im: MatLike, visual: bool = False, window: str = WINDOW
    ):
        """
        Estimate the homography that maps template coordinates onto `im`.

        Raises:
            TauluException: if no features are found or no homography can be estimated
        """
        im = self._scale_img(im)
        # Detect ORB features and compute descriptors.
        orb = cv.ORB_create(
            self._max_features,  # type:ignore
            patchSize=self._patch_size,
        )
        keypoints_im, descriptors_im = orb.detectAndCompute(im, None)
        keypoints_tg, descriptors_tg = orb.detectAndCompute(self._template, None)

        if descriptors_im is None or descriptors_tg is None:
            raise TauluException("No features detected in the image or the template")

        # Match features
        matcher = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
        matches = matcher.match(descriptors_im, descriptors_tg)

        # Sort matches by score
        matches = sorted(matches, key=lambda x: x.distance)

        # Remove not so good matches
        num_good_matches = int(len(matches) * self._match_fraction)
        matches = matches[:num_good_matches]

        if visual:
            final_img_filtered = cv.drawMatches(
                im,
                keypoints_im,
                self._template,
                keypoints_tg,
                matches[:10],
                None,  # type:ignore
                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
            )
            imu.show(final_img_filtered, title="matches", window=window)

        # Extract location of good matches
        points1 = np.zeros((len(matches), 2), dtype=np.float32)
        points2 = np.zeros((len(matches), 2), dtype=np.float32)

        for i, match in enumerate(matches):
            points1[i, :] = keypoints_tg[match.trainIdx].pt
            points2[i, :] = keypoints_im[match.queryIdx].pt

        # Prune reference points based upon distance between
        # key points. This assumes a fairly good alignment to start with
        # due to the protocol used (location of the sheets)
        refdist = np.abs(points1 - points2)

        # Keypoints are (x, y): column 0 is compared to the image width
        # (shape[1]) and column 1 to the image height (shape[0]).
        img_height, img_width = im.shape[:2]
        mask = (refdist[:, 0] < img_width * self._max_dist) & (
            refdist[:, 1] < img_height * self._max_dist
        )
        points1 = points1[mask]
        points2 = points2[mask]

        # a homography has 8 degrees of freedom: at least 4 point pairs are needed
        if len(points1) < 4:
            raise TauluException(
                "Not enough matching features to align the header template"
            )

        # Find homography
        h, _ = cv.findHomography(points1, points2, cv.RANSAC)

        if h is None:
            raise TauluException("Could not estimate a homography for the header")

        return self._unscale_homography(h)

    def view_alignment(self, img: MatLike, h: NDArray):
        """
        Show the alignment of the template on the given image
        by transforming it using the supplied transformation matrix `h`
        and visualising both on different channels

        Args:
            img (MatLike): the image on which the template is transformed
            h (NDArray): the transformation matrix
        """

        im = imu.ensure_gray(img)
        header = imu.ensure_gray(self._unscale_img(self._template))
        height, width = im.shape

        header_warped = cv.warpPerspective(header, h, (width, height))

        merged = np.full((height, width, 3), 255, dtype=np.uint8)

        merged[..., 1] = im
        merged[..., 2] = header_warped

        return imu.show(merged)

    @log_calls(level=logging.DEBUG, include_return=True)
    def align(
        self, img: MatLike | str, visual: bool = False, window: str = WINDOW
    ) -> NDArray:
        """
        Calculates a homogeneous transformation matrix that maps pixels of
        the template to the given image

        Args:
            img (MatLike | str): the subject image, or a path to it
            visual (bool): whether to show the matches and the resulting alignment
            window (str): the name of the OpenCV window to use for visualization

        Raises:
            TauluException: if the image cannot be read, no template is set,
                or the alignment fails
        """

        logger.info("Aligning header with supplied table image")

        img = self._load(img)

        img = self._preprocess_image(img)

        h = self._find_transform_of_template_on(img, visual, window)

        if visual:
            self.view_alignment(img, h)

        return h

    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
        """
        Transform the given point (in template-space) using the transformation h
        (obtained through the `align` method)

        Args:
            h (NDArray): transformation matrix of shape (3, 3)
            point (Iterable[int]): the to-be-transformed point, should conform to (x, y)

        Returns:
            tuple[int, int]: the transformed point (x, y), truncated to integers
        """

        point = np.array([[point[0], point[1], 1]])  # type:ignore
        transformed = np.dot(h, point.T)  # type:ignore

        # back from homogeneous to cartesian coordinates
        transformed /= transformed[2]

        return int(transformed[0][0]), int(transformed[1][0])
Aligns table header templates to subject images using feature-based registration.
This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and matching to compute a homography transformation that maps points from a header template image to their corresponding locations in full table images.
How it Works
- Feature Detection: Extracts ORB keypoints from both template and subject
- Feature Matching: Finds correspondences using Hamming distance
- Filtering: Keeps top matches and prunes based on spatial consistency
- Homography Estimation: Computes perspective transform using RANSAC
The computed homography can then transform any point from template space to image space, allowing you to locate table structures based on your annotation.
Preprocessing Options
- Set the `k` parameter to apply Sauvola thresholding before feature detection. This can improve matching on documents with variable lighting.
- Set `k=None` to use raw images (only the third channel is extracted, i.e. the red channel for BGR images as loaded by OpenCV)
Tuning Guidelines
- max_features: Increase if matching fails on complex templates
- match_fraction: Decrease if you get many incorrect matches
- max_dist: Increase for documents with more warping/distortion
- scale: Decrease (<1.0) to speed up on high-resolution images
Arguments:
- template (MatLike | PathLike[str] | str | None): Header template image or path. This should contain a clear, representative view of the table header.
- max_features (int): Maximum ORB features to detect. More features = slower but potentially more robust matching.
- patch_size (int): ORB patch size for feature extraction.
- match_fraction (float): Fraction [0, 1] of matches to keep after sorting by quality. Higher = more matches but potentially more outliers.
- scale (float): Image downscaling factor (0, 1] for processing speed.
- max_dist (float): Maximum allowed distance (relative to image size) between matched keypoints. Filters out spatially inconsistent matches.
- k (float | None): Sauvola threshold parameter for preprocessing. If None, no thresholding is applied. Typical range: 0.03-0.15.
def __init__(
    self,
    template: None | MatLike | PathLike[str] | str = None,
    max_features: int = 25_000,
    patch_size: int = 31,
    match_fraction: float = 0.6,
    scale: float = 1.0,
    max_dist: float = 1.00,
    k: float | None = 0.05,
):
    """
    Args:
        template (MatLike | PathLike[str] | str | None): (path of) template image,
            with the table template clearly visible. When None, no template is
            prepared yet.
        max_features (int): maximal number of features that will be extracted by ORB
        patch_size (int): for ORB feature extractor
        match_fraction (float): best fraction of matches that are kept
        scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
        max_dist (float): maximum distance (relative to image size) of matched features.
            Increase this value if the warping between image and template needs to be more aggressive
        k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done

    Raises:
        TauluException: if `scale` is not in (0, 1], or the template path cannot be read
    """

    # isinstance also accepts pathlib.Path objects; `type(x) is PathLike`
    # is never true because PathLike is an abstract base class
    if isinstance(template, (str, PathLike)):
        template_path = fspath(template)
        template = cv.imread(template_path)
        # cv.imread signals failure by returning None instead of raising
        if template is None:
            raise TauluException(f"Could not read image from {template_path}")

    self._k = k
    if scale > 1.0:
        raise TauluException(
            "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
        )
    if scale <= 0:
        raise TauluException("Use 0 < scale <= 1.0")

    self._scale = scale
    self._template_orig: None | MatLike = None
    self._max_features = max_features
    self._patch_size = patch_size
    self._match_fraction = match_fraction
    self._max_dist = max_dist

    if template is None:
        # nothing to scale or preprocess yet
        self._template = None
    else:
        self._template = self._scale_img(cast(MatLike, template))
        self._preprocess_template()
Arguments:
- template (MatLike | str): (path of) template image, with the table template clearly visible
- max_features (int): maximal number of features that will be extracted by ORB
- patch_size (int): for ORB feature extractor
- match_fraction (float): best fraction of matches that are kept
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- max_dist (float): maximum distance (relative to image size) of matched features. Increase this value if the warping between image and template needs to be more aggressive
- k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
@property
def template(self):
    """Template image against which subject images are aligned."""
    current = self._template
    return current
The template image that subject images are aligned to
def view_alignment(self, img: MatLike, h: NDArray):
    """
    Visualise how well the template lines up with the given image.

    The template is warped with the transformation matrix `h` and shown
    together with the image, each on its own colour channel.

    Args:
        img (MatLike): the image on which the template is transformed
        h (NDArray): the transformation matrix
    """

    subject = imu.ensure_gray(img)
    template_gray = imu.ensure_gray(self._unscale_img(self._template))
    rows, cols = subject.shape

    warped_template = cv.warpPerspective(template_gray, h, (cols, rows))

    # start from a white canvas; channel 1 holds the image, channel 2 the template
    overlay = np.full((rows, cols, 3), 255, dtype=np.uint8)
    overlay[..., 1] = subject
    overlay[..., 2] = warped_template

    return imu.show(overlay)
Show the alignment of the template on the given image
by transforming it using the supplied transformation matrix h
and visualising both on different channels
Arguments:
- img (MatLike): the image on which the template is transformed
- h (NDArray): the transformation matrix
@log_calls(level=logging.DEBUG, include_return=True)
def align(
    self, img: MatLike | str, visual: bool = False, window: str = WINDOW
) -> NDArray:
    """
    Compute the homogeneous transformation matrix that maps template pixels
    onto the given image.

    Args:
        img (MatLike | str): the image (or the path of the image) to align to
        visual (bool): if True, show the resulting alignment
        window (str): name of the window used for visualisation

    Returns:
        NDArray: the transformation matrix
    """

    logger.info("Aligning header with supplied table image")

    if type(img) is str:
        img = cv.imread(img)

    prepared = self._preprocess_image(cast(MatLike, img))
    transform = self._find_transform_of_template_on(prepared, visual, window)

    if visual:
        self.view_alignment(prepared, transform)

    return transform
Calculates a homogeneous transformation matrix that maps pixels of the template to the given image
def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
    """
    Transform the given point (in template-space) using the transformation h
    (obtained through the `align` method)

    Args:
        h (NDArray): transformation matrix of shape (3, 3)
        point (Iterable[int]): the to-be-transformed point, should conform to (x, y)

    Returns:
        tuple[int, int]: the transformed point (x, y), truncated to integers
    """

    # unpack instead of indexing so that any iterable (not only sequences) works
    x, y = point

    # compute in float64: dividing an integer-typed result in place would raise
    homogeneous = np.array([x, y, 1], dtype=np.float64)
    transformed = np.asarray(h, dtype=np.float64) @ homogeneous

    # normalise the homogeneous coordinate back to 1
    transformed = transformed / transformed[2]

    return int(transformed[0]), int(transformed[1])
Transform the given point (in template-space) using the transformation h
(obtained through the align method)
Arguments:
- h (NDArray): transformation matrix of shape (3, 3)
- point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
class HeaderTemplate(TableIndexer):
    """
    Collection of annotated rules (lines) of a table header.

    The rules are split into horizontal and vertical ones; consecutive rules
    bound the rows and columns that the indexing methods work with.
    """

    def __init__(self, rules: Iterable[Iterable[int]]):
        """
        A HeaderTemplate is a collection of rules of a table. This class implements methods
        for finding cell positions in a table image, given the template the image adheres to.

        Args:
            rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
        """

        super().__init__()
        self._rules = [_Rule(*rule) for rule in rules]
        self._h_rules = sorted(
            [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y
        )
        self._v_rules = sorted(
            [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x
        )

    @log_calls(level=logging.DEBUG)
    def save(self, path: PathLike[str]):
        """
        Save the HeaderTemplate to the given path, as JSON
        """

        data = {"rules": [r.to_dict() for r in self._rules]}

        with open(path, "w") as f:
            json.dump(data, f)

    @staticmethod
    @log_calls(level=logging.DEBUG)
    def from_saved(path: PathLike[str]) -> "HeaderTemplate":
        """
        Load a HeaderTemplate from a JSON file with a "rules" list whose
        entries have the keys "x0", "y0", "x1" and "y1"
        """

        with open(path, "r") as f:
            data = json.load(f)
            rules = data["rules"]
            rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules]

            return HeaderTemplate(rules)

    @property
    def cols(self) -> int:
        """Number of columns: one less than the number of vertical rules"""
        return len(self._v_rules) - 1

    @property
    def rows(self) -> int:
        """Number of rows: one less than the number of horizontal rules"""
        return len(self._h_rules) - 1

    @staticmethod
    @log_calls(level=logging.DEBUG)
    def annotate_image(
        template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
    ) -> "HeaderTemplate":
        """
        Utility method that allows users to create a template from a template image.

        The user is asked to click to annotate lines (two clicks per line).
        A right click removes the last annotated line.

        Args:
            template: the image on which to annotate the header lines
            crop (str | None): if str, crop the template image first, then do the annotation.
                The cropped image will be stored at the supplied path
            margin (int): margin to add around the cropping of the header
        """

        if isinstance(template, str):
            template = cv.imread(template)
        template = cast(MatLike, template)

        if crop is not None:
            cropped = HeaderTemplate._crop(template, margin)
            cv.imwrite(os.fspath(crop), cropped)
            template = cropped

        start_point = None
        lines: list[list[int]] = []

        anno_template = np.copy(template)

        def get_point(event, x, y, flags, params):
            nonlocal lines, start_point, anno_template
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                if start_point is not None:
                    # start_point is stored as (y, x)
                    line: list[int] = [start_point[1], start_point[0], x, y]

                    cv.line(  # type:ignore
                        anno_template,  # type:ignore
                        (start_point[1], start_point[0]),
                        (x, y),
                        (0, 255, 0),
                        2,
                        cv.LINE_AA,
                    )
                    cv.imshow(constants.WINDOW, anno_template)  # type:ignore

                    lines.append(line)
                    start_point = None
                else:
                    start_point = (y, x)
            elif event == cv.EVENT_RBUTTONDOWN:
                start_point = None

                # remove the last annotation
                lines = lines[:-1]

                # redraw the remaining lines on a fresh copy of the clean image,
                # so the removed line disappears and `template` stays untouched
                anno_template = np.copy(template)

                for line in lines:
                    cv.line(
                        anno_template,
                        (line[0], line[1]),
                        (line[2], line[3]),
                        (0, 255, 0),
                        2,
                        cv.LINE_AA,
                    )

                cv.imshow(constants.WINDOW, anno_template)

        print(ANNO_HELP)

        imu.show(anno_template, get_point, title="annotate the header")

        return HeaderTemplate(lines)

    @staticmethod
    @log_calls(level=logging.DEBUG, include_return=True)
    def _crop(template: MatLike, margin: int = 10) -> MatLike:
        """
        Crop the image to contain only the annotations, such that it can be used as the header image in the taulu workflow.

        The user clicks the four corners of the table; a right click removes
        the last clicked corner.
        """

        points = []
        anno_template = np.copy(template)

        def get_point(event, x, y, flags, params):
            nonlocal points, anno_template
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                point = (x, y)

                cv.circle(  # type:ignore
                    anno_template,  # type:ignore
                    (x, y),
                    4,
                    (0, 255, 0),
                    2,
                )
                cv.imshow(constants.WINDOW, anno_template)  # type:ignore

                points.append(point)
            elif event == cv.EVENT_RBUTTONDOWN:
                # remove the last annotation
                points = points[:-1]

                # restart from the clean image so the removed circle disappears
                anno_template = np.copy(template)

                for p in points:
                    cv.circle(
                        anno_template,
                        p,
                        4,
                        (0, 255, 0),
                        2,
                    )

                cv.imshow(constants.WINDOW, anno_template)

        print(CROP_HELP)

        imu.show(anno_template, get_point, title="crop the header")

        assert len(points) == 4, (
            "you need to annotate the four corners of the table in order to crop it"
        )

        # axis-aligned bounding box of the clicked corners
        points_np = np.array(points)
        x_min = max(int(np.min(points_np[:, 0])), 0)
        y_min = max(int(np.min(points_np[:, 1])), 0)
        x_max = int(np.max(points_np[:, 0]))
        y_max = int(np.max(points_np[:, 1]))

        # clamp the start of the slices: a negative start index would wrap
        # around to the other side of the image instead of stopping at the border
        top = max(y_min - margin, 0)
        left = max(x_min - margin, 0)

        return template[top : y_max + margin, left : x_max + margin]

    @staticmethod
    def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
        """
        Create a HeaderTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.

        Only polylines that consist of exactly two points are used.

        Args:
            annotation (str): the path of the annotation csv file
        """

        rules = []
        with open(annotation, "r") as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                shape_attributes = json.loads(row["region_shape_attributes"])
                if shape_attributes["name"] == "polyline":
                    x_points = shape_attributes["all_points_x"]
                    y_points = shape_attributes["all_points_y"]
                    if len(x_points) == 2 and len(y_points) == 2:
                        rules.append(
                            [x_points[0], y_points[0], x_points[1], y_points[1]]
                        )

        return HeaderTemplate(rules)

    def cell_width(self, i: int) -> int:
        """Distance between the vertical rules that bound column `i`"""
        self._check_col_idx(i)
        return int(self._v_rules[i + 1]._x - self._v_rules[i]._x)

    def cell_widths(self, start: int = 0) -> list[int]:
        """Widths of all columns from column `start` onwards"""
        return [self.cell_width(i) for i in range(start, self.cols)]

    def cell_height(self, header_factor: float = 0.8) -> int:
        """Distance between the first two horizontal rules, scaled by `header_factor`"""
        return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)

    def cell_heights(self, header_factors: list[float] | float) -> list[int]:
        """
        Like `cell_height`, but for one or more factors at once.

        Args:
            header_factors: a single factor (int or float) or a list of factors

        Returns:
            list[int]: one height per factor
        """
        # accept ints as well: a bare `2` is as valid a factor as `2.0`
        if isinstance(header_factors, (int, float)):
            header_factors = [header_factors]
        header_height = self._h_rules[1]._y - self._h_rules[0]._y
        return [int(header_height * f) for f in header_factors]

    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
        """
        Returns the intersection of the index[0]th horizontal rule and the
        index[1]th vertical rule
        """

        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
        assert ints is not None
        return ints

    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Get the cell index (row, col) that corresponds with the point (x, y) in the template image

        Args:
            point (tuple[float, float]): the coordinates in the template image

        Returns:
            tuple[int, int]: (row, col), or (-1, -1) if the point lies outside of all cells
        """

        x, y = point

        row = -1
        col = -1

        for i in range(self.rows):
            y0 = self._h_rules[i]._y_at_x(x)
            y1 = self._h_rules[i + 1]._y_at_x(x)
            if min(y0, y1) <= y <= max(y0, y1):
                row = i
                break

        for i in range(self.cols):
            x0 = self._v_rules[i]._x_at_y(y)
            x1 = self._v_rules[i + 1]._x_at_y(y)
            if min(x0, x1) <= x <= max(x0, x1):
                col = i
                break

        if row == -1 or col == -1:
            return (-1, -1)

        return (row, col)

    def cell_polygon(
        self, cell: tuple[int, int]
    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
        """
        Return points (x,y) that make up a polygon around the requested cell
        (top left, top right, bottom right, bottom left)

        Raises:
            TauluException: if the indices are out of range or the surrounding
                rules do not intersect
        """

        row, col = cell

        self._check_col_idx(col)
        self._check_row_idx(row)

        top_rule = self._h_rules[row]
        bottom_rule = self._h_rules[row + 1]
        left_rule = self._v_rules[col]
        right_rule = self._v_rules[col + 1]

        # Calculate corner points using intersections
        top_left = top_rule.intersection(left_rule)
        top_right = top_rule.intersection(right_rule)
        bottom_left = bottom_rule.intersection(left_rule)
        bottom_right = bottom_rule.intersection(right_rule)

        if any(
            point is None
            for point in (top_left, top_right, bottom_left, bottom_right)
        ):
            raise TauluException("the lines around this cell do not intersect")

        return top_left, top_right, bottom_right, bottom_left  # type:ignore

    def region(
        self, start: tuple[int, int], end: tuple[int, int]
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding polygon for the region that goes from cell `start` to cell `end`

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y), truncated to integers

        Raises:
            TauluException: if the indices are out of range or the surrounding
                rules do not intersect
        """
        self._check_row_idx(start[0])
        self._check_row_idx(end[0])
        self._check_col_idx(start[1])
        self._check_col_idx(end[1])

        # the rules that surround this region
        top_rule = self._h_rules[start[0]]
        bottom_rule = self._h_rules[end[0] + 1]
        left_rule = self._v_rules[start[1]]
        right_rule = self._v_rules[end[1] + 1]

        # four points that will be the bounding polygon of the result,
        # which needs to be rectified
        top_left = top_rule.intersection(left_rule)
        top_right = top_rule.intersection(right_rule)
        bottom_left = bottom_rule.intersection(left_rule)
        bottom_right = bottom_rule.intersection(right_rule)

        if (
            top_left is None
            or top_right is None
            or bottom_left is None
            or bottom_right is None
        ):
            raise TauluException("the lines around this row do not intersect properly")

        def to_point(pnt) -> Point:
            return (int(pnt[0]), int(pnt[1]))

        return (
            to_point(top_left),
            to_point(top_right),
            to_point(bottom_right),
            to_point(bottom_left),
        )

    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """Not supported on a HeaderTemplate: always raises TauluException"""
        raise TauluException("text_regions should not be called on a HeaderTemplate")
Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.
def __init__(self, rules: Iterable[Iterable[int]]):
    """
    Build the template from a collection of rules (lines) of a table header.

    The rules are stored, and additionally split into horizontal rules
    (sorted on their y position) and vertical rules (sorted on their x position).

    Args:
        rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
    """

    super().__init__()

    def by_y(rule):
        return rule._y

    def by_x(rule):
        return rule._x

    self._rules = [_Rule(*coords) for coords in rules]

    horizontal = [candidate for candidate in self._rules if candidate._is_horizontal()]
    self._h_rules = sorted(horizontal, key=by_y)

    vertical = [candidate for candidate in self._rules if candidate._is_vertical()]
    self._v_rules = sorted(vertical, key=by_x)
A HeaderTemplate is a collection of rules of a table. This class implements methods for finding cell positions in a table image, given the template the image adheres to.
Arguments:
- rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
@log_calls(level=logging.DEBUG)
def save(self, path: PathLike[str]):
    """
    Write all rules of this HeaderTemplate to `path` as a JSON object
    with a single "rules" key.
    """

    serialised_rules = [rule.to_dict() for rule in self._rules]
    payload = {"rules": serialised_rules}

    with open(path, "w") as handle:
        json.dump(payload, handle)
Save the HeaderTemplate to the given path, as a json
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image(
    template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
) -> "HeaderTemplate":
    """
    Utility method that allows users to create a template from a template image.

    The user is asked to click to annotate lines (two clicks per line).
    A right click removes the last annotated line.

    Args:
        template: the image on which to annotate the header lines
        crop (str | None): if str, crop the template image first, then do the annotation.
            The cropped image will be stored at the supplied path
        margin (int): margin to add around the cropping of the header
    """

    if isinstance(template, str):
        template = cv.imread(template)
    template = cast(MatLike, template)

    if crop is not None:
        cropped = HeaderTemplate._crop(template, margin)
        cv.imwrite(os.fspath(crop), cropped)
        template = cropped

    start_point = None
    lines: list[list[int]] = []

    anno_template = np.copy(template)

    def get_point(event, x, y, flags, params):
        nonlocal lines, start_point, anno_template
        _ = flags
        _ = params
        if event == cv.EVENT_LBUTTONDOWN:
            if start_point is not None:
                # start_point is stored as (y, x)
                line: list[int] = [start_point[1], start_point[0], x, y]

                cv.line(  # type:ignore
                    anno_template,  # type:ignore
                    (start_point[1], start_point[0]),
                    (x, y),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )
                cv.imshow(constants.WINDOW, anno_template)  # type:ignore

                lines.append(line)
                start_point = None
            else:
                start_point = (y, x)
        elif event == cv.EVENT_RBUTTONDOWN:
            start_point = None

            # remove the last annotation
            lines = lines[:-1]

            # redraw the remaining lines on a fresh copy of the clean image,
            # so the removed line disappears and `template` stays untouched
            anno_template = np.copy(template)

            for line in lines:
                cv.line(
                    anno_template,
                    (line[0], line[1]),
                    (line[2], line[3]),
                    (0, 255, 0),
                    2,
                    cv.LINE_AA,
                )

            cv.imshow(constants.WINDOW, anno_template)

    print(ANNO_HELP)

    imu.show(anno_template, get_point, title="annotate the header")

    return HeaderTemplate(lines)
Utility method that allows users to create a template from a template image.
The user is asked to click to annotate lines (two clicks per line).
Arguments:
- template: the image on which to annotate the header lines
- crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
- margin (int): margin to add around the cropping of the header
@staticmethod
def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
    """
    Build a HeaderTemplate from a csv export of annotations made in
    [vgg](https://annotate.officialstatistics.org/) with the polylines tool.

    Only polylines that consist of exactly two points are turned into rules.

    Args:
        annotation (str): the path of the annotation csv file
    """

    segments = []
    with open(annotation, "r") as handle:
        for record in csv.DictReader(handle):
            shape = json.loads(record["region_shape_attributes"])
            if shape["name"] != "polyline":
                continue

            xs = shape["all_points_x"]
            ys = shape["all_points_y"]
            if len(xs) != 2 or len(ys) != 2:
                continue

            segments.append([xs[0], ys[0], xs[1], ys[1]])

    return HeaderTemplate(segments)
Create a HeaderTemplate from annotations made in vgg, using the polylines tool.
Arguments:
- annotation (str): the path of the annotation csv file
def cell_heights(self, header_factors: list[float] | float) -> list[int]:
    """
    Compute cell heights as the distance between the first two horizontal
    rules, scaled by each of the given factors.

    Args:
        header_factors: a single factor (int or float) or a list of factors

    Returns:
        list[int]: one height per factor, truncated to an integer
    """
    # accept ints as well: a bare `2` is as valid a factor as `2.0`
    if isinstance(header_factors, (int, float)):
        header_factors = [header_factors]
    header_height = self._h_rules[1]._y - self._h_rules[0]._y
    return [int(header_height * factor) for factor in header_factors]
def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
    """
    Return the point where the index[0]th horizontal rule crosses the
    index[1]th vertical rule
    """

    horizontal = self._h_rules[index[0]]
    vertical = self._v_rules[index[1]]

    crossing = horizontal.intersection(vertical)
    assert crossing is not None
    return crossing
Returns the intersection of the index[0]th horizontal rule and the index[1]th vertical rule
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Look up which cell of the template contains the point (x, y).

    Args:
        point (tuple[float, float]): the coordinates in the template image

    Returns:
        tuple[int, int]: (row, col), or (-1, -1) when the point is not
        inside any row or not inside any column
    """

    x, y = point

    def between(first, second, value):
        return min(first, second) <= value <= max(first, second)

    # first row whose bounding horizontal rules enclose y (at this x)
    row = next(
        (
            idx
            for idx in range(self.rows)
            if between(
                self._h_rules[idx]._y_at_x(x), self._h_rules[idx + 1]._y_at_x(x), y
            )
        ),
        -1,
    )

    # first column whose bounding vertical rules enclose x (at this y)
    col = next(
        (
            idx
            for idx in range(self.cols)
            if between(
                self._v_rules[idx]._x_at_y(y), self._v_rules[idx + 1]._x_at_y(y), x
            )
        ),
        -1,
    )

    if row != -1 and col != -1:
        return (row, col)

    return (-1, -1)
Get the cell index (row, col) that corresponds with the point (x, y) in the template image
Arguments:
- point (tuple[float, float]): the coordinates in the template image
Returns:
tuple[int, int]: (row, col)
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """
    Compute the corner points (x, y) of the polygon around the requested cell,
    ordered as (top left, top right, bottom right, bottom left)
    """

    row, col = cell

    self._check_col_idx(col)
    self._check_row_idx(row)

    # the rules that bound the cell on each side
    upper, lower = self._h_rules[row], self._h_rules[row + 1]
    left, right = self._v_rules[col], self._v_rules[col + 1]

    upper_left = upper.intersection(left)
    upper_right = upper.intersection(right)
    lower_left = lower.intersection(left)
    lower_right = lower.intersection(right)

    for corner in (upper_left, upper_right, lower_left, lower_right):
        if corner is None:
            raise TauluException("the lines around this cell do not intersect")

    return upper_left, upper_right, lower_right, lower_left  # type:ignore
Return points (x,y) that make up a polygon around the requested cell (top left, top right, bottom right, bottom left)
def region(
    self, start: tuple[int, int], end: tuple[int, int]
) -> tuple[Point, Point, Point, Point]:
    """
    Compute the bounding polygon of the region that spans from cell `start`
    to cell `end` (both given as (row, col)).

    Returns:
        4 points: lt, rt, rb, lb, in format (x, y), truncated to integers
    """
    start_row, start_col = start[0], start[1]
    end_row, end_col = end[0], end[1]

    self._check_row_idx(start_row)
    self._check_row_idx(end_row)
    self._check_col_idx(start_col)
    self._check_col_idx(end_col)

    # the rules that enclose the requested region
    upper = self._h_rules[start_row]
    lower = self._h_rules[end_row + 1]
    left = self._v_rules[start_col]
    right = self._v_rules[end_col + 1]

    # corners of the bounding polygon (still to be rectified by the caller)
    upper_left = upper.intersection(left)
    upper_right = upper.intersection(right)
    lower_left = lower.intersection(left)
    lower_right = lower.intersection(right)

    if any(
        corner is None
        for corner in (upper_left, upper_right, lower_left, lower_right)
    ):
        raise TauluException("the lines around this row do not intersect properly")

    def as_int_point(corner) -> Point:
        return (int(corner[0]), int(corner[1]))

    return (
        as_int_point(upper_left),
        as_int_point(upper_right),
        as_int_point(lower_right),
        as_int_point(lower_left),
    )
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Not supported on a HeaderTemplate.

    Raises:
        TauluException: always
    """
    message = "text_regions should not be called on a HeaderTemplate"
    raise TauluException(message)
Split the row into regions of continuous text
Returns list[tuple[int, int]]: a list of spans (start col, end col)
class TableIndexer(ABC):
    """
    Subclasses implement methods for going from a pixel in the input image to a table cell index,
    and cropping an image to the given table cell index.
    """

    def __init__(self):
        self._col_offset = 0

    @property
    def col_offset(self) -> int:
        """Column offset of this indexer (non-negative, initially 0)"""
        return self._col_offset

    @col_offset.setter
    def col_offset(self, value: int):
        assert value >= 0
        self._col_offset = value

    @property
    @abstractmethod
    def cols(self) -> int:
        """Number of columns in the table"""
        pass

    @property
    @abstractmethod
    def rows(self) -> int:
        """Number of rows in the table"""
        pass

    def cells(self) -> Generator[tuple[int, int], None, None]:
        """Yield every cell index (row, col), row by row"""
        for row in range(self.rows):
            for col in range(self.cols):
                yield (row, col)

    def _check_row_idx(self, row: int):
        """Raise a TauluException if `row` is not a valid row index"""
        if row < 0:
            raise TauluException("row number needs to be positive or zero")
        if row >= self.rows:
            raise TauluException(f"row number too high: {row} >= {self.rows}")

    def _check_col_idx(self, col: int):
        """Raise a TauluException if `col` is not a valid column index"""
        if col < 0:
            raise TauluException("col number needs to be positive or zero")
        if col >= self.cols:
            raise TauluException(f"col number too high: {col} >= {self.cols}")

    @abstractmethod
    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Returns the coordinate (row, col) of the cell that contains the given position

        Args:
            point (tuple[float, float]): a location in the input image

        Returns:
            tuple[int, int]: the cell index (row, col) that contains the given point
        """
        pass

    @abstractmethod
    def cell_polygon(
        self, cell: tuple[int, int]
    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
        """Returns the polygon (used in e.g. opencv) that encloses the cell at the given cell position"""
        pass

    def _highlight_cell(
        self,
        image: MatLike,
        cell: tuple[int, int],
        color: tuple[int, int, int] = (0, 0, 255),
        thickness: int = 2,
    ):
        """Draw the outline and the index of `cell` onto `image` (in place)"""
        polygon = self.cell_polygon(cell)
        points = np.int32(list(polygon))  # type:ignore
        cv.polylines(image, [points], True, color, thickness, cv.LINE_AA)  # type:ignore
        cv.putText(
            image,
            str(cell),
            (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
            cv.FONT_HERSHEY_PLAIN,
            2.0,
            (255, 255, 255),
            2,
        )

    def highlight_all_cells(
        self,
        image: MatLike,
        color: tuple[int, int, int] = (0, 0, 255),
        thickness: int = 1,
    ) -> MatLike:
        """Return a copy of `image` on which every cell is outlined"""
        img = np.copy(image)

        for cell in self.cells():
            self._highlight_cell(img, cell, color, thickness)

        return img

    def select_one_cell(
        self,
        image: MatLike,
        window: str = WINDOW,
        color: tuple[int, int, int] = (255, 0, 0),
        thickness: int = 2,
    ) -> tuple[int, int] | None:
        """
        Show `image` and let the user click a cell.

        Returns:
            the (row, col) of the last clicked cell, or None if no cell was clicked
        """
        clicked = None

        def click_event(event, x, y, flags, params):
            nonlocal clicked

            _ = flags
            _ = params
            if event != cv.EVENT_LBUTTONDOWN:
                return

            cell = self.cell((x, y))
            if cell[0] < 0:
                return
            clicked = cell

            # copy only when something is drawn: this callback also receives
            # every other mouse event, for which a copy would be wasted
            img = np.copy(image)
            self._highlight_cell(img, cell, color, thickness)
            cv.imshow(window, img)

        imu.show(image, click_event=click_event, title="select one cell", window=window)

        return clicked

    def show_cells(
        self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
    ) -> list[tuple[int, int]]:
        """
        Show `image` (or the image at the given path) and highlight every cell the user clicks.

        Returns:
            the (row, col) indices of the clicked cells, in click order

        Raises:
            TauluException: if `image` is a path that cannot be read as an image
        """
        if not isinstance(image, np.ndarray):
            path = os.fspath(image)
            image = cv.imread(path)
            # cv.imread signals failure by returning None instead of raising
            if image is None:
                raise TauluException(f"could not read image: {path}")

        img = np.copy(image)

        cells = []

        def click_event(event, x, y, flags, params):
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                cell = self.cell((x, y))
                if cell[0] >= 0:
                    cells.append(cell)
                else:
                    return
                self._highlight_cell(img, cell)
                cv.imshow(window, img)

        imu.show(
            img,
            click_event=click_event,
            title="click to highlight cells",
            window=window,
        )

        return cells

    @abstractmethod
    def region(
        self,
        start: tuple[int, int],
        end: tuple[int, int],
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding box for the rectangular region that goes from start to end

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y)
        """
        pass

    def crop_region(
        self,
        image: MatLike,
        start: tuple[int, int],
        end: tuple[int, int],
        margin: int = 0,
        margin_top: int | None = None,
        margin_bottom: int | None = None,
        margin_left: int | None = None,
        margin_right: int | None = None,
        margin_y: int | None = None,
        margin_x: int | None = None,
    ) -> MatLike:
        """Crop the input image to a rectangular region with the start and end cells as extremes"""

        region = self.region(start, end)

        # apply margins according to priority:
        # margin_top > margin_y > margin (etc.)
        lt, rt, rb, lb = _apply_margin(
            *region,
            margin=margin,
            margin_top=margin_top,
            margin_bottom=margin_bottom,
            margin_left=margin_left,
            margin_right=margin_right,
            margin_y=margin_y,
            margin_x=margin_x,
        )

        # size of the output: average of the opposite sides of the quad
        w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
        h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2

        # crop by doing a perspective transform to the desired quad
        src_pts = np.array([lt, rt, rb, lb], dtype="float32")
        dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32")
        M = cv.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv.warpPerspective(image, M, (int(w), int(h)))  # type:ignore

        return warped

    @abstractmethod
    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """
        Split the row into regions of continuous text

        Returns:
            a list of spans; NOTE(review): the annotation declares pairs of
            (int, int) tuples, while the original docstring described plain
            (start col, end col) spans - confirm which one is intended
        """

        pass

    def crop_cell(self, image, cell: tuple[int, int], margin: int = 0) -> MatLike:
        """Crop `image` to a single cell, with the given margin"""
        return self.crop_region(image, cell, cell, margin)
Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.
@abstractmethod
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Find the cell that contains the given position.

    Args:
        point (tuple[float, float]): a location in the input image

    Returns:
        tuple[int, int]: the cell index (row, col) that contains the given point
    """
    ...
Returns the coordinate (row, col) of the cell that contains the given position
Arguments:
- point (tuple[float, float]): a location in the input image
Returns:
tuple[int, int]: the cell index (row, col) that contains the given point
@abstractmethod
def cell_polygon(
    self, cell: tuple[int, int]
) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
    """Return the polygon (usable in e.g. opencv) that encloses the cell at the given cell position"""
    ...
Returns the polygon (used in e.g. opencv) that encloses the cell at the given cell position
def select_one_cell(
    self,
    image: MatLike,
    window: str = WINDOW,
    color: tuple[int, int, int] = (255, 0, 0),
    thickness: int = 2,
) -> tuple[int, int] | None:
    """
    Show `image` and let the user click a cell, which is then highlighted.

    Args:
        image (MatLike): the image to show
        window (str): name of the window in which the image is shown
        color (tuple[int, int, int]): colour of the highlight
        thickness (int): line thickness of the highlight

    Returns:
        the (row, col) of the last clicked cell, or None if no cell was clicked
    """
    clicked = None

    def click_event(event, x, y, flags, params):
        nonlocal clicked

        _ = flags
        _ = params
        if event != cv.EVENT_LBUTTONDOWN:
            return

        cell = self.cell((x, y))
        if cell[0] < 0:
            return
        clicked = cell

        # copy only when something is drawn: this callback also receives
        # every other mouse event, for which a copy would be wasted
        img = np.copy(image)
        self._highlight_cell(img, cell, color, thickness)
        cv.imshow(window, img)

    imu.show(image, click_event=click_event, title="select one cell", window=window)

    return clicked
def show_cells(
    self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
) -> list[tuple[int, int]]:
    """
    Show `image` (or the image at the given path) and highlight every cell the user clicks.

    Args:
        image (MatLike | os.PathLike[str] | str): the image, or the path of the image
        window (str): name of the window in which the image is shown

    Returns:
        the (row, col) indices of the clicked cells, in click order

    Raises:
        TauluException: if `image` is a path that cannot be read as an image
    """
    if not isinstance(image, np.ndarray):
        path = os.fspath(image)
        image = cv.imread(path)
        # cv.imread signals failure by returning None instead of raising
        if image is None:
            raise TauluException(f"could not read image: {path}")

    img = np.copy(image)

    cells = []

    def click_event(event, x, y, flags, params):
        _ = flags
        _ = params
        if event != cv.EVENT_LBUTTONDOWN:
            return

        cell = self.cell((x, y))
        if cell[0] < 0:
            return
        cells.append(cell)

        # highlights accumulate on the same copy of the image
        self._highlight_cell(img, cell)
        cv.imshow(window, img)

    imu.show(
        img,
        click_event=click_event,
        title="click to highlight cells",
        window=window,
    )

    return cells
@abstractmethod
def region(
    self,
    start: tuple[int, int],
    end: tuple[int, int],
) -> tuple[Point, Point, Point, Point]:
    """
    Compute the bounding box of the rectangular region that spans from start to end

    Returns:
        4 points: lt, rt, rb, lb, in format (x, y)
    """
    ...
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
def crop_region(
    self,
    image: MatLike,
    start: tuple[int, int],
    end: tuple[int, int],
    margin: int = 0,
    margin_top: int | None = None,
    margin_bottom: int | None = None,
    margin_left: int | None = None,
    margin_right: int | None = None,
    margin_y: int | None = None,
    margin_x: int | None = None,
) -> MatLike:
    """
    Cut the region spanned by the `start` and `end` cells out of the image.

    The corners of the region (from ``self.region``) are adjusted by
    ``_apply_margin`` and the resulting quadrilateral is warped onto an
    axis-aligned rectangle.

    Args:
        image: the image to crop from
        start: the cell at one extreme of the region
        end: the cell at the opposite extreme of the region
        margin, margin_top, margin_bottom, margin_left, margin_right,
        margin_y, margin_x: forwarded unchanged to ``_apply_margin``

    Returns:
        The warped (rectified) crop of the region.
    """
    # margins are applied according to priority:
    # margin_top > margin_y > margin (etc.)
    lt, rt, rb, lb = _apply_margin(
        *self.region(start, end),
        margin=margin,
        margin_top=margin_top,
        margin_bottom=margin_bottom,
        margin_left=margin_left,
        margin_right=margin_right,
        margin_y=margin_y,
        margin_x=margin_x,
    )

    # output size: average of the two opposite edges in each direction
    width = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
    height = (rb[1] - rt[1] + lb[1] - lt[1]) / 2

    # crop by doing a perspective transform of the quad onto a rectangle
    source_quad = np.array([lt, rt, rb, lb], dtype="float32")
    target_quad = np.array(
        [[0, 0], [width, 0], [width, height], [0, height]], dtype="float32"
    )
    transform = cv.getPerspectiveTransform(source_quad, target_quad)

    return cv.warpPerspective(image, transform, (int(width), int(height)))  # type:ignore
Crop the input image to a rectangular region with the start and end cells as extremes
@abstractmethod
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Split the row into regions of continuous text

    Args:
        img: the table image to analyse
        row: index of the row to split
        margin_x: horizontal margin in pixels (exact use is up to the
            implementing subclass)
        margin_y: vertical margin in pixels (exact use is up to the
            implementing subclass)

    Returns:
        list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans, each
        given as a (start cell, end cell) pair; the two cells of a span can
        be passed as `start` and `end` to `crop_region`
    """

    pass
Split the row into regions of continuous text
Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans, each given as a (start cell, end cell) pair
15class Split(Generic[T]): 16 """ 17 Container for paired left/right data with convenient manipulation methods. 18 19 The Split class is designed for working with table images that span two pages 20 or have distinct left and right sections. It allows you to: 21 - Store related data for both sides 22 - Apply functions to both sides simultaneously 23 - Access attributes/methods of contained objects transparently 24 25 Examples: 26 >>> # Create a split with different parameters for each side 27 >>> thresholds = Split(0.25, 0.30) 28 >>> 29 >>> # Apply a function to both sides 30 >>> images = Split(left_img, right_img) 31 >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5))) 32 >>> 33 >>> # Use with different parameters per side 34 >>> results = images.apply( 35 ... lambda img, k: sauvola_threshold(img, k), 36 ... k=thresholds # k.left used for left img, k.right for right 37 ... ) 38 >>> 39 >>> # Access methods of contained objects directly 40 >>> templates = Split(template_left, template_right) 41 >>> widths = templates.cell_widths(0) # Calls on both templates 42 43 Type Parameters: 44 T: The type of objects stored in left and right 45 """ 46 47 def __init__(self, left: T | None = None, right: T | None = None): 48 """ 49 Initialize a Split container. 50 51 Args: 52 left: Data for the left side 53 right: Data for the right side 54 55 Note: 56 Both can initially be None. Use the `append` method or set 57 properties directly to populate. 
58 """ 59 self._left = left 60 self._right = right 61 62 @property 63 def left(self) -> T: 64 assert self._left is not None 65 return self._left 66 67 @left.setter 68 def left(self, value: T): 69 self._left = value 70 71 @property 72 def right(self) -> T: 73 assert self._right is not None 74 return self._right 75 76 @right.setter 77 def right(self, value: T): 78 self._right = value 79 80 def append(self, value: T): 81 if self._left is None: 82 self._left = value 83 else: 84 self._right = value 85 86 def __repr__(self) -> str: 87 return f"left: {self._left}, right: {self._right}" 88 89 def __iter__(self): 90 assert self._left is not None 91 assert self._right is not None 92 return iter((self._left, self._right)) 93 94 def __getitem__(self, index: bool) -> T: 95 assert self._left is not None 96 assert self._right is not None 97 if int(index) == 0: 98 return self._left 99 else: 100 return self._right 101 102 def apply( 103 self, 104 funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]", 105 *args, 106 **kwargs, 107 ) -> "Split[V]": 108 if not isinstance(funcs, Split): 109 funcs = Split(funcs, funcs) 110 111 def get_arg(side: str, arg): 112 if isinstance(arg, Split): 113 return getattr(arg, side) 114 return arg 115 116 def call(side: str): 117 func = getattr(funcs, side) 118 target = getattr(self, side) 119 120 side_args = [get_arg(side, arg) for arg in args] 121 side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()} 122 123 return func(target, *side_args, **side_kwargs) 124 125 return Split(call("left"), call("right")) 126 127 def __getattr__(self, attr_name: str): 128 if attr_name in self.__dict__: 129 return getattr(self, attr_name) 130 131 def wrapper(*args, **kwargs): 132 return self.apply( 133 Split( 134 getattr(self.left.__class__, attr_name), 135 getattr(self.right.__class__, attr_name), 136 ), 137 *args, 138 **kwargs, 139 ) 140 141 return wrapper
Container for paired left/right data with convenient manipulation methods.
The Split class is designed for working with table images that span two pages or have distinct left and right sections. It allows you to:
- Store related data for both sides
- Apply functions to both sides simultaneously
- Access attributes/methods of contained objects transparently
Examples:
>>> # Create a split with different parameters for each side >>> thresholds = Split(0.25, 0.30) >>> >>> # Apply a function to both sides >>> images = Split(left_img, right_img) >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5))) >>> >>> # Use with different parameters per side >>> results = images.apply( ... lambda img, k: sauvola_threshold(img, k), ... k=thresholds # k.left used for left img, k.right for right ... ) >>> >>> # Access methods of contained objects directly >>> templates = Split(template_left, template_right) >>> widths = templates.cell_widths(0) # Calls on both templates
Type Parameters:
T: The type of objects stored in left and right
def __init__(self, left: T | None = None, right: T | None = None):
    """
    Set up a Split container holding one value per side.

    Args:
        left: Data for the left side
        right: Data for the right side

    Note:
        Either side may start out as None; fill it later through the
        `append` method or by assigning to the properties.
    """
    self._left, self._right = left, right
Initialize a Split container.
Arguments:
- left: Data for the left side
- right: Data for the right side
Note:
Both can initially be None. Use the `append` method or set properties directly to populate.
def apply(
    self,
    funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]",
    *args,
    **kwargs,
) -> "Split[V]":
    """
    Run a function on the left and on the right value and pair the results.

    Args:
        funcs: one callable for both sides, or a Split with a callable per
            side; the side's value is passed as first argument
        *args: additional positional arguments. A Split argument contributes
            the value of the matching side, any other value is shared by
            both calls.
        **kwargs: additional keyword arguments, resolved like `args`

    Returns:
        A Split of (left result, right result); the left call runs first.
    """
    if not isinstance(funcs, Split):
        funcs = Split(funcs, funcs)

    def pick(side: str, value):
        # a Split argument stands for "one value per side"
        return getattr(value, side) if isinstance(value, Split) else value

    def run(side: str):
        func = getattr(funcs, side)
        target = getattr(self, side)

        positional = [pick(side, value) for value in args]
        named = {key: pick(side, value) for key, value in kwargs.items()}

        return func(target, *positional, **named)

    left_result = run("left")
    right_result = run("right")
    return Split(left_result, right_result)
class Taulu:
    """
    High-level API for table segmentation from images.

    Taulu provides a simplified interface that orchestrates header alignment,
    grid detection, and table segmentation into a single workflow. It's designed
    to hide complexity while still allowing fine-tuned control through parameters.

    ## Workflow Overview

    1. **Header Template Creation**: Use `Taulu.annotate()` to create annotated
       header images that define your table structure
    2. **Initialization**: Create a Taulu instance with your header(s) and parameters
    3. **Segmentation**: Call `segment_table()` on your table images to get a
       `TableGrid` object containing all detected cell boundaries

    ## Single vs Split Tables

    Taulu supports two modes:

    - **Single header**: For tables that fit on one page or have consistent structure
    - **Split header**: For tables that span two pages (left/right) with potentially
      different parameters for each side

    The mode is selected by `header_image_path`: pass a `Split` to get split mode.
    Use `Split[T]` objects to provide different parameters for left and right sides.

    ## Parameter Tuning Strategy

    If segmentation fails or is inaccurate:

    1. **Visual debugging**: Set `debug_view=True` in `segment_table()` to see
       intermediate results
    2. **Adjust thresholding**: Modify `sauvola_k` to change binarization sensitivity
       - Increase to remove more noise (more aggressive)
       - Decrease to preserve faint lines
    3. **Tune cross-kernel**: Adjust `cross_width` and `kernel_size`
       to match your rule thickness after morphology
    4. **Morphology**: Increase `morph_size` to connect broken lines, but be aware
       this also thickens lines (requiring larger cross_width)
    5. **Search parameters**: Increase `search_region` for warped documents,
       adjust `distance_penalty` to control how strictly positions are enforced
    6. **Growth parameters**: Lower `grow_threshold` if the algorithm stops too early,
       increase `look_distance` for better extrapolation

    Examples:
        Basic usage with a single header:

        >>> from taulu import Taulu
        >>>
        >>> # First, create annotated header (one-time setup)
        >>> Taulu.annotate("table_image.png", "header.png")
        >>> # This creates header.png and header.json
        >>>
        >>> # Initialize Taulu with the header
        >>> taulu = Taulu(
        ...     header_image_path="header.png",
        ...     cell_height_factor=0.8,  # Rows are 80% of header height
        ...     sauvola_k=0.25,
        ...     search_region=60,
        ...     cross_width=10
        ... )
        >>>
        >>> # Segment a table image
        >>> grid = taulu.segment_table("table_page_01.png")
        >>>
        >>> # Use the grid to extract cells
        >>> import cv2
        >>> img = cv2.imread("table_page_01.png")
        >>> cell_image = grid.crop_cell(img, (0, 0))  # First cell

        Using split headers for two-page tables:

        >>> from taulu import Taulu, Split
        >>>
        >>> # Annotate both headers
        >>> Taulu.annotate("scan_01.png", "header_left.png")
        >>> Taulu.annotate("scan_01.png", "header_right.png")
        >>>
        >>> # Use different parameters for each side
        >>> taulu = Taulu(
        ...     header_image_path=Split("header_left.png", "header_right.png"),
        ...     cell_height_factor=Split([0.8, 0.9], [0.75]),  # Different row heights
        ...     sauvola_k=Split(0.25, 0.30),  # Different thresholds
        ...     cross_width=10  # Same for both sides
        ... )
        >>>
        >>> # Segment returns a unified grid
        >>> grid = taulu.segment_table("scan_01.png")

        Debug visualization to tune parameters:

        >>> taulu = Taulu("header.png", sauvola_k=0.15)
        >>>
        >>> # Opens windows showing each processing step
        >>> # Press 'n' to advance, 'q' to quit
        >>> grid = taulu.segment_table("table.png", debug_view=True)
        >>>
        >>> # Adjust parameters based on what you see:
        >>> # - If binarization is too noisy: increase sauvola_k
        >>> # - If lines are broken after morphology: increase morph_size
        >>> # - If filtered image has "undefined" corners: adjust cross_width to match line thickness (after morphology)
        >>> # - If corners are missed during search: decrease grow_threshold or increase search_region


    Attributes:
        _header (MatLike | Split[MatLike]): Loaded header image(s)
        _aligner (HeaderAligner | Split[HeaderAligner]): Header alignment engine(s)
        _template (HeaderTemplate | Split[HeaderTemplate]): Parsed header structure(s)
        _grid_detector (GridDetector | Split[GridDetector]): Grid detection engine(s)
        _cell_heights (list[int] | Split[list[int]]): Computed cell heights in pixels

    Raises:
        TauluException: If header files don't exist, annotation is missing, or
            Split parameters are used incorrectly with single headers

    See Also:
        - `TableGrid`: The result object with methods for accessing cells
        - `Split`: Container for paired left/right parameters
        - `GridDetector`: Lower-level grid detection (for advanced usage)
        - `HeaderAligner`: Lower-level header alignment (for advanced usage)
    """

    def __init__(
        self,
        header_image_path: PathLike[str] | str | Split[PathLike[str] | str],
        # NOTE: the list default is shared between calls; this method itself
        # never mutates it
        cell_height_factor: float | list[float] | Split[float | list[float]] = [1.0],
        header_anno_path: PathLike[str]
        | str
        | Split[PathLike[str] | str]
        | None = None,
        sauvola_k: float | Split[float] = 0.25,
        search_region: int | Split[int] = 60,
        distance_penalty: float | Split[float] = 0.4,
        cross_width: int | Split[int] = 10,
        morph_size: int | Split[int] = 4,
        kernel_size: int | Split[int] = 41,
        processing_scale: float | Split[float] = 1.0,
        min_rows: int | Split[int] = 5,
        look_distance: int | Split[int] = 3,
        grow_threshold: float | Split[float] = 0.3,
        smooth_grid: bool | Split[bool] = False,
    ):
        """
        Args:
            header_image_path:
                Path to the header template image(s). The header should be a cropped
                image showing a clear view of the table's first row. An annotation
                file (.json) must exist alongside the image, created via `Taulu.annotate()`.
                For split tables, provide a `Split` containing left and right header paths.

            cell_height_factor:
                Height of data rows relative to header height. For example, if your
                header is 100px tall and data rows are 80px tall, use 0.8.

                - **float**: All rows have the same height
                - **list[float]**: Different heights for different rows. The last value
                  is repeated for any additional rows beyond the list length. Useful when
                  the first data row is taller than subsequent rows.
                - **Split**: Different height factors for left and right sides

                Default: [1.0]

            header_anno_path (PathLike[str] | str | Split[PathLike[str] | str] | None):
                Optional explicit path to header annotation JSON file(s). If None,
                looks for a .json file with the same name as `header_image_path`.
                Must be a `Split` exactly when `header_image_path` is a `Split`.
                Default: None

            sauvola_k (float | Split[float]):
                Threshold sensitivity for Sauvola adaptive binarization (0.0-1.0).
                Controls how aggressively the algorithm converts the image to binary.

                - **Lower values** (0.04-0.15): Preserve faint lines, more noise
                - **Higher values** (0.20-0.35): Remove noise, may lose faint lines

                Start with 0.25 and adjust based on your image quality.
                Default: 0.25

            search_region (int | Split[int]):
                Size in pixels of the square region to search for the next corner point.
                The algorithm estimates where a corner should be, then searches within
                this region for the best match.

                - **Smaller values** (20-40): Faster, requires well-aligned tables
                - **Larger values** (60-100): More robust to warping and distortion

                Default: 60

            distance_penalty (float | Split[float]):
                Weight factor [0, 1] for penalizing corners far from expected position.
                Uses Gaussian weighting within the search region.

                - **0.0**: No penalty, any position in search region is equally valid
                - **0.5**: Moderate preference for positions near the expected location
                - **1.0**: Strong preference, only accepts positions very close to expected

                Default: 0.4

            cross_width (int | Split[int]):
                Width in pixels of the cross-shaped kernel used to detect intersections.
                Should approximately match the thickness of your table rules AFTER
                morphological dilation.

                **Tuning**: Look at the dilated image in debug_view. The cross_width
                should match the thickness of the black lines you see.
                Default: 10

            morph_size (int | Split[int]):
                Size of morphological structuring element for dilation. Controls how
                much gap-bridging occurs to connect broken line segments.

                - **Smaller values** (2-4): Minimal connection, preserves thin lines
                - **Larger values** (6-10): Connects larger gaps, but thickens lines

                Note: Increasing this requires increasing `cross_width` proportionally.
                Default: 4

            kernel_size (int | Split[int]):
                Size of the cross-shaped kernel (must be odd). Larger kernels are more
                selective, reducing false positives but potentially missing valid corners.

                - **Smaller values** (21-31): More sensitive, finds more candidates
                - **Larger values** (41-61): More selective, fewer false positives

                Default: 41

            processing_scale (float | Split[float]):
                Image downscaling factor (0, 1] for processing speed. Processing is done
                on scaled images, then results are scaled back to original size.

                - **1.0**: Full resolution (slowest, most accurate)
                - **0.5-0.75**: Good balance for high-res scans (2x-4x speedup)
                - **0.25-0.5**: Fast processing for very large images

                Default: 1.0

            min_rows (int | Split[int]):
                Minimum number of rows required before the algorithm considers the
                table complete. Prevents stopping too early on tables with initial
                low-confidence detections.
                Default: 5

            look_distance (int | Split[int]):
                Number of adjacent rows/columns to examine when extrapolating missing
                corners using polynomial regression. Higher values provide more context
                but may smooth over legitimate variations.

                - **2-3**: Good for consistent grids
                - **4-6**: Better for grids with some irregularity

                Default: 3

            grow_threshold (float | Split[float]):
                Initial minimum confidence [0, 1] required to accept a detected corner
                during the growing phase. The algorithm may adaptively lower this
                threshold if growth stalls.

                - **Higher values** (0.5-0.8): Stricter, fewer errors but may miss valid corners
                - **Lower values** (0.2-0.4): More permissive, finds more corners but more errors

                Default: 0.3

            smooth_grid (bool | Split[bool]):
                Whether or not to apply local smoothing logic to the grid after point detection.
                This may clean up rugged parts of the grid but could also lead to small inaccuracies
                if the grid is actually locally _not smooth_.

                Default: False

        Raises:
            TauluException: If a header image or annotation file does not exist,
                if `header_image_path` and `header_anno_path` disagree on being a
                `Split`, or if `Split` parameters are combined with a single header

        """
        self._processing_scale = processing_scale
        self._cell_height_factor = cell_height_factor
        self._smooth = smooth_grid

        # The mode is decided by the header image path alone: the split branch
        # needs `header_image_path.left/.right`, so a Split annotation path must
        # never be enough to enter it.
        if isinstance(header_image_path, Split):
            if header_anno_path is not None and not isinstance(
                header_anno_path, Split
            ):
                raise TauluException(
                    "header_anno_path must be a Split (or None) when header_image_path is a Split"
                )

            header = Split(Path(header_image_path.left), Path(header_image_path.right))

            if not exists(header.left.with_suffix(".png")) or not exists(
                header.right.with_suffix(".png")
            ):
                raise TauluException(
                    "The header images you provided do not exist (or they aren't .png files)"
                )

            if header_anno_path is None:
                if not exists(header.left.with_suffix(".json")) or not exists(
                    header.right.with_suffix(".json")
                ):
                    raise TauluException(
                        "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
                    )

                template_left = HeaderTemplate.from_saved(
                    header.left.with_suffix(".json")
                )
                template_right = HeaderTemplate.from_saved(
                    header.right.with_suffix(".json")
                )

            else:
                if not exists(header_anno_path.left) or not exists(
                    header_anno_path.right
                ):
                    raise TauluException(
                        "The header annotation files you provided do not exist (or they aren't .json files)"
                    )

                template_left = HeaderTemplate.from_saved(header_anno_path.left)
                template_right = HeaderTemplate.from_saved(header_anno_path.right)

            self._header = Split(
                cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right))
            )

            self._aligner = Split(
                HeaderAligner(
                    self._header.left, scale=get_param(self._processing_scale, "left")
                ),
                HeaderAligner(
                    self._header.right, scale=get_param(self._processing_scale, "right")
                ),
            )

            self._template = Split(template_left, template_right)

            self._cell_heights = Split(
                self._template.left.cell_heights(get_param(cell_height_factor, "left")),
                self._template.right.cell_heights(
                    get_param(cell_height_factor, "right")
                ),
            )

            def build_detector(side: str) -> GridDetector:
                # every parameter may be a plain value (shared) or a Split
                # (per side); get_param resolves it for the requested side
                return GridDetector(
                    kernel_size=get_param(kernel_size, side),
                    cross_width=get_param(cross_width, side),
                    morph_size=get_param(morph_size, side),
                    search_region=get_param(search_region, side),
                    sauvola_k=get_param(sauvola_k, side),
                    distance_penalty=get_param(distance_penalty, side),
                    scale=get_param(self._processing_scale, side),
                    min_rows=get_param(min_rows, side),
                    look_distance=get_param(look_distance, side),
                    grow_threshold=get_param(grow_threshold, side),
                )

            # Create GridDetector for left and right with potentially different parameters
            self._grid_detector = Split(
                build_detector("left"), build_detector("right")
            )

        else:
            # For single header, parameters should not be Split objects.
            # Checked first so that misuse is reported before any file is read.
            if any(
                isinstance(param, Split)
                for param in [
                    header_anno_path,
                    sauvola_k,
                    search_region,
                    distance_penalty,
                    cross_width,
                    morph_size,
                    kernel_size,
                    processing_scale,
                    min_rows,
                    look_distance,
                    grow_threshold,
                    cell_height_factor,
                    smooth_grid,
                ]
            ):
                raise TauluException(
                    "Split parameters can only be used with split headers "
                    "(pass a Split as header_image_path)"
                )

            header_image_path = Path(header_image_path)
            if not exists(header_image_path):
                raise TauluException("The header image you provided does not exist")

            if header_anno_path is None:
                # default: annotation stored next to the image
                anno_path = header_image_path.with_suffix(".json")
                if not exists(anno_path):
                    raise TauluException(
                        "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
                    )
            else:
                anno_path = header_anno_path
                if not exists(anno_path):
                    raise TauluException(
                        "The header annotation file you provided does not exist"
                    )

            self._header = cv2.imread(os.fspath(header_image_path))
            # same scale as the grid detector, mirroring the split branch
            self._aligner = HeaderAligner(self._header, scale=self._processing_scale)
            self._template = HeaderTemplate.from_saved(anno_path)

            self._cell_heights = self._template.cell_heights(self._cell_height_factor)

            self._grid_detector = GridDetector(
                kernel_size=kernel_size,
                cross_width=cross_width,
                morph_size=morph_size,
                search_region=search_region,
                sauvola_k=sauvola_k,
                distance_penalty=distance_penalty,
                scale=self._processing_scale,
                min_rows=min_rows,
                look_distance=look_distance,
                grow_threshold=grow_threshold,
            )

    @staticmethod
    def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str):
        """
        Interactive tool to create header annotations for table segmentation.

        This method guides you through a two-step annotation process:

        1. **Crop the header**: Click four corners to define the header region
        2. **Annotate lines**: Click pairs of points to define each vertical and
           horizontal line in the header

        The annotations are saved as:
        - A cropped header image (.png) at `output_path`
        - A JSON file (.json) containing line coordinates

        ## Annotation Guidelines

        **Which lines to annotate:**
        - All vertical lines that extend into the table body (column separators)
        - The top horizontal line of the header
        - The bottom horizontal line of the header (top of data rows)

        **Order doesn't matter** - annotate lines in any order that's convenient.

        **To annotate a line:**
        1. Click once at one endpoint
        2. Click again at the other endpoint
        3. A green line appears showing your annotation

        **To undo:**
        - Right-click anywhere to remove the last line you drew

        **When finished:**
        - Press 'n' to save and exit
        - Press 'q' to quit without saving

        Args:
            image_path (PathLike[str] | str): Path to a table image containing
                a clear view of the header. This can be a full table image.
            output_path (PathLike[str] | str): Where to save the cropped header
                image. The annotation JSON will be saved with the same name but
                .json extension.

        Raises:
            TauluException: If image_path doesn't exist or output_path is a directory

        Examples:
            Annotate a single header:

            >>> from taulu import Taulu
            >>> Taulu.annotate("scan_page_01.png", "header.png")
            # Interactive window opens
            # After annotation: creates header.png and header.json

            Annotate left and right headers for a split table:

            >>> Taulu.annotate("scan_page_01.png", "header_left.png")
            >>> Taulu.annotate("scan_page_01.png", "header_right.png")
            # Creates header_left.{png,json} and header_right.{png,json}

        Notes:
            - The header image doesn't need to be perfectly cropped initially -
              the tool will help you crop it precisely
            - Annotation accuracy is important: misaligned lines will cause
              segmentation errors
            - You can re-run this method to update annotations if needed
        """

        if not exists(image_path):
            raise TauluException(f"Image path {image_path} does not exist")

        if os.path.isdir(output_path):
            raise TauluException("Output path should be a file")

        output_path = Path(output_path)

        # the crop is written as .png and the annotation as .json, whatever
        # suffix the caller used for output_path
        template = HeaderTemplate.annotate_image(
            os.fspath(image_path), crop=output_path.with_suffix(".png")
        )

        template.save(output_path.with_suffix(".json"))

    def segment_table(
        self,
        image: MatLike | PathLike[str] | str,
        filtered: Optional[MatLike | PathLike[str] | str] = None,
        debug_view: bool = False,
    ) -> TableGrid:
        """
        Segment a table image into a grid of cells.

        This is the main entry point for the taulu package. It orchestrates:

        1. **Header alignment**: Locates the table by matching the header template
           to the image using feature-based registration (ORB features + homography)
        2. **Grid detection**: Applies morphological filtering and cross-correlation
           to find corner intersections
        3. **Grid growing**: Iteratively detects corners row-by-row and column-by-column,
           starting from the aligned header position
        4. **Extrapolation**: Fills in any missing corners using polynomial regression
           based on neighboring detected points
        5. **Smoothing**: Refines corner positions for consistency
           (controlled by the `smooth_grid` constructor parameter)

        ## Performance Notes

        Processing time depends on:
        - Image resolution (use `processing_scale < 1.0` for large images)
        - Table complexity (more rows/columns = longer processing)
        - Parameter settings

        ## Troubleshooting

        **If segmentation fails (returns incomplete grid):**
        1. Enable `debug_view=True` to see where it stops
        2. Check if header alignment is correct (first debug image)
        3. Verify cross-correlation shows bright spots at corners
        4. Adjust `grow_threshold` (lower if stopping too early)
        5. Increase `search_region` if corners are far from expected positions

        **If segmentation is inaccurate (corners in wrong positions):**
        1. Check binarization quality (adjust `sauvola_k`)
        2. Verify cross-kernel size matches line thickness (adjust `cross_width`)
        3. Ensure morphology isn't over-connecting (reduce `morph_size`)
        4. Increase `distance_penalty` to enforce expected positions more strictly

        Args:
            image (MatLike | PathLike[str] | str): Table image to segment.
                Can be a file path or a numpy array (BGR or grayscale).

            filtered (MatLike | PathLike[str] | str | None): Optional pre-filtered
                binary image to use instead of computing it internally.
                Must be the same size as `image`. If provided, parameters related
                to filtering (e.g. `sauvola_k`, `morph_size`) are ignored.

                **GPU acceleration**: Use trained CNN model for corner detection:

                >>> from taulu.gpu import DeepConvNet, apply_kernel_to_image_tiled
                >>> model = DeepConvNet.load("model.pth")
                >>> filtered = apply_kernel_to_image_tiled(model, image)
                >>> grid = taulu.segment_table(image, filtered=filtered)

                Default: None

            debug_view (bool): If True, opens OpenCV windows showing intermediate
                processing steps:
                - Header alignment overlay
                - Binarized image
                - After morphological operations
                - Cross-correlation result
                - Growing progress (corner-by-corner)

                **Controls:**
                - Press 'n' to advance to next step
                - Press 'q' to quit immediately

                Useful for parameter tuning and understanding failures.
                Default: False

        Returns:
            TableGrid: A grid structure containing detected corner positions with
                methods for:

                **Position queries:**
                - `cell(point)`: Get (row, col) at pixel coordinates (x, y)
                - `cell_polygon(cell)`: Get 4 corners of a cell as (lt, rt, rb, lb)
                - `region(start, end)`: Get bounding box for a cell range

                **Image extraction:**
                - `crop_cell(img, cell, margin=0)`: Extract single cell with optional margin
                - `crop_region(img, start, end, margin=0)`: Extract rectangular region

                **Visualization:**
                - `show_cells(img)`: Interactive cell viewer (click to highlight)
                - `highlight_all_cells(img)`: Draw all cell boundaries
                - `visualize_points(img)`: Show detected corner points

                **Analysis:**
                - `text_regions(img, row)`: Find continuous text regions in a row
                - `cells()`: Generator yielding all (row, col) indices

                **Persistence:**
                - `save(path)`: Save grid to JSON file
                - `TableGrid.from_saved(path)`: Load grid from JSON

                **Properties:**
                - `rows`: Number of data rows (header not included)
                - `cols`: Number of columns
                - `points`: Raw list of detected corner coordinates

        Raises:
            TauluException: If image cannot be loaded, header alignment fails,
                or grid detection produces no results

        Examples:
            Basic segmentation:

            >>> from taulu import Taulu
            >>> import cv2
            >>>
            >>> taulu = Taulu("header.png")
            >>> grid = taulu.segment_table("table_page_01.png")
            >>>
            >>> print(f"Detected {grid.rows} rows and {grid.cols} columns")
            >>>
            >>> # Extract first cell
            >>> img = cv2.imread("table_page_01.png")
            >>> cell_img = grid.crop_cell(img, (0, 0))
            >>> cv2.imwrite("cell_0_0.png", cell_img)

            Debug mode for parameter tuning:

            >>> grid = taulu.segment_table("table_page_01.png", debug_view=True)
            # Windows open showing each step
            # Adjust parameters based on what you see

            Process multiple images with the same header:

            >>> taulu = Taulu("header.png", sauvola_k=0.25)
            >>>
            >>> for i in range(1, 11):
            ...     img_path = f"table_page_{i:02d}.png"
            ...     grid = taulu.segment_table(img_path)
            ...     grid.save(f"grid_{i:02d}.json")
            ...     print(f"Page {i}: {grid.rows} rows detected")

            Extract all cells from a table:

            >>> img = cv2.imread("table.png")
            >>> grid = taulu.segment_table("table.png")
            >>>
            >>> for row, col in grid.cells():
            ...     cell_img = grid.crop_cell(img, (row, col), margin=5)
            ...     cv2.imwrite(f"cell_{row}_{col}.png", cell_img)

            Find text regions for OCR:

            >>> for row in range(grid.rows):
            ...     text_regions = grid.text_regions(img, row)
            ...     for start_cell, end_cell in text_regions:
            ...         # Extract region spanning multiple cells
            ...         region_img = grid.crop_region(img, start_cell, end_cell)
            ...         # Run OCR on region_img...

        See Also:
            - `TableGrid`: Complete documentation of the returned object
            - `GridDetector.find_table_points()`: Lower-level grid detection
            - `HeaderAligner.align()`: Lower-level header alignment
        """

        if not isinstance(image, MatLike):
            image_path = os.fspath(image)
            image = cv2.imread(image_path)
            # cv2.imread signals failure by returning None instead of raising
            if image is None:
                raise TauluException(f"Could not load image from {image_path}")

        now = perf_counter()
        # in split mode `_aligner` is a Split, so this (and the calls below)
        # runs once per side and yields a Split of results
        h = self._aligner.align(image, visual=debug_view)
        align_time = perf_counter() - now
        logger.info("Header alignment took %.2f seconds", align_time)

        # find the starting point for the table grid algorithm
        left_top_template = self._template.intersection((1, 0))
        if isinstance(left_top_template, Split):
            left_top_template = Split(
                (int(left_top_template.left[0]), int(left_top_template.left[1])),
                (int(left_top_template.right[0]), int(left_top_template.right[1])),
            )
        else:
            left_top_template = (int(left_top_template[0]), int(left_top_template[1]))

        # map the template's starting corner into image coordinates
        left_top_table = self._aligner.template_to_img(h, left_top_template)

        now = perf_counter()
        table = self._grid_detector.find_table_points(
            image,
            left_top_table,
            self._template.cell_widths(0),
            self._cell_heights,
            visual=debug_view,
            filtered=filtered,
            smooth=self._smooth,
        )
        grid_time = perf_counter() - now
        logger.info("Grid detection took %.2f seconds", grid_time)

        if isinstance(table, Split):
            # merge the left and right grids into one unified grid
            table = TableGrid.from_split(table, (0, 0))

        return table
High-level API for table segmentation from images.
Taulu provides a simplified interface that orchestrates header alignment, grid detection, and table segmentation into a single workflow. It's designed to hide complexity while still allowing fine-tuned control through parameters.
Workflow Overview
- Header Template Creation: Use `Taulu.annotate()` to create annotated header images that define your table structure
- Initialization: Create a Taulu instance with your header(s) and parameters
- Segmentation: Call `segment_table()` on your table images to get a `TableGrid` object containing all detected cell boundaries
Single vs Split Tables
Taulu supports two modes:
- Single header: For tables that fit on one page or have consistent structure
- Split header: For tables that span two pages (left/right) with potentially different parameters for each side
Use Split[T] objects to provide different parameters for left and right sides.
Parameter Tuning Strategy
If segmentation fails or is inaccurate:
- Visual debugging: Set `debug_view=True` in `segment_table()` to see intermediate results
- Adjust thresholding: Modify `sauvola_k` to change binarization sensitivity
  - Increase to remove more noise (more aggressive)
  - Decrease to preserve faint lines
- Tune cross-kernel: Adjust `cross_width`, `cross_height`, `kernel_size` to match your rule thickness after morphology
- Morphology: Increase `morph_size` to connect broken lines, but be aware this also thickens lines (requiring larger `cross_width`)
- Search parameters: Increase `search_region` for warped documents, adjust `distance_penalty` to control how strictly positions are enforced
- Growth parameters: Lower `grow_threshold` if the algorithm stops too early, increase `look_distance` for better extrapolation
Examples:
Basic usage with a single header:
>>> from taulu import Taulu
>>>
>>> # First, create annotated header (one-time setup)
>>> Taulu.annotate("table_image.png", "header.png")
>>> # This creates header.png and header.json
>>>
>>> # Initialize Taulu with the header
>>> taulu = Taulu(
...     header_image_path="header.png",
...     cell_height_factor=0.8,  # Rows are 80% of header height
...     sauvola_k=0.25,
...     search_region=60,
...     cross_width=10
... )
>>>
>>> # Segment a table image
>>> grid = taulu.segment_table("table_page_01.png")
>>>
>>> # Use the grid to extract cells
>>> import cv2
>>> img = cv2.imread("table_page_01.png")
>>> cell_image = grid.crop_cell(img, (0, 0))  # First cell

Using split headers for two-page tables:
>>> from taulu import Taulu, Split
>>>
>>> # Annotate both headers
>>> Taulu.annotate("scan_01.png", "header_left.png")
>>> Taulu.annotate("scan_01.png", "header_right.png")
>>>
>>> # Use different parameters for each side
>>> taulu = Taulu(
...     header_image_path=Split("header_left.png", "header_right.png"),
...     cell_height_factor=Split([0.8, 0.9], [0.75]),  # Different row heights
...     sauvola_k=Split(0.25, 0.30),  # Different thresholds
...     cross_width=10  # Same for both sides
... )
>>>
>>> # Segment returns a unified grid
>>> grid = taulu.segment_table("scan_01.png")

Debug visualization to tune parameters:
>>> taulu = Taulu("header.png", sauvola_k=0.15) >>> >>> # Opens windows showing each processing step >>> # Press 'n' to advance, 'q' to quit >>> grid = taulu.segment_table("table.png", debug_view=True) >>> >>> # Adjust parameters based on what you see: >>> # - If binarization is too noisy: increase sauvola_k >>> # - If lines are broken after morphology: increase morph_size >>> # - If filtered image has "undefined" corners: adjust cross_width to match line thickness (after morphology) >>> # - If corners are missed during search: decrease grow_threshold or increase search_region
Attributes:
- _header (MatLike | Split[MatLike]): Loaded header image(s)
- _aligner (HeaderAligner | Split[HeaderAligner]): Header alignment engine(s)
- _template (HeaderTemplate | Split[HeaderTemplate]): Parsed header structure(s)
- _grid_detector (GridDetector | Split[GridDetector]): Grid detection engine(s)
- _cell_heights (list[int] | Split[list[int]]): Computed cell heights in pixels
Raises:
- TauluException: If header files don't exist, annotation is missing, or Split parameters are used incorrectly with single headers
See Also:
- `TableGrid`: The result object with methods for accessing cells
- `Split`: Container for paired left/right parameters
- `GridDetector`: Lower-level grid detection (for advanced usage)
- `HeaderAligner`: Lower-level header alignment (for advanced usage)
def __init__(
    self,
    header_image_path: PathLike[str] | str | Split[PathLike[str] | str],
    cell_height_factor: float | list[float] | Split[float | list[float]] = [1.0],
    header_anno_path: PathLike[str]
    | str
    | Split[PathLike[str] | str]
    | None = None,
    sauvola_k: float | Split[float] = 0.25,
    search_region: int | Split[int] = 60,
    distance_penalty: float | Split[float] = 0.4,
    cross_width: int | Split[int] = 10,
    morph_size: int | Split[int] = 4,
    kernel_size: int | Split[int] = 41,
    processing_scale: float | Split[float] = 1.0,
    min_rows: int | Split[int] = 5,
    look_distance: int | Split[int] = 3,
    grow_threshold: float | Split[float] = 0.3,
    smooth_grid: bool = False,
):
    """
    Load the header image(s) and annotation(s) and build the aligner and
    grid detector used by `segment_table()`.

    Args:
        header_image_path:
            Path to the header template image(s). The header should be a cropped
            image showing a clear view of the table's first row. An annotation
            file (.json) must exist alongside the image, created via `Taulu.annotate()`.
            For split tables, provide a `Split` containing left and right header paths.

        cell_height_factor:
            Height of data rows relative to header height. For example, if your
            header is 100px tall and data rows are 80px tall, use 0.8.

            - **float**: All rows have the same height
            - **list[float]**: Different heights for different rows. The last value
              is repeated for any additional rows beyond the list length. Useful when
              the first data row is taller than subsequent rows.
            - **Split**: Different height factors for left and right sides

            Default: [1.0]

        header_anno_path (PathLike[str] | str | Split[PathLike[str] | str] | None):
            Optional explicit path to header annotation JSON file(s). If None,
            looks for a .json file with the same name as `header_image_path`.
            Must be a `Split` if (and only if) `header_image_path` is a `Split`.
            Default: None

        sauvola_k (float | Split[float]):
            Threshold sensitivity for Sauvola adaptive binarization (0.0-1.0).
            Controls how aggressively the algorithm converts the image to binary.

            - **Lower values** (0.04-0.15): Preserve faint lines, more noise
            - **Higher values** (0.20-0.35): Remove noise, may lose faint lines

            Start with 0.25 and adjust based on your image quality.
            Default: 0.25

        search_region (int | Split[int]):
            Size in pixels of the square region to search for the next corner point.
            The algorithm estimates where a corner should be, then searches within
            this region for the best match.

            - **Smaller values** (20-40): Faster, requires well-aligned tables
            - **Larger values** (60-100): More robust to warping and distortion

            Default: 60

        distance_penalty (float | Split[float]):
            Weight factor [0, 1] for penalizing corners far from expected position.
            Uses Gaussian weighting within the search region.

            - **0.0**: No penalty, any position in search region is equally valid
            - **0.5**: Moderate preference for positions near the expected location
            - **1.0**: Strong preference, only accepts positions very close to expected

            Default: 0.4

        cross_width (int | Split[int]):
            Width in pixels of the cross-shaped kernel used to detect intersections.
            Should approximately match the thickness of your table rules AFTER
            morphological dilation.

            **Tuning**: Look at the dilated image in debug_view. The cross_width
            should match the thickness of the black lines you see.
            Default: 10

        morph_size (int | Split[int]):
            Size of morphological structuring element for dilation. Controls how
            much gap-bridging occurs to connect broken line segments.

            - **Smaller values** (2-4): Minimal connection, preserves thin lines
            - **Larger values** (6-10): Connects larger gaps, but thickens lines

            Note: Increasing this requires increasing `cross_width` proportionally.
            Default: 4

        kernel_size (int | Split[int]):
            Size of the cross-shaped kernel (must be odd). Larger kernels are more
            selective, reducing false positives but potentially missing valid corners.

            - **Smaller values** (21-31): More sensitive, finds more candidates
            - **Larger values** (41-61): More selective, fewer false positives

            Default: 41

        processing_scale (float | Split[float]):
            Image downscaling factor (0, 1] for processing speed. Processing is done
            on scaled images, then results are scaled back to original size.

            - **1.0**: Full resolution (slowest, most accurate)
            - **0.5-0.75**: Good balance for high-res scans (2x-4x speedup)
            - **0.25-0.5**: Fast processing for very large images

            Default: 1.0

        min_rows (int | Split[int]):
            Minimum number of rows required before the algorithm considers the
            table complete. Prevents stopping too early on tables with initial
            low-confidence detections.
            Default: 5

        look_distance (int | Split[int]):
            Number of adjacent rows/columns to examine when extrapolating missing
            corners using polynomial regression. Higher values provide more context
            but may smooth over legitimate variations.

            - **2-3**: Good for consistent grids
            - **4-6**: Better for grids with some irregularity

            Default: 3

        grow_threshold (float | Split[float]):
            Initial minimum confidence [0, 1] required to accept a detected corner
            during the growing phase. The algorithm may adaptively lower this
            threshold if growth stalls.

            - **Higher values** (0.5-0.8): Stricter, fewer errors but may miss valid corners
            - **Lower values** (0.2-0.4): More permissive, finds more corners but more errors

            Default: 0.3

        smooth_grid (bool):
            Whether or not to apply local smoothing logic to the grid after point detection.
            This may clean up rugged parts of the grid but could also lead to small inaccuracies
            if the grid is actually locally _not smooth_.

            Default: False

    Raises:
        TauluException: If a header image or annotation file does not exist,
            if `header_image_path` and `header_anno_path` disagree about being
            a `Split`, or if `Split` parameters are combined with a single header.
    """
    self._processing_scale = processing_scale
    self._cell_height_factor = cell_height_factor
    self._smooth = smooth_grid

    # NOTE: the list default of `cell_height_factor` is shared between calls;
    # it is never mutated in this method.

    # Keyword arguments for GridDetector; each value may still be a Split
    # here and is resolved per side with get_param() in split mode.
    detector_params = {
        "kernel_size": kernel_size,
        "cross_width": cross_width,
        "morph_size": morph_size,
        "search_region": search_region,
        "sauvola_k": sauvola_k,
        "distance_penalty": distance_penalty,
        "scale": processing_scale,
        "min_rows": min_rows,
        "look_distance": look_distance,
        "grow_threshold": grow_threshold,
    }

    if isinstance(header_image_path, Split):
        # The annotation paths are read through .left/.right below, so a
        # plain path would fail with an AttributeError; reject it clearly.
        if header_anno_path is not None and not isinstance(header_anno_path, Split):
            raise TauluException(
                "header_anno_path must be a Split (or None) when header_image_path is a Split"
            )

        header = Split(Path(header_image_path.left), Path(header_image_path.right))

        if not exists(header.left.with_suffix(".png")) or not exists(
            header.right.with_suffix(".png")
        ):
            raise TauluException(
                "The header images you provided do not exist (or they aren't .png files)"
            )

        if header_anno_path is None:
            # Default: the annotation sits next to the image as <name>.json
            anno_left = header.left.with_suffix(".json")
            anno_right = header.right.with_suffix(".json")
            if not exists(anno_left) or not exists(anno_right):
                raise TauluException(
                    "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
                )
        else:
            anno_left = header_anno_path.left
            anno_right = header_anno_path.right
            if not exists(anno_left) or not exists(anno_right):
                raise TauluException(
                    "The header annotation files you provided do not exist (or they aren't .json files)"
                )

        template_left = HeaderTemplate.from_saved(anno_left)
        template_right = HeaderTemplate.from_saved(anno_right)

        self._header = Split(
            cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right))
        )

        self._aligner = Split(
            HeaderAligner(
                self._header.left, scale=get_param(processing_scale, "left")
            ),
            HeaderAligner(
                self._header.right, scale=get_param(processing_scale, "right")
            ),
        )

        self._template = Split(template_left, template_right)

        self._cell_heights = Split(
            template_left.cell_heights(get_param(cell_height_factor, "left")),
            template_right.cell_heights(get_param(cell_height_factor, "right")),
        )

        # One detector per side, each with its own resolved parameters
        self._grid_detector = Split(
            GridDetector(
                **{
                    name: get_param(value, "left")
                    for name, value in detector_params.items()
                }
            ),
            GridDetector(
                **{
                    name: get_param(value, "right")
                    for name, value in detector_params.items()
                }
            ),
        )

    else:
        # A Split annotation path makes no sense with a single header image
        if isinstance(header_anno_path, Split):
            raise TauluException(
                "A Split header_anno_path can only be used with a Split header_image_path"
            )

        # For single header, parameters should not be Split objects.
        # Checked before any file is read so misuse fails fast.
        if any(
            isinstance(param, Split)
            for param in (*detector_params.values(), cell_height_factor)
        ):
            raise TauluException(
                "Split parameters can only be used with split headers (tuple header_path)"
            )

        header_image_path = Path(header_image_path)
        # cv2.imread returns None for unreadable files instead of raising,
        # so check for existence explicitly.
        if not exists(header_image_path):
            raise TauluException("The header image you provided does not exist")

        if header_anno_path is None:
            anno_path = header_image_path.with_suffix(".json")
            if not exists(anno_path):
                raise TauluException(
                    "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
                )
        else:
            # Honour the explicit annotation path, as in split mode
            anno_path = header_anno_path
            if not exists(anno_path):
                raise TauluException(
                    "The header annotation file you provided does not exist"
                )

        self._header = cv2.imread(os.fspath(header_image_path))
        # Pass the processing scale to the aligner too, consistent with the
        # split-header branch and with the GridDetector below.
        self._aligner = HeaderAligner(self._header, scale=processing_scale)
        self._template = HeaderTemplate.from_saved(anno_path)

        self._cell_heights = self._template.cell_heights(cell_height_factor)

        self._grid_detector = GridDetector(**detector_params)
Arguments:
- header_image_path: Path to the header template image(s). The header should be a cropped image showing a clear view of the table's first row. An annotation file (.json) must exist alongside the image, created via `Taulu.annotate()`. For split tables, provide a `Split` containing left and right header paths.
- cell_height_factor: Height of data rows relative to header height. For example, if your header is 100px tall and data rows are 80px tall, use 0.8.
- float: All rows have the same height
- list[float]: Different heights for different rows. The last value is repeated for any additional rows beyond the list length. Useful when the first data row is taller than subsequent rows.
- Split: Different height factors for left and right sides
Default: [1.0]
- header_anno_path (PathLike[str] | str | Split[PathLike[str] | str] | None): Optional explicit path to header annotation JSON file(s). If None,
looks for a .json file with the same name as
header_image_path. Default: None sauvola_k (float | Split[float]): Threshold sensitivity for Sauvola adaptive binarization (0.0-1.0). Controls how aggressively the algorithm converts the image to binary.
- Lower values (0.04-0.15): Preserve faint lines, more noise
- Higher values (0.20-0.35): Remove noise, may lose faint lines
Start with 0.25 and adjust based on your image quality. Default: 0.25
search_region (int | Split[int]): Size in pixels of the square region to search for the next corner point. The algorithm estimates where a corner should be, then searches within this region for the best match.
- Smaller values (20-40): Faster, requires well-aligned tables
- Larger values (60-100): More robust to warping and distortion
Default: 60
distance_penalty (float | Split[float]): Weight factor [0, 1] for penalizing corners far from expected position. Uses Gaussian weighting within the search region.
- 0.0: No penalty, any position in search region is equally valid
- 0.5: Moderate preference for positions near the expected location
- 1.0: Strong preference, only accepts positions very close to expected
Default: 0.4
cross_width (int | Split[int]): Width in pixels of the cross-shaped kernel used to detect intersections. Should approximately match the thickness of your table rules AFTER morphological dilation.
Tuning: Look at the dilated image in debug_view. The cross_width should match the thickness of the black lines you see. Default: 10
morph_size (int | Split[int]): Size of morphological structuring element for dilation. Controls how much gap-bridging occurs to connect broken line segments.
- Smaller values (2-4): Minimal connection, preserves thin lines
- Larger values (6-10): Connects larger gaps, but thickens lines
Note: Increasing this requires increasing `cross_width` proportionally. Default: 4
kernel_size (int | Split[int]): Size of the cross-shaped kernel (must be odd). Larger kernels are more selective, reducing false positives but potentially missing valid corners.
- Smaller values (21-31): More sensitive, finds more candidates
- Larger values (41-61): More selective, fewer false positives
Default: 41
processing_scale (float | Split[float]): Image downscaling factor (0, 1] for processing speed. Processing is done on scaled images, then results are scaled back to original size.
- 1.0: Full resolution (slowest, most accurate)
- 0.5-0.75: Good balance for high-res scans (2x-4x speedup)
- 0.25-0.5: Fast processing for very large images
Default: 1.0
- min_rows (int | Split[int]): Minimum number of rows required before the algorithm considers the table complete. Prevents stopping too early on tables with initial low-confidence detections. Default: 5
look_distance (int | Split[int]): Number of adjacent rows/columns to examine when extrapolating missing corners using polynomial regression. Higher values provide more context but may smooth over legitimate variations.
- 2-3: Good for consistent grids
- 4-6: Better for grids with some irregularity
Default: 3
grow_threshold (float | Split[float]): Initial minimum confidence [0, 1] required to accept a detected corner during the growing phase. The algorithm may adaptively lower this threshold if growth stalls.
- Higher values (0.5-0.8): Stricter, fewer errors but may miss valid corners
- Lower values (0.2-0.4): More permissive, finds more corners but more errors
Default: 0.3
smooth_grid (bool): Whether or not to apply local smoothing logic to the grid after point detection. This may clean up rugged parts of the grid but could also lead to small inaccuracies if the grid is actually locally _not smooth_.
Default: False
@staticmethod
def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str):
    """
    Interactively create the header annotation used for table segmentation.

    The annotation happens in two steps:

    1. **Crop the header**: Click four corners to define the header region
    2. **Annotate lines**: Click pairs of points to define each vertical and
       horizontal line in the header

    Two files are written, both derived from `output_path`:
    - the cropped header image, with a .png suffix
    - the line annotation, with a .json suffix

    ## Annotation Guidelines

    **Which lines to annotate:**
    - All vertical lines that extend into the table body (column separators)
    - The top horizontal line of the header
    - The bottom horizontal line of the header (top of data rows)

    **Order doesn't matter** - annotate lines in any order that's convenient.

    **To annotate a line:**
    1. Click once at one endpoint
    2. Click again at the other endpoint
    3. A green line appears showing your annotation

    **To undo:**
    - Right-click anywhere to remove the last line you drew

    **When finished:**
    - Press 'n' to save and exit
    - Press 'q' to quit without saving

    Args:
        image_path (PathLike[str] | str): Path to a table image containing
            a clear view of the header. This can be a full table image.
        output_path (PathLike[str] | str): Where to save the cropped header
            image. The annotation JSON will be saved with the same name but
            .json extension.

    Raises:
        TauluException: If image_path doesn't exist or output_path is a directory

    Examples:
        Annotate a single header:

        >>> from taulu import Taulu
        >>> Taulu.annotate("scan_page_01.png", "header.png")
        >>> # Interactive window opens
        >>> # After annotation: creates header.png and header.json

        Annotate left and right headers for a split table:

        >>> Taulu.annotate("scan_page_01.png", "header_left.png")
        >>> Taulu.annotate("scan_page_01.png", "header_right.png")
        >>> # Creates header_left.{png,json} and header_right.{png,json}

    Notes:
        - The header image doesn't need to be perfectly cropped initially -
          the tool will help you crop it precisely
        - Annotation accuracy is important: misaligned lines will cause
          segmentation errors
        - You can re-run this method to update annotations if needed
    """
    # Validate both paths up front, before any window is opened
    source_missing = not exists(image_path)
    if source_missing:
        raise TauluException(f"Image path {image_path} does not exist")
    if os.path.isdir(output_path):
        raise TauluException("Output path should be a file")

    # Both output files share the stem of output_path and differ only in suffix
    destination = Path(output_path)
    cropped_image_path = destination.with_suffix(".png")
    annotation_path = destination.with_suffix(".json")

    header_template = HeaderTemplate.annotate_image(
        os.fspath(image_path), crop=cropped_image_path
    )
    header_template.save(annotation_path)
Interactive tool to create header annotations for table segmentation.
This method guides you through a two-step annotation process:
- Crop the header: Click four corners to define the header region
- Annotate lines: Click pairs of points to define each vertical and horizontal line in the header
The annotations are saved as:
- A cropped header image (.png) at
output_path - A JSON file (.json) containing line coordinates
Annotation Guidelines
Which lines to annotate:
- All vertical lines that extend into the table body (column separators)
- The top horizontal line of the header
- The bottom horizontal line of the header (top of data rows)
Order doesn't matter - annotate lines in any order that's convenient.
To annotate a line:
- Click once at one endpoint
- Click again at the other endpoint
- A green line appears showing your annotation
To undo:
- Right-click anywhere to remove the last line you drew
When finished:
- Press 'n' to save and exit
- Press 'q' to quit without saving
Arguments:
- image_path (PathLike[str] | str): Path to a table image containing a clear view of the header. This can be a full table image.
- output_path (PathLike[str] | str): Where to save the cropped header image. The annotation JSON will be saved with the same name but .json extension.
Raises:
- TauluException: If image_path doesn't exist or output_path is a directory
Examples:
Annotate a single header:
>>> from taulu import Taulu
>>> Taulu.annotate("scan_page_01.png", "header.png")
>>> # Interactive window opens
>>> # After annotation: creates header.png and header.json
Annotate left and right headers for a split table:
>>> Taulu.annotate("scan_page_01.png", "header_left.png")
>>> Taulu.annotate("scan_page_01.png", "header_right.png")
>>> # Creates header_left.{png,json} and header_right.{png,json}
Notes:
- The header image doesn't need to be perfectly cropped initially - the tool will help you crop it precisely
- Annotation accuracy is important: misaligned lines will cause segmentation errors
- You can re-run this method to update annotations if needed
def segment_table(
    self,
    image: MatLike | PathLike[str] | str,
    filtered: Optional[MatLike | PathLike[str] | str] = None,
    debug_view: bool = False,
) -> TableGrid:
    """
    Segment a table image into a grid of cells.

    This is the main entry point for the taulu package. It orchestrates:

    1. **Header alignment**: Locates the table by matching the header template
       to the image using feature-based registration (ORB features + homography)
    2. **Grid detection**: Applies morphological filtering and cross-correlation
       to find corner intersections
    3. **Grid growing**: Iteratively detects corners row-by-row and column-by-column,
       starting from the aligned header position
    4. **Extrapolation**: Fills in any missing corners using polynomial regression
       based on neighboring detected points
    5. **Smoothing**: Refines corner positions for consistency (controlled by
       the `smooth_grid` constructor argument)

    ## Performance Notes

    Processing time depends on:
    - Image resolution (use `processing_scale < 1.0` for large images)
    - Table complexity (more rows/columns = longer processing)
    - Parameter settings

    ## Troubleshooting

    **If segmentation fails (returns incomplete grid):**
    1. Enable `debug_view=True` to see where it stops
    2. Check if header alignment is correct (first debug image)
    3. Verify cross-correlation shows bright spots at corners
    4. Adjust `grow_threshold` (lower if stopping too early)
    5. Increase `search_region` if corners are far from expected positions

    **If segmentation is inaccurate (corners in wrong positions):**
    1. Check binarization quality (adjust `sauvola_k`)
    2. Verify cross-kernel size matches line thickness (adjust `cross_width`)
    3. Ensure morphology isn't over-connecting (reduce `morph_size`)
    4. Increase `distance_penalty` to enforce expected positions more strictly

    Args:
        image (MatLike | PathLike[str] | str): Table image to segment.
            Can be a file path or a numpy array (BGR or grayscale).

        filtered (MatLike | PathLike[str] | str | None): Optional pre-filtered
            binary image to use instead of computing it internally.
            Must be the same size as `image`. If provided, parameters related
            to filtering (e.g. `sauvola_k`, `morph_size`) are ignored.
            It is forwarded unchanged to the grid detector.

            **GPU acceleration**: Use trained CNN model for corner detection:

            >>> from taulu.gpu import DeepConvNet, apply_kernel_to_image_tiled
            >>> model = DeepConvNet.load("model.pth")
            >>> filtered = apply_kernel_to_image_tiled(model, image)
            >>> grid = taulu.segment_table(image, filtered=filtered)

            Default: None

        debug_view (bool): If True, opens OpenCV windows showing intermediate
            processing steps:
            - Header alignment overlay
            - Binarized image
            - After morphological operations
            - Cross-correlation result
            - Growing progress (corner-by-corner)

            **Controls:**
            - Press 'n' to advance to next step
            - Press 'q' to quit immediately

            Useful for parameter tuning and understanding failures.
            Default: False

    Returns:
        TableGrid: A grid structure containing detected corner positions with
            methods for:

            **Position queries:**
            - `cell(point)`: Get (row, col) at pixel coordinates (x, y)
            - `cell_polygon(cell)`: Get 4 corners of a cell as (lt, rt, rb, lb)
            - `region(start, end)`: Get bounding box for a cell range

            **Image extraction:**
            - `crop_cell(img, cell, margin=0)`: Extract single cell with optional margin
            - `crop_region(img, start, end, margin=0)`: Extract rectangular region

            **Visualization:**
            - `show_cells(img)`: Interactive cell viewer (click to highlight)
            - `highlight_all_cells(img)`: Draw all cell boundaries
            - `visualize_points(img)`: Show detected corner points

            **Analysis:**
            - `text_regions(img, row)`: Find continuous text regions in a row
            - `cells()`: Generator yielding all (row, col) indices

            **Persistence:**
            - `save(path)`: Save grid to JSON file
            - `TableGrid.from_saved(path)`: Load grid from JSON

            **Properties:**
            - `rows`: Number of data rows (header not included)
            - `cols`: Number of columns
            - `points`: Raw list of detected corner coordinates

    Raises:
        TauluException: If image cannot be loaded, header alignment fails,
            or grid detection produces no results

    Examples:
        Basic segmentation:

        >>> from taulu import Taulu
        >>> import cv2
        >>>
        >>> taulu = Taulu("header.png")
        >>> grid = taulu.segment_table("table_page_01.png")
        >>>
        >>> print(f"Detected {grid.rows} rows and {grid.cols} columns")
        >>>
        >>> # Extract first cell
        >>> img = cv2.imread("table_page_01.png")
        >>> cell_img = grid.crop_cell(img, (0, 0))
        >>> cv2.imwrite("cell_0_0.png", cell_img)

        Debug mode for parameter tuning:

        >>> grid = taulu.segment_table("table_page_01.png", debug_view=True)
        >>> # Windows open showing each step
        >>> # Adjust parameters based on what you see

        Process multiple images with the same header:

        >>> taulu = Taulu("header.png", sauvola_k=0.25)
        >>>
        >>> for i in range(1, 11):
        ...     img_path = f"table_page_{i:02d}.png"
        ...     grid = taulu.segment_table(img_path)
        ...     grid.save(f"grid_{i:02d}.json")
        ...     print(f"Page {i}: {grid.rows} rows detected")

        Extract all cells from a table:

        >>> img = cv2.imread("table.png")
        >>> grid = taulu.segment_table("table.png")
        >>>
        >>> for row, col in grid.cells():
        ...     cell_img = grid.crop_cell(img, (row, col), margin=5)
        ...     cv2.imwrite(f"cell_{row}_{col}.png", cell_img)

        Find text regions for OCR:

        >>> for row in range(grid.rows):
        ...     text_regions = grid.text_regions(img, row)
        ...     for start_cell, end_cell in text_regions:
        ...         # Extract region spanning multiple cells
        ...         region_img = grid.crop_region(img, start_cell, end_cell)
        ...         # Run OCR on region_img...

    See Also:
        - `TableGrid`: Complete documentation of the returned object
        - `GridDetector.find_table_points()`: Lower-level grid detection
        - `HeaderAligner.align()`: Lower-level header alignment
    """

    if not isinstance(image, MatLike):
        image_path = os.fspath(image)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here instead of deep inside the aligner.
        if image is None:
            raise TauluException(f"Could not load the table image at {image_path}")

    now = perf_counter()
    h = self._aligner.align(image, visual=debug_view)
    align_time = perf_counter() - now
    logger.info("Header alignment took %.2f seconds", align_time)

    # find the starting point for the table grid algorithm
    left_top_template = self._template.intersection((1, 0))
    if isinstance(left_top_template, Split):
        # split header: one integer starting point per side
        left_top_template = Split(
            (int(left_top_template.left[0]), int(left_top_template.left[1])),
            (int(left_top_template.right[0]), int(left_top_template.right[1])),
        )
    else:
        left_top_template = (int(left_top_template[0]), int(left_top_template[1]))

    # map the template-space starting point into image coordinates
    left_top_table = self._aligner.template_to_img(h, left_top_template)

    now = perf_counter()
    table = self._grid_detector.find_table_points(
        image,
        left_top_table,
        self._template.cell_widths(0),
        self._cell_heights,
        visual=debug_view,
        filtered=filtered,
        smooth=self._smooth,
    )
    grid_time = perf_counter() - now
    logger.info("Grid detection took %.2f seconds", grid_time)

    # split headers yield one result per side; merge them into a single grid
    if isinstance(table, Split):
        table = TableGrid.from_split(table, (0, 0))

    return table
Segment a table image into a grid of cells.
This is the main entry point for the taulu package. It orchestrates:
- Header alignment: Locates the table by matching the header template to the image using feature-based registration (ORB features + homography)
- Grid detection: Applies morphological filtering and cross-correlation to find corner intersections
- Grid growing: Iteratively detects corners row-by-row and column-by-column, starting from the aligned header position
- Extrapolation: Fills in any missing corners using polynomial regression based on neighboring detected points
- Smoothing: Refines corner positions for consistency
Performance Notes
Processing time depends on:
- Image resolution (use `processing_scale < 1.0` for large images)
- Table complexity (more rows/columns = longer processing)
- Parameter settings
Troubleshooting
If segmentation fails (returns incomplete grid):
- Enable `debug_view=True` to see where it stops
- Check if header alignment is correct (first debug image)
- Verify cross-correlation shows bright spots at corners
- Adjust `grow_threshold` (lower if stopping too early)
- Increase `search_region` if corners are far from expected positions
If segmentation is inaccurate (corners in wrong positions):
- Check binarization quality (adjust `sauvola_k`)
- Verify cross-kernel size matches line thickness (adjust `cross_width`)
- Ensure morphology isn't over-connecting (reduce `morph_size`)
- Increase `distance_penalty` to enforce expected positions more strictly
Arguments:
- image (MatLike | PathLike[str] | str): Table image to segment. Can be a file path or a numpy array (BGR or grayscale).
- filtered (MatLike | PathLike[str] | str | None): Optional pre-filtered binary image to use instead of computing it internally. Must be the same size as `image`. If provided, parameters related to filtering (e.g. `sauvola_k`, `morph_size`) are ignored.

  GPU acceleration: Use trained CNN model for corner detection:

      >>> from taulu.gpu import DeepConvNet, apply_kernel_to_image_tiled
      >>> model = DeepConvNet.load("model.pth")
      >>> filtered = apply_kernel_to_image_tiled(model, image)
      >>> grid = taulu.segment_table(image, filtered=filtered)

  Default: None
- debug_view (bool): If True, opens OpenCV windows showing intermediate processing steps:
- Header alignment overlay
- Binarized image
- After morphological operations
- Cross-correlation result
- Growing progress (corner-by-corner)
Controls:
- Press 'n' to advance to next step
- Press 'q' to quit immediately
Useful for parameter tuning and understanding failures. Default: False
Returns:
TableGrid: A grid structure containing detected corner positions with methods for:
**Position queries:**
- `cell(point)`: Get (row, col) at pixel coordinates (x, y)
- `cell_polygon(cell)`: Get 4 corners of a cell as (lt, rt, rb, lb)
- `region(start, end)`: Get bounding box for a cell range

**Image extraction:**
- `crop_cell(img, cell, margin=0)`: Extract single cell with optional margin
- `crop_region(img, start, end, margin=0)`: Extract rectangular region

**Visualization:**
- `show_cells(img)`: Interactive cell viewer (click to highlight)
- `highlight_all_cells(img)`: Draw all cell boundaries
- `visualize_points(img)`: Show detected corner points

**Analysis:**
- `text_regions(img, row)`: Find continuous text regions in a row
- `cells()`: Generator yielding all (row, col) indices

**Persistence:**
- `save(path)`: Save grid to JSON file
- `TableGrid.from_saved(path)`: Load grid from JSON

**Properties:**
- `rows`: Number of data rows (header not included)
- `cols`: Number of columns
- `points`: Raw list of detected corner coordinates
Raises:
- TauluException: If image cannot be loaded, header alignment fails, or grid detection produces no results
Examples:
Basic segmentation:
    >>> from taulu import Taulu
    >>> import cv2
    >>>
    >>> taulu = Taulu("header.png")
    >>> grid = taulu.segment_table("table_page_01.png")
    >>>
    >>> print(f"Detected {grid.rows} rows and {grid.cols} columns")
    >>>
    >>> # Extract first cell
    >>> img = cv2.imread("table_page_01.png")
    >>> cell_img = grid.crop_cell(img, (0, 0))
    >>> cv2.imwrite("cell_0_0.png", cell_img)

Debug mode for parameter tuning:

    >>> grid = taulu.segment_table("table_page_01.png", debug_view=True)
    >>> # Windows open showing each step
    >>> # Adjust parameters based on what you see
Process multiple images with the same header:
    >>> taulu = Taulu("header.png", sauvola_k=0.25)
    >>>
    >>> for i in range(1, 11):
    ...     img_path = f"table_page_{i:02d}.png"
    ...     grid = taulu.segment_table(img_path)
    ...     grid.save(f"grid_{i:02d}.json")
    ...     print(f"Page {i}: {grid.rows} rows detected")

Extract all cells from a table:

    >>> img = cv2.imread("table.png")
    >>> grid = taulu.segment_table("table.png")
    >>>
    >>> for row, col in grid.cells():
    ...     cell_img = grid.crop_cell(img, (row, col), margin=5)
    ...     cv2.imwrite(f"cell_{row}_{col}.png", cell_img)

Find text regions for OCR:

    >>> for row in range(grid.rows):
    ...     text_regions = grid.text_regions(img, row)
    ...     for start_cell, end_cell in text_regions:
    ...         # Extract region spanning multiple cells
    ...         region_img = grid.crop_region(img, start_cell, end_cell)
    ...         # Run OCR on region_img...
See Also:
- `TableGrid`: Complete documentation of the returned object
- `GridDetector.find_table_points()`: Lower-level grid detection
- `HeaderAligner.align()`: Lower-level header alignment