taulu

Taulu - segment tables from images

Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).

To use this package, you first need to make an annotation of the headers in your table images. The idea is that these headers will be similar across your full set of images, and they will be used as a starting point for the search algorithm that finds the table grid.

Here is an example python script of how to use Taulu:

from taulu import Taulu, Split
import os


def setup():
    # create an Annotation file of the headers in the image
    # (one for the left header, one for the right)
    # and store them in the examples directory
    print("Annotating the LEFT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")

    print("Annotating the RIGHT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")


def main():
    taulu = Taulu(Split("table_00_header_left.png", "table_00_header_right.png"))
    table = taulu.segment_table("../data/table_00.png", debug_view=True)

    table.show_cells("../data/table_00.png")


if __name__ == "__main__":
    if os.path.exists("table_00_header_left.png") and os.path.exists(
        "table_00_header_right.png"
    ):
        main()
    else:
        setup()
        main()

If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu).

 1"""
 2Taulu - *segment tables from images*
 3
 4Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).
 5
 6To use this package, you first need to make an annotation of the headers in your table images.
 7The idea is that these headers will be similar across your full set of images, and they will be
 8used as a starting point for the search algorithm that finds the table grid.
 9
10Here is an example python script of how to use Taulu:
11```python
12from taulu import Taulu, Split
13import os
14
15
16def setup():
17    # create an Annotation file of the headers in the image
18    # (one for the left header, one for the right)
19    # and store them in the examples directory
20    print("Annotating the LEFT header...")
21    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")
22
23    print("Annotating the RIGHT header...")
24    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")
25
26
27def main():
28    taulu = Taulu(Split("table_00_header_left.png", "table_00_header_right.png"))
29    table = taulu.segment_table("../data/table_00.png", debug_view=True)
30
31    table.show_cells("../data/table_00.png")
32
33
34if __name__ == "__main__":
35    if os.path.exists("table_00_header_left.png") and os.path.exists(
36        "table_00_header_right.png"
37    ):
38        main()
39    else:
40        setup()
41        main()
42
43```
44
45If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu)
46"""
47
48from .config import TauluConfig
49from .grid import GridDetector, TableGrid
50from .header_aligner import HeaderAligner, MatchMethod
51from .header_template import HeaderTemplate
52from .split import Split
53from .table_indexer import TableIndexer
54from .taulu import Taulu
55
# Submodules flagged False in the pdoc-style ``__pdoc__`` override mapping
# (presumably to keep them out of the generated documentation).
__pdoc__ = dict.fromkeys(
    ("constants", "main", "decorators", "error", "types", "img_util"),
    False,
)

# Names re-exported as the package's public API.
__all__ = [
    "GridDetector",
    "HeaderAligner",
    "HeaderTemplate",
    "MatchMethod",
    "Split",
    "TableGrid",
    "TableIndexer",
    "Taulu",
    "TauluConfig",
]

# The ``gpu`` submodule is optional: it is only exported when importing it succeeds.
try:
    from . import gpu  # noqa: F401  # ty: ignore[unresolved-import]
except ImportError:
    pass
else:
    __all__.append("gpu")
class GridDetector:
120class GridDetector:
121    """
122    Detects table grid intersections using morphological filtering and template matching.
123
124    This detector implements a multi-stage pipeline:
125
126    1. **Binarization**: Sauvola adaptive thresholding to handle varying lighting
127    2. **Morphological operations**: Dilation to connect broken rule segments
128    3. **Cross-kernel matching**: Template matching with a cross-shaped kernel to find
129       rule intersections where horizontal and vertical lines meet
130    4. **Grid growing**: Iterative point detection starting from a known seed point
131
132    The cross-kernel is designed to match the specific geometry of your table rules.
133    It should be sized so that after morphology, it aligns with actual corner shapes.
134
135    ## Tuning Guidelines
136
137    - **kernel_size**: Increase if you need more selectivity (fewer false positives)
138    - **cross_width/height**: Should match rule thickness after morphology
139    - **morph_size**: Increase to connect more broken lines, but this thickens rules
140    - **sauvola_k**: Increase to threshold more aggressively (remove noise)
141    - **search_region**: Increase for documents with more warping/distortion
142    - **distance_penalty**: Increase to prefer corners closer to expected positions
143
144    ## Visual Debugging
145
146    Set `visual=True` in methods to see intermediate results and tune parameters.
147    """
148
149    def __init__(
150        self,
151        kernel_size: int = 21,
152        cross_width: int = 6,
153        cross_height: int | None = None,
154        morph_size: int | None = None,
155        sauvola_k: float = 0.04,
156        sauvola_window: int = 15,
157        scale: float = 1.0,
158        search_region: int = 40,
159        distance_penalty: float = 0.4,
160        skip_astar_threshold: float = 0.2,
161        min_rows: int = 5,
162        grow_threshold: float = 0.3,
163        look_distance: int = 4,
164        cuts: int = 3,
165        cut_fraction: float = 0.5,
166    ):
167        """
168        Args:
169            kernel_size (int): the size of the cross kernel
170                a larger kernel size often means that more penalty is applied, often leading
171                to more sparse results
172            cross_width (int): the width of one of the edges in the cross filter, should be
173                roughly equal to the width of the rules in the image after morphology is applied
174            cross_height (int | None): useful if the horizontal rules and vertical rules
175                have different sizes
176            morph_size (int | None): the size of the morphology operators that are applied before
177                the cross kernel. 'bridges the gaps' of broken-up lines
178            sauvola_k (float): threshold parameter for sauvola thresholding
179            sauvola_window (int): window_size parameter for sauvola thresholding
180            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
181            search_region (int): area in which to search for a new max value in `find_nearest` etc.
182            distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
183            skip_astar_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skipastar pathfinding
184            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
185            grow_threshold (float): the threshold for accepting a new point when growing the table
186            look_distance (int): how many points away to look when calculating the median slope
187            cuts (int): The amount of cuts (large deletions) to do in the grid during table growing
188            cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
189        """
190        self._validate_parameters(
191            kernel_size,
192            cross_width,
193            cross_height,
194            morph_size,
195            search_region,
196            sauvola_k,
197            sauvola_window,
198            distance_penalty,
199            skip_astar_threshold,
200            cuts,
201            cut_fraction,
202        )
203
204        self._kernel_size = kernel_size
205        self._cross_width = cross_width
206        self._cross_height = cross_width if cross_height is None else cross_height
207        self._morph_size = morph_size if morph_size is not None else cross_width
208        self._search_region = search_region
209        self._sauvola_k = sauvola_k
210        self._sauvola_window = sauvola_window
211        self._distance_penalty = distance_penalty
212        self._scale = scale
213        self._skip_astar_threshold = skip_astar_threshold
214        self._min_rows = min_rows
215        self._grow_threshold = grow_threshold
216        self._look_distance = look_distance
217        self._cuts = cuts
218        self._cut_fraction = cut_fraction
219
220        self._cross_kernel = self._create_cross_kernel()
221
222    def _validate_parameters(
223        self,
224        kernel_size: int,
225        cross_width: int,
226        cross_height: int | None,
227        morph_size: int | None,
228        search_region: int,
229        sauvola_k: float,
230        sauvola_window: int,
231        distance_penalty: float,
232        skip_astar_threshold: float,
233        cuts: int,
234        cut_fraction: float,
235    ) -> None:
236        """Validate initialization parameters."""
237        if kernel_size % 2 == 0:
238            raise ValueError("kernel_size must be odd")
239        if (
240            kernel_size <= 0
241            or cross_width <= 0
242            or search_region <= 0
243            or sauvola_window <= 0
244        ):
245            raise ValueError("Size parameters must be positive")
246        if cross_height is not None and cross_height <= 0:
247            raise ValueError("cross_height must be positive")
248        if morph_size is not None and morph_size <= 0:
249            raise ValueError("morph_size must be positive")
250        if not 0 <= distance_penalty <= 1:
251            raise ValueError("distance_penalty must be in [0, 1]")
252        if sauvola_k <= 0:
253            raise ValueError("sauvola_k must be positive")
254        if skip_astar_threshold < 0 or skip_astar_threshold > 1:
255            raise ValueError("skip_astar_threshold must be in [0, 1]")
256        if cut_fraction < 0 or cut_fraction > 1:
257            raise ValueError("cut_fraction must be in [0, 1]")
258        if cuts < 0:
259            raise ValueError("cuts must be zero or positive")
260
261    def _create_gaussian_weights(self, region_size: int) -> NDArray:
262        """
263        Create a 2D Gaussian weight mask.
264
265        Args:
266            shape (tuple[int, int]): Shape of the region (height, width)
267            p (float): Minimum value at the edge = 1 - p
268
269        Returns:
270            NDArray: Gaussian weight mask
271        """
272        if self._distance_penalty == 0:
273            return np.ones((region_size, region_size), dtype=np.float32)
274
275        y = np.linspace(-1, 1, region_size)
276        x = np.linspace(-1, 1, region_size)
277        xv, yv = np.meshgrid(x, y)
278        dist_squared = xv**2 + yv**2
279
280        # Prevent log(0) when distance_penalty is 1
281        if self._distance_penalty >= 0.999:
282            sigma = 0.1  # Small sigma for very sharp peak
283        else:
284            sigma = np.sqrt(-1 / (2 * np.log(1 - self._distance_penalty)))
285
286        weights = np.exp(-dist_squared / (2 * sigma**2))
287
288        return weights.astype(np.float32)
289
290    def _create_cross_kernel(self) -> NDArray:
291        kernel = np.zeros((self._kernel_size, self._kernel_size), dtype=np.uint8)
292        center = self._kernel_size // 2
293
294        # Create horizontal bar
295        h_start = max(0, center - self._cross_height // 2)
296        h_end = min(self._kernel_size, center + (self._cross_height + 1) // 2)
297        kernel[h_start:h_end, :] = 255
298
299        # Create vertical bar
300        v_start = max(0, center - self._cross_width // 2)
301        v_end = min(self._kernel_size, center + (self._cross_width + 1) // 2)
302        kernel[:, v_start:v_end] = 255
303
304        return kernel
305
306    def _apply_morphology(self, binary: MatLike) -> MatLike:
307        # Define a horizontal kernel (adjust width as needed)
308        kernel_hor = cv.getStructuringElement(cv.MORPH_RECT, (self._morph_size, 1))
309        kernel_ver = cv.getStructuringElement(cv.MORPH_RECT, (1, self._morph_size))
310
311        # Apply dilation
312        dilated = cv.dilate(binary, kernel_hor, iterations=1)
313        dilated = cv.dilate(dilated, kernel_ver, iterations=1)
314
315        return dilated
316
317    def _apply_cross_matching(self, img: MatLike) -> MatLike:
318        """Apply cross kernel template matching."""
319        pad_y = self._cross_kernel.shape[0] // 2
320        pad_x = self._cross_kernel.shape[1] // 2
321
322        padded = cv.copyMakeBorder(
323            img, pad_y, pad_y, pad_x, pad_x, borderType=cv.BORDER_CONSTANT, value=0
324        )
325
326        filtered = cv.matchTemplate(padded, self._cross_kernel, cv.TM_SQDIFF_NORMED)
327        # Invert and normalize to 0-255 range
328        filtered = cv.normalize(1.0 - filtered, None, 0, 255, cv.NORM_MINMAX)
329        return filtered.astype(np.uint8)
330
331    def apply(
332        self, img: MatLike, visual: bool = False, visual_notebook: bool = False
333    ) -> MatLike:
334        """
335        Apply the grid detection filter to the input image.
336
337        Args:
338            img (MatLike): the input image
339            visual (bool): whether to show intermediate steps via OpenCV windows
340            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
341
342        Returns:
343            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
344        """
345
346        if img is None or img.size == 0:
347            raise ValueError("Input image is empty or None")
348
349        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)
350
351        if visual:
352            imu.show(binary, title="thresholded")
353        if visual_notebook:
354            imu.show_notebook(binary, title="thresholded")
355
356        binary = self._apply_morphology(binary)
357
358        if visual:
359            imu.show(binary, title="dilated")
360        if visual_notebook:
361            imu.show_notebook(binary, title="dilated")
362
363        filtered = self._apply_cross_matching(binary)
364
365        return filtered
366
367    @log_calls(level=logging.DEBUG, include_return=True)
368    def find_nearest(
369        self, filtered: MatLike, point: Point, region: int | None = None
370    ) -> tuple[Point, float]:
371        """
372        Find the nearest 'corner match' in the image, along with its score [0,1]
373
374        Args:
375            filtered (MatLike): the filtered image (obtained through `apply`)
376            point (tuple[int, int]): the approximate target point (x, y)
377            region (None | int): alternative value for search region,
378                overwriting the `__init__` parameter `region`
379        """
380
381        if filtered is None or filtered.size == 0:
382            raise ValueError("Filtered image is empty or None")
383
384        region_size = region if region is not None else self._search_region
385        x, y = point
386
387        # Calculate crop boundaries
388        crop_x = max(0, x - region_size // 2)
389        crop_y = max(0, y - region_size // 2)
390        crop_width = min(region_size, filtered.shape[1] - crop_x)
391        crop_height = min(region_size, filtered.shape[0] - crop_y)
392
393        # Handle edge cases
394        if crop_width <= 0 or crop_height <= 0:
395            logger.warning(f"Point {point} is outside image bounds")
396            return point, 0.0
397
398        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]
399
400        if cropped.size == 0:
401            return point, 0.0
402
403        # Always apply Gaussian weighting by extending crop if needed
404        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
405            # Perfect size - apply weights directly
406            weights = self._create_gaussian_weights(region_size)
407            weighted = cropped.astype(np.float32) * weights
408        else:
409            # Extend crop to match region_size, apply weights, then restore
410            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)
411
412            # Calculate offset to center the cropped region in extended array
413            offset_y = (region_size - cropped.shape[0]) // 2
414            offset_x = (region_size - cropped.shape[1]) // 2
415
416            # Place cropped region in center of extended array
417            extended[
418                offset_y : offset_y + cropped.shape[0],
419                offset_x : offset_x + cropped.shape[1],
420            ] = cropped
421
422            # Apply Gaussian weights to extended array
423            weights = self._create_gaussian_weights(region_size)
424            weighted_extended = extended.astype(np.float32) * weights
425
426            # Extract the original region back out
427            weighted = weighted_extended[
428                offset_y : offset_y + cropped.shape[0],
429                offset_x : offset_x + cropped.shape[1],
430            ]
431
432        best_idx = np.argmax(weighted)
433        best_y, best_x = np.unravel_index(best_idx, cropped.shape)
434
435        result_point = (
436            int(crop_x + best_x),
437            int(crop_y + best_y),
438        )
439        result_confidence = float(weighted[best_y, best_x]) / 255.0
440
441        return result_point, result_confidence
442
443    def find_table_points(
444        self,
445        img: MatLike | PathLike[str],
446        top_row: list[Point | None],
447        cell_widths: list[int],
448        cell_heights: list[int] | int,
449        visual: bool = False,
450        visual_notebook: bool = False,
451        window: str = WINDOW,
452        goals_width: int | None = None,
453        filtered: MatLike | PathLike[str] | None = None,
454        smooth: bool = False,
455        smooth_strength: float = 0.5,
456        smooth_iterations: int = 1,
457        smooth_degree: int = 1,
458    ) -> "TableGrid":
459        """
460        Parse the image to a `TableGrid` structure that holds all of the
461        intersections between horizontal and vertical rules, starting near the `left_top` point
462
463        Args:
464            img (MatLike): the input image of a table
465            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
466            cell_widths (list[int]): the expected widths of the cells (based on a header template)
467            cell_heights (list[int]): the expected height of the rows of data.
468                The last value from this list is used until the image has no more vertical space.
469            visual (bool): whether to show intermediate steps
470            window (str): the name of the OpenCV window to use for visualization
471            goals_width (int | None): the width of the goal region when searching for the next point.
472                If None, defaults to 1.5 * search_region
473            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
474                calculating the filtered image from scratch
475            smooth (bool): if True, smooth the grid after detection
476            smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
477            smooth_iterations (int): number of smoothing passes. Default: 3
478            smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1
479
480        Returns:
481            a TableGrid object
482        """
483
484        if goals_width is None:
485            goals_width = self._search_region * 3 // 2
486
487        if not cell_widths:
488            raise ValueError("cell_widths must contain at least one value")
489
490        if not isinstance(img, np.ndarray):
491            tmp_img = cv.imread(os.fspath(cast(PathLike[str], img)))
492            assert tmp_img is not None
493            img = tmp_img
494
495        img = cast(MatLike, img)
496
497        if filtered is None:
498            filtered = self.apply(img, visual, visual_notebook)
499        else:
500            if not isinstance(filtered, np.ndarray):
501                filtered = cv.imread(os.fspath(filtered))
502
503            filtered = ensure_gray(filtered)
504
505        if visual:
506            imu.show(filtered, window=window)
507        if visual_notebook:
508            imu.show_notebook(filtered, title="filtered")
509
510        if isinstance(cell_heights, int):
511            cell_heights = [cell_heights]
512
513        for i in range(len(top_row)):
514            if top_row[i] is None:
515                continue
516
517            point = top_row[i]
518            assert point is not None
519            adjusted, confidence = self.find_nearest(
520                filtered, point, int(self._search_region * 2)
521            )
522
523            if confidence < 0.15:
524                top_row[i] = None
525            else:
526                top_row[i] = adjusted
527
528        if not any(top_row):
529            logger.error("No good starting candidates given")
530
531        # resize all parameters according to scale
532        img = cv.resize(img, None, fx=self._scale, fy=self._scale)
533
534        if visual:
535            imu.push(img)
536
537        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
538        cell_widths = [int(w * self._scale) for w in cell_widths]
539        cell_heights = [int(h * self._scale) for h in cell_heights]
540        top_row = [
541            (int(p[0] * self._scale), int(p[1] * self._scale))
542            if p is not None
543            else None
544            for p in top_row
545        ]
546        search_region = int(self._search_region * self._scale)
547
548        img_gray = ensure_gray(img)
549        filtered_gray = ensure_gray(filtered)
550
551        table_grower = TableGrower(
552            filtered_gray,
553            cell_widths,
554            cell_heights,
555            top_row,
556            search_region,
557            self._distance_penalty,
558            self._look_distance,
559            self._grow_threshold,
560            self._skip_astar_threshold,
561            self._min_rows,
562            self._cuts,
563            self._cut_fraction,
564        )
565
566        def show_grower_progress(wait: bool = False):
567            img_orig = np.copy(img)
568            corners = table_grower.get_all_corners()
569            for y in range(len(corners)):
570                for x in range(len(corners[y])):
571                    if corners[y][x] is not None:
572                        img_orig = imu.draw_points(
573                            img_orig,
574                            [corners[y][x]],  # type:ignore
575                            color=(0, 0, 255),
576                            thickness=30,
577                        )
578
579            edge = table_grower.get_edge_points()
580
581            for point, score in edge:
582                color = (100, int(clamp(score * 255, 0, 255)), 100)
583                imu.draw_point(img_orig, point, color=color, thickness=20)
584
585            imu.show(img_orig, wait=wait)
586
587        if visual:
588            threshold = self._grow_threshold
589
590            # python implementation of rust loops, for visualization purposes
591            # note this is a LOT slower
592            while table_grower.grow_point(img_gray, filtered_gray) is not None:
593                show_grower_progress()
594
595            show_grower_progress(True)
596
597            original_threshold = threshold
598
599            loops_without_change = 0
600
601            while not table_grower.is_table_complete():
602                loops_without_change += 1
603
604                if loops_without_change > 50:
605                    break
606
607                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
608                    show_grower_progress()
609
610                    loops_without_change = 0
611
612                    grown = False
613                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
614                        show_grower_progress()
615                        grown = True
616                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
617                        table_grower.set_threshold(threshold)
618
619                    if not grown:
620                        threshold *= 0.9
621                        table_grower.set_threshold(threshold)
622
623                else:
624                    threshold *= 0.9
625                    table_grower.set_threshold(threshold)
626
627                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
628                        show_grower_progress()
629                        loops_without_change = 0
630
631        else:
632            table_grower.grow_table(img_gray, filtered_gray)
633
634        if smooth:
635            table_grower.smooth_grid(smooth_strength, smooth_iterations, smooth_degree)
636        corners = table_grower.get_all_corners()
637        logger.info(
638            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
639        )
640        # rescale corners back to original size
641        if self._scale != 1.0:
642            for y in range(len(corners)):
643                for x in range(len(corners[y])):
644                    if corners[y][x] is not None:
645                        corners[y][x] = (
646                            int(corners[y][x][0] / self._scale),  # type:ignore
647                            int(corners[y][x][1] / self._scale),  # type:ignore
648                        )
649
650        return TableGrid(corners)  # type: ignore
651
652    def _visualize_grid(self, img: MatLike, points: list[list[Point]]) -> None:
653        """Visualize the detected grid points."""
654        all_points = [point for row in points for point in row]
655        drawn = imu.draw_points(img, all_points)
656        imu.show(drawn, wait=True)
657
658    def _visualize_path_finding(
659        self,
660        path: list[Point],
661        current: Point,
662        next_point: Point,
663        previous_row_target: Point | None = None,
664        region_center: Point | None = None,
665        region_size: int | None = None,
666    ) -> None:
667        """Visualize the path finding process for debugging."""
668        global show_time
669
670        screen = imu.pop()
671
672        # if gray, convert to BGR
673        if len(screen.shape) == 2 or screen.shape[2] == 1:
674            debug_img = cv.cvtColor(screen, cv.COLOR_GRAY2BGR)
675
676        debug_img = imu.draw_points(debug_img, path, color=(200, 200, 0), thickness=2)
677        debug_img = imu.draw_points(
678            debug_img, [current], color=(0, 255, 0), thickness=3
679        )
680        debug_img = imu.draw_points(
681            debug_img, [next_point], color=(0, 0, 255), thickness=2
682        )
683
684        # Draw previous row target if available
685        if previous_row_target is not None:
686            debug_img = imu.draw_points(
687                debug_img, [previous_row_target], color=(255, 0, 255), thickness=2
688            )
689
690        # Draw search region if available
691        if region_center is not None and region_size is not None:
692            top_left = (
693                max(0, region_center[0] - region_size // 2),
694                max(0, region_center[1] - region_size // 2),
695            )
696            bottom_right = (
697                min(debug_img.shape[1], region_center[0] + region_size // 2),
698                min(debug_img.shape[0], region_center[1] + region_size // 2),
699            )
700            cv.rectangle(
701                debug_img,
702                top_left,
703                bottom_right,
704                color=(255, 0, 0),
705                thickness=2,
706                lineType=cv.LINE_AA,
707            )
708
709        imu.push(debug_img)
710
711        show_time += 1
712        if show_time % 10 != 1:
713            return
714
715        imu.show(debug_img, title="Next column point", wait=False)
716        # time.sleep(0.003)
717
718    @log_calls(level=logging.DEBUG, include_return=True)
719    def _astar(
720        self,
721        img: np.ndarray,
722        start: tuple[int, int],
723        goals: list[tuple[int, int]],
724        direction: str,
725    ) -> list[Point] | None:
726        """
727        Find the best path between the start point and one of the goal points on the image
728        """
729
730        if not goals:
731            return None
732
733        if self._scale != 1.0:
734            img = cv.resize(img, None, fx=self._scale, fy=self._scale)
735            start = (int(start[0] * self._scale), int(start[1] * self._scale))
736            goals = [(int(g[0] * self._scale), int(g[1] * self._scale)) for g in goals]
737
738        # calculate bounding box with margin
739        all_points = [*goals, start]
740        xs = [p[0] for p in all_points]
741        ys = [p[1] for p in all_points]
742
743        margin = 30
744        top_left = (max(0, min(xs) - margin), max(0, min(ys) - margin))
745        bottom_right = (
746            min(img.shape[1], max(xs) + margin),
747            min(img.shape[0], max(ys) + margin),
748        )
749
750        # check bounds
751        if (
752            top_left[0] >= bottom_right[0]
753            or top_left[1] >= bottom_right[1]
754            or top_left[0] >= img.shape[1]
755            or top_left[1] >= img.shape[0]
756        ):
757            return None
758
759        # transform coordinates to cropped image
760        start_local = (start[0] - top_left[0], start[1] - top_left[1])
761        goals_local = [(g[0] - top_left[0], g[1] - top_left[1]) for g in goals]
762
763        cropped = img[top_left[1] : bottom_right[1], top_left[0] : bottom_right[0]]
764
765        if cropped.size == 0:
766            return None
767
768        path = rust_astar(cropped, start_local, goals_local, direction)
769
770        if path is None:
771            return None
772
773        if self._scale != 1.0:
774            path = [(int(p[0] / self._scale), int(p[1] / self._scale)) for p in path]
775            top_left = (int(top_left[0] / self._scale), int(top_left[1] / self._scale))
776
777        return [(p[0] + top_left[0], p[1] + top_left[1]) for p in path]

Detects table grid intersections using morphological filtering and template matching.

This detector implements a multi-stage pipeline:

  1. Binarization: Sauvola adaptive thresholding to handle varying lighting
  2. Morphological operations: Dilation to connect broken rule segments
  3. Cross-kernel matching: Template matching with a cross-shaped kernel to find rule intersections where horizontal and vertical lines meet
  4. Grid growing: Iterative point detection starting from a known seed point

The cross-kernel is designed to match the specific geometry of your table rules. It should be sized so that after morphology, it aligns with actual corner shapes.

Tuning Guidelines

  • kernel_size: Increase if you need more selectivity (fewer false positives)
  • cross_width/height: Should match rule thickness after morphology
  • morph_size: Increase to connect more broken lines, but this thickens rules
  • sauvola_k: Increase to threshold more aggressively (remove noise)
  • search_region: Increase for documents with more warping/distortion
  • distance_penalty: Increase to prefer corners closer to expected positions

Visual Debugging

Set visual=True in methods to see intermediate results and tune parameters.

GridDetector( kernel_size: int = 21, cross_width: int = 6, cross_height: int | None = None, morph_size: int | None = None, sauvola_k: float = 0.04, sauvola_window: int = 15, scale: float = 1.0, search_region: int = 40, distance_penalty: float = 0.4, skip_astar_threshold: float = 0.2, min_rows: int = 5, grow_threshold: float = 0.3, look_distance: int = 4, cuts: int = 3, cut_fraction: float = 0.5)
149    def __init__(
150        self,
151        kernel_size: int = 21,
152        cross_width: int = 6,
153        cross_height: int | None = None,
154        morph_size: int | None = None,
155        sauvola_k: float = 0.04,
156        sauvola_window: int = 15,
157        scale: float = 1.0,
158        search_region: int = 40,
159        distance_penalty: float = 0.4,
160        skip_astar_threshold: float = 0.2,
161        min_rows: int = 5,
162        grow_threshold: float = 0.3,
163        look_distance: int = 4,
164        cuts: int = 3,
165        cut_fraction: float = 0.5,
166    ):
167        """
168        Args:
169            kernel_size (int): the size of the cross kernel
170                a larger kernel size often means that more penalty is applied, often leading
171                to more sparse results
172            cross_width (int): the width of one of the edges in the cross filter, should be
173                roughly equal to the width of the rules in the image after morphology is applied
174            cross_height (int | None): useful if the horizontal rules and vertical rules
175                have different sizes
176            morph_size (int | None): the size of the morphology operators that are applied before
177                the cross kernel. 'bridges the gaps' of broken-up lines
178            sauvola_k (float): threshold parameter for sauvola thresholding
179            sauvola_window (int): window_size parameter for sauvola thresholding
180            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
181            search_region (int): area in which to search for a new max value in `find_nearest` etc.
182            distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
183            skip_astar_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skipastar pathfinding
184            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
185            grow_threshold (float): the threshold for accepting a new point when growing the table
186            look_distance (int): how many points away to look when calculating the median slope
187            cuts (int): The amount of cuts (large deletions) to do in the grid during table growing
188            cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
189        """
190        self._validate_parameters(
191            kernel_size,
192            cross_width,
193            cross_height,
194            morph_size,
195            search_region,
196            sauvola_k,
197            sauvola_window,
198            distance_penalty,
199            skip_astar_threshold,
200            cuts,
201            cut_fraction,
202        )
203
204        self._kernel_size = kernel_size
205        self._cross_width = cross_width
206        self._cross_height = cross_width if cross_height is None else cross_height
207        self._morph_size = morph_size if morph_size is not None else cross_width
208        self._search_region = search_region
209        self._sauvola_k = sauvola_k
210        self._sauvola_window = sauvola_window
211        self._distance_penalty = distance_penalty
212        self._scale = scale
213        self._skip_astar_threshold = skip_astar_threshold
214        self._min_rows = min_rows
215        self._grow_threshold = grow_threshold
216        self._look_distance = look_distance
217        self._cuts = cuts
218        self._cut_fraction = cut_fraction
219
220        self._cross_kernel = self._create_cross_kernel()
Arguments:
  • kernel_size (int): the size of the cross kernel a larger kernel size often means that more penalty is applied, often leading to more sparse results
  • cross_width (int): the width of one of the edges in the cross filter, should be roughly equal to the width of the rules in the image after morphology is applied
  • cross_height (int | None): useful if the horizontal rules and vertical rules have different sizes
  • morph_size (int | None): the size of the morphology operators that are applied before the cross kernel. 'bridges the gaps' of broken-up lines
  • sauvola_k (float): threshold parameter for sauvola thresholding
  • sauvola_window (int): window_size parameter for sauvola thresholding
  • scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
  • search_region (int): area in which to search for a new max value in find_nearest etc.
  • distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
  • skip_astar_threshold (float): minimum confidence score of the heuristic jump during table growing at which A* pathfinding is skipped
  • min_rows (int): minimum number of rows to find before stopping the table finding algorithm
  • grow_threshold (float): the threshold for accepting a new point when growing the table
  • look_distance (int): how many points away to look when calculating the median slope
  • cuts (int): The amount of cuts (large deletions) to do in the grid during table growing
  • cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
def apply( self, img: Union[cv2.Mat, numpy.ndarray], visual: bool = False, visual_notebook: bool = False) -> Union[cv2.Mat, numpy.ndarray]:
331    def apply(
332        self, img: MatLike, visual: bool = False, visual_notebook: bool = False
333    ) -> MatLike:
334        """
335        Apply the grid detection filter to the input image.
336
337        Args:
338            img (MatLike): the input image
339            visual (bool): whether to show intermediate steps via OpenCV windows
340            visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
341
342        Returns:
343            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
344        """
345
346        if img is None or img.size == 0:
347            raise ValueError("Input image is empty or None")
348
349        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)
350
351        if visual:
352            imu.show(binary, title="thresholded")
353        if visual_notebook:
354            imu.show_notebook(binary, title="thresholded")
355
356        binary = self._apply_morphology(binary)
357
358        if visual:
359            imu.show(binary, title="dilated")
360        if visual_notebook:
361            imu.show_notebook(binary, title="dilated")
362
363        filtered = self._apply_cross_matching(binary)
364
365        return filtered

Apply the grid detection filter to the input image.

Arguments:
  • img (MatLike): the input image
  • visual (bool): whether to show intermediate steps via OpenCV windows
  • visual_notebook (bool): whether to show intermediate steps inline in a Jupyter notebook
Returns:

MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

@log_calls(level=logging.DEBUG, include_return=True)
def find_nearest( self, filtered: Union[cv2.Mat, numpy.ndarray], point: tuple[int, int], region: int | None = None) -> tuple[tuple[int, int], float]:
367    @log_calls(level=logging.DEBUG, include_return=True)
368    def find_nearest(
369        self, filtered: MatLike, point: Point, region: int | None = None
370    ) -> tuple[Point, float]:
371        """
372        Find the nearest 'corner match' in the image, along with its score [0,1]
373
374        Args:
375            filtered (MatLike): the filtered image (obtained through `apply`)
376            point (tuple[int, int]): the approximate target point (x, y)
377            region (None | int): alternative value for search region,
378                overwriting the `__init__` parameter `region`
379        """
380
381        if filtered is None or filtered.size == 0:
382            raise ValueError("Filtered image is empty or None")
383
384        region_size = region if region is not None else self._search_region
385        x, y = point
386
387        # Calculate crop boundaries
388        crop_x = max(0, x - region_size // 2)
389        crop_y = max(0, y - region_size // 2)
390        crop_width = min(region_size, filtered.shape[1] - crop_x)
391        crop_height = min(region_size, filtered.shape[0] - crop_y)
392
393        # Handle edge cases
394        if crop_width <= 0 or crop_height <= 0:
395            logger.warning(f"Point {point} is outside image bounds")
396            return point, 0.0
397
398        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]
399
400        if cropped.size == 0:
401            return point, 0.0
402
403        # Always apply Gaussian weighting by extending crop if needed
404        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
405            # Perfect size - apply weights directly
406            weights = self._create_gaussian_weights(region_size)
407            weighted = cropped.astype(np.float32) * weights
408        else:
409            # Extend crop to match region_size, apply weights, then restore
410            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)
411
412            # Calculate offset to center the cropped region in extended array
413            offset_y = (region_size - cropped.shape[0]) // 2
414            offset_x = (region_size - cropped.shape[1]) // 2
415
416            # Place cropped region in center of extended array
417            extended[
418                offset_y : offset_y + cropped.shape[0],
419                offset_x : offset_x + cropped.shape[1],
420            ] = cropped
421
422            # Apply Gaussian weights to extended array
423            weights = self._create_gaussian_weights(region_size)
424            weighted_extended = extended.astype(np.float32) * weights
425
426            # Extract the original region back out
427            weighted = weighted_extended[
428                offset_y : offset_y + cropped.shape[0],
429                offset_x : offset_x + cropped.shape[1],
430            ]
431
432        best_idx = np.argmax(weighted)
433        best_y, best_x = np.unravel_index(best_idx, cropped.shape)
434
435        result_point = (
436            int(crop_x + best_x),
437            int(crop_y + best_y),
438        )
439        result_confidence = float(weighted[best_y, best_x]) / 255.0
440
441        return result_point, result_confidence

Find the nearest 'corner match' in the image, along with its score [0,1]

Arguments:
  • filtered (MatLike): the filtered image (obtained through apply)
  • point (tuple[int, int]): the approximate target point (x, y)
  • region (None | int): alternative value for search region, overriding the __init__ parameter search_region
def find_table_points( self, img: Union[cv2.Mat, numpy.ndarray, os.PathLike[str]], top_row: list[tuple[int, int] | None], cell_widths: list[int], cell_heights: list[int] | int, visual: bool = False, visual_notebook: bool = False, window: str = 'taulu', goals_width: int | None = None, filtered: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], NoneType] = None, smooth: bool = False, smooth_strength: float = 0.5, smooth_iterations: int = 1, smooth_degree: int = 1) -> TableGrid:
443    def find_table_points(
444        self,
445        img: MatLike | PathLike[str],
446        top_row: list[Point | None],
447        cell_widths: list[int],
448        cell_heights: list[int] | int,
449        visual: bool = False,
450        visual_notebook: bool = False,
451        window: str = WINDOW,
452        goals_width: int | None = None,
453        filtered: MatLike | PathLike[str] | None = None,
454        smooth: bool = False,
455        smooth_strength: float = 0.5,
456        smooth_iterations: int = 1,
457        smooth_degree: int = 1,
458    ) -> "TableGrid":
459        """
460        Parse the image to a `TableGrid` structure that holds all of the
461        intersections between horizontal and vertical rules, starting near the `left_top` point
462
463        Args:
464            img (MatLike): the input image of a table
465            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
466            cell_widths (list[int]): the expected widths of the cells (based on a header template)
467            cell_heights (list[int]): the expected height of the rows of data.
468                The last value from this list is used until the image has no more vertical space.
469            visual (bool): whether to show intermediate steps
470            window (str): the name of the OpenCV window to use for visualization
471            goals_width (int | None): the width of the goal region when searching for the next point.
472                If None, defaults to 1.5 * search_region
473            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
474                calculating the filtered image from scratch
475            smooth (bool): if True, smooth the grid after detection
476            smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
477            smooth_iterations (int): number of smoothing passes. Default: 3
478            smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1
479
480        Returns:
481            a TableGrid object
482        """
483
484        if goals_width is None:
485            goals_width = self._search_region * 3 // 2
486
487        if not cell_widths:
488            raise ValueError("cell_widths must contain at least one value")
489
490        if not isinstance(img, np.ndarray):
491            tmp_img = cv.imread(os.fspath(cast(PathLike[str], img)))
492            assert tmp_img is not None
493            img = tmp_img
494
495        img = cast(MatLike, img)
496
497        if filtered is None:
498            filtered = self.apply(img, visual, visual_notebook)
499        else:
500            if not isinstance(filtered, np.ndarray):
501                filtered = cv.imread(os.fspath(filtered))
502
503            filtered = ensure_gray(filtered)
504
505        if visual:
506            imu.show(filtered, window=window)
507        if visual_notebook:
508            imu.show_notebook(filtered, title="filtered")
509
510        if isinstance(cell_heights, int):
511            cell_heights = [cell_heights]
512
513        for i in range(len(top_row)):
514            if top_row[i] is None:
515                continue
516
517            point = top_row[i]
518            assert point is not None
519            adjusted, confidence = self.find_nearest(
520                filtered, point, int(self._search_region * 2)
521            )
522
523            if confidence < 0.15:
524                top_row[i] = None
525            else:
526                top_row[i] = adjusted
527
528        if not any(top_row):
529            logger.error("No good starting candidates given")
530
531        # resize all parameters according to scale
532        img = cv.resize(img, None, fx=self._scale, fy=self._scale)
533
534        if visual:
535            imu.push(img)
536
537        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
538        cell_widths = [int(w * self._scale) for w in cell_widths]
539        cell_heights = [int(h * self._scale) for h in cell_heights]
540        top_row = [
541            (int(p[0] * self._scale), int(p[1] * self._scale))
542            if p is not None
543            else None
544            for p in top_row
545        ]
546        search_region = int(self._search_region * self._scale)
547
548        img_gray = ensure_gray(img)
549        filtered_gray = ensure_gray(filtered)
550
551        table_grower = TableGrower(
552            filtered_gray,
553            cell_widths,
554            cell_heights,
555            top_row,
556            search_region,
557            self._distance_penalty,
558            self._look_distance,
559            self._grow_threshold,
560            self._skip_astar_threshold,
561            self._min_rows,
562            self._cuts,
563            self._cut_fraction,
564        )
565
566        def show_grower_progress(wait: bool = False):
567            img_orig = np.copy(img)
568            corners = table_grower.get_all_corners()
569            for y in range(len(corners)):
570                for x in range(len(corners[y])):
571                    if corners[y][x] is not None:
572                        img_orig = imu.draw_points(
573                            img_orig,
574                            [corners[y][x]],  # type:ignore
575                            color=(0, 0, 255),
576                            thickness=30,
577                        )
578
579            edge = table_grower.get_edge_points()
580
581            for point, score in edge:
582                color = (100, int(clamp(score * 255, 0, 255)), 100)
583                imu.draw_point(img_orig, point, color=color, thickness=20)
584
585            imu.show(img_orig, wait=wait)
586
587        if visual:
588            threshold = self._grow_threshold
589
590            # python implementation of rust loops, for visualization purposes
591            # note this is a LOT slower
592            while table_grower.grow_point(img_gray, filtered_gray) is not None:
593                show_grower_progress()
594
595            show_grower_progress(True)
596
597            original_threshold = threshold
598
599            loops_without_change = 0
600
601            while not table_grower.is_table_complete():
602                loops_without_change += 1
603
604                if loops_without_change > 50:
605                    break
606
607                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
608                    show_grower_progress()
609
610                    loops_without_change = 0
611
612                    grown = False
613                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
614                        show_grower_progress()
615                        grown = True
616                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
617                        table_grower.set_threshold(threshold)
618
619                    if not grown:
620                        threshold *= 0.9
621                        table_grower.set_threshold(threshold)
622
623                else:
624                    threshold *= 0.9
625                    table_grower.set_threshold(threshold)
626
627                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
628                        show_grower_progress()
629                        loops_without_change = 0
630
631        else:
632            table_grower.grow_table(img_gray, filtered_gray)
633
634        if smooth:
635            table_grower.smooth_grid(smooth_strength, smooth_iterations, smooth_degree)
636        corners = table_grower.get_all_corners()
637        logger.info(
638            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
639        )
640        # rescale corners back to original size
641        if self._scale != 1.0:
642            for y in range(len(corners)):
643                for x in range(len(corners[y])):
644                    if corners[y][x] is not None:
645                        corners[y][x] = (
646                            int(corners[y][x][0] / self._scale),  # type:ignore
647                            int(corners[y][x][1] / self._scale),  # type:ignore
648                        )
649
650        return TableGrid(corners)  # type: ignore

Parse the image to a TableGrid structure that holds all of the intersections between horizontal and vertical rules, starting from the top_row points

Arguments:
  • img (MatLike): the input image of a table
  • top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
  • cell_widths (list[int]): the expected widths of the cells (based on a header template)
  • cell_heights (list[int]): the expected height of the rows of data. The last value from this list is used until the image has no more vertical space.
  • visual (bool): whether to show intermediate steps
  • window (str): the name of the OpenCV window to use for visualization
  • goals_width (int | None): the width of the goal region when searching for the next point. If None, defaults to 1.5 * search_region
  • filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of calculating the filtered image from scratch
  • smooth (bool): if True, smooth the grid after detection
  • smooth_strength (float): blend factor per smoothing iteration (0.0-1.0). Default: 0.5
  • smooth_iterations (int): number of smoothing passes. Default: 1
  • smooth_degree (int): polynomial degree for smoothing regression (1 or 2). Default: 1
Returns:

a TableGrid object

class HeaderAligner:
 27class HeaderAligner:
 28    """
 29    Aligns table header templates to subject images using feature-based registration.
 30
 31    This class supports multiple feature detection and matching methods to compute
 32    a homography transformation that maps points from a header template image to
 33    their corresponding locations in full table images.
 34
 35    ## How it Works
 36
 37    1. **Feature Detection**: Extracts keypoints from both template and subject
 38    2. **Feature Matching**: Finds correspondences using the selected matcher
 39    3. **Filtering**: Keeps top matches and prunes based on spatial consistency
 40    4. **Homography Estimation**: Computes perspective transform using RANSAC
 41
 42    The computed homography can then transform any point from template space to
 43    image space, allowing you to locate table structures based on your annotation.
 44
 45    ## Available Methods
 46
 47    - **orb** (default): ORB features with BFMatcher (Hamming distance). Fast and
 48      patent-free. Good for most use cases.
 49    - **sift**: SIFT features with BFMatcher (L2 norm). More robust to scale and
 50      rotation changes. Slower but often more accurate.
 51    - **surf**: SURF features with BFMatcher (L2 norm). Requires opencv-contrib-python
 52      with non-free modules enabled. Fast and robust.
 53    - **akaze**: AKAZE features with BFMatcher (Hamming distance). Patent-free,
 54      handles scale/rotation well, and often more robust than ORB on documents.
 55
 56    ## Preprocessing Options
 57
 58    - Set `k` parameter to apply Sauvola thresholding before feature detection.
 59      This can improve matching on documents with variable lighting.
 60    - Set `k=None` to use raw images (just extract the third channel, i.e. red for BGR images)
 61
 62    ## Tuning Guidelines
 63
 64    - **max_features**: Increase if matching fails on complex templates
 65    - **match_fraction**: Decrease if you get many incorrect matches
 66    - **max_dist**: Increase for documents with more warping/distortion
 67    - **scale**: Decrease (<1.0) to speed up on high-resolution images
 68
 69    Args:
 70        template (MatLike | PathLike[str] | str | None): Header template image or path.
 71            This should contain a clear, representative view of the table header.
 72        method (MatchMethod): Feature detection/matching method. One of "orb", "sift",
 73            "surf", or "akaze". Default is "orb".
 74        max_features (int): Maximum features to detect. More features = slower
 75            but potentially more robust matching.
 76        patch_size (int): ORB patch size for feature extraction (only used with "orb").
 77        match_fraction (float): Fraction [0, 1] of matches to keep after sorting by
 78            quality. Higher = more matches but potentially more outliers.
 79        scale (float): Image downscaling factor (0, 1] for processing speed.
 80        max_dist (float): Maximum allowed distance (relative to image size) between
 81            matched keypoints. Filters out spatially inconsistent matches.
 82        k (float | None): Sauvola threshold parameter for preprocessing. If None,
 83            no thresholding is applied. Typical range: 0.03-0.15.
 84    """
 85
 86    def __init__(
 87        self,
 88        template: None | MatLike | PathLike[str] | str = None,
 89        method: MatchMethod = "orb",
 90        max_features: int = 100_000,
 91        patch_size: int = 31,
 92        match_fraction: float = 0.3,
 93        scale: float = 1.0,
 94        max_dist: float = 1.00,
 95        k: float | None = None,
 96    ):
 97        """
 98        Args:
 99            template (MatLike | str): (path of) template image, with the table template clearly visible
100            method (MatchMethod): feature detection/matching method ("orb", "sift", or "surf")
101            max_features (int): maximal number of features that will be extracted
102            patch_size (int): for ORB feature extractor (only used with method="orb")
103            match_fraction (float): best fraction of matches that are kept
104            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
105            max_dist (float): maximum distance (relative to image size) of matched features.
106                Increase this value if the warping between image and template needs to be more agressive
107            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
108        """
109
110        if type(template) is str or type(template) is PathLike:
111            value = cv.imread(fspath(template))
112            template = value
113
114        self._method = method
115        self._k = k
116        if scale > 1.0:
117            raise TauluException(
118                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
119            )
120        if scale == 0:
121            raise TauluException("Use 0 < scale <= 1.0")
122
123        self._scale = scale
124        self._template = self._scale_img(cast(MatLike, template))
125        self._template_orig: None | MatLike = None
126        self._preprocess_template()
127        self._max_features = max_features
128        self._patch_size = patch_size
129        self._match_fraction = match_fraction
130        self._max_dist = max_dist
131        self._validate_method()
132        self._matches_notebook_img = None
133
134    def _scale_img(self, img: MatLike) -> MatLike:
135        if self._scale == 1.0:
136            return img
137
138        return cv.resize(img, None, fx=self._scale, fy=self._scale)
139
140    def _unscale_img(self, img: MatLike) -> MatLike:
141        if self._scale == 1.0:
142            return img
143
144        return cv.resize(img, None, fx=1 / self._scale, fy=1 / self._scale)
145
146    def _unscale_homography(self, h: np.ndarray) -> np.ndarray:
147        if self._scale == 1.0:
148            return h
149
150        scale_matrix = np.diag([self._scale, self._scale, 1.0])
151        # inv_scale_matrix = np.linalg.inv(scale_matrix)
152        inv_scale_matrix = np.diag([1.0 / self._scale, 1.0 / self._scale, 1.0])
153        # return inv_scale_matrix @ h @ scale_matrix
154        return inv_scale_matrix @ h @ scale_matrix
155
    @property
    def method(self) -> MatchMethod:
        """The feature detection/matching method this aligner uses, as stored by the constructor."""
        return self._method
160
161    @property
162    def template(self):
163        """The template image that subject images are aligned to"""
164        return self._template
165
166    @template.setter
167    def template(self, value: MatLike | str):
168        """Set the template image as a path or an image"""
169
170        if type(value) is str:
171            tmp_value = cv.imread(value)
172            assert tmp_value is not None
173            value = tmp_value
174            self._template = value
175
176        # TODO: check if the image has the right properties (dimensions etc.)
177        self._template = cast(MatLike, value)
178
179        self._preprocess_template()
180
181    def _preprocess_template(self):
182        self._template_orig = cv.cvtColor(self._template, cv.COLOR_BGR2GRAY)
183        if self._k is not None:
184            self._template = imu.sauvola(self._template, self._k)
185            self._template = cv.bitwise_not(self._template)
186        else:
187            _, _, self._template = cv.split(self._template)
188
189    def _preprocess_image(self, img: MatLike):
190        if self._template_orig is None:
191            raise TauluException("process the template first")
192
193        if self._k is not None:
194            img = imu.sauvola(img, self._k)
195            img = cv.bitwise_not(img)
196        else:
197            _, _, img = cv.split(img)
198
199        return img
200
201    def _validate_method(self):
202        """Validate that the selected method is available."""
203        if self._method == "surf":
204            if not hasattr(cv, "xfeatures2d"):
205                raise TauluException(
206                    "SURF requires opencv-contrib-python with non-free modules. "
207                    "Install with: pip install opencv-contrib-python"
208                )
209
210    def _create_detector(self):
211        """Create the feature detector based on the selected method."""
212        if self._method == "orb":
213            return cv.ORB_create(  # type:ignore
214                self._max_features,
215                patchSize=self._patch_size,
216            )
217        elif self._method == "sift":
218            return cv.SIFT_create(  # type:ignore
219                nfeatures=self._max_features, sigma=2.5, edgeThreshold=10
220            )
221        elif self._method == "akaze":
222            return cv.AKAZE_create()  # type:ignore
223        elif self._method == "surf":
224            # SURF is in xfeatures2d (requires opencv-contrib-python)
225            return cv.xfeatures2d.SURF_create(hessianThreshold=400)  # type:ignore
226        else:
227            raise TauluException(f"Unknown method: {self._method}")
228
229    def _create_matcher(self):
230        """Create the feature matcher based on the selected method."""
231        if self._method == "orb":
232            # ORB uses binary descriptors -> Hamming distance
233            return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
234        elif self._method == "sift":
235            # SIFT uses float descriptors -> L2 norm with crossCheck
236            return cv.BFMatcher(cv.NORM_L2, crossCheck=True)
237        elif self._method == "akaze":
238            # AKAZE uses binary descriptors -> Hamming distance
239            return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
240        elif self._method == "surf":
241            # SURF uses float descriptors -> L2 norm
242            return cv.BFMatcher(cv.NORM_L2, crossCheck=True)
243        else:
244            raise TauluException(f"Unknown method: {self._method}")
245
246    def _match_features(self, matcher, descriptors_im, descriptors_tg):
247        """Match features using BFMatcher with crossCheck for all methods."""
248        return list(matcher.match(descriptors_im, descriptors_tg))
249
250    @log_calls(level=logging.DEBUG, include_return=True)
251    def _find_transform_of_template_on(
252        self,
253        im: MatLike,
254        visual: bool = False,
255        visual_notebook: bool = False,
256        window: str = WINDOW,
257    ):
258        im = self._scale_img(im)
259
260        # Create detector and matcher based on selected method
261        detector = self._create_detector()
262        matcher = self._create_matcher()
263
264        # Detect features and compute descriptors
265        keypoints_im, descriptors_im = detector.detectAndCompute(im, None)
266        keypoints_tg, descriptors_tg = detector.detectAndCompute(self._template, None)
267
268        if descriptors_im is None or descriptors_tg is None:
269            raise TauluException("No features detected in one or both images")
270
271        # Match features
272        matches = self._match_features(matcher, descriptors_im, descriptors_tg)
273
274        # Sort matches by score
275        matches = sorted(matches, key=lambda x: x.distance)
276
277        # Remove not so good matches
278        num_good_matches = int(len(matches) * self._match_fraction)
279        matches = matches[:num_good_matches]
280
281        if visual or visual_notebook:
282            final_img_filtered = cv.drawMatches(
283                im,
284                keypoints_im,
285                self._template,
286                keypoints_tg,
287                matches[:10],
288                None,
289                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
290            )
291            if visual:
292                imu.show(final_img_filtered, title="matches", window=window)
293            if visual_notebook:
294                self._matches_notebook_img = final_img_filtered
295
296        # Extract location of good matches
297        points1 = np.zeros((len(matches), 2), dtype=np.float32)
298        points2 = np.zeros((len(matches), 2), dtype=np.float32)
299
300        for i, match in enumerate(matches):
301            points1[i, :] = keypoints_tg[match.trainIdx].pt
302            points2[i, :] = keypoints_im[match.queryIdx].pt
303
304        # Prune reference points based upon distance between
305        # key points. This assumes a fairly good alignment to start with
306        # due to the protocol used (location of the sheets)
307        p1 = pd.DataFrame(data=points1)
308        p2 = pd.DataFrame(data=points2)
309        refdist = abs(p1 - p2)
310
311        mask_x = refdist.loc[:, 0] < (im.shape[0] * self._max_dist)
312        mask_y = refdist.loc[:, 1] < (im.shape[1] * self._max_dist)
313        mask = mask_x & mask_y
314        mask_array = mask.to_numpy()
315        points1 = points1[mask_array]
316        points2 = points2[mask_array]
317
318        # Filter matches for visualization
319        filtered_matches = [
320            m for m, keep in zip(matches, mask_array, strict=False) if keep
321        ]
322
323        if visual:
324            final_img_filtered = cv.drawMatches(
325                im,
326                keypoints_im,
327                self._template,
328                keypoints_tg,
329                filtered_matches[:100],
330                None,
331                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
332            )
333            imu.show(final_img_filtered, title="matches", window=window)
334
335        # Find homography
336        h, _ = cv.findHomography(points1, points2, cv.RANSAC)
337
338        return self._unscale_homography(h)
339
340    def show_matches_notebook(self):
341        """Display the stored feature matches image in the notebook (call after grid detection)."""
342        if self._matches_notebook_img is not None:
343            imu.show_notebook(self._matches_notebook_img, title="matches")
344            self._matches_notebook_img = None
345
346    def view_alignment(self, img: MatLike, h: NDArray):
347        """
348        Show the alignment of the template on the given image
349        by transforming it using the supplied transformation matrix `h`
350        and visualising both on different channels
351
352        Args:
353            img (MatLike): the image on which the template is transformed
354            h (NDArray): the transformation matrix
355        """
356
357        im = imu.ensure_gray(img)
358        header = imu.ensure_gray(self._unscale_img(self._template))
359        height, width = im.shape
360
361        header_warped = cv.warpPerspective(header, h, (width, height))
362
363        merged = np.full((height, width, 3), 255, dtype=np.uint8)
364
365        merged[..., 1] = im
366        merged[..., 2] = header_warped
367
368        return imu.show(merged)
369
370    @log_calls(level=logging.DEBUG, include_return=True)
371    def align(
372        self,
373        img: MatLike | str,
374        visual: bool = False,
375        visual_notebook: bool = False,
376        window: str = WINDOW,
377    ) -> NDArray:
378        """
379        Calculates a homogeneous transformation matrix that maps pixels of
380        the template to the given image
381        """
382
383        logger.info("Aligning header with supplied table image")
384
385        if type(img) is str:
386            tmp_img = cv.imread(img)
387            assert tmp_img is not None
388            img = tmp_img
389        img = cast(MatLike, img)
390
391        img = self._preprocess_image(img)
392
393        h = self._find_transform_of_template_on(img, visual, visual_notebook, window)
394
395        if visual:
396            self.view_alignment(img, h)
397
398        return h
399
400    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
401        """
402        Transform the given point (in template-space) using the transformation h
403        (obtained through the `align` method)
404
405        Args:
406            h (NDArray): transformation matrix of shape (3, 3)
407            point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
408        """
409
410        point = np.array([[point[0], point[1], 1]])  # type:ignore
411        transformed = np.dot(h, point.T)
412
413        transformed /= transformed[2]
414
415        return int(transformed[0][0]), int(transformed[1][0])

Aligns table header templates to subject images using feature-based registration.

This class supports multiple feature detection and matching methods to compute a homography transformation that maps points from a header template image to their corresponding locations in full table images.

How it Works

  1. Feature Detection: Extracts keypoints from both template and subject
  2. Feature Matching: Finds correspondences using the selected matcher
  3. Filtering: Keeps top matches and prunes based on spatial consistency
  4. Homography Estimation: Computes perspective transform using RANSAC

The computed homography can then transform any point from template space to image space, allowing you to locate table structures based on your annotation.

Available Methods

  • orb (default): ORB features with BFMatcher (Hamming distance). Fast and patent-free. Good for most use cases.
  • sift: SIFT features with BFMatcher (L2 norm). More robust to scale and rotation changes. Slower but often more accurate.
  • surf: SURF features with BFMatcher (L2 norm). Requires opencv-contrib-python with non-free modules enabled. Fast and robust.
  • akaze: AKAZE features with BFMatcher (Hamming distance). Patent-free, handles scale/rotation well, and often more robust than ORB on documents.

Preprocessing Options

  • Set k parameter to apply Sauvola thresholding before feature detection. This can improve matching on documents with variable lighting.
  • Set k=None to use raw images (just extract blue channel for BGR images)

Tuning Guidelines

  • max_features: Increase if matching fails on complex templates
  • match_fraction: Decrease if you get many incorrect matches
  • max_dist: Increase for documents with more warping/distortion
  • scale: Decrease (<1.0) to speed up on high-resolution images
Arguments:
  • template (MatLike | PathLike[str] | str | None): Header template image or path. This should contain a clear, representative view of the table header.
  • method (MatchMethod): Feature detection/matching method. One of "orb", "sift", "surf", or "akaze". Default is "orb".
  • max_features (int): Maximum features to detect. More features = slower but potentially more robust matching.
  • patch_size (int): ORB patch size for feature extraction (only used with "orb").
  • match_fraction (float): Fraction [0, 1] of matches to keep after sorting by quality. Higher = more matches but potentially more outliers.
  • scale (float): Image downscaling factor (0, 1] for processing speed.
  • max_dist (float): Maximum allowed distance (relative to image size) between matched keypoints. Filters out spatially inconsistent matches.
  • k (float | None): Sauvola threshold parameter for preprocessing. If None, no thresholding is applied. Typical range: 0.03-0.15.
HeaderAligner( template: Union[NoneType, cv2.Mat, numpy.ndarray, os.PathLike[str], str] = None, method: Literal['orb', 'sift', 'surf', 'akaze'] = 'orb', max_features: int = 100000, patch_size: int = 31, match_fraction: float = 0.3, scale: float = 1.0, max_dist: float = 1.0, k: float | None = None)
 86    def __init__(
 87        self,
 88        template: None | MatLike | PathLike[str] | str = None,
 89        method: MatchMethod = "orb",
 90        max_features: int = 100_000,
 91        patch_size: int = 31,
 92        match_fraction: float = 0.3,
 93        scale: float = 1.0,
 94        max_dist: float = 1.00,
 95        k: float | None = None,
 96    ):
 97        """
 98        Args:
 99            template (MatLike | str): (path of) template image, with the table template clearly visible
100            method (MatchMethod): feature detection/matching method ("orb", "sift", or "surf")
101            max_features (int): maximal number of features that will be extracted
102            patch_size (int): for ORB feature extractor (only used with method="orb")
103            match_fraction (float): best fraction of matches that are kept
104            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
105            max_dist (float): maximum distance (relative to image size) of matched features.
106                Increase this value if the warping between image and template needs to be more agressive
107            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
108        """
109
110        if type(template) is str or type(template) is PathLike:
111            value = cv.imread(fspath(template))
112            template = value
113
114        self._method = method
115        self._k = k
116        if scale > 1.0:
117            raise TauluException(
118                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
119            )
120        if scale == 0:
121            raise TauluException("Use 0 < scale <= 1.0")
122
123        self._scale = scale
124        self._template = self._scale_img(cast(MatLike, template))
125        self._template_orig: None | MatLike = None
126        self._preprocess_template()
127        self._max_features = max_features
128        self._patch_size = patch_size
129        self._match_fraction = match_fraction
130        self._max_dist = max_dist
131        self._validate_method()
132        self._matches_notebook_img = None
Arguments:
  • template (MatLike | str): (path of) template image, with the table template clearly visible
  • method (MatchMethod): feature detection/matching method ("orb", "sift", "surf", or "akaze")
  • max_features (int): maximal number of features that will be extracted
  • patch_size (int): for ORB feature extractor (only used with method="orb")
  • match_fraction (float): best fraction of matches that are kept
  • scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
  • max_dist (float): maximum distance (relative to image size) of matched features. Increase this value if the warping between image and template needs to be more aggressive
  • k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
method: Literal['orb', 'sift', 'surf', 'akaze']
    @property
    def method(self) -> MatchMethod:
        """The feature detection/matching method being used (as passed to the constructor)."""
        return self._method

The feature detection/matching method being used.

template
    @property
    def template(self):
        """
        The template image that subject images are aligned to.

        This is the internally stored template, i.e. after the scaling that
        the constructor applies to it.
        """
        return self._template

The template image that subject images are aligned to

def show_matches_notebook(self):
340    def show_matches_notebook(self):
341        """Display the stored feature matches image in the notebook (call after grid detection)."""
342        if self._matches_notebook_img is not None:
343            imu.show_notebook(self._matches_notebook_img, title="matches")
344            self._matches_notebook_img = None

Display the stored feature matches image in the notebook (call after grid detection).

def view_alignment( self, img: Union[cv2.Mat, numpy.ndarray], h: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[~_ScalarT]]):
346    def view_alignment(self, img: MatLike, h: NDArray):
347        """
348        Show the alignment of the template on the given image
349        by transforming it using the supplied transformation matrix `h`
350        and visualising both on different channels
351
352        Args:
353            img (MatLike): the image on which the template is transformed
354            h (NDArray): the transformation matrix
355        """
356
357        im = imu.ensure_gray(img)
358        header = imu.ensure_gray(self._unscale_img(self._template))
359        height, width = im.shape
360
361        header_warped = cv.warpPerspective(header, h, (width, height))
362
363        merged = np.full((height, width, 3), 255, dtype=np.uint8)
364
365        merged[..., 1] = im
366        merged[..., 2] = header_warped
367
368        return imu.show(merged)

Show the alignment of the template on the given image by transforming it using the supplied transformation matrix h and visualising both on different channels

Arguments:
  • img (MatLike): the image on which the template is transformed
  • h (NDArray): the transformation matrix
@log_calls(level=logging.DEBUG, include_return=True)
def align( self, img: Union[cv2.Mat, numpy.ndarray, str], visual: bool = False, visual_notebook: bool = False, window: str = 'taulu') -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[~_ScalarT]]:
370    @log_calls(level=logging.DEBUG, include_return=True)
371    def align(
372        self,
373        img: MatLike | str,
374        visual: bool = False,
375        visual_notebook: bool = False,
376        window: str = WINDOW,
377    ) -> NDArray:
378        """
379        Calculates a homogeneous transformation matrix that maps pixels of
380        the template to the given image
381        """
382
383        logger.info("Aligning header with supplied table image")
384
385        if type(img) is str:
386            tmp_img = cv.imread(img)
387            assert tmp_img is not None
388            img = tmp_img
389        img = cast(MatLike, img)
390
391        img = self._preprocess_image(img)
392
393        h = self._find_transform_of_template_on(img, visual, visual_notebook, window)
394
395        if visual:
396            self.view_alignment(img, h)
397
398        return h

Calculates a homogeneous transformation matrix that maps pixels of the template to the given image

def template_to_img( self, h: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[~_ScalarT]], point: Iterable[int]) -> tuple[int, int]:
400    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
401        """
402        Transform the given point (in template-space) using the transformation h
403        (obtained through the `align` method)
404
405        Args:
406            h (NDArray): transformation matrix of shape (3, 3)
407            point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
408        """
409
410        point = np.array([[point[0], point[1], 1]])  # type:ignore
411        transformed = np.dot(h, point.T)
412
413        transformed /= transformed[2]
414
415        return int(transformed[0][0]), int(transformed[1][0])

Transform the given point (in template-space) using the transformation h (obtained through the align method)

Arguments:
  • h (NDArray): transformation matrix of shape (3, 3)
  • point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
class HeaderTemplate(taulu.TableIndexer):
188class HeaderTemplate(TableIndexer):
189    """
190    Defines the structure of a table header as a set of rules (lines).
191
192    Created via `HeaderTemplate.from_saved` (loading a JSON annotation) or
193    `AnnotationSession` (interactive annotation). Provides cell position
194    lookups and expected row heights for the grid-growing algorithm.
195    """
196
197    def __init__(self, rules: Iterable[Iterable[int]]):
198        """
199        Args:
200            rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
201        """
202
203        super().__init__()
204        self._rules = [_Rule(*rule) for rule in rules]
205        self._h_rules = sorted(
206            [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y
207        )
208        self._v_rules = sorted(
209            [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x
210        )
211
212    @log_calls(level=logging.DEBUG)
213    def save(self, path: PathLike[str]):
214        """
215        Save the HeaderTemplate to the given path, as a json
216        """
217
218        data = {"rules": [r.to_dict() for r in self._rules]}
219
220        with open(path, "w") as f:
221            json.dump(data, f)
222
223    @staticmethod
224    @log_calls(level=logging.DEBUG)
225    def from_saved(path: PathLike[str] | str) -> "HeaderTemplate":
226        with open(path) as f:
227            data = json.load(f)
228            rules = data["rules"]
229            rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules]
230
231            return HeaderTemplate(rules)
232
    @property
    def cols(self) -> int:
        """Number of columns: one less than the number of vertical rules."""
        return len(self._v_rules) - 1
236
    @property
    def rows(self) -> int:
        """Number of rows: one less than the number of horizontal rules."""
        return len(self._h_rules) - 1
240
241    @staticmethod
242    @log_calls(level=logging.DEBUG)
243    def annotate_image(
244        template: MatLike | str,
245        crop: PathLike[str] | str | None = None,
246        margin: int = 10,
247    ) -> "HeaderTemplate":
248        """
249        Utility method that allows users to create a template form a template image.
250
251        The user is asked to click to annotate lines (two clicks per line).
252
253        Args:
254            template: the image on which to annotate the header lines
255            crop (str | None): if str, crop the template image first, then do the annotation.
256                The cropped image will be stored at the supplied path
257            margin (int): margin to add around the cropping of the header
258        """
259
260        if type(template) is str:
261            value = cv.imread(template)
262            assert value is not None
263            template = value
264        template = cast(MatLike, template)
265
266        if crop is not None:
267            cropped = HeaderTemplate._crop(template, margin)
268            cv.imwrite(os.fspath(crop), cropped)
269            template = cropped
270
271        start_point = None
272        lines: list[list[int]] = []
273
274        anno_template = np.copy(template)
275
276        def get_point(event, x, y, flags, params):
277            nonlocal lines, start_point, anno_template
278            _ = flags
279            _ = params
280            if event == cv.EVENT_LBUTTONDOWN:
281                if start_point is not None:
282                    line: list[int] = [start_point[1], start_point[0], x, y]
283
284                    cv.line(
285                        anno_template,
286                        (start_point[1], start_point[0]),
287                        (x, y),
288                        (0, 255, 0),
289                        2,
290                        cv.LINE_AA,
291                    )
292                    cv.imshow(constants.WINDOW, anno_template)
293
294                    lines.append(line)
295                    start_point = None
296                else:
297                    start_point = (y, x)
298            elif event == cv.EVENT_RBUTTONDOWN:
299                start_point = None
300
301                # remove the last annotation
302                lines = lines[:-1]
303
304                anno_template = np.copy(anno_template)
305
306                for line in lines:
307                    cv.line(
308                        template,
309                        (line[0], line[1]),
310                        (line[2], line[3]),
311                        (0, 255, 0),
312                        2,
313                        cv.LINE_AA,
314                    )
315
316                cv.imshow(constants.WINDOW, template)
317
318        print(ANNO_HELP)
319
320        imu.show(anno_template, get_point, title="annotate the header")
321
322        return HeaderTemplate(lines)
323
324    @staticmethod
325    @log_calls(level=logging.DEBUG)
326    def annotate_image_notebook(
327        template: MatLike | str,
328        crop: PathLike[str] | str | None = None,
329        margin: int = 10,
330    ) -> "AnnotationSession":
331        """
332        Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately.
333        Interact with the widget and click Done to finalize.
334        Access the result via session.result after clicking Done.
335
336        Args:
337            template: the image on which to annotate the header lines
338            crop (str | None): if str, crop the template image first, then do the annotation.
339                The cropped image will be stored at the supplied path
340            margin (int): margin to add around the cropping of the header
341
342        Returns:
343            AnnotationSession: access .result after clicking Done to get the HeaderTemplate.
344        """
345        if isinstance(template, str):
346            tmp = cv.imread(template)
347            assert tmp is not None
348            template = tmp
349
350        session = AnnotationSession(crop)
351
352        if crop is not None:
353            # First show crop UI, then annotation UI
354            HeaderTemplate._crop_notebook(template, margin, session)
355        else:
356            # Go directly to annotation
357            HeaderTemplate._show_annotation_ui(template, session)
358
359        return session
360
    @staticmethod
    def _crop_notebook(template: MatLike, margin: int, session: "AnnotationSession") -> None:
        """
        Notebook-compatible crop UI using matplotlib + ipywidgets.

        Shows `template` in a matplotlib figure and records left clicks as
        corner points. Once exactly four points are selected and
        "Done Cropping" is pressed, the image is cropped to the bounding box
        of those points (widened by `margin` and clamped to the image), the
        crop is written to `session._crop_path` when that is set, and the
        line annotation UI is opened on the cropped image.

        Args:
            template (MatLike): the image to crop
            margin (int): number of pixels added around the bounding box of the clicked points
            session (AnnotationSession): session that supplies the crop path
                and is handed on to the annotation UI
        """
        # imported lazily, inside the function rather than at module level
        import ipywidgets as widgets
        from IPython.display import display

        # matplotlib expects RGB; the BGR -> RGB conversion is for display only,
        # cropping below is done on `template` itself
        display_img = cv.cvtColor(template, cv.COLOR_BGR2RGB)

        # clicked corners as (x, y), and their plotted markers (same order)
        points: list[tuple[int, int]] = []
        drawn_points: list = []

        fig, ax = plt.subplots(figsize=(15, 15))

        fig.canvas.toolbar_visible = False  # ty:ignore[unresolved-attribute]
        fig.canvas.header_visible = False  # ty:ignore[unresolved-attribute]

        ax.imshow(display_img, origin="upper")
        ax.set_title(
            "Annotate the header: \nClick 4 corners of the header region such that the entire header is contained within the rectangle."
        )
        ax.set_axis_off()

        # Create ipywidgets buttons
        done_button = widgets.Button(
            description="Done Cropping",
            button_style="success",
            layout=widgets.Layout(width="200px", height="50px"),
        )

        undo_button = widgets.Button(
            description="Undo Last Point",
            button_style="warning",
            layout=widgets.Layout(width="200px", height="50px"),
        )

        done_button.style.font_size = "18px"
        undo_button.style.font_size = "18px"

        status_label = widgets.Label(
            value="Press 'Done' when finished. Press 'Undo Last Point' to remove the last point.",
            style={"font_size": "18px"},
        )

        def on_click(event):
            # ignore clicks outside of the image axes
            if event.inaxes != ax or event.xdata is None or event.ydata is None:
                return

            # Round coordinates to integers for pixel-perfect annotation
            x, y = round(event.xdata), round(event.ydata)

            # Validate coordinates are within image bounds
            img_h, img_w = template.shape[:2]
            x = max(0, min(x, img_w - 1))
            y = max(0, min(y, img_h - 1))

            if event.button == 1:  # Left click - add point
                points.append((x, y))
                (point_marker,) = ax.plot(x, y, "go", markersize=10)
                drawn_points.append(point_marker)
                status_label.value = f"Points: {len(points)}/4"
                fig.canvas.draw_idle()

        def on_undo(_):
            # drop the most recent point together with its marker
            if points:
                points.pop()
                drawn_points.pop().remove()
                status_label.value = f"Points: {len(points)}/4"
                fig.canvas.draw_idle()

        def on_done(_):
            # `cid` and `container` are assigned further down in the enclosing
            # function, before this callback can be triggered
            nonlocal cid

            if len(points) != 4:
                status_label.value = (
                    f"Error: Need exactly 4 points! Currently have {len(points)}"
                )
                return

            # stop listening for clicks on the crop figure
            fig.canvas.mpl_disconnect(cid)

            # Crop the image
            # (bounding box of the clicked points, widened by `margin`
            # and clamped to the image)
            points_np = np.array(points)
            img_h, img_w = template.shape[:2]
            x_min = max(int(np.min(points_np[:, 0])) - margin, 0)
            y_min = max(int(np.min(points_np[:, 1])) - margin, 0)
            x_max = min(int(np.max(points_np[:, 0])) + margin, img_w)
            y_max = min(int(np.max(points_np[:, 1])) + margin, img_h)

            cropped = template[y_min:y_max, x_min:x_max]

            # Save cropped image if path provided
            if session._crop_path is not None:
                cv.imwrite(os.fspath(session._crop_path), cropped)

            # replace the crop UI by the annotation UI inside the same output area
            plt.close(fig)
            container.clear_output()
            with container:
                HeaderTemplate._show_annotation_ui(cropped, session)

        done_button.on_click(on_done)
        undo_button.on_click(on_undo)

        cid = fig.canvas.mpl_connect("button_press_event", on_click)

        # Anchor an Output widget to the cell, then render inside it so that
        # on_done can clear and re-populate it without leaving the cell context.
        container = widgets.Output()
        display(container)
        with container:
            plt.tight_layout(pad=0)
            plt.show()
            display(widgets.HBox([done_button, undo_button, status_label]))
473
    @staticmethod
    def _show_annotation_ui(template: MatLike, session: "AnnotationSession") -> None:
        """
        Show the line annotation UI using matplotlib + ipywidgets.

        Every two left clicks on the image produce one line [x0, y0, x1, y1].
        "Undo Last Line" drops a pending start point and removes the last line.
        "Done Annotating" stores `HeaderTemplate(lines)` in `session._result`
        and disables the UI.

        Args:
            template (MatLike): the image on which the lines are annotated
            session (AnnotationSession): session that receives the resulting HeaderTemplate
        """
        # imported lazily, inside the function rather than at module level
        import ipywidgets as widgets
        from IPython.display import display

        print(
            "\x1b[32m[Taulu]: Don't forget to save annotations with annotation.save()!\x1b[0m"
        )

        # matplotlib expects RGB; the conversion is for display only
        display_img = cv.cvtColor(template, cv.COLOR_BGR2RGB)

        # annotated lines as [x0, y0, x1, y1]
        lines: list[list[int]] = []
        # single-element list, so that the callbacks can update the pending
        # start point without `nonlocal`
        start_point: list[tuple[int, int] | None] = [None]
        # matplotlib artists of the drawn lines (same order as `lines`)
        drawn_lines: list = []
        # marker artist of the pending start point (at most one at a time)
        start_markers: list = []

        fig, ax = plt.subplots(figsize=(15, 12))
        fig.canvas.toolbar_visible = False  # ty:ignore[unresolved-attribute]
        fig.canvas.header_visible = False  # ty:ignore[unresolved-attribute]
        ax.imshow(display_img, origin="upper")
        ax.set_title("Click pairs of points to draw lines. Lines: 0")
        ax.set_axis_off()

        # Create ipywidgets buttons
        done_button = widgets.Button(
            description="Done Annotating",
            button_style="success",
            layout=widgets.Layout(width="200px", height="50px"),
        )
        undo_button = widgets.Button(
            description="Undo Last Line",
            button_style="warning",
            layout=widgets.Layout(width="200px", height="50px"),
        )
        status_label = widgets.Label(
            value="Click to start a line, click again to end it",
            style={"font_size": "18px"},
        )

        done_button.style.font_size = "18px"
        undo_button.style.font_size = "18px"

        def on_click(event):
            # ignore clicks outside of the image axes
            if event.inaxes != ax or event.xdata is None or event.ydata is None:
                return

            # Round coordinates to integers for pixel-perfect annotation
            x, y = round(event.xdata), round(event.ydata)

            # Validate coordinates are within image bounds
            img_h, img_w = template.shape[:2]
            x = max(0, min(x, img_w - 1))
            y = max(0, min(y, img_h - 1))

            if event.button == 1:  # Left click
                if start_point[0] is not None:
                    # second click: complete the line
                    x0, y0 = start_point[0]
                    lines.append([x0, y0, x, y])
                    (ln,) = ax.plot([x0, x], [y0, y], color="lime", linewidth=2)
                    drawn_lines.append(ln)
                    # Remove the start-point marker now that the line is complete
                    if start_markers:
                        start_markers.pop().remove()
                    ax.set_title(
                        f"Click pairs of points to draw lines. Lines: {len(lines)}"
                    )
                    status_label.value = (
                        f"Line {len(lines)} added. Click to start next line."
                    )
                    fig.canvas.draw_idle()
                    start_point[0] = None
                else:
                    # first click: remember the start point
                    start_point[0] = (x, y)
                    status_label.value = (
                        f"Start point set at ({x}, {y}). Click end point."
                    )
                    # Draw a temporary marker (tracked so undo can remove it)
                    (marker,) = ax.plot(x, y, "ro", markersize=5)
                    start_markers.append(marker)
                    fig.canvas.draw_idle()

        def on_undo(_):
            # Clear any pending start-point marker
            if start_markers:
                start_markers.pop().remove()
            start_point[0] = None
            # the last completed line is removed as well, whether or not a
            # start point was pending
            if lines:
                lines.pop()
                drawn_lines.pop().remove()
                ax.set_title(
                    f"Click pairs of points to draw lines. Lines: {len(lines)}"
                )
                status_label.value = f"Undone. Lines: {len(lines)}"
                fig.canvas.draw_idle()

        def on_done(_):
            # publish the result on the session and freeze the UI
            session._result = HeaderTemplate(lines)
            fig.canvas.mpl_disconnect(cid)
            done_button.disabled = True
            undo_button.disabled = True
            ax.set_title(
                f"Done! {len(lines)} lines annotated. Call session.save() to save."
            )
            status_label.value = (
                "Annotation complete! Run session.save('filename.json') to save."
            )
            fig.canvas.draw_idle()

        done_button.on_click(on_done)
        undo_button.on_click(on_undo)

        cid = fig.canvas.mpl_connect("button_press_event", on_click)

        # Display figure first, then buttons below
        plt.tight_layout(pad=0)
        plt.show()
        display(widgets.HBox([done_button, undo_button, status_label]))
592
593    @staticmethod
594    @log_calls(level=logging.DEBUG, include_return=True)
595    def _crop(template: MatLike, margin: int = 10) -> MatLike:
596        """
597        Crop the image to contain only the annotations, such that it can be used as the header image in the taulu workflow.
598        """
599
600        points = []
601        anno_template = np.copy(template)
602
603        def get_point(event, x, y, flags, params):
604            nonlocal points, anno_template
605            _ = flags
606            _ = params
607            if event == cv.EVENT_LBUTTONDOWN:
608                point = (x, y)
609
610                cv.circle(
611                    anno_template,
612                    (x, y),
613                    4,
614                    (0, 255, 0),
615                    2,
616                )
617                cv.imshow(constants.WINDOW, anno_template)
618
619                points.append(point)
620            elif event == cv.EVENT_RBUTTONDOWN:
621                # remove the last annotation
622                points = points[:-1]
623
624                anno_template = np.copy(anno_template)
625
626                for p in points:
627                    cv.circle(
628                        anno_template,
629                        p,
630                        4,
631                        (0, 255, 0),
632                        2,
633                    )
634
635                cv.imshow(constants.WINDOW, anno_template)
636
637        print(CROP_HELP)
638
639        imu.show(anno_template, get_point, title="crop the header")
640
641        assert len(points) == 4, (
642            "you need to annotate the four corners of the table in order to crop it"
643        )
644
645        # crop the image to contain all of the points (just crop rectangularly, x, y, w, h)
646        # Convert points to numpy array
647        points_np = np.array(points)
648
649        # Find bounding box
650        x_min = np.min(points_np[:, 0])
651        y_min = np.min(points_np[:, 1])
652        x_max = np.max(points_np[:, 0])
653        y_max = np.max(points_np[:, 1])
654
655        # Compute width and height
656        width = x_max - x_min
657        height = y_max - y_min
658
659        # Ensure integers and within image boundaries
660        x_min = max(int(x_min), 0)
661        y_min = max(int(y_min), 0)
662        width = int(width)
663        height = int(height)
664
665        # Crop the image
666        cropped = template[
667            y_min - margin : y_min + height + margin,
668            x_min - margin : x_min + width + margin,
669        ]
670
671        return cropped
672
673    @staticmethod
674    def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
675        """
676        Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.
677
678        Args:
679            annotation (str): the path of the annotation csv file
680        """
681
682        rules = []
683        with open(annotation) as csvfile:
684            reader = csv.DictReader(csvfile)
685            for row in reader:
686                shape_attributes = json.loads(row["region_shape_attributes"])
687                if shape_attributes["name"] == "polyline":
688                    x_points = shape_attributes["all_points_x"]
689                    y_points = shape_attributes["all_points_y"]
690                    if len(x_points) == 2 and len(y_points) == 2:
691                        rules.append(
692                            [x_points[0], y_points[0], x_points[1], y_points[1]]
693                        )
694
695        return HeaderTemplate(rules)
696
697    def cell_width(self, i: int) -> int:
698        self._check_col_idx(i)
699        return int(self._v_rules[i + 1]._x - self._v_rules[i]._x)
700
701    def cell_widths(self, start: int = 0) -> list[int]:
702        return [self.cell_width(i) for i in range(start, self.cols)]
703
704    def cell_height(self, header_factor: float = 0.8) -> int:
705        return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)
706
707    def cell_heights(self, header_factors: list[float] | float) -> list[int]:
708        if isinstance(header_factors, float):
709            header_factors = [header_factors]
710        header_factors = cast(list, header_factors)
711        return [
712            int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors
713        ]
714
    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
        """
        Returns the intersection of the index[0]th horizontal rule and the
        index[1]th vertical rule

        Raises:
            AssertionError: if the two rules do not intersect
        """

        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
        # the rule's intersection() yields None when there is no crossing point
        assert ints is not None
        return ints
724
725    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
726        """
727        Get the cell index (row, col) that corresponds with the point (x, y) in the template image
728
729        Args:
730            point (tuple[float, float]): the coordinates in the template image
731
732        Returns:
733            tuple[int, int]: (row, col)
734        """
735
736        x, y = point
737
738        row = -1
739        col = -1
740
741        for i in range(self.rows):
742            y0 = self._h_rules[i]._y_at_x(x)
743            y1 = self._h_rules[i + 1]._y_at_x(x)
744            if min(y0, y1) <= y <= max(y0, y1):
745                row = i
746                break
747
748        for i in range(self.cols):
749            x0 = self._v_rules[i]._x_at_y(y)
750            x1 = self._v_rules[i + 1]._x_at_y(y)
751            if min(x0, x1) <= x <= max(x0, x1):
752                col = i
753                break
754
755        if row == -1 or col == -1:
756            return (-1, -1)
757
758        return (row, col)
759
760    def cell_polygon(
761        self, cell: tuple[int, int]
762    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
763        """
764        Return points (x,y) that make up a polygon around the requested cell
765        (top left, top right, bottom right, bottom left)
766        """
767
768        row, col = cell
769
770        self._check_col_idx(col)
771        self._check_row_idx(row)
772
773        top_rule = self._h_rules[row]
774        bottom_rule = self._h_rules[row + 1]
775        left_rule = self._v_rules[col]
776        right_rule = self._v_rules[col + 1]
777
778        # Calculate corner points using intersections
779        top_left = top_rule.intersection(left_rule)
780        top_right = top_rule.intersection(right_rule)
781        bottom_left = bottom_rule.intersection(left_rule)
782        bottom_right = bottom_rule.intersection(right_rule)
783
784        if not all(
785            point is not None
786            for point in [top_left, top_right, bottom_left, bottom_right]
787        ):
788            raise TauluException("the lines around this cell do not intersect")
789
790        return top_left, top_right, bottom_right, bottom_left  # type:ignore
791
792    def region(
793        self, start: tuple[int, int], end: tuple[int, int]
794    ) -> tuple[Point, Point, Point, Point]:
795        self._check_row_idx(start[0])
796        self._check_row_idx(end[0])
797        self._check_col_idx(start[1])
798        self._check_col_idx(end[1])
799
800        # the rules that surround this row
801        top_rule = self._h_rules[start[0]]
802        bottom_rule = self._h_rules[end[0] + 1]
803        left_rule = self._v_rules[start[1]]
804        right_rule = self._v_rules[end[1] + 1]
805
806        # four points that will be the bounding polygon of the result,
807        # which needs to be rectified
808        top_left = top_rule.intersection(left_rule)
809        top_right = top_rule.intersection(right_rule)
810        bottom_left = bottom_rule.intersection(left_rule)
811        bottom_right = bottom_rule.intersection(right_rule)
812
813        if (
814            top_left is None
815            or top_right is None
816            or bottom_left is None
817            or bottom_right is None
818        ):
819            raise TauluException("the lines around this row do not intersect properly")
820
821        def to_point(pnt) -> Point:
822            return (int(pnt[0]), int(pnt[1]))
823
824        return (
825            to_point(top_left),
826            to_point(top_right),
827            to_point(bottom_right),
828            to_point(bottom_left),
829        )
830
831    def text_regions(
832        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
833    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
834        raise TauluException("text_regions should not be called on a HeaderTemplate")

Defines the structure of a table header as a set of rules (lines).

Created via HeaderTemplate.from_saved (loading a JSON annotation) or AnnotationSession (interactive annotation). Provides cell position lookups and expected row heights for the grid-growing algorithm.

HeaderTemplate(rules: Iterable[Iterable[int]])
197    def __init__(self, rules: Iterable[Iterable[int]]):
198        """
199        Args:
200            rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
201        """
202
203        super().__init__()
204        self._rules = [_Rule(*rule) for rule in rules]
205        self._h_rules = sorted(
206            [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y
207        )
208        self._v_rules = sorted(
209            [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x
210        )
Arguments:
  • rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
@log_calls(level=logging.DEBUG)
def save(self, path: os.PathLike[str]):
212    @log_calls(level=logging.DEBUG)
213    def save(self, path: PathLike[str]):
214        """
215        Save the HeaderTemplate to the given path, as a json
216        """
217
218        data = {"rules": [r.to_dict() for r in self._rules]}
219
220        with open(path, "w") as f:
221            json.dump(data, f)

Save the HeaderTemplate to the given path, as a json

@staticmethod
@log_calls(level=logging.DEBUG)
def from_saved(path: os.PathLike[str] | str) -> HeaderTemplate:
223    @staticmethod
224    @log_calls(level=logging.DEBUG)
225    def from_saved(path: PathLike[str] | str) -> "HeaderTemplate":
226        with open(path) as f:
227            data = json.load(f)
228            rules = data["rules"]
229            rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules]
230
231            return HeaderTemplate(rules)
cols: int
233    @property
234    def cols(self) -> int:
235        return len(self._v_rules) - 1
rows: int
237    @property
238    def rows(self) -> int:
239        return len(self._h_rules) - 1
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image( template: Union[cv2.Mat, numpy.ndarray, str], crop: os.PathLike[str] | str | None = None, margin: int = 10) -> HeaderTemplate:
241    @staticmethod
242    @log_calls(level=logging.DEBUG)
243    def annotate_image(
244        template: MatLike | str,
245        crop: PathLike[str] | str | None = None,
246        margin: int = 10,
247    ) -> "HeaderTemplate":
248        """
249        Utility method that allows users to create a template form a template image.
250
251        The user is asked to click to annotate lines (two clicks per line).
252
253        Args:
254            template: the image on which to annotate the header lines
255            crop (str | None): if str, crop the template image first, then do the annotation.
256                The cropped image will be stored at the supplied path
257            margin (int): margin to add around the cropping of the header
258        """
259
260        if type(template) is str:
261            value = cv.imread(template)
262            assert value is not None
263            template = value
264        template = cast(MatLike, template)
265
266        if crop is not None:
267            cropped = HeaderTemplate._crop(template, margin)
268            cv.imwrite(os.fspath(crop), cropped)
269            template = cropped
270
271        start_point = None
272        lines: list[list[int]] = []
273
274        anno_template = np.copy(template)
275
276        def get_point(event, x, y, flags, params):
277            nonlocal lines, start_point, anno_template
278            _ = flags
279            _ = params
280            if event == cv.EVENT_LBUTTONDOWN:
281                if start_point is not None:
282                    line: list[int] = [start_point[1], start_point[0], x, y]
283
284                    cv.line(
285                        anno_template,
286                        (start_point[1], start_point[0]),
287                        (x, y),
288                        (0, 255, 0),
289                        2,
290                        cv.LINE_AA,
291                    )
292                    cv.imshow(constants.WINDOW, anno_template)
293
294                    lines.append(line)
295                    start_point = None
296                else:
297                    start_point = (y, x)
298            elif event == cv.EVENT_RBUTTONDOWN:
299                start_point = None
300
301                # remove the last annotation
302                lines = lines[:-1]
303
304                anno_template = np.copy(anno_template)
305
306                for line in lines:
307                    cv.line(
308                        template,
309                        (line[0], line[1]),
310                        (line[2], line[3]),
311                        (0, 255, 0),
312                        2,
313                        cv.LINE_AA,
314                    )
315
316                cv.imshow(constants.WINDOW, template)
317
318        print(ANNO_HELP)
319
320        imu.show(anno_template, get_point, title="annotate the header")
321
322        return HeaderTemplate(lines)

Utility method that allows users to create a template from a template image.

The user is asked to click to annotate lines (two clicks per line).

Arguments:
  • template: the image on which to annotate the header lines
  • crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
  • margin (int): margin to add around the cropping of the header
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image_notebook( template: Union[cv2.Mat, numpy.ndarray, str], crop: os.PathLike[str] | str | None = None, margin: int = 10) -> taulu.header_template.AnnotationSession:
324    @staticmethod
325    @log_calls(level=logging.DEBUG)
326    def annotate_image_notebook(
327        template: MatLike | str,
328        crop: PathLike[str] | str | None = None,
329        margin: int = 10,
330    ) -> "AnnotationSession":
331        """
332        Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately.
333        Interact with the widget and click Done to finalize.
334        Access the result via session.result after clicking Done.
335
336        Args:
337            template: the image on which to annotate the header lines
338            crop (str | None): if str, crop the template image first, then do the annotation.
339                The cropped image will be stored at the supplied path
340            margin (int): margin to add around the cropping of the header
341
342        Returns:
343            AnnotationSession: access .result after clicking Done to get the HeaderTemplate.
344        """
345        if isinstance(template, str):
346            tmp = cv.imread(template)
347            assert tmp is not None
348            template = tmp
349
350        session = AnnotationSession(crop)
351
352        if crop is not None:
353            # First show crop UI, then annotation UI
354            HeaderTemplate._crop_notebook(template, margin, session)
355        else:
356            # Go directly to annotation
357            HeaderTemplate._show_annotation_ui(template, session)
358
359        return session

Notebook-compatible version of annotate_image. Returns an AnnotationSession immediately. Interact with the widget and click Done to finalize. Access the result via session.result after clicking Done.

Arguments:
  • template: the image on which to annotate the header lines
  • crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
  • margin (int): margin to add around the cropping of the header
Returns:

AnnotationSession: access .result after clicking Done to get the HeaderTemplate.

@staticmethod
def from_vgg_annotation(annotation: str) -> HeaderTemplate:
673    @staticmethod
674    def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
675        """
676        Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.
677
678        Args:
679            annotation (str): the path of the annotation csv file
680        """
681
682        rules = []
683        with open(annotation) as csvfile:
684            reader = csv.DictReader(csvfile)
685            for row in reader:
686                shape_attributes = json.loads(row["region_shape_attributes"])
687                if shape_attributes["name"] == "polyline":
688                    x_points = shape_attributes["all_points_x"]
689                    y_points = shape_attributes["all_points_y"]
690                    if len(x_points) == 2 and len(y_points) == 2:
691                        rules.append(
692                            [x_points[0], y_points[0], x_points[1], y_points[1]]
693                        )
694
695        return HeaderTemplate(rules)

Create a HeaderTemplate from annotations made in vgg, using the polylines tool.

Arguments:
  • annotation (str): the path of the annotation csv file
def cell_width(self, i: int) -> int:
697    def cell_width(self, i: int) -> int:
698        self._check_col_idx(i)
699        return int(self._v_rules[i + 1]._x - self._v_rules[i]._x)
def cell_widths(self, start: int = 0) -> list[int]:
701    def cell_widths(self, start: int = 0) -> list[int]:
702        return [self.cell_width(i) for i in range(start, self.cols)]
def cell_height(self, header_factor: float = 0.8) -> int:
704    def cell_height(self, header_factor: float = 0.8) -> int:
705        return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)
def cell_heights(self, header_factors: list[float] | float) -> list[int]:
707    def cell_heights(self, header_factors: list[float] | float) -> list[int]:
708        if isinstance(header_factors, float):
709            header_factors = [header_factors]
710        header_factors = cast(list, header_factors)
711        return [
712            int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors
713        ]
def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
        """
        Returns the intersection of the index[0]th horizontal rule and the
        index[1]th vertical rule

        Raises:
            AssertionError: if the two rules do not intersect
        """

        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
        # the rule's intersection() yields None when there is no crossing point
        assert ints is not None
        return ints

Returns the intersection of the index[0]th horizontal rule and the index[1]th vertical rule

def cell(self, point: tuple[float, float]) -> tuple[int, int]:
725    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
726        """
727        Get the cell index (row, col) that corresponds with the point (x, y) in the template image
728
729        Args:
730            point (tuple[float, float]): the coordinates in the template image
731
732        Returns:
733            tuple[int, int]: (row, col)
734        """
735
736        x, y = point
737
738        row = -1
739        col = -1
740
741        for i in range(self.rows):
742            y0 = self._h_rules[i]._y_at_x(x)
743            y1 = self._h_rules[i + 1]._y_at_x(x)
744            if min(y0, y1) <= y <= max(y0, y1):
745                row = i
746                break
747
748        for i in range(self.cols):
749            x0 = self._v_rules[i]._x_at_y(y)
750            x1 = self._v_rules[i + 1]._x_at_y(y)
751            if min(x0, x1) <= x <= max(x0, x1):
752                col = i
753                break
754
755        if row == -1 or col == -1:
756            return (-1, -1)
757
758        return (row, col)

Get the cell index (row, col) that corresponds with the point (x, y) in the template image

Arguments:
  • point (tuple[float, float]): the coordinates in the template image
Returns:

tuple[int, int]: (row, col)

def cell_polygon( self, cell: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
760    def cell_polygon(
761        self, cell: tuple[int, int]
762    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
763        """
764        Return points (x,y) that make up a polygon around the requested cell
765        (top left, top right, bottom right, bottom left)
766        """
767
768        row, col = cell
769
770        self._check_col_idx(col)
771        self._check_row_idx(row)
772
773        top_rule = self._h_rules[row]
774        bottom_rule = self._h_rules[row + 1]
775        left_rule = self._v_rules[col]
776        right_rule = self._v_rules[col + 1]
777
778        # Calculate corner points using intersections
779        top_left = top_rule.intersection(left_rule)
780        top_right = top_rule.intersection(right_rule)
781        bottom_left = bottom_rule.intersection(left_rule)
782        bottom_right = bottom_rule.intersection(right_rule)
783
784        if not all(
785            point is not None
786            for point in [top_left, top_right, bottom_left, bottom_right]
787        ):
788            raise TauluException("the lines around this cell do not intersect")
789
790        return top_left, top_right, bottom_right, bottom_left  # type:ignore

Return points (x,y) that make up a polygon around the requested cell (top left, top right, bottom right, bottom left)

def region( self, start: tuple[int, int], end: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
792    def region(
793        self, start: tuple[int, int], end: tuple[int, int]
794    ) -> tuple[Point, Point, Point, Point]:
795        self._check_row_idx(start[0])
796        self._check_row_idx(end[0])
797        self._check_col_idx(start[1])
798        self._check_col_idx(end[1])
799
800        # the rules that surround this row
801        top_rule = self._h_rules[start[0]]
802        bottom_rule = self._h_rules[end[0] + 1]
803        left_rule = self._v_rules[start[1]]
804        right_rule = self._v_rules[end[1] + 1]
805
806        # four points that will be the bounding polygon of the result,
807        # which needs to be rectified
808        top_left = top_rule.intersection(left_rule)
809        top_right = top_rule.intersection(right_rule)
810        bottom_left = bottom_rule.intersection(left_rule)
811        bottom_right = bottom_rule.intersection(right_rule)
812
813        if (
814            top_left is None
815            or top_right is None
816            or bottom_left is None
817            or bottom_right is None
818        ):
819            raise TauluException("the lines around this row do not intersect properly")
820
821        def to_point(pnt) -> Point:
822            return (int(pnt[0]), int(pnt[1]))
823
824        return (
825            to_point(top_left),
826            to_point(top_right),
827            to_point(bottom_right),
828            to_point(bottom_left),
829        )

Get the bounding box for the rectangular region that goes from start to end

Returns:

4 points: lt, rt, rb, lb, in format (x, y)

def text_regions( self, img: Union[cv2.Mat, numpy.ndarray], row: int, margin_x: int = 10, margin_y: int = -20) -> list[tuple[tuple[int, int], tuple[int, int]]]:
831    def text_regions(
832        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
833    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
834        raise TauluException("text_regions should not be called on a HeaderTemplate")

Split the row into regions of continuous text

Returns list[tuple[int, int]]: a list of spans (start col, end col)

MatchMethod = typing.Literal['orb', 'sift', 'surf', 'akaze']
class Split(typing.Generic[T]):
 14class Split[T]:
 15    """
 16    Container for paired left/right data with convenient manipulation methods.
 17
 18    The Split class is designed for working with table images that span two pages
 19    or have distinct left and right sections. It allows you to:
 20    - Store related data for both sides
 21    - Apply functions to both sides simultaneously
 22    - Access attributes/methods of contained objects transparently
 23
 24    Examples:
 25        >>> # Create a split with different parameters for each side
 26        >>> thresholds = Split(0.25, 0.30)
 27        >>>
 28        >>> # Apply a function to both sides
 29        >>> images = Split(left_img, right_img)
 30        >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
 31        >>>
 32        >>> # Use with different parameters per side
 33        >>> results = images.apply(
 34        ...     lambda img, k: sauvola_threshold(img, k),
 35        ...     k=thresholds  # k.left used for left img, k.right for right
 36        ... )
 37        >>>
 38        >>> # Access methods of contained objects directly
 39        >>> templates = Split(template_left, template_right)
 40        >>> widths = templates.cell_widths(0)  # Calls on both templates
 41
 42    Type Parameters:
 43        T: The type of objects stored in left and right
 44    """
 45
 46    def __init__(self, left: T | None = None, right: T | None = None):
 47        """
 48        Initialize a Split container.
 49
 50        Args:
 51            left: Data for the left side
 52            right: Data for the right side
 53
 54        Note:
 55            Both can initially be None. Use the `append` method or set
 56            properties directly to populate.
 57        """
 58        self._left = left
 59        self._right = right
 60
 61    @property
 62    def left(self) -> T:
 63        assert self._left is not None
 64        return self._left
 65
 66    @left.setter
 67    def left(self, value: T):
 68        self._left = value
 69
 70    @property
 71    def right(self) -> T:
 72        assert self._right is not None
 73        return self._right
 74
 75    @right.setter
 76    def right(self, value: T):
 77        self._right = value
 78
 79    def append(self, value: T):
 80        if self._left is None:
 81            self._left = value
 82        else:
 83            self._right = value
 84
 85    def __repr__(self) -> str:
 86        return f"left: {self._left}, right: {self._right}"
 87
 88    def __iter__(self):
 89        assert self._left is not None
 90        assert self._right is not None
 91        return iter((self._left, self._right))
 92
 93    def __getitem__(self, index: bool | int) -> T:
 94        assert self._left is not None
 95        assert self._right is not None
 96        if int(index) == 0:
 97            return self._left
 98        else:
 99            return self._right
100
101    def apply(
102        self,
103        funcs: "Split[Callable[..., V]] | Callable[..., V]",
104        *args,
105        **kwargs,
106    ) -> "Split[V]":
107        if not isinstance(funcs, Split):
108            funcs = Split(funcs, funcs)
109
110        def get_arg(side: str, arg):
111            if isinstance(arg, Split):
112                return getattr(arg, side)
113            return arg
114
115        def call(side: str):
116            func = getattr(funcs, side)
117            target = getattr(self, side)
118
119            side_args = [get_arg(side, arg) for arg in args]
120            side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()}
121
122            return func(target, *side_args, **side_kwargs)
123
124        return Split(call("left"), call("right"))
125
126    def __getattr__(self, attr_name: str):
127        if attr_name in self.__dict__:
128            return getattr(self, attr_name)
129
130        def wrapper(*args, **kwargs):
131            return self.apply(
132                Split(
133                    getattr(self.left.__class__, attr_name),
134                    getattr(self.right.__class__, attr_name),
135                ),
136                *args,
137                **kwargs,
138            )
139
140        return wrapper

Container for paired left/right data with convenient manipulation methods.

The Split class is designed for working with table images that span two pages or have distinct left and right sections. It allows you to:

  • Store related data for both sides
  • Apply functions to both sides simultaneously
  • Access attributes/methods of contained objects transparently
Examples:
>>> # Create a split with different parameters for each side
>>> thresholds = Split(0.25, 0.30)
>>>
>>> # Apply a function to both sides
>>> images = Split(left_img, right_img)
>>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
>>>
>>> # Use with different parameters per side
>>> results = images.apply(
...     lambda img, k: sauvola_threshold(img, k),
...     k=thresholds  # k.left used for left img, k.right for right
... )
>>>
>>> # Access methods of contained objects directly
>>> templates = Split(template_left, template_right)
>>> widths = templates.cell_widths(0)  # Calls on both templates
Type Parameters:

T: The type of objects stored in left and right

Split(left: Optional[T] = None, right: Optional[T] = None)
46    def __init__(self, left: T | None = None, right: T | None = None):
47        """
48        Initialize a Split container.
49
50        Args:
51            left: Data for the left side
52            right: Data for the right side
53
54        Note:
55            Both can initially be None. Use the `append` method or set
56            properties directly to populate.
57        """
58        self._left = left
59        self._right = right

Initialize a Split container.

Arguments:
  • left: Data for the left side
  • right: Data for the right side
Note:

Both can initially be None. Use the append method or set properties directly to populate.

left: T
61    @property
62    def left(self) -> T:
63        assert self._left is not None
64        return self._left
right: T
70    @property
71    def right(self) -> T:
72        assert self._right is not None
73        return self._right
def append(self, value: T):
79    def append(self, value: T):
80        if self._left is None:
81            self._left = value
82        else:
83            self._right = value
def apply( self, funcs: Union[Split[Callable[..., ~V]], Callable[..., ~V]], *args, **kwargs) -> Split[~V]:
101    def apply(
102        self,
103        funcs: "Split[Callable[..., V]] | Callable[..., V]",
104        *args,
105        **kwargs,
106    ) -> "Split[V]":
107        if not isinstance(funcs, Split):
108            funcs = Split(funcs, funcs)
109
110        def get_arg(side: str, arg):
111            if isinstance(arg, Split):
112                return getattr(arg, side)
113            return arg
114
115        def call(side: str):
116            func = getattr(funcs, side)
117            target = getattr(self, side)
118
119            side_args = [get_arg(side, arg) for arg in args]
120            side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()}
121
122            return func(target, *side_args, **side_kwargs)
123
124        return Split(call("left"), call("right"))
class TableGrid(taulu.TableIndexer):
 780class TableGrid(TableIndexer):
 781    """
 782    Represents a detected table grid as a 2D array of intersection points.
 783
 784    Returned by `Taulu.segment_table`. Provides methods for querying cell
 785    locations, cropping cells/regions from the source image, and interactive
 786    visualization. Can be saved to and restored from JSON.
 787    """
 788
 789    _right_offset: int | None = None
 790
 791    def __init__(self, points: list[list[Point]], right_offset: int | None = None):
 792        """
 793        Args:
 794            points: a 2D list of intersections between hor. and vert. rules
 795        """
 796        self._points = points
 797        self._right_offset = right_offset
 798
 799    @property
 800    def points(self) -> list[list[Point]]:
 801        return self._points
 802
 803    def row(self, i: int) -> list[Point]:
 804        assert 0 <= i and i < len(self._points)
 805        return self._points[i]
 806
 807    @property
 808    def cols(self) -> int:
 809        if self._right_offset is not None:
 810            return len(self.row(0)) - 2
 811        else:
 812            return len(self.row(0)) - 1
 813
 814    @property
 815    def rows(self) -> int:
 816        return len(self._points) - 1
 817
 818    @property
 819    def right_offset(self) -> int | None:
 820        return self._right_offset
 821
 822    @staticmethod
 823    def from_split(
 824        split_grids: Split["TableGrid"], offsets: Split[Point]
 825    ) -> "TableGrid":
 826        """
 827        Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image
 828        Args:
 829            split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
 830            offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
 831        """
 832
 833        def offset_points(points, offset):
 834            return [
 835                [
 836                    (p[0] + offset[0], p[1] + offset[1]) if p is not None else None
 837                    for p in row
 838                ]
 839                for row in points
 840            ]
 841
 842        split_points = split_grids.apply(
 843            lambda grid, offset: offset_points(grid.points, offset), offsets
 844        )
 845        points = []
 846        rows = min(split_grids.left.rows, split_grids.right.rows)
 847        for row in range(rows + 1):
 848            left_row = split_points.left[row]
 849            right_row = split_points.right[row]
 850
 851            # Skip rows that contain None values
 852            if any(p is None for p in left_row) or any(p is None for p in right_row):
 853                logger.warning(
 854                    f"Skipping row {row} in from_split due to incomplete grid data"
 855                )
 856                continue
 857
 858            row_points = []
 859            row_points.extend(left_row)
 860            row_points.extend(right_row)
 861            points.append(row_points)
 862        if not points:
 863            raise ValueError(
 864                "Cannot create TableGrid from split: no complete rows found in both grids"
 865            )
 866        table_grid = TableGrid(points, split_grids.left.cols)
 867        return table_grid
 868
 869    def save(self, path: str | Path):
 870        """
 871        Persist the table grid to a JSON file.
 872
 873        Saves the grid corner points and right_offset (for split tables) to disk,
 874        allowing the grid to be reloaded later without re-running detection.
 875
 876        Args:
 877            path: Path to save the JSON file.
 878
 879        Example:
 880            >>> grid = taulu.segment_table("table.png")
 881            >>> grid.save("grid.json")
 882        """
 883        with open(path, "w") as f:
 884            json.dump({"points": self.points, "right_offset": self._right_offset}, f)
 885
 886    @staticmethod
 887    def from_saved(path: str | Path) -> "TableGrid":
 888        """
 889        Load a previously saved TableGrid from a JSON file.
 890
 891        Args:
 892            path: Path to the JSON file created by `save()`.
 893
 894        Returns:
 895            A TableGrid instance with the saved corner points.
 896
 897        Example:
 898            >>> grid = TableGrid.from_saved("grid.json")
 899            >>> cell = grid.crop_cell(image, (0, 0))
 900        """
 901        with open(path) as f:
 902            points = json.load(f)
 903            right_offset = points.get("right_offset", None)
 904            points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]]
 905            return TableGrid(points, right_offset)
 906
 907    def add_left_col(self, width: int):
 908        for row in self._points:
 909            first = row[0]
 910            new_first = (first[0] - width, first[1])
 911            row.insert(0, new_first)
 912
 913    def add_top_row(self, height: int):
 914        new_row = []
 915        for point in self._points[0]:
 916            new_row.append((point[0], point[1] - height))
 917
 918        self.points.insert(0, new_row)
 919
 920    def _surrounds(self, rect: list[Point], point: tuple[float, float]) -> bool:
 921        """point: x, y"""
 922        lt, rt, rb, lb = rect
 923        x, y = point
 924
 925        top = _Rule(*lt, *rt)
 926        if top._y_at_x(x) > y:
 927            return False
 928
 929        right = _Rule(*rt, *rb)
 930        if right._x_at_y(y) < x:
 931            return False
 932
 933        bottom = _Rule(*lb, *rb)
 934        if bottom._y_at_x(x) < y:
 935            return False
 936
 937        left = _Rule(*lb, *lt)
 938        if left._x_at_y(y) > x:
 939            return False
 940
 941        return True
 942
 943    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
 944        """
 945        Get the cell indices (row, col) containing a pixel coordinate.
 946
 947        Searches through all cells to find which one contains the given point,
 948        accounting for the non-rectangular (perspective-warped) cell boundaries.
 949
 950        Args:
 951            point: Pixel coordinates (x, y) in the original image.
 952
 953        Returns:
 954            (row, col) indices of the containing cell, or (-1, -1) if the point
 955            is outside all cells.
 956
 957        Example:
 958            >>> grid = taulu.segment_table("table.png")
 959            >>> row, col = grid.cell((150, 200))
 960            >>> if row >= 0:
 961            ...     print(f"Point is in cell ({row}, {col})")
 962        """
 963        for r in range(len(self._points) - 1):
 964            offset = 0
 965            for c in range(len(self.row(0)) - 1):
 966                if self._right_offset is not None and c == self._right_offset:
 967                    offset = -1
 968                    continue
 969
 970                if self._surrounds(
 971                    [
 972                        self._points[r][c],
 973                        self._points[r][c + 1],
 974                        self._points[r + 1][c + 1],
 975                        self._points[r + 1][c],
 976                    ],
 977                    point,
 978                ):
 979                    return (r, c + offset)
 980
 981        return (-1, -1)
 982
 983    def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
 984        """
 985        Get the four corner coordinates of a cell.
 986
 987        Returns the corners in clockwise order starting from top-left,
 988        suitable for use with OpenCV drawing functions.
 989
 990        Args:
 991            cell: Cell indices as (row, col).
 992
 993        Returns:
 994            Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order:
 995            top-left, top-right, bottom-right, bottom-left.
 996
 997        Raises:
 998            TauluException: If row or col indices are out of bounds.
 999
1000        Example:
1001            >>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
1002            >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
1003            >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
1004        """
1005        r, c = cell
1006
1007        self._check_row_idx(r)
1008        self._check_col_idx(c)
1009
1010        if self._right_offset is not None and c >= self._right_offset:
1011            c = c + 1
1012
1013        return (
1014            self._points[r][c],
1015            self._points[r][c + 1],
1016            self._points[r + 1][c + 1],
1017            self._points[r + 1][c],
1018        )
1019
1020    def region(
1021        self, start: tuple[int, int], end: tuple[int, int]
1022    ) -> tuple[Point, Point, Point, Point]:
1023        """
1024        Get the bounding polygon for a rectangular region of cells.
1025
1026        Returns the four corner coordinates that enclose all cells from
1027        start to end (inclusive).
1028
1029        Args:
1030            start: Top-left cell as (row, col).
1031            end: Bottom-right cell as (row, col).
1032
1033        Returns:
1034            Four corner points (lt, rt, rb, lb) enclosing the region,
1035            each as (x, y) pixel coordinates.
1036
1037        Raises:
1038            TauluException: If any row or col indices are out of bounds.
1039
1040        Example:
1041            >>> # Get bounding box for cells (0,0) through (2,3)
1042            >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
1043        """
1044        r0, c0 = start
1045        r1, c1 = end
1046
1047        self._check_row_idx(r0)
1048        self._check_row_idx(r1)
1049        self._check_col_idx(c0)
1050        self._check_col_idx(c1)
1051
1052        if self._right_offset is not None and c0 >= self._right_offset:
1053            c0 = c0 + 1
1054
1055        if self._right_offset is not None and c1 >= self._right_offset:
1056            c1 = c1 + 1
1057
1058        lt = self._points[r0][c0]
1059        rt = self._points[r0][c1 + 1]
1060        rb = self._points[r1 + 1][c1 + 1]
1061        lb = self._points[r1 + 1][c0]
1062
1063        return lt, rt, rb, lb
1064
1065    def visualize_points(self, img: MatLike):
1066        """
1067        Draw the detected table points on the image for visual verification
1068        """
1069        import colorsys
1070
1071        def clr(index, total_steps):
1072            hue = index / total_steps  # Normalized hue between 0 and 1
1073            r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
1074            return int(r * 255), int(g * 255), int(b * 255)
1075
1076        for i, row in enumerate(self._points):
1077            for p in row:
1078                cv.circle(img, p, 4, clr(i, len(self._points)), -1)
1079
1080        imu.show(img)
1081
1082    def text_regions(
1083        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
1084    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
1085        def vertical_rule_crop(row: int, col: int):
1086            self._check_col_idx(col)
1087            self._check_row_idx(row)
1088
1089            if self._right_offset is not None and col >= self._right_offset:
1090                col = col + 1
1091
1092            top = self._points[row][col]
1093            bottom = self._points[row + 1][col]
1094
1095            left = int(min(top[0], bottom[0]))
1096            right = int(max(top[0], bottom[0]))
1097
1098            return img[
1099                int(top[1]) - margin_y : int(bottom[1]) + margin_y,
1100                left - margin_x : right + margin_x,
1101            ]
1102
1103        result = []
1104
1105        start = None
1106        for col in range(self.cols):
1107            crop = vertical_rule_crop(row, col)
1108            text_over_score = imu.text_presence_score(crop)
1109            text_over = text_over_score > -0.10
1110
1111            if not text_over:
1112                if start is not None:
1113                    result.append(((row, start), (row, col - 1)))
1114                start = col
1115
1116        if start is not None:
1117            result.append(((row, start), (row, self.cols - 1)))
1118
1119        return result

Represents a detected table grid as a 2D array of intersection points.

Returned by Taulu.segment_table. Provides methods for querying cell locations, cropping cells/regions from the source image, and interactive visualization. Can be saved to and restored from JSON.

TableGrid(points: list[list[tuple[int, int]]], right_offset: int | None = None)
791    def __init__(self, points: list[list[Point]], right_offset: int | None = None):
792        """
793        Args:
794            points: a 2D list of intersections between hor. and vert. rules
795        """
796        self._points = points
797        self._right_offset = right_offset
Arguments:
  • points: a 2D list of intersections between hor. and vert. rules
points: list[list[tuple[int, int]]]
799    @property
800    def points(self) -> list[list[Point]]:
801        return self._points
def row(self, i: int) -> list[tuple[int, int]]:
803    def row(self, i: int) -> list[Point]:
804        assert 0 <= i and i < len(self._points)
805        return self._points[i]
cols: int
807    @property
808    def cols(self) -> int:
809        if self._right_offset is not None:
810            return len(self.row(0)) - 2
811        else:
812            return len(self.row(0)) - 1
rows: int
814    @property
815    def rows(self) -> int:
816        return len(self._points) - 1
right_offset: int | None
818    @property
819    def right_offset(self) -> int | None:
820        return self._right_offset
@staticmethod
def from_split( split_grids: Split[TableGrid], offsets: Split[tuple[int, int]]) -> TableGrid:
822    @staticmethod
823    def from_split(
824        split_grids: Split["TableGrid"], offsets: Split[Point]
825    ) -> "TableGrid":
826        """
827        Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image
828        Args:
829            split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
830            offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
831        """
832
833        def offset_points(points, offset):
834            return [
835                [
836                    (p[0] + offset[0], p[1] + offset[1]) if p is not None else None
837                    for p in row
838                ]
839                for row in points
840            ]
841
842        split_points = split_grids.apply(
843            lambda grid, offset: offset_points(grid.points, offset), offsets
844        )
845        points = []
846        rows = min(split_grids.left.rows, split_grids.right.rows)
847        for row in range(rows + 1):
848            left_row = split_points.left[row]
849            right_row = split_points.right[row]
850
851            # Skip rows that contain None values
852            if any(p is None for p in left_row) or any(p is None for p in right_row):
853                logger.warning(
854                    f"Skipping row {row} in from_split due to incomplete grid data"
855                )
856                continue
857
858            row_points = []
859            row_points.extend(left_row)
860            row_points.extend(right_row)
861            points.append(row_points)
862        if not points:
863            raise ValueError(
864                "Cannot create TableGrid from split: no complete rows found in both grids"
865            )
866        table_grid = TableGrid(points, split_grids.left.cols)
867        return table_grid

Convert two TableGrid objects into a single one that is able to segment the original (non-cropped) image.

Arguments:
  • split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
  • offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
def save(self, path: str | pathlib._local.Path):
869    def save(self, path: str | Path):
870        """
871        Persist the table grid to a JSON file.
872
873        Saves the grid corner points and right_offset (for split tables) to disk,
874        allowing the grid to be reloaded later without re-running detection.
875
876        Args:
877            path: Path to save the JSON file.
878
879        Example:
880            >>> grid = taulu.segment_table("table.png")
881            >>> grid.save("grid.json")
882        """
883        with open(path, "w") as f:
884            json.dump({"points": self.points, "right_offset": self._right_offset}, f)

Persist the table grid to a JSON file.

Saves the grid corner points and right_offset (for split tables) to disk, allowing the grid to be reloaded later without re-running detection.

Arguments:
  • path: Path to save the JSON file.
Example:
>>> grid = taulu.segment_table("table.png")
>>> grid.save("grid.json")
@staticmethod
def from_saved(path: str | pathlib._local.Path) -> TableGrid:
886    @staticmethod
887    def from_saved(path: str | Path) -> "TableGrid":
888        """
889        Load a previously saved TableGrid from a JSON file.
890
891        Args:
892            path: Path to the JSON file created by `save()`.
893
894        Returns:
895            A TableGrid instance with the saved corner points.
896
897        Example:
898            >>> grid = TableGrid.from_saved("grid.json")
899            >>> cell = grid.crop_cell(image, (0, 0))
900        """
901        with open(path) as f:
902            points = json.load(f)
903            right_offset = points.get("right_offset", None)
904            points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]]
905            return TableGrid(points, right_offset)

Load a previously saved TableGrid from a JSON file.

Arguments:
  • path: Path to the JSON file created by save().
Returns:

A TableGrid instance with the saved corner points.

Example:
>>> grid = TableGrid.from_saved("grid.json")
>>> cell = grid.crop_cell(image, (0, 0))
def add_left_col(self, width: int):
907    def add_left_col(self, width: int):
908        for row in self._points:
909            first = row[0]
910            new_first = (first[0] - width, first[1])
911            row.insert(0, new_first)
def add_top_row(self, height: int):
913    def add_top_row(self, height: int):
914        new_row = []
915        for point in self._points[0]:
916            new_row.append((point[0], point[1] - height))
917
918        self.points.insert(0, new_row)
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
943    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
944        """
945        Get the cell indices (row, col) containing a pixel coordinate.
946
947        Searches through all cells to find which one contains the given point,
948        accounting for the non-rectangular (perspective-warped) cell boundaries.
949
950        Args:
951            point: Pixel coordinates (x, y) in the original image.
952
953        Returns:
954            (row, col) indices of the containing cell, or (-1, -1) if the point
955            is outside all cells.
956
957        Example:
958            >>> grid = taulu.segment_table("table.png")
959            >>> row, col = grid.cell((150, 200))
960            >>> if row >= 0:
961            ...     print(f"Point is in cell ({row}, {col})")
962        """
963        for r in range(len(self._points) - 1):
964            offset = 0
965            for c in range(len(self.row(0)) - 1):
966                if self._right_offset is not None and c == self._right_offset:
967                    offset = -1
968                    continue
969
970                if self._surrounds(
971                    [
972                        self._points[r][c],
973                        self._points[r][c + 1],
974                        self._points[r + 1][c + 1],
975                        self._points[r + 1][c],
976                    ],
977                    point,
978                ):
979                    return (r, c + offset)
980
981        return (-1, -1)

Get the cell indices (row, col) containing a pixel coordinate.

Searches through all cells to find which one contains the given point, accounting for the non-rectangular (perspective-warped) cell boundaries.

Arguments:
  • point: Pixel coordinates (x, y) in the original image.
Returns:

(row, col) indices of the containing cell, or (-1, -1) if the point is outside all cells.

Example:
>>> grid = taulu.segment_table("table.png")
>>> row, col = grid.cell((150, 200))
>>> if row >= 0:
...     print(f"Point is in cell ({row}, {col})")
def cell_polygon( self, cell: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
 983    def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
 984        """
 985        Get the four corner coordinates of a cell.
 986
 987        Returns the corners in clockwise order starting from top-left,
 988        suitable for use with OpenCV drawing functions.
 989
 990        Args:
 991            cell: Cell indices as (row, col).
 992
 993        Returns:
 994            Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order:
 995            top-left, top-right, bottom-right, bottom-left.
 996
 997        Raises:
 998            TauluException: If row or col indices are out of bounds.
 999
1000        Example:
1001            >>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
1002            >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
1003            >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
1004        """
1005        r, c = cell
1006
1007        self._check_row_idx(r)
1008        self._check_col_idx(c)
1009
1010        if self._right_offset is not None and c >= self._right_offset:
1011            c = c + 1
1012
1013        return (
1014            self._points[r][c],
1015            self._points[r][c + 1],
1016            self._points[r + 1][c + 1],
1017            self._points[r + 1][c],
1018        )

Get the four corner coordinates of a cell.

Returns the corners in clockwise order starting from top-left, suitable for use with OpenCV drawing functions.

Arguments:
  • cell: Cell indices as (row, col).
Returns:

Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: top-left, top-right, bottom-right, bottom-left.

Raises:
  • TauluException: If row or col indices are out of bounds.
Example:
>>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
>>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
>>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
def region( self, start: tuple[int, int], end: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
1020    def region(
1021        self, start: tuple[int, int], end: tuple[int, int]
1022    ) -> tuple[Point, Point, Point, Point]:
1023        """
1024        Get the bounding polygon for a rectangular region of cells.
1025
1026        Returns the four corner coordinates that enclose all cells from
1027        start to end (inclusive).
1028
1029        Args:
1030            start: Top-left cell as (row, col).
1031            end: Bottom-right cell as (row, col).
1032
1033        Returns:
1034            Four corner points (lt, rt, rb, lb) enclosing the region,
1035            each as (x, y) pixel coordinates.
1036
1037        Raises:
1038            TauluException: If any row or col indices are out of bounds.
1039
1040        Example:
1041            >>> # Get bounding box for cells (0,0) through (2,3)
1042            >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
1043        """
1044        r0, c0 = start
1045        r1, c1 = end
1046
1047        self._check_row_idx(r0)
1048        self._check_row_idx(r1)
1049        self._check_col_idx(c0)
1050        self._check_col_idx(c1)
1051
1052        if self._right_offset is not None and c0 >= self._right_offset:
1053            c0 = c0 + 1
1054
1055        if self._right_offset is not None and c1 >= self._right_offset:
1056            c1 = c1 + 1
1057
1058        lt = self._points[r0][c0]
1059        rt = self._points[r0][c1 + 1]
1060        rb = self._points[r1 + 1][c1 + 1]
1061        lb = self._points[r1 + 1][c0]
1062
1063        return lt, rt, rb, lb

Get the bounding polygon for a rectangular region of cells.

Returns the four corner coordinates that enclose all cells from start to end (inclusive).

Arguments:
  • start: Top-left cell as (row, col).
  • end: Bottom-right cell as (row, col).
Returns:

Four corner points (lt, rt, rb, lb) enclosing the region, each as (x, y) pixel coordinates.

Raises:
  • TauluException: If any row or col indices are out of bounds.
Example:
>>> # Get bounding box for cells (0,0) through (2,3)
>>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
def visualize_points(self, img: Union[cv2.Mat, numpy.ndarray]):
1065    def visualize_points(self, img: MatLike):
1066        """
1067        Draw the detected table points on the image for visual verification
1068        """
1069        import colorsys
1070
1071        def clr(index, total_steps):
1072            hue = index / total_steps  # Normalized hue between 0 and 1
1073            r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
1074            return int(r * 255), int(g * 255), int(b * 255)
1075
1076        for i, row in enumerate(self._points):
1077            for p in row:
1078                cv.circle(img, p, 4, clr(i, len(self._points)), -1)
1079
1080        imu.show(img)

Draw the detected table points on the image for visual verification

def text_regions( self, img: Union[cv2.Mat, numpy.ndarray], row: int, margin_x: int = 10, margin_y: int = -3) -> list[tuple[tuple[int, int], tuple[int, int]]]:
1082    def text_regions(
1083        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
1084    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
1085        def vertical_rule_crop(row: int, col: int):
1086            self._check_col_idx(col)
1087            self._check_row_idx(row)
1088
1089            if self._right_offset is not None and col >= self._right_offset:
1090                col = col + 1
1091
1092            top = self._points[row][col]
1093            bottom = self._points[row + 1][col]
1094
1095            left = int(min(top[0], bottom[0]))
1096            right = int(max(top[0], bottom[0]))
1097
1098            return img[
1099                int(top[1]) - margin_y : int(bottom[1]) + margin_y,
1100                left - margin_x : right + margin_x,
1101            ]
1102
1103        result = []
1104
1105        start = None
1106        for col in range(self.cols):
1107            crop = vertical_rule_crop(row, col)
1108            text_over_score = imu.text_presence_score(crop)
1109            text_over = text_over_score > -0.10
1110
1111            if not text_over:
1112                if start is not None:
1113                    result.append(((row, start), (row, col - 1)))
1114                start = col
1115
1116        if start is not None:
1117            result.append(((row, start), (row, self.cols - 1)))
1118
1119        return result

Split the row into regions of continuous text

Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans, each given as ((row, start col), (row, end col))

class TableIndexer(abc.ABC):
 95class TableIndexer(ABC):
 96    """
 97    Abstract base class for table cell indexing and cropping.
 98
 99    Subclasses (`TableGrid`, `HeaderTemplate`) implement the `cols`, `rows`,
100    and `cell_polygon` interface. This base provides shared methods for
101    mapping pixel coordinates to cell indices and cropping cells/regions.
102    """
103
104    def __init__(self):
105        self._col_offset = 0
106
107    @property
108    def col_offset(self) -> int:
109        return self._col_offset
110
111    @col_offset.setter
112    def col_offset(self, value: int):
113        assert value >= 0
114        self._col_offset = value
115
116    @property
117    @abstractmethod
118    def cols(self) -> int:
119        pass
120
121    @property
122    @abstractmethod
123    def rows(self) -> int:
124        pass
125
126    def cells(self) -> Generator[tuple[int, int]]:
127        """
128        Generate all cell indices in row-major order.
129
130        Yields (row, col) tuples for every cell in the table, iterating
131        through each row from left to right, top to bottom.
132
133        Yields:
134            tuple[int, int]: Cell indices as (row, col).
135
136        Example:
137            >>> for row, col in grid.cells():
138            ...     cell_img = grid.crop_cell(image, (row, col))
139            ...     process(cell_img)
140        """
141        for row in range(self.rows):
142            for col in range(self.cols):
143                yield (row, col)
144
145    def _check_row_idx(self, row: int):
146        if row < 0:
147            raise TauluException("row number needs to be positive or zero")
148        if row >= self.rows:
149            raise TauluException(f"row number too high: {row} >= {self.rows}")
150
151    def _check_col_idx(self, col: int):
152        if col < 0:
153            raise TauluException("col number needs to be positive or zero")
154        if col >= self.cols:
155            raise TauluException(f"col number too high: {col} >= {self.cols}")
156
157    @abstractmethod
158    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
159        """
160        Returns the coordinate (row, col) of the cell that contains the given position
161
162        Args:
163            point (tuple[float, float]): a location in the input image
164
165        Returns:
166            tuple[int, int]: the cell index (row, col) that contains the given point
167        """
168        pass
169
170    @abstractmethod
171    def cell_polygon(
172        self, cell: tuple[int, int]
173    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
174        """returns the polygon (used in e.g. opencv) that enscribes the cell at the given cell position"""
175        pass
176
177    def _highlight_cell(
178        self,
179        image: MatLike,
180        cell: tuple[int, int],
181        color: tuple[int, int, int] = (0, 0, 255),
182        thickness: int = 2,
183    ):
184        polygon = self.cell_polygon(cell)
185        points = np.int32(list(polygon))  # type:ignore
186        cv.polylines(image, [points], True, color, thickness, cv.LINE_AA)
187        cv.putText(
188            image,
189            str(cell),
190            (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
191            cv.FONT_HERSHEY_PLAIN,
192            2.0,
193            (255, 255, 255),
194            2,
195        )
196
197    def highlight_all_cells(
198        self,
199        image: MatLike | os.PathLike[str] | str,
200        color: tuple[int, int, int] = (0, 0, 255),
201        thickness: int = 1,
202    ) -> MatLike:
203        if not isinstance(image, np.ndarray):
204            image = cv.imread(os.fspath(image))  # ty:ignore
205        img = np.copy(image)
206
207        for cell in self.cells():
208            self._highlight_cell(img, cell, color, thickness)
209
210        return img
211
212    def select_one_cell(
213        self,
214        image: MatLike,
215        window: str = WINDOW,
216        color: tuple[int, int, int] = (255, 0, 0),
217        thickness: int = 2,
218    ) -> tuple[int, int] | None:
219        clicked = None
220
221        def click_event(event, x, y, flags, params):
222            nonlocal clicked
223
224            img = np.copy(image)
225            _ = flags
226            _ = params
227            if event == cv.EVENT_LBUTTONDOWN:
228                cell = self.cell((x, y))
229                if cell[0] >= 0:
230                    clicked = cell
231                else:
232                    return
233                self._highlight_cell(img, cell, color, thickness)
234                cv.imshow(window, img)
235
236        imu.show(image, click_event=click_event, title="select one cell", window=window)
237
238        return clicked
239
240    def show_cells(
241        self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
242    ) -> list[tuple[int, int]] | ShowCellsSession:
243        """
244        Interactively display and highlight table cells.
245
246        In standard environments, shows an OpenCV window where clicking highlights cells.
247        In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib.
248
249        Args:
250            image: Source image (path or array).
251            window: OpenCV window name (ignored in notebooks).
252
253        Returns:
254            list[tuple[int, int]]: Clicked cell indices (non-notebook).
255            ShowCellsSession: Session object with .cells property (notebook).
256
257        Example:
258            >>> # Standard Python
259            >>> cells = grid.show_cells("table.png")
260            >>>
261            >>> # Jupyter Notebook
262            >>> session = grid.show_cells("table.png")
263            >>> # ... click cells ...
264            >>> cells = session.cells
265        """
266        if not isinstance(image, np.ndarray):
267            image = cv.imread(os.fspath(image))  # ty:ignore
268
269        def running_in_notebook() -> bool:
270            try:
271                from IPython import get_ipython
272
273                ip = get_ipython()
274                return ip is not None and "IPKernelApp" in ip.config
275            except Exception:
276                return False
277
278        use_notebook = running_in_notebook()
279
280        if use_notebook:
281            return self.show_cells_notebook(image)
282        else:
283            img = np.copy(image)
284            cells = []
285
286            def click_event(event, x, y, flags, params):
287                _ = flags
288                _ = params
289                if event == cv.EVENT_LBUTTONDOWN:
290                    cell = self.cell((x, y))
291                    if cell[0] >= 0:
292                        cells.append(cell)
293                    else:
294                        return
295                    self._highlight_cell(img, cell)
296                    cv.imshow(window, img)
297
298            imu.show(
299                img,
300                click_event=click_event,
301                title="click to highlight cells",
302                window=window,
303            )
304
305            return cells
306
307    def show_cells_notebook(
308        self, image: MatLike | os.PathLike[str] | str
309    ) -> ShowCellsSession:
310        """
311        Notebook-compatible version of show_cells using matplotlib.
312
313        Returns a ShowCellsSession immediately. Click on cells to highlight them.
314        Access clicked cells via session.cells.
315
316        Args:
317            image: Source image (path or array).
318
319        Returns:
320            ShowCellsSession: Access .cells to get list of clicked cell indices.
321
322        Example:
323            >>> session = grid.show_cells_notebook("table.png")
324            >>> # Click cells in the interactive plot
325            >>> print(session.cells)  # [(0, 0), (1, 2), ...]
326        """
327        if not isinstance(image, np.ndarray):
328            tmp_image = cv.imread(os.fspath(image))
329            assert tmp_image is not None
330            image = tmp_image
331
332        import ipywidgets as widgets
333        import matplotlib.pyplot as plt
334        from IPython.display import display
335
336        session = ShowCellsSession()
337
338        # Convert BGR to RGB for matplotlib
339        display_img = cv.cvtColor(image, cv.COLOR_BGR2RGB)
340        img_with_highlights = np.copy(display_img)
341
342        fig, ax = plt.subplots(figsize=(15, 12))
343        fig.canvas.toolbar_visible = False  # ty:ignore[unresolved-attribute]
344        fig.canvas.header_visible = False  # ty:ignore[unresolved-attribute]
345
346        im_display = ax.imshow(img_with_highlights)
347        ax.set_title("Click cells to highlight them. Cells clicked: 0")
348        ax.set_axis_off()
349
350        # Create buttons
351        done_button = widgets.Button(
352            description="Done",
353            button_style="success",
354            layout=widgets.Layout(width="150px", height="50px"),
355        )
356        clear_button = widgets.Button(
357            description="Clear All",
358            button_style="warning",
359            layout=widgets.Layout(width="150px", height="50px"),
360        )
361        undo_button = widgets.Button(
362            description="Undo Last",
363            button_style="info",
364            layout=widgets.Layout(width="150px", height="50px"),
365        )
366
367        done_button.style.font_size = "18px"
368        clear_button.style.font_size = "18px"
369        undo_button.style.font_size = "18px"
370
371        status_label = widgets.Label(
372            value="Click on cells to highlight them", style={"font_size": "18px"}
373        )
374
375        def draw_highlight(cell_idx: tuple[int, int]):
376            """Draw a highlighted cell on the image."""
377            polygon = self.cell_polygon(cell_idx)
378            points = np.array(list(polygon), dtype=np.int32)
379
380            # Draw polyline on the RGB image
381            cv.polylines(
382                img_with_highlights,
383                [points],
384                True,
385                (255, 0, 0),  # Red in RGB
386                2,
387                cv.LINE_AA,
388            )
389
390            # Draw cell index text
391            cv.putText(
392                img_with_highlights,
393                str(cell_idx),
394                (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
395                cv.FONT_HERSHEY_PLAIN,
396                2.0,
397                (255, 255, 255),  # White in RGB
398                2,
399            )
400
401        def redraw_all():
402            """Redraw the image with all current highlights."""
403            nonlocal img_with_highlights
404            img_with_highlights = np.copy(display_img)
405
406            for cell_idx in session._cells:
407                draw_highlight(cell_idx)
408
409            im_display.set_data(img_with_highlights)
410            ax.set_title(
411                f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
412            )
413            fig.canvas.draw_idle()
414
415        def on_click(event):
416            if event.inaxes != ax or event.xdata is None:
417                return
418
419            x, y = int(event.xdata), int(event.ydata)
420
421            if event.button == 1:  # Left click
422                cell_idx = self.cell((x, y))
423                if cell_idx[0] >= 0:
424                    session._cells.append(cell_idx)
425                    draw_highlight(cell_idx)
426                    im_display.set_data(img_with_highlights)
427                    ax.set_title(
428                        f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
429                    )
430                    status_label.value = (
431                        f"Cell {cell_idx} highlighted. Total: {len(session._cells)}"
432                    )
433                    fig.canvas.draw_idle()
434                else:
435                    status_label.value = f"Click at ({x}, {y}) is outside table bounds"
436
437        def on_clear(_):
438            session._cells.clear()
439            redraw_all()
440            status_label.value = "All highlights cleared"
441
442        def on_undo(_):
443            if session._cells:
444                removed = session._cells.pop()
445                redraw_all()
446                status_label.value = (
447                    f"Removed cell {removed}. Remaining: {len(session._cells)}"
448                )
449            else:
450                status_label.value = "No cells to undo"
451
452        def on_done(_):
453            fig.canvas.mpl_disconnect(cid)
454            done_button.disabled = True
455            clear_button.disabled = True
456            undo_button.disabled = True
457            ax.set_title(f"Done! {len(session._cells)} cells highlighted.")
458            status_label.value = "Complete! Access clicked cells via session.cells"
459            fig.canvas.draw_idle()
460
461        done_button.on_click(on_done)
462        clear_button.on_click(on_clear)
463        undo_button.on_click(on_undo)
464
465        cid = fig.canvas.mpl_connect("button_press_event", on_click)
466
467        plt.tight_layout(pad=0)
468        plt.show()
469        display(widgets.HBox([done_button, clear_button, undo_button, status_label]))
470
471        return session
472
473    @abstractmethod
474    def region(
475        self,
476        start: tuple[int, int],
477        end: tuple[int, int],
478    ) -> tuple[Point, Point, Point, Point]:
479        """
480        Get the bounding box for the rectangular region that goes from start to end
481
482        Returns:
483            4 points: lt, rt, rb, lb, in format (x, y)
484        """
485        pass
486
487    def crop_region(
488        self,
489        image: MatLike,
490        start: tuple[int, int],
491        end: tuple[int, int],
492        margin: int = 0,
493        margin_top: int | None = None,
494        margin_bottom: int | None = None,
495        margin_left: int | None = None,
496        margin_right: int | None = None,
497        margin_y: int | None = None,
498        margin_x: int | None = None,
499    ) -> MatLike:
500        """
501        Extract a multi-cell region from the image with perspective correction.
502
503        Crops the image to include all cells from start to end (inclusive),
504        applying a perspective transform to produce a rectangular output.
505
506        Args:
507            image: Source image (BGR or grayscale).
508            start: Top-left cell as (row, col).
509            end: Bottom-right cell as (row, col).
510            margin: Uniform margin in pixels (default 0).
511            margin_top: Override top margin.
512            margin_bottom: Override bottom margin.
513            margin_left: Override left margin.
514            margin_right: Override right margin.
515            margin_y: Override vertical margins (top and bottom).
516            margin_x: Override horizontal margins (left and right).
517
518        Returns:
519            Cropped and perspective-corrected image.
520
521        Example:
522            >>> # Extract a 3x2 region starting at cell (1, 0)
523            >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
524        """
525
526        region = self.region(start, end)
527
528        lt, rt, rb, lb = _apply_margin(
529            *region,
530            margin=margin,
531            margin_top=margin_top,
532            margin_bottom=margin_bottom,
533            margin_left=margin_left,
534            margin_right=margin_right,
535            margin_y=margin_y,
536            margin_x=margin_x,
537        )
538
539        # apply margins according to priority:
540        # margin_top > margin_y > margin (etc.)
541
542        w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
543        h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2
544
545        # crop by doing a perspective transform to the desired quad
546        src_pts = np.array([lt, rt, rb, lb], dtype="float32")
547        dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32")
548        m = cv.getPerspectiveTransform(src_pts, dst_pts)
549        warped = cv.warpPerspective(image, m, (int(w), int(h)))
550
551        return warped
552
553    @abstractmethod
554    def text_regions(
555        self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
556    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
557        """
558        Split the row into regions of continuous text
559
560        Returns
561            list[tuple[int, int]]: a list of spans (start col, end col)
562        """
563
564        pass
565
566    def crop_cell(
567        self,
568        image,
569        cell: tuple[int, int],
570        margin: int = 0,
571        margin_top: int | None = None,
572        margin_bottom: int | None = None,
573        margin_left: int | None = None,
574        margin_right: int | None = None,
575        margin_y: int | None = None,
576        margin_x: int | None = None,
577    ) -> MatLike:
578        """
579        Extract a single cell from the image with perspective correction.
580
581        Convenience method equivalent to `crop_region(image, cell, cell, margin)`.
582
583        Args:
584            image: Source image (BGR or grayscale).
585            cell: Cell indices as (row, col).
586            margin: Padding in pixels around the cell (default 0).
587
588        Returns:
589            Cropped and perspective-corrected cell image.
590
591        Example:
592            >>> cell_img = grid.crop_cell(image, (0, 0))
593            >>> cv2.imwrite("cell_0_0.png", cell_img)
594        """
595        return self.crop_region(
596            image,
597            cell,
598            cell,
599            margin,
600            margin_top,
601            margin_bottom,
602            margin_left,
603            margin_right,
604            margin_y,
605            margin_x,
606        )

Abstract base class for table cell indexing and cropping.

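Subclasses (TableGrid, HeaderTemplate) implement the abstract interface: cols, rows, cell, cell_polygon, region, and text_regions. This base class provides the shared methods built on top of that interface: iterating over cell indices, highlighting and interactively selecting cells, and cropping cells/regions.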

col_offset: int
107    @property
108    def col_offset(self) -> int:
109        return self._col_offset
cols: int
116    @property
117    @abstractmethod
118    def cols(self) -> int:
119        pass
rows: int
121    @property
122    @abstractmethod
123    def rows(self) -> int:
124        pass
def cells(self) -> Generator[tuple[int, int]]:
126    def cells(self) -> Generator[tuple[int, int]]:
127        """
128        Generate all cell indices in row-major order.
129
130        Yields (row, col) tuples for every cell in the table, iterating
131        through each row from left to right, top to bottom.
132
133        Yields:
134            tuple[int, int]: Cell indices as (row, col).
135
136        Example:
137            >>> for row, col in grid.cells():
138            ...     cell_img = grid.crop_cell(image, (row, col))
139            ...     process(cell_img)
140        """
141        for row in range(self.rows):
142            for col in range(self.cols):
143                yield (row, col)

Generate all cell indices in row-major order.

Yields (row, col) tuples for every cell in the table, iterating through each row from left to right, top to bottom.

Yields:

tuple[int, int]: Cell indices as (row, col).

Example:
>>> for row, col in grid.cells():
...     cell_img = grid.crop_cell(image, (row, col))
...     process(cell_img)
@abstractmethod
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
157    @abstractmethod
158    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
159        """
160        Returns the coordinate (row, col) of the cell that contains the given position
161
162        Args:
163            point (tuple[float, float]): a location in the input image
164
165        Returns:
166            tuple[int, int]: the cell index (row, col) that contains the given point
167        """
168        pass

Return the index (row, col) of the cell that contains the given position. The interactive helpers of this class treat a negative row index as "the point lies outside the table".

Arguments:
  • point (tuple[float, float]): a location in the input image
Returns:

tuple[int, int]: the cell index (row, col) that contains the given point

@abstractmethod
def cell_polygon( self, cell: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
170    @abstractmethod
171    def cell_polygon(
172        self, cell: tuple[int, int]
173    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
174        """returns the polygon (used in e.g. opencv) that enscribes the cell at the given cell position"""
175        pass

Returns the polygon (in the form used by e.g. OpenCV) that encloses the cell at the given cell position.

def highlight_all_cells( self, image: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str], color: tuple[int, int, int] = (0, 0, 255), thickness: int = 1) -> Union[cv2.Mat, numpy.ndarray]:
197    def highlight_all_cells(
198        self,
199        image: MatLike | os.PathLike[str] | str,
200        color: tuple[int, int, int] = (0, 0, 255),
201        thickness: int = 1,
202    ) -> MatLike:
203        if not isinstance(image, np.ndarray):
204            image = cv.imread(os.fspath(image))  # ty:ignore
205        img = np.copy(image)
206
207        for cell in self.cells():
208            self._highlight_cell(img, cell, color, thickness)
209
210        return img
def select_one_cell( self, image: Union[cv2.Mat, numpy.ndarray], window: str = 'taulu', color: tuple[int, int, int] = (255, 0, 0), thickness: int = 2) -> tuple[int, int] | None:
212    def select_one_cell(
213        self,
214        image: MatLike,
215        window: str = WINDOW,
216        color: tuple[int, int, int] = (255, 0, 0),
217        thickness: int = 2,
218    ) -> tuple[int, int] | None:
219        clicked = None
220
221        def click_event(event, x, y, flags, params):
222            nonlocal clicked
223
224            img = np.copy(image)
225            _ = flags
226            _ = params
227            if event == cv.EVENT_LBUTTONDOWN:
228                cell = self.cell((x, y))
229                if cell[0] >= 0:
230                    clicked = cell
231                else:
232                    return
233                self._highlight_cell(img, cell, color, thickness)
234                cv.imshow(window, img)
235
236        imu.show(image, click_event=click_event, title="select one cell", window=window)
237
238        return clicked
def show_cells( self, image: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str], window: str = 'taulu') -> list[tuple[int, int]] | taulu.table_indexer.ShowCellsSession:
240    def show_cells(
241        self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
242    ) -> list[tuple[int, int]] | ShowCellsSession:
243        """
244        Interactively display and highlight table cells.
245
246        In standard environments, shows an OpenCV window where clicking highlights cells.
247        In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib.
248
249        Args:
250            image: Source image (path or array).
251            window: OpenCV window name (ignored in notebooks).
252
253        Returns:
254            list[tuple[int, int]]: Clicked cell indices (non-notebook).
255            ShowCellsSession: Session object with .cells property (notebook).
256
257        Example:
258            >>> # Standard Python
259            >>> cells = grid.show_cells("table.png")
260            >>>
261            >>> # Jupyter Notebook
262            >>> session = grid.show_cells("table.png")
263            >>> # ... click cells ...
264            >>> cells = session.cells
265        """
266        if not isinstance(image, np.ndarray):
267            image = cv.imread(os.fspath(image))  # ty:ignore
268
269        def running_in_notebook() -> bool:
270            try:
271                from IPython import get_ipython
272
273                ip = get_ipython()
274                return ip is not None and "IPKernelApp" in ip.config
275            except Exception:
276                return False
277
278        use_notebook = running_in_notebook()
279
280        if use_notebook:
281            return self.show_cells_notebook(image)
282        else:
283            img = np.copy(image)
284            cells = []
285
286            def click_event(event, x, y, flags, params):
287                _ = flags
288                _ = params
289                if event == cv.EVENT_LBUTTONDOWN:
290                    cell = self.cell((x, y))
291                    if cell[0] >= 0:
292                        cells.append(cell)
293                    else:
294                        return
295                    self._highlight_cell(img, cell)
296                    cv.imshow(window, img)
297
298            imu.show(
299                img,
300                click_event=click_event,
301                title="click to highlight cells",
302                window=window,
303            )
304
305            return cells

Interactively display and highlight table cells.

In standard environments, shows an OpenCV window where clicking highlights cells. In Jupyter notebooks, returns a ShowCellsSession and displays using matplotlib.

Arguments:
  • image: Source image (path or array).
  • window: OpenCV window name (ignored in notebooks).
Returns:

Either a list[tuple[int, int]] with the clicked cell indices (non-notebook), or a ShowCellsSession whose .cells property exposes the clicked cells (notebook).

Example:
>>> # Standard Python
>>> cells = grid.show_cells("table.png")
>>>
>>> # Jupyter Notebook
>>> session = grid.show_cells("table.png")
>>> # ... click cells ...
>>> cells = session.cells
def show_cells_notebook( self, image: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str]) -> taulu.table_indexer.ShowCellsSession:
307    def show_cells_notebook(
308        self, image: MatLike | os.PathLike[str] | str
309    ) -> ShowCellsSession:
310        """
311        Notebook-compatible version of show_cells using matplotlib.
312
313        Returns a ShowCellsSession immediately. Click on cells to highlight them.
314        Access clicked cells via session.cells.
315
316        Args:
317            image: Source image (path or array).
318
319        Returns:
320            ShowCellsSession: Access .cells to get list of clicked cell indices.
321
322        Example:
323            >>> session = grid.show_cells_notebook("table.png")
324            >>> # Click cells in the interactive plot
325            >>> print(session.cells)  # [(0, 0), (1, 2), ...]
326        """
327        if not isinstance(image, np.ndarray):
328            tmp_image = cv.imread(os.fspath(image))
329            assert tmp_image is not None
330            image = tmp_image
331
332        import ipywidgets as widgets
333        import matplotlib.pyplot as plt
334        from IPython.display import display
335
336        session = ShowCellsSession()
337
338        # Convert BGR to RGB for matplotlib
339        display_img = cv.cvtColor(image, cv.COLOR_BGR2RGB)
340        img_with_highlights = np.copy(display_img)
341
342        fig, ax = plt.subplots(figsize=(15, 12))
343        fig.canvas.toolbar_visible = False  # ty:ignore[unresolved-attribute]
344        fig.canvas.header_visible = False  # ty:ignore[unresolved-attribute]
345
346        im_display = ax.imshow(img_with_highlights)
347        ax.set_title("Click cells to highlight them. Cells clicked: 0")
348        ax.set_axis_off()
349
350        # Create buttons
351        done_button = widgets.Button(
352            description="Done",
353            button_style="success",
354            layout=widgets.Layout(width="150px", height="50px"),
355        )
356        clear_button = widgets.Button(
357            description="Clear All",
358            button_style="warning",
359            layout=widgets.Layout(width="150px", height="50px"),
360        )
361        undo_button = widgets.Button(
362            description="Undo Last",
363            button_style="info",
364            layout=widgets.Layout(width="150px", height="50px"),
365        )
366
367        done_button.style.font_size = "18px"
368        clear_button.style.font_size = "18px"
369        undo_button.style.font_size = "18px"
370
371        status_label = widgets.Label(
372            value="Click on cells to highlight them", style={"font_size": "18px"}
373        )
374
375        def draw_highlight(cell_idx: tuple[int, int]):
376            """Draw a highlighted cell on the image."""
377            polygon = self.cell_polygon(cell_idx)
378            points = np.array(list(polygon), dtype=np.int32)
379
380            # Draw polyline on the RGB image
381            cv.polylines(
382                img_with_highlights,
383                [points],
384                True,
385                (255, 0, 0),  # Red in RGB
386                2,
387                cv.LINE_AA,
388            )
389
390            # Draw cell index text
391            cv.putText(
392                img_with_highlights,
393                str(cell_idx),
394                (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
395                cv.FONT_HERSHEY_PLAIN,
396                2.0,
397                (255, 255, 255),  # White in RGB
398                2,
399            )
400
401        def redraw_all():
402            """Redraw the image with all current highlights."""
403            nonlocal img_with_highlights
404            img_with_highlights = np.copy(display_img)
405
406            for cell_idx in session._cells:
407                draw_highlight(cell_idx)
408
409            im_display.set_data(img_with_highlights)
410            ax.set_title(
411                f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
412            )
413            fig.canvas.draw_idle()
414
415        def on_click(event):
416            if event.inaxes != ax or event.xdata is None:
417                return
418
419            x, y = int(event.xdata), int(event.ydata)
420
421            if event.button == 1:  # Left click
422                cell_idx = self.cell((x, y))
423                if cell_idx[0] >= 0:
424                    session._cells.append(cell_idx)
425                    draw_highlight(cell_idx)
426                    im_display.set_data(img_with_highlights)
427                    ax.set_title(
428                        f"Click cells to highlight them. Cells clicked: {len(session._cells)}"
429                    )
430                    status_label.value = (
431                        f"Cell {cell_idx} highlighted. Total: {len(session._cells)}"
432                    )
433                    fig.canvas.draw_idle()
434                else:
435                    status_label.value = f"Click at ({x}, {y}) is outside table bounds"
436
437        def on_clear(_):
438            session._cells.clear()
439            redraw_all()
440            status_label.value = "All highlights cleared"
441
442        def on_undo(_):
443            if session._cells:
444                removed = session._cells.pop()
445                redraw_all()
446                status_label.value = (
447                    f"Removed cell {removed}. Remaining: {len(session._cells)}"
448                )
449            else:
450                status_label.value = "No cells to undo"
451
452        def on_done(_):
453            fig.canvas.mpl_disconnect(cid)
454            done_button.disabled = True
455            clear_button.disabled = True
456            undo_button.disabled = True
457            ax.set_title(f"Done! {len(session._cells)} cells highlighted.")
458            status_label.value = "Complete! Access clicked cells via session.cells"
459            fig.canvas.draw_idle()
460
461        done_button.on_click(on_done)
462        clear_button.on_click(on_clear)
463        undo_button.on_click(on_undo)
464
465        cid = fig.canvas.mpl_connect("button_press_event", on_click)
466
467        plt.tight_layout(pad=0)
468        plt.show()
469        display(widgets.HBox([done_button, clear_button, undo_button, status_label]))
470
471        return session

Notebook-compatible version of show_cells using matplotlib.

Returns a ShowCellsSession immediately. Click on cells to highlight them. Access clicked cells via session.cells.

Arguments:
  • image: Source image (path or array).
Returns:

ShowCellsSession: Access .cells to get list of clicked cell indices.

Example:
>>> session = grid.show_cells_notebook("table.png")
>>> # Click cells in the interactive plot
>>> print(session.cells)  # [(0, 0), (1, 2), ...]
@abstractmethod
def region( self, start: tuple[int, int], end: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
473    @abstractmethod
474    def region(
475        self,
476        start: tuple[int, int],
477        end: tuple[int, int],
478    ) -> tuple[Point, Point, Point, Point]:
479        """
480        Get the bounding box for the rectangular region that goes from start to end
481
482        Returns:
483            4 points: lt, rt, rb, lb, in format (x, y)
484        """
485        pass

Get the bounding box for the rectangular region that goes from start to end

Returns:

4 points: lt, rt, rb, lb, in format (x, y)

def crop_region( self, image: Union[cv2.Mat, numpy.ndarray], start: tuple[int, int], end: tuple[int, int], margin: int = 0, margin_top: int | None = None, margin_bottom: int | None = None, margin_left: int | None = None, margin_right: int | None = None, margin_y: int | None = None, margin_x: int | None = None) -> Union[cv2.Mat, numpy.ndarray]:
487    def crop_region(
488        self,
489        image: MatLike,
490        start: tuple[int, int],
491        end: tuple[int, int],
492        margin: int = 0,
493        margin_top: int | None = None,
494        margin_bottom: int | None = None,
495        margin_left: int | None = None,
496        margin_right: int | None = None,
497        margin_y: int | None = None,
498        margin_x: int | None = None,
499    ) -> MatLike:
500        """
501        Extract a multi-cell region from the image with perspective correction.
502
503        Crops the image to include all cells from start to end (inclusive),
504        applying a perspective transform to produce a rectangular output.
505
506        Args:
507            image: Source image (BGR or grayscale).
508            start: Top-left cell as (row, col).
509            end: Bottom-right cell as (row, col).
510            margin: Uniform margin in pixels (default 0).
511            margin_top: Override top margin.
512            margin_bottom: Override bottom margin.
513            margin_left: Override left margin.
514            margin_right: Override right margin.
515            margin_y: Override vertical margins (top and bottom).
516            margin_x: Override horizontal margins (left and right).
517
518        Returns:
519            Cropped and perspective-corrected image.
520
521        Example:
522            >>> # Extract a 3x2 region starting at cell (1, 0)
523            >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
524        """
525
526        region = self.region(start, end)
527
528        lt, rt, rb, lb = _apply_margin(
529            *region,
530            margin=margin,
531            margin_top=margin_top,
532            margin_bottom=margin_bottom,
533            margin_left=margin_left,
534            margin_right=margin_right,
535            margin_y=margin_y,
536            margin_x=margin_x,
537        )
538
539        # apply margins according to priority:
540        # margin_top > margin_y > margin (etc.)
541
542        w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
543        h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2
544
545        # crop by doing a perspective transform to the desired quad
546        src_pts = np.array([lt, rt, rb, lb], dtype="float32")
547        dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32")
548        m = cv.getPerspectiveTransform(src_pts, dst_pts)
549        warped = cv.warpPerspective(image, m, (int(w), int(h)))
550
551        return warped

Extract a multi-cell region from the image with perspective correction.

Crops the image to include all cells from start to end (inclusive), applying a perspective transform to produce a rectangular output.

Arguments:
  • image: Source image (BGR or grayscale).
  • start: Top-left cell as (row, col).
  • end: Bottom-right cell as (row, col).
  • margin: Uniform margin in pixels (default 0).
  • margin_top: Override top margin.
  • margin_bottom: Override bottom margin.
  • margin_left: Override left margin.
  • margin_right: Override right margin.
  • margin_y: Override vertical margins (top and bottom).
  • margin_x: Override horizontal margins (left and right).
Returns:

Cropped and perspective-corrected image.

Example:
>>> # Extract a 3x2 region starting at cell (1, 0)
>>> region_img = grid.crop_region(image, (1, 0), (3, 1))
@abstractmethod
def text_regions( self, img: Union[cv2.Mat, numpy.ndarray], row: int, margin_x: int = 0, margin_y: int = 0) -> list[tuple[tuple[int, int], tuple[int, int]]]:
553    @abstractmethod
554    def text_regions(
555        self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
556    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
557        """
558        Split the row into regions of continuous text
559
560        Returns
561            list[tuple[int, int]]: a list of spans (start col, end col)
562        """
563
564        pass

Split the row into regions of continuous text

Returns:

list[tuple[int, int]]: a list of spans (start col, end col)

def crop_cell( self, image, cell: tuple[int, int], margin: int = 0, margin_top: int | None = None, margin_bottom: int | None = None, margin_left: int | None = None, margin_right: int | None = None, margin_y: int | None = None, margin_x: int | None = None) -> Union[cv2.Mat, numpy.ndarray]:
566    def crop_cell(
567        self,
568        image,
569        cell: tuple[int, int],
570        margin: int = 0,
571        margin_top: int | None = None,
572        margin_bottom: int | None = None,
573        margin_left: int | None = None,
574        margin_right: int | None = None,
575        margin_y: int | None = None,
576        margin_x: int | None = None,
577    ) -> MatLike:
578        """
579        Extract a single cell from the image with perspective correction.
580
581        Convenience method equivalent to `crop_region(image, cell, cell, margin)`.
582
583        Args:
584            image: Source image (BGR or grayscale).
585            cell: Cell indices as (row, col).
586            margin: Padding in pixels around the cell (default 0).
587
588        Returns:
589            Cropped and perspective-corrected cell image.
590
591        Example:
592            >>> cell_img = grid.crop_cell(image, (0, 0))
593            >>> cv2.imwrite("cell_0_0.png", cell_img)
594        """
595        return self.crop_region(
596            image,
597            cell,
598            cell,
599            margin,
600            margin_top,
601            margin_bottom,
602            margin_left,
603            margin_right,
604            margin_y,
605            margin_x,
606        )

Extract a single cell from the image with perspective correction.

Convenience method equivalent to crop_region(image, cell, cell, margin).

Arguments:
  • image: Source image (BGR or grayscale).
  • cell: Cell indices as (row, col).
  • margin: Padding in pixels around the cell (default 0).
Returns:

Cropped and perspective-corrected cell image.

Example:
>>> cell_img = grid.crop_cell(image, (0, 0))
>>> cv2.imwrite("cell_0_0.png", cell_img)
class Taulu:
 42class Taulu:
 43    """
 44    High-level API for table segmentation from images.
 45
 46    Taulu orchestrates header alignment, grid detection, and table segmentation
 47    into a single workflow.
 48
 49    Workflow:
 50        1. Create annotated header images via `Taulu.annotate()`
 51        2. Initialize Taulu with header(s) and parameters
 52        3. Call `segment_table()` to get a `TableGrid` with cell boundaries
 53
 54    For two-page tables, use `Split[T]` to provide different parameters for
 55    left and right sides.
 56
 57    Example:
 58        >>> from taulu import Taulu
 59        >>> Taulu.annotate("table_image.png", "header.png")
 60        >>> taulu = Taulu("header.png")
 61        >>> grid = taulu.segment_table("table_page_01.png")
 62        >>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0))
 63    """
 64
 65    def __init__(
 66        self,
 67        header_image_path: Splittable[PathLike[str]] | Splittable[str],
 68        cell_height_factor: Splittable[float] | Splittable[list[float]] | None = None,
 69        header_anno_path: Splittable[PathLike[str]] | Splittable[str] | None = None,
 70        sauvola_k: Splittable[float] = 0.25,
 71        search_region: Splittable[int] = 60,
 72        distance_penalty: Splittable[float] = 0.4,
 73        cross_width: Splittable[int] = 10,
 74        morph_size: Splittable[int] = 4,
 75        kernel_size: Splittable[int] = 41,
 76        processing_scale: Splittable[float] = 1.0,
 77        skip_astar_threshold: Splittable[float] = 0.2,
 78        min_rows: Splittable[int] = 5,
 79        look_distance: Splittable[int] = 3,
 80        grow_threshold: Splittable[float] = 0.3,
 81        smooth_grid: bool = False,
 82        smooth_strength: float = 0.5,
 83        smooth_iterations: int = 1,
 84        smooth_degree: int = 1,
 85        cuts: Splittable[int] = 0,
 86        cut_fraction: Splittable[float] = 0.5,
 87        match_method: Splittable[MatchMethod] = "akaze",
 88        alignment_scale: float = 1.0,
 89    ):
 90        """
 91        Args:
 92            header_image_path: Path to header template image(s). Use `Split` for two-page tables.
 93            cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
 94            header_anno_path: Explicit annotation JSON path. Default: inferred from image path.
 95            sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
 96            search_region: Corner search area in pixels. Default: 60
 97            distance_penalty: Position penalty weight [0, 1]. Default: 0.4
 98            cross_width: Cross-kernel width matching line thickness. Default: 10
 99            morph_size: Morphological dilation size. Default: 4
100            kernel_size: Cross-kernel size (odd). Default: 41
101            processing_scale: Image downscale factor (0, 1]. Default: 1.0
102            skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2
103            min_rows: Minimum rows before completion. Default: 5
104            look_distance: Rows to examine for extrapolation. Default: 3
105            grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
106            smooth_grid: Apply grid smoothing after detection. Default: False
107            smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5
108            smooth_iterations: Number of smoothing passes. Default: 1
109            smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1
110            cuts: Number of grid cuts during growing. Default: 0
111            cut_fraction: Fraction of points to delete per cut. Default: 0.5
112            match_method: Feature matching method for header alignment. One of "orb"
113                (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust,
114                patent-free). Default: "akaze"
115            alignment_scale: Downscale factor (0, 1] for header alignment only. Lower
116                values speed up feature matching. Default: 1.0
117        """
118        self._processing_scale = processing_scale
119        self._smooth = smooth_grid
120        self._smooth_strength = smooth_strength
121        self._smooth_iterations = smooth_iterations
122        self._smooth_degree = smooth_degree
123
124        if cell_height_factor is None:
125            cell_height_factor = [1.0]
126
127        self._cell_height_factor = cell_height_factor
128
129        if isinstance(header_image_path, Split) or isinstance(header_anno_path, Split):
130            header = Split(Path(header_image_path.left), Path(header_image_path.right))
131
132            if not exists(header.left.with_suffix(".png")) or not exists(
133                header.right.with_suffix(".png")
134            ):
135                raise TauluException(
136                    "The header images you provided do not exist (or they aren't .png files)"
137                )
138
139            if header_anno_path is None:
140                if not exists(header.left.with_suffix(".json")) or not exists(
141                    header.right.with_suffix(".json")
142                ):
143                    raise TauluException(
144                        "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
145                    )
146
147                template_left = HeaderTemplate.from_saved(
148                    header.left.with_suffix(".json")
149                )
150                template_right = HeaderTemplate.from_saved(
151                    header.right.with_suffix(".json")
152                )
153
154            else:
155                if not exists(header_anno_path.left) or not exists(  # ty: ignore[unresolved-attribute]
156                    header_anno_path.right  # ty: ignore[unresolved-attribute]
157                ):
158                    raise TauluException(
159                        "The header annotation files you provided do not exist (or they aren't .json files)"
160                    )
161
162                template_left = HeaderTemplate.from_saved(header_anno_path.left)  # ty: ignore[unresolved-attribute]
163                template_right = HeaderTemplate.from_saved(header_anno_path.right)  # ty: ignore[unresolved-attribute]
164
165            self._header = Split(
166                cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right))
167            )
168
169            self._aligner = Split(
170                HeaderAligner(
171                    self._header.left,
172                    method=get_param(match_method, "left"),
173                    scale=alignment_scale,
174                ),
175                HeaderAligner(
176                    self._header.right,
177                    method=get_param(match_method, "right"),
178                    scale=alignment_scale,
179                ),
180            )
181
182            self._template = Split(template_left, template_right)
183
184            self._cell_heights = Split(
185                self._template.left.cell_heights(get_param(cell_height_factor, "left")),
186                self._template.right.cell_heights(
187                    get_param(cell_height_factor, "right")
188                ),
189            )
190
191            # Create GridDetector for left and right with potentially different parameters
192            self._grid_detector = Split(
193                GridDetector(
194                    kernel_size=get_param(kernel_size, "left"),
195                    cross_width=get_param(cross_width, "left"),
196                    morph_size=get_param(morph_size, "left"),
197                    search_region=get_param(search_region, "left"),
198                    sauvola_k=get_param(sauvola_k, "left"),
199                    distance_penalty=get_param(distance_penalty, "left"),
200                    scale=get_param(self._processing_scale, "left"),
201                    skip_astar_threshold=get_param(skip_astar_threshold, "left"),
202                    min_rows=get_param(min_rows, "left"),
203                    look_distance=get_param(look_distance, "left"),
204                    grow_threshold=get_param(grow_threshold, "left"),
205                    cuts=get_param(cuts, "left"),
206                    cut_fraction=get_param(cut_fraction, "left"),
207                ),
208                GridDetector(
209                    kernel_size=get_param(kernel_size, "right"),
210                    cross_width=get_param(cross_width, "right"),
211                    morph_size=get_param(morph_size, "right"),
212                    search_region=get_param(search_region, "right"),
213                    sauvola_k=get_param(sauvola_k, "right"),
214                    distance_penalty=get_param(distance_penalty, "right"),
215                    scale=get_param(self._processing_scale, "right"),
216                    skip_astar_threshold=get_param(skip_astar_threshold, "right"),
217                    min_rows=get_param(min_rows, "right"),
218                    look_distance=get_param(look_distance, "right"),
219                    grow_threshold=get_param(grow_threshold, "right"),
220                    cuts=get_param(cuts, "right"),
221                    cut_fraction=get_param(cut_fraction, "right"),
222                ),
223            )
224
225        else:
226            header_image_path = Path(header_image_path)
227            self._header = cv2.imread(os.fspath(header_image_path))
228            self._aligner = HeaderAligner(
229                self._header,
230                method=cast(MatchMethod, match_method),
231                scale=alignment_scale,
232            )
233            self._template = HeaderTemplate.from_saved(
234                header_image_path.with_suffix(".json")
235            )
236
237            # For single header, parameters should not be Split objects
238            if any(
239                isinstance(param, Split)
240                for param in [
241                    sauvola_k,
242                    search_region,
243                    distance_penalty,
244                    cross_width,
245                    morph_size,
246                    kernel_size,
247                    processing_scale,
248                    min_rows,
249                    look_distance,
250                    grow_threshold,
251                    cell_height_factor,
252                    cuts,
253                    cut_fraction,
254                    match_method,
255                ]
256            ):
257                raise TauluException(
258                    "Split parameters can only be used with split headers (tuple header_path)"
259                )
260
261            self._cell_heights = self._template.cell_heights(
262                cast(list[float] | float, self._cell_height_factor)
263            )
264
265            self._grid_detector = GridDetector(
266                kernel_size=kernel_size,  # ty: ignore
267                cross_width=cross_width,  # ty: ignore
268                morph_size=morph_size,  # ty: ignore
269                search_region=search_region,  # ty: ignore
270                sauvola_k=sauvola_k,  # ty: ignore
271                distance_penalty=distance_penalty,  # ty: ignore
272                scale=self._processing_scale,  # ty: ignore
273                skip_astar_threshold=skip_astar_threshold,  # ty: ignore
274                min_rows=min_rows,  # ty: ignore
275                look_distance=look_distance,  # ty: ignore
276                grow_threshold=grow_threshold,  # ty: ignore
277                cuts=cuts,  # ty:ignore
278                cut_fraction=cut_fraction,  # ty:ignore
279            )
280
281    @classmethod
282    def from_config(cls, config: TauluConfig) -> "Taulu":
283        """
284        Create a :class:`Taulu` instance from a :class:`~taulu.config.TauluConfig`.
285
286        Args:
287            config: A populated :class:`~taulu.config.TauluConfig` instance.
288
289        Returns:
290            A :class:`Taulu` instance configured according to ``config``.
291
292        Example::
293
294            from taulu import Taulu
295            from taulu.config import TauluConfig
296
297            config = TauluConfig.from_toml("my_table.toml")
298            taulu = Taulu.from_config(config)
299        """
300        import dataclasses
301
302        return cls(
303            **{f.name: getattr(config, f.name) for f in dataclasses.fields(config)}
304        )
305
306    @staticmethod
307    def annotate(
308        image_path: PathLike[str] | str,
309        output_path: PathLike[str] | str,
310        *,
311        backend: Literal["auto", "gui", "notebook"] = "auto",
312    ):
313        """
314        Interactive tool to create header annotations for table segmentation.
315
316        This method guides you through a two-step annotation process:
317
318        1. **Crop the header**: Click four corners to define the header region
319        2. **Annotate lines**: Click pairs of points to define each vertical and
320           horizontal line in the header
321
322        The annotations are saved as:
323        - A cropped header image (.png) at `output_path`
324        - A JSON file (.json) containing line coordinates
325
326        ## Annotation Guidelines
327
328        **Which lines to annotate:**
329        - All vertical lines that extend into the table body (column separators)
330        - The top horizontal line of the header
331        - The bottom horizontal line of the header (top of data rows)
332
333        **Order doesn't matter** - annotate lines in any order that's convenient.
334
335        **To annotate a line:**
336        1. Click once at one endpoint
337        2. Click again at the other endpoint
338        3. A green line appears showing your annotation
339
340        **To undo:**
341        - Right-click anywhere to remove the last line you drew
342
343        **When finished:**
344        - Press 'n' to save and exit
345        - Press 'q' to quit without saving
346
347        Args:
348            image_path (PathLike[str] | str): Path to a table image containing
349                a clear view of the header. This can be a full table image.
350            output_path (PathLike[str] | str): Where to save the cropped header
351                image. The annotation JSON will be saved with the same name but
352                .json extension.
353
354        Raises:
355            TauluException: If image_path doesn't exist or output_path is a directory
356
357        Examples:
358            Annotate a single header:
359
360            >>> from taulu import Taulu
361            >>> Taulu.annotate("scan_page_01.png", "header.png")
362            # Interactive window opens
363            # After annotation: creates header.png and header.json
364
365            Annotate left and right headers for a split table:
366
367            >>> Taulu.annotate("scan_page_01.png", "header_left.png")
368            >>> Taulu.annotate("scan_page_01.png", "header_right.png")
369            # Creates header_left.{png,json} and header_right.{png,json}
370
371        Notes:
372            - The header image doesn't need to be perfectly cropped initially -
373              the tool will help you crop it precisely
374            - Annotation accuracy is important: misaligned lines will cause
375              segmentation errors
376            - You can re-run this method to update annotations if needed
377        """
378
379        if not exists(image_path):
380            raise TauluException(f"Image path {image_path} does not exist")
381
382        if os.path.isdir(output_path):
383            raise TauluException("Output path should be a file")
384
385        output_path = Path(output_path)
386
387        def running_in_notebook() -> bool:
388            try:
389                from IPython import get_ipython
390
391                ip = get_ipython()
392                return ip is not None and "IPKernelApp" in ip.config
393            except Exception:
394                return False
395
396        # Decide backend
397        if backend not in ("auto", "gui", "notebook"):
398            raise TauluException("backend must be one of: 'auto', 'gui', 'notebook'")
399        if backend == "auto":
400            use_notebook = running_in_notebook()
401        else:
402            use_notebook = backend == "notebook"
403
404        if use_notebook:
405            # Notebook way
406            logger.info(
407                "\x1b[32mNotebook environment detected/selected. Using notebook annotation backend."
408            )
409            session = HeaderTemplate.annotate_image_notebook(
410                os.fspath(image_path), crop=output_path.with_suffix(".png")
411            )
412            session._save_path = output_path.with_suffix(".json")  # ty: ignore[unresolved-attribute]
413            return session
414
415        else:
416            # GUI way
417            template = HeaderTemplate.annotate_image(
418                os.fspath(image_path), crop=output_path.with_suffix(".png")
419            )
420            template.save(output_path.with_suffix(".json"))
421
422    def segment_table(
423        self,
424        image: MatLike | PathLike[str] | str,
425        filtered: MatLike | PathLike[str] | str | None = None,
426        debug_view: bool = False,
427        debug_view_notebook: bool = False,
428    ) -> TableGrid:
429        """
430        Segment a table image into a grid of cells.
431
432        Orchestrates header alignment, grid detection, corner growing, and
433        extrapolation to produce a complete grid structure.
434
435        Args:
436            image: Table image to segment (file path or numpy array).
437            filtered: Optional pre-filtered binary image for corner detection.
438                If provided, binarization parameters are ignored.
439            debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance,
440                'q' to quit. Default: False
441            debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook
442                using matplotlib. Default: False
443
444        Returns:
445            TableGrid: Grid structure with methods for cell access (`crop_cell`,
446                `cell_polygon`), visualization (`show_cells`), and persistence
447                (`save`, `from_saved`).
448
449        Raises:
450            TauluException: If image cannot be loaded or grid detection fails.
451        """
452
453        if not isinstance(image, MatLike):
454            image = cast(str | PathLike[str], image)
455            tmp_image = cv2.imread(os.fspath(image))
456            assert tmp_image is not None
457            image = tmp_image
458
459        now = perf_counter()
460        h = self._aligner.align(
461            image,  # ty: ignore[invalid-argument-type]
462            visual=debug_view,
463            visual_notebook=debug_view_notebook,
464        )
465        align_time = perf_counter() - now
466        logger.info(f"Header alignment took {align_time:.2f} seconds")
467
468        # find the starting point for the table grid algorithm
469
470        def make_top_row(template: HeaderTemplate, aligner: HeaderAligner, h: NDArray):
471            top_row = []
472            for x in range(template.cols + 1):
473                on_template = template.intersection((1, x))
474                on_template = (int(on_template[0]), int(on_template[1]))
475
476                on_img = aligner.template_to_img(h, on_template)
477
478                top_row.append(on_img)
479
480            return top_row
481
482        if isinstance(self._aligner, Split):
483            top_row = Split(
484                make_top_row(self._template.left, self._aligner.left, h.left),  # ty:ignore
485                make_top_row(self._template.right, self._aligner.right, h.right),  # ty:ignore
486            )
487        else:
488            top_row = make_top_row(self._template, self._aligner, h)  # ty:ignore
489
490        now = perf_counter()
491        table = self._grid_detector.find_table_points(
492            image,  # ty:ignore
493            top_row,  # ty:ignore
494            self._template.cell_widths(0),
495            self._cell_heights,  # ty:ignore
496            visual=debug_view,
497            visual_notebook=debug_view_notebook,
498            filtered=filtered,  # ty:ignore
499            smooth=self._smooth,
500            smooth_strength=self._smooth_strength,
501            smooth_iterations=self._smooth_iterations,
502            smooth_degree=self._smooth_degree,
503        )
504        grid_time = perf_counter() - now
505        logger.info(f"Grid detection took {grid_time:.2f} seconds")
506
507        if debug_view_notebook:
508            self._aligner.show_matches_notebook()
509
510        if isinstance(table, Split):
511            table = TableGrid.from_split(table, (0, 0))  # ty: ignore
512
513        return table

High-level API for table segmentation from images.

Taulu orchestrates header alignment, grid detection, and table segmentation into a single workflow.

Workflow:
  1. Create annotated header images via Taulu.annotate()
  2. Initialize Taulu with header(s) and parameters
  3. Call segment_table() to get a TableGrid with cell boundaries

For two-page tables, use Split[T] to provide different parameters for left and right sides.

Example:
>>> from taulu import Taulu
>>> Taulu.annotate("table_image.png", "header.png")
>>> taulu = Taulu("header.png")
>>> grid = taulu.segment_table("table_page_01.png")
>>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0))
Taulu( header_image_path: Splittable[os.PathLike[str]] | Splittable[str], cell_height_factor: Splittable[float] | Splittable[list[float]] | None = None, header_anno_path: Splittable[os.PathLike[str]] | Splittable[str] | None = None, sauvola_k: Splittable[float] = 0.25, search_region: Splittable[int] = 60, distance_penalty: Splittable[float] = 0.4, cross_width: Splittable[int] = 10, morph_size: Splittable[int] = 4, kernel_size: Splittable[int] = 41, processing_scale: Splittable[float] = 1.0, skip_astar_threshold: Splittable[float] = 0.2, min_rows: Splittable[int] = 5, look_distance: Splittable[int] = 3, grow_threshold: Splittable[float] = 0.3, smooth_grid: bool = False, smooth_strength: float = 0.5, smooth_iterations: int = 1, smooth_degree: int = 1, cuts: Splittable[int] = 0, cut_fraction: Splittable[float] = 0.5, match_method: Splittable[typing.Literal['orb', 'sift', 'surf', 'akaze']] = 'akaze', alignment_scale: float = 1.0)
 65    def __init__(
 66        self,
 67        header_image_path: Splittable[PathLike[str]] | Splittable[str],
 68        cell_height_factor: Splittable[float] | Splittable[list[float]] | None = None,
 69        header_anno_path: Splittable[PathLike[str]] | Splittable[str] | None = None,
 70        sauvola_k: Splittable[float] = 0.25,
 71        search_region: Splittable[int] = 60,
 72        distance_penalty: Splittable[float] = 0.4,
 73        cross_width: Splittable[int] = 10,
 74        morph_size: Splittable[int] = 4,
 75        kernel_size: Splittable[int] = 41,
 76        processing_scale: Splittable[float] = 1.0,
 77        skip_astar_threshold: Splittable[float] = 0.2,
 78        min_rows: Splittable[int] = 5,
 79        look_distance: Splittable[int] = 3,
 80        grow_threshold: Splittable[float] = 0.3,
 81        smooth_grid: bool = False,
 82        smooth_strength: float = 0.5,
 83        smooth_iterations: int = 1,
 84        smooth_degree: int = 1,
 85        cuts: Splittable[int] = 0,
 86        cut_fraction: Splittable[float] = 0.5,
 87        match_method: Splittable[MatchMethod] = "akaze",
 88        alignment_scale: float = 1.0,
 89    ):
 90        """
 91        Args:
 92            header_image_path: Path to header template image(s). Use `Split` for two-page tables.
 93            cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
 94            header_anno_path: Explicit annotation JSON path. Default: inferred from image path.
 95            sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
 96            search_region: Corner search area in pixels. Default: 60
 97            distance_penalty: Position penalty weight [0, 1]. Default: 0.4
 98            cross_width: Cross-kernel width matching line thickness. Default: 10
 99            morph_size: Morphological dilation size. Default: 4
100            kernel_size: Cross-kernel size (odd). Default: 41
101            processing_scale: Image downscale factor (0, 1]. Default: 1.0
102            skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2
103            min_rows: Minimum rows before completion. Default: 5
104            look_distance: Rows to examine for extrapolation. Default: 3
105            grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
106            smooth_grid: Apply grid smoothing after detection. Default: False
107            smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5
108            smooth_iterations: Number of smoothing passes. Default: 1
109            smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1
110            cuts: Number of grid cuts during growing. Default: 0
111            cut_fraction: Fraction of points to delete per cut. Default: 0.5
112            match_method: Feature matching method for header alignment. One of "orb"
113                (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust,
114                patent-free). Default: "akaze"
115            alignment_scale: Downscale factor (0, 1] for header alignment only. Lower
116                values speed up feature matching. Default: 1.0
117        """
118        self._processing_scale = processing_scale
119        self._smooth = smooth_grid
120        self._smooth_strength = smooth_strength
121        self._smooth_iterations = smooth_iterations
122        self._smooth_degree = smooth_degree
123
124        if cell_height_factor is None:
125            cell_height_factor = [1.0]
126
127        self._cell_height_factor = cell_height_factor
128
129        if isinstance(header_image_path, Split) or isinstance(header_anno_path, Split):
130            header = Split(Path(header_image_path.left), Path(header_image_path.right))
131
132            if not exists(header.left.with_suffix(".png")) or not exists(
133                header.right.with_suffix(".png")
134            ):
135                raise TauluException(
136                    "The header images you provided do not exist (or they aren't .png files)"
137                )
138
139            if header_anno_path is None:
140                if not exists(header.left.with_suffix(".json")) or not exists(
141                    header.right.with_suffix(".json")
142                ):
143                    raise TauluException(
144                        "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
145                    )
146
147                template_left = HeaderTemplate.from_saved(
148                    header.left.with_suffix(".json")
149                )
150                template_right = HeaderTemplate.from_saved(
151                    header.right.with_suffix(".json")
152                )
153
154            else:
155                if not exists(header_anno_path.left) or not exists(  # ty: ignore[unresolved-attribute]
156                    header_anno_path.right  # ty: ignore[unresolved-attribute]
157                ):
158                    raise TauluException(
159                        "The header annotation files you provided do not exist (or they aren't .json files)"
160                    )
161
162                template_left = HeaderTemplate.from_saved(header_anno_path.left)  # ty: ignore[unresolved-attribute]
163                template_right = HeaderTemplate.from_saved(header_anno_path.right)  # ty: ignore[unresolved-attribute]
164
165            self._header = Split(
166                cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right))
167            )
168
169            self._aligner = Split(
170                HeaderAligner(
171                    self._header.left,
172                    method=get_param(match_method, "left"),
173                    scale=alignment_scale,
174                ),
175                HeaderAligner(
176                    self._header.right,
177                    method=get_param(match_method, "right"),
178                    scale=alignment_scale,
179                ),
180            )
181
182            self._template = Split(template_left, template_right)
183
184            self._cell_heights = Split(
185                self._template.left.cell_heights(get_param(cell_height_factor, "left")),
186                self._template.right.cell_heights(
187                    get_param(cell_height_factor, "right")
188                ),
189            )
190
191            # Create GridDetector for left and right with potentially different parameters
192            self._grid_detector = Split(
193                GridDetector(
194                    kernel_size=get_param(kernel_size, "left"),
195                    cross_width=get_param(cross_width, "left"),
196                    morph_size=get_param(morph_size, "left"),
197                    search_region=get_param(search_region, "left"),
198                    sauvola_k=get_param(sauvola_k, "left"),
199                    distance_penalty=get_param(distance_penalty, "left"),
200                    scale=get_param(self._processing_scale, "left"),
201                    skip_astar_threshold=get_param(skip_astar_threshold, "left"),
202                    min_rows=get_param(min_rows, "left"),
203                    look_distance=get_param(look_distance, "left"),
204                    grow_threshold=get_param(grow_threshold, "left"),
205                    cuts=get_param(cuts, "left"),
206                    cut_fraction=get_param(cut_fraction, "left"),
207                ),
208                GridDetector(
209                    kernel_size=get_param(kernel_size, "right"),
210                    cross_width=get_param(cross_width, "right"),
211                    morph_size=get_param(morph_size, "right"),
212                    search_region=get_param(search_region, "right"),
213                    sauvola_k=get_param(sauvola_k, "right"),
214                    distance_penalty=get_param(distance_penalty, "right"),
215                    scale=get_param(self._processing_scale, "right"),
216                    skip_astar_threshold=get_param(skip_astar_threshold, "right"),
217                    min_rows=get_param(min_rows, "right"),
218                    look_distance=get_param(look_distance, "right"),
219                    grow_threshold=get_param(grow_threshold, "right"),
220                    cuts=get_param(cuts, "right"),
221                    cut_fraction=get_param(cut_fraction, "right"),
222                ),
223            )
224
225        else:
226            header_image_path = Path(header_image_path)
227            self._header = cv2.imread(os.fspath(header_image_path))
228            self._aligner = HeaderAligner(
229                self._header,
230                method=cast(MatchMethod, match_method),
231                scale=alignment_scale,
232            )
233            self._template = HeaderTemplate.from_saved(
234                header_image_path.with_suffix(".json")
235            )
236
237            # For single header, parameters should not be Split objects
238            if any(
239                isinstance(param, Split)
240                for param in [
241                    sauvola_k,
242                    search_region,
243                    distance_penalty,
244                    cross_width,
245                    morph_size,
246                    kernel_size,
247                    processing_scale,
248                    min_rows,
249                    look_distance,
250                    grow_threshold,
251                    cell_height_factor,
252                    cuts,
253                    cut_fraction,
254                    match_method,
255                ]
256            ):
257                raise TauluException(
258                    "Split parameters can only be used with split headers (tuple header_path)"
259                )
260
261            self._cell_heights = self._template.cell_heights(
262                cast(list[float] | float, self._cell_height_factor)
263            )
264
265            self._grid_detector = GridDetector(
266                kernel_size=kernel_size,  # ty: ignore
267                cross_width=cross_width,  # ty: ignore
268                morph_size=morph_size,  # ty: ignore
269                search_region=search_region,  # ty: ignore
270                sauvola_k=sauvola_k,  # ty: ignore
271                distance_penalty=distance_penalty,  # ty: ignore
272                scale=self._processing_scale,  # ty: ignore
273                skip_astar_threshold=skip_astar_threshold,  # ty: ignore
274                min_rows=min_rows,  # ty: ignore
275                look_distance=look_distance,  # ty: ignore
276                grow_threshold=grow_threshold,  # ty: ignore
277                cuts=cuts,  # ty:ignore
278                cut_fraction=cut_fraction,  # ty:ignore
279            )
Arguments:
  • header_image_path: Path to header template image(s). Use Split for two-page tables.
  • cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
  • header_anno_path: Explicit annotation JSON path. Default: inferred from image path.
  • sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
  • search_region: Corner search area in pixels. Default: 60
  • distance_penalty: Position penalty weight [0, 1]. Default: 0.4
  • cross_width: Cross-kernel width matching line thickness. Default: 10
  • morph_size: Morphological dilation size. Default: 4
  • kernel_size: Cross-kernel size (odd). Default: 41
  • processing_scale: Image downscale factor (0, 1]. Default: 1.0
  • skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2
  • min_rows: Minimum rows before completion. Default: 5
  • look_distance: Rows to examine for extrapolation. Default: 3
  • grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
  • smooth_grid: Apply grid smoothing after detection. Default: False
  • smooth_strength: Blend factor per smoothing iteration (0.0-1.0). Default: 0.5
  • smooth_iterations: Number of smoothing passes. Default: 1
  • smooth_degree: Polynomial degree for smoothing regression (1 or 2). Default: 1
  • cuts: Number of grid cuts during growing. Default: 0
  • cut_fraction: Fraction of points to delete per cut. Default: 0.5
  • match_method: Feature matching method for header alignment. One of "orb" (fast, patent-free), "sift" (robust, uses FLANN), or "akaze" (robust, patent-free). Default: "akaze"
  • alignment_scale: Downscale factor (0, 1] for header alignment only. Lower values speed up feature matching. Default: 1.0
@classmethod
def from_config(cls, config: TauluConfig) -> Taulu:
281    @classmethod
282    def from_config(cls, config: TauluConfig) -> "Taulu":
283        """
284        Create a :class:`Taulu` instance from a :class:`~taulu.config.TauluConfig`.
285
286        Args:
287            config: A populated :class:`~taulu.config.TauluConfig` instance.
288
289        Returns:
290            A :class:`Taulu` instance configured according to ``config``.
291
292        Example::
293
294            from taulu import Taulu
295            from taulu.config import TauluConfig
296
297            config = TauluConfig.from_toml("my_table.toml")
298            taulu = Taulu.from_config(config)
299        """
300        import dataclasses
301
302        return cls(
303            **{f.name: getattr(config, f.name) for f in dataclasses.fields(config)}
304        )

Create a Taulu instance from a taulu.config.TauluConfig.

Arguments:
  • config: A populated taulu.config.TauluConfig instance.
Returns:

A Taulu instance configured according to config.

Example:

from taulu import Taulu
from taulu.config import TauluConfig

config = TauluConfig.from_toml("my_table.toml")
taulu = Taulu.from_config(config)
@staticmethod
def annotate( image_path: os.PathLike[str] | str, output_path: os.PathLike[str] | str, *, backend: Literal['auto', 'gui', 'notebook'] = 'auto'):
306    @staticmethod
307    def annotate(
308        image_path: PathLike[str] | str,
309        output_path: PathLike[str] | str,
310        *,
311        backend: Literal["auto", "gui", "notebook"] = "auto",
312    ):
313        """
314        Interactive tool to create header annotations for table segmentation.
315
316        This method guides you through a two-step annotation process:
317
318        1. **Crop the header**: Click four corners to define the header region
319        2. **Annotate lines**: Click pairs of points to define each vertical and
320           horizontal line in the header
321
322        The annotations are saved as:
323        - A cropped header image (.png) at `output_path`
324        - A JSON file (.json) containing line coordinates
325
326        ## Annotation Guidelines
327
328        **Which lines to annotate:**
329        - All vertical lines that extend into the table body (column separators)
330        - The top horizontal line of the header
331        - The bottom horizontal line of the header (top of data rows)
332
333        **Order doesn't matter** - annotate lines in any order that's convenient.
334
335        **To annotate a line:**
336        1. Click once at one endpoint
337        2. Click again at the other endpoint
338        3. A green line appears showing your annotation
339
340        **To undo:**
341        - Right-click anywhere to remove the last line you drew
342
343        **When finished:**
344        - Press 'n' to save and exit
345        - Press 'q' to quit without saving
346
347        Args:
348            image_path (PathLike[str] | str): Path to a table image containing
349                a clear view of the header. This can be a full table image.
350            output_path (PathLike[str] | str): Where to save the cropped header
351                image. The annotation JSON will be saved with the same name but
352                .json extension.
353
354        Raises:
355            TauluException: If image_path doesn't exist or output_path is a directory
356
357        Examples:
358            Annotate a single header:
359
360            >>> from taulu import Taulu
361            >>> Taulu.annotate("scan_page_01.png", "header.png")
362            # Interactive window opens
363            # After annotation: creates header.png and header.json
364
365            Annotate left and right headers for a split table:
366
367            >>> Taulu.annotate("scan_page_01.png", "header_left.png")
368            >>> Taulu.annotate("scan_page_01.png", "header_right.png")
369            # Creates header_left.{png,json} and header_right.{png,json}
370
371        Notes:
372            - The header image doesn't need to be perfectly cropped initially -
373              the tool will help you crop it precisely
374            - Annotation accuracy is important: misaligned lines will cause
375              segmentation errors
376            - You can re-run this method to update annotations if needed
377        """
378
379        if not exists(image_path):
380            raise TauluException(f"Image path {image_path} does not exist")
381
382        if os.path.isdir(output_path):
383            raise TauluException("Output path should be a file")
384
385        output_path = Path(output_path)
386
387        def running_in_notebook() -> bool:
388            try:
389                from IPython import get_ipython
390
391                ip = get_ipython()
392                return ip is not None and "IPKernelApp" in ip.config
393            except Exception:
394                return False
395
396        # Decide backend
397        if backend not in ("auto", "gui", "notebook"):
398            raise TauluException("backend must be one of: 'auto', 'gui', 'notebook'")
399        if backend == "auto":
400            use_notebook = running_in_notebook()
401        else:
402            use_notebook = backend == "notebook"
403
404        if use_notebook:
405            # Notebook way
406            logger.info(
407                "\x1b[32mNotebook environment detected/selected. Using notebook annotation backend."
408            )
409            session = HeaderTemplate.annotate_image_notebook(
410                os.fspath(image_path), crop=output_path.with_suffix(".png")
411            )
412            session._save_path = output_path.with_suffix(".json")  # ty: ignore[unresolved-attribute]
413            return session
414
415        else:
416            # GUI way
417            template = HeaderTemplate.annotate_image(
418                os.fspath(image_path), crop=output_path.with_suffix(".png")
419            )
420            template.save(output_path.with_suffix(".json"))

Interactive tool to create header annotations for table segmentation.

This method guides you through a two-step annotation process:

  1. Crop the header: Click four corners to define the header region
  2. Annotate lines: Click pairs of points to define each vertical and horizontal line in the header

The annotations are saved as:

  • A cropped header image (.png) at output_path
  • A JSON file (.json) containing line coordinates

Annotation Guidelines

Which lines to annotate:

  • All vertical lines that extend into the table body (column separators)
  • The top horizontal line of the header
  • The bottom horizontal line of the header (top of data rows)

Order doesn't matter - annotate lines in any order that's convenient.

To annotate a line:

  1. Click once at one endpoint
  2. Click again at the other endpoint
  3. A green line appears showing your annotation

To undo:

  • Right-click anywhere to remove the last line you drew

When finished:

  • Press 'n' to save and exit
  • Press 'q' to quit without saving
Arguments:
  • image_path (PathLike[str] | str): Path to a table image containing a clear view of the header. This can be a full table image.
  • output_path (PathLike[str] | str): Where to save the cropped header image. The annotation JSON will be saved with the same name but .json extension.
Raises:
  • TauluException: If image_path doesn't exist or output_path is a directory
Examples:

Annotate a single header:

>>> from taulu import Taulu
>>> Taulu.annotate("scan_page_01.png", "header.png")
# Interactive window opens
# After annotation: creates header.png and header.json

Annotate left and right headers for a split table:

>>> Taulu.annotate("scan_page_01.png", "header_left.png")
>>> Taulu.annotate("scan_page_01.png", "header_right.png")
# Creates header_left.{png,json} and header_right.{png,json}
Notes:
  • The header image doesn't need to be perfectly cropped initially - the tool will help you crop it precisely
  • Annotation accuracy is important: misaligned lines will cause segmentation errors
  • You can re-run this method to update annotations if needed
def segment_table( self, image: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str], filtered: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str, NoneType] = None, debug_view: bool = False, debug_view_notebook: bool = False) -> TableGrid:
422    def segment_table(
423        self,
424        image: MatLike | PathLike[str] | str,
425        filtered: MatLike | PathLike[str] | str | None = None,
426        debug_view: bool = False,
427        debug_view_notebook: bool = False,
428    ) -> TableGrid:
429        """
430        Segment a table image into a grid of cells.
431
432        Orchestrates header alignment, grid detection, corner growing, and
433        extrapolation to produce a complete grid structure.
434
435        Args:
436            image: Table image to segment (file path or numpy array).
437            filtered: Optional pre-filtered binary image for corner detection.
438                If provided, binarization parameters are ignored.
439            debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance,
440                'q' to quit. Default: False
441            debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook
442                using matplotlib. Default: False
443
444        Returns:
445            TableGrid: Grid structure with methods for cell access (`crop_cell`,
446                `cell_polygon`), visualization (`show_cells`), and persistence
447                (`save`, `from_saved`).
448
449        Raises:
450            TauluException: If image cannot be loaded or grid detection fails.
451        """
452
453        if not isinstance(image, MatLike):
454            image = cast(str | PathLike[str], image)
455            tmp_image = cv2.imread(os.fspath(image))
456            assert tmp_image is not None
457            image = tmp_image
458
459        now = perf_counter()
460        h = self._aligner.align(
461            image,  # ty: ignore[invalid-argument-type]
462            visual=debug_view,
463            visual_notebook=debug_view_notebook,
464        )
465        align_time = perf_counter() - now
466        logger.info(f"Header alignment took {align_time:.2f} seconds")
467
468        # find the starting point for the table grid algorithm
469
470        def make_top_row(template: HeaderTemplate, aligner: HeaderAligner, h: NDArray):
471            top_row = []
472            for x in range(template.cols + 1):
473                on_template = template.intersection((1, x))
474                on_template = (int(on_template[0]), int(on_template[1]))
475
476                on_img = aligner.template_to_img(h, on_template)
477
478                top_row.append(on_img)
479
480            return top_row
481
482        if isinstance(self._aligner, Split):
483            top_row = Split(
484                make_top_row(self._template.left, self._aligner.left, h.left),  # ty:ignore
485                make_top_row(self._template.right, self._aligner.right, h.right),  # ty:ignore
486            )
487        else:
488            top_row = make_top_row(self._template, self._aligner, h)  # ty:ignore
489
490        now = perf_counter()
491        table = self._grid_detector.find_table_points(
492            image,  # ty:ignore
493            top_row,  # ty:ignore
494            self._template.cell_widths(0),
495            self._cell_heights,  # ty:ignore
496            visual=debug_view,
497            visual_notebook=debug_view_notebook,
498            filtered=filtered,  # ty:ignore
499            smooth=self._smooth,
500            smooth_strength=self._smooth_strength,
501            smooth_iterations=self._smooth_iterations,
502            smooth_degree=self._smooth_degree,
503        )
504        grid_time = perf_counter() - now
505        logger.info(f"Grid detection took {grid_time:.2f} seconds")
506
507        if debug_view_notebook:
508            self._aligner.show_matches_notebook()
509
510        if isinstance(table, Split):
511            table = TableGrid.from_split(table, (0, 0))  # ty: ignore
512
513        return table

Segment a table image into a grid of cells.

Orchestrates header alignment, grid detection, corner growing, and extrapolation to produce a complete grid structure.

Arguments:
  • image: Table image to segment (file path or numpy array).
  • filtered: Optional pre-filtered binary image for corner detection. If provided, binarization parameters are ignored.
  • debug_view: Show intermediate processing steps via OpenCV windows. Press 'n' to advance, 'q' to quit. Default: False
  • debug_view_notebook: Show intermediate processing steps inline in a Jupyter notebook using matplotlib. Default: False
Returns:

TableGrid: Grid structure with methods for cell access (crop_cell, cell_polygon), visualization (show_cells), and persistence (save, from_saved).

Raises:
  • TauluException: If image cannot be loaded or grid detection fails.
@dataclass
class TauluConfig:
 46@dataclass
 47class TauluConfig:
 48    """
 49    Configuration for :class:`~taulu.Taulu`.
 50
 51    All parameters mirror the ``Taulu.__init__`` signature. Any parameter that
 52    accepts a ``Split[T]`` can be given as a ``Split`` instance or as a plain
 53    scalar (applied to both sides).
 54
 55    Use :meth:`from_toml` to load from a ``.toml`` file, then pass to
 56    :meth:`Taulu.from_config <taulu.Taulu.from_config>`.
 57    """
 58
 59    header_image_path: Splittable[str]
 60    cell_height_factor: Splittable[float] | Splittable[list[float]] | None = None
 61    header_anno_path: Splittable[str] | None = None
 62    sauvola_k: Splittable[float] = 0.25
 63    search_region: Splittable[int] = 60
 64    distance_penalty: Splittable[float] = 0.4
 65    cross_width: Splittable[int] = 10
 66    morph_size: Splittable[int] = 4
 67    kernel_size: Splittable[int] = 41
 68    processing_scale: Splittable[float] = 1.0
 69    skip_astar_threshold: Splittable[float] = 0.2
 70    min_rows: Splittable[int] = 5
 71    look_distance: Splittable[int] = 3
 72    grow_threshold: Splittable[float] = 0.3
 73    smooth_grid: bool = False
 74    smooth_strength: float = 0.5
 75    smooth_iterations: int = 1
 76    smooth_degree: int = 1
 77    cuts: Splittable[int] = 0
 78    cut_fraction: Splittable[float] = 0.5
 79    match_method: Splittable[str] = "akaze"
 80    alignment_scale: float = 1.0
 81
 82    @classmethod
 83    def from_toml(cls, *paths: PathLike[str] | str) -> "TauluConfig":
 84        """
 85        Load a :class:`TauluConfig` from one or more TOML files.
 86
 87        When multiple paths are given, files are merged in order: later files
 88        override keys from earlier ones. Use this to share a common base config
 89        and override only the fields that differ::
 90
 91            config = TauluConfig.from_toml("common.toml", "left.toml")
 92
 93        Args:
 94            *paths: One or more paths to ``.toml`` configuration files.
 95
 96        Returns:
 97            A fully populated :class:`TauluConfig` instance.
 98
 99        Raises:
100            KeyError: If a required field (``header_image_path``) is missing.
101            TypeError: If a field value has an unexpected type.
102        """
103        merged: dict = {}
104        for path in paths:
105            with open(path, "rb") as f:
106                data = tomllib.load(f)
107            merged.update(data)
108
109        parsed = {
110            key: _parse_value(value)
111            for key, value in merged.items()
112            if not key.startswith("$")
113        }
114        return cls(**parsed)

Configuration for taulu.Taulu.

All parameters mirror the Taulu.__init__ signature. Any parameter that accepts a Split[T] can be given as a Split instance or as a plain scalar (applied to both sides).

Use from_toml() to load from a .toml file, then pass the result to Taulu.from_config().

TauluConfig( header_image_path: Splittable[str], cell_height_factor: Splittable[float] | Splittable[list[float]] | None = None, header_anno_path: Splittable[str] | None = None, sauvola_k: Splittable[float] = 0.25, search_region: Splittable[int] = 60, distance_penalty: Splittable[float] = 0.4, cross_width: Splittable[int] = 10, morph_size: Splittable[int] = 4, kernel_size: Splittable[int] = 41, processing_scale: Splittable[float] = 1.0, skip_astar_threshold: Splittable[float] = 0.2, min_rows: Splittable[int] = 5, look_distance: Splittable[int] = 3, grow_threshold: Splittable[float] = 0.3, smooth_grid: bool = False, smooth_strength: float = 0.5, smooth_iterations: int = 1, smooth_degree: int = 1, cuts: Splittable[int] = 0, cut_fraction: Splittable[float] = 0.5, match_method: Splittable[str] = 'akaze', alignment_scale: float = 1.0)
header_image_path: Splittable[str]
cell_height_factor: Splittable[float] | Splittable[list[float]] | None = None
header_anno_path: Splittable[str] | None = None
sauvola_k: Splittable[float] = 0.25
search_region: Splittable[int] = 60
distance_penalty: Splittable[float] = 0.4
cross_width: Splittable[int] = 10
morph_size: Splittable[int] = 4
kernel_size: Splittable[int] = 41
processing_scale: Splittable[float] = 1.0
skip_astar_threshold: Splittable[float] = 0.2
min_rows: Splittable[int] = 5
look_distance: Splittable[int] = 3
grow_threshold: Splittable[float] = 0.3
smooth_grid: bool = False
smooth_strength: float = 0.5
smooth_iterations: int = 1
smooth_degree: int = 1
cuts: Splittable[int] = 0
cut_fraction: Splittable[float] = 0.5
match_method: Splittable[str] = 'akaze'
alignment_scale: float = 1.0
@classmethod
def from_toml(cls, *paths: os.PathLike[str] | str) -> TauluConfig:
 82    @classmethod
 83    def from_toml(cls, *paths: PathLike[str] | str) -> "TauluConfig":
 84        """
 85        Load a :class:`TauluConfig` from one or more TOML files.
 86
 87        When multiple paths are given, files are merged in order: later files
 88        override keys from earlier ones. Use this to share a common base config
 89        and override only the fields that differ::
 90
 91            config = TauluConfig.from_toml("common.toml", "left.toml")
 92
 93        Args:
 94            *paths: One or more paths to ``.toml`` configuration files.
 95
 96        Returns:
 97            A fully populated :class:`TauluConfig` instance.
 98
 99        Raises:
100            KeyError: If a required field (``header_image_path``) is missing.
101            TypeError: If a field value has an unexpected type.
102        """
103        merged: dict = {}
104        for path in paths:
105            with open(path, "rb") as f:
106                data = tomllib.load(f)
107            merged.update(data)
108
109        parsed = {
110            key: _parse_value(value)
111            for key, value in merged.items()
112            if not key.startswith("$")
113        }
114        return cls(**parsed)

Load a TauluConfig from one or more TOML files.

When multiple paths are given, files are merged in order: later files override keys from earlier ones. Use this to share a common base config and override only the fields that differ:

config = TauluConfig.from_toml("common.toml", "left.toml")
Arguments:
  • *paths: One or more paths to .toml configuration files.
Returns:

A fully populated TauluConfig instance.

Raises:
  • KeyError: If a required field (header_image_path) is missing.
  • TypeError: If a field value has an unexpected type.