CloneHunter Report

Schema: 1.0.0

Findings: 24

_select_compare
src/clonehunter/reporting/html_reporter.py:147-151
_select_compare
src/clonehunter/reporting/json_reporter.py:65-69
0.962
5 duplicated lines
Function A | Function B
147def _select_compare(matches: list[CandidateMatch]) -> dict[str, object] | None:65def _select_compare(matches: list[CandidateMatch]) -> dict[str, object] | None:
148 compare = select_compare(matches)66 compare = select_compare(matches)
149 if compare is None:67 if compare is None:
150 return None68 return None
151 return _compare_payload(compare, matches)69 return _serialize_evidence(compare)
_merge_spans
src/clonehunter/reporting/html_reporter.py:339-350
_covered_lines
src/clonehunter/similarity/rollup.py:152-162
0.921
11 duplicated lines
Function A | Function B
339def _merge_spans(spans: list[tuple[int, int]]) -> list[tuple[int, int]]:152def _covered_lines(spans: list[tuple[int, int]]) -> int:
340 if not spans:153 if not spans:
341 return []154 return 0
342 merged: list[tuple[int, int]] = []155 merged: list[list[int]] = []
343 for start, end in sorted(spans):156 for start, end in sorted(spans):
344 if not merged or start > merged[-1][1] + 1:157 if not merged or start > merged[-1][1] + 1:
345 merged.append((start, end))158 merged.append([start, end])
346 continue159 continue
347 prev_start, prev_end = merged[-1]160 if end > merged[-1][1]:
348 if end > prev_end:161 merged[-1][1] = end
349 merged[-1] = (prev_start, end)162 return sum((end - start + 1 for start, end in merged))
350 return merged
_Embedder.embed
src/clonehunter/core/pipeline.py:32-32
Embedder.embed
src/clonehunter/model/interfaces.py:39-39
1.000
1 duplicated line
Function A | Function B
32 def embed(self, snippets: list[SnippetRef]) -> list[Embedding]: ...39 def embed(self, snippets: list[SnippetRef]) -> list[Embedding]: ...
_Embedder.dim
src/clonehunter/core/pipeline.py:35-35
Embedder.dim
src/clonehunter/model/interfaces.py:36-36
1.000
1 duplicated line
Function A | Function B
35 def dim(self) -> int: ...36 def dim(self) -> int: ...
test_func_threshold_edge
tests/test_threshold_edges.py:43-48
test_exp_threshold_edge
tests/test_threshold_edges.py:90-95
0.943
6 duplicated lines
Function A | Function B
43def test_func_threshold_edge():90def test_exp_threshold_edge():
44 thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0)91 thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0)
45 at = rollup_findings([_match('FUNC', 0.95)], thresholds)92 at = rollup_findings([_match('EXP', 0.9)], thresholds)
46 below = rollup_findings([_match('FUNC', 0.9499)], thresholds)93 below = rollup_findings([_match('EXP', 0.8999)], thresholds)
47 assert at94 assert at
48 assert not below95 assert not below
test_win_threshold_edge
tests/test_threshold_edges.py:51-87
test_min_window_hits_edge
tests/test_threshold_edges.py:98-122
0.963
25 duplicated lines
Function A | Function B
51def test_win_threshold_edge():98def test_min_window_hits_edge():
52 thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0)99 thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0)
53 file = FileRef(path='x.py', content_hash='h', language='python')100 file = FileRef(path='x.py', content_hash='h', language='python')
54 fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=20, code='pass', code_hash='a')101 fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=20, code='pass', code_hash='a')
55 fn_b = FunctionRef(file=file, qualified_name='b', start_line=30, end_line=50, code='pass', code_hash='b')102 fn_b = FunctionRef(file=file, qualified_name='b', start_line=30, end_line=50, code='pass', code_hash='b')
56 a1 = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='a1', snippet_hash='a1')103 a1 = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='a1', snippet_hash='a1')
57 b1 = SnippetRef(kind='WIN', function=fn_b, start_line=30, end_line=32, text='b1', snippet_hash='b1')104 b1 = SnippetRef(kind='WIN', function=fn_b, start_line=30, end_line=32, text='b1', snippet_hash='b1')
58 a2 = SnippetRef(kind='WIN', function=fn_a, start_line=4, end_line=6, text='a2', snippet_hash='a2')105 a2 = SnippetRef(kind='WIN', function=fn_a, start_line=4, end_line=6, text='a2', snippet_hash='a2')
59 b2 = SnippetRef(kind='WIN', function=fn_b, start_line=33, end_line=35, text='b2', snippet_hash='b2')106 b2 = SnippetRef(kind='WIN', function=fn_b, start_line=33, end_line=35, text='b2', snippet_hash='b2')
60 at = rollup_findings([CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.9, evidence=''), CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.9, evidence='')], thresholds)107 m1 = CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.95, evidence='')
61 below = rollup_findings([CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.8999, evidence=''), CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.8999, evidence='')], thresholds)108 m2 = CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.95, evidence='')
62 assert at109 findings = rollup_findings([m1, m2], thresholds)
63 assert below110 assert findings
test_expansion_generates_snippet
tests/test_expansion.py:6-10
test_expansion_respects_max_chars
tests/test_expansion.py:22-26
0.924
5 duplicated lines
Function A | Function B
6def test_expansion_generates_snippet():22def test_expansion_respects_max_chars():
7 files = collect_files(['fixtures/tiny_repo'], ['**/*.py'], [])23 files = collect_files(['fixtures/tiny_repo'], ['**/*.py'], [])
8 functions = [fn for file in files for fn in extract_functions(file)]24 functions = [fn for file in files for fn in extract_functions(file)]
9 snippets = expand_calls(functions, ExpansionParams(enabled=True, depth=1, max_chars=10000))25 snippets = expand_calls(functions, ExpansionParams(enabled=True, depth=1, max_chars=1))
10 assert any((snippet.kind == 'EXP' for snippet in snippets))26 assert snippets == []
test_diff_end_to_end
tests/test_diff_e2e.py:8-83
test_cli_diff_command
tests/test_cli_entrypoints.py:31-82
0.962
52 duplicated lines
Function A | Function B
<23 lines not shown><1 lines not shown>
31def test_cli_diff_command(tmp_path: Path):32 )
32 repo = tmp_path / 'repo'33 subprocess.check_call(["git", "init"], cwd=repo)
33 repo.mkdir()34 subprocess.check_call(["git", "config", "user.email", "test@example.com"], cwd=repo)
34 (repo / 'a.py').write_text('\n\ndef add(nums):\n total = 0\n for n in nums:\n total += n\n return total\n', encoding='utf-8')35 subprocess.check_call(["git", "config", "user.name", "Test User"], cwd=repo)
35 subprocess.check_call(['git', 'init'], cwd=repo)36 subprocess.check_call(["git", "add", "a.py", "b.py"], cwd=repo)
36 subprocess.check_call(['git', 'config', 'user.email', 'test@example.com'], cwd=repo)37 subprocess.check_call(["git", "commit", "-m", "init"], cwd=repo)
37 subprocess.check_call(['git', 'config', 'user.name', 'Test User'], cwd=repo)38 (repo / "a.py").write_text(
38 subprocess.check_call(['git', 'add', 'a.py'], cwd=repo)39 """
39 subprocess.check_call(['git', 'commit', '-m', 'init'], cwd=repo)40def add(nums):
40 (repo / 'a.py').write_text('\n\ndef add(nums):\n total = 0\n for n in nums:\n total += n\n return total\n\n\ndef add_copy(values):\n total = 0\n for n in values:\n total += n\n return total\n', encoding='utf-8')41 total = 0
41 out = tmp_path / 'diff.json'42 for n in nums:
42 cmd = [sys.executable, '-m', 'clonehunter', 'diff', '--base', 'HEAD', '--format', 'json', '--out', str(out)]43 total += n
43 subprocess.check_call(cmd, cwd=repo, env=_env())44 return total
44 assert out.exists()45def add_copy(values):
46 total = 0
47 for n in values:
48 total += n
49 return total
50""",
51 encoding="utf-8",
52 )
53 out = tmp_path / "diff.json"
54 env = os.environ.copy()
55 env["PYTHONPATH"] = str(Path(__file__).resolve().parents[1] / "src")
56 env["CLONEHUNTER_EMBEDDER"] = "stub"
57 cmd = [
58 sys.executable,
59 "-m",
60 "clonehunter",
61 "diff",
62 "--base",
63 "HEAD",
64 "--format",
65 "json",
<11 lines not shown>
test_pipeline_non_python_implicit_windows_only
tests/test_pipeline_smoke.py:15-44
test_pipeline_non_python_allows_cross_file_types
tests/test_pipeline_smoke.py:47-72
0.949
26 duplicated lines
Function A | Function B
15def test_pipeline_non_python_implicit_windows_only(tmp_path: Path) -> None:47def test_pipeline_non_python_allows_cross_file_types(tmp_path: Path) -> None:
16 repo = tmp_path / 'repo'48 repo = tmp_path / 'repo'
17 repo.mkdir()49 repo.mkdir()
18 code = 'function add(a, b) {\n return a + b;\n}\n'50 code = 'function add(a, b) {\n return a + b;\n}\n'
19 (repo / 'a.js').write_text(code, encoding='utf-8')51 (repo / 'a.js').write_text(code, encoding='utf-8')
20 (repo / 'b.js').write_text(code, encoding='utf-8')52 (repo / 'b.ts').write_text(code, encoding='utf-8')
21 result = run_pipeline([str(repo)], CloneHunterConfig(include_globs=['**/*.js'], exclude_globs=[], embedder=EmbedderConfig(name='stub'), windows=WindowConfig(window_lines=3, stride_lines=1, min_nonempty=1), thresholds=Thresholds(func=0.99, win=0.8, exp=0.99, min_window_hits=1, lexical_min_ratio=0.0, lexical_weight=0.3)))53 result = run_pipeline([str(repo)], CloneHunterConfig(include_globs=['**/*.js', '**/*.ts'], exclude_globs=[], embedder=EmbedderConfig(name='stub'), windows=WindowConfig(window_lines=3, stride_lines=1, min_nonempty=1), thresholds=Thresholds(func=0.99, win=0.8, exp=0.99, min_window_hits=1, lexical_min_ratio=0.0, lexical_weight=0.3)))
22 assert result.stats.file_count == 254 assert result.stats.file_count == 2
23 assert result.stats.function_count == 0
24 assert result.stats.finding_count >= 155 assert result.stats.finding_count >= 1
25 finding = result.findings[0]
26 assert finding.function_a.qualified_name != '<file>'
27 assert finding.function_b.qualified_name != '<file>'
test_rollup_min_window_hits
tests/test_rollup.py:6-34
test_rollup_applies_lexical_filter
tests/test_rollup.py:109-144
0.927
29 duplicated lines
Function A | Function B
6def test_rollup_min_window_hits():109def test_rollup_applies_lexical_filter() -> None:
7 file = FileRef(path='x.py', content_hash='h', language='python')110 file = FileRef(path='x.py', content_hash='h', language='python')
8 fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=5, code='pass', code_hash='a')111 fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=5, code='pass', code_hash='a')
9 fn_b = FunctionRef(file=file, qualified_name='b', start_line=10, end_line=14, code='pass', code_hash='b')112 fn_b = FunctionRef(file=file, qualified_name='b', start_line=10, end_line=14, code='pass', code_hash='b')
10 a1 = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='a1', snippet_hash='a1')113 a = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='def alpha():\n return 1', snippet_hash='a1')
11 b1 = SnippetRef(kind='WIN', function=fn_b, start_line=10, end_line=12, text='b1', snippet_hash='b1')114 b = SnippetRef(kind='WIN', function=fn_b, start_line=10, end_line=12, text='def beta():\n return 2', snippet_hash='b1')
12 a2 = SnippetRef(kind='WIN', function=fn_a, start_line=2, end_line=4, text='a2', snippet_hash='a2')115 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=0.99, evidence='')
13 b2 = SnippetRef(kind='WIN', function=fn_b, start_line=11, end_line=13, text='b2', snippet_hash='b2')116 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1, lexical_min_ratio=0.6))
14 matches = [CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.5, evidence=''), CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.5, evidence='')]117 assert findings == []
15 findings = rollup_findings(matches, Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0))
16 assert findings
test_rollup_filters_overlapping_windows_same_function
tests/test_rollup.py:37-46
test_rollup_drops_identical_windows_same_function
tests/test_rollup.py:49-58
0.961
10 duplicated lines
Function A | Function B
37def test_rollup_filters_overlapping_windows_same_function():49def test_rollup_drops_identical_windows_same_function() -> None:
38 file = FileRef(path='x.py', content_hash='h', language='python')50 file = FileRef(path='x.py', content_hash='h', language='python')
39 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c')51 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c')
40 a = SnippetRef(kind='WIN', function=fn, start_line=1, end_line=5, text='a', snippet_hash='a')52 a = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='a', snippet_hash='a')
41 b = SnippetRef(kind='WIN', function=fn, start_line=4, end_line=8, text='b', snippet_hash='b')53 b = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='b', snippet_hash='b')
42 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')54 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')
43 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))55 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))
44 assert findings == []56 assert findings == []
test_rollup_filters_overlapping_windows_same_function
tests/test_rollup.py:37-46
test_rollup_drops_identical_func_self_match
tests/test_rollup.py:61-70
0.970
10 duplicated lines
Function A | Function B
37def test_rollup_filters_overlapping_windows_same_function():61def test_rollup_drops_identical_func_self_match() -> None:
38 file = FileRef(path='x.py', content_hash='h', language='python')62 file = FileRef(path='x.py', content_hash='h', language='python')
39 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c')63 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c')
40 a = SnippetRef(kind='WIN', function=fn, start_line=1, end_line=5, text='a', snippet_hash='a')64 a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a')
41 b = SnippetRef(kind='WIN', function=fn, start_line=4, end_line=8, text='b', snippet_hash='b')65 b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b')
42 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')66 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')
43 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))67 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))
44 assert findings == []68 assert findings == []
test_rollup_filters_overlapping_windows_same_function
tests/test_rollup.py:37-46
test_rollup_drops_overlapping_func_win_same_function
tests/test_rollup.py:73-84
0.938
10 duplicated lines
Function A | Function B
37def test_rollup_filters_overlapping_windows_same_function():73def test_rollup_drops_overlapping_func_win_same_function() -> None:
38 file = FileRef(path='x.py', content_hash='h', language='python')74 file = FileRef(path='x.py', content_hash='h', language='python')
39 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c')75 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c')
40 a = SnippetRef(kind='WIN', function=fn, start_line=1, end_line=5, text='a', snippet_hash='a')76 func = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=30, text='f', snippet_hash='f')
41 b = SnippetRef(kind='WIN', function=fn, start_line=4, end_line=8, text='b', snippet_hash='b')77 win = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='w', snippet_hash='w')
42 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')78 match = CandidateMatch(snippet_a=func, snippet_b=win, similarity=1.0, evidence='')
43 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))79 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))
44 assert findings == []80 assert findings == []
test_rollup_drops_identical_windows_same_function
tests/test_rollup.py:49-58
test_rollup_drops_identical_func_self_match
tests/test_rollup.py:61-70
0.970
10 duplicated lines
Function A | Function B
49def test_rollup_drops_identical_windows_same_function() -> None:61def test_rollup_drops_identical_func_self_match() -> None:
50 file = FileRef(path='x.py', content_hash='h', language='python')62 file = FileRef(path='x.py', content_hash='h', language='python')
51 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c')63 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c')
52 a = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='a', snippet_hash='a')64 a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a')
53 b = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='b', snippet_hash='b')65 b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b')
54 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')66 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')
55 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))67 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))
56 assert findings == []68 assert findings == []
test_rollup_drops_identical_windows_same_function
tests/test_rollup.py:49-58
test_rollup_drops_overlapping_func_win_same_function
tests/test_rollup.py:73-84
0.970
10 duplicated lines
Function A | Function B
49def test_rollup_drops_identical_windows_same_function() -> None:73def test_rollup_drops_overlapping_func_win_same_function() -> None:
50 file = FileRef(path='x.py', content_hash='h', language='python')74 file = FileRef(path='x.py', content_hash='h', language='python')
51 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c')75 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c')
52 a = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='a', snippet_hash='a')76 func = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=30, text='f', snippet_hash='f')
53 b = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='b', snippet_hash='b')77 win = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='w', snippet_hash='w')
54 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')78 match = CandidateMatch(snippet_a=func, snippet_b=win, similarity=1.0, evidence='')
55 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))79 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))
56 assert findings == []80 assert findings == []
test_rollup_drops_identical_func_self_match
tests/test_rollup.py:61-70
test_rollup_drops_overlapping_func_win_same_function
tests/test_rollup.py:73-84
0.947
10 duplicated lines
Function A | Function B
61def test_rollup_drops_identical_func_self_match() -> None:73def test_rollup_drops_overlapping_func_win_same_function() -> None:
62 file = FileRef(path='x.py', content_hash='h', language='python')74 file = FileRef(path='x.py', content_hash='h', language='python')
63 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c')75 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c')
64 a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a')76 func = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=30, text='f', snippet_hash='f')
65 b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b')77 win = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='w', snippet_hash='w')
66 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')78 match = CandidateMatch(snippet_a=func, snippet_b=win, similarity=1.0, evidence='')
67 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))79 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))
68 assert findings == []80 assert findings == []
test_rollup_drops_identical_func_self_match
tests/test_rollup.py:61-70
test_rollup_drops_overlapping_functions_same_file
tests/test_rollup.py:87-106
0.915
10 duplicated lines
Function A | Function B
61def test_rollup_drops_identical_func_self_match() -> None:87def test_rollup_drops_overlapping_functions_same_file() -> None:
62 file = FileRef(path='x.py', content_hash='h', language='python')88 file = FileRef(path='x.py', content_hash='h', language='python')
63 fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c')89 fn_outer = FunctionRef(file=file, qualified_name='outer', start_line=1, end_line=40, code='pass', code_hash='o')
64 a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a')90 fn_inner = FunctionRef(file=file, qualified_name='inner', start_line=10, end_line=20, code='pass', code_hash='i')
65 b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b')91 outer = SnippetRef(kind='FUNC', function=fn_outer, start_line=1, end_line=40, text='o', snippet_hash='o')
66 match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='')92 inner = SnippetRef(kind='FUNC', function=fn_inner, start_line=10, end_line=20, text='i', snippet_hash='i')
67 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1))93 match = CandidateMatch(snippet_a=outer, snippet_b=inner, similarity=1.0, evidence='')
94 findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1, lexical_min_ratio=0.0))
68 assert findings == []95 assert findings == []
_finding
tests/test_clustering.py:5-26
_sample_result
tests/test_reporters_html_sarif.py:17-44
0.934
22 duplicated lines
Function A | Function B
5def _finding(a: str, b: str) -> Finding:17def _sample_result() -> ScanResult:
6 file_a = FileRef(path=f'{a}.py', content_hash='h', language='python')18 file_a = FileRef(path='fixtures/tiny_repo/a.py', content_hash='h', language='python')
7 file_b = FileRef(path=f'{b}.py', content_hash='h', language='python')19 file_b = FileRef(path='fixtures/tiny_repo/b.py', content_hash='h', language='python')
8 fn_a = FunctionRef(file=file_a, qualified_name=a, start_line=1, end_line=2, code='pass', code_hash=a)20 fn_a = FunctionRef(file=file_a, qualified_name='a', start_line=1, end_line=2, code='pass', code_hash='a')
9 fn_b = FunctionRef(file=file_b, qualified_name=b, start_line=1, end_line=2, code='pass', code_hash=b)21 fn_b = FunctionRef(file=file_b, qualified_name='b', start_line=10, end_line=12, code='pass', code_hash='b')
10 snip = SnippetRef(kind='FUNC', function=fn_a, start_line=1, end_line=2, text='pass', snippet_hash=a)22 snip = SnippetRef(kind='FUNC', function=fn_a, start_line=1, end_line=2, text='pass', snippet_hash='s')
11 match = CandidateMatch(snippet_a=snip, snippet_b=snip, similarity=1.0, evidence='')23 match = CandidateMatch(snippet_a=snip, snippet_b=snip, similarity=1.0, evidence='')
12 return Finding(function_a=fn_a, function_b=fn_b, score=1.0, duplicated_lines=2, evidence=[match], reasons=['func'], metadata={})24 finding = Finding(function_a=fn_a, function_b=fn_b, score=1.0, duplicated_lines=2, evidence=[match], reasons=['func'], metadata={})
25 return ScanResult(findings=[finding], stats=ScanStats(0, 0, 0, 0, 1, 0, 0), config_snapshot={}, timing={})
normalize_customer_name
fixtures/demo_monorepo/orders_pipeline.py:14-16
normalize_client_name
fixtures/demo_monorepo/invoices_pipeline.py:14-16
0.960
3 duplicated lines
Function A | Function B
14def normalize_customer_name(raw: str) -> str:14def normalize_client_name(raw: str) -> str:
15 cleaned = ' '.join((part for part in raw.strip().split(' ') if part))15 cleaned = ' '.join((part for part in raw.strip().split(' ') if part))
16 return cleaned.title()16 return cleaned.title()
summarize_totals
fixtures/demo_monorepo/orders_pipeline.py:76-85
summarize_totals
fixtures/demo_monorepo/invoices_pipeline.py:76-85
0.966
10 duplicated lines
Function A | Function B
76def summarize_totals(payload: dict[str, object]) -> str:76def summarize_totals(payload: dict[str, object]) -> str:
77 subtotal = float(payload.get('subtotal', 0.0))77 subtotal = float(payload.get('subtotal', 0.0))
78 discount = float(payload.get('discount', 0.0))78 discount = float(payload.get('discount', 0.0))
79 tax = float(payload.get('tax', 0.0))79 tax = float(payload.get('tax', 0.0))
80 shipping = float(payload.get('shipping', 0.0))80 service_fee = float(payload.get('service_fee', 0.0))
81 total = float(payload.get('total', 0.0))81 total = float(payload.get('total', 0.0))
82 return f'subtotal=${subtotal:,.2f}; discount=${discount:,.2f}; tax=${tax:,.2f}; shipping=${shipping:,.2f}; total=${total:,.2f}'82 return f'subtotal=${subtotal:,.2f}; discount=${discount:,.2f}; tax=${tax:,.2f}; service_fee=${service_fee:,.2f}; total=${total:,.2f}'
build_monthly_breakdown
fixtures/demo_monorepo/orders_pipeline.py:88-124
build_monthly_breakdown
fixtures/demo_monorepo/invoices_pipeline.py:88-129
0.935
37 duplicated lines
Function A | Function B
88def build_monthly_breakdown(daily_totals: list[float], weekly_marketing_spend: list[float], fixed_cost: float) -> dict[str, object]:88def build_monthly_breakdown(daily_totals: list[float], weekly_marketing_spend: list[float], fixed_cost: float) -> dict[str, object]:
89 weeks: list[dict[str, object]] = []89 weeks: list[dict[str, object]] = []
90 monthly_revenue = 0.090 monthly_revenue = 0.0
91 monthly_cost = 0.091 monthly_cost = 0.0
92 for idx in range(4):92 for idx in range(4):
93 start = idx * 793 start = idx * 7
94 end = min(start + 7, len(daily_totals))94 end = min(start + 7, len(daily_totals))
95 revenue = round(sum(daily_totals[start:end]), 2)95 revenue = round(sum(daily_totals[start:end]), 2)
96 marketing = round(weekly_marketing_spend[idx] if idx < len(weekly_marketing_spend) else 0.0, 2)96 outreach = round(weekly_marketing_spend[idx] if idx < len(weekly_marketing_spend) else 0.0, 2)
97 fixed = round(fixed_cost, 2)97 fixed = round(fixed_cost, 2)
98 cost = round(marketing + fixed, 2)98 support_overhead = 15.0 if idx == 3 else 0.0
99 cost = round(outreach + fixed + support_overhead, 2)
99 margin = round(revenue - cost, 2)100 margin = round(revenue - cost, 2)
100 margin_pct = round(margin / revenue * 100, 2) if revenue > 0 else 0.0101 margin_pct = round(margin / revenue * 100, 2) if revenue > 0 else 0.0
102 margin_pct = min(margin_pct, 95.0)
101 monthly_revenue += revenue103 monthly_revenue += revenue
102 monthly_cost += cost104 monthly_cost += cost
103 weeks.append({'week': idx + 1, 'revenue': revenue, 'cost': cost, 'margin': margin, 'margin_pct': margin_pct})105 weeks.append({'week': idx + 1, 'revenue': revenue, 'cost': cost, 'margin': margin, 'margin_pct': margin_pct, 'warning': 'low_margin' if margin_pct < 20 else ''})
104 monthly_margin = round(monthly_revenue - monthly_cost, 2)106 monthly_margin = round(monthly_revenue - monthly_cost, 2)
105 return {'weeks': weeks, 'monthly_revenue': round(monthly_revenue, 2), 'monthly_cost': round(monthly_cost, 2), 'monthly_margin': monthly_margin, 'profitable': monthly_margin > 0}107 return {'weeks': weeks, 'monthly_revenue': round(monthly_revenue, 2), 'monthly_cost': round(monthly_cost, 2), 'monthly_margin': monthly_margin, 'net_positive': monthly_margin > 0}
compile_weekly_metrics
fixtures/demo_monorepo/orders_pipeline.py:127-163
compile_weekly_metrics
fixtures/demo_monorepo/invoices_pipeline.py:132-170
0.994
37 duplicated lines
Function A | Function B
127def compile_weekly_metrics(daily_orders: list[int], daily_revenue: list[float], refund_counts: list[int]) -> dict[str, object]:132def compile_weekly_metrics(daily_orders: list[int], daily_revenue: list[float], refund_counts: list[int]) -> dict[str, object]:
128 weeks: list[dict[str, object]] = []133 weeks: list[dict[str, object]] = []
129 for idx in range(4):134 for idx in range(4):
130 start = idx * 7135 start = idx * 7
131 end = min(start + 7, len(daily_orders))136 end = min(start + 7, len(daily_orders))
132 order_count = sum(daily_orders[start:end])137 order_count = sum(daily_orders[start:end])
133 revenue = round(sum(daily_revenue[start:end]), 2)138 revenue = round(sum(daily_revenue[start:end]), 2)
134 refunds = sum(refund_counts[start:end])139 refunds = sum(refund_counts[start:end])
135 fulfillment_rate = round((order_count - refunds) / order_count * 100, 2) if order_count else 0.0140 fulfillment_rate = round((order_count - refunds) / order_count * 100, 2) if order_count else 0.0
136 avg_order_value = round(revenue / order_count, 2) if order_count else 0.0141 avg_order_value = round(revenue / order_count, 2) if order_count else 0.0
137 weeks.append({'week': idx + 1, 'orders': int(order_count), 'revenue': revenue, 'refunds': int(refunds), 'fulfillment_rate': fulfillment_rate, 'avg_order_value': avg_order_value})142 weeks.append({'week': idx + 1, 'orders': int(order_count), 'revenue': revenue, 'refunds': int(refunds), 'fulfillment_rate': fulfillment_rate, 'avg_order_value': avg_order_value})
138 total_orders = sum((int(row['orders']) for row in weeks))143 total_orders = sum((int(row['orders']) for row in weeks))
139 total_revenue = round(sum((float(row['revenue']) for row in weeks)), 2)144 total_revenue = round(sum((float(row['revenue']) for row in weeks)), 2)
140 total_refunds = sum((int(row['refunds']) for row in weeks))145 total_refunds = sum((int(row['refunds']) for row in weeks))
141 net_orders = max(0, total_orders - total_refunds)146 net_orders = max(0, total_orders - total_refunds)
142 return {'weeks': weeks, 'totals': {'orders': total_orders, 'revenue': total_revenue, 'refunds': total_refunds, 'net_orders': net_orders}}147 adjusted_net = max(0, net_orders - 1)
148 return {'weeks': weeks, 'totals': {'orders': total_orders, 'revenue': total_revenue, 'refunds': total_refunds, 'net_orders': adjusted_net}}
Accumulator.total
fixtures/tiny_repo/classes.py:2-6
helper_sum
fixtures/tiny_repo/helpers.py:1-5
0.933
5 duplicated lines
Function A | Function B
2 def total(self, items):1def helper_sum(items):
3 total = 02 total = 0
4 for item in items:3 for item in items:
5 total += item4 total += item
6 return total5 return total
order_form_helpers.ts
fixtures/demo_monorepo/order_form_helpers.ts:1-64
invoice_form_helpers.ts
fixtures/demo_monorepo/invoice_form_helpers.ts:1-64
1.000
64 duplicated lines
Function A | Function B
<18 lines not shown><18 lines not shown>
19 const trimmedEmail = email.trim();19 const trimmedEmail = email.trim();
20 const trimmedPostal = postalCode.trim();20 const trimmedPostal = postalCode.trim();
21 if (!trimmedName) {21 if (!trimmedName) {
22 return { ok: false, message: "Name is required" };22 return { ok: false, message: "Name is required" };
23 }23 }
24 if (trimmedName.length < 2) {24 if (trimmedName.length < 2) {
25 return { ok: false, message: "Name must be at least 2 characters" };25 return { ok: false, message: "Name must be at least 2 characters" };
26 }26 }
27 if (!trimmedEmail) {27 if (!trimmedEmail) {
28 return { ok: false, message: "Email is required" };28 return { ok: false, message: "Email is required" };
29 }29 }
30 if (!trimmedEmail.includes("@") || !trimmedEmail.includes(".")) {30 if (!trimmedEmail.includes("@") || !trimmedEmail.includes(".")) {
31 return { ok: false, message: "Email format is invalid" };31 return { ok: false, message: "Email format is invalid" };
32 }32 }
33 if (!/^\d{5}$/.test(trimmedPostal)) {33 if (!/^\d{5}$/.test(trimmedPostal)) {
34 return { ok: false, message: "Postal code must be 5 digits" };34 return { ok: false, message: "Postal code must be 5 digits" };
35 }35 }
36 return { ok: true, message: "" };36 return { ok: true, message: "" };
37}37}
38export function buildFormTotals(items: FormItem[], discountPct: number): {38export function buildChargeTotals(charges: FormCharge[], discountPct: number): {
39 subtotal: number;39 subtotal: number;
40 discount: number;40 discount: number;
41 tax: number;41 tax: number;
42 total: number;42 total: number;
43} {43} {
44 let subtotal = 0;44 let subtotal = 0;
45 for (const item of items) {45 for (const charge of charges) {
46 subtotal += item.quantity * item.unitPrice;46 subtotal += charge.units * charge.rate;
47 }47 }
48 const normalizedSubtotal = Math.round(subtotal * 100) / 100;48 const normalizedSubtotal = Math.round(subtotal * 100) / 100;
49 const appliedDiscount = Math.round(normalizedSubtotal * Math.max(0, discountPct) * 100) / 100;49 const appliedDiscount = Math.round(normalizedSubtotal * Math.max(0, discountPct) * 100) / 100;
50 const discountedSubtotal = Math.max(0, normalizedSubtotal - appliedDiscount);50 const discountedSubtotal = Math.max(0, normalizedSubtotal - appliedDiscount);
51 const tax = Math.round(discountedSubtotal * 0.0825 * 100) / 100;51 const tax = Math.round(discountedSubtotal * 0.0825 * 100) / 100;
52 const total = Math.round((discountedSubtotal + tax) * 100) / 100;52 const total = Math.round((discountedSubtotal + tax) * 100) / 100;
53 return {53 return {
<6 lines not shown><6 lines not shown>