Schema: 1.0.0
Findings: 24
| Line | Function A | Line | Function B |
|---|---|---|---|
| 147 | def _select_compare(matches: list[CandidateMatch]) -> dict[str, object] \| None: | 65 | def _select_compare(matches: list[CandidateMatch]) -> dict[str, object] \| None: |
| 148 | compare = select_compare(matches) | 66 | compare = select_compare(matches) |
| 149 | if compare is None: | 67 | if compare is None: |
| 150 | return None | 68 | return None |
| 151 | return _compare_payload(compare, matches) | 69 | return _serialize_evidence(compare) |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 339 | def _merge_spans(spans: list[tuple[int, int]]) -> list[tuple[int, int]]: | 152 | def _covered_lines(spans: list[tuple[int, int]]) -> int: |
| 340 | if not spans: | 153 | if not spans: |
| 341 | return [] | 154 | return 0 |
| 342 | merged: list[tuple[int, int]] = [] | 155 | merged: list[list[int]] = [] |
| 343 | for start, end in sorted(spans): | 156 | for start, end in sorted(spans): |
| 344 | if not merged or start > merged[-1][1] + 1: | 157 | if not merged or start > merged[-1][1] + 1: |
| 345 | merged.append((start, end)) | 158 | merged.append([start, end]) |
| 346 | continue | 159 | continue |
| 347 | prev_start, prev_end = merged[-1] | 160 | if end > merged[-1][1]: |
| 348 | if end > prev_end: | 161 | merged[-1][1] = end |
| 349 | merged[-1] = (prev_start, end) | 162 | return sum((end - start + 1 for start, end in merged)) |
| 350 | return merged |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 32 | def embed(self, snippets: list[SnippetRef]) -> list[Embedding]: ... | 39 | def embed(self, snippets: list[SnippetRef]) -> list[Embedding]: ... |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 35 | def dim(self) -> int: ... | 36 | def dim(self) -> int: ... |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 43 | def test_func_threshold_edge(): | 90 | def test_exp_threshold_edge(): |
| 44 | thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0) | 91 | thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0) |
| 45 | at = rollup_findings([_match('FUNC', 0.95)], thresholds) | 92 | at = rollup_findings([_match('EXP', 0.9)], thresholds) |
| 46 | below = rollup_findings([_match('FUNC', 0.9499)], thresholds) | 93 | below = rollup_findings([_match('EXP', 0.8999)], thresholds) |
| 47 | assert at | 94 | assert at |
| 48 | assert not below | 95 | assert not below |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 51 | def test_win_threshold_edge(): | 98 | def test_min_window_hits_edge(): |
| 52 | thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0) | 99 | thresholds = Thresholds(func=0.95, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0) |
| 53 | file = FileRef(path='x.py', content_hash='h', language='python') | 100 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 54 | fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=20, code='pass', code_hash='a') | 101 | fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=20, code='pass', code_hash='a') |
| 55 | fn_b = FunctionRef(file=file, qualified_name='b', start_line=30, end_line=50, code='pass', code_hash='b') | 102 | fn_b = FunctionRef(file=file, qualified_name='b', start_line=30, end_line=50, code='pass', code_hash='b') |
| 56 | a1 = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='a1', snippet_hash='a1') | 103 | a1 = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='a1', snippet_hash='a1') |
| 57 | b1 = SnippetRef(kind='WIN', function=fn_b, start_line=30, end_line=32, text='b1', snippet_hash='b1') | 104 | b1 = SnippetRef(kind='WIN', function=fn_b, start_line=30, end_line=32, text='b1', snippet_hash='b1') |
| 58 | a2 = SnippetRef(kind='WIN', function=fn_a, start_line=4, end_line=6, text='a2', snippet_hash='a2') | 105 | a2 = SnippetRef(kind='WIN', function=fn_a, start_line=4, end_line=6, text='a2', snippet_hash='a2') |
| 59 | b2 = SnippetRef(kind='WIN', function=fn_b, start_line=33, end_line=35, text='b2', snippet_hash='b2') | 106 | b2 = SnippetRef(kind='WIN', function=fn_b, start_line=33, end_line=35, text='b2', snippet_hash='b2') |
| 60 | at = rollup_findings([CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.9, evidence=''), CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.9, evidence='')], thresholds) | 107 | m1 = CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.95, evidence='') |
| 61 | below = rollup_findings([CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.8999, evidence=''), CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.8999, evidence='')], thresholds) | 108 | m2 = CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.95, evidence='') |
| 62 | assert at | 109 | findings = rollup_findings([m1, m2], thresholds) |
| 63 | assert below | 110 | assert findings |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 6 | def test_expansion_generates_snippet(): | 22 | def test_expansion_respects_max_chars(): |
| 7 | files = collect_files(['fixtures/tiny_repo'], ['**/*.py'], []) | 23 | files = collect_files(['fixtures/tiny_repo'], ['**/*.py'], []) |
| 8 | functions = [fn for file in files for fn in extract_functions(file)] | 24 | functions = [fn for file in files for fn in extract_functions(file)] |
| 9 | snippets = expand_calls(functions, ExpansionParams(enabled=True, depth=1, max_chars=10000)) | 25 | snippets = expand_calls(functions, ExpansionParams(enabled=True, depth=1, max_chars=1)) |
| 10 | assert any((snippet.kind == 'EXP' for snippet in snippets)) | 26 | assert snippets == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| | <23 lines not shown> | | <1 line not shown> |
| 31 | def test_cli_diff_command(tmp_path: Path): | 32 | ) |
| 32 | repo = tmp_path / 'repo' | 33 | subprocess.check_call(["git", "init"], cwd=repo) |
| 33 | repo.mkdir() | 34 | subprocess.check_call(["git", "config", "user.email", "test@example.com"], cwd=repo) |
| 34 | (repo / 'a.py').write_text('\n\ndef add(nums):\n total = 0\n for n in nums:\n total += n\n return total\n', encoding='utf-8') | 35 | subprocess.check_call(["git", "config", "user.name", "Test User"], cwd=repo) |
| 35 | subprocess.check_call(['git', 'init'], cwd=repo) | 36 | subprocess.check_call(["git", "add", "a.py", "b.py"], cwd=repo) |
| 36 | subprocess.check_call(['git', 'config', 'user.email', 'test@example.com'], cwd=repo) | 37 | subprocess.check_call(["git", "commit", "-m", "init"], cwd=repo) |
| 37 | subprocess.check_call(['git', 'config', 'user.name', 'Test User'], cwd=repo) | 38 | (repo / "a.py").write_text( |
| 38 | subprocess.check_call(['git', 'add', 'a.py'], cwd=repo) | 39 | """ |
| 39 | subprocess.check_call(['git', 'commit', '-m', 'init'], cwd=repo) | 40 | def add(nums): |
| 40 | (repo / 'a.py').write_text('\n\ndef add(nums):\n total = 0\n for n in nums:\n total += n\n return total\n\n\ndef add_copy(values):\n total = 0\n for n in values:\n total += n\n return total\n', encoding='utf-8') | 41 | total = 0 |
| 41 | out = tmp_path / 'diff.json' | 42 | for n in nums: |
| 42 | cmd = [sys.executable, '-m', 'clonehunter', 'diff', '--base', 'HEAD', '--format', 'json', '--out', str(out)] | 43 | total += n |
| 43 | subprocess.check_call(cmd, cwd=repo, env=_env()) | 44 | return total |
| 44 | assert out.exists() | 45 | def add_copy(values): |
| | | 46 | total = 0 |
| | | 47 | for n in values: |
| | | 48 | total += n |
| | | 49 | return total |
| | | 50 | """, |
| | | 51 | encoding="utf-8", |
| | | 52 | ) |
| | | 53 | out = tmp_path / "diff.json" |
| | | 54 | env = os.environ.copy() |
| | | 55 | env["PYTHONPATH"] = str(Path(__file__).resolve().parents[1] / "src") |
| | | 56 | env["CLONEHUNTER_EMBEDDER"] = "stub" |
| | | 57 | cmd = [ |
| | | 58 | sys.executable, |
| | | 59 | "-m", |
| | | 60 | "clonehunter", |
| | | 61 | "diff", |
| | | 62 | "--base", |
| | | 63 | "HEAD", |
| | | 64 | "--format", |
| | | 65 | "json", |
| | | | <11 lines not shown> |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 15 | def test_pipeline_non_python_implicit_windows_only(tmp_path: Path) -> None: | 47 | def test_pipeline_non_python_allows_cross_file_types(tmp_path: Path) -> None: |
| 16 | repo = tmp_path / 'repo' | 48 | repo = tmp_path / 'repo' |
| 17 | repo.mkdir() | 49 | repo.mkdir() |
| 18 | code = 'function add(a, b) {\n return a + b;\n}\n' | 50 | code = 'function add(a, b) {\n return a + b;\n}\n' |
| 19 | (repo / 'a.js').write_text(code, encoding='utf-8') | 51 | (repo / 'a.js').write_text(code, encoding='utf-8') |
| 20 | (repo / 'b.js').write_text(code, encoding='utf-8') | 52 | (repo / 'b.ts').write_text(code, encoding='utf-8') |
| 21 | result = run_pipeline([str(repo)], CloneHunterConfig(include_globs=['**/*.js'], exclude_globs=[], embedder=EmbedderConfig(name='stub'), windows=WindowConfig(window_lines=3, stride_lines=1, min_nonempty=1), thresholds=Thresholds(func=0.99, win=0.8, exp=0.99, min_window_hits=1, lexical_min_ratio=0.0, lexical_weight=0.3))) | 53 | result = run_pipeline([str(repo)], CloneHunterConfig(include_globs=['**/*.js', '**/*.ts'], exclude_globs=[], embedder=EmbedderConfig(name='stub'), windows=WindowConfig(window_lines=3, stride_lines=1, min_nonempty=1), thresholds=Thresholds(func=0.99, win=0.8, exp=0.99, min_window_hits=1, lexical_min_ratio=0.0, lexical_weight=0.3))) |
| 22 | assert result.stats.file_count == 2 | 54 | assert result.stats.file_count == 2 |
| 23 | assert result.stats.function_count == 0 | ||
| 24 | assert result.stats.finding_count >= 1 | 55 | assert result.stats.finding_count >= 1 |
| 25 | finding = result.findings[0] | ||
| 26 | assert finding.function_a.qualified_name != '<file>' | ||
| 27 | assert finding.function_b.qualified_name != '<file>' |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 6 | def test_rollup_min_window_hits(): | 109 | def test_rollup_applies_lexical_filter() -> None: |
| 7 | file = FileRef(path='x.py', content_hash='h', language='python') | 110 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 8 | fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=5, code='pass', code_hash='a') | 111 | fn_a = FunctionRef(file=file, qualified_name='a', start_line=1, end_line=5, code='pass', code_hash='a') |
| 9 | fn_b = FunctionRef(file=file, qualified_name='b', start_line=10, end_line=14, code='pass', code_hash='b') | 112 | fn_b = FunctionRef(file=file, qualified_name='b', start_line=10, end_line=14, code='pass', code_hash='b') |
| 10 | a1 = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='a1', snippet_hash='a1') | 113 | a = SnippetRef(kind='WIN', function=fn_a, start_line=1, end_line=3, text='def alpha():\n return 1', snippet_hash='a1') |
| 11 | b1 = SnippetRef(kind='WIN', function=fn_b, start_line=10, end_line=12, text='b1', snippet_hash='b1') | 114 | b = SnippetRef(kind='WIN', function=fn_b, start_line=10, end_line=12, text='def beta():\n return 2', snippet_hash='b1') |
| 12 | a2 = SnippetRef(kind='WIN', function=fn_a, start_line=2, end_line=4, text='a2', snippet_hash='a2') | 115 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=0.99, evidence='') |
| 13 | b2 = SnippetRef(kind='WIN', function=fn_b, start_line=11, end_line=13, text='b2', snippet_hash='b2') | 116 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1, lexical_min_ratio=0.6)) |
| 14 | matches = [CandidateMatch(snippet_a=a1, snippet_b=b1, similarity=0.5, evidence=''), CandidateMatch(snippet_a=a2, snippet_b=b2, similarity=0.5, evidence='')] | 117 | assert findings == [] |
| 15 | findings = rollup_findings(matches, Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=2, lexical_min_ratio=0.0)) | ||
| 16 | assert findings |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 37 | def test_rollup_filters_overlapping_windows_same_function(): | 49 | def test_rollup_drops_identical_windows_same_function() -> None: |
| 38 | file = FileRef(path='x.py', content_hash='h', language='python') | 50 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 39 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c') | 51 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c') |
| 40 | a = SnippetRef(kind='WIN', function=fn, start_line=1, end_line=5, text='a', snippet_hash='a') | 52 | a = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='a', snippet_hash='a') |
| 41 | b = SnippetRef(kind='WIN', function=fn, start_line=4, end_line=8, text='b', snippet_hash='b') | 53 | b = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='b', snippet_hash='b') |
| 42 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') | 54 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') |
| 43 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) | 55 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) |
| 44 | assert findings == [] | 56 | assert findings == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 37 | def test_rollup_filters_overlapping_windows_same_function(): | 61 | def test_rollup_drops_identical_func_self_match() -> None: |
| 38 | file = FileRef(path='x.py', content_hash='h', language='python') | 62 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 39 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c') | 63 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c') |
| 40 | a = SnippetRef(kind='WIN', function=fn, start_line=1, end_line=5, text='a', snippet_hash='a') | 64 | a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a') |
| 41 | b = SnippetRef(kind='WIN', function=fn, start_line=4, end_line=8, text='b', snippet_hash='b') | 65 | b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b') |
| 42 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') | 66 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') |
| 43 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) | 67 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) |
| 44 | assert findings == [] | 68 | assert findings == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 37 | def test_rollup_filters_overlapping_windows_same_function(): | 73 | def test_rollup_drops_overlapping_func_win_same_function() -> None: |
| 38 | file = FileRef(path='x.py', content_hash='h', language='python') | 74 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 39 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c') | 75 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c') |
| 40 | a = SnippetRef(kind='WIN', function=fn, start_line=1, end_line=5, text='a', snippet_hash='a') | 76 | func = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=30, text='f', snippet_hash='f') |
| 41 | b = SnippetRef(kind='WIN', function=fn, start_line=4, end_line=8, text='b', snippet_hash='b') | 77 | win = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='w', snippet_hash='w') |
| 42 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') | 78 | match = CandidateMatch(snippet_a=func, snippet_b=win, similarity=1.0, evidence='') |
| 43 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) | 79 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) |
| 44 | assert findings == [] | 80 | assert findings == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 49 | def test_rollup_drops_identical_windows_same_function() -> None: | 61 | def test_rollup_drops_identical_func_self_match() -> None: |
| 50 | file = FileRef(path='x.py', content_hash='h', language='python') | 62 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 51 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c') | 63 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c') |
| 52 | a = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='a', snippet_hash='a') | 64 | a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a') |
| 53 | b = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='b', snippet_hash='b') | 65 | b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b') |
| 54 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') | 66 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') |
| 55 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) | 67 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) |
| 56 | assert findings == [] | 68 | assert findings == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 49 | def test_rollup_drops_identical_windows_same_function() -> None: | 73 | def test_rollup_drops_overlapping_func_win_same_function() -> None: |
| 50 | file = FileRef(path='x.py', content_hash='h', language='python') | 74 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 51 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c') | 75 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c') |
| 52 | a = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='a', snippet_hash='a') | 76 | func = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=30, text='f', snippet_hash='f') |
| 53 | b = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='b', snippet_hash='b') | 77 | win = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='w', snippet_hash='w') |
| 54 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') | 78 | match = CandidateMatch(snippet_a=func, snippet_b=win, similarity=1.0, evidence='') |
| 55 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) | 79 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) |
| 56 | assert findings == [] | 80 | assert findings == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 61 | def test_rollup_drops_identical_func_self_match() -> None: | 73 | def test_rollup_drops_overlapping_func_win_same_function() -> None: |
| 62 | file = FileRef(path='x.py', content_hash='h', language='python') | 74 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 63 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c') | 75 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=30, code='pass', code_hash='c') |
| 64 | a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a') | 76 | func = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=30, text='f', snippet_hash='f') |
| 65 | b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b') | 77 | win = SnippetRef(kind='WIN', function=fn, start_line=5, end_line=24, text='w', snippet_hash='w') |
| 66 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') | 78 | match = CandidateMatch(snippet_a=func, snippet_b=win, similarity=1.0, evidence='') |
| 67 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) | 79 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) |
| 68 | assert findings == [] | 80 | assert findings == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 61 | def test_rollup_drops_identical_func_self_match() -> None: | 87 | def test_rollup_drops_overlapping_functions_same_file() -> None: |
| 62 | file = FileRef(path='x.py', content_hash='h', language='python') | 88 | file = FileRef(path='x.py', content_hash='h', language='python') |
| 63 | fn = FunctionRef(file=file, qualified_name='f', start_line=1, end_line=10, code='pass', code_hash='c') | 89 | fn_outer = FunctionRef(file=file, qualified_name='outer', start_line=1, end_line=40, code='pass', code_hash='o') |
| 64 | a = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='a', snippet_hash='a') | 90 | fn_inner = FunctionRef(file=file, qualified_name='inner', start_line=10, end_line=20, code='pass', code_hash='i') |
| 65 | b = SnippetRef(kind='FUNC', function=fn, start_line=1, end_line=10, text='b', snippet_hash='b') | 91 | outer = SnippetRef(kind='FUNC', function=fn_outer, start_line=1, end_line=40, text='o', snippet_hash='o') |
| 66 | match = CandidateMatch(snippet_a=a, snippet_b=b, similarity=1.0, evidence='') | 92 | inner = SnippetRef(kind='FUNC', function=fn_inner, start_line=10, end_line=20, text='i', snippet_hash='i') |
| 67 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1)) | 93 | match = CandidateMatch(snippet_a=outer, snippet_b=inner, similarity=1.0, evidence='') |
| | | 94 | findings = rollup_findings([match], Thresholds(func=0.9, win=0.9, exp=0.9, min_window_hits=1, lexical_min_ratio=0.0)) |
| 68 | assert findings == [] | 95 | assert findings == [] |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 5 | def _finding(a: str, b: str) -> Finding: | 17 | def _sample_result() -> ScanResult: |
| 6 | file_a = FileRef(path=f'{a}.py', content_hash='h', language='python') | 18 | file_a = FileRef(path='fixtures/tiny_repo/a.py', content_hash='h', language='python') |
| 7 | file_b = FileRef(path=f'{b}.py', content_hash='h', language='python') | 19 | file_b = FileRef(path='fixtures/tiny_repo/b.py', content_hash='h', language='python') |
| 8 | fn_a = FunctionRef(file=file_a, qualified_name=a, start_line=1, end_line=2, code='pass', code_hash=a) | 20 | fn_a = FunctionRef(file=file_a, qualified_name='a', start_line=1, end_line=2, code='pass', code_hash='a') |
| 9 | fn_b = FunctionRef(file=file_b, qualified_name=b, start_line=1, end_line=2, code='pass', code_hash=b) | 21 | fn_b = FunctionRef(file=file_b, qualified_name='b', start_line=10, end_line=12, code='pass', code_hash='b') |
| 10 | snip = SnippetRef(kind='FUNC', function=fn_a, start_line=1, end_line=2, text='pass', snippet_hash=a) | 22 | snip = SnippetRef(kind='FUNC', function=fn_a, start_line=1, end_line=2, text='pass', snippet_hash='s') |
| 11 | match = CandidateMatch(snippet_a=snip, snippet_b=snip, similarity=1.0, evidence='') | 23 | match = CandidateMatch(snippet_a=snip, snippet_b=snip, similarity=1.0, evidence='') |
| 12 | return Finding(function_a=fn_a, function_b=fn_b, score=1.0, duplicated_lines=2, evidence=[match], reasons=['func'], metadata={}) | 24 | finding = Finding(function_a=fn_a, function_b=fn_b, score=1.0, duplicated_lines=2, evidence=[match], reasons=['func'], metadata={}) |
| | | 25 | return ScanResult(findings=[finding], stats=ScanStats(0, 0, 0, 0, 1, 0, 0), config_snapshot={}, timing={}) |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 14 | def normalize_customer_name(raw: str) -> str: | 14 | def normalize_client_name(raw: str) -> str: |
| 15 | cleaned = ' '.join((part for part in raw.strip().split(' ') if part)) | 15 | cleaned = ' '.join((part for part in raw.strip().split(' ') if part)) |
| 16 | return cleaned.title() | 16 | return cleaned.title() |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 76 | def summarize_totals(payload: dict[str, object]) -> str: | 76 | def summarize_totals(payload: dict[str, object]) -> str: |
| 77 | subtotal = float(payload.get('subtotal', 0.0)) | 77 | subtotal = float(payload.get('subtotal', 0.0)) |
| 78 | discount = float(payload.get('discount', 0.0)) | 78 | discount = float(payload.get('discount', 0.0)) |
| 79 | tax = float(payload.get('tax', 0.0)) | 79 | tax = float(payload.get('tax', 0.0)) |
| 80 | shipping = float(payload.get('shipping', 0.0)) | 80 | service_fee = float(payload.get('service_fee', 0.0)) |
| 81 | total = float(payload.get('total', 0.0)) | 81 | total = float(payload.get('total', 0.0)) |
| 82 | return f'subtotal=${subtotal:,.2f}; discount=${discount:,.2f}; tax=${tax:,.2f}; shipping=${shipping:,.2f}; total=${total:,.2f}' | 82 | return f'subtotal=${subtotal:,.2f}; discount=${discount:,.2f}; tax=${tax:,.2f}; service_fee=${service_fee:,.2f}; total=${total:,.2f}' |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 88 | def build_monthly_breakdown(daily_totals: list[float], weekly_marketing_spend: list[float], fixed_cost: float) -> dict[str, object]: | 88 | def build_monthly_breakdown(daily_totals: list[float], weekly_marketing_spend: list[float], fixed_cost: float) -> dict[str, object]: |
| 89 | weeks: list[dict[str, object]] = [] | 89 | weeks: list[dict[str, object]] = [] |
| 90 | monthly_revenue = 0.0 | 90 | monthly_revenue = 0.0 |
| 91 | monthly_cost = 0.0 | 91 | monthly_cost = 0.0 |
| 92 | for idx in range(4): | 92 | for idx in range(4): |
| 93 | start = idx * 7 | 93 | start = idx * 7 |
| 94 | end = min(start + 7, len(daily_totals)) | 94 | end = min(start + 7, len(daily_totals)) |
| 95 | revenue = round(sum(daily_totals[start:end]), 2) | 95 | revenue = round(sum(daily_totals[start:end]), 2) |
| 96 | marketing = round(weekly_marketing_spend[idx] if idx < len(weekly_marketing_spend) else 0.0, 2) | 96 | outreach = round(weekly_marketing_spend[idx] if idx < len(weekly_marketing_spend) else 0.0, 2) |
| 97 | fixed = round(fixed_cost, 2) | 97 | fixed = round(fixed_cost, 2) |
| 98 | cost = round(marketing + fixed, 2) | 98 | support_overhead = 15.0 if idx == 3 else 0.0 |
| | | 99 | cost = round(outreach + fixed + support_overhead, 2) |
| 99 | margin = round(revenue - cost, 2) | 100 | margin = round(revenue - cost, 2) |
| 100 | margin_pct = round(margin / revenue * 100, 2) if revenue > 0 else 0.0 | 101 | margin_pct = round(margin / revenue * 100, 2) if revenue > 0 else 0.0 |
| | | 102 | margin_pct = min(margin_pct, 95.0) |
| 101 | monthly_revenue += revenue | 103 | monthly_revenue += revenue |
| 102 | monthly_cost += cost | 104 | monthly_cost += cost |
| 103 | weeks.append({'week': idx + 1, 'revenue': revenue, 'cost': cost, 'margin': margin, 'margin_pct': margin_pct}) | 105 | weeks.append({'week': idx + 1, 'revenue': revenue, 'cost': cost, 'margin': margin, 'margin_pct': margin_pct, 'warning': 'low_margin' if margin_pct < 20 else ''}) |
| 104 | monthly_margin = round(monthly_revenue - monthly_cost, 2) | 106 | monthly_margin = round(monthly_revenue - monthly_cost, 2) |
| 105 | return {'weeks': weeks, 'monthly_revenue': round(monthly_revenue, 2), 'monthly_cost': round(monthly_cost, 2), 'monthly_margin': monthly_margin, 'profitable': monthly_margin > 0} | 107 | return {'weeks': weeks, 'monthly_revenue': round(monthly_revenue, 2), 'monthly_cost': round(monthly_cost, 2), 'monthly_margin': monthly_margin, 'net_positive': monthly_margin > 0} |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 127 | def compile_weekly_metrics(daily_orders: list[int], daily_revenue: list[float], refund_counts: list[int]) -> dict[str, object]: | 132 | def compile_weekly_metrics(daily_orders: list[int], daily_revenue: list[float], refund_counts: list[int]) -> dict[str, object]: |
| 128 | weeks: list[dict[str, object]] = [] | 133 | weeks: list[dict[str, object]] = [] |
| 129 | for idx in range(4): | 134 | for idx in range(4): |
| 130 | start = idx * 7 | 135 | start = idx * 7 |
| 131 | end = min(start + 7, len(daily_orders)) | 136 | end = min(start + 7, len(daily_orders)) |
| 132 | order_count = sum(daily_orders[start:end]) | 137 | order_count = sum(daily_orders[start:end]) |
| 133 | revenue = round(sum(daily_revenue[start:end]), 2) | 138 | revenue = round(sum(daily_revenue[start:end]), 2) |
| 134 | refunds = sum(refund_counts[start:end]) | 139 | refunds = sum(refund_counts[start:end]) |
| 135 | fulfillment_rate = round((order_count - refunds) / order_count * 100, 2) if order_count else 0.0 | 140 | fulfillment_rate = round((order_count - refunds) / order_count * 100, 2) if order_count else 0.0 |
| 136 | avg_order_value = round(revenue / order_count, 2) if order_count else 0.0 | 141 | avg_order_value = round(revenue / order_count, 2) if order_count else 0.0 |
| 137 | weeks.append({'week': idx + 1, 'orders': int(order_count), 'revenue': revenue, 'refunds': int(refunds), 'fulfillment_rate': fulfillment_rate, 'avg_order_value': avg_order_value}) | 142 | weeks.append({'week': idx + 1, 'orders': int(order_count), 'revenue': revenue, 'refunds': int(refunds), 'fulfillment_rate': fulfillment_rate, 'avg_order_value': avg_order_value}) |
| 138 | total_orders = sum((int(row['orders']) for row in weeks)) | 143 | total_orders = sum((int(row['orders']) for row in weeks)) |
| 139 | total_revenue = round(sum((float(row['revenue']) for row in weeks)), 2) | 144 | total_revenue = round(sum((float(row['revenue']) for row in weeks)), 2) |
| 140 | total_refunds = sum((int(row['refunds']) for row in weeks)) | 145 | total_refunds = sum((int(row['refunds']) for row in weeks)) |
| 141 | net_orders = max(0, total_orders - total_refunds) | 146 | net_orders = max(0, total_orders - total_refunds) |
| 142 | return {'weeks': weeks, 'totals': {'orders': total_orders, 'revenue': total_revenue, 'refunds': total_refunds, 'net_orders': net_orders}} | 147 | adjusted_net = max(0, net_orders - 1) |
| | | 148 | return {'weeks': weeks, 'totals': {'orders': total_orders, 'revenue': total_revenue, 'refunds': total_refunds, 'net_orders': adjusted_net}} |
| Line | Function A | Line | Function B |
|---|---|---|---|
| 2 | def total(self, items): | 1 | def helper_sum(items): |
| 3 | total = 0 | 2 | total = 0 |
| 4 | for item in items: | 3 | for item in items: |
| 5 | total += item | 4 | total += item |
| 6 | return total | 5 | return total |
| Line | Function A | Line | Function B |
|---|---|---|---|
| | <18 lines not shown> | | <18 lines not shown> |
| 19 | const trimmedEmail = email.trim(); | 19 | const trimmedEmail = email.trim(); |
| 20 | const trimmedPostal = postalCode.trim(); | 20 | const trimmedPostal = postalCode.trim(); |
| 21 | if (!trimmedName) { | 21 | if (!trimmedName) { |
| 22 | return { ok: false, message: "Name is required" }; | 22 | return { ok: false, message: "Name is required" }; |
| 23 | } | 23 | } |
| 24 | if (trimmedName.length < 2) { | 24 | if (trimmedName.length < 2) { |
| 25 | return { ok: false, message: "Name must be at least 2 characters" }; | 25 | return { ok: false, message: "Name must be at least 2 characters" }; |
| 26 | } | 26 | } |
| 27 | if (!trimmedEmail) { | 27 | if (!trimmedEmail) { |
| 28 | return { ok: false, message: "Email is required" }; | 28 | return { ok: false, message: "Email is required" }; |
| 29 | } | 29 | } |
| 30 | if (!trimmedEmail.includes("@") \|\| !trimmedEmail.includes(".")) { | 30 | if (!trimmedEmail.includes("@") \|\| !trimmedEmail.includes(".")) { |
| 31 | return { ok: false, message: "Email format is invalid" }; | 31 | return { ok: false, message: "Email format is invalid" }; |
| 32 | } | 32 | } |
| 33 | if (!/^\d{5}$/.test(trimmedPostal)) { | 33 | if (!/^\d{5}$/.test(trimmedPostal)) { |
| 34 | return { ok: false, message: "Postal code must be 5 digits" }; | 34 | return { ok: false, message: "Postal code must be 5 digits" }; |
| 35 | } | 35 | } |
| 36 | return { ok: true, message: "" }; | 36 | return { ok: true, message: "" }; |
| 37 | } | 37 | } |
| 38 | export function buildFormTotals(items: FormItem[], discountPct: number): { | 38 | export function buildChargeTotals(charges: FormCharge[], discountPct: number): { |
| 39 | subtotal: number; | 39 | subtotal: number; |
| 40 | discount: number; | 40 | discount: number; |
| 41 | tax: number; | 41 | tax: number; |
| 42 | total: number; | 42 | total: number; |
| 43 | } { | 43 | } { |
| 44 | let subtotal = 0; | 44 | let subtotal = 0; |
| 45 | for (const item of items) { | 45 | for (const charge of charges) { |
| 46 | subtotal += item.quantity * item.unitPrice; | 46 | subtotal += charge.units * charge.rate; |
| 47 | } | 47 | } |
| 48 | const normalizedSubtotal = Math.round(subtotal * 100) / 100; | 48 | const normalizedSubtotal = Math.round(subtotal * 100) / 100; |
| 49 | const appliedDiscount = Math.round(normalizedSubtotal * Math.max(0, discountPct) * 100) / 100; | 49 | const appliedDiscount = Math.round(normalizedSubtotal * Math.max(0, discountPct) * 100) / 100; |
| 50 | const discountedSubtotal = Math.max(0, normalizedSubtotal - appliedDiscount); | 50 | const discountedSubtotal = Math.max(0, normalizedSubtotal - appliedDiscount); |
| 51 | const tax = Math.round(discountedSubtotal * 0.0825 * 100) / 100; | 51 | const tax = Math.round(discountedSubtotal * 0.0825 * 100) / 100; |
| 52 | const total = Math.round((discountedSubtotal + tax) * 100) / 100; | 52 | const total = Math.round((discountedSubtotal + tax) * 100) / 100; |
| 53 | return { | 53 | return { |
| | <6 lines not shown> | | <6 lines not shown> |