strata_match

strata-match: Two-stage vector + LLM job-to-profile matching.

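This package exposes a small public API for matching job postings to candidate profiles:

- Factory: create_matcher returns a Matcher configured with embedding and optional LLM scoring backends.
- Matching: match_job and match_batch score jobs against a profile.
- Data types: CandidateProfile, JobDescription, MatchResult, BatchMatchResult, and ConfidenceTier.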

Advanced integrations (custom embedding/LLM providers) live under strata_match.embeddings, strata_match.llm, and strata_match.providers — they are not re-exported from the root package.

Example::

from strata_match import (
    CandidateProfile,
    JobDescription,
    create_matcher,
    match_job,
)

matcher = create_matcher("openai", vector_threshold=0.35)
profile = CandidateProfile(title="Engineer", skills=["Python"])
job = JobDescription(title="Backend Role", company="Acme")
result = await match_job(matcher, profile, job)
print(result.score, result.confidence_tier)
 1"""strata-match: Two-stage vector + LLM job-to-profile matching.
 2
 3This package exposes a small **public API** for matching job postings to
 4candidate profiles:
 5
 6* **Factory:** :func:`create_matcher` returns a :class:`Matcher` configured
 7  with embedding and optional LLM scoring backends.
 8* **Matching:** :func:`match_job` and :func:`match_batch` score jobs against
 9  a profile.
10* **Data types:** :class:`CandidateProfile`, :class:`JobDescription`,
11  :class:`MatchResult`, :class:`BatchMatchResult`, and :class:`ConfidenceTier`.
12
13Advanced integrations (custom embedding/LLM providers) live under
14``strata_match.embeddings``, ``strata_match.llm``, and
15``strata_match.providers`` — they are not re-exported from the root package.
16
17Example::
18
19    from strata_match import (
20        CandidateProfile,
21        JobDescription,
22        create_matcher,
23        match_job,
24    )
25
26    matcher = create_matcher("openai", vector_threshold=0.35)
27    profile = CandidateProfile(title="Engineer", skills=["Python"])
28    job = JobDescription(title="Backend Role", company="Acme")
29    result = await match_job(matcher, profile, job)
30    print(result.score, result.confidence_tier)
31"""
32
33from __future__ import annotations
34
35from strata_match.exceptions import (
36    ConfigurationError,
37    EmbeddingError,
38    ProviderError,
39    ScoringError,
40    StrataMatchError,
41)
42from strata_match.matcher import Matcher, create_matcher, match_batch, match_job
43from strata_match.models import (
44    BatchMatchResult,
45    CandidateProfile,
46    ConfidenceTier,
47    JobDescription,
48    MatchResult,
49)
50
51__all__ = [
52    "BatchMatchResult",
53    "CandidateProfile",
54    "ConfidenceTier",
55    "ConfigurationError",
56    "EmbeddingError",
57    "JobDescription",
58    "MatchResult",
59    "Matcher",
60    "ProviderError",
61    "ScoringError",
62    "StrataMatchError",
63    "create_matcher",
64    "match_batch",
65    "match_job",
66]
67
68__version__ = "0.2.2"
class BatchMatchResult(pydantic.main.BaseModel):
class BatchMatchResult(BaseModel):
    """Result of matching multiple jobs against a candidate profile.

    ``jobs_skipped`` counts jobs that fell below the matcher's vector
    threshold. ``results`` is expected in ``score``-descending order; this
    model declares no ordering logic of its own, so the producer is
    responsible for sorting before construction.

    All counters are validated as non-negative.

    Example::

        assert batch.jobs_evaluated == len(jobs)
        for r in batch.results:
            print(r.job_title, r.score)
    """

    results: list[MatchResult] = Field(default_factory=list)
    # Counters share the ``ge=0`` constraint already applied to
    # ``total_tokens`` and ``llm_fallback_count`` below.
    jobs_evaluated: int = Field(default=0, ge=0)
    jobs_skipped: int = Field(default=0, ge=0)
    total_tokens: int = Field(default=0, ge=0)
    duration_ms: float = Field(default=0.0, ge=0.0)
    llm_scored_count: int = Field(default=0, ge=0)
    llm_fallback_count: int = Field(
        default=0,
        ge=0,
        description=(
            "Rows where LLM scoring was attempted but failed (fallback returned). "
            "Distinct from jobs_skipped (below vector threshold) and "
            "llm_scored_count (successful LLM calls)."
        ),
    )

    @property
    def strong_matches(self) -> list[MatchResult]:
        """Return the entries of ``results`` whose ``is_strong_match`` is true.

        The relative order of ``results`` is preserved.
        """
        return [r for r in self.results if r.is_strong_match]

Result of matching multiple jobs against a candidate profile.

results is sorted by score descending. jobs_skipped counts jobs that fell below the matcher's vector threshold.

Example::

assert batch.jobs_evaluated == len(jobs)
for r in batch.results:
    print(r.job_title, r.score)
results: list[MatchResult] = PydanticUndefined
jobs_evaluated: int = 0
jobs_skipped: int = 0
total_tokens: int = 0
duration_ms: float = 0.0
llm_scored_count: int = 0
llm_fallback_count: int = 0

Rows where LLM scoring was attempted but failed (fallback returned). Distinct from jobs_skipped (below vector threshold) and llm_scored_count (successful LLM calls).

strong_matches: list[MatchResult]
185    @property
186    def strong_matches(self) -> list[MatchResult]:
187        return [r for r in self.results if r.is_strong_match]
class CandidateProfile(pydantic.main.BaseModel):
class CandidateProfile(BaseModel):
    """Candidate side of a match: a public profile without PII fields.

    The job side is modelled by :class:`JobDescription`. Only ``title`` is
    required; every other field has a default. A pre-computed vector can be
    supplied through ``embedding``.

    Example::

        CandidateProfile(
            title="Senior Engineer",
            skills=["Python", "PostgreSQL"],
            years_of_experience=8,
            experience_summary="Backend and distributed systems.",
        )
    """

    # The only required field.
    title: str
    skills: list[str] = Field(default_factory=list)
    experience_summary: str = ""
    # Validated as non-negative.
    years_of_experience: int = Field(default=0, ge=0)
    education: list[str] = Field(default_factory=list)
    achievements: list[str] = Field(default_factory=list)
    # Free-form mapping; no schema is enforced by this model.
    preferences: dict[str, Any] = Field(default_factory=dict)
    embedding: list[float] | None = Field(
        default=None,
        description="Optional pre-computed embedding vector",
    )
    certifications: list[str] = Field(default_factory=list)
    industries: list[str] = Field(default_factory=list)
    preferred_locations: list[str] = Field(default_factory=list)

Public candidate profile for matching (no PII fields).

Attributes map to embedding text and optional pre-computed embedding vectors. Use JobDescription for the job side.

Example::

CandidateProfile(
    title="Senior Engineer",
    skills=["Python", "PostgreSQL"],
    years_of_experience=8,
    experience_summary="Backend and distributed systems.",
)
title: str = PydanticUndefined
skills: list[str] = PydanticUndefined
experience_summary: str = ''
years_of_experience: int = 0
education: list[str] = PydanticUndefined
achievements: list[str] = PydanticUndefined
preferences: dict[str, typing.Any] = PydanticUndefined
embedding: list[float] | None = None

Optional pre-computed embedding vector

certifications: list[str] = PydanticUndefined
industries: list[str] = PydanticUndefined
preferred_locations: list[str] = PydanticUndefined
class ConfidenceTier(enum.StrEnum):
12class ConfidenceTier(StrEnum):
13    """Match confidence classification — four tiers from strongest to weakest.
14
15    VERY_HIGH: Exceptional semantic AND LLM-confirmed fit.  Display as "Very High".
16    HIGH:      Strong fit confirmed by both vector and LLM.  Display as "High".
17    MEDIUM:    Meaningful overlap; worth exploring.            Display as "Medium".
18    LOW:       Weak fit — above the vector floor but not actionable. Display as "Low".
19    """
20
21    VERY_HIGH = "very_high"
22    HIGH = "high"
23    MEDIUM = "medium"
24    LOW = "low"

Match confidence classification — four tiers from strongest to weakest.

- VERY_HIGH: Exceptional semantic AND LLM-confirmed fit. Display as "Very High".
- HIGH: Strong fit confirmed by both vector and LLM. Display as "High".
- MEDIUM: Meaningful overlap; worth exploring. Display as "Medium".
- LOW: Weak fit — above the vector floor but not actionable. Display as "Low".

VERY_HIGH = <ConfidenceTier.VERY_HIGH: 'very_high'>
HIGH = <ConfidenceTier.HIGH: 'high'>
MEDIUM = <ConfidenceTier.MEDIUM: 'medium'>
LOW = <ConfidenceTier.LOW: 'low'>
class ConfigurationError(strata_match.StrataMatchError, builtins.ValueError):
class ConfigurationError(StrataMatchError, ValueError):
    """Raised for invalid configuration, e.g. an unknown provider name or bad parameters.

    Derives from both :class:`StrataMatchError` and :class:`ValueError`, so a
    handler for either type catches it.
    """

Invalid configuration (e.g. unknown provider name, bad parameters).

class EmbeddingError(strata_match.StrataMatchError):
class EmbeddingError(StrataMatchError):
    """Raised when an embedding API call fails (network, auth, rate limits, etc.).

    Subclass of :class:`StrataMatchError`.
    """

Embedding API call failed (network, auth, rate limits, etc.).

class JobDescription(pydantic.main.BaseModel):
class JobDescription(BaseModel):
    """Job side of a match: a structured description of one posting.

    Paired with a :class:`CandidateProfile`, an instance is the input to
    :func:`~strata_match.matcher.match_job` and
    :func:`~strata_match.matcher.match_batch`. Only ``title`` is required.

    Example::

        JobDescription(
            title="Staff Engineer",
            company="Acme",
            requirements=["Python", "Leadership"],
            description="Lead the platform team.",
        )
    """

    # The only required field.
    title: str
    company: str = ""
    description: str = ""
    requirements: list[str] = Field(default_factory=list)
    preferred_qualifications: list[str] = Field(default_factory=list)
    # Optional free-text metadata; ``None`` when not supplied.
    location: str | None = None
    salary_range: str | None = None
    employment_type: str | None = None
    external_id: str | None = None
    embedding: list[float] | None = Field(
        default=None,
        description="Optional pre-computed embedding vector",
    )
    # NOTE(review): the description states a [0, 1] range, but no ge/le bound
    # is declared on the field, so the range is not validated here.
    ann_score: float | None = Field(
        default=None,
        description=(
            "Pre-computed ANN cosine similarity in [0, 1] from SQL ANN retrieval (PCC-1895). "
            "When set, VectorScorer returns this directly, skipping embed + cosine."
        ),
    )

Structured job description for matching.

Together with CandidateProfile, this is the input to ~strata_match.matcher.match_job() / ~strata_match.matcher.match_batch().

Example::

JobDescription(
    title="Staff Engineer",
    company="Acme",
    requirements=["Python", "Leadership"],
    description="Lead the platform team.",
)
title: str = PydanticUndefined
company: str = ''
description: str = ''
requirements: list[str] = PydanticUndefined
preferred_qualifications: list[str] = PydanticUndefined
location: str | None = None
salary_range: str | None = None
employment_type: str | None = None
external_id: str | None = None
embedding: list[float] | None = None

Optional pre-computed embedding vector

ann_score: float | None = None

Pre-computed ANN cosine similarity in [0, 1] from SQL ANN retrieval (PCC-1895). When set, VectorScorer returns this directly, skipping embed + cosine.

class MatchResult(pydantic.main.BaseModel):
 95class MatchResult(BaseModel):
 96    """Result of matching a single job against a candidate profile.
 97
 98    ``score`` is the primary signal (0–100). When LLM scoring **succeeded**,
 99    ``llm_scored`` is ``True``, ``llm_error`` is ``None``, and ``rationale`` /
100    ``strengths`` / ``gaps`` are populated. When LLM scoring was **attempted**
101    but failed, ``llm_scored`` is ``False`` and ``llm_error`` explains why.
102    When no LLM stage ran, ``llm_scored`` is ``False`` and ``llm_error`` is
103    ``None``.
104
105    Example::
106
107        assert 0 <= result.score <= 100
108        if result.confidence_tier == ConfidenceTier.HIGH:
109            ...
110    """
111
112    job_title: str
113    job_company: str = ""
114    score: float = Field(ge=0.0, le=100.0, description="Overall match score 0-100")
115    vector_score: float | None = Field(
116        default=None, ge=0.0, le=100.0, description="Vector similarity score 0-100"
117    )
118    confidence_tier: ConfidenceTier = ConfidenceTier.LOW
119    rationale: str = ""
120    strengths: list[str] = Field(default_factory=list)
121    gaps: list[str] = Field(default_factory=list)
122    salary_match: bool | None = Field(default=None, description="Whether salary expectations align")
123    culture_signals: list[str] = Field(default_factory=list)
124    what_they_want: str = Field(
125        default="",
126        description=(
127            "Structured WTAW analysis: role archetype + 3 key needs read between JD lines "
128            "+ candidate proof points mapped to each need. Empty string when LLM omits it."
129        ),
130    )
131    llm_scored: bool = False
132    llm_error: str | None = Field(
133        default=None,
134        description=(
135            "Set when LLM scoring was attempted but failed; None if not attempted or succeeded"
136        ),
137    )
138    tokens_used: int = Field(default=0, ge=0)
139    prompt_version: str | None = Field(
140        default=None, description="Version of the scoring prompt template used"
141    )
142
143    @property
144    def is_strong_match(self) -> bool:
145        """True for HIGH and VERY_HIGH tiers with a score >= 70."""
146        return (
147            self.confidence_tier
148            in (
149                ConfidenceTier.VERY_HIGH,
150                ConfidenceTier.HIGH,
151            )
152            and self.score >= 70.0
153        )

Result of matching a single job against a candidate profile.

score is the primary signal (0–100). When LLM scoring succeeded, llm_scored is True, llm_error is None, and rationale / strengths / gaps are populated. When LLM scoring was attempted but failed, llm_scored is False and llm_error explains why. When no LLM stage ran, llm_scored is False and llm_error is None.

Example::

assert 0 <= result.score <= 100
if result.confidence_tier == ConfidenceTier.HIGH:
    ...
job_title: str = PydanticUndefined
job_company: str = ''
score: float = PydanticUndefined

Overall match score 0-100

vector_score: float | None = None

Vector similarity score 0-100

confidence_tier: ConfidenceTier = <ConfidenceTier.LOW: 'low'>
rationale: str = ''
strengths: list[str] = PydanticUndefined
gaps: list[str] = PydanticUndefined
salary_match: bool | None = None

Whether salary expectations align

culture_signals: list[str] = PydanticUndefined
what_they_want: str = ''

Structured WTAW analysis: role archetype + 3 key needs read between JD lines + candidate proof points mapped to each need. Empty string when LLM omits it.

llm_scored: bool = False
llm_error: str | None = None

Set when LLM scoring was attempted but failed; None if not attempted or succeeded

tokens_used: int = 0
prompt_version: str | None = None

Version of the scoring prompt template used

is_strong_match: bool
143    @property
144    def is_strong_match(self) -> bool:
145        """True for HIGH and VERY_HIGH tiers with a score >= 70."""
146        return (
147            self.confidence_tier
148            in (
149                ConfidenceTier.VERY_HIGH,
150                ConfidenceTier.HIGH,
151            )
152            and self.score >= 70.0
153        )

True for HIGH and VERY_HIGH tiers with a score >= 70.

@dataclass
class Matcher:
 36@dataclass
 37class Matcher:
 38    """Two-stage matching engine.
 39
 40    **Stage 1 (vector):** Cosine similarity between profile and job embeddings
 41    (or pre-computed vectors on the models). Produces a score in ``[0, 100]``.
 42
 43    **Stage 2 (LLM):** When ``llm_scorer`` is set and the Stage 1 raw score is
 44    at or above ``vector_threshold``, the job is sent to the LLM for nuanced
 45    scoring (rationale, strengths, gaps). Below the threshold, Stage 2 is
 46    skipped and confidence is typically LOW.
 47
 48    Args:
 49        vector_scorer: Stage-1 scorer wrapping an embedding provider.
 50        vector_threshold: Minimum raw cosine similarity in ``[0, 1]`` before
 51            Stage 2 runs (when an LLM scorer is configured).
 52        llm_scorer: Optional Stage-2 scorer; ``None`` means vector-only matching.
 53        max_concurrency: Maximum concurrent LLM scoring calls in :meth:`match_batch`
 54            (when ``llm_scorer`` is set). Values below ``1`` are treated as ``1``.
 55
 56    Example::
 57
 58        matcher = create_matcher("openai", vector_threshold=0.35)
 59        result = await matcher.match_one(profile, job)
 60    """
 61
 62    vector_scorer: VectorScorer
 63    vector_threshold: float = 0.3
 64    llm_scorer: LLMScorer | None = None
 65    llm_confirm_threshold: float = 70.0
 66    max_concurrency: int = 5
 67
 68    async def match_one(self, profile: CandidateProfile, job: JobDescription) -> MatchResult:
 69        """Score a single job against the candidate profile.
 70
 71        Args:
 72            profile: Candidate data (skills, summary, optional embedding).
 73            job: Job posting data (title, requirements, optional embedding).
 74
 75        Returns:
 76            A :class:`~strata_match.models.MatchResult` with score, tier, and
 77            optional LLM fields.
 78
 79        Raises:
 80            StrataMatchError: Propagates embedding or LLM provider errors (network,
 81                authentication, rate limits) from the configured backends.
 82
 83        Example::
 84
 85            r = await matcher.match_one(profile, job)
 86            print(r.score, r.confidence_tier, r.rationale)
 87        """
 88        raw_score = await self.vector_scorer.score(profile, job)
 89        score_100 = _to_score_100(raw_score)
 90
 91        if raw_score < self.vector_threshold:
 92            return build_match_result(
 93                job,
 94                score=score_100,
 95                vector_score=score_100,
 96                confidence_tier=ConfidenceTier.LOW,
 97            )
 98
 99        if self.llm_scorer is not None:
100            llm_result = await self.llm_scorer.score(profile, job, vector_score=score_100)
101            confidence = classify_confidence(
102                raw_score,
103                llm_confirmed=llm_result.score >= self.llm_confirm_threshold,
104                llm_score=llm_result.score,
105            )
106            return MatchResult(
107                job_title=llm_result.job_title,
108                job_company=llm_result.job_company,
109                score=llm_result.score,
110                vector_score=score_100,
111                confidence_tier=confidence,
112                rationale=llm_result.rationale,
113                strengths=llm_result.strengths,
114                gaps=llm_result.gaps,
115                salary_match=llm_result.salary_match,
116                culture_signals=llm_result.culture_signals,
117                what_they_want=llm_result.what_they_want,
118                llm_scored=llm_result.llm_scored,
119                llm_error=llm_result.llm_error,
120                tokens_used=llm_result.tokens_used,
121                prompt_version=llm_result.prompt_version,
122            )
123
124        confidence = classify_confidence(raw_score, llm_confirmed=False)
125        return build_match_result(
126            job,
127            score=score_100,
128            vector_score=score_100,
129            confidence_tier=confidence,
130            llm_scored=False,
131        )
132
133    async def match_batch(
134        self, profile: CandidateProfile, jobs: list[JobDescription]
135    ) -> BatchMatchResult:
136        """Score multiple jobs against the candidate profile.
137
138        Jobs below ``vector_threshold`` are counted in ``jobs_skipped`` and do
139        not receive LLM scoring. Remaining results are sorted by ``score``
140        descending.
141
142        Args:
143            profile: Candidate data shared across all jobs.
144            jobs: List of job postings to evaluate (may be empty).
145
146        Returns:
147            :class:`~strata_match.models.BatchMatchResult` with per-job
148            results, counts, and token totals.
149
150        Raises:
151            StrataMatchError: Propagates embedding or LLM provider errors from the
152                configured backends.
153
154        Example::
155
156            batch = await matcher.match_batch(profile, open_jobs)
157            for r in batch.results:
158                print(r.job_title, r.score)
159        """
160        if not jobs:
161            return BatchMatchResult(
162                results=[],
163                jobs_evaluated=0,
164                jobs_skipped=0,
165                total_tokens=0,
166                duration_ms=0.0,
167                llm_scored_count=0,
168            )
169
170        start = time.perf_counter()
171        scores = await self.vector_scorer.score_batch(profile, jobs)
172
173        results: list[MatchResult] = []
174        skipped = 0
175        llm_count = 0
176        llm_fallback_count = 0
177        total_tokens = 0
178
179        llm_pairs: list[tuple[JobDescription, float]] = []
180
181        for job, raw in zip(jobs, scores, strict=True):
182            if raw < self.vector_threshold:
183                skipped += 1
184                continue
185
186            score_100 = _to_score_100(raw)
187
188            if self.llm_scorer is not None:
189                llm_pairs.append((job, raw))
190            else:
191                confidence = classify_confidence(raw, llm_confirmed=False)
192                results.append(
193                    build_match_result(
194                        job,
195                        score=score_100,
196                        vector_score=score_100,
197                        confidence_tier=confidence,
198                        llm_scored=False,
199                    )
200                )
201
202        if self.llm_scorer is not None and llm_pairs:
203            cap = max(1, self.max_concurrency)
204            semaphore = asyncio.Semaphore(cap)
205            llm = self.llm_scorer
206
207            async def _score_one_llm(job: JobDescription, raw: float) -> MatchResult:
208                score_100 = _to_score_100(raw)
209                async with semaphore:
210                    return await llm.score(profile, job, vector_score=score_100)
211
212            gathered = await asyncio.gather(
213                *(_score_one_llm(j, r) for j, r in llm_pairs),
214                return_exceptions=True,
215            )
216
217            for (job, raw), item in zip(llm_pairs, gathered, strict=True):
218                score_100 = _to_score_100(raw)
219                if isinstance(item, BaseException):
220                    # AC2: preserve the row as a fallback instead of dropping
221                    logger.error(
222                        "match_batch: LLM scoring error for job %r; returning fallback row",
223                        job.title,
224                        exc_info=(type(item), item, item.__traceback__),
225                    )
226                    results.append(
227                        build_match_result(
228                            job,
229                            score=score_100,
230                            vector_score=score_100,
231                            llm_scored=False,
232                            llm_error=f"LLM scoring error: {item}",
233                        )
234                    )
235                    llm_fallback_count += 1  # AC3: distinct counter
236                    continue
237
238                llm_result = item
239                score_100 = _to_score_100(raw)
240                confidence = classify_confidence(
241                    raw,
242                    llm_confirmed=llm_result.score >= self.llm_confirm_threshold,
243                    llm_score=llm_result.score,
244                )
245                results.append(
246                    MatchResult(
247                        job_title=llm_result.job_title,
248                        job_company=llm_result.job_company,
249                        score=llm_result.score,
250                        vector_score=score_100,
251                        confidence_tier=confidence,
252                        rationale=llm_result.rationale,
253                        strengths=llm_result.strengths,
254                        gaps=llm_result.gaps,
255                        salary_match=llm_result.salary_match,
256                        culture_signals=llm_result.culture_signals,
257                        what_they_want=llm_result.what_they_want,
258                        llm_scored=llm_result.llm_scored,
259                        llm_error=llm_result.llm_error,
260                        tokens_used=llm_result.tokens_used,
261                        prompt_version=llm_result.prompt_version,
262                    )
263                )
264                if llm_result.llm_scored:
265                    llm_count += 1
266                elif llm_result.llm_error is not None:
267                    llm_fallback_count += 1  # AC3: LLMScorer returned a fallback
268                total_tokens += llm_result.tokens_used
269
270        results.sort(key=lambda r: r.score, reverse=True)
271
272        elapsed_ms = (time.perf_counter() - start) * 1000
273        return BatchMatchResult(
274            results=results,
275            jobs_evaluated=len(jobs),
276            jobs_skipped=skipped,
277            total_tokens=total_tokens,
278            duration_ms=round(elapsed_ms, 2),
279            llm_scored_count=llm_count,
280            llm_fallback_count=llm_fallback_count,
281        )

Two-stage matching engine.

Stage 1 (vector): Cosine similarity between profile and job embeddings (or pre-computed vectors on the models). Produces a score in [0, 100].

Stage 2 (LLM): When llm_scorer is set and the Stage 1 raw score is at or above vector_threshold, the job is sent to the LLM for nuanced scoring (rationale, strengths, gaps). Below the threshold, Stage 2 is skipped and confidence is typically LOW.

Args:

- vector_scorer: Stage-1 scorer wrapping an embedding provider.
- vector_threshold: Minimum raw cosine similarity in [0, 1] before Stage 2 runs (when an LLM scorer is configured).
- llm_scorer: Optional Stage-2 scorer; None means vector-only matching.
- llm_confirm_threshold: LLM score at or above which a result is treated as LLM-confirmed when its confidence is classified (default 70.0).
- max_concurrency: Maximum concurrent LLM scoring calls in match_batch() (when llm_scorer is set). Values below 1 are treated as 1.

Example::

matcher = create_matcher("openai", vector_threshold=0.35)
result = await matcher.match_one(profile, job)
Matcher( vector_scorer: strata_match.scoring.VectorScorer, vector_threshold: float = 0.3, llm_scorer: strata_match.llm.LLMScorer | None = None, llm_confirm_threshold: float = 70.0, max_concurrency: int = 5)
vector_scorer: strata_match.scoring.VectorScorer
vector_threshold: float = 0.3
llm_scorer: strata_match.llm.LLMScorer | None = None
llm_confirm_threshold: float = 70.0
max_concurrency: int = 5
async def match_one( self, profile: CandidateProfile, job: JobDescription) -> MatchResult:
 68    async def match_one(self, profile: CandidateProfile, job: JobDescription) -> MatchResult:
 69        """Score a single job against the candidate profile.
 70
 71        Args:
 72            profile: Candidate data (skills, summary, optional embedding).
 73            job: Job posting data (title, requirements, optional embedding).
 74
 75        Returns:
 76            A :class:`~strata_match.models.MatchResult` with score, tier, and
 77            optional LLM fields.
 78
 79        Raises:
 80            StrataMatchError: Propagates embedding or LLM provider errors (network,
 81                authentication, rate limits) from the configured backends.
 82
 83        Example::
 84
 85            r = await matcher.match_one(profile, job)
 86            print(r.score, r.confidence_tier, r.rationale)
 87        """
 88        raw_score = await self.vector_scorer.score(profile, job)
 89        score_100 = _to_score_100(raw_score)
 90
 91        if raw_score < self.vector_threshold:
 92            return build_match_result(
 93                job,
 94                score=score_100,
 95                vector_score=score_100,
 96                confidence_tier=ConfidenceTier.LOW,
 97            )
 98
 99        if self.llm_scorer is not None:
100            llm_result = await self.llm_scorer.score(profile, job, vector_score=score_100)
101            confidence = classify_confidence(
102                raw_score,
103                llm_confirmed=llm_result.score >= self.llm_confirm_threshold,
104                llm_score=llm_result.score,
105            )
106            return MatchResult(
107                job_title=llm_result.job_title,
108                job_company=llm_result.job_company,
109                score=llm_result.score,
110                vector_score=score_100,
111                confidence_tier=confidence,
112                rationale=llm_result.rationale,
113                strengths=llm_result.strengths,
114                gaps=llm_result.gaps,
115                salary_match=llm_result.salary_match,
116                culture_signals=llm_result.culture_signals,
117                what_they_want=llm_result.what_they_want,
118                llm_scored=llm_result.llm_scored,
119                llm_error=llm_result.llm_error,
120                tokens_used=llm_result.tokens_used,
121                prompt_version=llm_result.prompt_version,
122            )
123
124        confidence = classify_confidence(raw_score, llm_confirmed=False)
125        return build_match_result(
126            job,
127            score=score_100,
128            vector_score=score_100,
129            confidence_tier=confidence,
130            llm_scored=False,
131        )

Score a single job against the candidate profile.

Args: profile: Candidate data (skills, summary, optional embedding). job: Job posting data (title, requirements, optional embedding).

Returns: A ~strata_match.models.MatchResult with score, tier, and optional LLM fields.

Raises: StrataMatchError: Propagates embedding or LLM provider errors (network, authentication, rate limits) from the configured backends.

Example::

r = await matcher.match_one(profile, job)
print(r.score, r.confidence_tier, r.rationale)
async def match_batch( self, profile: CandidateProfile, jobs: list[JobDescription]) -> BatchMatchResult:
133    async def match_batch(
134        self, profile: CandidateProfile, jobs: list[JobDescription]
135    ) -> BatchMatchResult:
136        """Score multiple jobs against the candidate profile.
137
138        Jobs below ``vector_threshold`` are counted in ``jobs_skipped`` and do
139        not receive LLM scoring. Remaining results are sorted by ``score``
140        descending.
141
142        Args:
143            profile: Candidate data shared across all jobs.
144            jobs: List of job postings to evaluate (may be empty).
145
146        Returns:
147            :class:`~strata_match.models.BatchMatchResult` with per-job
148            results, counts, and token totals.
149
150        Raises:
151            StrataMatchError: Propagates embedding or LLM provider errors from the
152                configured backends.
153
154        Example::
155
156            batch = await matcher.match_batch(profile, open_jobs)
157            for r in batch.results:
158                print(r.job_title, r.score)
159        """
160        if not jobs:
161            return BatchMatchResult(
162                results=[],
163                jobs_evaluated=0,
164                jobs_skipped=0,
165                total_tokens=0,
166                duration_ms=0.0,
167                llm_scored_count=0,
168            )
169
170        start = time.perf_counter()
171        scores = await self.vector_scorer.score_batch(profile, jobs)
172
173        results: list[MatchResult] = []
174        skipped = 0
175        llm_count = 0
176        llm_fallback_count = 0
177        total_tokens = 0
178
179        llm_pairs: list[tuple[JobDescription, float]] = []
180
181        for job, raw in zip(jobs, scores, strict=True):
182            if raw < self.vector_threshold:
183                skipped += 1
184                continue
185
186            score_100 = _to_score_100(raw)
187
188            if self.llm_scorer is not None:
189                llm_pairs.append((job, raw))
190            else:
191                confidence = classify_confidence(raw, llm_confirmed=False)
192                results.append(
193                    build_match_result(
194                        job,
195                        score=score_100,
196                        vector_score=score_100,
197                        confidence_tier=confidence,
198                        llm_scored=False,
199                    )
200                )
201
202        if self.llm_scorer is not None and llm_pairs:
203            cap = max(1, self.max_concurrency)
204            semaphore = asyncio.Semaphore(cap)
205            llm = self.llm_scorer
206
207            async def _score_one_llm(job: JobDescription, raw: float) -> MatchResult:
208                score_100 = _to_score_100(raw)
209                async with semaphore:
210                    return await llm.score(profile, job, vector_score=score_100)
211
212            gathered = await asyncio.gather(
213                *(_score_one_llm(j, r) for j, r in llm_pairs),
214                return_exceptions=True,
215            )
216
217            for (job, raw), item in zip(llm_pairs, gathered, strict=True):
218                score_100 = _to_score_100(raw)
219                if isinstance(item, BaseException):
220                    # AC2: preserve the row as a fallback instead of dropping
221                    logger.error(
222                        "match_batch: LLM scoring error for job %r; returning fallback row",
223                        job.title,
224                        exc_info=(type(item), item, item.__traceback__),
225                    )
226                    results.append(
227                        build_match_result(
228                            job,
229                            score=score_100,
230                            vector_score=score_100,
231                            llm_scored=False,
232                            llm_error=f"LLM scoring error: {item}",
233                        )
234                    )
235                    llm_fallback_count += 1  # AC3: distinct counter
236                    continue
237
238                llm_result = item
239                score_100 = _to_score_100(raw)
240                confidence = classify_confidence(
241                    raw,
242                    llm_confirmed=llm_result.score >= self.llm_confirm_threshold,
243                    llm_score=llm_result.score,
244                )
245                results.append(
246                    MatchResult(
247                        job_title=llm_result.job_title,
248                        job_company=llm_result.job_company,
249                        score=llm_result.score,
250                        vector_score=score_100,
251                        confidence_tier=confidence,
252                        rationale=llm_result.rationale,
253                        strengths=llm_result.strengths,
254                        gaps=llm_result.gaps,
255                        salary_match=llm_result.salary_match,
256                        culture_signals=llm_result.culture_signals,
257                        what_they_want=llm_result.what_they_want,
258                        llm_scored=llm_result.llm_scored,
259                        llm_error=llm_result.llm_error,
260                        tokens_used=llm_result.tokens_used,
261                        prompt_version=llm_result.prompt_version,
262                    )
263                )
264                if llm_result.llm_scored:
265                    llm_count += 1
266                elif llm_result.llm_error is not None:
267                    llm_fallback_count += 1  # AC3: LLMScorer returned a fallback
268                total_tokens += llm_result.tokens_used
269
270        results.sort(key=lambda r: r.score, reverse=True)
271
272        elapsed_ms = (time.perf_counter() - start) * 1000
273        return BatchMatchResult(
274            results=results,
275            jobs_evaluated=len(jobs),
276            jobs_skipped=skipped,
277            total_tokens=total_tokens,
278            duration_ms=round(elapsed_ms, 2),
279            llm_scored_count=llm_count,
280            llm_fallback_count=llm_fallback_count,
281        )

Score multiple jobs against the candidate profile.

Jobs below vector_threshold are counted in jobs_skipped and do not receive LLM scoring. Remaining results are sorted by score descending.

Args: profile: Candidate data shared across all jobs. jobs: List of job postings to evaluate (may be empty).

Returns: A strata_match.models.BatchMatchResult with per-job results, counts, and token totals.

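Raises: StrataMatchError: Propagates errors from the embedding (vector) stage of the configured backends. A failure while LLM-scoring an individual job is not raised; that job is returned as a fallback row with llm_error set and is counted in llm_fallback_count.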

Example::

batch = await matcher.match_batch(profile, open_jobs)
for r in batch.results:
    print(r.job_title, r.score)
class ProviderError(strata_match.StrataMatchError, builtins.ImportError):
class ProviderError(StrataMatchError, ImportError):
    """Provider initialization failed (e.g. missing optional dependency).

    Derives from both :class:`StrataMatchError` and :class:`ImportError`, so
    a handler written for either base class will catch it.
    """

Provider initialization failed (e.g. missing optional dependency).

class ScoringError(strata_match.StrataMatchError):
class ScoringError(StrataMatchError):
    """LLM chat-completion / scoring call failed.

    Subclass of :class:`StrataMatchError`, so it is also caught by handlers
    for the package-wide base error.
    """

LLM chat-completion / scoring call failed.

class StrataMatchError(builtins.Exception):
class StrataMatchError(Exception):
    """Base class for all strata-match errors.

    Catch this type to handle any error defined by the package, such as
    :class:`ProviderError` and :class:`ScoringError`.
    """

Base class for all strata-match errors.

def create_matcher( embedding_provider: strata_match.embeddings.EmbeddingProvider | str = 'openai', *, embedding_model: str | None = None, model: str | None = None, scoring_provider: strata_match.llm.LLMProvider | strata_match.llm.LLMScorer | str | None = None, scoring_model: str | None = None, vector_threshold: float = 0.3, llm_confirm_threshold: float = 70.0, max_concurrency: int = 5, llm_scorer: strata_match.llm.LLMScorer | None = None, _provider_client: object | None = None, _scoring_client: object | None = None, **provider_config: Any) -> Matcher:
def create_matcher(
    embedding_provider: EmbeddingProvider | str = "openai",
    *,
    embedding_model: str | None = None,
    model: str | None = None,
    scoring_provider: LLMProvider | LLMScorer | str | None = None,
    scoring_model: str | None = None,
    vector_threshold: float = 0.3,
    llm_confirm_threshold: float = 70.0,
    max_concurrency: int = 5,
    llm_scorer: LLMScorer | None = None,
    _provider_client: object | None = None,
    _scoring_client: object | None = None,
    **provider_config: Any,
) -> Matcher:
    """Build a :class:`Matcher` from provider keys or pre-built components.

    Args:
        embedding_provider: Either a ready EmbeddingProvider instance or a
            string key ("openai", "gemini", "ollama") that is resolved through
            the embedding provider factory.
        embedding_model: Embedding model identifier. Only forwarded when
            *embedding_provider* is a string. Preferred over the deprecated
            *model* argument.
        model: Deprecated alias for *embedding_model*; used only when
            *embedding_model* is not given.
        scoring_provider: An LLMProvider, an LLMScorer, or a string key
            ("openai", "litellm") enabling Stage 2 nuance scoring. String keys
            are resolved through the LLM provider factory. ``None`` disables
            LLM scoring.
        scoring_model: Model identifier handed to the LLM provider factory
            when *scoring_provider* is a string.
        vector_threshold: Minimum vector similarity a job needs in order to
            proceed to LLM scoring.
        llm_confirm_threshold: Minimum LLM score (0–100) at which the LLM
            counts as "confirming" the match for tier classification.
            Defaults to 70.
        max_concurrency: Upper bound on concurrent LLM calls made by
            :meth:`Matcher.match_batch`. Defaults to 5. Values below ``1``
            are treated as ``1``.
        llm_scorer: Pre-built LLM scorer (backward-compatible). Ignored when
            *scoring_provider* is supplied.
        _provider_client: Pre-built API client passed to the embedding
            provider factory (for testing).
        _scoring_client: Pre-built API client passed to the LLM provider
            factory (for testing).
        **provider_config: Additional keyword arguments passed to the
            embedding provider factory (e.g. ``api_key``, ``base_url``).

    Returns:
        A configured Matcher ready for use.

    Raises:
        ProviderError: If a string provider name is used but the optional
            dependency for that backend is not installed (see extras in
            ``pyproject.toml``).
        ConfigurationError: If an unknown embedding or LLM provider name is given.

    Example::

        # Vector-only (no LLM)
        m = create_matcher("openai", api_key="...", vector_threshold=0.4)

        # Two-stage with LLM via provider key
        m = create_matcher(
            "openai",
            scoring_provider="openai",
            scoring_model="gpt-4o-mini",
            api_key="...",
        )
    """
    if isinstance(embedding_provider, str):
        # Imported lazily, only when a provider has to be resolved by key.
        from strata_match.providers import create_embedding_provider

        provider = create_embedding_provider(
            embedding_provider,
            # The explicit name takes precedence over the deprecated alias.
            model=embedding_model or model,
            _client=_provider_client,
            **provider_config,
        )
    else:
        # Caller supplied a ready-made provider; use it untouched.
        provider = embedding_provider

    scorer = _resolve_llm_scorer(
        scoring_provider, scoring_model, _scoring_client, llm_scorer
    )

    return Matcher(
        vector_scorer=VectorScorer(provider=provider),
        vector_threshold=vector_threshold,
        llm_scorer=scorer,
        llm_confirm_threshold=llm_confirm_threshold,
        max_concurrency=max_concurrency,
    )

Factory: create a configured Matcher instance.

Args: embedding_provider: An EmbeddingProvider instance, or a string key ("openai", "gemini", "ollama") to auto-resolve via the provider factory. embedding_model: Model identifier for the embedding provider (forwarded when embedding_provider is a string). Preferred over the deprecated model argument. model: Deprecated alias for embedding_model. If both are supplied, embedding_model wins. scoring_provider: An LLMProvider, LLMScorer, or string key ("openai", "litellm") for Stage 2 nuance scoring. When a string is given the provider is auto-resolved via the LLM provider factory. None disables LLM scoring. scoring_model: Model identifier forwarded to the LLM provider factory when scoring_provider is a string. vector_threshold: Minimum vector similarity to proceed to LLM scoring. llm_confirm_threshold: Minimum LLM score (0–100) to consider the LLM as "confirming" the match for tier classification. Defaults to 70. max_concurrency: Maximum concurrent LLM calls in Matcher.match_batch(). Defaults to 5. Values below 1 are treated as 1. llm_scorer: Pre-built LLM scorer (backward-compatible). Ignored when scoring_provider is supplied. _provider_client: Pre-built API client forwarded to the embedding provider factory (for testing). _scoring_client: Pre-built API client forwarded to the LLM provider factory (for testing). **provider_config: Extra keyword arguments forwarded to the embedding provider factory (e.g. api_key, base_url).

Returns: A configured Matcher ready for use.

Raises: ProviderError: If a string provider name is used but the optional dependency for that backend is not installed (see extras in pyproject.toml). ConfigurationError: If an unknown embedding or LLM provider name is given.

Example::

# Vector-only (no LLM)
m = create_matcher("openai", api_key="...", vector_threshold=0.4)

# Two-stage with LLM via provider key
m = create_matcher(
    "openai",
    scoring_provider="openai",
    scoring_model="gpt-4o-mini",
    api_key="...",
)
async def match_batch( matcher: Matcher, profile: CandidateProfile, jobs: list[JobDescription]) -> BatchMatchResult:
async def match_batch(
    matcher: Matcher,
    profile: CandidateProfile,
    jobs: list[JobDescription],
) -> BatchMatchResult:
    """Score a list of jobs for one profile via :meth:`Matcher.match_batch`.

    Functional convenience wrapper: the call is forwarded unchanged to the
    given matcher and its result is returned as-is.

    Args:
        matcher: Engine from :func:`create_matcher`.
        profile: Candidate profile to match against.
        jobs: Jobs to score.

    Returns:
        Whatever :meth:`Matcher.match_batch` returns for these arguments: the
        batch outcome with sorted ``results``.

    Raises:
        StrataMatchError: Same as :meth:`Matcher.match_batch`.

    Example::

        batch = await match_batch(matcher, profile, jobs)
        for r in batch.results:
            print(r.job_title, r.score)
    """
    batch = await matcher.match_batch(profile, jobs)
    return batch

Match many jobs against a profile (delegates to Matcher.match_batch()).

Args: matcher: Engine from create_matcher(). profile: Candidate profile. jobs: Jobs to score.

Returns: Batch outcome with sorted results.

Raises: StrataMatchError: Same as Matcher.match_batch().

Example::

batch = await match_batch(matcher, profile, jobs)
print(batch.strong_matches)
async def match_job( matcher: Matcher, profile: CandidateProfile, job: JobDescription) -> MatchResult:
async def match_job(
    matcher: Matcher,
    profile: CandidateProfile,
    job: JobDescription,
) -> MatchResult:
    """Score a single job for one profile via :meth:`Matcher.match_one`.

    Functional convenience wrapper: the call is forwarded unchanged to the
    given matcher and its result is returned as-is.

    Args:
        matcher: Engine from :func:`create_matcher`.
        profile: Candidate profile to match against.
        job: Job description to score.

    Returns:
        Whatever :meth:`Matcher.match_one` returns for this pair: the match
        outcome.

    Raises:
        StrataMatchError: Same as :meth:`Matcher.match_one`.

    Example::

        result = await match_job(matcher, profile, job)
    """
    outcome = await matcher.match_one(profile, job)
    return outcome

Match one job against a profile (delegates to Matcher.match_one()).

Args: matcher: Engine from create_matcher(). profile: Candidate profile. job: Job description.

Returns: Match outcome for the pair.

Raises: StrataMatchError: Same as Matcher.match_one().

Example::

result = await match_job(matcher, profile, job)