# Standalone scraper microservice.
# Build context: monorepo root (so we can access pyproject.toml + uv.lock + sibling workspace packages).
# Coolify config: base_directory=/, dockerfile_location=/packages/matrx-scraper/Dockerfile.

FROM python:3.13-slim AS base

# Interpreter and tooling switches shared by every later instruction:
#   PIP_DISABLE_PIP_VERSION_CHECK  silence pip's "new release available" lookup
#   PYTHONDONTWRITEBYTECODE        do not write .pyc files when modules are imported
#   PYTHONUNBUFFERED               send stdout/stderr straight to the container log
#   UV_COMPILE_BYTECODE            have uv precompile bytecode at install time
#   UV_LINK_MODE                   copy files into the venv rather than linking them
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy

# OS packages the image itself needs: curl backs the HEALTHCHECK probe and
# ca-certificates supplies the TLS trust store. Browser libraries for
# Playwright are NOT listed here; `playwright install --with-deps` adds them
# in a later layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv, which performs the locked dependency sync below and launches the
# service at runtime. UV_VERSION is empty by default, which installs the latest
# release exactly as before; pass `--build-arg UV_VERSION=x.y.z` to pin it so
# rebuilds are reproducible and cannot pick up an unexpected uv release.
ARG UV_VERSION=""
RUN pip install --no-cache-dir "uv${UV_VERSION:+==${UV_VERSION}}"

WORKDIR /app

# Workspace manifests first, then every workspace member. uv resolves against
# the root pyproject.toml + uv.lock and needs ALL members named in
# [tool.uv.workspace] to be present, so the entire packages/ tree (~10 MB) is
# copied; a partial copy breaks workspace resolution.
COPY pyproject.toml uv.lock /app/
COPY packages/ /app/packages/

# Install matrx-scraper plus its [server] extra from the frozen uv.lock at the
# workspace root.
#   --frozen    use uv.lock as-is; never re-resolve during the build
#   --no-dev    skip dev-only tooling
#   --package   sync this workspace member only, so the heavy aidream-current
#               root project is NOT installed
#   --no-cache  keep uv's download/build cache out of this layer; without it
#               (or a BuildKit cache mount) the cache is committed to the image
#               alongside the venv and duplicates every installed package
RUN uv sync --frozen --no-dev --no-cache --package matrx-scraper --extra server

# Playwright Chromium + its OS deps. Largest layer; kept in its own RUN.
# `--with-deps` drives apt-get to install the browser's system libraries, so
# the package index it downloads is removed in this same layer; deleting it in
# a later layer would not shrink the image.
RUN uv run --no-sync playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# Runtime network defaults. 8001 is kept because the existing Coolify Traefik
# labels already point at it, so the proxy needs no reconfiguration.
# ServerConfig.from_env() respects PORT; HOST is presumably the bind address
# read the same way (confirm against ServerConfig).
ENV HOST=0.0.0.0 \
    PORT=8001

EXPOSE 8001/tcp

# /health/ready confirms DB pool + cache are wired before traffic flows.
# The probe runs through /bin/sh at container runtime, so it follows the same
# PORT variable the server reads (falling back to 8001 when PORT is unset)
# instead of a hard-coded port that would go stale if PORT is overridden.
# `|| exit 1` maps any curl failure code onto the single "unhealthy" status.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${PORT:-8001}/health/ready" >/dev/null || exit 1

# Start the scraper server module. Exec (JSON-array) form is used, so no shell
# wraps the command. `--no-sync` makes uv run against the environment already
# installed at build time instead of re-syncing it on every container start.
# NOTE(review): no USER instruction is visible in this file, so the process
# presumably runs as root — consider a dedicated non-root user once the
# Playwright browser path and any writable directories are confirmed.
CMD ["uv", "run", "--no-sync", "python", "-m", "matrx_scraper.server"]
