#!/usr/bin/env python3
"""Query a vLLM chat endpoint and print the reply. Usage: mat-query <endpoint> <model> <prompt>"""
import json, sys, urllib.request, urllib.error

USAGE = "Usage: mat-query <endpoint> <model> <prompt>"
REQUEST_TIMEOUT_S = 120  # seconds to wait on the HTTP call
MAX_TOKENS = 4096  # completion-length cap sent with every request


def build_request(endpoint: str, model: str, prompt: str) -> urllib.request.Request:
    """Build the POST request for a single-turn chat completion.

    Args:
        endpoint: Base URL of the server; trailing slashes are stripped
            before ``/v1/chat/completions`` is appended.
        model: Model name placed in the ``model`` field of the payload.
        prompt: Text sent as the sole ``user`` message.

    Raises:
        ValueError: If ``endpoint`` is not a URL that urllib can parse
            (for example, it has no scheme).
    """
    payload = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": MAX_TOKENS,
    }).encode()
    # Supplying ``data`` makes urllib issue a POST instead of a GET.
    return urllib.request.Request(
        f"{endpoint.rstrip('/')}/v1/chat/completions",
        data=payload,
        headers={"Content-Type": "application/json"},
    )


def extract_reply(body: bytes) -> str:
    """Return the first choice's message content from a response body.

    The value stored under ``content`` is returned as-is.

    Raises:
        ValueError: If ``body`` is not valid JSON or does not have the
            ``choices[0].message.content`` structure.
    """
    try:
        return json.loads(body)["choices"][0]["message"]["content"]
    except (ValueError, LookupError, TypeError) as err:
        # ValueError covers JSON/UTF-8 decode failures; LookupError covers a
        # missing key or empty ``choices``; TypeError covers a wrong
        # container type (e.g. a top-level list).
        raise ValueError(f"unexpected response body: {body[:200]!r}") from err


def main(argv=None) -> None:
    """Send the prompt to the endpoint and print the reply.

    Args:
        argv: ``[endpoint, model, prompt]``; defaults to ``sys.argv[1:]``.
            Arguments beyond the third are ignored.

    Exits with a one-line message on stderr (status 1) on bad usage, an
    invalid endpoint, an HTTP error, a connection failure, or a malformed
    response.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit(USAGE)
    endpoint, model, prompt = args[:3]

    try:
        req = build_request(endpoint, model, prompt)
    except ValueError as e:
        sys.exit(f"Invalid endpoint: {e}")

    try:
        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT_S) as r:
            body = r.read()
    except urllib.error.HTTPError as e:
        # HTTPError must be caught before OSError, which it subclasses.
        # Decode leniently so a non-UTF-8 error page cannot raise here.
        sys.exit(f"HTTP {e.code}: {e.read().decode(errors='replace')}")
    except OSError as e:
        sys.exit(f"Connection error: {e}")

    try:
        reply = extract_reply(body)
    except ValueError as e:
        sys.exit(f"Bad response: {e}")
    print(reply)


if __name__ == "__main__":
    main()
