LLM 代码练习——API 代理

一个单文件的 OpenAI 兼容 API 代理脚本，用来转发请求到上游服务，并记录请求与响应日志，可以用来测试大模型服务。

flowchart LR;
    A[OpenAI 兼容客户端] -->|HTTP 请求| B[llm-proxy]
    B -->|转发请求| C[上游大模型服务]
    C -->|返回响应| B
    B -->|返回响应| A
    B -.->|记录请求与响应| D[日志文件]

用法

直接运行：

uv run llm-proxy.py --upstream-base-url https://your-upstream.example.com/v1

常用参数：

--upstream-base-url：上游服务地址
--host：监听地址，默认 127.0.0.1
--port：监听端口，默认 8000
--log-dir：日志目录，默认 ./logs
--upstream-timeout：上游请求超时（单位：秒），默认 300

llm_proxy.py 头部已内联声明依赖，uv run 会自动处理这个单文件脚本的依赖：创建一个带缓存的虚拟环境，安装依赖后再运行脚本。（注意：不要在 Python 项目目录中运行，否则 uv 会按项目模式处理依赖。）

启动后，客户端把 Base URL 指向本地代理即可，例如 http://127.0.0.1:8000/v1，加不加 v1 均可。

源码

需要第三方依赖：

fastapi
httpx
uvicorn

完整源码

#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "fastapi",
#   "httpx",
#   "uvicorn",
# ]
# ///

import argparse
import asyncio
import json
import threading
import time
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Any, TypedDict, cast
from urllib.parse import urlencode, urlsplit, urlunsplit

import httpx
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse

# Lower-cased header names that proxy_request_headers / proxy_response_headers
# drop instead of copying between the client side and the upstream side.
HOP_BY_HOP_HEADERS = {
    # Connection-level headers.
    "connection",
    "keep-alive",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
    # Proxy authentication headers.
    "proxy-authenticate",
    "proxy-authorization",
    # Also excluded from copying; presumably because they are regenerated for
    # the outgoing message rather than because they are hop-by-hop.
    "content-length",
    "host",
}

# First path segments that normalize_proxy_path treats as API resources and may
# prefix with "v1" (kept in alphabetical order).
OPENAI_COMPATIBLE_TOP_LEVEL_PATHS = {
    "assistants",
    "audio",
    "batches",
    "chat",
    "completions",
    "embeddings",
    "files",
    "fine_tuning",
    "images",
    "models",
    "moderations",
    "responses",
    "threads",
    "vector_stores",
}

# Any value a decoded JSON document can hold (object, array, string, number,
# boolean or null).
JsonValue = dict[str, Any] | list[Any] | str | int | float | bool | None
# One conversation item extracted for the Markdown log: a "kind" key
# ("message", "tool_call" or "tool_result") plus kind-specific string fields.
MarkdownEntry = dict[str, str]


class ToolCallAccumulator(TypedDict):
    """Partial tool call assembled from streamed (SSE) response deltas."""

    # Function name; stays "" until a delta supplies a non-empty name.
    name: str
    # Argument fragments in arrival order; joined after the stream is parsed.
    arguments: list[str]


def iso_now() -> str:
    """Return the current local time as ``YYYY-MM-DDTHH:MM:SS`` followed by the ``%z`` offset."""
    timestamp_format = "%Y-%m-%dT%H:%M:%S%z"
    return time.strftime(timestamp_format)


def now_ts() -> str:
    """Return the current local time as a compact ``YYYYMMDD-HHMMSS`` stamp."""
    compact_format = "%Y%m%d-%H%M%S"
    return time.strftime(compact_format)


# Held by append_jsonl and append_markdown while they append to a log file, so
# writes from different threads are not interleaved.
LOG_WRITE_LOCK = threading.Lock()


def make_session_log_name() -> str:
    """Build the JSONL log file name for this run.

    The name combines the current timestamp with the sub-second part of the
    clock (in nanoseconds) so that runs started within the same second get
    different names.
    """
    stamp = now_ts()
    sub_second_ns = time.time_ns() % 1_000_000_000
    return f"proxy-{stamp}-{sub_second_ns}.jsonl"


def get_log_file(log_dir: Path, session_log_name: str) -> Path:
    """Return the path of the session's JSONL log inside *log_dir*."""
    return log_dir.joinpath(session_log_name)


def get_markdown_log_file(log_dir: Path, session_log_name: str) -> Path:
    """Return the path of the session's Markdown log inside *log_dir*.

    The Markdown name is the JSONL name with ``.jsonl`` replaced by ``.md``.
    """
    markdown_name = session_log_name.replace(".jsonl", ".md")
    return log_dir / markdown_name


def ensure_log_files(log_dir: Path, session_log_name: str) -> None:
    """Create the session's JSONL and Markdown log files if they do not exist yet."""
    targets = (
        get_log_file(log_dir, session_log_name),
        get_markdown_log_file(log_dir, session_log_name),
    )
    for target in targets:
        target.touch(exist_ok=True)


def append_jsonl(path: Path, data: object) -> None:
    """Append *data* to *path* as one JSON line (UTF-8, non-ASCII kept as-is)."""
    # Serialize before taking the lock so the lock only covers the file write.
    serialized = json.dumps(data, ensure_ascii=False)
    with LOG_WRITE_LOCK, path.open("a", encoding="utf-8") as handle:
        handle.write(serialized + "\n")


def append_markdown(path: Path, text: str) -> None:
    """Append *text* verbatim to the UTF-8 Markdown log at *path*."""
    with LOG_WRITE_LOCK, path.open("a", encoding="utf-8") as handle:
        handle.write(text)


def sanitize_headers(headers: Any) -> dict[str, str]:
    """Return a copy of *headers* that is safe to write to the logs.

    Args:
        headers: Any mapping-like object exposing ``items()``.

    Returns:
        A plain dict with the same keys; the value of every
        credential-bearing header is replaced by ``"***REDACTED***"``.
        Header names are compared case-insensitively.
    """
    # Besides the standard Authorization headers, API keys are commonly sent
    # in "x-api-key" / "api-key", and cookies can carry session credentials.
    sensitive_names = {
        "authorization",
        "proxy-authorization",
        "x-api-key",
        "api-key",
        "cookie",
        "set-cookie",
    }
    return {
        key: "***REDACTED***" if key.lower() in sensitive_names else value
        for key, value in headers.items()
    }


def proxy_request_headers(headers: Any) -> dict[str, str]:
    """Return the client request headers that should be sent to the upstream.

    Every header whose lower-cased name is in ``HOP_BY_HOP_HEADERS`` is left
    out; all others are copied unchanged.
    """
    forwarded: dict[str, str] = {}
    for name, value in headers.items():
        if name.lower() in HOP_BY_HOP_HEADERS:
            continue
        forwarded[name] = value
    return forwarded


def proxy_response_headers(headers: Any) -> dict[str, str]:
    """Return the upstream response headers that should be sent to the client.

    Args:
        headers: Mapping-like object exposing ``items()`` (the upstream
            response headers).

    Returns:
        A dict without the headers listed in ``HOP_BY_HOP_HEADERS`` and
        without ``Content-Encoding``. Names are compared case-insensitively.
    """
    # The proxy relays the body via httpx's ``aiter_bytes()``, which yields
    # already content-decoded bytes. Forwarding the upstream Content-Encoding
    # would make the client try to decompress a plain body.
    excluded = HOP_BY_HOP_HEADERS | {"content-encoding"}
    return {
        key: value
        for key, value in headers.items()
        if key.lower() not in excluded
    }


def decode_body(body: bytes) -> JsonValue:
    """Turn a raw request body into something loggable.

    Returns ``None`` for an empty body, the parsed JSON value when the body is
    valid UTF-8 JSON, and otherwise the body decoded as UTF-8 text with
    undecodable bytes replaced.
    """
    if not body:
        return None
    try:
        text = body.decode("utf-8")
        return json.loads(text)
    except Exception:
        # Best effort: not UTF-8 or not JSON, so log it as lossy text instead.
        return body.decode("utf-8", errors="replace")


def serialize_query_params(query_params: Any) -> list[tuple[str, str]]:
    """Return every query parameter as a ``(key, value)`` pair, repeated keys included.

    *query_params* must expose ``multi_items()``.
    """
    return [*query_params.multi_items()]


def split_path_segments(path: str) -> list[str]:
    """Split *path* on ``/`` and return its non-empty segments in order."""
    return list(filter(None, path.split("/")))


def normalize_upstream_base_url(upstream_base_url: str) -> str:
    """Return *upstream_base_url* with a path that ends in ``/v1``.

    Trailing slashes are removed and empty path segments are collapsed; a
    ``v1`` segment is appended unless the path already ends with one. Scheme,
    host, query and fragment are kept as given.
    """
    scheme, netloc, path, query, fragment = urlsplit(upstream_base_url.rstrip("/"))
    segments = split_path_segments(path)
    # An empty slice differs from ["v1"], so this also covers "no path at all".
    if segments[-1:] != ["v1"]:
        segments.append("v1")
    return urlunsplit((scheme, netloc, "/" + "/".join(segments), query, fragment))


def normalize_proxy_path(upstream_base_url: str, path: str) -> str:
    """Combine the upstream base path with the incoming request path.

    Rules, applied in order:

    1. If the request path starts with the last segment of the base path, that
       leading segment is dropped (so ``/v1/...`` is not repeated).
    2. If the base path does not end in ``v1`` and the request starts with a
       known API resource name, ``v1`` is put in front of the request path.
    3. Base and request segments are concatenated and consecutive ``v1``
       segments are collapsed into one.

    Returns the joined path with a leading ``/``, or ``""`` when no segments
    remain.
    """
    base_segments = split_path_segments(urlsplit(upstream_base_url).path)
    tail_segments = split_path_segments(path)
    base_last = base_segments[-1] if base_segments else None

    if tail_segments and base_segments and tail_segments[0] == base_last:
        tail_segments = tail_segments[1:]

    needs_version_prefix = (
        bool(tail_segments)
        and tail_segments[0] != "v1"
        and base_last != "v1"
        and tail_segments[0] in OPENAI_COMPATIBLE_TOP_LEVEL_PATHS
    )
    if needs_version_prefix:
        tail_segments = ["v1", *tail_segments]

    collapsed: list[str] = []
    for segment in (*base_segments, *tail_segments):
        if segment == "v1" and collapsed[-1:] == ["v1"]:
            continue
        collapsed.append(segment)

    if not collapsed:
        return ""
    return "/" + "/".join(collapsed)


def build_upstream_url(upstream_base_url: str, path: str, query_params: Any) -> str:
    """Build the full upstream URL for one incoming request.

    Args:
        upstream_base_url: Base URL of the upstream service.
        path: Path of the incoming request (without query string).
        query_params: Request query parameters; must expose ``multi_items()``.

    Returns:
        The upstream scheme and host, the merged path from
        ``normalize_proxy_path`` and the request's query string. A query
        string on the base URL itself is not forwarded.
    """
    upstream_parts = urlsplit(upstream_base_url)
    query_items = serialize_query_params(query_params)
    # Let urlunsplit place the query so it always precedes the fragment;
    # appending "?query" to the finished URL would put it after "#fragment".
    return urlunsplit(
        (
            upstream_parts.scheme,
            upstream_parts.netloc,
            normalize_proxy_path(upstream_base_url, path),
            urlencode(query_items, doseq=True) if query_items else "",
            upstream_parts.fragment,
        )
    )


def build_log_record(
    request_id: str,
    request: Request,
    upstream_url: str,
    request_body: bytes,
) -> dict[str, JsonValue]:
    """Assemble the request half of a JSONL log record.

    The record holds the current timestamp, the request id, the request's
    method, path and query pairs, the upstream URL, the request headers with
    credentials redacted, and the decoded request body.
    """
    timestamp = iso_now()
    query_pairs = serialize_query_params(request.query_params)
    safe_headers = sanitize_headers(request.headers)
    decoded_body = decode_body(request_body)
    return {
        "timestamp": timestamp,
        "request_id": request_id,
        "method": request.method,
        "path": request.url.path,
        "query": query_pairs,
        "upstream_url": upstream_url,
        "request_headers": safe_headers,
        "request_body": decoded_body,
    }


def stringify_content(content: Any) -> str:
    """Flatten a message ``content`` value into plain text.

    Args:
        content: ``None``, a string, a list of content parts, or any other
            JSON value.

    Returns:
        ``""`` for ``None``; strings unchanged; for a list, the non-empty
        renderings of its parts joined by newlines; a JSON dump for a dict;
        ``str(content)`` for anything else.

    A dict part is rendered as its ``text`` when it is a text part (``text``,
    ``input_text`` or ``output_text``), as its ``content`` when that is a
    string, and as a JSON dump otherwise.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Chat Completions uses "text"; the Responses API uses
        # "input_text" / "output_text" for the same kind of part.
        text_part_types = {"text", "input_text", "output_text"}
        parts: list[str] = []
        for item in content:
            if not isinstance(item, dict):
                parts.append(str(item))
            elif item.get("type") in text_part_types and isinstance(
                item.get("text"), str
            ):
                parts.append(item["text"])
            elif isinstance(item.get("content"), str):
                parts.append(item["content"])
            else:
                parts.append(json.dumps(item, ensure_ascii=False))
        return "\n".join(part for part in parts if part)
    if isinstance(content, dict):
        return json.dumps(content, ensure_ascii=False)
    return str(content)


def format_tool_arguments(arguments: Any) -> str:
    """Render tool-call arguments as text.

    Strings are returned unchanged, ``None`` becomes ``""`` and any other
    value is dumped as indented JSON with non-ASCII characters kept.
    """
    if isinstance(arguments, str):
        return arguments
    return "" if arguments is None else json.dumps(arguments, ensure_ascii=False, indent=2)


def code_block(text: str, language: str = "json") -> str:
    """Wrap *text* in a Markdown fenced code block.

    Args:
        text: Content of the block; surrounding whitespace is stripped.
        language: Info string placed after the opening fence.

    Returns:
        ``""`` when *text* is blank, otherwise the fenced block. The fence is
        three backticks, or one more than the longest run of backticks inside
        the text, so embedded fences cannot end the block early.
    """
    stripped = text.strip()
    if not stripped:
        return ""
    longest_run = current_run = 0
    for char in stripped:
        current_run = current_run + 1 if char == "`" else 0
        longest_run = max(longest_run, current_run)
    fence = "`" * max(3, longest_run + 1)
    return f"{fence}{language}\n{stripped}\n{fence}"


def detect_code_language(text: str) -> str:
    """Guess a code-fence language for *text*.

    Returns ``""`` for blank text, ``"xml"`` for text that starts with an XML
    declaration or looks like a tag pair, ``"json"`` for text starting with
    ``{`` or ``[``, and ``"text"`` otherwise.
    """
    body = text.strip()
    if not body:
        return ""
    looks_like_markup = body.startswith("<") and body.endswith(">") and "</" in body
    if body.startswith("<?xml") or looks_like_markup:
        return "xml"
    return "json" if body[0] in "{[" else "text"


def should_wrap_markdown_content(text: str) -> bool:
    """Tell whether *text* should be shown inside a code block.

    True when the stripped text contains a triple-backtick fence, looks like
    XML or JSON, or has any line indented by four spaces or a tab.
    """
    body = text.strip()
    if not body:
        return False
    if "```" in body or body.startswith(("<?xml", "{", "[")):
        return True
    if body.startswith("<") and body.endswith(">") and "</" in body:
        return True
    indent_markers = ("    ", "\t")
    return any(line.startswith(indent_markers) for line in body.splitlines())


def format_markdown_content(text: str) -> str:
    """Prepare message text for the Markdown log.

    The text is stripped; code-like text is wrapped in a fenced block tagged
    with its detected language, anything else is returned as-is. Blank text
    yields ``""``.
    """
    body = text.strip()
    if body and should_wrap_markdown_content(body):
        return code_block(body, detect_code_language(body))
    return body


def role_heading(role: str) -> str:
    """Return the heading label for a chat role.

    The four known roles are matched case-insensitively after stripping;
    any other role is returned title-cased.
    """
    known_labels = {
        "system": "System",
        "user": "User",
        "assistant": "Assistant",
        "tool": "Tool",
    }
    key = role.strip().lower()
    if key in known_labels:
        return known_labels[key]
    return role.title()


def extract_request_messages(request_body: JsonValue) -> list[MarkdownEntry]:
    """Extract the conversation from a chat request body for the Markdown log.

    Args:
        request_body: Decoded request body; anything that is not a dict with
            a ``messages`` list yields an empty result.

    Returns:
        One entry per item, in request order:

        * ``message`` for the non-empty content of a non-tool message,
        * ``tool_call`` for each function call listed in ``tool_calls``,
        * ``tool_result`` for the non-empty content of a ``tool`` message.

        Malformed messages and tool calls are skipped.
    """
    if not isinstance(request_body, dict):
        return []
    messages = request_body.get("messages")
    if not isinstance(messages, list):
        return []

    extracted: list[MarkdownEntry] = []
    for message in messages:
        if not isinstance(message, dict):
            continue
        role = str(message.get("role") or "unknown")
        content = stringify_content(message.get("content"))
        is_tool_message = role == "tool"

        # Tool output is reported once, as a tool_result below, rather than
        # also as a generic message with the same text.
        if content and not is_tool_message:
            extracted.append({"kind": "message", "role": role, "content": content})

        tool_calls = message.get("tool_calls")
        if isinstance(tool_calls, list):
            for tool_call in tool_calls:
                if not isinstance(tool_call, dict):
                    continue
                function = tool_call.get("function")
                if not isinstance(function, dict):
                    continue
                extracted.append(
                    {
                        "kind": "tool_call",
                        "role": role,
                        "name": str(function.get("name") or "unknown_tool"),
                        "arguments": format_tool_arguments(function.get("arguments")),
                    }
                )

        if is_tool_message and content:
            extracted.append(
                {
                    "kind": "tool_result",
                    "role": role,
                    "name": str(message.get("name") or "tool"),
                    "content": content,
                }
            )
    return extracted


def extract_response_messages(
    response_body: str, content_type: str | None
) -> list[MarkdownEntry]:
    """Extract assistant output from a response body for the Markdown log.

    Event-stream responses are parsed as SSE; everything else is parsed as a
    single JSON document. An empty body or invalid JSON yields ``[]``.
    """
    if not response_body:
        return []
    if "event-stream" in (content_type or "").lower():
        return extract_sse_messages(response_body)
    try:
        payload = json.loads(response_body)
    except Exception:
        return []
    else:
        return extract_json_response_messages(payload)


def extract_json_response_messages(payload: JsonValue) -> list[MarkdownEntry]:
    """Extract messages and tool calls from a non-streaming JSON response.

    Reads ``choices[].message`` (content and function tool calls) first and
    then ``output[]`` items (role and content). Entries that do not have the
    expected shape are skipped; a non-dict payload yields ``[]``.
    """
    if not isinstance(payload, dict):
        return []
    entries: list[MarkdownEntry] = []

    choices = payload.get("choices")
    for choice in choices if isinstance(choices, list) else []:
        message = choice.get("message") if isinstance(choice, dict) else None
        if not isinstance(message, dict):
            continue
        role = str(message.get("role") or "assistant")
        text = stringify_content(message.get("content"))
        if text:
            entries.append({"kind": "message", "role": role, "content": text})

        calls = message.get("tool_calls")
        for call in calls if isinstance(calls, list) else []:
            function = call.get("function") if isinstance(call, dict) else None
            if not isinstance(function, dict):
                continue
            call_name = str(function.get("name") or "unknown_tool")
            call_arguments = format_tool_arguments(function.get("arguments"))
            entries.append(
                {
                    "kind": "tool_call",
                    "role": role,
                    "name": call_name,
                    "arguments": call_arguments,
                }
            )

    output = payload.get("output")
    for item in output if isinstance(output, list) else []:
        if not isinstance(item, dict):
            continue
        text = stringify_content(item.get("content"))
        if text:
            entries.append(
                {
                    "kind": "message",
                    "role": str(item.get("role") or "assistant"),
                    "content": text,
                }
            )
    return entries


def _sse_index(item: dict[str, Any], fallback: int) -> int:
    """Return ``item["index"]`` as an int, or *fallback* if it is absent or unusable."""
    try:
        return int(item.get("index", fallback))
    except (TypeError, ValueError, OverflowError):
        return fallback


def extract_sse_messages(response_body: str) -> list[MarkdownEntry]:
    """Reassemble messages and tool calls from a streamed (SSE) chat response.

    Args:
        response_body: Full text of the event stream.

    Returns:
        One ``message`` entry per choice that produced text (in choice order),
        followed by one ``tool_call`` entry per streamed tool call. Lines that
        are not ``data:`` lines, the ``[DONE]`` marker, and payloads that are
        not JSON objects with a ``choices`` list are ignored.
    """
    roles: dict[int, str] = {}
    contents: dict[int, list[str]] = {}
    tool_calls: dict[int, dict[int, ToolCallAccumulator]] = {}

    for line in response_body.splitlines():
        stripped = line.strip()
        if not stripped.startswith("data:"):
            continue
        data = stripped[5:].strip()
        if not data or data == "[DONE]":
            continue
        try:
            payload = json.loads(data)
        except (ValueError, RecursionError):
            continue
        # A data line may hold valid JSON that is not an object (e.g. a number).
        if not isinstance(payload, dict):
            continue
        choices = payload.get("choices")
        if not isinstance(choices, list):
            continue

        for position, choice in enumerate(choices):
            if not isinstance(choice, dict):
                continue
            delta = choice.get("delta")
            if not isinstance(delta, dict):
                continue
            # Group by the choice's own "index": with several choices, each
            # chunk may carry one choice, so the list position cannot tell
            # them apart.
            index = _sse_index(choice, position)

            role = delta.get("role")
            if isinstance(role, str):
                roles[index] = role
            content = stringify_content(delta.get("content"))
            if content:
                contents.setdefault(index, []).append(content)

            delta_tool_calls = delta.get("tool_calls")
            if not isinstance(delta_tool_calls, list):
                continue
            choice_tool_calls = tool_calls.setdefault(index, {})
            for call_position, tool_call in enumerate(delta_tool_calls):
                if not isinstance(tool_call, dict):
                    continue
                entry = choice_tool_calls.setdefault(
                    _sse_index(tool_call, call_position),
                    ToolCallAccumulator(name="", arguments=[]),
                )
                function = tool_call.get("function")
                if not isinstance(function, dict):
                    continue
                name = function.get("name")
                if isinstance(name, str) and name:
                    entry["name"] = name
                arguments = function.get("arguments")
                if arguments is not None:
                    entry["arguments"].append(str(arguments))

    extracted: list[MarkdownEntry] = []
    for index in sorted(contents):
        joined = "".join(contents[index]).strip()
        if joined:
            extracted.append(
                {
                    "kind": "message",
                    "role": roles.get(index, "assistant"),
                    "content": joined,
                }
            )
    for choice_index in sorted(tool_calls):
        for tool_index in sorted(tool_calls[choice_index]):
            accumulated = tool_calls[choice_index][tool_index]
            extracted.append(
                {
                    "kind": "tool_call",
                    "role": roles.get(choice_index, "assistant"),
                    "name": str(accumulated["name"] or "unknown_tool"),
                    "arguments": "".join(accumulated["arguments"]).strip(),
                }
            )
    return extracted


def build_markdown_entry(
    request_id: str,
    path: str,
    request_body: JsonValue,
    response_body: str,
    response_headers: dict[str, str],
) -> str:
    """Render one request/response exchange as a Markdown section.

    The section starts with a ``##`` heading carrying the request id, then the
    path and current time, then one ``###`` subsection per extracted message,
    tool call or tool result. Returns ``""`` when neither the request nor the
    response yields any entry; otherwise the text ends with a blank line.
    """
    content_type = response_headers.get("content-type") if response_headers else None
    request_entries = extract_request_messages(request_body)
    response_entries = extract_response_messages(response_body, content_type)
    if not (request_entries or response_entries):
        return ""

    lines = [
        f"## {request_id}",
        f"- path: `{path}`",
        f"- time: `{iso_now()}`",
        "",
    ]
    for entry in [*request_entries, *response_entries]:
        kind = entry["kind"]
        if kind == "message":
            heading = f"### {role_heading(entry['role'])}"
            body = format_markdown_content(entry["content"])
        elif kind == "tool_call":
            heading = f"### Tool Call: `{entry['name']}`"
            body = code_block(entry["arguments"])
        elif kind == "tool_result":
            heading = f"### Tool Result: `{entry['name']}`"
            body = format_markdown_content(entry["content"])
        else:
            # Unknown kinds produce no output.
            continue
        lines += [heading, "", body, ""]
    return "\n".join(lines).rstrip() + "\n\n"


def append_error_log(
    log_dir: Path,
    session_log_name: str,
    request_id: str,
    request: Request,
    upstream_url: str,
    request_body: bytes,
    message: str,
    extra: JsonValue = None,
) -> None:
    """Write a JSONL record for a request that failed before any response.

    The record is the usual request record plus an ``error`` object holding
    *message* and *extra*.
    """
    record = build_log_record(request_id, request, upstream_url, request_body)
    record["error"] = {"message": message, "extra": extra}
    destination = get_log_file(log_dir, session_log_name)
    append_jsonl(destination, record)


def build_app(
    upstream_base_url: str, log_dir: Path, upstream_timeout: float
) -> FastAPI:
    """Create the proxy application.

    Args:
        upstream_base_url: Base URL of the upstream service; it is normalized
            so that its path ends in ``/v1``.
        log_dir: Directory for the session logs; created if missing.
        upstream_timeout: Timeout in seconds for upstream requests. The
            connect timeout is the same value capped at 10 seconds.

    Returns:
        A FastAPI app with a ``GET /healthz`` endpoint and a catch-all route
        that forwards every other request upstream, streams the response back
        and appends the exchange to the session's JSONL and Markdown logs.
        The resolved log locations are exposed on ``app.state``.
    """
    upstream_base_url = normalize_upstream_base_url(upstream_base_url)
    log_dir.mkdir(parents=True, exist_ok=True)
    session_log_name = make_session_log_name()
    ensure_log_files(log_dir, session_log_name)

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # One shared HTTP client for the app's lifetime, closed on shutdown.
        timeout = httpx.Timeout(
            upstream_timeout,
            connect=min(10.0, upstream_timeout),
        )
        app.state.client = httpx.AsyncClient(timeout=timeout, follow_redirects=False)
        try:
            yield
        finally:
            await app.state.client.aclose()

    app = FastAPI(lifespan=lifespan)
    app.state.log_dir = log_dir.resolve()
    app.state.log_file = get_log_file(log_dir, session_log_name).resolve()
    app.state.markdown_log_file = get_markdown_log_file(
        log_dir,
        session_log_name,
    ).resolve()

    @app.get("/healthz")
    async def healthz():
        return {
            "status": "ok",
            "upstream_base_url": upstream_base_url,
            "log_dir": str(app.state.log_dir),
            "log_file": session_log_name,
        }

    @app.api_route(
        "/{full_path:path}",
        methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
    )
    async def proxy(request: Request, full_path: str):
        # /healthz is local-only: other methods on it are not forwarded.
        if request.url.path == "/healthz":
            return JSONResponse(
                status_code=404,
                content={"error": "not_found"},
            )

        request_id = f"proxy-{now_ts()}-{time.time_ns() % 1_000_000_000}"
        body = await request.body()
        upstream_url = build_upstream_url(
            upstream_base_url, full_path, request.query_params
        )
        log_record = build_log_record(request_id, request, upstream_url, body)

        upstream_request = request.app.state.client.build_request(
            method=request.method,
            url=upstream_url,
            headers=proxy_request_headers(request.headers),
            content=body,
        )

        try:
            upstream_response = await request.app.state.client.send(
                upstream_request,
                stream=True,
            )
        except httpx.RequestError as exc:
            append_error_log(
                log_dir,
                session_log_name,
                request_id,
                request,
                upstream_url,
                body,
                "Failed to reach upstream service",
                {"detail": str(exc), "upstream_url": upstream_url},
            )
            return JSONResponse(
                status_code=502,
                content={"error": "bad_gateway", "detail": str(exc)},
            )

        # Raw bytes are collected and decoded once at the end: decoding each
        # chunk separately would corrupt multi-byte UTF-8 characters that are
        # split across chunk boundaries.
        response_chunks: list[bytes] = []
        log_written = False

        async def stream_and_log():
            nonlocal log_written
            try:
                async for chunk in upstream_response.aiter_bytes():
                    if not chunk:
                        continue
                    response_chunks.append(chunk)
                    yield chunk
            except httpx.HTTPError as exc:
                response_chunks.append(f"\n[stream-error] {exc}\n".encode("utf-8"))
                log_record["error"] = {
                    "message": "Upstream stream error",
                    "detail": str(exc),
                }
            except asyncio.CancelledError:
                response_chunks.append(b"\n[stream-cancelled]\n")
                log_record["error"] = {"message": "Client cancelled stream"}
                raise
            finally:
                try:
                    if not log_written:
                        log_record["response_status_code"] = (
                            upstream_response.status_code
                        )
                        log_record["response_headers"] = sanitize_headers(
                            upstream_response.headers
                        )
                        log_record["response_body"] = b"".join(
                            response_chunks
                        ).decode("utf-8", errors="replace")
                        append_jsonl(
                            get_log_file(log_dir, session_log_name), log_record
                        )
                        markdown_entry = build_markdown_entry(
                            request_id,
                            request.url.path,
                            log_record.get("request_body"),
                            log_record["response_body"],
                            log_record["response_headers"],
                        )
                        if markdown_entry:
                            append_markdown(
                                get_markdown_log_file(log_dir, session_log_name),
                                markdown_entry,
                            )
                        log_written = True
                finally:
                    # Release the upstream connection even if logging failed.
                    await upstream_response.aclose()

        return StreamingResponse(
            stream_and_log(),
            status_code=upstream_response.status_code,
            headers=proxy_response_headers(upstream_response.headers),
            media_type=upstream_response.headers.get("content-type"),
        )

    return app


def parse_args():
    """Parse the command-line options of the proxy script.

    ``--upstream-base-url`` is required; host, port, log directory and
    upstream timeout have defaults.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--upstream-base-url", required=True)
    cli.add_argument("--host", default="127.0.0.1")
    cli.add_argument("--port", type=int, default=8000)
    cli.add_argument("--log-dir", default="./logs")
    cli.add_argument(
        "--upstream-timeout",
        type=float,
        default=300.0,
        help="Per-request timeout in seconds for the upstream service.",
    )
    namespace = cli.parse_args()
    return namespace


def main():
    """Script entry point: build the app, print the log locations, run the server."""
    options = parse_args()
    log_directory = Path(options.log_dir)
    app = build_app(
        options.upstream_base_url,
        log_directory,
        options.upstream_timeout,
    )
    print(f"JSONL log: {app.state.log_file}")
    print(f"Markdown log: {app.state.markdown_log_file}")
    uvicorn.run(app, host=options.host, port=options.port)


# Start the proxy only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()