Skip to content

API Reference

completion()

from llmgate import completion

resp = completion(
    model,          # str
    messages,       # list[dict | Message]
    *,
    provider=None,          # str | None
    api_key=None,           # str | None
    max_tokens=None,        # int | None
    temperature=None,       # float | None
    top_p=None,             # float | None
    stream=False,           # bool
    tools=None,             # list[ToolDefinition | dict] | None
    tool_choice=None,       # str | dict | None
    response_format=None,   # type[BaseModel] | None
    middleware=None,        # list[BaseMiddleware] | None
    **extra_kwargs,
) -> CompletionResponse | Iterator[StreamChunk]

acompletion()

Async variant — identical signature; returns a coroutine that must be awaited to obtain the same result.


parse() / aparse()

from llmgate import parse

instance = parse(model, messages, *, response_format: type[T], **kwargs) -> T

Shorthand for completion(..., response_format=T).parsed — returns the validated instance of T directly.


embed() / aembed()

from llmgate import embed

resp = embed(
    model,          # str
    input,          # str | list[str]
    *,
    dimensions=None,    # int | None (OpenAI / Azure / Gemini)
    api_key=None,
    **extra_kwargs,
) -> EmbeddingResponse

batch() / abatch()

from llmgate import batch

results = batch(
    requests,               # list[CompletionRequest | dict]
    *,
    max_concurrency=10,     # int
    fail_fast=False,        # bool
    middleware=None,        # list[BaseMiddleware] | None
    **extra_kwargs,
) -> BatchResult

Types

Message

class Message(BaseModel):
    role: Literal["system", "user", "assistant", "tool"]
    content: str | list[TextPart | ImagePart] | None
    tool_calls: list[ToolCall] | None
    tool_call_id: str | None
    name: str | None

TextPart

class TextPart(BaseModel):
    type: Literal["text"] = "text"
    text: str

ImagePart

class ImagePart(BaseModel):
    type: Literal["image_url", "image_bytes"]
    image_url: ImageURL | None
    image_bytes: ImageBytes | None

ImageURL

class ImageURL(BaseModel):
    url: str
    detail: Literal["auto", "low", "high"] | None

ImageBytes

class ImageBytes(BaseModel):
    data: str       # base64-encoded, no data-URI prefix
    mime_type: str  # "image/jpeg" | "image/png" | "image/webp" | "image/gif"

CompletionResponse

class CompletionResponse(BaseModel):
    id: str
    model: str
    provider: str
    choices: list[Choice]
    usage: TokenUsage
    parsed: BaseModel | None
    raw: Any   # raw SDK response
    text: str  # property → choices[0].message.content
    tool_calls: list[ToolCall] | None  # property

EmbeddingResponse

class EmbeddingResponse(BaseModel):
    embeddings: list[list[float]]
    model: str
    provider: str
    usage: TokenUsage

BatchResult

class BatchResult(BaseModel):
    results: list[CompletionResponse | None]
    errors: list[BatchError]
    successful: int
    failed: int
    total_tokens: int
    success_rate: float

TokenUsage

class TokenUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

ToolCall

class ToolCall(BaseModel):
    id: str
    function: str
    arguments: dict[str, Any]