API Reference¶
completion()¶
from llmgate import completion
resp = completion(
model, # str
messages, # list[dict | Message]
*,
provider=None, # str | None
api_key=None, # str | None
max_tokens=None, # int | None
temperature=None, # float | None
top_p=None, # float | None
stream=False, # bool
tools=None, # list[ToolDefinition | dict] | None
tool_choice=None, # str | dict | None
response_format=None, # type[BaseModel] | None
middleware=None, # list[BaseMiddleware] | None
**extra_kwargs,
) -> CompletionResponse | Iterator[StreamChunk]
acompletion()¶
Async variant — identical signature; returns a coroutine that, when awaited, yields the same result types (CompletionResponse, or an async iterator of StreamChunk when streaming).
parse() / aparse()¶
from llmgate import parse
instance = parse(model, messages, *, response_format: type[T], **kwargs) -> T
Shorthand for completion(..., response_format=T).parsed.
embed() / aembed()¶
from llmgate import embed
resp = embed(
model, # str
input, # str | list[str]
*,
dimensions=None, # int | None (OpenAI / Azure / Gemini)
api_key=None,
**extra_kwargs,
) -> EmbeddingResponse
batch() / abatch()¶
from llmgate import batch
results = batch(
requests, # list[CompletionRequest | dict]
*,
max_concurrency=10, # int
fail_fast=False, # bool
middleware=None, # list[BaseMiddleware] | None
**extra_kwargs,
) -> BatchResult
Types¶
Message¶
class Message(BaseModel):
role: Literal["system", "user", "assistant", "tool"]
content: str | list[TextPart | ImagePart] | None
tool_calls: list[ToolCall] | None
tool_call_id: str | None
name: str | None
TextPart¶
ImagePart¶
class ImagePart(BaseModel):
type: Literal["image_url", "image_bytes"]
image_url: ImageURL | None
image_bytes: ImageBytes | None
ImageURL¶
ImageBytes¶
class ImageBytes(BaseModel):
data: str # base64-encoded, no data-URI prefix
mime_type: str # "image/jpeg" | "image/png" | "image/webp" | "image/gif"
CompletionResponse¶
class CompletionResponse(BaseModel):
id: str
model: str
provider: str
choices: list[Choice]
usage: TokenUsage
parsed: BaseModel | None
raw: Any # raw SDK response
text: str # property → choices[0].message.content
tool_calls: list[ToolCall] | None # property
EmbeddingResponse¶
class EmbeddingResponse(BaseModel):
embeddings: list[list[float]]
model: str
provider: str
usage: TokenUsage
BatchResult¶
class BatchResult(BaseModel):
results: list[CompletionResponse | None]
errors: list[BatchError]
successful: int
failed: int
total_tokens: int
success_rate: float