AI Inference API

REST API for AI model inference, chat completions, and embeddings.

Base URL

https://api.cloud.tenzro.com/ai

Endpoints

Chat Completion

POST /ai/chat
Content-Type: application/json
{
"model": "gemini-2.5-flash",
"messages": [
{ "role": "system", "content": "You are a helpful assistant." },
{ "role": "user", "content": "What is machine learning?" }
],
"temperature": 0.7,
"max_tokens": 500
}
# Response
{
"id": "chat_abc123",
"model": "gemini-2.5-flash",
"message": {
"role": "assistant",
"content": "Machine learning is a subset of artificial intelligence..."
},
"usage": {
"prompt_tokens": 25,
"completion_tokens": 150,
"total_tokens": 175
},
"finish_reason": "stop"
}

Streaming Chat

POST /ai/chat
Content-Type: application/json
{
"model": "gemini-2.5-flash",
"messages": [
{ "role": "user", "content": "Write a story" }
],
"stream": true
}
# Response (Server-Sent Events)
data: {"delta": {"content": "Once"}}
data: {"delta": {"content": " upon"}}
data: {"delta": {"content": " a"}}
data: {"delta": {"content": " time"}}
data: {"delta": {}, "finish_reason": "stop"}
data: [DONE]

Generate Text

POST /ai/generate
Content-Type: application/json
{
"model": "gemini-2.5-flash",
"prompt": "Explain quantum computing in simple terms",
"max_tokens": 200,
"temperature": 0.5
}
# Response
{
"id": "gen_abc123",
"text": "Quantum computing uses quantum mechanics...",
"usage": {
"prompt_tokens": 10,
"completion_tokens": 100,
"total_tokens": 110
}
}

Generate Embeddings

POST /ai/embeddings
Content-Type: application/json
{
"model": "text-embedding-3-small",
"input": "Machine learning is fascinating"
}
# Response
{
"embedding": [0.123, -0.456, 0.789, ...],
"dimensions": 1536,
"usage": {
"total_tokens": 5
}
}
# Batch embeddings
POST /ai/embeddings
Content-Type: application/json
{
"model": "text-embedding-3-small",
"input": [
"First document",
"Second document"
]
}
# Response
{
"embeddings": [
[0.1, 0.2, ...],
[0.3, 0.4, ...]
],
"dimensions": 1536
}

Function Calling

POST /ai/chat
Content-Type: application/json
{
"model": "gemini-3-pro-preview",
"messages": [
{ "role": "user", "content": "What's the weather in Tokyo?" }
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": { "type": "string" }
},
"required": ["location"]
}
}
}
]
}
# Response
{
"message": {
"role": "assistant",
"tool_calls": [
{
"id": "call_abc",
"function": {
"name": "get_weather",
"arguments": "{\"location\": \"Tokyo\"}"
}
}
]
}
}

List Models

GET /ai/models
# Response
{
"models": [
{
"id": "gemini-2.5-flash",
"provider": "google",
"capabilities": ["chat", "vision"],
"context_length": 1000000
},
{
"id": "gemini-3-pro-preview",
"provider": "google",
"capabilities": ["chat", "vision", "function_calling"],
"context_length": 2000000
},
{
"id": "gpt-5",
"provider": "openai",
"capabilities": ["chat", "vision", "function_calling"],
"context_length": 256000
},
{
"id": "claude-sonnet-4-5",
"provider": "anthropic",
"capabilities": ["chat", "vision", "function_calling"],
"context_length": 200000
}
]
}

Parameters

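| Parameter   | Type    | Description              |
|-------------|---------|--------------------------|
| model       | string  | Model identifier         |
| temperature | number  | 0-2, controls randomness |
| max_tokens  | number  | Maximum output length    |
| top_p       | number  | 0-1, nucleus sampling    |
| stream      | boolean | Enable streaming         |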

Error Responses

{
"error": {
"code": "rate_limit_exceeded",
"message": "Rate limit exceeded. Retry after 60 seconds.",
"retry_after": 60,
"status": 429
}
}