This page explains the core building blocks of Iris. Understanding these concepts will help you design effective AI applications and use the SDK efficiently.
A Provider implements the core.Provider interface and adapts a specific LLM API to Iris’s
unified interface. Providers handle the translation between Iris’s common types and provider-specific
API formats.
```go
type Provider interface {
	// Identity
	ID() string          // "openai", "anthropic", etc.
	Models() []ModelInfo // Available models with metadata

	// Capabilities
	Supports(feature Feature) bool // Check if a feature is supported

	// Core operations
	Chat(ctx context.Context, req *ChatRequest) (*ChatResponse, error)
	StreamChat(ctx context.Context, req *ChatRequest) (*ChatStream, error)
}
```

Each provider advertises what it supports via the Supports() method:
```go
// Feature capability flags
const (
	FeatureChat                     Feature = "chat"
	FeatureChatStreaming            Feature = "chat_streaming"
	FeatureToolCalling              Feature = "tool_calling"
	FeatureReasoning                Feature = "reasoning"
	FeatureBuiltInTools             Feature = "built_in_tools"
	FeatureResponseChain            Feature = "response_chain"
	FeatureEmbeddings               Feature = "embeddings"
	FeatureContextualizedEmbeddings Feature = "contextualized_embeddings"
	FeatureReranking                Feature = "reranking"
)
```
```go
// Check capabilities before using features
if provider.Supports(core.FeatureToolCalling) {
	// Safe to use tool calling
}
```

Each provider package exports a New() function:
```go
// Cloud providers require API keys
openaiProvider := openai.New(os.Getenv("OPENAI_API_KEY"))
anthropicProvider := anthropic.New(os.Getenv("ANTHROPIC_API_KEY"))
geminiProvider := gemini.New(os.Getenv("GEMINI_API_KEY"))
```
```go
// Local providers require base URLs
ollamaProvider := ollama.New("http://localhost:11434")
```

Providers accept configuration options:
```go
provider := openai.New(apiKey,
	openai.WithBaseURL("https://custom-endpoint.example.com/v1"),
	openai.WithOrganization("org-xxx"),
	openai.WithHTTPClient(customHTTPClient),
	openai.WithTimeout(60 * time.Second),
)
```

The core.Client wraps a provider with middleware features: retry logic, telemetry hooks, and the fluent builder API. The client is the primary entry point for SDK usage.
```go
// Basic client
client := core.NewClient(provider)
```

```go
// Client with options
client := core.NewClient(provider,
	core.WithRetryPolicy(customPolicy),
	core.WithTelemetry(telemetryHook),
	core.WithTimeout(30 * time.Second),
)
```

core.Client is safe for concurrent use. Create one client and share it across goroutines:
```go
// Create once at startup
client := core.NewClient(provider)

// Use from multiple goroutines
var wg sync.WaitGroup
for i := 0; i < 10; i++ {
	wg.Add(1)
	go func(id int) {
		defer wg.Done()
		resp, err := client.Chat("gpt-4o").
			User(fmt.Sprintf("Query %d", id)).
			GetResponse(ctx)
		if err != nil {
			log.Printf("query %d failed: %v", id, err)
			return
		}
		fmt.Println(resp.Output)
	}(i)
}
wg.Wait()
```

```go
// Start a chat builder
builder := client.Chat("gpt-4o")

// Start an embedding builder
embedBuilder := client.Embed("text-embedding-3-small")

// Access the underlying provider
provider := client.Provider()

// Get client configuration
config := client.Config()
```

The ChatBuilder constructs chat requests through a fluent API. Each method returns the builder for chaining. Builders are not thread-safe; use Clone() when sharing configurations.
```go
resp, err := client.Chat("gpt-4o").
	// Message methods
	System("You are a helpful assistant.").       // System message
	User("What is the capital of France?").       // User message
	Assistant("Paris is the capital of France."). // Assistant message (for context)

	// Configuration methods
	Temperature(0.7).      // 0.0-2.0, lower = more deterministic
	MaxTokens(1000).       // Maximum tokens in response
	TopP(0.9).             // Nucleus sampling
	FrequencyPenalty(0.5). // Reduce repetition
	PresencePenalty(0.5).  // Encourage new topics
	Stop("END", "STOP").   // Stop sequences
	Seed(42).              // Reproducible outputs (if supported)

	// Tool methods
	Tools(tool1, tool2).     // Available tools
	ToolChoice("auto").      // "auto", "none", or a specific tool name
	ParallelToolCalls(true). // Allow parallel tool invocations

	// Advanced methods
	ResponseFormat("json"). // Force JSON output
	User("userID123").      // User identifier for abuse tracking

	// Execute
	GetResponse(ctx) // Blocking response
	// or GetStream(ctx) // Streaming response
```

For messages with images or files, use the multimodal builder:
```go
resp, err := client.Chat("gpt-4o").
	System("Analyze images in detail.").
	UserMultimodal().
	Text("What objects are in this image?").
	ImageURL("https://example.com/photo.jpg").
	ImageBase64(base64Data, "image/png").
	Done().
	GetResponse(ctx)
```

Use Clone() to create a copy with the current configuration:
```go
// Base configuration
base := client.Chat("gpt-4o").
	System("You are a helpful assistant.").
	Temperature(0.7)

// Create specialized builders from the base
codeHelper := base.Clone().System("You are a coding expert.")
mathHelper := base.Clone().System("You are a math tutor.")

// Use independently
codeResp, _ := codeHelper.User("How do I reverse a string in Go?").GetResponse(ctx)
mathResp, _ := mathHelper.User("What is the derivative of x^2?").GetResponse(ctx)
```

Build multi-turn conversations by adding message history:
```go
builder := client.Chat("gpt-4o").
	System("You are a helpful assistant.")

// First turn
resp1, _ := builder.Clone().
	User("What is Python?").
	GetResponse(ctx)

// Second turn with history
resp2, _ := builder.Clone().
	User("What is Python?").
	Assistant(resp1.Output).
	User("How do I install it?").
	GetResponse(ctx)
```
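The same pattern scales to longer conversations: keep the turns in a slice and replay them on each request. A sketch (the Conversation type is illustrative, not part of Iris, and assumes the builder type is exported as core.ChatBuilder):

```go
// Conversation is a hypothetical helper that replays history on each turn.
type Conversation struct {
	base  *core.ChatBuilder // Pre-configured with system prompt, temperature, etc.
	turns []struct{ user, assistant string }
}

func (c *Conversation) Ask(ctx context.Context, question string) (string, error) {
	b := c.base.Clone()
	for _, t := range c.turns {
		b = b.User(t.user).Assistant(t.assistant)
	}
	resp, err := b.User(question).GetResponse(ctx)
	if err != nil {
		return "", err
	}
	c.turns = append(c.turns, struct{ user, assistant string }{question, resp.Output})
	return resp.Output, nil
}
```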
The response from a chat completion contains the generated content and metadata:

```go
type ChatResponse struct {
	// Primary content
	Output    string     // Generated text content
	ToolCalls []ToolCall // Tool invocations (if tools were provided)

	// Metadata
	Model        string // Actual model used
	FinishReason string // "stop", "tool_calls", "length", "content_filter"
	Usage        Usage  // Token counts

	// Advanced (provider-dependent)
	Reasoning         *Reasoning // Reasoning summary (GPT-5, Claude)
	SystemFingerprint string     // Model version identifier
}

type Usage struct {
	PromptTokens     int
	CompletionTokens int
	TotalTokens      int
}
```

```go
resp, err := client.Chat("gpt-4o").
	User("Hello!").
	GetResponse(ctx)
if err != nil {
	// Handle error (see Error Handling section)
	return err
}

// Check the finish reason
switch resp.FinishReason {
case "stop":
	// Normal completion
	fmt.Println(resp.Output)
case "tool_calls":
	// Model wants to call tools
	for _, tc := range resp.ToolCalls {
		result := executeToolCall(tc)
		_ = result // Continue the conversation with the tool result
	}
case "length":
	// Hit the max tokens limit
	fmt.Println("Response truncated:", resp.Output)
case "content_filter":
	// Content was filtered
	fmt.Println("Content filtered by provider")
}

// Check token usage
fmt.Printf("Tokens used: %d\n", resp.Usage.TotalTokens)
```

Streaming delivers response tokens as they're generated, enabling real-time UI updates. The ChatStream type provides channels for incremental processing.
```go
type ChatStream struct {
	Ch    <-chan ChatChunk     // Incremental content chunks
	Err   <-chan error         // Error channel (receives at most one)
	Final <-chan *ChatResponse // Aggregated final response
}

type ChatChunk struct {
	Content       string         // Text delta
	ToolCallDelta *ToolCallDelta // Tool call delta (if streaming tools)
}
```

```go
stream, err := client.Chat("gpt-4o").
	User("Tell me a story.").
	GetStream(ctx)
if err != nil {
	return err
}

// Process chunks as they arrive
for chunk := range stream.Ch {
	fmt.Print(chunk.Content) // Real-time output
}

// Check for errors
if err := <-stream.Err; err != nil {
	return err
}

// Get the final aggregated response
final := <-stream.Final
fmt.Printf("\nTotal tokens: %d\n", final.Usage.TotalTokens)
```

```go
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

stream, err := client.Chat("gpt-4o").
	User("Write a long essay.").
	GetStream(ctx)
if err != nil {
	return err
}

// A labeled break is needed here; a bare break would only exit the select.
loop:
	for {
		select {
		case chunk, ok := <-stream.Ch:
			if !ok {
				break loop // Stream finished
			}
			fmt.Print(chunk.Content)
		case <-ctx.Done():
			// Timeout or cancellation
			return ctx.Err()
		}
	}
```

Use core.DrainStream to collect a stream into a full response:
```go
stream, err := client.Chat("gpt-4o").
	User("Hello").
	GetStream(ctx)
if err != nil {
	return err
}

// Drain the stream to get a full response
resp, err := core.DrainStream(stream)
if err != nil {
	return err
}

// Now you have a regular ChatResponse
fmt.Println(resp.Output)
```

Tool calls can also stream, with deltas indicating partial arguments:
```go
stream, err := client.Chat("gpt-4o").
	User("What's the weather in Tokyo?").
	Tools(weatherTool).
	GetStream(ctx)
if err != nil {
	return err
}

var toolCallArgs strings.Builder
for chunk := range stream.Ch {
	if chunk.ToolCallDelta != nil {
		// Accumulate tool call arguments
		toolCallArgs.WriteString(chunk.ToolCallDelta.Arguments)
	} else if chunk.Content != "" {
		fmt.Print(chunk.Content)
	}
}

// After the stream completes, check Final for complete tool calls
final := <-stream.Final
for _, tc := range final.ToolCalls {
	// Execute with complete arguments
	executeToolCall(tc)
}
```

Tools enable models to invoke external functions with structured arguments. This is the foundation for building agents that can take actions in the real world.
```go
type Tool struct {
	Name        string         // Unique identifier
	Description string         // What the tool does (used by the model)
	Parameters  ToolParameters // JSON Schema for arguments
}

type ToolParameters struct {
	Type       string              // Always "object"
	Properties map[string]Property // Parameter definitions
	Required   []string            // Required parameter names
}

type Property struct {
	Type        string    // "string", "integer", "number", "boolean", "array", "object"
	Description string    // Parameter description
	Enum        []string  // Allowed values (optional)
	Items       *Property // For array types
}
```

```go
weatherTool := core.Tool{
	Name:        "get_current_weather",
	Description: "Get the current weather in a specific location",
	Parameters: core.ToolParameters{
		Type: "object",
		Properties: map[string]core.Property{
			"location": {
				Type:        "string",
				Description: "The city and state, e.g., 'San Francisco, CA'",
			},
			"unit": {
				Type:        "string",
				Description: "Temperature unit",
				Enum:        []string{"celsius", "fahrenheit"},
			},
		},
		Required: []string{"location"},
	},
}
```

When a model decides to use a tool, it returns tool calls instead of text:
```go
type ToolCall struct {
	ID        string          // Unique call identifier
	Name      string          // Tool name
	Arguments json.RawMessage // JSON-encoded arguments
}
```
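The examples on this page call a helper named executeToolCall. Iris leaves tool execution to you; a minimal sketch of such a dispatcher, assuming the weatherTool defined above and a getWeather function of your own:

```go
// executeToolCall and getWeather are your code, not part of Iris.
func executeToolCall(tc core.ToolCall) string {
	switch tc.Name {
	case "get_current_weather":
		var args struct {
			Location string `json:"location"`
			Unit     string `json:"unit"`
		}
		if err := json.Unmarshal(tc.Arguments, &args); err != nil {
			return fmt.Sprintf(`{"error": %q}`, err.Error())
		}
		return getWeather(args.Location, args.Unit)
	default:
		return fmt.Sprintf(`{"error": "unknown tool %s"}`, tc.Name)
	}
}
```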
Complete tool calling requires multiple turns:

```go
// Turn 1: Initial request with tools
resp, err := client.Chat("gpt-4o").
	System("You are a helpful assistant with access to weather data.").
	User("What's the weather in New York?").
	Tools(weatherTool).
	GetResponse(ctx)
if err != nil {
	return err
}

// Check whether the model wants to call a tool
if len(resp.ToolCalls) > 0 {
	tc := resp.ToolCalls[0]

	// Parse arguments
	var args struct {
		Location string `json:"location"`
		Unit     string `json:"unit"`
	}
	if err := json.Unmarshal(tc.Arguments, &args); err != nil {
		return err
	}

	// Execute the tool (your implementation)
	weatherResult := getWeather(args.Location, args.Unit)

	// Turn 2: Send the tool result back
	finalResp, err := client.Chat("gpt-4o").
		System("You are a helpful assistant with access to weather data.").
		User("What's the weather in New York?").
		Assistant("").                    // Model's turn (empty text since it called a tool)
		ToolResult(tc.ID, weatherResult). // Tool result
		Tools(weatherTool).
		GetResponse(ctx)
	if err != nil {
		return err
	}

	fmt.Println(finalResp.Output)
	// "The current weather in New York is 72°F with partly cloudy skies."
}
```

Control how the model uses tools:
```go
// Let the model decide (default)
builder.ToolChoice("auto")

// Disable tool use
builder.ToolChoice("none")

// Force a specific tool
builder.ToolChoice("get_current_weather")

// Require some tool (any)
builder.ToolChoice("required")
```

Some models can call multiple tools simultaneously:
```go
resp, err := client.Chat("gpt-4o").
	User("What's the weather in New York and London?").
	Tools(weatherTool).
	ParallelToolCalls(true). // Enable parallel calls
	GetResponse(ctx)
if err != nil {
	return err
}

// resp.ToolCalls may contain multiple calls
var results []string
for _, tc := range resp.ToolCalls {
	// Execute each tool call and collect the results
	results = append(results, executeToolCall(tc))
}
```
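To finish the exchange, send one tool result per call back in a follow-up turn. A sketch, assuming ToolResult can be chained once per call ID just as in the single-tool flow above:

```go
// Return every tool result in the next turn (illustrative continuation).
followUp := client.Chat("gpt-4o").
	User("What's the weather in New York and London?").
	Assistant(""). // Model's turn (it called tools instead of answering)
	Tools(weatherTool)
for i, tc := range resp.ToolCalls {
	followUp = followUp.ToolResult(tc.ID, results[i])
}
finalResp, err := followUp.GetResponse(ctx)
if err != nil {
	return err
}
fmt.Println(finalResp.Output)
```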
Some models (GPT-5, Claude with extended thinking) provide reasoning summaries explaining their thought process. Additionally, certain models have built-in tools like web search.

```go
type Reasoning struct {
	Summary string   // High-level explanation
	Steps   []string // Step-by-step reasoning (if available)
}
```

Access reasoning in responses:
```go
resp, err := client.Chat("gpt-5").
	User("Solve: If a train leaves Chicago at 9am going 60mph...").
	GetResponse(ctx)

if resp.Reasoning != nil {
	fmt.Println("Model reasoning:")
	fmt.Println(resp.Reasoning.Summary)
	for i, step := range resp.Reasoning.Steps {
		fmt.Printf("%d. %s\n", i+1, step)
	}
}

fmt.Println("\nAnswer:", resp.Output)
```

GPT-5+ models support built-in tools that the model can invoke internally:
```go
resp, err := client.Chat("gpt-5").
	User("Search the web for the latest news about AI.").
	BuiltInTools(
		core.BuiltInToolWebSearch,  // Web search
		core.BuiltInToolFileSearch, // File search in uploaded files
	).
	GetResponse(ctx)
```
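Built-in tool support varies by provider, so it is worth gating on the FeatureBuiltInTools flag shown earlier. A small sketch:

```go
// Only request built-in tools when the provider supports them.
builder := client.Chat("gpt-5").
	User("Search the web for the latest news about AI.")
if client.Provider().Supports(core.FeatureBuiltInTools) {
	builder = builder.BuiltInTools(core.BuiltInToolWebSearch)
}
resp, err := builder.GetResponse(ctx)
```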
Vision-capable models can process images alongside text. Iris provides a consistent interface across providers.

```go
// From a URL
resp, err := client.Chat("gpt-4o").
	UserMultimodal().
	Text("Describe this image.").
	ImageURL("https://example.com/photo.jpg").
	Done().
	GetResponse(ctx)
```

```go
// From base64
imageData, _ := os.ReadFile("photo.png")
base64Data := base64.StdEncoding.EncodeToString(imageData)

resp, err := client.Chat("gpt-4o").
	UserMultimodal().
	Text("What's in this image?").
	ImageBase64(base64Data, "image/png").
	Done().
	GetResponse(ctx)
```

```go
// Multiple images
resp, err := client.Chat("gpt-4o").
	UserMultimodal().
	Text("Compare these two images.").
	ImageURL("https://example.com/before.jpg").
	ImageURL("https://example.com/after.jpg").
	Done().
	GetResponse(ctx)
```

Control the detail level for image analysis:

```go
resp, err := client.Chat("gpt-4o").
	UserMultimodal().
	Text("Analyze this diagram in detail.").
	ImageURL("https://example.com/diagram.png", core.ImageDetailHigh).
	Done().
	GetResponse(ctx)

// Detail levels:
// - core.ImageDetailAuto (default) - Model decides
// - core.ImageDetailLow  - Faster, lower token usage
// - core.ImageDetailHigh - More detailed analysis
```

| Format | OpenAI | Anthropic | Gemini | Ollama |
|---|---|---|---|---|
| PNG | ✓ | ✓ | ✓ | ✓ |
| JPEG | ✓ | ✓ | ✓ | ✓ |
| GIF | ✓ | ✓ | ✓ | - |
| WebP | ✓ | ✓ | ✓ | - |
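If you need to validate images before sending them, a small sketch that encodes the table above as a lookup (the map and helper are illustrative, not part of Iris):

```go
// supportedImageFormats mirrors the compatibility table above.
var supportedImageFormats = map[string]map[string]bool{
	"openai":    {"image/png": true, "image/jpeg": true, "image/gif": true, "image/webp": true},
	"anthropic": {"image/png": true, "image/jpeg": true, "image/gif": true, "image/webp": true},
	"gemini":    {"image/png": true, "image/jpeg": true, "image/gif": true, "image/webp": true},
	"ollama":    {"image/png": true, "image/jpeg": true},
}

// formatSupported reports whether a provider accepts the given MIME type.
func formatSupported(providerID, mimeType string) bool {
	return supportedImageFormats[providerID][mimeType]
}
```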
Embeddings convert text into dense vectors for semantic search, clustering, and RAG pipelines.
```go
// Single text
resp, err := client.Embed("text-embedding-3-small").
	Text("The quick brown fox jumps over the lazy dog.").
	GetEmbedding(ctx)
if err != nil {
	return err
}

fmt.Printf("Dimensions: %d\n", len(resp.Embedding))
// Dimensions: 1536
```

```go
// Batch embedding
resp, err := client.Embed("text-embedding-3-small").
	Texts(
		"First document",
		"Second document",
		"Third document",
	).
	GetEmbeddings(ctx)

for i, emb := range resp.Embeddings {
	fmt.Printf("Document %d: %d dimensions\n", i, len(emb))
}
```

Embedding requests accept configuration options:

```go
resp, err := client.Embed("text-embedding-3-small").
	Text("Query text").
	Dimensions(512).         // Reduce dimensions (if supported)
	EncodingFormat("float"). // "float" or "base64"
	GetEmbedding(ctx)
```

```go
// Generate a query embedding
queryEmb, _ := client.Embed("text-embedding-3-small").
	Text("What is machine learning?").
	GetEmbedding(ctx)
```
```go
// Search a vector store (Qdrant example)
results, err := qdrantClient.Search(&qdrant.SearchRequest{
	Vector: queryEmb.Embedding,
	Limit:  5,
})

// Use the results in a RAG prompt
var contextText strings.Builder
for _, r := range results {
	contextText.WriteString(r.Payload["text"].(string))
	contextText.WriteString("\n---\n")
}

resp, _ := client.Chat("gpt-4o").
	System("Answer based on the provided context.").
	User(fmt.Sprintf("Context:\n%s\n\nQuestion: What is machine learning?", contextText.String())).
	GetResponse(ctx)
```
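Without a vector store, you can compare embeddings directly with cosine similarity. A minimal sketch (cosineSimilarity is your own helper, and the []float32 element type is an assumption; match Iris's actual embedding type):

```go
import "math"

// cosineSimilarity is an illustrative helper, not part of Iris.
// Assumes embeddings are []float32 slices of equal length.
func cosineSimilarity(a, b []float32) float64 {
	var dot, normA, normB float64
	for i := range a {
		dot += float64(a[i]) * float64(b[i])
		normA += float64(a[i]) * float64(a[i])
		normB += float64(b[i]) * float64(b[i])
	}
	if normA == 0 || normB == 0 {
		return 0
	}
	return dot / (math.Sqrt(normA) * math.Sqrt(normB))
}
```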
Telemetry hooks let you instrument Iris requests for observability, debugging, and cost tracking.

```go
type TelemetryHook interface {
	OnRequestStart(ctx context.Context, req *TelemetryRequest)
	OnRequestEnd(ctx context.Context, req *TelemetryRequest, resp *TelemetryResponse, err error)
}

type TelemetryRequest struct {
	Provider  string
	Model     string
	Messages  int // Number of messages
	HasTools  bool
	StartTime time.Time
}

type TelemetryResponse struct {
	Duration     time.Duration
	PromptTokens int
	OutputTokens int
	TotalTokens  int
	FinishReason string
}
```

```go
type LoggingHook struct {
	logger *log.Logger
}

func (h *LoggingHook) OnRequestStart(ctx context.Context, req *core.TelemetryRequest) {
	h.logger.Printf("Starting request to %s/%s", req.Provider, req.Model)
}

func (h *LoggingHook) OnRequestEnd(ctx context.Context, req *core.TelemetryRequest, resp *core.TelemetryResponse, err error) {
	if err != nil {
		h.logger.Printf("Request failed: %v", err)
		return
	}
	h.logger.Printf("Request completed in %v, tokens: %d", resp.Duration, resp.TotalTokens)
}

// Use the hook
client := core.NewClient(provider,
	core.WithTelemetry(&LoggingHook{logger: log.Default()}),
)
```

```go
type CostTracker struct {
	mu    sync.Mutex
	costs map[string]float64
}

func NewCostTracker() *CostTracker {
	return &CostTracker{costs: make(map[string]float64)}
}

// OnRequestStart is a no-op; CostTracker only needs completion data.
func (t *CostTracker) OnRequestStart(ctx context.Context, req *core.TelemetryRequest) {}

func (t *CostTracker) OnRequestEnd(ctx context.Context, req *core.TelemetryRequest, resp *core.TelemetryResponse, err error) {
	if err != nil || resp == nil {
		return
	}

	cost := calculateCost(req.Model, resp.PromptTokens, resp.OutputTokens)

	t.mu.Lock()
	t.costs[req.Model] += cost
	t.mu.Unlock()
}

func calculateCost(model string, promptTokens, outputTokens int) float64 {
	// Model-specific pricing
	prices := map[string]struct{ prompt, output float64 }{
		"gpt-4o":      {0.005, 0.015}, // per 1K tokens
		"gpt-4o-mini": {0.00015, 0.0006},
	}

	p, ok := prices[model]
	if !ok {
		return 0
	}

	return (float64(promptTokens)/1000)*p.prompt + (float64(outputTokens)/1000)*p.output
}
```

```go
import (
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

type OTelHook struct {
	tracer trace.Tracer
	spans  sync.Map // *core.TelemetryRequest -> trace.Span
}

func (h *OTelHook) OnRequestStart(ctx context.Context, req *core.TelemetryRequest) {
	_, span := h.tracer.Start(ctx, "llm.request",
		trace.WithAttributes(
			attribute.String("llm.provider", req.Provider),
			attribute.String("llm.model", req.Model),
		),
	)
	// OnRequestStart cannot return a new context, so keep the span for
	// OnRequestEnd keyed by the request (this assumes the same
	// *TelemetryRequest pointer is passed to both callbacks).
	h.spans.Store(req, span)
}

func (h *OTelHook) OnRequestEnd(ctx context.Context, req *core.TelemetryRequest, resp *core.TelemetryResponse, err error) {
	v, ok := h.spans.LoadAndDelete(req)
	if !ok {
		return
	}
	span := v.(trace.Span)
	if resp != nil {
		span.SetAttributes(
			attribute.Int("llm.tokens.total", resp.TotalTokens),
			attribute.String("llm.finish_reason", resp.FinishReason),
		)
	}
	if err != nil {
		span.RecordError(err)
	}
	span.End()
}
```

Retry policies handle transient failures with configurable backoff strategies.
```go
// Iris includes a sensible default
client := core.NewClient(provider) // Uses the default retry policy
```

```go
type RetryPolicy struct {
	MaxAttempts       int           // Maximum retry attempts
	InitialBackoff    time.Duration // First retry delay
	MaxBackoff        time.Duration // Maximum delay between retries
	BackoffMultiplier float64       // Exponential multiplier
	RetryableErrors   []string      // Error types to retry
}
```

```go
policy := &core.RetryPolicy{
	MaxAttempts:       5,
	InitialBackoff:    1 * time.Second,
	MaxBackoff:        60 * time.Second,
	BackoffMultiplier: 2.0,
	RetryableErrors:   []string{"rate_limit", "server_error", "timeout"},
}

client := core.NewClient(provider,
	core.WithRetryPolicy(policy),
)
```

| Error Type | Retryable | Reason |
|---|---|---|
| Rate limit (429) | ✓ | Temporary, will resolve |
| Server error (500+) | ✓ | Transient infrastructure issue |
| Timeout | ✓ | Network glitch |
| Authentication (401) | ✗ | Invalid credentials won’t change |
| Bad request (400) | ✗ | Request itself is malformed |
| Context length | ✗ | Input is too long |
| Content filter | ✗ | Content policy violation |
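With the policy above, each retry waits BackoffMultiplier times longer than the last, capped at MaxBackoff: 1s, 2s, 4s, 8s, and so on up to 60s. A sketch of that arithmetic (illustrative only, not Iris internals):

```go
// backoffFor shows how the policy's fields interact; Iris computes this for you.
func backoffFor(p *core.RetryPolicy, attempt int) time.Duration {
	d := p.InitialBackoff
	for i := 0; i < attempt; i++ {
		d = time.Duration(float64(d) * p.BackoffMultiplier)
	}
	if d > p.MaxBackoff {
		return p.MaxBackoff
	}
	return d
}
```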
Disable retries entirely with the NoRetry policy:

```go
client := core.NewClient(provider,
	core.WithRetryPolicy(core.NoRetry),
)
```

The core.Secret type wraps sensitive values to prevent accidental logging or exposure.
```go
type Secret struct {
	value string
}

func NewSecret(value string) Secret {
	return Secret{value: value}
}

func (s Secret) String() string {
	return "[REDACTED]" // Never exposes the actual value
}

func (s Secret) Expose() string {
	return s.value // Explicit access required
}

func (s Secret) IsEmpty() bool {
	return s.value == ""
}
```

```go
// Create a secret
apiKey := core.NewSecret("sk-actual-api-key-here")

// Safe to log
log.Printf("Using API key: %s", apiKey)
// Output: Using API key: [REDACTED]

// Explicit access for actual use
provider := openai.New(apiKey.Expose())
```

The encrypted keystore returns secrets:
```go
keystore, err := core.LoadKeystore()
if err != nil {
	return err
}

// Returns core.Secret, not string
apiKey, err := keystore.Get("openai")
if err != nil {
	return err
}

// Safe to pass around; won't leak in logs
provider := openai.New(apiKey.Expose())
```

Iris provides typed errors for different failure scenarios, enabling precise error handling.
```go
// Base error
type IrisError struct {
	Code    string // Error code
	Message string // Human-readable message
	Cause   error  // Underlying error
}

// Specific error types
type RateLimitError struct {
	IrisError
	RetryAfter time.Duration // When to retry
}

type AuthenticationError struct {
	IrisError
	Provider string // Which provider failed
}

type ContextLengthError struct {
	IrisError
	MaxTokens    int // Model's limit
	ActualTokens int // Request size
}

type ContentFilterError struct {
	IrisError
	FilterType string // What was filtered
}
```

```go
resp, err := client.Chat("gpt-4o").User("Hello").GetResponse(ctx)
if err != nil {
	var rateLimitErr *core.RateLimitError
	var authErr *core.AuthenticationError
	var contextErr *core.ContextLengthError
	var contentErr *core.ContentFilterError

	switch {
	case errors.As(err, &rateLimitErr):
		// Wait and retry
		time.Sleep(rateLimitErr.RetryAfter)
		return retry(ctx)

	case errors.As(err, &authErr):
		// Check credentials
		return fmt.Errorf("invalid API key for %s", authErr.Provider)

	case errors.As(err, &contextErr):
		// Reduce input size
		return fmt.Errorf("input too long: %d tokens (max %d)",
			contextErr.ActualTokens, contextErr.MaxTokens)

	case errors.As(err, &contentErr):
		// Handle policy violation
		return fmt.Errorf("content filtered: %s", contentErr.FilterType)

	default:
		// Unknown error
		return fmt.Errorf("request failed: %w", err)
	}
}
```

Providers
Configure specific providers. Provider Setup →
Streaming Guide
Advanced streaming patterns. Streaming →
Tools Guide
Build tool-augmented agents. Tools →
Examples
See complete working examples. Examples →