Iris provides a unified interface for working with multiple LLM providers. This guide covers provider configuration, capability checking, model selection, runtime switching, and fallback patterns. For detailed per-provider setup, see the Providers section.
| Provider | Package | Chat | Stream | Tools | Vision | Embeddings |
|---|---|---|---|---|---|---|
| OpenAI | providers/openai | ✓ | ✓ | ✓ | ✓ | ✓ |
| Anthropic | providers/anthropic | ✓ | ✓ | ✓ | ✓ | - |
| Gemini | providers/gemini | ✓ | ✓ | ✓ | ✓ | ✓ |
| xAI (Grok) | providers/xai | ✓ | ✓ | ✓ | ✓ | - |
| Z.ai | providers/zai | ✓ | ✓ | - | - | - |
| Perplexity | providers/perplexity | ✓ | ✓ | - | - | - |
| Ollama | providers/ollama | ✓ | ✓ | ✓ | ✓ | ✓ |
| Voyage AI | providers/voyageai | - | - | - | - | ✓ |
| Hugging Face | providers/huggingface | ✓ | ✓ | - | - | ✓ |
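The same client code runs against any provider in this table. A minimal sketch of the unified interface (the request shape is from the examples below; error handling and output formatting are illustrative):

```go
package main

import (
    "context"
    "fmt"
    "log"
    "os"

    "github.com/petal-labs/iris/core"
    "github.com/petal-labs/iris/providers/anthropic"
    "github.com/petal-labs/iris/providers/openai"
)

func main() {
    ctx := context.Background()

    // Same request shape, two different providers.
    for _, p := range []core.Provider{
        openai.New(os.Getenv("OPENAI_API_KEY")),
        anthropic.New(os.Getenv("ANTHROPIC_API_KEY")),
    } {
        client := core.NewClient(p)
        resp, err := client.Chat(p.DefaultModel()).
            User("Say hello in one sentence.").
            GetResponse(ctx)
        if err != nil {
            log.Printf("%s: %v", p.Name(), err)
            continue
        }
        fmt.Printf("%s: %v\n", p.Name(), resp)
    }
}
```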
Each provider has specific configuration requirements. Here’s how to create instances for each supported provider.
```go
package main

import (
    "net/http"
    "os"
    "time"

    "github.com/petal-labs/iris/core"
    "github.com/petal-labs/iris/providers/openai"
)

func main() {
    // Basic creation with API key
    provider := openai.New(os.Getenv("OPENAI_API_KEY"))

    // With options, including a custom HTTP client
    customClient := &http.Client{Timeout: 60 * time.Second}
    provider = openai.New(
        os.Getenv("OPENAI_API_KEY"),
        openai.WithOrganization("org-xxx"),
        openai.WithBaseURL("https://custom-endpoint.example.com/v1"),
        openai.WithHTTPClient(customClient),
    )

    client := core.NewClient(provider)
    _ = client
}
```

Configuration Options:

- `WithOrganization(org)`: Set organization ID for billing
- `WithBaseURL(url)`: Use custom endpoint (Azure OpenAI, proxies)
- `WithHTTPClient(client)`: Use custom HTTP client with timeouts/proxies
- `WithProject(project)`: Set project ID for usage tracking
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/anthropic")
func main() { // Basic creation provider := anthropic.New(os.Getenv("ANTHROPIC_API_KEY"))
// With options provider = anthropic.New( os.Getenv("ANTHROPIC_API_KEY"), anthropic.WithBaseURL("https://custom-endpoint.example.com"), anthropic.WithVersion("2024-01-01"), anthropic.WithBeta("extended-thinking"), )
client := core.NewClient(provider)}Configuration Options:
WithBaseURL(url): Use custom endpointWithVersion(version): Specify API versionWithBeta(features...): Enable beta features (extended thinking, computer use)package main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/gemini")
func main() { // Basic creation provider := gemini.New(os.Getenv("GEMINI_API_KEY"))
// With options provider = gemini.New( os.Getenv("GEMINI_API_KEY"), gemini.WithProject("my-gcp-project"), gemini.WithLocation("us-central1"), )
client := core.NewClient(provider)}Configuration Options:
WithProject(project): GCP project for Vertex AIWithLocation(location): GCP regionWithBaseURL(url): Custom endpointpackage main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/xai")
func main() { provider := xai.New(os.Getenv("XAI_API_KEY")) client := core.NewClient(provider)}package main
import ( "github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/ollama")
func main() { // Default localhost provider := ollama.New("http://localhost:11434")
// Remote Ollama instance provider = ollama.New( "http://ollama.internal:11434", ollama.WithTimeout(120*time.Second), )
client := core.NewClient(provider)}Configuration Options:
WithTimeout(duration): Request timeout for long generationsWithHTTPClient(client): Custom HTTP clientpackage main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/voyageai")
func main() { provider := voyageai.New(os.Getenv("VOYAGEAI_API_KEY")) client := core.NewClient(provider)
// Use for embeddings embedding, err := client.Embed("voyage-3"). Text("Search query text"). GetEmbedding(ctx)}package main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/huggingface")
func main() { provider := huggingface.New(os.Getenv("HUGGINGFACE_API_KEY"))
// With custom endpoint (Inference Endpoints) provider = huggingface.New( os.Getenv("HUGGINGFACE_API_KEY"), huggingface.WithEndpoint("https://xxx.us-east-1.aws.endpoints.huggingface.cloud"), )
client := core.NewClient(provider)}Before using provider-specific features, check if the provider supports them. This prevents runtime errors when switching between providers.
```go
// Core capabilities
core.FeatureChat            // Basic chat completion
core.FeatureStreaming       // Streaming responses
core.FeatureToolCalling     // Function/tool calling
core.FeatureVision          // Image input (multimodal)
core.FeatureEmbeddings      // Text embeddings
core.FeatureJSONMode        // Structured JSON output
core.FeatureReasoning       // Extended thinking/reasoning

// Advanced capabilities
core.FeatureImageGeneration // Image creation (DALL-E)
core.FeatureBatchProcessing // Batch API support
core.FeatureFineTuning      // Model fine-tuning
```

```go
// Check a single capability
if provider.Supports(core.FeatureToolCalling) {
    builder = builder.Tools(myTools...)
}

// Check multiple capabilities
if provider.Supports(core.FeatureVision) && provider.Supports(core.FeatureStreaming) {
    // Safe to stream vision requests
    stream, err := client.Chat("gpt-4o").
        UserMultimodal().
        Text("Describe this image").
        ImageURL(imageURL).
        Done().
        GetStream(ctx)
}
```
```go
// Get all capabilities
caps := provider.Capabilities()
fmt.Printf("Provider supports: %v\n", caps)
```

```go
func ProcessQuery(ctx context.Context, client *core.Client, provider core.Provider, query string, tools []core.Tool) (*core.ChatResponse, error) {
    builder := client.Chat(provider.DefaultModel()).
        System("You are a helpful assistant.").
        User(query)

    // Only add tools if supported
    if len(tools) > 0 && provider.Supports(core.FeatureToolCalling) {
        builder = builder.Tools(tools...)
    }

    // Only request JSON if supported
    if provider.Supports(core.FeatureJSONMode) {
        builder = builder.ResponseFormat(core.ResponseFormatJSON)
    }

    return builder.GetResponse(ctx)
}
```
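Because `ProcessQuery` gates every optional feature on `Supports`, the same call works across providers with different capabilities. A usage sketch, where `openaiProvider`, `ollamaProvider`, and `myTools` are placeholders your application would define:

```go
for _, p := range []core.Provider{openaiProvider, ollamaProvider} {
    client := core.NewClient(p)
    resp, err := ProcessQuery(ctx, client, p, "Summarize today's incidents", myTools)
    if err != nil {
        log.Printf("%s failed: %v", p.Name(), err)
        continue
    }
    fmt.Printf("%s: %v\n", p.Name(), resp)
}
```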
Specify the model for each request:

```go
// Use a specific model
resp, err := client.Chat("gpt-4o").
    User("Complex reasoning task").
    GetResponse(ctx)

// Use a cheaper model for simple tasks
resp, err = client.Chat("gpt-4o-mini").
    User("What's 2+2?").
    GetResponse(ctx)
```

Each provider defines a default model:

```go
// Use the provider's default
resp, err := client.Chat(provider.DefaultModel()).
    User("Hello").
    GetResponse(ctx)
```

Query model capabilities and limits:

```go
// Get model info
info, err := provider.ModelInfo("gpt-4o")
if err != nil {
    log.Fatal(err)
}

fmt.Printf("Model: %s\n", info.Name)
fmt.Printf("Context window: %d tokens\n", info.ContextWindow)
fmt.Printf("Max output: %d tokens\n", info.MaxOutputTokens)
fmt.Printf("Supports vision: %v\n", info.SupportsVision)
fmt.Printf("Supports tools: %v\n", info.SupportsTools)
```
Choose models based on task requirements:

```go
type TaskType int

const (
    TaskReasoning TaskType = iota
    TaskSimpleChat
    TaskVision
    TaskCodeGeneration
)

type ModelSelector struct {
    provider core.Provider
}

func (s *ModelSelector) SelectModel(task TaskType) string {
    switch task {
    case TaskReasoning:
        // Use the most capable model for complex reasoning
        return "gpt-4o"
    case TaskSimpleChat:
        // Use a fast, cheap model for simple queries
        return "gpt-4o-mini"
    case TaskVision:
        // Use a vision-capable model
        return "gpt-4o"
    case TaskCodeGeneration:
        // Use a model optimized for code
        return "gpt-4o"
    default:
        return s.provider.DefaultModel()
    }
}
```
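A usage sketch; classifying incoming requests into `TaskType` values is left to the application:

```go
selector := &ModelSelector{provider: provider}

model := selector.SelectModel(TaskSimpleChat)
resp, err := client.Chat(model).
    User("What's the capital of France?").
    GetResponse(ctx)
```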
Maintain a registry of providers for runtime switching:

```go
package main

import (
    "fmt"
    "log"
    "os"
    "sync"

    "github.com/petal-labs/iris/core"
    "github.com/petal-labs/iris/providers/anthropic"
    "github.com/petal-labs/iris/providers/openai"
)

type ProviderRegistry struct {
    mu        sync.RWMutex
    providers map[string]core.Provider
    current   string
}

func NewProviderRegistry() *ProviderRegistry {
    return &ProviderRegistry{
        providers: make(map[string]core.Provider),
        current:   "openai", // default
    }
}

func (r *ProviderRegistry) Register(name string, provider core.Provider) {
    r.mu.Lock()
    defer r.mu.Unlock()
    r.providers[name] = provider
}

func (r *ProviderRegistry) Get(name string) (core.Provider, bool) {
    r.mu.RLock()
    defer r.mu.RUnlock()
    p, ok := r.providers[name]
    return p, ok
}

func (r *ProviderRegistry) Current() core.Provider {
    r.mu.RLock()
    defer r.mu.RUnlock()
    return r.providers[r.current]
}

func (r *ProviderRegistry) Switch(name string) error {
    r.mu.Lock()
    defer r.mu.Unlock()
    if _, ok := r.providers[name]; !ok {
        return fmt.Errorf("provider %s not registered", name)
    }
    r.current = name
    return nil
}

func main() {
    registry := NewProviderRegistry()

    // Register providers
    registry.Register("openai", openai.New(os.Getenv("OPENAI_API_KEY")))
    registry.Register("anthropic", anthropic.New(os.Getenv("ANTHROPIC_API_KEY")))

    // Use the current provider
    client := core.NewClient(registry.Current())

    // Switch at runtime
    if err := registry.Switch("anthropic"); err != nil {
        log.Fatal(err)
    }
    client = core.NewClient(registry.Current())
    _ = client
}
```
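The registry also makes capability-based selection straightforward. A sketch of a hypothetical helper (not part of Iris), assuming the feature constants share a `core.Feature` type:

```go
// Hypothetical helper: return the first registered provider that
// supports the given feature. Map iteration order is not guaranteed.
func (r *ProviderRegistry) FirstSupporting(feature core.Feature) (core.Provider, bool) {
    r.mu.RLock()
    defer r.mu.RUnlock()
    for _, p := range r.providers {
        if p.Supports(feature) {
            return p, true
        }
    }
    return nil, false
}
```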
Load provider selection from configuration:

```go
type Config struct {
    DefaultProvider string `yaml:"default_provider"`
    Providers       map[string]struct {
        APIKeyEnv    string `yaml:"api_key_env"`
        DefaultModel string `yaml:"default_model"`
        BaseURL      string `yaml:"base_url,omitempty"`
    } `yaml:"providers"`
}

func (c *Config) CreateProvider(name string) (core.Provider, error) {
    cfg, ok := c.Providers[name]
    if !ok {
        return nil, fmt.Errorf("provider %s not configured", name)
    }

    apiKey := os.Getenv(cfg.APIKeyEnv)
    if apiKey == "" {
        return nil, fmt.Errorf("API key not set: %s", cfg.APIKeyEnv)
    }

    switch name {
    case "openai":
        opts := []openai.Option{}
        if cfg.BaseURL != "" {
            opts = append(opts, openai.WithBaseURL(cfg.BaseURL))
        }
        return openai.New(apiKey, opts...), nil
    case "anthropic":
        return anthropic.New(apiKey), nil
    default:
        return nil, fmt.Errorf("unknown provider: %s", name)
    }
}
```
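To round this out, a loading sketch assuming `gopkg.in/yaml.v3` (any YAML library works; the file path is hypothetical):

```go
// Read the config file and build the default provider.
data, err := os.ReadFile("iris.yaml") // hypothetical path
if err != nil {
    log.Fatal(err)
}

var config Config
if err := yaml.Unmarshal(data, &config); err != nil {
    log.Fatal(err)
}

provider, err := config.CreateProvider(config.DefaultProvider)
if err != nil {
    log.Fatal(err)
}
client := core.NewClient(provider)
```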
Implement fallback strategies for reliability and cost optimization.

```go
func ChatWithFallback(ctx context.Context, providers []core.Provider, prompt string) (*core.ChatResponse, error) {
    var lastErr error

    for _, provider := range providers {
        client := core.NewClient(provider)
        resp, err := client.Chat(provider.DefaultModel()).
            User(prompt).
            GetResponse(ctx)

        if err == nil {
            return resp, nil
        }

        lastErr = err
        log.Printf("Provider %s failed: %v, trying next", provider.Name(), err)
    }

    return nil, fmt.Errorf("all providers failed, last error: %w", lastErr)
}
```

```go
// Usage
providers := []core.Provider{
    openai.New(os.Getenv("OPENAI_API_KEY")),
    anthropic.New(os.Getenv("ANTHROPIC_API_KEY")),
    ollama.New("http://localhost:11434"),
}

resp, err := ChatWithFallback(ctx, providers, "Hello, world!")
```
To avoid retrying errors that will fail on every provider (bad credentials, malformed requests), fall back only on transient failures:

```go
func ChatWithSmartFallback(ctx context.Context, primary, fallback core.Provider, prompt string) (*core.ChatResponse, error) {
    client := core.NewClient(primary)
    resp, err := client.Chat(primary.DefaultModel()).
        User(prompt).
        GetResponse(ctx)

    if err == nil {
        return resp, nil
    }

    // Only fall back on specific errors
    var rateLimitErr *core.RateLimitError
    var serverErr *core.ServerError

    if errors.As(err, &rateLimitErr) || errors.As(err, &serverErr) {
        log.Printf("Primary provider unavailable (%v), falling back", err)

        client = core.NewClient(fallback)
        return client.Chat(fallback.DefaultModel()).
            User(prompt).
            GetResponse(ctx)
    }

    // Don't fall back on auth errors or invalid requests
    return nil, err
}
```

Distribute requests across multiple keys or accounts with round-robin selection:

```go
type LoadBalancedProvider struct {
    providers []core.Provider
    index     uint64
}

func NewLoadBalancedProvider(providers ...core.Provider) *LoadBalancedProvider {
    return &LoadBalancedProvider{providers: providers}
}

func (lb *LoadBalancedProvider) Next() core.Provider {
    idx := atomic.AddUint64(&lb.index, 1)
    return lb.providers[idx%uint64(len(lb.providers))]
}

// Usage
lb := NewLoadBalancedProvider(
    openai.New(apiKey1),
    openai.New(apiKey2), // Different API keys for rate limit distribution
)

client := core.NewClient(lb.Next())
```
Route by estimated cost, sending short prompts to a cheaper model:

```go
type CostOptimizedRouter struct {
    cheap     core.Provider
    expensive core.Provider
    threshold int // token threshold
}

func (r *CostOptimizedRouter) Route(prompt string) core.Provider {
    // Estimate tokens (rough approximation: ~4 characters per token)
    estimatedTokens := len(prompt) / 4

    if estimatedTokens < r.threshold {
        return r.cheap
    }
    return r.expensive
}

// Usage
router := &CostOptimizedRouter{
    cheap:     openai.New(apiKey), // Pair with gpt-4o-mini for short prompts
    expensive: openai.New(apiKey), // Pair with gpt-4o for complex tasks
    threshold: 1000,
}

provider := router.Route(userPrompt)
client := core.NewClient(provider)
```

As a starting point, match providers to use cases:

| Use Case | Recommended Provider | Reason |
|---|---|---|
| General chat | OpenAI, Anthropic | Best balance of quality and speed |
| Complex reasoning | Anthropic Claude | Extended thinking capabilities |
| Vision tasks | OpenAI GPT-4o | Best vision understanding |
| Code generation | OpenAI, Anthropic | Strong code capabilities |
| Local/private | Ollama | No data leaves your infrastructure |
| Search-augmented | Perplexity | Built-in web search |
| Embeddings | OpenAI, Voyage AI | High-quality embeddings |
| Cost-sensitive | OpenAI Mini, Ollama | Lower per-token costs |
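The table can be encoded as a routing helper. A sketch building on the registry example above; the function and the use-case strings are hypothetical, and the mapping is illustrative rather than exhaustive:

```go
// Hypothetical mapping of the table above onto registry lookups.
// Registry names ("openai", "anthropic", "ollama") follow the registry example.
func ProviderForUseCase(registry *ProviderRegistry, useCase string) (core.Provider, bool) {
    switch useCase {
    case "local", "cost-sensitive":
        return registry.Get("ollama")
    case "reasoning":
        return registry.Get("anthropic")
    default:
        return registry.Get("openai")
    }
}
```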
Set timeouts to match provider characteristics:

```go
// Add timeouts based on provider characteristics
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

// Ollama may need a longer timeout on the first request (model loading)
if provider.Name() == "ollama" {
    ctx, cancel = context.WithTimeout(context.Background(), 120*time.Second)
    defer cancel()
}

resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
```

Keep API keys in environment variables:

```go
// Good
provider := openai.New(os.Getenv("OPENAI_API_KEY"))

// Bad - hardcoded keys
provider := openai.New("sk-...")
```

Check capabilities before using provider-specific features:

```go
// Good
if provider.Supports(core.FeatureToolCalling) {
    builder = builder.Tools(tools...)
}

// Bad - may fail at runtime
builder = builder.Tools(tools...)
```

Handle rate limits explicitly:

```go
resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
if err != nil {
    var rateLimitErr *core.RateLimitError
    if errors.As(err, &rateLimitErr) {
        // Wait and retry, or switch provider
        time.Sleep(rateLimitErr.RetryAfter)
    }
}
```
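Extending that snippet into a bounded retry loop is straightforward. A sketch (the helper name and attempt policy are illustrative; `RetryAfter` is treated as a `time.Duration`, as in the snippet above):

```go
// Hypothetical helper: retry on rate limits up to maxAttempts,
// respecting the provider's suggested wait and the caller's context.
func ChatWithRetry(ctx context.Context, client *core.Client, model, prompt string, maxAttempts int) (*core.ChatResponse, error) {
    var lastErr error
    for attempt := 0; attempt < maxAttempts; attempt++ {
        resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
        if err == nil {
            return resp, nil
        }
        var rateLimitErr *core.RateLimitError
        if !errors.As(err, &rateLimitErr) {
            return nil, err // not retryable
        }
        lastErr = err
        select {
        case <-time.After(rateLimitErr.RetryAfter):
        case <-ctx.Done():
            return nil, ctx.Err()
        }
    }
    return nil, fmt.Errorf("rate limited after %d attempts: %w", maxAttempts, lastErr)
}
```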
Pin model names in configuration rather than in code:

```yaml
providers:
  openai:
    default_model: gpt-4o
    fast_model: gpt-4o-mini
  anthropic:
    default_model: claude-3-5-sonnet-20241022
```

```go
model := config.Providers["openai"].DefaultModel
resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
```

Degrade gracefully across providers:

```go
// tryProvider and the provider variables are defined by your application.
func ProcessWithDegradation(ctx context.Context, prompt string) (*core.ChatResponse, error) {
    // Try the primary provider
    resp, err := tryProvider(ctx, primaryProvider, prompt)
    if err == nil {
        return resp, nil
    }

    // Log and try the fallback
    log.Printf("Primary failed: %v, using fallback", err)
    resp, err = tryProvider(ctx, fallbackProvider, prompt)
    if err == nil {
        return resp, nil
    }

    // Final fallback: local model
    log.Printf("Fallback failed: %v, using local model", err)
    return tryProvider(ctx, ollamaProvider, prompt)
}
```

Streaming Guide
Stream responses in real-time. Streaming →
Tools Guide
Add function calling to your app. Tools →
Provider Details
In-depth provider configuration. Providers →
Examples
See working multi-provider code. Examples →