Iris provides a unified interface for working with multiple LLM providers. This guide covers provider configuration, capability checking, model selection, runtime switching, and fallback patterns. For detailed per-provider setup, see the Providers section.
| Provider | Package | Chat | Stream | Tools | Vision | Embeddings |
|---|---|---|---|---|---|---|
| OpenAI | providers/openai | ✓ | ✓ | ✓ | ✓ | ✓ |
| Anthropic | providers/anthropic | ✓ | ✓ | ✓ | ✓ | - |
| Gemini | providers/gemini | ✓ | ✓ | ✓ | ✓ | ✓ |
| xAI (Grok) | providers/xai | ✓ | ✓ | ✓ | ✓ | - |
| Z.ai | providers/zai | ✓ | ✓ | - | - | - |
| Perplexity | providers/perplexity | ✓ | ✓ | - | - | - |
| Ollama | providers/ollama | ✓ | ✓ | ✓ | ✓ | ✓ |
| Voyage AI | providers/voyageai | - | - | - | - | ✓ |
| Hugging Face | providers/huggingface | ✓ | ✓ | - | - | ✓ |
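The same client code runs against any provider in this table. A minimal sketch of the unified interface (the request shape is from the examples below; error handling and output formatting are illustrative):

```go
package main

import (
    "context"
    "fmt"
    "log"
    "os"

    "github.com/petal-labs/iris/core"
    "github.com/petal-labs/iris/providers/anthropic"
    "github.com/petal-labs/iris/providers/openai"
)

func main() {
    ctx := context.Background()

    // Same request shape, two different providers.
    for _, p := range []core.Provider{
        openai.New(os.Getenv("OPENAI_API_KEY")),
        anthropic.New(os.Getenv("ANTHROPIC_API_KEY")),
    } {
        client := core.NewClient(p)
        resp, err := client.Chat(p.DefaultModel()).
            User("Say hello in one sentence.").
            GetResponse(ctx)
        if err != nil {
            log.Printf("%s: %v", p.Name(), err)
            continue
        }
        fmt.Printf("%s: %v\n", p.Name(), resp)
    }
}
```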
Each provider has specific configuration requirements. Here’s how to create instances for each supported provider.
```go
package main

import (
    "net/http"
    "os"
    "time"

    "github.com/petal-labs/iris/core"
    "github.com/petal-labs/iris/providers/openai"
)

func main() {
    // Basic creation with API key
    provider := openai.New(os.Getenv("OPENAI_API_KEY"))

    // With options, including a custom HTTP client
    customClient := &http.Client{Timeout: 60 * time.Second}
    provider = openai.New(
        os.Getenv("OPENAI_API_KEY"),
        openai.WithOrganization("org-xxx"),
        openai.WithBaseURL("https://custom-endpoint.example.com/v1"),
        openai.WithHTTPClient(customClient),
    )

    client := core.NewClient(provider)
    _ = client
}
```

Configuration Options:

- `WithOrganization(org)`: Set organization ID for billing
- `WithBaseURL(url)`: Use custom endpoint (Azure OpenAI, proxies)
- `WithHTTPClient(client)`: Use custom HTTP client with timeouts/proxies
- `WithProject(project)`: Set project ID for usage tracking
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/anthropic")
func main() { // Basic creation provider := anthropic.New(os.Getenv("ANTHROPIC_API_KEY"))
// With options provider = anthropic.New( os.Getenv("ANTHROPIC_API_KEY"), anthropic.WithBaseURL("https://custom-endpoint.example.com"), anthropic.WithVersion("2024-01-01"), anthropic.WithBeta("extended-thinking"), )
client := core.NewClient(provider)}Configuration Options:
WithBaseURL(url): Use custom endpointWithVersion(version): Specify API versionWithBeta(features...): Enable beta features (extended thinking, computer use)package main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/gemini")
func main() { // Basic creation provider := gemini.New(os.Getenv("GEMINI_API_KEY"))
// With options provider = gemini.New( os.Getenv("GEMINI_API_KEY"), gemini.WithProject("my-gcp-project"), gemini.WithLocation("us-central1"), )
client := core.NewClient(provider)}Configuration Options:
WithProject(project): GCP project for Vertex AIWithLocation(location): GCP regionWithBaseURL(url): Custom endpointpackage main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/xai")
func main() { provider := xai.New(os.Getenv("XAI_API_KEY")) client := core.NewClient(provider)}package main
import ( "github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/ollama")
func main() { // Default localhost provider := ollama.New("http://localhost:11434")
// Remote Ollama instance provider = ollama.New( "http://ollama.internal:11434", ollama.WithTimeout(120*time.Second), )
client := core.NewClient(provider)}Configuration Options:
WithTimeout(duration): Request timeout for long generationsWithHTTPClient(client): Custom HTTP clientpackage main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/voyageai")
func main() { provider := voyageai.New(os.Getenv("VOYAGEAI_API_KEY")) client := core.NewClient(provider)
// Use for embeddings embedding, err := client.Embed("voyage-3"). Text("Search query text"). GetEmbedding(ctx)}package main
import ( "os"
"github.com/petal-labs/iris/core" "github.com/petal-labs/iris/providers/huggingface")
func main() { provider := huggingface.New(os.Getenv("HUGGINGFACE_API_KEY"))
// With custom endpoint (Inference Endpoints) provider = huggingface.New( os.Getenv("HUGGINGFACE_API_KEY"), huggingface.WithEndpoint("https://xxx.us-east-1.aws.endpoints.huggingface.cloud"), )
client := core.NewClient(provider)}Before using provider-specific features, check if the provider supports them. This prevents runtime errors when switching between providers.
```go
// Core capabilities
core.FeatureChat            // Basic chat completion
core.FeatureStreaming       // Streaming responses
core.FeatureToolCalling     // Function/tool calling
core.FeatureVision          // Image input (multimodal)
core.FeatureEmbeddings      // Text embeddings
core.FeatureJSONMode        // Structured JSON output
core.FeatureReasoning       // Extended thinking/reasoning

// Advanced capabilities
core.FeatureImageGeneration // Image creation (DALL-E)
core.FeatureBatchProcessing // Batch API support
core.FeatureFineTuning      // Model fine-tuning
```

```go
// Check a single capability
if provider.Supports(core.FeatureToolCalling) {
    builder = builder.Tools(myTools...)
}

// Check multiple capabilities
if provider.Supports(core.FeatureVision) && provider.Supports(core.FeatureStreaming) {
    // Safe to stream vision requests
    stream, err := client.Chat("gpt-4o").
        UserMultimodal().
        Text("Describe this image").
        ImageURL(imageURL).
        Done().
        GetStream(ctx)
}
```
```go
// Get all capabilities
caps := provider.Capabilities()
fmt.Printf("Provider supports: %v\n", caps)
```

```go
func ProcessQuery(ctx context.Context, client *core.Client, provider core.Provider, query string, tools []core.Tool) (*core.ChatResponse, error) {
    builder := client.Chat(provider.DefaultModel()).
        System("You are a helpful assistant.").
        User(query)

    // Only add tools if supported
    if len(tools) > 0 && provider.Supports(core.FeatureToolCalling) {
        builder = builder.Tools(tools...)
    }

    // Only request JSON if supported
    if provider.Supports(core.FeatureJSONMode) {
        builder = builder.ResponseFormat(core.ResponseFormatJSON)
    }

    return builder.GetResponse(ctx)
}
```
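Because `ProcessQuery` gates every optional feature on `Supports`, the same call works across providers with different capabilities. A usage sketch, where `openaiProvider`, `ollamaProvider`, and `myTools` are placeholders your application would define:

```go
for _, p := range []core.Provider{openaiProvider, ollamaProvider} {
    client := core.NewClient(p)
    resp, err := ProcessQuery(ctx, client, p, "Summarize today's incidents", myTools)
    if err != nil {
        log.Printf("%s failed: %v", p.Name(), err)
        continue
    }
    fmt.Printf("%s: %v\n", p.Name(), resp)
}
```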
Specify the model for each request:

```go
// Use a specific model
resp, err := client.Chat("gpt-4o").
    User("Complex reasoning task").
    GetResponse(ctx)

// Use a cheaper model for simple tasks
resp, err = client.Chat("gpt-4o-mini").
    User("What's 2+2?").
    GetResponse(ctx)
```

Each provider defines a default model:

```go
// Use the provider's default
resp, err := client.Chat(provider.DefaultModel()).
    User("Hello").
    GetResponse(ctx)
```

Query model capabilities and limits:

```go
// Get model info
info, err := provider.ModelInfo("gpt-4o")
if err != nil {
    log.Fatal(err)
}

fmt.Printf("Model: %s\n", info.Name)
fmt.Printf("Context window: %d tokens\n", info.ContextWindow)
fmt.Printf("Max output: %d tokens\n", info.MaxOutputTokens)
fmt.Printf("Supports vision: %v\n", info.SupportsVision)
fmt.Printf("Supports tools: %v\n", info.SupportsTools)
```
Choose models based on task requirements:

```go
type TaskType int

const (
    TaskReasoning TaskType = iota
    TaskSimpleChat
    TaskVision
    TaskCodeGeneration
)

type ModelSelector struct {
    provider core.Provider
}

func (s *ModelSelector) SelectModel(task TaskType) string {
    switch task {
    case TaskReasoning:
        // Use the most capable model for complex reasoning
        return "gpt-4o"
    case TaskSimpleChat:
        // Use a fast, cheap model for simple queries
        return "gpt-4o-mini"
    case TaskVision:
        // Use a vision-capable model
        return "gpt-4o"
    case TaskCodeGeneration:
        // Use a model optimized for code
        return "gpt-4o"
    default:
        return s.provider.DefaultModel()
    }
}
```
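A usage sketch; classifying incoming requests into `TaskType` values is left to the application:

```go
selector := &ModelSelector{provider: provider}

model := selector.SelectModel(TaskSimpleChat)
resp, err := client.Chat(model).
    User("What's the capital of France?").
    GetResponse(ctx)
```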
Maintain a registry of providers for runtime switching:

```go
package main

import (
    "fmt"
    "log"
    "os"
    "sync"

    "github.com/petal-labs/iris/core"
    "github.com/petal-labs/iris/providers/anthropic"
    "github.com/petal-labs/iris/providers/openai"
)

type ProviderRegistry struct {
    mu        sync.RWMutex
    providers map[string]core.Provider
    current   string
}

func NewProviderRegistry() *ProviderRegistry {
    return &ProviderRegistry{
        providers: make(map[string]core.Provider),
        current:   "openai", // default
    }
}

func (r *ProviderRegistry) Register(name string, provider core.Provider) {
    r.mu.Lock()
    defer r.mu.Unlock()
    r.providers[name] = provider
}

func (r *ProviderRegistry) Get(name string) (core.Provider, bool) {
    r.mu.RLock()
    defer r.mu.RUnlock()
    p, ok := r.providers[name]
    return p, ok
}

func (r *ProviderRegistry) Current() core.Provider {
    r.mu.RLock()
    defer r.mu.RUnlock()
    return r.providers[r.current]
}

func (r *ProviderRegistry) Switch(name string) error {
    r.mu.Lock()
    defer r.mu.Unlock()
    if _, ok := r.providers[name]; !ok {
        return fmt.Errorf("provider %s not registered", name)
    }
    r.current = name
    return nil
}

func main() {
    registry := NewProviderRegistry()

    // Register providers
    registry.Register("openai", openai.New(os.Getenv("OPENAI_API_KEY")))
    registry.Register("anthropic", anthropic.New(os.Getenv("ANTHROPIC_API_KEY")))

    // Use the current provider
    client := core.NewClient(registry.Current())

    // Switch at runtime
    if err := registry.Switch("anthropic"); err != nil {
        log.Fatal(err)
    }
    client = core.NewClient(registry.Current())
    _ = client
}
```
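The registry also makes capability-based selection straightforward. A sketch of a hypothetical helper (not part of Iris), assuming the feature constants share a `core.Feature` type:

```go
// Hypothetical helper: return the first registered provider that
// supports the given feature. Map iteration order is not guaranteed.
func (r *ProviderRegistry) FirstSupporting(feature core.Feature) (core.Provider, bool) {
    r.mu.RLock()
    defer r.mu.RUnlock()
    for _, p := range r.providers {
        if p.Supports(feature) {
            return p, true
        }
    }
    return nil, false
}
```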
Load provider selection from configuration:

```go
type Config struct {
    DefaultProvider string `yaml:"default_provider"`
    Providers       map[string]struct {
        APIKeyEnv    string `yaml:"api_key_env"`
        DefaultModel string `yaml:"default_model"`
        BaseURL      string `yaml:"base_url,omitempty"`
    } `yaml:"providers"`
}

func (c *Config) CreateProvider(name string) (core.Provider, error) {
    cfg, ok := c.Providers[name]
    if !ok {
        return nil, fmt.Errorf("provider %s not configured", name)
    }

    apiKey := os.Getenv(cfg.APIKeyEnv)
    if apiKey == "" {
        return nil, fmt.Errorf("API key not set: %s", cfg.APIKeyEnv)
    }

    switch name {
    case "openai":
        opts := []openai.Option{}
        if cfg.BaseURL != "" {
            opts = append(opts, openai.WithBaseURL(cfg.BaseURL))
        }
        return openai.New(apiKey, opts...), nil
    case "anthropic":
        return anthropic.New(apiKey), nil
    default:
        return nil, fmt.Errorf("unknown provider: %s", name)
    }
}
```
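To round this out, a loading sketch assuming `gopkg.in/yaml.v3` (any YAML library works; the file path is hypothetical):

```go
// Read the config file and build the default provider.
data, err := os.ReadFile("iris.yaml") // hypothetical path
if err != nil {
    log.Fatal(err)
}

var config Config
if err := yaml.Unmarshal(data, &config); err != nil {
    log.Fatal(err)
}

provider, err := config.CreateProvider(config.DefaultProvider)
if err != nil {
    log.Fatal(err)
}
client := core.NewClient(provider)
```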
Implement fallback strategies for reliability and cost optimization.

```go
func ChatWithFallback(ctx context.Context, providers []core.Provider, prompt string) (*core.ChatResponse, error) {
    var lastErr error

    for _, provider := range providers {
        client := core.NewClient(provider)
        resp, err := client.Chat(provider.DefaultModel()).
            User(prompt).
            GetResponse(ctx)

        if err == nil {
            return resp, nil
        }

        lastErr = err
        log.Printf("Provider %s failed: %v, trying next", provider.Name(), err)
    }

    return nil, fmt.Errorf("all providers failed, last error: %w", lastErr)
}
```

```go
// Usage
providers := []core.Provider{
    openai.New(os.Getenv("OPENAI_API_KEY")),
    anthropic.New(os.Getenv("ANTHROPIC_API_KEY")),
    ollama.New("http://localhost:11434"),
}

resp, err := ChatWithFallback(ctx, providers, "Hello, world!")
```
To avoid retrying errors that will fail on every provider (bad credentials, malformed requests), fall back only on transient failures:

```go
func ChatWithSmartFallback(ctx context.Context, primary, fallback core.Provider, prompt string) (*core.ChatResponse, error) {
    client := core.NewClient(primary)
    resp, err := client.Chat(primary.DefaultModel()).
        User(prompt).
        GetResponse(ctx)

    if err == nil {
        return resp, nil
    }

    // Only fall back on specific errors
    var rateLimitErr *core.RateLimitError
    var serverErr *core.ServerError

    if errors.As(err, &rateLimitErr) || errors.As(err, &serverErr) {
        log.Printf("Primary provider unavailable (%v), falling back", err)

        client = core.NewClient(fallback)
        return client.Chat(fallback.DefaultModel()).
            User(prompt).
            GetResponse(ctx)
    }

    // Don't fall back on auth errors or invalid requests
    return nil, err
}
```

Distribute requests across multiple keys or accounts with round-robin selection:

```go
type LoadBalancedProvider struct {
    providers []core.Provider
    index     uint64
}

func NewLoadBalancedProvider(providers ...core.Provider) *LoadBalancedProvider {
    return &LoadBalancedProvider{providers: providers}
}

func (lb *LoadBalancedProvider) Next() core.Provider {
    idx := atomic.AddUint64(&lb.index, 1)
    return lb.providers[idx%uint64(len(lb.providers))]
}

// Usage
lb := NewLoadBalancedProvider(
    openai.New(apiKey1),
    openai.New(apiKey2), // Different API keys for rate limit distribution
)

client := core.NewClient(lb.Next())
```
Route by estimated cost, sending short prompts to a cheaper model:

```go
type CostOptimizedRouter struct {
    cheap     core.Provider
    expensive core.Provider
    threshold int // token threshold
}

func (r *CostOptimizedRouter) Route(prompt string) core.Provider {
    // Estimate tokens (rough approximation: ~4 characters per token)
    estimatedTokens := len(prompt) / 4

    if estimatedTokens < r.threshold {
        return r.cheap
    }
    return r.expensive
}

// Usage
router := &CostOptimizedRouter{
    cheap:     openai.New(apiKey), // Pair with gpt-4o-mini for short prompts
    expensive: openai.New(apiKey), // Pair with gpt-4o for complex tasks
    threshold: 1000,
}

provider := router.Route(userPrompt)
client := core.NewClient(provider)
```

As a starting point, match providers to use cases:

| Use Case | Recommended Provider | Reason |
|---|---|---|
| General chat | OpenAI, Anthropic | Best balance of quality and speed |
| Complex reasoning | Anthropic Claude | Extended thinking capabilities |
| Vision tasks | OpenAI GPT-4o | Best vision understanding |
| Code generation | OpenAI, Anthropic | Strong code capabilities |
| Local/private | Ollama | No data leaves your infrastructure |
| Search-augmented | Perplexity | Built-in web search |
| Embeddings | OpenAI, Voyage AI | High-quality embeddings |
| Cost-sensitive | OpenAI Mini, Ollama | Lower per-token costs |
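The table can be encoded as a routing helper. A sketch building on the registry example above; the function and the use-case strings are hypothetical, and the mapping is illustrative rather than exhaustive:

```go
// Hypothetical mapping of the table above onto registry lookups.
// Registry names ("openai", "anthropic", "ollama") follow the registry example.
func ProviderForUseCase(registry *ProviderRegistry, useCase string) (core.Provider, bool) {
    switch useCase {
    case "local", "cost-sensitive":
        return registry.Get("ollama")
    case "reasoning":
        return registry.Get("anthropic")
    default:
        return registry.Get("openai")
    }
}
```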
Set timeouts to match provider characteristics:

```go
// Add timeouts based on provider characteristics
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

// Ollama may need a longer timeout on the first request (model loading)
if provider.Name() == "ollama" {
    ctx, cancel = context.WithTimeout(context.Background(), 120*time.Second)
    defer cancel()
}

resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
```

Keep API keys in environment variables:

```go
// Good
provider := openai.New(os.Getenv("OPENAI_API_KEY"))

// Bad - hardcoded keys
provider := openai.New("sk-...")
```

Check capabilities before using provider-specific features:

```go
// Good
if provider.Supports(core.FeatureToolCalling) {
    builder = builder.Tools(tools...)
}

// Bad - may fail at runtime
builder = builder.Tools(tools...)
```

Handle rate limits explicitly:

```go
resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
if err != nil {
    var rateLimitErr *core.RateLimitError
    if errors.As(err, &rateLimitErr) {
        // Wait and retry, or switch provider
        time.Sleep(rateLimitErr.RetryAfter)
    }
}
```
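Extending that snippet into a bounded retry loop is straightforward. A sketch (the helper name and attempt policy are illustrative; `RetryAfter` is treated as a `time.Duration`, as in the snippet above):

```go
// Hypothetical helper: retry on rate limits up to maxAttempts,
// respecting the provider's suggested wait and the caller's context.
func ChatWithRetry(ctx context.Context, client *core.Client, model, prompt string, maxAttempts int) (*core.ChatResponse, error) {
    var lastErr error
    for attempt := 0; attempt < maxAttempts; attempt++ {
        resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
        if err == nil {
            return resp, nil
        }
        var rateLimitErr *core.RateLimitError
        if !errors.As(err, &rateLimitErr) {
            return nil, err // not retryable
        }
        lastErr = err
        select {
        case <-time.After(rateLimitErr.RetryAfter):
        case <-ctx.Done():
            return nil, ctx.Err()
        }
    }
    return nil, fmt.Errorf("rate limited after %d attempts: %w", maxAttempts, lastErr)
}
```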
Pin model names in configuration rather than in code:

```yaml
providers:
  openai:
    default_model: gpt-4o
    fast_model: gpt-4o-mini
  anthropic:
    default_model: claude-3-5-sonnet-20241022
```

```go
model := config.Providers["openai"].DefaultModel
resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
```

Degrade gracefully across providers:

```go
// tryProvider and the provider variables are defined by your application.
func ProcessWithDegradation(ctx context.Context, prompt string) (*core.ChatResponse, error) {
    // Try the primary provider
    resp, err := tryProvider(ctx, primaryProvider, prompt)
    if err == nil {
        return resp, nil
    }

    // Log and try the fallback
    log.Printf("Primary failed: %v, using fallback", err)
    resp, err = tryProvider(ctx, fallbackProvider, prompt)
    if err == nil {
        return resp, nil
    }

    // Final fallback: local model
    log.Printf("Fallback failed: %v, using local model", err)
    return tryProvider(ctx, ollamaProvider, prompt)
}
```

Streaming Guide
Stream responses in real-time. Streaming →
Tools Guide
Add function calling to your app. Tools →
Provider Details
In-depth provider configuration. Providers →
Examples
See working multi-provider code. Examples →