Providers & Models

Iris provides a unified interface for working with multiple LLM providers. This guide covers provider configuration, capability checking, model selection, runtime switching, and fallback patterns. For detailed per-provider setup, see the Providers section.

| Provider | Package | Chat | Stream | Tools | Vision | Embeddings |
|---|---|---|---|---|---|---|
| OpenAI | providers/openai | ✓ | ✓ | ✓ | ✓ | ✓ |
| Anthropic | providers/anthropic | ✓ | ✓ | ✓ | ✓ | – |
| Gemini | providers/gemini | ✓ | ✓ | ✓ | ✓ | ✓ |
| xAI (Grok) | providers/xai | ✓ | ✓ | ✓ | ✓ | – |
| Z.ai | providers/zai | ✓ | ✓ | – | – | – |
| Perplexity | providers/perplexity | ✓ | ✓ | – | – | – |
| Ollama | providers/ollama | ✓ | ✓ | ✓ | ✓ | ✓ |
| Voyage AI | providers/voyageai | – | – | – | – | ✓ |
| Hugging Face | providers/huggingface | ✓ | ✓ | – | – | ✓ |

Each provider has specific configuration requirements. Here’s how to create instances for each supported provider.

OpenAI:

package main

import (
	"net/http"
	"os"
	"time"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/openai"
)

func main() {
	// Basic creation with API key
	provider := openai.New(os.Getenv("OPENAI_API_KEY"))

	// Example custom HTTP client (e.g. with a request timeout)
	customClient := &http.Client{Timeout: 60 * time.Second}

	// With options
	provider = openai.New(
		os.Getenv("OPENAI_API_KEY"),
		openai.WithOrganization("org-xxx"),
		openai.WithBaseURL("https://custom-endpoint.example.com/v1"),
		openai.WithHTTPClient(customClient),
	)

	client := core.NewClient(provider)
}

Configuration Options:

  • WithOrganization(org): Set organization ID for billing
  • WithBaseURL(url): Use custom endpoint (Azure OpenAI, proxies)
  • WithHTTPClient(client): Use custom HTTP client with timeouts/proxies (see the sketch after this list)
  • WithProject(project): Set project ID for usage tracking
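
As referenced above, here is a sketch of a custom HTTP client with a proxy and request timeout for WithHTTPClient, assuming the option accepts a standard *http.Client; the proxy address is a placeholder:

proxyURL, _ := url.Parse("http://proxy.internal:8080") // illustrative proxy address

customClient := &http.Client{
	Timeout: 60 * time.Second,
	Transport: &http.Transport{
		Proxy: http.ProxyURL(proxyURL),
	},
}

provider := openai.New(
	os.Getenv("OPENAI_API_KEY"),
	openai.WithHTTPClient(customClient),
)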

Anthropic:

package main

import (
	"os"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/anthropic"
)

func main() {
	// Basic creation
	provider := anthropic.New(os.Getenv("ANTHROPIC_API_KEY"))

	// With options
	provider = anthropic.New(
		os.Getenv("ANTHROPIC_API_KEY"),
		anthropic.WithBaseURL("https://custom-endpoint.example.com"),
		anthropic.WithVersion("2024-01-01"),
		anthropic.WithBeta("extended-thinking"),
	)

	client := core.NewClient(provider)
}

Configuration Options:

  • WithBaseURL(url): Use custom endpoint
  • WithVersion(version): Specify API version
  • WithBeta(features...): Enable beta features (extended thinking, computer use)

Gemini:

package main

import (
	"os"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/gemini"
)

func main() {
	// Basic creation
	provider := gemini.New(os.Getenv("GEMINI_API_KEY"))

	// With options
	provider = gemini.New(
		os.Getenv("GEMINI_API_KEY"),
		gemini.WithProject("my-gcp-project"),
		gemini.WithLocation("us-central1"),
	)

	client := core.NewClient(provider)
}

Configuration Options:

  • WithProject(project): GCP project for Vertex AI
  • WithLocation(location): GCP region
  • WithBaseURL(url): Custom endpoint

xAI (Grok):

package main

import (
	"os"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/xai"
)

func main() {
	provider := xai.New(os.Getenv("XAI_API_KEY"))
	client := core.NewClient(provider)
}

Ollama:

package main

import (
	"time"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/ollama"
)

func main() {
	// Default localhost
	provider := ollama.New("http://localhost:11434")

	// Remote Ollama instance
	provider = ollama.New(
		"http://ollama.internal:11434",
		ollama.WithTimeout(120*time.Second),
	)

	client := core.NewClient(provider)
}

Configuration Options:

  • WithTimeout(duration): Request timeout for long generations
  • WithHTTPClient(client): Custom HTTP client

Voyage AI:

package main

import (
	"context"
	"os"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/voyageai"
)

func main() {
	provider := voyageai.New(os.Getenv("VOYAGEAI_API_KEY"))
	client := core.NewClient(provider)

	// Use for embeddings
	ctx := context.Background()
	embedding, err := client.Embed("voyage-3").
		Text("Search query text").
		GetEmbedding(ctx)
	if err != nil {
		panic(err)
	}
	_ = embedding // use the vector, e.g. store it for similarity search
}

Hugging Face:

package main

import (
	"os"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/huggingface"
)

func main() {
	provider := huggingface.New(os.Getenv("HUGGINGFACE_API_KEY"))

	// With custom endpoint (Inference Endpoints)
	provider = huggingface.New(
		os.Getenv("HUGGINGFACE_API_KEY"),
		huggingface.WithEndpoint("https://xxx.us-east-1.aws.endpoints.huggingface.cloud"),
	)

	client := core.NewClient(provider)
}

Before using provider-specific features, check if the provider supports them. This prevents runtime errors when switching between providers.

// Core capabilities
core.FeatureChat            // Basic chat completion
core.FeatureStreaming       // Streaming responses
core.FeatureToolCalling     // Function/tool calling
core.FeatureVision          // Image input (multimodal)
core.FeatureEmbeddings      // Text embeddings
core.FeatureJSONMode        // Structured JSON output
core.FeatureReasoning       // Extended thinking/reasoning

// Advanced capabilities
core.FeatureImageGeneration // Image creation (DALL-E)
core.FeatureBatchProcessing // Batch API support
core.FeatureFineTuning      // Model fine-tuning

// Check single capability
if provider.Supports(core.FeatureToolCalling) {
	builder = builder.Tools(myTools...)
}

// Check multiple capabilities
if provider.Supports(core.FeatureVision) && provider.Supports(core.FeatureStreaming) {
	// Safe to stream vision requests
	stream, err := client.Chat("gpt-4o").
		UserMultimodal().
		Text("Describe this image").
		ImageURL(imageURL).
		Done().
		GetStream(ctx)
}

// Get all capabilities
caps := provider.Capabilities()
fmt.Printf("Provider supports: %v\n", caps)

func ProcessQuery(ctx context.Context, client *core.Client, provider core.Provider, query string, tools []core.Tool) (*core.ChatResponse, error) {
	builder := client.Chat(provider.DefaultModel()).
		System("You are a helpful assistant.").
		User(query)

	// Only add tools if supported
	if len(tools) > 0 && provider.Supports(core.FeatureToolCalling) {
		builder = builder.Tools(tools...)
	}

	// Only request JSON if supported
	if provider.Supports(core.FeatureJSONMode) {
		builder = builder.ResponseFormat(core.ResponseFormatJSON)
	}

	return builder.GetResponse(ctx)
}

Specify the model for each request:

// Use specific model
resp, err := client.Chat("gpt-4o").
	User("Complex reasoning task").
	GetResponse(ctx)

// Use cheaper model for simple tasks
resp, err = client.Chat("gpt-4o-mini").
	User("What's 2+2?").
	GetResponse(ctx)

Each provider defines a default model:

// Use provider's default
resp, err := client.Chat(provider.DefaultModel()).
	User("Hello").
	GetResponse(ctx)

Query model capabilities and limits:

// Get model info
info, err := provider.ModelInfo("gpt-4o")
if err != nil {
	log.Fatal(err)
}

fmt.Printf("Model: %s\n", info.Name)
fmt.Printf("Context window: %d tokens\n", info.ContextWindow)
fmt.Printf("Max output: %d tokens\n", info.MaxOutputTokens)
fmt.Printf("Supports vision: %v\n", info.SupportsVision)
fmt.Printf("Supports tools: %v\n", info.SupportsTools)

Choose models based on task requirements:

// Task categories referenced below
type TaskType int

const (
	TaskSimpleChat TaskType = iota
	TaskReasoning
	TaskVision
	TaskCodeGeneration
)

type ModelSelector struct {
	provider core.Provider
}

func (s *ModelSelector) SelectModel(task TaskType) string {
	switch task {
	case TaskReasoning:
		// Use most capable model for complex reasoning
		return "gpt-4o"
	case TaskSimpleChat:
		// Use fast, cheap model for simple queries
		return "gpt-4o-mini"
	case TaskVision:
		// Use vision-capable model
		return "gpt-4o"
	case TaskCodeGeneration:
		// Use model optimized for code
		return "gpt-4o"
	default:
		return s.provider.DefaultModel()
	}
}
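
A usage sketch, assuming a provider and client created as in the earlier examples (the prompt is illustrative):

// Usage
selector := &ModelSelector{provider: provider}
model := selector.SelectModel(TaskReasoning)

resp, err := client.Chat(model).
	User("Summarize the key clauses in this contract").
	GetResponse(ctx)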

Maintain a registry of providers for runtime switching:

package main

import (
	"fmt"
	"os"
	"sync"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/anthropic"
	"github.com/petal-labs/iris/providers/openai"
)

type ProviderRegistry struct {
	mu        sync.RWMutex
	providers map[string]core.Provider
	current   string
}

func NewProviderRegistry() *ProviderRegistry {
	return &ProviderRegistry{
		providers: make(map[string]core.Provider),
		current:   "openai", // default
	}
}

func (r *ProviderRegistry) Register(name string, provider core.Provider) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.providers[name] = provider
}

func (r *ProviderRegistry) Get(name string) (core.Provider, bool) {
	r.mu.RLock()
	defer r.mu.RUnlock()
	p, ok := r.providers[name]
	return p, ok
}

func (r *ProviderRegistry) Current() core.Provider {
	r.mu.RLock()
	defer r.mu.RUnlock()
	return r.providers[r.current]
}

func (r *ProviderRegistry) Switch(name string) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if _, ok := r.providers[name]; !ok {
		return fmt.Errorf("provider %s not registered", name)
	}
	r.current = name
	return nil
}

func main() {
	registry := NewProviderRegistry()

	// Register providers
	registry.Register("openai", openai.New(os.Getenv("OPENAI_API_KEY")))
	registry.Register("anthropic", anthropic.New(os.Getenv("ANTHROPIC_API_KEY")))

	// Use current provider
	client := core.NewClient(registry.Current())

	// Switch at runtime
	registry.Switch("anthropic")
	client = core.NewClient(registry.Current())
}

Load provider selection from configuration:

type Config struct {
	DefaultProvider string `yaml:"default_provider"`
	Providers       map[string]struct {
		APIKeyEnv    string `yaml:"api_key_env"`
		DefaultModel string `yaml:"default_model"`
		BaseURL      string `yaml:"base_url,omitempty"`
	} `yaml:"providers"`
}

func (c *Config) CreateProvider(name string) (core.Provider, error) {
	cfg, ok := c.Providers[name]
	if !ok {
		return nil, fmt.Errorf("provider %s not configured", name)
	}

	apiKey := os.Getenv(cfg.APIKeyEnv)
	if apiKey == "" {
		return nil, fmt.Errorf("API key not set: %s", cfg.APIKeyEnv)
	}

	switch name {
	case "openai":
		opts := []openai.Option{}
		if cfg.BaseURL != "" {
			opts = append(opts, openai.WithBaseURL(cfg.BaseURL))
		}
		return openai.New(apiKey, opts...), nil
	case "anthropic":
		return anthropic.New(apiKey), nil
	default:
		return nil, fmt.Errorf("unknown provider: %s", name)
	}
}
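
A minimal sketch of loading such a config and creating the selected provider. It assumes the YAML layout below and the gopkg.in/yaml.v3 package; the file name and LoadConfig helper are illustrative:

config.yaml

default_provider: openai
providers:
  openai:
    api_key_env: OPENAI_API_KEY
    default_model: gpt-4o
  anthropic:
    api_key_env: ANTHROPIC_API_KEY
    default_model: claude-3-5-sonnet-20241022

func LoadConfig(path string) (*Config, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var cfg Config
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}

// Usage
cfg, err := LoadConfig("config.yaml")
if err != nil {
	log.Fatal(err)
}
provider, err := cfg.CreateProvider(cfg.DefaultProvider)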

Implement fallback strategies for reliability and cost optimization.

func ChatWithFallback(ctx context.Context, providers []core.Provider, prompt string) (*core.ChatResponse, error) {
	var lastErr error

	for _, provider := range providers {
		client := core.NewClient(provider)

		resp, err := client.Chat(provider.DefaultModel()).
			User(prompt).
			GetResponse(ctx)
		if err == nil {
			return resp, nil
		}

		lastErr = err
		log.Printf("Provider %s failed: %v, trying next", provider.Name(), err)
	}

	return nil, fmt.Errorf("all providers failed, last error: %w", lastErr)
}

// Usage
providers := []core.Provider{
	openai.New(os.Getenv("OPENAI_API_KEY")),
	anthropic.New(os.Getenv("ANTHROPIC_API_KEY")),
	ollama.New("http://localhost:11434"),
}

resp, err := ChatWithFallback(ctx, providers, "Hello, world!")

func ChatWithSmartFallback(ctx context.Context, primary, fallback core.Provider, prompt string) (*core.ChatResponse, error) {
	client := core.NewClient(primary)

	resp, err := client.Chat(primary.DefaultModel()).
		User(prompt).
		GetResponse(ctx)
	if err == nil {
		return resp, nil
	}

	// Only fall back on specific errors
	var rateLimitErr *core.RateLimitError
	var serverErr *core.ServerError
	if errors.As(err, &rateLimitErr) || errors.As(err, &serverErr) {
		log.Printf("Primary provider unavailable (%v), falling back", err)
		client = core.NewClient(fallback)
		return client.Chat(fallback.DefaultModel()).
			User(prompt).
			GetResponse(ctx)
	}

	// Don't fall back on auth errors or invalid requests
	return nil, err
}

type LoadBalancedProvider struct {
	providers []core.Provider
	index     uint64
}

func NewLoadBalancedProvider(providers ...core.Provider) *LoadBalancedProvider {
	return &LoadBalancedProvider{providers: providers}
}

func (lb *LoadBalancedProvider) Next() core.Provider {
	idx := atomic.AddUint64(&lb.index, 1)
	return lb.providers[idx%uint64(len(lb.providers))]
}

// Usage
lb := NewLoadBalancedProvider(
	openai.New(apiKey1),
	openai.New(apiKey2), // Different API keys for rate limit distribution
)
client := core.NewClient(lb.Next())

type CostOptimizedRouter struct {
	cheap     core.Provider
	expensive core.Provider
	threshold int // token threshold
}

func (r *CostOptimizedRouter) Route(prompt string) core.Provider {
	// Estimate tokens (rough approximation)
	estimatedTokens := len(prompt) / 4
	if estimatedTokens < r.threshold {
		return r.cheap
	}
	return r.expensive
}

// Usage
router := &CostOptimizedRouter{
	cheap:     openai.New(apiKey), // Uses gpt-4o-mini by default
	expensive: openai.New(apiKey), // Uses gpt-4o for complex tasks
	threshold: 1000,
}

provider := router.Route(userPrompt)
client := core.NewClient(provider)

| Use Case | Recommended Provider | Reason |
|---|---|---|
| General chat | OpenAI, Anthropic | Best balance of quality and speed |
| Complex reasoning | Anthropic Claude | Extended thinking capabilities |
| Vision tasks | OpenAI GPT-4o | Best vision understanding |
| Code generation | OpenAI, Anthropic | Strong code capabilities |
| Local/private | Ollama | No data leaves your infrastructure |
| Search-augmented | Perplexity | Built-in web search |
| Embeddings | OpenAI, Voyage AI | High-quality embeddings |
| Cost-sensitive | OpenAI Mini, Ollama | Lower per-token costs |
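
One way to act on these recommendations is to map a use case to a registered provider name, reusing the ProviderRegistry from earlier. The mapping below is illustrative, not part of Iris:

func ProviderForUseCase(registry *ProviderRegistry, useCase string) core.Provider {
	// Illustrative mapping based on the table above
	names := map[string]string{
		"chat":       "openai",
		"reasoning":  "anthropic",
		"local":      "ollama",
		"embeddings": "voyageai",
	}

	if p, ok := registry.Get(names[useCase]); ok {
		return p
	}
	return registry.Current()
}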

// Add timeouts based on provider characteristics
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

// Ollama may need longer timeouts for first request (model loading)
if provider.Name() == "ollama" {
	ctx, cancel = context.WithTimeout(context.Background(), 120*time.Second)
	defer cancel()
}

resp, err := client.Chat(model).User(prompt).GetResponse(ctx)

// Good
provider := openai.New(os.Getenv("OPENAI_API_KEY"))

// Bad - hardcoded keys
provider := openai.New("sk-...")

// Good
if provider.Supports(core.FeatureToolCalling) {
	builder = builder.Tools(tools...)
}

// Bad - may fail at runtime
builder = builder.Tools(tools...)

resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
if err != nil {
	var rateLimitErr *core.RateLimitError
	if errors.As(err, &rateLimitErr) {
		// Wait and retry, or switch provider
		time.Sleep(rateLimitErr.RetryAfter)
	}
}
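
Building on this, here is a sketch of a single retry after the advised wait, then a switch to a fallback provider. The helper name and retry policy are ours, mirroring the snippet above:

func chatWithRateLimitRetry(ctx context.Context, client *core.Client, fallback core.Provider, model, prompt string) (*core.ChatResponse, error) {
	resp, err := client.Chat(model).User(prompt).GetResponse(ctx)
	if err == nil {
		return resp, nil
	}

	var rateLimitErr *core.RateLimitError
	if !errors.As(err, &rateLimitErr) {
		return nil, err // not a rate limit; don't retry
	}

	// Wait the advised interval and retry once on the same provider
	time.Sleep(rateLimitErr.RetryAfter)
	if resp, err = client.Chat(model).User(prompt).GetResponse(ctx); err == nil {
		return resp, nil
	}

	// Still failing: switch to the fallback provider
	fb := core.NewClient(fallback)
	return fb.Chat(fallback.DefaultModel()).User(prompt).GetResponse(ctx)
}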

config.yaml

providers:
  openai:
    default_model: gpt-4o
    fast_model: gpt-4o-mini
  anthropic:
    default_model: claude-3-5-sonnet-20241022

model := config.Providers["openai"].DefaultModel
resp, err := client.Chat(model).User(prompt).GetResponse(ctx)

func ProcessWithDegradation(ctx context.Context, prompt string) (*core.ChatResponse, error) {
	// Try primary provider
	resp, err := tryProvider(ctx, primaryProvider, prompt)
	if err == nil {
		return resp, nil
	}

	// Log and try fallback
	log.Printf("Primary failed: %v, using fallback", err)
	resp, err = tryProvider(ctx, fallbackProvider, prompt)
	if err == nil {
		return resp, nil
	}

	// Final fallback: local model
	log.Printf("Fallback failed: %v, using local model", err)
	return tryProvider(ctx, ollamaProvider, prompt)
}
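
The tryProvider helper isn't shown above; a minimal sketch, mirroring the calls used in ChatWithFallback, might be:

func tryProvider(ctx context.Context, provider core.Provider, prompt string) (*core.ChatResponse, error) {
	client := core.NewClient(provider)
	return client.Chat(provider.DefaultModel()).
		User(prompt).
		GetResponse(ctx)
}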

Tools Guide

Add function calling to your app. Tools →

Provider Details

In-depth provider configuration. Providers →