Batch API
Test batch operations. Batch API →
Iris provides testing utilities that make it easy to write deterministic, fast, and reliable
tests for code that uses LLM providers. The testing package includes MockProvider for
controlled responses and RecordingProvider for capturing real interactions.
MockProvider returns predefined responses, allowing you to control exactly what your code
receives from the LLM layer.
package myapp_test

import (
	"context"
	"testing"

	"github.com/petal-labs/iris/core"
	// Aliased because this package is also named "testing"; Go rejects two
	// imports that bind the same name in one file.
	iristest "github.com/petal-labs/iris/testing"
)

// TestChatHandler shows the basic MockProvider flow: queue one canned
// response, run the code under test, and assert on what comes back.
func TestChatHandler(t *testing.T) {
	const want = "Hello! I'm a mock response."

	// Create mock with a single response
	mock := iristest.NewMockProvider().
		WithResponse(core.ChatResponse{
			ID:     "test-response",
			Model:  "mock-model",
			Output: want,
			Usage:  core.TokenUsage{TotalTokens: 10},
		})

	// Create client with mock provider
	client := core.NewClient(mock)

	// Your code under test
	resp, err := client.Chat("any-model").
		User("Hello!").
		GetResponse(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if resp.Output != want {
		t.Errorf("unexpected output: %s", resp.Output)
	}
}

Queue multiple responses for multi-turn conversations or sequential tests:
func TestMultiTurnConversation(t *testing.T) { mock := testing.NewMockProvider(). WithResponse(core.ChatResponse{ ID: "resp-1", Output: "I'm the first response.", }). WithResponse(core.ChatResponse{ ID: "resp-2", Output: "I'm the second response.", }). WithDefaultResponse(core.ChatResponse{ ID: "default", Output: "I'm the default response.", })
client := core.NewClient(mock) ctx := context.Background()
// First call gets first queued response resp1, _ := client.Chat("model").User("First").GetResponse(ctx) if resp1.Output != "I'm the first response." { t.Errorf("wrong first response: %s", resp1.Output) }
// Second call gets second queued response resp2, _ := client.Chat("model").User("Second").GetResponse(ctx) if resp2.Output != "I'm the second response." { t.Errorf("wrong second response: %s", resp2.Output) }
// Third call (queue exhausted) gets default response resp3, _ := client.Chat("model").User("Third").GetResponse(ctx) if resp3.Output != "I'm the default response." { t.Errorf("wrong default response: %s", resp3.Output) }}Test error handling by injecting specific errors:
func TestRateLimitHandling(t *testing.T) { mock := testing.NewMockProvider(). WithError(core.ErrRateLimited)
client := core.NewClient(mock)
_, err := client.Chat("model"). User("This will fail"). GetResponse(context.Background())
if !errors.Is(err, core.ErrRateLimited) { t.Errorf("expected rate limit error, got: %v", err) }}
func TestAuthenticationError(t *testing.T) { mock := testing.NewMockProvider(). WithError(core.ErrUnauthorized)
client := core.NewClient(mock)
_, err := client.Chat("model"). User("This will fail"). GetResponse(context.Background())
if !errors.Is(err, core.ErrUnauthorized) { t.Errorf("expected auth error, got: %v", err) }}Mock streaming responses with controlled chunks:
func TestStreamingHandler(t *testing.T) { mock := testing.NewMockProvider(). WithStreamingResponse( []string{"Hello", " ", "world", "!"}, &core.ChatResponse{ ID: "stream-resp", Model: "mock-model", Output: "Hello world!", Usage: core.TokenUsage{TotalTokens: 5}, }, )
client := core.NewClient(mock)
stream, err := client.Chat("model"). User("Stream something"). Stream(context.Background())
if err != nil { t.Fatalf("unexpected error: %v", err) }
var chunks []string for chunk := range stream.Ch { chunks = append(chunks, chunk.Delta) }
if err := <-stream.Err; err != nil { t.Fatalf("stream error: %v", err) }
expected := []string{"Hello", " ", "world", "!"} if !reflect.DeepEqual(chunks, expected) { t.Errorf("chunks = %v, want %v", chunks, expected) }}Verify that your code makes the expected requests:
func TestRequestInspection(t *testing.T) { mock := testing.NewMockProvider(). WithResponse(core.ChatResponse{Output: "OK"})
client := core.NewClient(mock) ctx := context.Background()
// Make some calls client.Chat("gpt-4o"). System("You are helpful."). User("Hello"). Temperature(0.7). GetResponse(ctx)
client.Chat("gpt-4o"). User("Goodbye"). GetResponse(ctx)
// Inspect recorded calls calls := mock.Calls()
if len(calls) != 2 { t.Fatalf("expected 2 calls, got %d", len(calls)) }
// Check first call first := calls[0] if first.Request.Model != "gpt-4o" { t.Errorf("wrong model: %s", first.Request.Model) } if len(first.Request.Messages) != 2 { t.Errorf("expected 2 messages, got %d", len(first.Request.Messages)) }
// Check system message if first.Request.Messages[0].Role != core.RoleSystem { t.Error("first message should be system") }}RecordingProvider wraps a real provider and records all interactions. This is useful for:
package main

import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/petal-labs/iris/core"
	"github.com/petal-labs/iris/providers/openai"
	// Aliased so the name cannot be confused with the standard library
	// "testing" package.
	iristest "github.com/petal-labs/iris/testing"
)

// truncate shortens s to at most limit runes and appends "..." when it had
// to cut. It counts runes, not bytes, so multi-byte characters stay intact.
func truncate(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	runes := []rune(s)
	if len(runes) <= limit {
		return s
	}
	return string(runes[:limit]) + "..."
}

// main wraps a real OpenAI provider in a RecordingProvider, makes two chat
// calls, and prints what the recorder captured for each.
func main() {
	// Wrap real provider with recorder
	realProvider := openai.New(os.Getenv("OPENAI_API_KEY"))
	recorder := iristest.NewRecordingProvider(realProvider)

	client := core.NewClient(recorder)

	// Bound the real network calls so the example cannot hang indefinitely.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Make API calls as normal. The results are discarded on purpose: the
	// recorder keeps both the response and the error of every call, and the
	// loop below prints them.
	_, _ = client.Chat("gpt-4o-mini").
		User("What is the capital of France?").
		GetResponse(ctx)

	_, _ = client.Chat("gpt-4o-mini").
		User("And what about Germany?").
		GetResponse(ctx)

	// Inspect recorded interactions
	for i, rec := range recorder.Recordings() {
		fmt.Printf("Call %d:\n", i+1)
		fmt.Printf(" Method: %s\n", rec.Method)
		fmt.Printf(" Duration: %v\n", rec.Duration)
		fmt.Printf(" Model: %s\n", rec.Request.Model)

		if rec.Response != nil {
			fmt.Printf(" Output: %s\n", truncate(rec.Response.Output, 50))
			fmt.Printf(" Tokens: %d\n", rec.Response.Usage.TotalTokens)
		}

		if rec.Error != nil {
			fmt.Printf(" Error: %v\n", rec.Error)
		}
	}
}

Use recordings to verify API interactions in integration tests:
func TestAPIIntegration(t *testing.T) { if os.Getenv("OPENAI_API_KEY") == "" { t.Skip("OPENAI_API_KEY not set") }
provider := openai.New(os.Getenv("OPENAI_API_KEY")) recorder := testing.NewRecordingProvider(provider) client := core.NewClient(recorder)
// Run your code result := myApp.ProcessQuery(client, "test query")
// Verify recordings recordings := recorder.Recordings()
if len(recordings) == 0 { t.Error("expected at least one API call") }
// Check that we used the right model for _, rec := range recordings { if rec.Request.Model != "gpt-4o-mini" { t.Errorf("expected gpt-4o-mini, got %s", rec.Request.Model) } }
// Verify no errors for i, rec := range recordings { if rec.Error != nil { t.Errorf("call %d failed: %v", i, rec.Error) } }}Clear recordings between test cases:
func TestMultipleScenarios(t *testing.T) { provider := testing.NewMockProvider(). WithDefaultResponse(core.ChatResponse{Output: "OK"}) recorder := testing.NewRecordingProvider(provider) client := core.NewClient(recorder)
t.Run("scenario1", func(t *testing.T) { recorder.Clear() // Start fresh
// Test scenario 1 client.Chat("model").User("Test 1").GetResponse(context.Background())
if len(recorder.Recordings()) != 1 { t.Error("expected 1 recording") } })
t.Run("scenario2", func(t *testing.T) { recorder.Clear() // Start fresh
// Test scenario 2 client.Chat("model").User("Test 2a").GetResponse(context.Background()) client.Chat("model").User("Test 2b").GetResponse(context.Background())
if len(recorder.Recordings()) != 2 { t.Error("expected 2 recordings") } })}func TestPromptVariations(t *testing.T) { tests := []struct { name string prompt string response string wantErr bool }{ { name: "greeting", prompt: "Hello", response: "Hi there!", }, { name: "farewell", prompt: "Goodbye", response: "See you later!", }, { name: "error case", prompt: "trigger error", wantErr: true, }, }
for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var mock *testing.MockProvider if tt.wantErr { mock = testing.NewMockProvider(). WithError(errors.New("simulated error")) } else { mock = testing.NewMockProvider(). WithResponse(core.ChatResponse{Output: tt.response}) }
client := core.NewClient(mock) resp, err := client.Chat("model"). User(tt.prompt). GetResponse(context.Background())
if tt.wantErr { if err == nil { t.Error("expected error, got nil") } return }
if err != nil { t.Fatalf("unexpected error: %v", err) }
if resp.Output != tt.response { t.Errorf("output = %q, want %q", resp.Output, tt.response) } }) }}Structure your code for testability:
// production codetype ChatService struct { client *core.Client}
func NewChatService(provider core.Provider) *ChatService { return &ChatService{ client: core.NewClient(provider), }}
func (s *ChatService) Summarize(ctx context.Context, text string) (string, error) { resp, err := s.client.Chat("gpt-4o-mini"). System("Summarize the following text in one sentence."). User(text). GetResponse(ctx) if err != nil { return "", err } return resp.Output, nil}
// test codefunc TestSummarize(t *testing.T) { mock := testing.NewMockProvider(). WithResponse(core.ChatResponse{ Output: "This is a summary.", })
service := NewChatService(mock)
result, err := service.Summarize(context.Background(), "Long text...")
if err != nil { t.Fatalf("unexpected error: %v", err) } if result != "This is a summary." { t.Errorf("unexpected result: %s", result) }
// Verify the request calls := mock.Calls() if calls[0].Request.Messages[0].Content != "Summarize the following text in one sentence." { t.Error("wrong system prompt") }}func TestToolCallingFlow(t *testing.T) { mock := testing.NewMockProvider(). WithResponse(core.ChatResponse{ ID: "resp-1", Output: "", ToolCalls: []core.ToolCall{ { ID: "call-1", Name: "get_weather", Arguments: json.RawMessage(`{"location": "Tokyo"}`), }, }, }). WithResponse(core.ChatResponse{ ID: "resp-2", Output: "The weather in Tokyo is sunny and 22°C.", })
client := core.NewClient(mock) ctx := context.Background()
// First call - should get tool call resp1, _ := client.Chat("gpt-4o"). User("What's the weather in Tokyo?"). Tools(weatherTool). GetResponse(ctx)
if len(resp1.ToolCalls) != 1 { t.Fatalf("expected 1 tool call, got %d", len(resp1.ToolCalls)) }
// Second call - with tool result resp2, _ := client.Chat("gpt-4o"). User("What's the weather in Tokyo?"). Tools(weatherTool). ToolResults(core.ToolResult{ CallID: "call-1", Content: `{"temperature": 22, "condition": "sunny"}`, }). GetResponse(ctx)
if resp2.Output == "" { t.Error("expected final response with output") }}// Unit test - fast, deterministicfunc TestBusinessLogic(t *testing.T) { mock := testing.NewMockProvider(). WithResponse(core.ChatResponse{Output: "expected"}) // Test your logic}// Integration test - verifies real API behaviorfunc TestIntegration(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") }
provider := openai.New(os.Getenv("OPENAI_API_KEY")) recorder := testing.NewRecordingProvider(provider) // Test with real API}func TestRequestConstruction(t *testing.T) { mock := testing.NewMockProvider(). WithResponse(core.ChatResponse{Output: "OK"})
client := core.NewClient(mock) myFunction(client)
calls := mock.Calls() req := calls[0].Request
// Verify request was constructed correctly assert.Equal(t, "gpt-4o", req.Model) assert.Equal(t, 0.7, *req.Temperature) assert.Len(t, req.Messages, 2)}func TestErrorRecovery(t *testing.T) { // First call fails, second succeeds mock := testing.NewMockProvider(). WithError(core.ErrRateLimited). WithResponse(core.ChatResponse{Output: "Success"})
// Your retry logic should handle this result := myAppWithRetry(core.NewClient(mock))
if result != "Success" { t.Error("retry should have succeeded") }}Batch API
Test batch operations. Batch API →
Tools Guide
Test tool calling flows. Tools →
Examples
See testing patterns in examples. Examples →
API Reference
Full testing package API. API →