Memory & Conversations

Iris v0.11.0 introduces a structured approach to managing conversation history through the Memory interface, the InMemoryStore implementation, and the high-level Conversation API. These primitives enable multi-turn conversations, session persistence, and memory-management patterns such as windowing and summarization.

import "github.com/petal-labs/iris/core"
// Create a conversation with automatic history management
conv := core.NewConversation(client, "gpt-4o",
core.WithSystemMessage("You are a helpful assistant."),
)
// Send messages - history is managed automatically
resp1, _ := conv.Send("What is the capital of France?")
fmt.Println(resp1.Output) // "The capital of France is Paris."
resp2, _ := conv.Send("What about Germany?")
fmt.Println(resp2.Output) // "The capital of Germany is Berlin."
// The conversation maintains context
resp3, _ := conv.Send("Compare their populations.")
fmt.Println(resp3.Output) // Compares Paris and Berlin populations

The Memory interface defines a contract for conversation storage:

type Memory interface {
    // AddMessage appends a message to the conversation history.
    AddMessage(msg Message)
    // AddMessages appends multiple messages to the conversation history.
    AddMessages(msgs []Message)
    // GetHistory returns all messages in the conversation.
    GetHistory() []Message
    // GetLastN returns the last N messages in the conversation.
    GetLastN(n int) []Message
    // Clear removes all messages from the conversation.
    Clear()
    // Len returns the number of messages in the conversation.
    Len() int
    // SetMessages replaces the entire conversation history.
    SetMessages(msgs []Message)
}

The built-in thread-safe in-memory implementation:

// Create a new store
store := core.NewInMemoryStore()

// Add messages
store.AddMessage(core.Message{
    Role:    core.RoleUser,
    Content: "Hello!",
})
store.AddMessage(core.Message{
    Role:    core.RoleAssistant,
    Content: "Hi there! How can I help you?",
})

// Get history
messages := store.GetHistory()
fmt.Printf("Total messages: %d\n", store.Len())

// Get recent messages only
recent := store.GetLastN(5)

// Clear when starting a new topic
store.Clear()
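
Because the store is safe for concurrent use, multiple goroutines can append to the same history. A minimal sketch (fmt and sync imports assumed):

var wg sync.WaitGroup
store := core.NewInMemoryStore()

// Ten goroutines append concurrently; the store serializes access internally.
for i := 0; i < 10; i++ {
    wg.Add(1)
    go func(n int) {
        defer wg.Done()
        store.AddMessage(core.Message{
            Role:    core.RoleUser,
            Content: fmt.Sprintf("message %d", n),
        })
    }(i)
}
wg.Wait()
fmt.Println(store.Len()) // 10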

The Conversation type provides a high-level API for multi-turn chat:

// Create conversation with options
conv := core.NewConversation(client, "gpt-4o",
    core.WithSystemMessage("You are a coding assistant. Be concise."),
    core.WithMemoryStore(customStore), // Optional: use custom storage
)

// Simple send (uses context.Background())
resp, err := conv.Send("Explain Go interfaces")

// Send with context for timeout/cancellation
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
resp, err = conv.SendWithContext(ctx, "Show me an example")

// Access conversation state
history := conv.GetHistory()
count := conv.MessageCount()

// Reset conversation (keeps system message)
conv.Clear()

A complete multi-turn exchange:

conv := core.NewConversation(client, "gpt-4o",
    core.WithSystemMessage("You are a math tutor."),
)

// First turn
resp, _ := conv.Send("What is a derivative?")
fmt.Println("Tutor:", resp.Output)

// Second turn - conversation has context
resp, _ = conv.Send("Can you give me an example?")
fmt.Println("Tutor:", resp.Output)

// Third turn - still has full context
resp, _ = conv.Send("How does that relate to the first thing you explained?")
fmt.Println("Tutor:", resp.Output)

// Check history
fmt.Printf("Conversation has %d messages\n", conv.MessageCount())
// Output: Conversation has 7 messages (1 system + 3 user + 3 assistant)

For long-running conversations, implement windowed memory or manual summarization to stay within context limits:

Keep only the most recent messages (the WindowedMemory implementation appears later in this guide):

// Create conversation with windowed memory
windowedStore := NewWindowedMemory(20) // Keep last 20 messages
conv := core.NewConversation(client, "gpt-4o",
    core.WithSystemMessage("You are a helpful assistant."),
    core.WithMemoryStore(windowedStore),
)

// Messages beyond the window are automatically trimmed
for _, msg := range userMessages {
    resp, _ := conv.Send(msg)
    fmt.Println(resp.Output)
}

Summarize conversation history when approaching context limits:

func summarizeIfNeeded(conv *core.Conversation, client *core.Client, maxMessages int) error {
    if conv.MessageCount() < maxMessages {
        return nil
    }

    // Get current history
    history := conv.GetHistory()

    // Create summarization request
    summaryResp, err := client.Chat("gpt-4o").
        System("Summarize this conversation preserving key context and decisions.").
        User(formatMessagesForSummary(history)).
        GetResponse(context.Background())
    if err != nil {
        return err
    }

    // Replace history with summary + recent messages
    conv.Clear()
    conv.GetMemory().AddMessage(core.Message{
        Role:    core.RoleSystem,
        Content: "Previous conversation summary: " + summaryResp.Output,
    })

    // Keep last few messages for context
    for _, msg := range history[len(history)-4:] {
        conv.GetMemory().AddMessage(msg)
    }
    return nil
}
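
formatMessagesForSummary is not an Iris function; a minimal sketch that flattens each message into a role-prefixed line (assuming Role converts to string):

func formatMessagesForSummary(messages []core.Message) string {
    var b strings.Builder
    for _, msg := range messages {
        // One "role: content" line per message keeps the transcript compact.
        b.WriteString(string(msg.Role))
        b.WriteString(": ")
        b.WriteString(msg.Content)
        b.WriteString("\n")
    }
    return b.String()
}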

Estimate tokens to know when summarization is needed:

func estimateTokens(messages []core.Message) int {
    totalChars := 0
    for _, msg := range messages {
        totalChars += len(msg.Content)
    }
    // ~4 characters per token (rough heuristic)
    return totalChars / 4
}

// Check before each send
if estimateTokens(conv.GetHistory()) > 80000 { // 80% of 100K context
    summarizeIfNeeded(conv, client, 50)
}
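
The check and the summarization step can be folded into one hypothetical wrapper so every send enforces the budget:

// sendChecked is an illustrative helper, not part of Iris: it summarizes
// first when the estimated history size exceeds tokenBudget, then sends.
func sendChecked(conv *core.Conversation, client *core.Client, msg string, tokenBudget int) (*core.ChatResponse, error) {
    if estimateTokens(conv.GetHistory()) > tokenBudget {
        if err := summarizeIfNeeded(conv, client, 50); err != nil {
            return nil, err
        }
    }
    return conv.Send(msg)
}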

Implement the Memory interface for custom storage:

type RedisMemoryStore struct {
    client *redis.Client
    key    string
    mu     sync.RWMutex
}

func NewRedisMemoryStore(client *redis.Client, sessionID string) *RedisMemoryStore {
    return &RedisMemoryStore{
        client: client,
        key:    fmt.Sprintf("conversation:%s", sessionID),
    }
}

func (m *RedisMemoryStore) AddMessage(msg core.Message) {
    m.mu.Lock()
    defer m.mu.Unlock()
    data, _ := json.Marshal(msg)
    m.client.RPush(context.Background(), m.key, data)
}

func (m *RedisMemoryStore) GetHistory() []core.Message {
    m.mu.RLock()
    defer m.mu.RUnlock()
    data, _ := m.client.LRange(context.Background(), m.key, 0, -1).Result()
    messages := make([]core.Message, len(data))
    for i, d := range data {
        json.Unmarshal([]byte(d), &messages[i])
    }
    return messages
}

func (m *RedisMemoryStore) Clear() {
    m.mu.Lock()
    defer m.mu.Unlock()
    m.client.Del(context.Background(), m.key)
}

func (m *RedisMemoryStore) Len() int {
    return int(m.client.LLen(context.Background(), m.key).Val())
}

// ... implement remaining methods
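
The remaining methods follow the same pattern; a sketch, assuming the go-redis client used above:

func (m *RedisMemoryStore) AddMessages(msgs []core.Message) {
    for _, msg := range msgs {
        m.AddMessage(msg)
    }
}

func (m *RedisMemoryStore) GetLastN(n int) []core.Message {
    m.mu.RLock()
    defer m.mu.RUnlock()
    // Negative indices count from the end of the Redis list.
    data, _ := m.client.LRange(context.Background(), m.key, int64(-n), -1).Result()
    messages := make([]core.Message, len(data))
    for i, d := range data {
        json.Unmarshal([]byte(d), &messages[i])
    }
    return messages
}

func (m *RedisMemoryStore) SetMessages(msgs []core.Message) {
    m.mu.Lock()
    defer m.mu.Unlock()
    m.client.Del(context.Background(), m.key)
    for _, msg := range msgs {
        data, _ := json.Marshal(msg)
        m.client.RPush(context.Background(), m.key, data)
    }
}

// Compile-time check that the store satisfies core.Memory.
var _ core.Memory = (*RedisMemoryStore)(nil)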

A session manager can key conversations by user or session ID:

type SessionManager struct {
    sessions map[string]*core.Conversation
    client   *core.Client
    mu       sync.RWMutex
}

func (m *SessionManager) GetOrCreate(sessionID string) *core.Conversation {
    m.mu.Lock()
    defer m.mu.Unlock()
    if conv, ok := m.sessions[sessionID]; ok {
        return conv
    }
    conv := core.NewConversation(m.client, "gpt-4o",
        core.WithSystemMessage("You are a helpful assistant."),
    )
    m.sessions[sessionID] = conv
    return conv
}

func (m *SessionManager) EndSession(sessionID string) {
    m.mu.Lock()
    defer m.mu.Unlock()
    delete(m.sessions, sessionID)
}
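
In an HTTP service, each request can resolve its conversation through the manager. A hypothetical handler (the X-Session-ID header and plain-text body are illustrative choices; net/http and io imports assumed):

func chatHandler(mgr *SessionManager) http.HandlerFunc {
    return func(w http.ResponseWriter, r *http.Request) {
        // Resolve the caller's conversation by session ID.
        sessionID := r.Header.Get("X-Session-ID")
        conv := mgr.GetOrCreate(sessionID)

        body, err := io.ReadAll(r.Body)
        if err != nil {
            http.Error(w, err.Error(), http.StatusBadRequest)
            return
        }

        // Tie the model call to the request's lifetime.
        resp, err := conv.SendWithContext(r.Context(), string(body))
        if err != nil {
            http.Error(w, err.Error(), http.StatusBadGateway)
            return
        }
        fmt.Fprint(w, resp.Output)
    }
}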

Keep only recent messages to control context size:

type WindowedMemory struct {
    store   core.Memory
    maxSize int
}

func NewWindowedMemory(maxSize int) *WindowedMemory {
    return &WindowedMemory{
        store:   core.NewInMemoryStore(),
        maxSize: maxSize,
    }
}

func (m *WindowedMemory) AddMessage(msg core.Message) {
    m.store.AddMessage(msg)
    // Trim to window size
    if m.store.Len() > m.maxSize {
        messages := m.store.GetHistory()
        m.store.SetMessages(messages[len(messages)-m.maxSize:])
    }
}

// ... delegate other methods to m.store
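
The delegating methods are mechanical; a sketch:

func (m *WindowedMemory) AddMessages(msgs []core.Message) {
    for _, msg := range msgs {
        m.AddMessage(msg) // Reuse AddMessage so trimming still applies.
    }
}

func (m *WindowedMemory) GetHistory() []core.Message {
    return m.store.GetHistory()
}

func (m *WindowedMemory) GetLastN(n int) []core.Message {
    return m.store.GetLastN(n)
}

func (m *WindowedMemory) Clear() {
    m.store.Clear()
}

func (m *WindowedMemory) Len() int {
    return m.store.Len()
}

func (m *WindowedMemory) SetMessages(msgs []core.Message) {
    m.store.SetMessages(msgs)
}

// Compile-time check that the window satisfies core.Memory.
var _ core.Memory = (*WindowedMemory)(nil)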

Inject dynamic context into conversations:

type ContextAwareConversation struct {
    conv       *core.Conversation
    getContext func() string
}

func (c *ContextAwareConversation) Send(userMessage string) (*core.ChatResponse, error) {
    // Inject current context before user message
    dynamicContext := c.getContext()
    if dynamicContext != "" {
        c.conv.GetMemory().AddMessage(core.Message{
            Role:    core.RoleSystem,
            Content: fmt.Sprintf("Current context:\n%s", dynamicContext),
        })
    }
    return c.conv.Send(userMessage)
}

// Usage
conv := &ContextAwareConversation{
    conv: core.NewConversation(client, "gpt-4o"),
    getContext: func() string {
        return fmt.Sprintf("Current time: %s\nUser location: %s",
            time.Now().Format(time.RFC3339),
            getUserLocation(),
        )
    },
}

The memory system uses a simple heuristic for token estimation:

// ~4 characters per token (works reasonably across models)
estimatedTokens := totalCharacters / 4

For more precise token counting, you can implement a TokenCounter interface and inject it into your memory management logic.
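
The shape of that interface is up to you; one possible form, with the heuristic above as a fallback implementation (the names here are illustrative, not part of Iris):

type TokenCounter interface {
    // Count returns the number of tokens the given messages consume.
    Count(messages []core.Message) int
}

// heuristicCounter falls back to the ~4 characters-per-token estimate.
type heuristicCounter struct{}

func (heuristicCounter) Count(messages []core.Message) int {
    total := 0
    for _, msg := range messages {
        total += len(msg.Content)
    }
    return total / 4
}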

Practice | Recommendation
System messages | Always set via WithSystemMessage for consistency
Token limits | Set MaxTokens below the model's context window (e.g., 80%)
Preserve recent | Keep 4-8 recent messages unsummarized for coherence
Clear appropriately | Call Clear() when changing topics significantly
Custom backends | Use Redis/PostgreSQL for production persistence
Session cleanup | Implement TTL or explicit session termination (see the sketch below)
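
For the session-cleanup row, a hypothetical TTL sweep; it assumes the manager is extended to track a last-activity timestamp per session:

type sessionEntry struct {
    conv     *core.Conversation
    lastSeen time.Time
}

type ExpiringSessionManager struct {
    mu       sync.Mutex
    sessions map[string]*sessionEntry
}

// Sweep drops sessions idle longer than ttl. Call it periodically,
// e.g. from a background goroutine driven by a time.Ticker.
func (m *ExpiringSessionManager) Sweep(ttl time.Duration) {
    m.mu.Lock()
    defer m.mu.Unlock()
    for id, entry := range m.sessions {
        if time.Since(entry.lastSeen) > ttl {
            delete(m.sessions, id)
        }
    }
}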

Agent Tools Example

See memory in action with AgentRunner in the Agent Tools guide.

Streaming Guide

Stream responses in conversations; see the Streaming guide.