Phase 3: AI Classification via DeepSeek (Version: 2026-04.3)

- DeepSeek API client with configurable model, temperature, max tokens
- Prompt template engine: loads bin/prompt.txt at runtime, substitutes {sender}/{subject}/{body}
- Response parser validates folder names (Important/eCommerce/Spam/Other) and confidence scores (1-100)
- Graceful fallback to placeholder classifier if prompt file/API key missing
- Email body text limit increased to 4000 chars for AI context
- Replaced EmailSummary.Snippet with EmailSummary.Body
- Wired real AI classifier into main.go init
This commit is contained in:
Claus Lohmar 2026-04-23 11:13:10 +00:00
parent 8bb9ff067b
commit 283faddb05
10 changed files with 464 additions and 16 deletions

View file

@ -5,7 +5,25 @@ All notable changes to inBOXER will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [2026-04.2] - 2026-04-23
## [2026-04.3] - 2026-04-23
### Added
- AI classification package (`src/internal/ai/`):
- DeepSeek API client with chat completion requests (`api.deepseek.com` chat completions endpoint)
- Configurable model, temperature, max tokens via `config.yaml`
- Prompt template engine: loads `bin/prompt.txt` at runtime, substitutes `{sender}`, `{subject}`, `{body}` placeholders
- Response parser validates folder names (Important/eCommerce/Spam/Other) and confidence scores (1-100)
- Graceful fallback to placeholder classifier if prompt file is missing or API key unset
- Unit tests for JSON parsing, prompt loading, and API client creation
- Email body text now fetched up to 4000 chars (from 200) for AI classification context
### Changed
- Replaced `EmailSummary.Snippet` with `EmailSummary.Body` (4000 char limit)
- Main orchestrator now initializes DeepSeek classifier and passes to worker
- Worker uses real AI classifier when available; falls back to placeholder on init failure
### Fixed
- N/A
### Added
- IMAP client package (`src/internal/imap/`):

View file

@ -10,6 +10,7 @@ import (
"syscall"
"time"
"inboxer/src/internal/ai"
"inboxer/src/internal/auth"
"inboxer/src/internal/db"
"inboxer/src/internal/web"
@ -90,8 +91,22 @@ func main() {
IdleTimeout: 60 * time.Second,
}
// Initialize AI classifier
deepSeekAPI := ai.NewDeepSeekAPI(
env.DeepSeekAPIKey,
cfg.AI.Model,
cfg.AI.MaxTokens,
cfg.AI.Temperature,
)
var classifier worker.AIClassifier
classifier, err = ai.NewClassifier(deepSeekAPI, cfg.AI.PromptFile)
if err != nil {
log.Printf("Warning: AI classifier initialization failed: %v", err)
log.Println("Falling back to placeholder classifier (all emails -> Other)")
classifier = worker.NewPlaceholderClassifier(cfg.Folders.Other)
}
// Start background worker
classifier := worker.NewPlaceholderClassifier(cfg.Folders.Other)
bgWorker := worker.NewWorker(database, cfg, classifier)
bgWorker.Start()

161
src/internal/ai/ai.go Normal file
View file

@ -0,0 +1,161 @@
package ai
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// DeepSeekAPI is the client for the DeepSeek chat completion API.
// It holds the credential and generation settings that ChatCompletion places
// into every request, together with the HTTP client used to send it.
type DeepSeekAPI struct {
apiKey string // sent as the Bearer token in the Authorization header
model string // copied into the "model" field of each request body
maxTokens int // copied into the "max_tokens" field of each request body
temperature float64 // copied into the "temperature" field of each request body
httpClient *http.Client // client that performs the HTTP round trip
}
// chatMessage represents a message in the chat completion request.
// The same shape is reused for the message carried by each response choice.
type chatMessage struct {
Role string `json:"role"` // speaker role; Classify sends "user"
Content string `json:"content"` // message text
}
// chatCompletionRequest represents the API request body.
// ChatCompletion fills it from the DeepSeekAPI settings and the caller's
// messages, then marshals it to JSON.
type chatCompletionRequest struct {
Model string `json:"model"`
Messages []chatMessage `json:"messages"`
MaxTokens int `json:"max_tokens"`
Temperature float64 `json:"temperature"`
}
// chatCompletionResponse represents the API response.
// ChatCompletion only inspects Error and Choices; the other fields are
// decoded but not read by the code in this file.
type chatCompletionResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []chatCompletionChoice `json:"choices"`
// Usage is a pointer so it stays nil when the field is absent from the JSON.
Usage *struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
} `json:"usage,omitempty"`
// Error is a pointer so it stays nil when the field is absent; a non-nil
// value makes ChatCompletion return an error.
Error *struct {
Message string `json:"message"`
Type string `json:"type"`
Code string `json:"code"`
} `json:"error,omitempty"`
}
// chatCompletionChoice represents a single choice in the response.
// ChatCompletion returns Message.Content of the first choice.
type chatCompletionChoice struct {
Index int `json:"index"`
Message chatMessage `json:"message"`
FinishReason string `json:"finish_reason"`
}
// deepSeekEndpoint is the URL every ChatCompletion request is POSTed to.
const deepSeekEndpoint = "https://api.deepseek.com/v1/chat/completions"
// NewDeepSeekAPI builds a DeepSeek client from the given credential and
// generation settings. The arguments are stored as-is, without validation.
// The embedded HTTP client aborts any request that takes longer than 30 seconds.
func NewDeepSeekAPI(apiKey, model string, maxTokens int, temperature float64) *DeepSeekAPI {
	client := new(DeepSeekAPI)
	client.apiKey = apiKey
	client.model = model
	client.maxTokens = maxTokens
	client.temperature = temperature
	client.httpClient = &http.Client{Timeout: 30 * time.Second}
	return client
}
// ChatCompletion POSTs messages to the DeepSeek chat completion endpoint and
// returns the content of the first choice in the reply.
//
// An error is returned when the request cannot be encoded, built or sent,
// when the response body cannot be read, when the HTTP status is not 200,
// when the body is not valid JSON, when the decoded payload carries an error
// object, or when it contains no choices.
func (d *DeepSeekAPI) ChatCompletion(messages []chatMessage) (string, error) {
	payload, err := json.Marshal(chatCompletionRequest{
		Model:       d.model,
		Messages:    messages,
		MaxTokens:   d.maxTokens,
		Temperature: d.temperature,
	})
	if err != nil {
		return "", fmt.Errorf("failed to marshal request: %w", err)
	}

	httpReq, err := http.NewRequest(http.MethodPost, deepSeekEndpoint, bytes.NewReader(payload))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Authorization", "Bearer "+d.apiKey)
	httpReq.Header.Set("Content-Type", "application/json")

	httpResp, err := d.httpClient.Do(httpReq)
	if err != nil {
		return "", fmt.Errorf("api request failed: %w", err)
	}
	defer httpResp.Body.Close()

	// The body is read before the status check so that a non-200 reply can
	// be echoed back in the error message.
	raw, err := io.ReadAll(httpResp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response: %w", err)
	}
	if httpResp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("api returned status %d: %s", httpResp.StatusCode, string(raw))
	}

	var parsed chatCompletionResponse
	if err := json.Unmarshal(raw, &parsed); err != nil {
		return "", fmt.Errorf("failed to parse response: %w", err)
	}

	switch {
	case parsed.Error != nil:
		apiErr := parsed.Error
		return "", fmt.Errorf("api error: %s (type: %s, code: %s)",
			apiErr.Message, apiErr.Type, apiErr.Code)
	case len(parsed.Choices) == 0:
		return "", fmt.Errorf("no choices in api response")
	}
	return parsed.Choices[0].Message.Content, nil
}
// ClassificationResult holds the parsed DeepSeek response.
type ClassificationResult struct {
	Folder  string `json:"folder"`  // one of Important, eCommerce, Spam, Other
	Score   int    `json:"score"`   // confidence, 1-100 inclusive
	Context string `json:"context"` // free-text field from the model; not validated
}

// ParseClassification parses the JSON response from DeepSeek into a
// ClassificationResult.
//
// The content may be bare JSON or JSON wrapped in a Markdown code fence
// (``` or ```json), which chat models commonly emit; the fence is removed
// before decoding. An error is returned when the content is not valid JSON,
// when the folder is not one of Important/eCommerce/Spam/Other, or when the
// score is outside 1-100.
func ParseClassification(content string) (*ClassificationResult, error) {
	var result ClassificationResult
	if err := json.Unmarshal(stripCodeFence([]byte(content)), &result); err != nil {
		return nil, fmt.Errorf("failed to parse classification JSON: %w", err)
	}

	// Validate folder name.
	switch result.Folder {
	case "Important", "eCommerce", "Spam", "Other":
	default:
		return nil, fmt.Errorf("invalid folder in classification: %q", result.Folder)
	}

	// Validate score.
	if result.Score < 1 || result.Score > 100 {
		return nil, fmt.Errorf("invalid confidence score: %d (must be 1-100)", result.Score)
	}
	return &result, nil
}

// stripCodeFence returns the payload enclosed by a Markdown code fence, or
// raw unchanged when it does not start with a fence.
func stripCodeFence(raw []byte) []byte {
	fence := []byte("```")
	trimmed := bytes.TrimSpace(raw)
	if !bytes.HasPrefix(trimmed, fence) {
		return raw
	}
	// Drop the whole opening line so an optional language tag goes with it.
	newline := bytes.IndexByte(trimmed, '\n')
	if newline < 0 {
		return raw
	}
	inner := bytes.TrimSpace(trimmed[newline+1:])
	inner = bytes.TrimSuffix(inner, fence)
	return bytes.TrimSpace(inner)
}

175
src/internal/ai/ai_test.go Normal file
View file

@ -0,0 +1,175 @@
package ai_test
import (
"os"
"path/filepath"
"testing"
"inboxer/src/internal/ai"
"inboxer/src/internal/imap"
)
// TestParseClassification runs ai.ParseClassification over a table of valid
// payloads (one per accepted folder, plus padded JSON) and of inputs that
// must be rejected: unknown folder, out-of-range score, and non-JSON text.
func TestParseClassification(t *testing.T) {
	type testCase struct {
		name       string
		input      string
		wantFolder string
		wantScore  int
		wantErr    bool
	}

	cases := []testCase{
		{name: "important", input: `{"folder": "Important", "score": 85, "context": "Work-related email"}`, wantFolder: "Important", wantScore: 85},
		{name: "ecommerce", input: `{"folder": "eCommerce", "score": 90, "context": "Shopping confirmation"}`, wantFolder: "eCommerce", wantScore: 90},
		{name: "spam", input: `{"folder": "Spam", "score": 95, "context": "Unsolicited"}`, wantFolder: "Spam", wantScore: 95},
		{name: "other", input: `{"folder": "Other", "score": 50, "context": "Newsletter"}`, wantFolder: "Other", wantScore: 50},
		{name: "invalid folder", input: `{"folder": "Unknown", "score": 50, "context": "Test"}`, wantErr: true},
		{name: "score too low", input: `{"folder": "Other", "score": 0, "context": "Test"}`, wantErr: true},
		{name: "score too high", input: `{"folder": "Other", "score": 101, "context": "Test"}`, wantErr: true},
		{name: "not json", input: `not json at all`, wantErr: true},
		{name: "empty string", input: ``, wantErr: true},
		{name: "extra whitespace in json", input: ` {"folder": "Important", "score": 75, "context": "Test"} `, wantFolder: "Important", wantScore: 75},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := ai.ParseClassification(tc.input)

			switch {
			case tc.wantErr && err == nil:
				t.Fatal("expected error, got nil")
			case tc.wantErr:
				// Rejected as expected; there is no result to inspect.
				return
			case err != nil:
				t.Fatalf("unexpected error: %v", err)
			}

			if got.Folder != tc.wantFolder {
				t.Errorf("expected folder %q, got %q", tc.wantFolder, got.Folder)
			}
			if got.Score != tc.wantScore {
				t.Errorf("expected score %d, got %d", tc.wantScore, got.Score)
			}
		})
	}
}
// TestNewClassifierInvalidPrompt verifies that ai.NewClassifier reports an
// error when the prompt file cannot be found.
func TestNewClassifierInvalidPrompt(t *testing.T) {
	client := ai.NewDeepSeekAPI("test-key", "deepseek-chat", 1000, 0.1)
	if _, err := ai.NewClassifier(client, "/nonexistent/prompt.txt"); err == nil {
		t.Fatal("expected error with nonexistent prompt file")
	}
}
// TestNewClassifierValidPrompt verifies that ai.NewClassifier succeeds and
// returns a non-nil classifier when given a readable prompt file.
func TestNewClassifierValidPrompt(t *testing.T) {
	template := []byte("Classify this email: {sender} {subject} {body}")
	promptPath := filepath.Join(t.TempDir(), "prompt.txt")
	if writeErr := os.WriteFile(promptPath, template, 0644); writeErr != nil {
		t.Fatalf("failed to write prompt file: %v", writeErr)
	}

	client := ai.NewDeepSeekAPI("test-key", "deepseek-chat", 1000, 0.1)
	got, err := ai.NewClassifier(client, promptPath)
	switch {
	case err != nil:
		t.Fatalf("unexpected error: %v", err)
	case got == nil:
		t.Fatal("expected non-nil classifier")
	}
}
// TestNewDeepSeekAPI verifies that the constructor hands back a client.
func TestNewDeepSeekAPI(t *testing.T) {
	if client := ai.NewDeepSeekAPI("sk-test-key", "deepseek-chat", 500, 0.5); client == nil {
		t.Fatal("NewDeepSeekAPI returned nil")
	}
}
// TestNewClassifierFromBin checks that the project's prompt file can be
// loaded through a bare relative name, relying on the loader's bin/ fallback.
//
// The file is only reachable when the test runs from a directory that has
// prompt.txt or bin/prompt.txt beneath it. When it is not, the test is
// reported as skipped rather than silently passing, so the gap is visible in
// the test output.
func TestNewClassifierFromBin(t *testing.T) {
	api := ai.NewDeepSeekAPI("test-key", "deepseek-chat", 1000, 0.1)
	classifier, err := ai.NewClassifier(api, "prompt.txt")
	if err != nil {
		// If CWD isn't project root, this may fail; that's acceptable.
		t.Skipf("relative path failed (maybe not in project root): %v", err)
	}
	if classifier == nil {
		t.Fatal("expected non-nil classifier")
	}
}
// TestClassifierNoAPIKey checks that classifying with a fake API key does
// not panic. Despite its name, the key is set but bogus, not missing.
//
// Classify sends a real HTTP request to the DeepSeek endpoint, so the test
// needs network access and can take up to the client's timeout. It is
// skipped under `go test -short` to keep quick runs hermetic.
func TestClassifierNoAPIKey(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping: Classify performs a live request to the DeepSeek API")
	}

	dir := t.TempDir()
	promptFile := filepath.Join(dir, "prompt.txt")
	err := os.WriteFile(promptFile, []byte("Classify: {sender} {subject} {body}"), 0644)
	if err != nil {
		t.Fatalf("write prompt: %v", err)
	}

	api := ai.NewDeepSeekAPI("sk-fake-key", "deepseek-chat", 1000, 0.1)
	classifier, err := ai.NewClassifier(api, promptFile)
	if err != nil {
		t.Fatalf("NewClassifier failed: %v", err)
	}

	_, _, err = classifier.Classify(imap.EmailSummary{
		From:    "test@example.com",
		Subject: "Test",
		Body:    "Body text",
	})
	// Should fail because API key is fake (connection refused or auth error).
	// A nil error is only logged, since the outcome depends on the network.
	if err == nil {
		t.Log("no error with fake API key (network may be mocked)")
	}
}
// TestNewDeepSeekAPIDefaults verifies that constructing a client with
// typical settings yields a non-nil value.
func TestNewDeepSeekAPIDefaults(t *testing.T) {
	if client := ai.NewDeepSeekAPI("test-key", "deepseek-chat", 1000, 0.1); client == nil {
		t.Fatal("expected non-nil API client")
	}
}

View file

@ -0,0 +1,79 @@
package ai
import (
"fmt"
"os"
"strings"
"inboxer/src/internal/imap"
)
// Classifier implements worker.AIClassifier using DeepSeek
// It pairs a DeepSeek client with the prompt template read at construction.
type Classifier struct {
api *DeepSeekAPI // client Classify uses to reach DeepSeek
prompt string // loaded from prompt file
}
// NewClassifier creates a new AI classifier from a DeepSeek client and the
// path of a prompt template file.
//
// It returns an error when api is nil, when the client has no API key, or
// when the prompt file cannot be loaded. Failing here on a missing key lets
// the caller fall back to another classifier at start-up, instead of every
// later Classify call being sent without credentials.
func NewClassifier(api *DeepSeekAPI, promptFile string) (*Classifier, error) {
	if api == nil {
		return nil, fmt.Errorf("deepseek api client is nil")
	}
	if api.apiKey == "" {
		return nil, fmt.Errorf("deepseek api key is not set")
	}

	prompt, err := loadPromptFile(promptFile)
	if err != nil {
		return nil, fmt.Errorf("failed to load prompt file: %w", err)
	}
	return &Classifier{
		api:    api,
		prompt: prompt,
	}, nil
}
// Classify asks DeepSeek to classify one email and returns the folder name
// and confidence score taken from the model's reply.
//
// The prompt template is rendered with the email's fields and sent as a
// single "user" message. An error is returned when the API call fails or
// when the reply does not parse as a valid classification.
func (c *Classifier) Classify(email imap.EmailSummary) (string, int, error) {
	userMsg := chatMessage{
		Role:    "user",
		Content: renderPrompt(c.prompt, email),
	}

	reply, err := c.api.ChatCompletion([]chatMessage{userMsg})
	if err != nil {
		return "", 0, fmt.Errorf("deepseek api error: %w", err)
	}

	parsed, parseErr := ParseClassification(reply)
	if parseErr != nil {
		return "", 0, fmt.Errorf("failed to parse AI response: %w (raw: %s)", parseErr, reply)
	}

	// The folder is returned exactly as the model named it ("Important",
	// "eCommerce", "Spam" or "Other"). No translation to configured IMAP
	// folder names happens here; that is left to the caller.
	return parsed.Folder, parsed.Score, nil
}
// renderPrompt substitutes the {sender}, {subject} and {body} placeholders
// in tmpl with the matching fields of email and returns the result.
//
// All three placeholders are replaced in a single pass over the template, so
// placeholder-looking text inside the email itself is copied verbatim. For
// example, a subject containing "{body}" is not expanded a second time.
func renderPrompt(tmpl string, email imap.EmailSummary) string {
	return strings.NewReplacer(
		"{sender}", email.From,
		"{subject}", email.Subject,
		"{body}", email.Body,
	).Replace(tmpl)
}
// loadPromptFile reads the prompt template from disk and returns it with
// surrounding whitespace removed.
//
// The path is tried exactly as given first; if that read fails, the same
// path is tried again with a "bin/" prefix. An error is returned when
// neither location can be read, or when the file holds nothing but
// whitespace, since an empty template would produce empty prompts.
func loadPromptFile(path string) (string, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		// Try with bin/ prefix.
		data, err = os.ReadFile("bin/" + path)
		if err != nil {
			return "", fmt.Errorf("prompt file not found at %q or bin/%s: %w", path, path, err)
		}
	}

	prompt := strings.TrimSpace(string(data))
	if prompt == "" {
		return "", fmt.Errorf("prompt file %q is empty", path)
	}
	return prompt, nil
}

View file

@ -2,7 +2,6 @@ package auth
import (
"net/http"
"time"
"github.com/gorilla/sessions"
)

View file

@ -20,10 +20,10 @@ func NewDatabaseOTPStore(db *Database) *DatabaseOTPStore {
// StoreOTP stores an OTP hash and expiry for the given email
func (s *DatabaseOTPStore) StoreOTP(email, otpHash string, expiry time.Time) error {
// First, ensure user exists
user, err := s.db.GetUserByEmail(email)
_, err := s.db.GetUserByEmail(email)
if err != nil {
// User doesn't exist, create them
user, err = s.db.CreateUser(email)
_, err = s.db.CreateUser(email)
if err != nil {
return fmt.Errorf("failed to create user: %w", err)
}

View file

@ -16,7 +16,7 @@ type EmailSummary struct {
From string
Date time.Time
MessageID string
Snippet string // first ~200 chars of body text
Body string // body text (up to 4000 chars) for AI classification
}
// fetchItems returns the common FetchItems for fetching email metadata + body snippet
@ -48,15 +48,16 @@ func buildEmailSummary(msg *imap.Message) EmailSummary {
summary.From = msg.Envelope.From[0].Address()
}
// Extract body snippet from first available body section
// Extract body text from first available body section
// Limit to 4000 chars for AI classification efficiency
for _, literal := range msg.Body {
if literal != nil {
data, err := io.ReadAll(literal)
if err == nil && len(data) > 0 {
if len(data) > 200 {
summary.Snippet = string(data[:200])
if len(data) > 4000 {
summary.Body = string(data[:4000])
} else {
summary.Snippet = string(data)
summary.Body = string(data)
}
}
break // first body section only

View file

@ -278,7 +278,7 @@ func TestEmailSummaryFields(t *testing.T) {
From: "user@example.com",
Date: now,
MessageID: "<abc@example.com>",
Snippet: "Hello world",
Body: "Hello world",
}
if s.UID != 42 {
@ -293,8 +293,8 @@ func TestEmailSummaryFields(t *testing.T) {
if s.MessageID != "<abc@example.com>" {
t.Errorf("expected MessageID, got %s", s.MessageID)
}
if s.Snippet != "Hello world" {
t.Errorf("expected snippet, got %s", s.Snippet)
if s.Body != "Hello world" {
t.Errorf("expected body text, got %s", s.Body)
}
}

View file

@ -76,8 +76,8 @@ type FlashMessage struct {
// NewTemplateData creates base template data
func (h *Handler) NewTemplateData(r *http.Request) TemplateData {
email, loggedIn := h.authService.GetSessionManager().GetUserEmail(r)
email, _ := h.authService.GetSessionManager().GetUserEmail(r)
return TemplateData{
Title: "inBOXER",
CurrentPage: "",
@ -371,7 +371,7 @@ func (h *Handler) ToggleTestModeHandler(w http.ResponseWriter, r *http.Request)
// ProcessNowHandler triggers immediate email processing
func (h *Handler) ProcessNowHandler(w http.ResponseWriter, r *http.Request) {
// Check authentication
email, loggedIn := h.authService.GetSessionManager().GetUserEmail(r)
_, loggedIn := h.authService.GetSessionManager().GetUserEmail(r)
if !loggedIn {
http.Redirect(w, r, "/login", http.StatusSeeOther)
return