QuickStart¶
Boosting Sentiment Detection for Enterprise Support Emails¶
Overview¶
A major enterprise support team manages thousands of facility maintenance requests via email every week. Each message can be:
- Positive — expressing satisfaction or thanks
- Neutral — routine updates or requests
- Negative — reporting issues or dissatisfaction
But manual triage is slow and inconsistent, and the team's first AI solution struggled with accuracy — especially distinguishing between neutral and negative feedback.
Goal: Rapidly improve sentiment classification accuracy so every support request is routed and prioritized correctly, using real-world data from Meta's Facility Support Analyzer dataset.
⚠️ Challenge
|
Results
|
!pip install afnio
import os
import json
import re
from getpass import getpass
import afnio
import afnio.cognitive as cog
import afnio.cognitive.functional as F
import afnio.tellurio as te
from afnio.models.openai import AsyncOpenAI
from afnio.trainer import Trainer
from afnio.utils.data import DataLoader, WeightedRandomSampler
from afnio.utils.datasets import FacilitySupport
Setup: API Keys and Project Initialization¶
Set your OpenAI and Tellurio API keys, then initialize your project and experiment run.
# Resolve each credential from the environment first and fall back to an
# interactive prompt only when the variable is missing or empty.
if not (openai_api_key := os.getenv("OPENAI_API_KEY")):
    openai_api_key = getpass("Enter your OpenAI API key: ")
    # Export the key for the rest of the session (presumably read by the
    # argument-less `AsyncOpenAI()` clients created later).
    os.environ["OPENAI_API_KEY"] = openai_api_key

if not (tellurio_api_key := os.getenv("TELLURIO_API_KEY")):
    # This key is automatically generated at signup and visible on the Tellurio
    # Studio overview page (or you can create a new one under
    # `https://platform.tellurio.ai/settings/api-keys`)
    tellurio_api_key = getpass("Enter your Tellurio API key: ")

# The walrus must bind `tellurio_username` (the name used by `te.init` below);
# binding a different name left it undefined whenever TELLURIO_USERNAME was set.
if not (tellurio_username := os.getenv("TELLURIO_USERNAME")):
    # Replace with your Tellurio username (in slug format). You can find this in
    # the Tellurio Studio header bar or in the URL when logged in
    # (e.g., `https://platform.tellurio.ai/your-username-slug`)
    tellurio_username = input("Enter your Tellurio username: ")

te.configure_logging("INFO")
te.login(api_key=tellurio_api_key)
# Start an experiment run under the user's "Facility Support" project; `run`
# is closed with `run.finish()` at the end of the notebook.
run = te.init(tellurio_username, "Facility Support")
Data Preparation¶
Balance the training set, prepare your data loaders, and get the dataset ready for training and evaluation.
# The training set is imbalanced, so we assign weights to each sample to ensure fair learning across all classes
def compute_sample_weights(data):
    """Return one sampling weight per item of `data`.

    Each weight is `len(data) / (number of samples sharing that item's label)`,
    so samples from rarer classes receive proportionally larger weights.

    Args:
        data: Iterable, sized dataset whose items unpack as
            `(_, (_, label_variable, _))`; the label value is read from
            `label_variable.data`.

    Returns:
        A list of floats, in the same order as the items of `data`.
    """
    # Reading the labels is wrapped in the Tellurio context manager that
    # suppresses variable notifications while the dataset is traversed.
    with te.suppress_variable_notifications():
        sample_labels = []
        for _, (_, target, _) in data:
            sample_labels.append(target.data)
        per_class = {cls: sample_labels.count(cls) for cls in set(sample_labels)}
        n_samples = len(data)
    return [n_samples / per_class[cls] for cls in sample_labels]
# Number of samples per batch, shared by the train, validation and test loaders.
BATCH_SIZE = 33

# Load the three dataset splits (in train / val / test order) from the "data" root.
training_data, validation_data, test_data = (
    FacilitySupport(split=split_name, root="data")
    for split_name in ("train", "val", "test")
)

# Training batches are drawn with replacement using the per-sample class weights,
# with as many draws per epoch as there are training samples.
weights = compute_sample_weights(training_data)
sampler = WeightedRandomSampler(
    weights,
    num_samples=len(training_data),
    replacement=True,
)
train_dataloader = DataLoader(
    training_data,
    sampler=sampler,
    batch_size=BATCH_SIZE,
)

# Evaluation loaders share the same batch size and a fixed seed.
eval_loader_args = {"batch_size": BATCH_SIZE, "seed": 42}
val_dataloader = DataLoader(validation_data, **eval_loader_args)
test_dataloader = DataLoader(test_data, **eval_loader_args)
🧠 AI Agent Configuration¶
Define the initial prompt, response format, LM model clients used for inference and optimization, and the sentiment classification agent.
# Start with a simple prompt. The optimizer will refine it, but it can't guess
# your intent, so clearly state what you want the model to do.
sentiment_task = "Read the provided message and determine the sentiment."

# User-message template; the `{message}` placeholder is filled with the text
# of the message to classify.
sentiment_user = (
    "Read the provided message and determine the sentiment.\n\n"
    "**Message:**\n\n"
    "{message}\n\n"
)
# JSON schema of the classifier's answer: an object with exactly one required
# string field, `sentiment`, restricted to the three known labels.
_SENTIMENT_SCHEMA = {
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "neutral", "negative"],
        },
    },
    "additionalProperties": False,
    "required": ["sentiment"],
}

# Response-format payload passed to the chat-completion call so that the
# model's output follows the schema above.
SENTIMENT_RESPONSE_FORMAT = {
    "type": "json_schema",
    "json_schema": {
        "strict": True,
        "name": "sentiment_response_schema",
        "schema": _SENTIMENT_SCHEMA,
    },
}
# We use gpt-4.1-nano for the forward pass (inference), gpt-5 for the backward
# pass (feedback generation), and gpt-5 for the optimization step (prompt rewriting)
backward_completion_args = {
    "temperature": 1.0,
    "max_completion_tokens": 32000,
    "reasoning_effort": "low",
}
afnio.set_backward_model_client(
    "openai/gpt-5",
    completion_args=backward_completion_args,
)

# Separate client instances for inference and for the optimizer; the concrete
# model names are supplied where each client is used.
fw_model_client = AsyncOpenAI()
optim_model_client = AsyncOpenAI()
class FacilitySupportAnalyzer(cog.Module):
    """Sentiment-classification agent for facility-support messages.

    The agent owns a trainable system prompt (`sentiment_task`), a fixed
    user-message template (`sentiment_user`) and a chat-completion module.
    Training, validation and test steps all share the same scoring logic.
    """

    def __init__(self):
        super().__init__()
        # The system prompt is the only value created with requires_grad=True.
        self.sentiment_task = cog.Parameter(
            data=sentiment_task,
            role="system prompt for sentiment classification",
            requires_grad=True,
        )
        # The user template is a plain Variable.
        self.sentiment_user = afnio.Variable(
            data=sentiment_user,
            role="input template to sentiment classifier",
        )
        self.sentiment_classifier = cog.ChatCompletion()

    def forward(self, fwd_model, inputs, **completion_args):
        """Run the classifier on `inputs` with the given forward model client.

        Args:
            fwd_model: LM client used for the chat-completion call.
            inputs: Mapping of template placeholder names to values.
            **completion_args: Extra arguments forwarded to the completion call.

        Returns:
            Whatever the chat-completion module returns for the request.
        """
        system_message = {"role": "system", "content": [self.sentiment_task]}
        user_message = {"role": "user", "content": [self.sentiment_user]}
        return self.sentiment_classifier(
            fwd_model,
            [system_message, user_message],
            inputs=inputs,
            response_format=SENTIMENT_RESPONSE_FORMAT,
            **completion_args,
        )

    def training_step(self, batch, batch_idx):
        """Classify one batch and score it against the gold sentiments.

        Returns:
            A dict with the evaluator output under "loss" and, under
            "accuracy", `loss[0].data` divided by the number of gold labels.
        """
        X, y = batch
        _, gold_sentiment, _ = y
        pred_sentiment = self(
            fw_model_client,
            inputs={"message": X},
            model="gpt-4.1-nano",
            temperature=0.0,
        )

        # Strip an optional Markdown ```json fence from each raw completion,
        # then keep only the lower-cased "sentiment" field of the JSON payload.
        parsed_labels = []
        for raw_completion in pred_sentiment.data:
            payload = re.sub(r"^```json\n|\n```$", "", raw_completion)
            parsed_labels.append(json.loads(payload)["sentiment"].lower())
        pred_sentiment.data = parsed_labels

        loss = F.exact_match_evaluator(pred_sentiment, gold_sentiment)
        # NOTE(review): presumably loss[0] holds the number of exact matches,
        # which makes this ratio the batch accuracy; confirm against the evaluator.
        accuracy = loss[0].data / len(gold_sentiment.data)
        return {"loss": loss, "accuracy": accuracy}

    def validation_step(self, batch, batch_idx):
        """Score a validation batch exactly like a training batch."""
        return self.training_step(batch, batch_idx)

    def test_step(self, batch, batch_idx):
        """Score a test batch exactly like a validation batch."""
        return self.validation_step(batch, batch_idx)

    def configure_optimizers(self):
        """Build the TGD optimizer over this agent's parameters."""
        # Curly braces are forbidden in rewritten prompts (presumably because
        # they would collide with the template placeholder syntax).
        no_braces = afnio.Variable(
            data="The improved variable must never include or reference the characters `{` or `}`. Do not output them, mention them, or describe them in any way.",
            role="optimizer constraint",
        )
        return afnio.optim.TGD(
            self.parameters(),
            model_client=optim_model_client,
            constraints=[no_braces],
            momentum=3,
            model="gpt-5",
            temperature=1.0,
            max_completion_tokens=32000,
            reasoning_effort="low",
        )
Training and Evaluation¶
Instantiate the agent and trainer, establish baseline performance, train the agent, and validate results.
agent = FacilitySupportAnalyzer()
trainer = Trainer(max_epochs=5, enable_agent_summary=False)
print(agent)

# The three LM clients involved in a run: forward (inference), backward
# (feedback) and optimizer (prompt rewriting).
llm_clients = [
    fw_model_client,
    afnio.get_backward_model_client(),
    optim_model_client,
]

# Establish baseline performance by testing the untrained agent on the test set
trainer.test(
    agent=agent,
    test_dataloader=test_dataloader,
    llm_clients=llm_clients,
)

# Train the agent and validate results
trainer.fit(
    agent=agent,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    llm_clients=llm_clients,
)
Loading and Testing the Optimized AI Agent¶
The best checkpoint is the one with the highest val_accuracy (accuracy on validation set) during training. You can find its filename in the automatically created checkpoints/ directory.
Load the best agent checkpoint, evaluate on the test set, and display the final results.
# Only run this if you want to download our reference checkpoint
import urllib.request

checkpoint_path = 'checkpoints/checkpoint_epoch2_20250912-190039.hf'
checkpoint_url = "https://github.com/Tellurio-AI/tutorials/raw/main/facility_support/checkpoints/checkpoint_epoch2_20250912-190039.hf"

if not os.path.exists(checkpoint_path):
    # Pure-Python replacement for `!mkdir -p` / `!wget`: works without external
    # binaries and outside IPython.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    # Download to a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file that passes the existence check.
    partial_path = checkpoint_path + ".part"
    urllib.request.urlretrieve(checkpoint_url, partial_path)
    os.replace(partial_path, checkpoint_path)
# Replace with your best checkpoint path, or use our reference checkpoint
# (downloaded with the previous cell)
checkpoint = afnio.load("checkpoints/checkpoint_epoch2_20250912-190039.hf")

# Rebuild a fresh agent and restore the saved state into it, supplying the
# forward LM client under the key the classifier module expects.
best_agent = FacilitySupportAnalyzer()
client_bindings = {
    "sentiment_classifier.forward_model_client": fw_model_client,
}
best_agent.load_state_dict(
    checkpoint["agent_state_dict"],
    model_clients=client_bindings,
)

# Test the best agent checkpoint on the test set
trainer.test(
    agent=best_agent,
    test_dataloader=test_dataloader,
    llm_clients=llm_clients,
)
# Compare the agent's prompt before and after training
import html

from IPython.display import display, HTML

# Escape both prompts before embedding them in markup: an optimized prompt may
# contain `<`, `>` or `&`, which would otherwise be parsed as HTML and corrupt
# (or silently drop parts of) the rendered comparison.
prompt_before = html.escape(str(sentiment_task))
prompt_after = html.escape(str(best_agent.sentiment_task.data))

display(HTML(f"""
<table style="width:100%;border-collapse:collapse;">
<tr>
<th style="text-align:left;background-color:#e0e0e0; color:#222;font-weight:bold;">BEFORE OPTIMIZATION</th>
<th style="text-align:left;background-color:#e0e0e0; color:#222;font-weight:bold;">AFTER OPTIMIZATION</th>
</tr>
<tr>
<td style="text-align:left;vertical-align:top;word-break:break-word;">
<pre style="margin:0;white-space:pre-wrap;word-break:break-word;">{prompt_before}</pre>
</td>
<td style="text-align:left;vertical-align:top;word-break:break-word;">
<pre style="margin:0;white-space:pre-wrap;word-break:break-word;">{prompt_after}</pre>
</td>
</tr>
</table>
"""))

# Close the experiment run opened by `te.init` during setup.
run.finish()