Time to build something real. In this article, you will create a CLI chatbot that streams responses, keeps conversation history, and supports slash commands. It is a complete, working application in about 150 lines of code.
What We Are Building
A terminal chatbot with these features:
- Streaming responses (character by character)
- Conversation history (multi-turn)
- Customizable system prompt
- Slash commands:
  /clear, /model, /system, /tokens, /export
- Token usage tracking
- Graceful error handling
Python Implementation
Setup
pip install anthropic rich
Full Code
#!/usr/bin/env python3
"""CLI chatbot with Claude API — streaming, history, and slash commands."""
import anthropic
import json
import sys
from datetime import datetime
from rich.console import Console

# Shared Rich console for styled terminal output.
console = Console()
# Reads the API key from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic()

# Runtime configuration; the /model and /system commands mutate this in place.
config = {
    "model": "claude-sonnet-4-6",
    "system": "You are a helpful assistant. Be concise and direct.",
    "max_tokens": 4096,
}

# Conversation state: the full multi-turn history plus running usage totals.
messages: list[dict] = []
total_input_tokens = 0
total_output_tokens = 0
def stream_response(user_input: str) -> str:
    """Send *user_input* to Claude and stream the reply to the terminal.

    Appends both the user turn and the assistant reply to the global
    ``messages`` history and accumulates the token-usage totals.

    Args:
        user_input: The user's message text.

    Returns:
        The complete assistant response text.
    """
    global total_input_tokens, total_output_tokens
    messages.append({"role": "user", "content": user_input})
    full_response = ""
    try:
        with client.messages.stream(
            model=config["model"],
            max_tokens=config["max_tokens"],
            system=config["system"],
            messages=messages,
        ) as stream:
            # markup=False: model output may contain literal "[...]" text that
            # Rich would otherwise parse as markup (and possibly crash on).
            for text in stream.text_stream:
                console.print(text, end="", style="green", markup=False)
                full_response += text
            # The final message carries the authoritative token counts.
            final = stream.get_final_message()
            total_input_tokens += final.usage.input_tokens
            total_output_tokens += final.usage.output_tokens
    except Exception:
        # Roll back the user turn so a failed call doesn't leave a dangling
        # user message (two consecutive user turns would make the next
        # request invalid).
        messages.pop()
        raise
    console.print()  # New line after response
    messages.append({"role": "assistant", "content": full_response})
    return full_response
def handle_command(command: str) -> bool:
    """Handle slash commands.

    Args:
        command: The raw input line, beginning with "/".

    Returns:
        True if the command was recognized and handled, False otherwise.
    """
    parts = command.strip().split(maxsplit=1)
    cmd = parts[0].lower()
    arg = parts[1] if len(parts) > 1 else ""
    if cmd == "/clear":
        messages.clear()
        console.print("[yellow]Conversation cleared.[/yellow]")
        return True
    elif cmd == "/model":
        if arg:
            config["model"] = arg
            console.print(f"[yellow]Model set to: {arg}[/yellow]")
        else:
            console.print(f"[yellow]Current model: {config['model']}[/yellow]")
        return True
    elif cmd == "/system":
        if arg:
            config["system"] = arg
            console.print("[yellow]System prompt updated.[/yellow]")
        else:
            console.print(f"[yellow]Current system prompt: {config['system']}[/yellow]")
        return True
    elif cmd == "/tokens":
        console.print(f"[yellow]Input tokens: {total_input_tokens:,}[/yellow]")
        console.print(f"[yellow]Output tokens: {total_output_tokens:,}[/yellow]")
        # Estimate cost (Sonnet 4.6 pricing: $3/M input, $15/M output).
        cost = (total_input_tokens * 3 + total_output_tokens * 15) / 1_000_000
        console.print(f"[yellow]Estimated cost: ${cost:.4f}[/yellow]")
        return True
    elif cmd == "/export":
        filename = arg or f"chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(filename, "w") as f:
            json.dump(
                {
                    "model": config["model"],
                    "system": config["system"],
                    "messages": messages,
                    "tokens": {
                        "input": total_input_tokens,
                        "output": total_output_tokens,
                    },
                },
                f,
                indent=2,
            )
        # Fix: report the actual file name (the original printed a literal
        # placeholder instead of the exported path).
        console.print(f"[yellow]Conversation exported to {filename}[/yellow]")
        return True
    elif cmd == "/help":
        console.print("[yellow]Commands:[/yellow]")
        console.print("  /clear — Clear conversation history")
        console.print("  /model [name] — View or change model")
        console.print("  /system [prompt] — View or change system prompt")
        console.print("  /tokens — Show token usage and cost")
        console.print("  /export [file] — Export conversation to JSON")
        console.print("  /help — Show this help")
        console.print("  /quit — Exit")
        return True
    elif cmd in ("/quit", "/exit", "/q"):
        console.print("[yellow]Goodbye![/yellow]")
        sys.exit(0)
    return False
def main():
    """Main chat loop: prompt, dispatch slash commands, stream replies."""
    console.print("[bold blue]Claude CLI Chatbot[/bold blue]")
    console.print(f"Model: {config['model']} | Type /help for commands\n")
    while True:
        try:
            user_input = console.input("[bold cyan]You:[/bold cyan] ").strip()
            if not user_input:
                continue
            if user_input.startswith("/"):
                if handle_command(user_input):
                    continue
            console.print("[bold green]Claude:[/bold green] ", end="")
            stream_response(user_input)
            console.print()
        except KeyboardInterrupt:
            console.print("\n[yellow]Use /quit to exit.[/yellow]")
        except anthropic.RateLimitError:
            # Must come before APIError: RateLimitError is a subclass of
            # APIError, so the original ordering made this handler unreachable.
            console.print("\n[red]Rate limit hit. Wait a moment and try again.[/red]")
        except anthropic.APIError as e:
            console.print(f"\n[red]API Error: {e.message}[/red]")


if __name__ == "__main__":
    main()
TypeScript Implementation
Setup
npm init -y
npm install @anthropic-ai/sdk @clack/prompts
Full Code
// chatbot.ts
import Anthropic from "@anthropic-ai/sdk";
import * as p from "@clack/prompts";
import { writeFileSync } from "fs";

// Reads the API key from the ANTHROPIC_API_KEY environment variable.
const client = new Anthropic();

// Runtime configuration; the /model and /system commands mutate this in place.
const config = {
  model: "claude-sonnet-4-6",
  system: "You are a helpful assistant. Be concise and direct.",
  maxTokens: 4096,
};

// Conversation state: the full multi-turn history plus running usage totals.
const messages: Anthropic.MessageParam[] = [];
let totalInputTokens = 0;
let totalOutputTokens = 0;
/**
 * Send `userInput` to Claude and stream the reply to stdout.
 *
 * Appends both the user turn and the assistant reply to `messages` and
 * accumulates the token-usage totals.
 *
 * @param userInput The user's message text.
 * @returns The complete assistant response text.
 */
async function streamResponse(userInput: string): Promise<string> {
  messages.push({ role: "user", content: userInput });
  let fullResponse = "";
  try {
    const stream = await client.messages.stream({
      model: config.model,
      max_tokens: config.maxTokens,
      system: config.system,
      messages,
    });
    for await (const event of stream) {
      if (
        event.type === "content_block_delta" &&
        event.delta.type === "text_delta"
      ) {
        process.stdout.write(event.delta.text);
        fullResponse += event.delta.text;
      }
    }
    // The final message carries the authoritative token counts.
    const finalMessage = await stream.finalMessage();
    totalInputTokens += finalMessage.usage.input_tokens;
    totalOutputTokens += finalMessage.usage.output_tokens;
  } catch (error) {
    // Roll back the user turn so a failed request doesn't leave a dangling
    // user message (two consecutive user turns would invalidate the next
    // request after main() catches this error and loops).
    messages.pop();
    throw error;
  }
  console.log(); // New line after response
  messages.push({ role: "assistant", content: fullResponse });
  return fullResponse;
}
/**
 * Handle slash commands.
 *
 * @param command The raw input line, beginning with "/".
 * @returns True if the command was recognized and handled, false otherwise.
 */
function handleCommand(command: string): boolean {
  const [cmd, ...args] = command.trim().split(" ");
  const arg = args.join(" ");
  switch (cmd.toLowerCase()) {
    case "/clear":
      messages.length = 0;
      console.log("\x1b[33mConversation cleared.\x1b[0m");
      return true;
    case "/model":
      if (arg) {
        config.model = arg;
        console.log(`\x1b[33mModel set to: ${arg}\x1b[0m`);
      } else {
        console.log(`\x1b[33mCurrent model: ${config.model}\x1b[0m`);
      }
      return true;
    case "/system":
      if (arg) {
        config.system = arg;
        console.log("\x1b[33mSystem prompt updated.\x1b[0m");
      } else {
        console.log(`\x1b[33mCurrent system prompt: ${config.system}\x1b[0m`);
      }
      return true;
    case "/tokens": {
      console.log(`\x1b[33mInput tokens: ${totalInputTokens.toLocaleString()}\x1b[0m`);
      console.log(`\x1b[33mOutput tokens: ${totalOutputTokens.toLocaleString()}\x1b[0m`);
      // Estimate cost (Sonnet 4.6 pricing: $3/M input, $15/M output).
      const cost = (totalInputTokens * 3 + totalOutputTokens * 15) / 1_000_000;
      console.log(`\x1b[33mEstimated cost: $${cost.toFixed(4)}\x1b[0m`);
      return true;
    }
    case "/export": {
      const filename =
        arg || `chat_${new Date().toISOString().replace(/[:.]/g, "-")}.json`;
      writeFileSync(
        filename,
        JSON.stringify(
          {
            model: config.model,
            system: config.system,
            messages,
            tokens: { input: totalInputTokens, output: totalOutputTokens },
          },
          null,
          2
        )
      );
      // Fix: report the actual file name (the original printed a literal
      // placeholder instead of the exported path).
      console.log(`\x1b[33mConversation exported to ${filename}\x1b[0m`);
      return true;
    }
    case "/help":
      console.log("\x1b[33mCommands:\x1b[0m");
      console.log("  /clear — Clear conversation history");
      console.log("  /model [name] — View or change model");
      console.log("  /system [prompt] — View or change system prompt");
      console.log("  /tokens — Show token usage and cost");
      console.log("  /export [file] — Export conversation to JSON");
      console.log("  /help — Show this help");
      console.log("  /quit — Exit");
      return true;
    case "/quit":
    case "/exit":
    case "/q":
      console.log("\x1b[33mGoodbye!\x1b[0m");
      process.exit(0);
    default:
      return false;
  }
}
/**
 * Interactive REPL: prompt the user, dispatch slash commands, and stream
 * Claude's replies until the process exits via /quit.
 */
async function main(): Promise<void> {
  p.intro("Claude CLI Chatbot");
  console.log(`Model: ${config.model} | Type /help for commands\n`);
  for (;;) {
    const answer = await p.text({
      message: "You:",
      placeholder: "Type your message...",
    });
    // Ctrl+C inside the prompt cancels the input, not the program.
    if (p.isCancel(answer)) {
      console.log("\x1b[33mUse /quit to exit.\x1b[0m");
      continue;
    }
    const line = (answer as string).trim();
    if (!line) continue;
    // Recognized slash commands are consumed; anything else goes to Claude.
    if (line.startsWith("/") && handleCommand(line)) continue;
    try {
      process.stdout.write("\x1b[32mClaude:\x1b[0m ");
      await streamResponse(line);
      console.log();
    } catch (err) {
      if (err instanceof Anthropic.APIError) {
        console.error(`\x1b[31mAPI Error: ${err.message}\x1b[0m`);
      } else {
        console.error(`\x1b[31mError: ${err}\x1b[0m`);
      }
    }
  }
}
main();
How It Works
Conversation History
The messages array stores the entire conversation. Each user message and each assistant response is added to the array. On every new request, Claude sees the full history.
messages = [
{"role": "user", "content": "What is Python?"},
{"role": "assistant", "content": "Python is a programming language..."},
{"role": "user", "content": "What about TypeScript?"},
# Claude sees all previous messages and can reference them
]
Streaming
Instead of waiting for the full response, we stream it character by character:
with client.messages.stream(...) as stream:
for text in stream.text_stream:
print(text, end="", flush=True)
This gives the user instant feedback. The first characters appear in under a second.
Token Tracking
After each response, we track token usage:
final = stream.get_final_message()
total_input_tokens += final.usage.input_tokens
total_output_tokens += final.usage.output_tokens
The /tokens command shows the running total and estimated cost.
Adding Tools
You can add tools to your chatbot so Claude can do more than just chat. Here is an example with a calculator and a file reader:
# Tool definitions passed to the API; Claude decides when to invoke them and
# supplies arguments matching each tool's input_schema (JSON Schema).
tools = [
    {
        "name": "calculate",
        "description": "Calculate a math expression",
        "input_schema": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Math expression to evaluate, e.g. '2 + 2 * 3'",
                },
            },
            "required": ["expression"],
        },
    },
    {
        "name": "read_file",
        "description": "Read the contents of a file",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path to read"},
            },
            "required": ["path"],
        },
    },
]
def _eval_math(expression: str) -> float:
    """Safely evaluate a basic arithmetic expression via the AST.

    Supports +, -, *, / and unary signs on numeric literals only. Unlike a
    character-whitelisted eval() — which still admits "**" because "*" is
    allowed, enabling pathological inputs like "9**9**9**9" — nothing here
    can execute code or blow up exponentially.

    Raises:
        ValueError: If the expression uses anything outside the allowed subset.
        SyntaxError: If the expression is not valid Python syntax.
    """
    import ast
    import operator

    binops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
    }

    def walk(node):
        if isinstance(node, ast.Expression):
            return walk(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binops:
            return binops[type(node.op)](walk(node.left), walk(node.right))
        if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)):
            value = walk(node.operand)
            return -value if isinstance(node.op, ast.USub) else value
        raise ValueError("Invalid expression")

    return walk(ast.parse(expression, mode="eval"))


def execute_tool(name: str, input_data: dict) -> str:
    """Execute a tool and return the result as a string.

    Args:
        name: Tool name ("calculate" or "read_file").
        input_data: Tool input matching the tool's input_schema.

    Returns:
        The tool result, an "Error: ..." string on failure, or
        "Unknown tool: ..." for an unrecognized name.
    """
    if name == "calculate":
        try:
            return str(_eval_math(input_data["expression"]))
        except Exception as e:
            return f"Error: {e}"
    elif name == "read_file":
        try:
            with open(input_data["path"]) as f:
                return f.read()[:5000]  # Limit to 5000 chars
        except FileNotFoundError:
            return f"Error: File not found: {input_data['path']}"
    return f"Unknown tool: {name}"
def stream_response_with_tools(user_input: str) -> str:
    """Send a message with tool support, looping until Claude finishes.

    Claude may stop with stop_reason == "tool_use"; in that case each
    requested tool is executed and the results are fed back as a user turn,
    then the loop repeats until a normal text response is produced.

    Args:
        user_input: The user's message text.

    Returns:
        The final assistant text response.
    """
    global total_input_tokens, total_output_tokens
    messages.append({"role": "user", "content": user_input})
    while True:
        response = client.messages.create(
            model=config["model"],
            max_tokens=config["max_tokens"],
            system=config["system"],
            messages=messages,
            tools=tools,
        )
        # Track usage on every round trip — the original skipped this, so
        # /tokens under-reported whenever tools were used (inconsistent with
        # stream_response()).
        total_input_tokens += response.usage.input_tokens
        total_output_tokens += response.usage.output_tokens
        # If Claude wants to use a tool, run it and loop again.
        if response.stop_reason == "tool_use":
            messages.append({"role": "assistant", "content": response.content})
            tool_results = []
            for block in response.content:
                if block.type == "tool_use":
                    console.print(f"[dim]Using tool: {block.name}...[/dim]")
                    result = execute_tool(block.name, block.input)
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": result,
                    })
            messages.append({"role": "user", "content": tool_results})
            continue  # Let Claude process the tool results
        # Claude is done — extract the text blocks.
        full_response = ""
        for block in response.content:
            if hasattr(block, "text"):
                full_response += block.text
        messages.append({"role": "assistant", "content": full_response})
        # markup=False: model output may contain literal "[...]" sequences
        # that Rich would otherwise parse as markup.
        console.print(full_response, style="green", markup=False)
        return full_response
With tools, your chatbot can calculate math, read files, check the weather, or anything else you define.
Conversation History Management
Long conversations use many tokens. Manage the history to control costs:
def trim_messages(messages: list[dict], max_messages: int = 20) -> list[dict]:
    """Cap the history at *max_messages* entries.

    The very first message is always preserved for context; the rest of the
    budget is filled with the most recent messages. A history already within
    the limit is returned unchanged.
    """
    if len(messages) <= max_messages:
        return messages
    # Keep the opening message, then the newest (max_messages - 1) entries.
    recent = messages[-(max_messages - 1):]
    return [messages[0], *recent]
def count_tokens(messages: list[dict]) -> int:
    """Roughly estimate the token count of *messages*.

    Heuristic: one token is about four characters of the JSON-serialized
    history — good enough for deciding when to trim.
    """
    serialized = json.dumps(messages)
    return len(serialized) // 4
# Before each API call: once the estimated history size passes ~100k tokens,
# cut it down hard (to 10 messages) so subsequent requests stay cheap.
if count_tokens(messages) > 100000:
    messages = trim_messages(messages, max_messages=10)
    console.print("[dim]Trimmed conversation history to save tokens.[/dim]")
Error Handling
Handle common errors gracefully:
try:
    stream_response(user_input)
except anthropic.RateLimitError:
    # Caught first: RateLimitError is a subclass of APIStatusError, so it
    # must precede the broader handler to be reachable.
    console.print("[red]Rate limit hit. Waiting 30 seconds...[/red]")
    import time
    time.sleep(30)
except anthropic.APIStatusError as e:
    if e.status_code == 529:
        # 529 is the "overloaded" status — the API is temporarily at capacity.
        console.print("[red]API overloaded. Try again in a few seconds.[/red]")
    else:
        console.print(f"[red]API Error {e.status_code}: {e.message}[/red]")
except anthropic.APIConnectionError:
    console.print("[red]Connection error. Check your internet connection.[/red]")
Running the Chatbot
Python
export ANTHROPIC_API_KEY="your-key-here"
python chatbot.py
TypeScript
export ANTHROPIC_API_KEY="your-key-here"
npx tsx chatbot.ts
A typical 10-message conversation with Sonnet 4.6 costs about $0.02-0.05.
Summary
| Feature | Implementation |
|---|---|
| Streaming | client.messages.stream() with text_stream iterator |
| History | Append each message to messages array |
| Slash commands | Parse input starting with / |
| Token tracking | Read usage from final message |
| Tools | Define tools, execute on tool_use stop reason |
| Cost control | Trim old messages when token count is high |
What’s Next?
In the next article, we will build a code review bot that analyzes git diffs and posts review comments.
Next: Build a Code Review Bot