Time to build something real. In this article, you will create a CLI chatbot that streams responses, keeps conversation history, and supports slash commands. It is a complete, working application in about 150 lines of code.
## What We Are Building

A terminal chatbot with these features:
- Streaming responses (text is printed as each chunk arrives)
- Conversation history (multi-turn)
- Customizable system prompt
- Slash commands: /clear, /model, /system, /tokens, /export
- Token usage tracking
- Graceful error handling

## Python Implementation

### Setup

```bash
pip install anthropic rich
```

### Full Code

```python
#!/usr/bin/env python3
"""CLI chatbot with Claude API — streaming, history, and slash commands."""

import json
import sys
from datetime import datetime

import anthropic
from rich.console import Console
from rich.markup import escape

console = Console()
client = anthropic.Anthropic()

# Configuration
config = {
    "model": "claude-sonnet-4-6",
    "system": "You are a helpful assistant. Be concise and direct.",
    "max_tokens": 4096,
}

# Conversation state
messages: list[dict] = []
total_input_tokens = 0
total_output_tokens = 0


def stream_response(user_input: str) -> str:
    """Send a message and stream the response.

    Appends the user turn to ``messages``, prints each text chunk as it
    arrives, adds the request's token usage to the running totals, and
    appends the assistant turn to ``messages``.

    Args:
        user_input: The text of the user's message.

    Returns:
        The full text of the assistant's response.

    Raises:
        Any exception raised while streaming is re-raised after the
        unanswered user turn has been removed from ``messages``.
    """
    global total_input_tokens, total_output_tokens

    messages.append({"role": "user", "content": user_input})
    full_response = ""

    try:
        with client.messages.stream(
            model=config["model"],
            max_tokens=config["max_tokens"],
            system=config["system"],
            messages=messages,
        ) as stream:
            for text in stream.text_stream:
                # markup=False: model output may contain "[...]", which Rich
                # would otherwise parse as markup tags (dropping text or
                # raising MarkupError). highlight=False keeps the colour
                # uniform instead of re-highlighting each chunk.
                console.print(
                    text, end="", style="green", markup=False, highlight=False
                )
                full_response += text

            # Get final message for token counts
            final = stream.get_final_message()
    except BaseException:
        # Roll back the user turn so a failed or interrupted request does
        # not leave an unanswered message in the history.
        messages.pop()
        raise

    total_input_tokens += final.usage.input_tokens
    total_output_tokens += final.usage.output_tokens

    console.print()  # New line after response
    messages.append({"role": "assistant", "content": full_response})
    return full_response


def handle_command(command: str) -> bool:
    """Handle slash commands.

    The first whitespace-separated word is the command name
    (case-insensitive); the rest of the line is its argument.

    Args:
        command: The raw input line, starting with "/".

    Returns:
        True if the command was handled, False if it is not a known command.
        "/quit", "/exit" and "/q" do not return; they call ``sys.exit(0)``.
    """
    parts = command.strip().split(maxsplit=1)
    cmd = parts[0].lower()
    arg = parts[1] if len(parts) > 1 else ""

    if cmd == "/clear":
        messages.clear()
        console.print("[yellow]Conversation cleared.[/yellow]")
        return True

    elif cmd == "/model":
        if arg:
            config["model"] = arg
            # escape(): user-typed text must not be interpreted as markup.
            console.print(f"[yellow]Model set to: {escape(arg)}[/yellow]")
        else:
            console.print(
                f"[yellow]Current model: {escape(config['model'])}[/yellow]"
            )
        return True

    elif cmd == "/system":
        if arg:
            config["system"] = arg
            console.print("[yellow]System prompt updated.[/yellow]")
        else:
            console.print(
                f"[yellow]Current system prompt: {escape(config['system'])}[/yellow]"
            )
        return True

    elif cmd == "/tokens":
        console.print(f"[yellow]Input tokens: {total_input_tokens:,}[/yellow]")
        console.print(f"[yellow]Output tokens: {total_output_tokens:,}[/yellow]")
        # Estimate cost (Sonnet 4.6 pricing). The rates are fixed here and
        # are not adjusted when the model is changed with /model.
        cost = (total_input_tokens * 3 + total_output_tokens * 15) / 1_000_000
        console.print(f"[yellow]Estimated cost: ${cost:.4f}[/yellow]")
        return True

    elif cmd == "/export":
        filename = arg or f"chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        try:
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(
                    {
                        "model": config["model"],
                        "system": config["system"],
                        "messages": messages,
                        "tokens": {
                            "input": total_input_tokens,
                            "output": total_output_tokens,
                        },
                    },
                    f,
                    indent=2,
                )
        except OSError as e:
            # A bad path or permission problem should not end the chat.
            console.print(f"[red]Export failed: {escape(str(e))}[/red]")
            return True
        console.print(
            f"[yellow]Conversation exported to {escape(filename)}[/yellow]"
        )
        return True

    elif cmd == "/help":
        console.print("[yellow]Commands:[/yellow]")
        console.print(" /clear — Clear conversation history")
        console.print(" /model [name] — View or change model", markup=False)
        console.print(
            " /system [prompt] — View or change system prompt", markup=False
        )
        console.print(" /tokens — Show token usage and cost")
        console.print(" /export [file] — Export conversation to JSON", markup=False)
        console.print(" /help — Show this help")
        console.print(" /quit — Exit")
        return True

    elif cmd in ("/quit", "/exit", "/q"):
        console.print("[yellow]Goodbye![/yellow]")
        sys.exit(0)

    return False


def main():
    """Main chat loop.

    Reads a line, dispatches slash commands, and otherwise sends the line to
    the model. Input starting with "/" that is not a known command is sent
    to the model like any other message.
    """
    console.print("[bold blue]Claude CLI Chatbot[/bold blue]")
    console.print(f"Model: {config['model']} | Type /help for commands\n")

    while True:
        try:
            user_input = console.input("[bold cyan]You:[/bold cyan] ").strip()
            if not user_input:
                continue

            if user_input.startswith("/"):
                if handle_command(user_input):
                    continue

            console.print("[bold green]Claude:[/bold green] ", end="")
            stream_response(user_input)
            console.print()

        except KeyboardInterrupt:
            console.print("\n[yellow]Use /quit to exit.[/yellow]")
        except EOFError:
            # Ctrl-D / closed stdin: leave cleanly instead of a traceback.
            console.print("\n[yellow]Goodbye![/yellow]")
            break
        # RateLimitError is a subclass of APIError, so it must be listed
        # first; otherwise the generic handler below always wins.
        except anthropic.RateLimitError:
            console.print("\n[red]Rate limit hit. Wait a moment and try again.[/red]")
        except anthropic.APIError as e:
            console.print(f"\n[red]API Error: {escape(e.message)}[/red]")


if __name__ == "__main__":
    main()
```

## TypeScript Implementation

### Setup

```bash
npm init -y
npm install @anthropic-ai/sdk @clack/prompts
```

### Full Code

```typescript
// chatbot.ts
import Anthropic from "@anthropic-ai/sdk";
import * as p from "@clack/prompts";
import { writeFileSync } from "fs";

const client = new Anthropic();

// Configuration
const config = {
  model: "claude-sonnet-4-6",
  system: "You are a helpful assistant. Be concise and direct.",
  maxTokens: 4096,
};

// Conversation state
const messages: Anthropic.MessageParam[] = [];
let totalInputTokens = 0;
let totalOutputTokens = 0;

/**
 * Sends a user message and streams the reply to stdout.
 *
 * Appends the user turn to `messages`, writes each text delta as it arrives,
 * adds the request's token usage to the running totals, and appends the
 * assistant turn to `messages`.
 *
 * @param userInput - The text of the user's message.
 * @returns The full text of the assistant's response.
 * @throws Re-throws any streaming error after removing the unanswered user
 *   turn from `messages`.
 */
async function streamResponse(userInput: string): Promise<string> {
  messages.push({ role: "user", content: userInput });
  let fullResponse = "";

  try {
    // stream() returns the stream object itself, not a promise.
    const stream = client.messages.stream({
      model: config.model,
      max_tokens: config.maxTokens,
      system: config.system,
      messages,
    });

    for await (const event of stream) {
      if (
        event.type === "content_block_delta" &&
        event.delta.type === "text_delta"
      ) {
        process.stdout.write(event.delta.text);
        fullResponse += event.delta.text;
      }
    }

    const finalMessage = await stream.finalMessage();
    totalInputTokens += finalMessage.usage.input_tokens;
    totalOutputTokens += finalMessage.usage.output_tokens;
  } catch (error) {
    // Roll back the user turn so a failed request does not leave an
    // unanswered message in the history.
    messages.pop();
    throw error;
  }

  console.log(); // New line after response
  messages.push({ role: "assistant", content: fullResponse });
  return fullResponse;
}

/**
 * Handles a slash command.
 *
 * The first whitespace-separated word is the command name
 * (case-insensitive); the rest of the line is its argument.
 *
 * @param command - The raw input line, starting with "/".
 * @returns `true` if the command was handled, `false` if it is not a known
 *   command. "/quit", "/exit" and "/q" terminate the process.
 */
function handleCommand(command: string): boolean {
  // Split on the first run of whitespace so "/model   name" yields "name",
  // not " name" as a plain split(" ") would.
  const match = /^(\S+)\s*([\s\S]*)$/.exec(command.trim());
  if (!match) return false;
  const cmd = match[1] ?? "";
  const arg = match[2] ?? "";

  switch (cmd.toLowerCase()) {
    case "/clear":
      messages.length = 0;
      console.log("\x1b[33mConversation cleared.\x1b[0m");
      return true;

    case "/model":
      if (arg) {
        config.model = arg;
        console.log(`\x1b[33mModel set to: ${arg}\x1b[0m`);
      } else {
        console.log(`\x1b[33mCurrent model: ${config.model}\x1b[0m`);
      }
      return true;

    case "/system":
      if (arg) {
        config.system = arg;
        console.log("\x1b[33mSystem prompt updated.\x1b[0m");
      } else {
        console.log(`\x1b[33mCurrent system prompt: ${config.system}\x1b[0m`);
      }
      return true;

    case "/tokens": {
      console.log(
        `\x1b[33mInput tokens: ${totalInputTokens.toLocaleString()}\x1b[0m`
      );
      console.log(
        `\x1b[33mOutput tokens: ${totalOutputTokens.toLocaleString()}\x1b[0m`
      );
      // Estimate cost (Sonnet 4.6 pricing). The rates are fixed here and are
      // not adjusted when the model is changed with /model.
      const cost = (totalInputTokens * 3 + totalOutputTokens * 15) / 1_000_000;
      console.log(`\x1b[33mEstimated cost: $${cost.toFixed(4)}\x1b[0m`);
      return true;
    }

    case "/export": {
      const filename =
        arg || `chat_${new Date().toISOString().replace(/[:.]/g, "-")}.json`;
      try {
        writeFileSync(
          filename,
          JSON.stringify(
            {
              model: config.model,
              system: config.system,
              messages,
              tokens: { input: totalInputTokens, output: totalOutputTokens },
            },
            null,
            2
          )
        );
      } catch (error) {
        // A bad path or permission problem should not end the chat.
        console.error(`\x1b[31mExport failed: ${String(error)}\x1b[0m`);
        return true;
      }
      console.log(`\x1b[33mConversation exported to ${filename}\x1b[0m`);
      return true;
    }

    case "/help":
      console.log("\x1b[33mCommands:\x1b[0m");
      console.log(" /clear — Clear conversation history");
      console.log(" /model [name] — View or change model");
      console.log(" /system [prompt] — View or change system prompt");
      console.log(" /tokens — Show token usage and cost");
      console.log(" /export [file] — Export conversation to JSON");
      console.log(" /help — Show this help");
      console.log(" /quit — Exit");
      return true;

    case "/quit":
    case "/exit":
    case "/q":
      console.log("\x1b[33mGoodbye!\x1b[0m");
      process.exit(0);

    default:
      return false;
  }
}

/**
 * Main chat loop.
 *
 * Reads a line, dispatches slash commands, and otherwise sends the line to
 * the model. Input starting with "/" that is not a known command is sent to
 * the model like any other message.
 */
async function main(): Promise<void> {
  p.intro("Claude CLI Chatbot");
  console.log(`Model: ${config.model} | Type /help for commands\n`);

  while (true) {
    const userInput = await p.text({
      message: "You:",
      placeholder: "Type your message...",
    });

    if (p.isCancel(userInput)) {
      console.log("\x1b[33mUse /quit to exit.\x1b[0m");
      continue;
    }

    // Narrow with a runtime check instead of asserting `as string`, so any
    // non-string value is treated as empty input rather than crashing.
    const input = typeof userInput === "string" ? userInput.trim() : "";
    if (!input) continue;

    if (input.startsWith("/") && handleCommand(input)) continue;

    try {
      process.stdout.write("\x1b[32mClaude:\x1b[0m ");
      await streamResponse(input);
      console.log();
    } catch (error) {
      // Leading "\n" moves the message off the "Claude:" prefix line.
      // RateLimitError is checked before the more general APIError.
      if (error instanceof Anthropic.RateLimitError) {
        console.error(
          "\n\x1b[31mRate limit hit. Wait a moment and try again.\x1b[0m"
        );
      } else if (error instanceof Anthropic.APIError) {
        console.error(`\n\x1b[31mAPI Error: ${error.message}\x1b[0m`);
      } else {
        console.error(`\n\x1b[31mError: ${String(error)}\x1b[0m`);
      }
    }
  }
}

main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
```

## How It Works

### Conversation History

The messages array stores the entire conversation. Each user message and each assistant response is added to the array. On every new request, Claude sees the full history.
...