Advanced · 20 min read · Module 8, Lesson 5
📄Project: Document Analyzer
Build a tool that reads PDFs and images, extracts key info, and generates summaries
Project: Document Analyzer
In this project, you'll build a tool that reads documents (images, PDFs), extracts key information, and generates structured summaries using Vision capabilities and structured outputs.
Setup
mkdir doc-analyzer && cd doc-analyzer
npm init -y
npm install @anthropic-ai/sdk
Main Code
import Anthropic from "@anthropic-ai/sdk";
import fs from "fs";
import path from "path";
// Shared Anthropic API client, constructed with no explicit options; the
// request functions in this file call client.messages.create on it.
const client = new Anthropic();
/**
 * Reads a file synchronously and returns its contents base64-encoded.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} Base64 text of the file's bytes ("" for an empty file).
 * @throws {Error} Whatever fs.readFileSync throws (e.g. ENOENT).
 */
function fileToBase64(filePath) {
  const fileBytes = fs.readFileSync(filePath);
  const encoded = fileBytes.toString("base64");
  return encoded;
}
/**
 * Maps a file path to a media type based on its (case-insensitive) extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {string} The matching media type; "image/png" when the extension
 *   is missing or not one of the known ones.
 */
function getMediaType(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  switch (extension) {
    case ".png":
      return "image/png";
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".gif":
      return "image/gif";
    case ".webp":
      return "image/webp";
    case ".pdf":
      return "application/pdf";
    default:
      // Unknown or absent extensions are treated as PNG.
      return "image/png";
  }
}
async function analyzeDocument(filePath, documentType = "general") {
const base64 = fileToBase64(filePath);
const mediaType = getMediaType(filePath);
const prompts = {
invoice: `Analyze this invoice and extract:
- Invoice number, date, seller, buyer
- Line items (name, quantity, price)
- Subtotal, tax, total
Return as JSON.`,
contract: `Analyze this contract and extract:
- Parties, start/end dates, value
- Key terms, termination clauses
- 3-point summary
Return as JSON.`,
general: `Analyze this document and extract:
- Document type, key information
- Important dates, numbers/amounts
- 5-point summary
Return as JSON.`,
};
const response = await client.messages.create({
model: "claude-sonnet-4-20250514",
max_tokens: 2048,
messages: [{
role: "user",
content: [
{ type: "image", source: { type: "base64", media_type: mediaType, data: base64 } },
{ type: "text", text: prompts[documentType] || prompts.general },
],
}],
});
return response.content[0].text;
}Analyze Multiple Documents
async function analyzeMultiple(filePaths, documentType = "general") {
const results = [];
for (const filePath of filePaths) {
console.log(`Analyzing: ${filePath}...`);
try {
const result = await analyzeDocument(filePath, documentType);
results.push({ file: filePath, status: "success", data: JSON.parse(result) });
} catch (error) {
results.push({ file: filePath, status: "error", error: error.message });
}
}
return results;
}Document Comparison
async function compareDocuments(file1, file2) {
const b1 = fileToBase64(file1), b2 = fileToBase64(file2);
const m1 = getMediaType(file1), m2 = getMediaType(file2);
const response = await client.messages.create({
model: "claude-sonnet-4-20250514",
max_tokens: 2048,
messages: [{
role: "user",
content: [
{ type: "image", source: { type: "base64", media_type: m1, data: b1 } },
{ type: "image", source: { type: "base64", media_type: m2, data: b2 } },
{ type: "text", text: "Compare these documents: similarities, differences, conflicts. Return as JSON." },
],
}],
});
return response.content[0].text;
}Python Version
import anthropic
import base64
import json
from pathlib import Path
# Module-level Anthropic client, created with default arguments; analyze_document
# issues its request through it.
client = anthropic.Anthropic()
def analyze_document(file_path, doc_type="general"):
    """Send one image or PDF to the model and return the parsed JSON reply.

    Args:
        file_path: Path of the file to read and upload.
        doc_type: "invoice", "contract" or "general"; any other value
            falls back to the general prompt.

    Returns:
        The result of ``json.loads`` applied to the text of the first
        content block of the response.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    with open(file_path, "rb") as f:
        data = base64.standard_b64encode(f.read()).decode("utf-8")

    ext = Path(file_path).suffix.lower()
    # Same extension table as the JavaScript version, so .jpeg/.gif/.webp
    # files are no longer mislabelled as PNG.
    media_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".pdf": "application/pdf",
    }
    media_type = media_types.get(ext, "image/png")
    # PDFs must be sent as a "document" block; "image" blocks only accept
    # image media types.
    block_type = "document" if media_type == "application/pdf" else "image"

    prompts = {
        "invoice": "Analyze this invoice. Extract line items, totals, dates, parties. Return as JSON.",
        "contract": "Analyze this contract. Extract parties, dates, value, key terms. Return as JSON.",
        "general": "Analyze this document. Extract key info, dates, amounts. Return as JSON.",
    }

    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": block_type,
                    "source": {"type": "base64", "media_type": media_type, "data": data},
                },
                {"type": "text", "text": prompts.get(doc_type, prompts["general"])},
            ],
        }],
    )
    return json.loads(response.content[0].text)
result = analyze_document("invoice.png", "invoice")
print(json.dumps(result, indent=2))CLI Interface
const args = process.argv.slice(2);
const filePath = args[0];
const docType = args[1] || "general";
if (!filePath) {
console.log("Usage: node analyzer.js <file-path> [invoice|contract|general]");
process.exit(1);
}
analyzeDocument(filePath, docType)
.then((result) => console.log("\n=== Analysis Result ===\n" + result))
.catch((err) => console.error("Error:", err.message));Extensions to Try
- Multi-page PDF — Analyze each page and merge results
- Export to Excel — Convert extracted data to spreadsheet
- Web interface — Create a file upload page
- Database storage — Store results for later search
- Batch processing — Process a folder of documents at once
Next: We'll build a Web Research Agent.