📄 Project: Document Analyzer

Build a tool that reads PDFs and images, extracts key info, and generates summaries

Project: Document Analyzer

In this project, you'll build a tool that reads documents (images and PDFs), extracts key information, and generates structured JSON summaries using Claude's vision capabilities.

Setup

Terminal
mkdir doc-analyzer && cd doc-analyzer
npm init -y
# The JavaScript below uses ES module `import` syntax, so mark the package as a module
npm pkg set type=module
npm install @anthropic-ai/sdk

Main Code

JavaScript
import Anthropic from "@anthropic-ai/sdk";
import fs from "fs";
import path from "path";

// Shared SDK client, constructed with no explicit options; every request below goes through it.
const client = new Anthropic();

/**
 * Reads a file synchronously and returns its contents base64-encoded.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} Base64 text of the file's bytes.
 */
function fileToBase64(filePath) {
  const encoded = fs.readFileSync(filePath, { encoding: "base64" });
  return encoded;
}

/**
 * Maps a file path to a media type based on its (case-insensitive) extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {string} The matching media type; "image/png" for any extension
 *   that is not listed (including no extension at all).
 */
function getMediaType(filePath) {
  const extension = path.extname(filePath).toLowerCase();

  switch (extension) {
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".gif":
      return "image/gif";
    case ".webp":
      return "image/webp";
    case ".pdf":
      return "application/pdf";
    case ".png":
    default:
      // Unlisted extensions are treated as PNG.
      return "image/png";
  }
}

/**
 * Sends one document (image or PDF) to the model together with an extraction
 * prompt and returns the model's text reply.
 *
 * @param {string} filePath - Path of the file to analyze.
 * @param {string} [documentType="general"] - "invoice", "contract" or
 *   "general"; any other value falls back to the general prompt.
 * @returns {Promise<string>} Text of the first content block of the reply.
 *   The prompts ask for JSON, but the string is returned unparsed.
 */
async function analyzeDocument(filePath, documentType = "general") {
  const base64 = fileToBase64(filePath);
  const mediaType = getMediaType(filePath);

  const prompts = {
    invoice: `Analyze this invoice and extract:
- Invoice number, date, seller, buyer
- Line items (name, quantity, price)
- Subtotal, tax, total
Return as JSON.`,
    contract: `Analyze this contract and extract:
- Parties, start/end dates, value
- Key terms, termination clauses
- 3-point summary
Return as JSON.`,
    general: `Analyze this document and extract:
- Document type, key information
- Important dates, numbers/amounts
- 5-point summary
Return as JSON.`,
  };

  // Own-property check so inherited keys such as "constructor" fall back to
  // the general prompt instead of sending a non-string value as the text.
  const prompt = Object.prototype.hasOwnProperty.call(prompts, documentType)
    ? prompts[documentType]
    : prompts.general;

  // PDFs must travel in a "document" content block; "image" blocks only
  // accept image media types.
  const blockType = mediaType === "application/pdf" ? "document" : "image";

  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 2048,
    messages: [{
      role: "user",
      content: [
        { type: blockType, source: { type: "base64", media_type: mediaType, data: base64 } },
        { type: "text", text: prompt },
      ],
    }],
  });

  return response.content[0].text;
}

Analyze Multiple Documents

JavaScript
/**
 * Parses a model reply as JSON, tolerating a surrounding Markdown code fence
 * (``` or ```json) that models often add around JSON output.
 */
function parseJsonReply(text) {
  const trimmed = text.trim();
  const fenced = trimmed.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/i);
  return JSON.parse(fenced ? fenced[1] : trimmed);
}

/**
 * Analyzes several files one after another and collects a per-file outcome.
 * A failure on one file is recorded and does not stop the remaining files.
 *
 * @param {string[]} filePaths - Paths of the files to analyze.
 * @param {string} [documentType="general"] - Prompt type passed to analyzeDocument.
 * @returns {Promise<Array<{file: string, status: "success", data: *} |
 *   {file: string, status: "error", error: string}>>} One entry per input
 *   path, in input order.
 */
async function analyzeMultiple(filePaths, documentType = "general") {
  const results = [];
  // Files are processed one at a time, so at most one request is in flight.
  for (const filePath of filePaths) {
    console.log(`Analyzing: ${filePath}...`);
    try {
      const result = await analyzeDocument(filePath, documentType);
      results.push({ file: filePath, status: "success", data: parseJsonReply(result) });
    } catch (error) {
      results.push({ file: filePath, status: "error", error: error.message });
    }
  }
  return results;
}

Document Comparison

JavaScript
/**
 * Sends two documents (images or PDFs) to the model in a single request and
 * asks for a comparison.
 *
 * @param {string} file1 - Path of the first document.
 * @param {string} file2 - Path of the second document.
 * @returns {Promise<string>} Text of the first content block of the reply.
 *   The prompt asks for JSON, but the string is returned unparsed.
 */
async function compareDocuments(file1, file2) {
  // PDFs must travel in a "document" content block; "image" blocks only
  // accept image media types.
  const toContentBlock = (filePath) => {
    const mediaType = getMediaType(filePath);
    return {
      type: mediaType === "application/pdf" ? "document" : "image",
      source: { type: "base64", media_type: mediaType, data: fileToBase64(filePath) },
    };
  };

  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 2048,
    messages: [{
      role: "user",
      content: [
        toContentBlock(file1),
        toContentBlock(file2),
        { type: "text", text: "Compare these documents: similarities, differences, conflicts. Return as JSON." },
      ],
    }],
  });

  return response.content[0].text;
}

Python Version

Python
import anthropic
import base64
import json
from pathlib import Path

# Shared SDK client, constructed with no explicit arguments; analyze_document() sends its request through it.
client = anthropic.Anthropic()

def analyze_document(file_path, doc_type="general"):
    """Send one document (image or PDF) to the model and return the parsed JSON reply.

    Args:
        file_path: Path of the file to analyze.
        doc_type: "invoice", "contract" or "general"; any other value falls
            back to the general prompt.

    Returns:
        The model's reply decoded with ``json.loads``.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON, even after a
            surrounding Markdown code fence has been removed.
    """
    with open(file_path, "rb") as f:
        data = base64.standard_b64encode(f.read()).decode("utf-8")

    ext = Path(file_path).suffix.lower()
    media_types = {
        ".png": "image/png", ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg", ".gif": "image/gif",
        ".webp": "image/webp", ".pdf": "application/pdf",
    }
    # Unlisted extensions are treated as PNG.
    media_type = media_types.get(ext, "image/png")
    # PDFs must travel in a "document" content block; "image" blocks only
    # accept image media types.
    block_type = "document" if media_type == "application/pdf" else "image"

    prompts = {
        "invoice": "Analyze this invoice. Extract line items, totals, dates, parties. Return as JSON.",
        "contract": "Analyze this contract. Extract parties, dates, value, key terms. Return as JSON.",
        "general": "Analyze this document. Extract key info, dates, amounts. Return as JSON.",
    }

    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        messages=[{
            "role": "user",
            "content": [
                {"type": block_type, "source": {"type": "base64", "media_type": media_type, "data": data}},
                {"type": "text", "text": prompts.get(doc_type, prompts["general"])},
            ],
        }],
    )

    text = response.content[0].text.strip()
    # Models often wrap JSON in a Markdown fence (``` or ```json); drop the
    # opening fence line and the closing fence before parsing.
    if text.startswith("```"):
        first_newline = text.find("\n")
        if first_newline != -1:
            text = text[first_newline + 1:]
        if text.rstrip().endswith("```"):
            text = text.rstrip()[:-3]

    return json.loads(text)

# Example usage: analyze the file "invoice.png" with the invoice prompt and
# pretty-print the parsed result. This runs as soon as the module is executed.
result = analyze_document("invoice.png", "invoice")
print(json.dumps(result, indent=2))

CLI Interface

JavaScript
// Command-line entry point: node analyzer.js <file-path> [invoice|contract|general]
const args = process.argv.slice(2);
const filePath = args[0];
// `||` rather than `??` so an empty-string argument also falls back to "general".
const docType = args[1] || "general";

if (!filePath) {
  console.log("Usage: node analyzer.js <file-path> [invoice|contract|general]");
  process.exit(1);
}

analyzeDocument(filePath, docType)
  .then((result) => console.log("\n=== Analysis Result ===\n" + result))
  .catch((err) => {
    console.error("Error:", err.message);
    // Report failure to the calling shell; without this the process exits 0
    // even though the analysis failed.
    process.exitCode = 1;
  });

Extensions to Try

Multi-page PDF — Analyze each page and merge results
Export to Excel — Convert extracted data to spreadsheet
Web interface — Create a file upload page
Database storage — Store results for later search
Batch processing — Process a folder of documents at once

Next: We'll build a Web Research Agent.

Module 8

5/9

🔍 Project: AI Code Reviewer

Project: CLI Automation Tool ⚙️

5/9