Design a multi-agent system for research paper analysis.
#gen-ai #system-design #agents
Answer
Multi-Agent System for Research Paper Analysis
Architecture Overview
Implementation with LangGraph
import json
import operator
import re
from typing import Annotated, TypedDict

from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph


class PaperAnalysisState(TypedDict):
    """Shared state passed between all agents in the graph."""

    pdf_path: str
    raw_text: str
    summary: str
    key_contributions: list[str]
    citations: list[dict]
    code_snippets: list[str]
    final_report: str
    # Reducer channel: error lists returned by different nodes are
    # concatenated (operator.add) instead of overwriting each other.
    errors: Annotated[list[str], operator.add]


llm = ChatOpenAI(model="gpt-4o", temperature=0)

# Matches fenced code blocks (``` ... ```), non-greedy so each fence pair
# yields its own match.
_CODE_FENCE_RE = re.compile(r'```[\s\S]*?```')


def _parse_json_reply(content):
    """Parse an LLM reply as JSON, tolerating a surrounding markdown fence.

    Args:
        content: The ``content`` of the LLM response.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If ``content`` is not text or is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not isinstance(content, str):
        raise ValueError(f"expected a text reply, got {type(content).__name__}")
    text = content.strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json) and the closing fence.
        _, _, text = text.partition("\n")
        text = text.rsplit("```", 1)[0]
    return json.loads(text)


# Agent 1: Extract text from PDF
def pdf_extractor_agent(state: PaperAnalysisState) -> dict:
    """Read the PDF at ``state["pdf_path"]`` and return its text.

    Returns:
        ``{"raw_text": ...}`` with the text of all pages joined by newlines.

    Raises:
        Whatever PyMuPDF raises when the file cannot be opened; without the
        text no downstream agent can do useful work, so this is not caught.
    """
    import fitz  # PyMuPDF; imported lazily so only this agent depends on it

    # Context manager closes the document even if text extraction fails.
    with fitz.open(state["pdf_path"]) as doc:
        text = "\n".join(page.get_text() for page in doc)
    return {"raw_text": text}


# Agent 2: Summarise the paper
def summary_agent(state: PaperAnalysisState) -> dict:
    """Ask the LLM for a summary and key contributions of the paper.

    Only the first 8000 characters of the paper text are sent.

    Returns:
        ``{"summary", "key_contributions"}`` on success. If the reply cannot
        be parsed, both are returned empty and a message is added to
        ``errors`` so the other agents and the synthesis step still run.
    """
    excerpt = state["raw_text"][:8000]
    prompt = (
        "Analyse this research paper and extract:\n"
        "1. One-paragraph summary\n"
        "2. 3-5 key contributions\n"
        "3. Main methodology\n"
        "\n"
        "Paper text (first 8000 chars):\n"
        f"{excerpt}\n"
        "\n"
        "Respond as JSON with keys: summary, contributions"
    )
    response = llm.invoke(prompt)
    try:
        data = _parse_json_reply(response.content)
        summary = data["summary"]
        contributions = data["contributions"]
    except (ValueError, KeyError, TypeError) as exc:
        # TypeError covers a reply that is valid JSON but not an object.
        return {
            "summary": "",
            "key_contributions": [],
            "errors": [f"summary_agent: could not parse LLM reply ({exc!r})"],
        }
    if isinstance(contributions, str):
        # A single string would otherwise be iterated character by character
        # when the report is rendered.
        contributions = [contributions]
    return {"summary": summary, "key_contributions": contributions}


# Agent 3: Extract citations
def citation_agent(state: PaperAnalysisState) -> dict:
    """Ask the LLM to extract references from the tail of the paper.

    Only the last 3000 characters of the paper text are sent.

    Returns:
        ``{"citations": [...]}`` containing the dict entries of the reply.
        If the reply is not a JSON list, ``citations`` is empty and a message
        is added to ``errors``.
    """
    tail = state["raw_text"][-3000:]
    prompt = (
        "Extract all references from this paper text.\n"
        'Return as JSON list: [{"title": "...", "authors": "...", "year": "..."}]\n'
        "\n"
        "Text (references section):\n"
        f"{tail}"
    )
    response = llm.invoke(prompt)
    try:
        parsed = _parse_json_reply(response.content)
    except ValueError as exc:
        return {
            "citations": [],
            "errors": [f"citation_agent: could not parse LLM reply ({exc!r})"],
        }
    if not isinstance(parsed, list):
        return {
            "citations": [],
            "errors": ["citation_agent: expected a JSON list of references"],
        }
    # The report calls .get() on each entry, so keep only dict entries.
    return {"citations": [entry for entry in parsed if isinstance(entry, dict)]}


# Agent 4: Extract code samples
def code_extractor_agent(state: PaperAnalysisState) -> dict:
    """Return every triple-backtick fenced block found in the paper text."""
    return {"code_snippets": _CODE_FENCE_RE.findall(state["raw_text"])}


# Synthesis agent: combine all outputs
def synthesis_agent(state: PaperAnalysisState) -> dict:
    """Combine the outputs of the analysis agents into a markdown report.

    Missing state keys are treated as empty so that a partially failed run
    still produces a report. At most the first five citations are listed.

    Returns:
        ``{"final_report": ...}`` with the rendered markdown text.
    """
    summary = state.get("summary", "")
    contributions = state.get("key_contributions", [])
    citations = state.get("citations", [])
    snippets = state.get("code_snippets", [])

    contribution_lines = "\n".join(f"- {item}" for item in contributions)
    citation_lines = "\n".join(
        f'- {ref.get("authors", "")} ({ref.get("year", "")}) - {ref.get("title", "")}'
        for ref in citations[:5]
    )
    # Built line by line so every markdown heading starts its own line.
    report = "\n".join(
        [
            "# Paper Analysis Report",
            "",
            "## Summary",
            summary,
            "",
            "## Key Contributions",
            contribution_lines,
            "",
            f"## Citations ({len(citations)} references)",
            citation_lines,
            "",
            f"## Code Samples Found: {len(snippets)}",
            "",
        ]
    )
    return {"final_report": report}


# Build the graph
def build_research_graph():
    """Wire the agents into a LangGraph state graph and compile it.

    Flow: pdf_extractor -> (summarizer, citation_extractor, code_extractor)
    -> synthesis -> END.

    Returns:
        The compiled graph.
    """
    graph = StateGraph(PaperAnalysisState)

    # Node names must differ from state keys: StateGraph.add_node rejects a
    # name that is already a state key, so "summary" and "citations" cannot
    # be used as node names here.
    graph.add_node("pdf_extractor", pdf_extractor_agent)
    graph.add_node("summarizer", summary_agent)
    graph.add_node("citation_extractor", citation_agent)
    graph.add_node("code_extractor", code_extractor_agent)
    graph.add_node("synthesis", synthesis_agent)

    graph.set_entry_point("pdf_extractor")

    # After extraction, fan out to all analysis agents
    graph.add_edge("pdf_extractor", "summarizer")
    graph.add_edge("pdf_extractor", "citation_extractor")
    graph.add_edge("pdf_extractor", "code_extractor")

    # All analysis agents feed into synthesis
    graph.add_edge("summarizer", "synthesis")
    graph.add_edge("citation_extractor", "synthesis")
    graph.add_edge("code_extractor", "synthesis")

    graph.add_edge("synthesis", END)
    return graph.compile()


# Run the system
app = build_research_graph()
result = app.invoke({"pdf_path": "attention_is_all_you_need.pdf", "errors": []})
print(result["final_report"])
Key Design Principles
| Principle | Implementation |
|---|---|
| Parallel execution | Independent agents run concurrently |
| Shared state | TypedDict passed between agents |
| Error isolation | Each agent catches its own errors |
| Idempotency | Agents can be re-run safely |
| Human-in-the-loop | Add a review node before the final output (an extension; not included in the code above) |
Why multi-agent? Decomposing complex tasks into specialist agents improves quality (each agent does one thing well), enables parallelism (faster overall completion), and makes failures isolated and debuggable.