GraphRAG Studio — initial commit: multimodal RAG system with KG visualization

Full-stack application for document-to-knowledge-graph pipeline: - Backend: FastAPI + LangGraph ReAct agent + DeepSeek + MinerU parsing - Frontend: React 19 + Vite + D3.js + shadcn/ui - Pipeline: MinerU parsing → LangExtract entity extraction → KG building Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-07 17:30:04 +08:00
commit b02d3378fc
127 changed files with 37218 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,39 @@
 # IDE / Editor
 .idea/
 .vscode/
 *.swp
 *.swo
 # OS
 .DS_Store
 Thumbs.db
 # Dependencies
 node_modules/
 .pnpm-store/
 # Build output
 dist/
 build/
 # Environment & secrets
 .env
 .env.local
 .env.*.local
 # Python
 __pycache__/
 *.pyc
 *.pyo
 .venv/
 *.egg-info/
 # Logs
 *.log
 # OMC
 .omc/
 **/.git_embedded_backup/
 # Claude Code personal config
 settings.json
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,31 @@
 # GraphRAG Studio — Project Conventions
 ## 1. 目录结构
 - **前端代码** 统一放在 `frontend/` 目录下
 - **后端代码** 统一放在 `backend/` 目录下
 ```
 GraphRAGAgent/
 ├── frontend/   # 所有前端代码（HTML/CSS/JS）
 ├── backend/    # 所有后端代码（FastAPI 服务）
 └── docs/       # 规范文档
 ```
 ## 2. 环境变量与敏感配置
 - 所有外部配置（API Key、Base URL、Token 等）统一在 `backend/.env` 中管理
 - `.env` 文件**禁止提交到 Git**，必须在 `.gitignore` 中忽略
 - 提供 `backend/.env.example` 作为配置模板（不含真实值）
 ## 3. 后端虚拟环境
 - 后端服务必须使用 `uv` 创建独立虚拟环境：
 ```bash
 cd backend
 uv venv
 uv pip install -r requirements.txt
 ```
 - 虚拟环境目录 `.venv/` 不提交到 Git
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -0,0 +1,10 @@
 # DeepSeek API (required for entity extraction + QA)
 DEEPSEEK_API_KEY=your_deepseek_api_key_here
 DEEPSEEK_BASE_URL=https://api.deepseek.com
 # MinerU (required for document parsing)
 MINERU_API_TOKEN=your_mineru_api_token_here
 # MinerU local runner: absolute paths to the MinerU venv's python.exe and to its pipeline.py (adjust to your machine)
 MINERU_PYTHON=F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe
 MINERU_PIPELINE=F:/GraphRAGAgent/mineru_mvp/pipeline.py
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -0,0 +1,10 @@
 .env
 .venv/
 __pycache__/
 *.pyc
 *.pyo
 data/uploads/
 data/jobs/
 data/kg/
 *.egg-info/
 dist/
--- a/backend/CLAUDE.md
+++ b/backend/CLAUDE.md
@@ -0,0 +1,28 @@
 # Backend — GraphRAG Studio API
 ## 路径
 ```
 F:\GraphRAGAgent\backend\
 ```
 ## 启动命令
 ```bash
 cd F:/GraphRAGAgent/backend
 .venv/Scripts/python.exe -m uvicorn main:app --host 0.0.0.0 --port 8000 --reload
 ```
 ## 接口测试
 服务启动后，运行：
 ```bash
 .venv/Scripts/python.exe tests/test_api.py
 ```
 ## API 文档
 - Swagger UI：http://localhost:8000/docs
 - ReDoc：http://localhost:8000/redoc
 - 健康检查：http://localhost:8000/api/v1/health
--- a/backend/main.py
+++ b/backend/main.py
@@ -0,0 +1,58 @@
 """
 GraphRAG Studio — FastAPI Backend
 Entry point: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
 """
 import os
 import sys
 from pathlib import Path
 # Ensure backend/ is in sys.path for absolute imports
 sys.path.insert(0, str(Path(__file__).parent))
 from dotenv import load_dotenv
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 # Load backend/.env before the router imports below, so its values are already
 # in os.environ when those modules are imported. override=True makes .env win
 # over variables that are already set in the process environment.
 load_dotenv(Path(__file__).parent / ".env", override=True)
 from routers import documents, indexing, kg, query, search, system
 app = FastAPI(
    title="GraphRAG Studio API",
    description="Multimodal RAG Q&A system backend — MinerU + LangExtract + Agentic-RAG",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
 )
 # CORS policy.
 # A wildcard origin must not be combined with allow_credentials=True: the CORS
 # protocol does not allow a wildcard origin on credentialed requests, and
 # FastAPI's CORS documentation requires explicit origins whenever credentials
 # are enabled.
 # Set CORS_ALLOW_ORIGINS (comma-separated, e.g.
 # "http://localhost:5173,https://studio.example.com") to allow credentialed
 # requests from specific origins. When it is unset, every origin is allowed
 # but credentials are not.
 _cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "").split(",")
    if origin.strip()
 ]
 app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins or ["*"],
    allow_credentials=bool(_cors_origins),
    allow_methods=["*"],
    allow_headers=["*"],
 )
 # All routers under /api/v1. Each router carries its own sub-prefix.
 # documents.router  prefix="/documents" → /api/v1/documents
 # indexing.router   prefix="/index"     → /api/v1/index
 # kg.router         prefix="/kg"        → /api/v1/kg
 # query.router      prefix="/query"     → /api/v1/query
 # search.router     prefix="/search"    → /api/v1/search
 # system.router     no prefix           → /api/v1/health, /api/v1/system/...
 PREFIX = "/api/v1"
 app.include_router(documents.router, prefix=PREFIX)
 app.include_router(indexing.router,  prefix=PREFIX)
 app.include_router(kg.router,        prefix=PREFIX)
 app.include_router(query.router,     prefix=PREFIX)
 app.include_router(search.router,    prefix=PREFIX)
 app.include_router(system.router,    prefix=PREFIX)
@app.get("/")
 async def root():
    return {"msg": "GraphRAG Studio API v1.0.0", "docs": "/docs", "health": "/api/v1/health"}
 if __name__ == "__main__":
    # Running this file directly starts the same server as the uvicorn command
    # given in the module docstring.
    import uvicorn
    serve_kwargs = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("main:app", **serve_kwargs)
--- a/backend/models/__init__.py
+++ b/backend/models/__init__.py
--- a/backend/models/schemas.py
+++ b/backend/models/schemas.py
@@ -0,0 +1,360 @@
 """
 Pydantic v2 schemas — all API data objects per backend_service_specification-v1.0.md
 """
 from __future__ import annotations
 import uuid
 from typing import Any, Generic, Optional, TypeVar
 from pydantic import BaseModel, Field
 T = TypeVar("T")
 # ---------------------------------------------------------------------------
 # Universal response envelope
 # ---------------------------------------------------------------------------
 class APIResponse(BaseModel, Generic[T]):
    """Universal response envelope: status code, message, request id and optional payload.
    ``request_id`` defaults to a freshly generated UUID4 string for each instance.
    """
    code: int = 0
    msg: str = "success"
    request_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    data: Optional[T] = None
    @classmethod
    def ok(cls, data: Any = None) -> "APIResponse":
        """Build a success envelope (code 0, msg "success") around *data*."""
        # All three fields are passed explicitly so they count as "set" on the model.
        success_fields = {"code": 0, "msg": "success", "data": data}
        return cls(**success_fields)
    @classmethod
    def err(cls, code: int, msg: str) -> "APIResponse":
        """Build an error envelope with the given *code* and *msg* and no payload."""
        failure_fields = {"code": code, "msg": msg, "data": None}
        return cls(**failure_fields)
 # ---------------------------------------------------------------------------
 # A. Document schemas
 # ---------------------------------------------------------------------------
 class DocumentInfo(BaseModel):
    """Metadata record for one document."""
    doc_id: str
    filename: str
    format: str
    size_bytes: int
    pages: Optional[int] = None  # optional; None when not provided
    uploaded_at: str  # NOTE(review): timestamp carried as a string; format not visible here
    status: str  # uploaded | indexed | failed
    language: str = "ch"
    enable_formula: bool = True
    enable_table: bool = True
 class DocumentListData(BaseModel):
    """Page-style listing of ``DocumentInfo`` records."""
    total: int
    page: int
    page_size: int
    items: list[DocumentInfo]
 class DeleteDocumentData(BaseModel):
    """Result payload for a document deletion, with counts of removed nodes and edges."""
    deleted: bool
    doc_id: str
    removed_nodes: int
    removed_edges: int
 # ---------------------------------------------------------------------------
 # B. Indexing job schemas
 # ---------------------------------------------------------------------------
 class IndexingProgress(BaseModel):
    """Progress counters for an indexing job; every counter defaults to 0."""
    parsed_pages: int = 0
    total_pages: int = 0
    extracted_entities: int = 0
 class IndexingJobStatus(BaseModel):
    """Status snapshot of one indexing job, including its progress counters."""
    job_id: str
    doc_id: str
    status: str  # submitted|queued|parsing|extracting|indexing|done|failed|cancelled
    stage: str = ""
    # default_factory gives each instance its own IndexingProgress object
    progress: IndexingProgress = Field(default_factory=IndexingProgress)
    created_at: str
    elapsed_seconds: float = 0.0
    error: Optional[str] = None  # None unless an error message is supplied
 class StartIndexRequest(BaseModel):
    """Request body carrying the ``doc_id`` of the document to index."""
    doc_id: str
 class CancelJobData(BaseModel):
    """Result payload for a job cancellation request."""
    cancelled: bool
    job_id: str
    previous_status: str  # presumably the job status before the cancel attempt — confirm against the router
 # ---------------------------------------------------------------------------
 # C. KG schemas
 # ---------------------------------------------------------------------------
 class KGNode(BaseModel):
    """Knowledge-graph node record."""
    id: str
    name: str
    type: str
    source_doc: str
    # Character offsets are optional; None when no position is available.
    char_start: Optional[int] = None
    char_end: Optional[int] = None
    # pipeline/kg_builder.py stores the LangExtract alignment status under this key.
    confidence: Optional[str] = None
    page: int = 0
    degree: int = 0
 class KGNodeDetail(KGNode):
    """``KGNode`` extended with ``degree_centrality`` and ``neighbor_count``."""
    degree_centrality: float = 0.0
    neighbor_count: int = 0
 class KGEdge(BaseModel):
    """Knowledge-graph edge record; ``relation`` defaults to ``"CO_OCCURS_IN"``."""
    source: str  # NOTE(review): presumably a KGNode.id — confirm
    target: str  # NOTE(review): presumably a KGNode.id — confirm
    relation: str = "CO_OCCURS_IN"
    doc_id: str
    page: int = 0
 class KGNodeListData(BaseModel):
    """Page-style listing of ``KGNode`` records."""
    total: int
    page: int
    page_size: int
    items: list[KGNode]
 class KGEdgeListData(BaseModel):
    """Page-style listing of ``KGEdge`` records."""
    total: int
    page: int
    page_size: int
    items: list[KGEdge]
 class KGStatsData(BaseModel):
    """Aggregate statistics for the knowledge graph."""
    total_nodes: int
    total_edges: int
    density: float
    type_distribution: dict[str, int]  # presumably node type -> count; confirm
    relation_types: dict[str, int]  # presumably relation name -> count; confirm
    top5_central_nodes: list[dict]  # untyped dicts; their keys are not defined in this module
    source_documents: list[str]
 class KGExportData(BaseModel):
    """Knowledge-graph export payload: format, totals, export time, nodes and edges.
    ``doc_id`` defaults to ``None`` like every other ``Optional`` field in this
    module. Under Pydantic v2 an ``Optional[...]`` annotation without a default
    is still a required field, so the explicit default is what makes it optional.
    """
    format: str
    doc_id: Optional[str] = None  # NOTE(review): None presumably means the export is not limited to one document — confirm
    total_nodes: int
    total_edges: int
    exported_at: str
    nodes: list[KGNode]
    edges: list[KGEdge]
 class NeighborInfo(BaseModel):
    """Compact node description (id, name, type, page) used in neighbor listings."""
    id: str
    name: str
    type: str
    page: int
 class NeighborsData(BaseModel):
    """Neighborhood of a center node, grouped per hop."""
    center: NeighborInfo
    hops: int
    # Keys are strings; presumably the hop distance ("1", "2", ...) — confirm against the router.
    neighbors_by_hop: dict[str, list[NeighborInfo]]
    total_neighbors: int
 # ---------------------------------------------------------------------------
 # D. QA schemas
 # ---------------------------------------------------------------------------
 class ChatMessage(BaseModel):
    """One chat turn: a role and its text content."""
    role: str  # human | ai (plain str; the value is not validated by this model)
    content: str
 class QueryRequest(BaseModel):
    """Question-answering request: the question plus optional chat history."""
    question: str
    # default_factory gives each request its own empty list when history is omitted
    history: list[ChatMessage] = Field(default_factory=list)
 class ToolCallRecord(BaseModel):
    """Record of one tool call: tool name, input dict and output string."""
    tool: str
    input: dict
    output: str
 class QAResult(BaseModel):
    """Result of one answered question, with tool calls and cited nodes."""
    query_id: str
    question: str
    answer: str
    # Both lists default to a fresh empty list per instance.
    tool_calls: list[ToolCallRecord] = Field(default_factory=list)
    cited_nodes: list[str] = Field(default_factory=list)  # presumably KG node ids — confirm
    elapsed_seconds: float
    created_at: str
 class QAHistoryData(BaseModel):
    """Page-style listing of ``QAResult`` records."""
    total: int
    page: int
    page_size: int
    items: list[QAResult]
 class BatchQueryRequest(BaseModel):
    """Batch question request; ``questions`` is required and limited to 20 items."""
    # `...` marks the field as required; max_length on a list caps the item count.
    questions: list[str] = Field(..., max_length=20)
 class BatchQueryData(BaseModel):
    """Summary of a submitted batch: id, size, status and creation time."""
    batch_id: str
    total: int
    status: str
    created_at: str
 class BatchResultData(BaseModel):
    """Batch outcome: completed/failed counters, status and the ``QAResult`` items."""
    batch_id: str
    total: int
    completed: int
    failed: int
    status: str
    results: list[QAResult]
 # ---------------------------------------------------------------------------
 # E. Search schemas
 # ---------------------------------------------------------------------------
 class EntitySearchData(BaseModel):
    """Entity search response: the query, a total and the matching ``KGNode`` items."""
    query: str
    total: int
    items: list[KGNode]
 class PathNode(BaseModel):
    """Node reference (id, name, type) used in path results."""
    id: str
    name: str
    type: str
 class PathEdge(BaseModel):
    """Edge reference (source, target, relation) used in path results."""
    source: str
    target: str
    relation: str
 class PathInfo(BaseModel):
    """One path: its length plus the nodes and edges it contains."""
    length: int  # NOTE(review): unit (edges vs. nodes) is not visible here — confirm
    nodes: list[PathNode]
    edges: list[PathEdge]
 class PathSearchData(BaseModel):
    """Path search response between two nodes.
    ``from_node`` and ``to_node`` carry the aliases ``"from"`` and ``"to"``;
    ``from`` is a Python keyword and cannot be used as a field name directly.
    """
    from_node: PathNode = Field(alias="from")
    to_node: PathNode = Field(alias="to")
    max_hops: int
    paths: list[PathInfo]
    total_paths: int
    # populate_by_name lets the model be built with the field names as well as the aliases.
    model_config = {"populate_by_name": True}
 class GraphSearchData(BaseModel):
    """Graph search response: the query, matched nodes and subgraph edges."""
    query: str
    matched_nodes: list[KGNode]
    subgraph_edges: list[KGEdge]
 # ---------------------------------------------------------------------------
 # F. System schemas
 # ---------------------------------------------------------------------------
 class ComponentHealth(BaseModel):
    """Health report for one component.
    Only ``status`` is required; the remaining fields are optional details
    that default to ``None``.
    """
    status: str  # ok | error
    path: Optional[str] = None
    exists: Optional[bool] = None
    base_url: Optional[str] = None
    key_configured: Optional[bool] = None
    kg_nodes_exists: Optional[bool] = None
    kg_edges_exists: Optional[bool] = None
    uploads_dir_exists: Optional[bool] = None
 class HealthData(BaseModel):
    """Overall health payload: status, version, uptime and per-component reports."""
    status: str
    version: str
    uptime_seconds: float
    components: dict[str, ComponentHealth]  # presumably keyed by component name — confirm
 class SystemStatsData(BaseModel):
    """System-wide counters for documents, graph size, queries, jobs and storage."""
    total_documents: int
    indexed_documents: int
    failed_documents: int
    total_nodes: int
    total_edges: int
    type_distribution: dict[str, int]  # presumably node type -> count; confirm
    total_queries: int
    active_jobs: int
    storage_used_mb: float
 class FormatInfo(BaseModel):
    """Description of one file format: extension, limits and OCR requirement."""
    ext: str
    description: str
    max_size_mb: int
    max_pages: int
    requires_ocr: bool
 class FormatsData(BaseModel):
    """Format listing payload: formats, OCR languages and free-text notes."""
    formats: list[FormatInfo]
    ocr_languages: list[dict]  # untyped dicts; their keys are not defined in this module
    notes: list[str]
 class DemoData(BaseModel):
    """Demo payload: nodes, edges and an untyped ``stats`` dict."""
    nodes: list[KGNode]
    edges: list[KGEdge]
    stats: dict
 # ---------------------------------------------------------------------------
 # B3 index result
 # ---------------------------------------------------------------------------
 class IndexResultStats(BaseModel):
    """Counters describing an index result; every field has a zero or empty default."""
    blocks: int = 0
    block_types: dict[str, int] = Field(default_factory=dict)
    pages: int = 0
    raw_extractions: int = 0
    nodes: int = 0
    edges: int = 0
    type_counts: dict[str, int] = Field(default_factory=dict)
    alignment_counts: dict[str, int] = Field(default_factory=dict)
    elapsed_seconds: float = 0.0
 class ExtractionRecord(BaseModel):
    """One extraction: text, type, optional character span and alignment, page and document."""
    text: str
    type: str
    # Span and alignment are optional and default to None.
    char_start: Optional[int] = None
    char_end: Optional[int] = None
    alignment: Optional[str] = None
    page: int = 0
    doc_id: str
 class IndexResultData(BaseModel):
    """Index result for a job: identifiers and status plus optional detail sections.
    ``stats``, ``extractions``, ``nodes`` and ``edges`` each default to ``None``.
    """
    job_id: str
    doc_id: str
    status: str
    stats: Optional[IndexResultStats] = None
    extractions: Optional[list[ExtractionRecord]] = None
    nodes: Optional[list[KGNode]] = None
    edges: Optional[list[KGEdge]] = None
--- a/backend/output/8456b615_sample_graphrag_overview/99c9be1f-bba4-4a58-824b-7331d50db9bb_content_list.json
+++ b/backend/output/8456b615_sample_graphrag_overview/99c9be1f-bba4-4a58-824b-7331d50db9bb_content_list.json
@@ -0,0 +1,367 @@
 [
    {
        "type": "text",
        "text": "GraphRAG System ",
        "text_level": 1,
        "bbox": [
            344,
            175,
            655,
            204
        ],
        "page_idx": 0
    },
    {
        "type": "text",
        "text": "Technical Architecture Overview ",
        "bbox": [
            289,
            234,
            710,
            254
        ],
        "page_idx": 0
    },
    {
        "type": "text",
        "text": "Version 1.0 | March 2026 ",
        "bbox": [
            364,
            272,
            633,
            290
        ],
        "page_idx": 0
    },
    {
        "type": "text",
        "text": "1. Abstract ",
        "text_level": 1,
        "bbox": [
            52,
            42,
            200,
            61
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "This document presents the technical architecture of a Multimodal GraphRAG System designed for intelligent document parsing and knowledge graph construction. The system integrates MinerU for document parsing, LangExtract for structured entity extraction, and a graph database for knowledge storage and retrieval. ",
        "bbox": [
            48,
            83,
            951,
            171
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "The pipeline supports multiple document formats including PDF, DOCX, PPTX, and image files. Extracted entities and relations are stored as graph nodes and edges, enabling semantic search and question answering over large document collections. ",
        "bbox": [
            48,
            200,
            949,
            265
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "2. System Components ",
        "text_level": 1,
        "bbox": [
            50,
            299,
            321,
            318
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "2.1 Document Parsing Module ",
        "text_level": 1,
        "bbox": [
            50,
            343,
            349,
            361
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "MinerU Cloud API (v4) serves as the document parsing backend. It accepts PDF, DOCX, PPTX, PNG, JPG, and HTML files. Output includes Markdown text, structured content_list.json, and extracted images. ",
        "bbox": [
            48,
            373,
            951,
            436
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "2.2 Entity Extraction Module ",
        "text_level": 1,
        "bbox": [
            50,
            461,
            357,
            479
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "LangExtract (v1.1.1) performs structured information extraction from plain text using few-shot prompting with LLM backends (Gemini, OpenAI, or local Ollama). Each extraction includes character-level position anchoring. ",
        "bbox": [
            48,
            492,
            949,
            555
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "2.3 Knowledge Graph Module ",
        "text_level": 1,
        "bbox": [
            50,
            580,
            337,
            596
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "Extracted entities and relationships are stored in a graph database. Node types include: Person, Organization, Location, Event, Concept. Edge types include: RELATED_TO, BELONGS_TO, CAUSED_BY, LOCATED_IN. ",
        "bbox": [
            48,
            608,
            949,
            674
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "2.4 Retrieval Module ",
        "text_level": 1,
        "bbox": [
            50,
            697,
            272,
            715
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "The retrieval layer supports hybrid search combining vector similarity and graph traversal.   \nQuery results are ranked by relevance score and returned with source document references. ",
        "bbox": [
            48,
            727,
            944,
            766
        ],
        "page_idx": 1
    },
    {
        "type": "text",
        "text": "3. Data Pipeline ",
        "text_level": 1,
        "bbox": [
            50,
            42,
            268,
            61
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "The end-to-end data pipeline consists of the following stages: ",
        "bbox": [
            50,
            83,
            623,
            99
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "Stage 1: Document Ingestion ",
        "bbox": [
            68,
            130,
            322,
            146
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "- Accept raw documents (PDF, DOCX, images, HTML) - Submit to MinerU API for parsing - Poll task status until state == done ",
        "bbox": [
            85,
            153,
            531,
            217
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "Stage 2: Content Extraction ",
        "bbox": [
            68,
            249,
            322,
            263
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "- Download and decompress full_zip_url - Parse content_list.json into Document objects - Separate text blocks, tables, images, equations ",
        "bbox": [
            85,
            272,
            542,
            335
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "Stage 3: Entity & Relation Extraction ",
        "bbox": [
            67,
            367,
            415,
            381
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "- Feed text blocks to LangExtract - Extract entities with char_interval positions - Extract relationships between entities ",
        "bbox": [
            85,
            390,
            526,
            454
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "Stage 4: Graph Construction ",
        "bbox": [
            68,
            485,
            322,
            500
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "- Map extractions to graph nodes and edges - Store with source provenance (page_idx, bbox) - Build vector embeddings for semantic search ",
        "bbox": [
            85,
            508,
            522,
            571
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "4. Supported File Formats ",
        "text_level": 1,
        "bbox": [
            50,
            604,
            326,
            620
        ],
        "page_idx": 2
    },
    {
        "type": "table",
        "img_path": "images/1ed7aacecd20fecef8dc27ee2fe76dc1ae7fa93c44f7d10878d17a41f21a6bef.jpg",
        "table_caption": [],
        "table_footnote": [],
        "table_body": "<table><tr><td rowspan=1 colspan=1>Format</td><td rowspan=1 colspan=1>Extension</td><td rowspan=1 colspan=1>OCR Required</td><td rowspan=1 colspan=1>Model</td></tr><tr><td rowspan=1 colspan=1>PDF (text)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline / vlm</td></tr><tr><td rowspan=1 colspan=1>PDF (scan)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>Yes</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>Word</td><td rowspan=1 colspan=1>.docx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>PowerPoint</td><td rowspan=1 colspan=1>.pptx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>Image</td><td rowspan=1 colspan=1>.png / .jpg</td><td rowspan=1 colspan=1>Auto</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>HTML</td><td rowspan=1 colspan=1>.html</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>MinerU-HTML</td></tr></table>",
        "bbox": [
            45,
            634,
            882,
            806
        ],
        "page_idx": 2
    },
    {
        "type": "text",
        "text": "5. API Configuration Reference ",
        "text_level": 1,
        "bbox": [
            48,
            42,
            457,
            63
        ],
        "page_idx": 3
    },
    {
        "type": "text",
        "text": "The following environment variables must be configured before running the MinerU parsing service: ",
        "bbox": [
            48,
            83,
            952,
            123
        ],
        "page_idx": 3
    },
    {
        "type": "text",
        "text": "MINERU_API_TOKEN : Bearer token for API authentication   \nMINERU_USER_UID : User UUID for quota management   \nMINERU_BASE_URL : https://mineru.net/api/v4   \nMINERU_MODEL_VERSION : pipeline (default) | vlm | MinerU-HTML   \nMINERU_LANGUAGE : ch (Chinese) | en (English)   \nMINERU_IS_OCR : false (text PDF) | true (scanned PDF)   \nMINERU_ENABLE_FORMULA: true | false   \nMINERU_ENABLE_TABLE : true | false ",
        "bbox": [
            65,
            152,
            636,
            337
        ],
        "page_idx": 3
    },
    {
        "type": "text",
        "text": "Rate Limits: ",
        "bbox": [
            48,
            367,
            161,
            381
        ],
        "page_idx": 3
    },
    {
        "type": "text",
        "text": "- Max file size : 200 MB per file - Max pages : 600 pages per file - Daily quota : 2000 pages (high priority) - Batch limit : 200 files per request ",
        "bbox": [
            65,
            388,
            504,
            478
        ],
        "page_idx": 3
    }
 ]
--- a/backend/output/8456b615_sample_graphrag_overview/99c9be1f-bba4-4a58-824b-7331d50db9bb_origin.pdf
+++ b/backend/output/8456b615_sample_graphrag_overview/99c9be1f-bba4-4a58-824b-7331d50db9bb_origin.pdf
--- a/backend/output/8456b615_sample_graphrag_overview/full.md
+++ b/backend/output/8456b615_sample_graphrag_overview/full.md
@@ -0,0 +1,71 @@
 # GraphRAG System
 Technical Architecture Overview
 Version 1.0 | March 2026
 # 1. Abstract
 This document presents the technical architecture of a Multimodal GraphRAG System designed for intelligent document parsing and knowledge graph construction. The system integrates MinerU for document parsing, LangExtract for structured entity extraction, and a graph database for knowledge storage and retrieval.
 The pipeline supports multiple document formats including PDF, DOCX, PPTX, and image files. Extracted entities and relations are stored as graph nodes and edges, enabling semantic search and question answering over large document collections.
 # 2. System Components
 # 2.1 Document Parsing Module
 MinerU Cloud API (v4) serves as the document parsing backend. It accepts PDF, DOCX, PPTX, PNG, JPG, and HTML files. Output includes Markdown text, structured content_list.json, and extracted images.
 # 2.2 Entity Extraction Module
 LangExtract (v1.1.1) performs structured information extraction from plain text using few-shot prompting with LLM backends (Gemini, OpenAI, or local Ollama). Each extraction includes character-level position anchoring.
 # 2.3 Knowledge Graph Module
 Extracted entities and relationships are stored in a graph database. Node types include: Person, Organization, Location, Event, Concept. Edge types include: RELATED_TO, BELONGS_TO, CAUSED_BY, LOCATED_IN.
 # 2.4 Retrieval Module
 The retrieval layer supports hybrid search combining vector similarity and graph traversal.   
 Query results are ranked by relevance score and returned with source document references.
 # 3. Data Pipeline
 The end-to-end data pipeline consists of the following stages:
 Stage 1: Document Ingestion
 - Accept raw documents (PDF, DOCX, images, HTML) - Submit to MinerU API for parsing - Poll task status until state == done
 Stage 2: Content Extraction
 - Download and decompress full_zip_url - Parse content_list.json into Document objects - Separate text blocks, tables, images, equations
 Stage 3: Entity & Relation Extraction
 - Feed text blocks to LangExtract - Extract entities with char_interval positions - Extract relationships between entities
 Stage 4: Graph Construction
 - Map extractions to graph nodes and edges - Store with source provenance (page_idx, bbox) - Build vector embeddings for semantic search
 # 4. Supported File Formats
 <table><tr><td rowspan=1 colspan=1>Format</td><td rowspan=1 colspan=1>Extension</td><td rowspan=1 colspan=1>OCR Required</td><td rowspan=1 colspan=1>Model</td></tr><tr><td rowspan=1 colspan=1>PDF (text)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline / vlm</td></tr><tr><td rowspan=1 colspan=1>PDF (scan)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>Yes</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>Word</td><td rowspan=1 colspan=1>.docx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>PowerPoint</td><td rowspan=1 colspan=1>.pptx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>Image</td><td rowspan=1 colspan=1>.png / .jpg</td><td rowspan=1 colspan=1>Auto</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>HTML</td><td rowspan=1 colspan=1>.html</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>MinerU-HTML</td></tr></table>
 # 5. API Configuration Reference
 The following environment variables must be configured before running the MinerU parsing service:
 MINERU_API_TOKEN : Bearer token for API authentication   
 MINERU_USER_UID : User UUID for quota management   
 MINERU_BASE_URL : https://mineru.net/api/v4   
 MINERU_MODEL_VERSION : pipeline (default) | vlm | MinerU-HTML   
 MINERU_LANGUAGE : ch (Chinese) | en (English)   
 MINERU_IS_OCR : false (text PDF) | true (scanned PDF)   
 MINERU_ENABLE_FORMULA: true | false   
 MINERU_ENABLE_TABLE : true | false
 Rate Limits:
 - Max file size : 200 MB per file - Max pages : 600 pages per file - Daily quota : 2000 pages (high priority) - Batch limit : 200 files per request
--- a/backend/output/8456b615_sample_graphrag_overview/images/1ed7aacecd20fecef8dc27ee2fe76dc1ae7fa93c44f7d10878d17a41f21a6bef.jpg
+++ b/backend/output/8456b615_sample_graphrag_overview/images/1ed7aacecd20fecef8dc27ee2fe76dc1ae7fa93c44f7d10878d17a41f21a6bef.jpg
--- a/backend/output/8456b615_sample_graphrag_overview/layout.json
+++ b/backend/output/8456b615_sample_graphrag_overview/layout.json
--- a/backend/output/8456b615_sample_graphrag_overview/parse_summary.json
+++ b/backend/output/8456b615_sample_graphrag_overview/parse_summary.json
@@ -0,0 +1,10 @@
 {
  "total_blocks": 32,
  "type_distribution": {
    "text": 31,
    "table": 1
  },
  "total_pages": 4,
  "text_block_count": 31,
  "table_block_count": 1
 }
--- a/backend/pipeline/__init__.py
+++ b/backend/pipeline/__init__.py
--- a/backend/pipeline/entity_extractor.py
+++ b/backend/pipeline/entity_extractor.py
@@ -0,0 +1,66 @@
 """
 Entity Extractor — LangExtract + DeepSeek entity extraction.
 Independent implementation for the GraphRAG Studio backend.
 """
 from __future__ import annotations
 import os
 from pathlib import Path
 from dotenv import load_dotenv
 import langextract as lx
 from langextract.providers.openai import OpenAILanguageModel
 load_dotenv(Path(__file__).parent.parent / ".env", override=True)
 # DeepSeek credentials and endpoint, read once at import time from the
 # environment (after the load_dotenv call above has loaded backend/.env).
 DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
 DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
 # Model identifier passed to OpenAILanguageModel in create_model().
 MODEL_ID = "deepseek-chat"
 # Task description handed to lx.extract(); it names the five entity classes.
 PROMPT_DESCRIPTION = (
    "Extract named entities from the text in order of appearance. "
    "Entity types: TECHNOLOGY (software, algorithms, models, tools), "
    "ORGANIZATION (companies, research groups, institutions), "
    "PERSON (individual people), "
    "LOCATION (places, geographic entities), "
    "CONCEPT (technical concepts, methodologies, frameworks)."
 )
 # Single few-shot example handed to lx.extract(); its extraction classes are
 # drawn from the labels named in PROMPT_DESCRIPTION.
 EXAMPLES = [
    lx.data.ExampleData(
        text=(
            "LangChain is a framework created by Harrison Chase for building "
            "LLM applications. It integrates with OpenAI models and Pinecone "
            "vector database for semantic search."
        ),
        extractions=[
            lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
            lx.data.Extraction(extraction_class="PERSON", extraction_text="Harrison Chase"),
            lx.data.Extraction(extraction_class="CONCEPT", extraction_text="LLM applications"),
            lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="OpenAI models"),
            lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="Pinecone"),
            lx.data.Extraction(extraction_class="CONCEPT", extraction_text="semantic search"),
        ],
    )
 ]
 def create_model() -> OpenAILanguageModel:
    """Create the LangExtract OpenAI-compatible model configured for DeepSeek.
    Returns:
        An ``OpenAILanguageModel`` built from ``MODEL_ID``, ``DEEPSEEK_API_KEY``
        and ``DEEPSEEK_BASE_URL``.
    Raises:
        ValueError: If ``DEEPSEEK_API_KEY`` is empty.
    """
    if DEEPSEEK_API_KEY:
        settings = {
            "model_id": MODEL_ID,
            "api_key": DEEPSEEK_API_KEY,
            "base_url": DEEPSEEK_BASE_URL,
        }
        return OpenAILanguageModel(**settings)
    # Fail early instead of sending unauthenticated requests.
    raise ValueError("DEEPSEEK_API_KEY not set in backend/.env")
 def extract_entities(page_text: str, model: OpenAILanguageModel) -> lx.data.AnnotatedDocument:
    """Run LangExtract over one piece of text and return the annotated document.
    Args:
        page_text: Plain text to extract entities from.
        model: Language model used for the extraction.
    Returns:
        The result of ``lx.extract`` called with the module's prompt
        description and examples, with progress display turned off.
    """
    extraction_options = {
        "text_or_documents": page_text,
        "prompt_description": PROMPT_DESCRIPTION,
        "examples": EXAMPLES,
        "model": model,
        "show_progress": False,
    }
    return lx.extract(**extraction_options)
--- a/backend/pipeline/kg_builder.py
+++ b/backend/pipeline/kg_builder.py
@@ -0,0 +1,123 @@
 """
 KG Builder — node deduplication + CO_OCCURS_IN edge generation.
 Independent implementation for the GraphRAG Studio backend.
 """
 from __future__ import annotations
 from collections import defaultdict
 import langextract as lx
 from pipeline.text_assembler import PageText
 # Alignment statuses that count as a grounded extraction; anything else is dropped.
 ACCEPTED_ALIGNMENTS = {"match_exact", "match_greater", "match_lesser"}


 def _id_slug(text: str, max_len: int) -> str:
    """Lowercase *text* and keep only characters that are safe inside a node id.

    Node ids are used as URL path segments and are parsed back out of tool
    output with a regex that stops at whitespace, commas and closing
    brackets, so everything except letters, digits, "-", "_" and "." is
    dropped before truncating to *max_len* characters.
    """
    kept = (ch for ch in text.lower() if ch.isalnum() or ch in "-_.")
    return "".join(kept)[:max_len]


 def build_kg(
    pages: list[PageText],
    annotated_docs: list[lx.data.AnnotatedDocument],
    source_doc_id: str,
 ) -> tuple[list[dict], list[dict]]:
    """Build KG nodes and edges from LangExtract results.

    Args:
        pages: Page objects; only ``page_idx`` is read. Paired positionally
            with ``annotated_docs`` (extra items on either side are ignored).
        annotated_docs: One LangExtract result per page.
        source_doc_id: Document id stamped on every node and edge.

    Returns:
        (nodes, edges) — deduplicated node list and edge list. Nodes are
        deduplicated on (lowercased name, type) and keep the attributes of
        their first occurrence. Node ids have the form
        ``<type slug>_<name slug>_<index>``; the trailing index keeps them
        unique within one call. One CO_OCCURS_IN edge is emitted per page
        for every pair of nodes that appear on that page.
    """
    # Phase 1: collect raw entities whose alignment status is accepted.
    raw_entities = []
    for page, doc in zip(pages, annotated_docs):
        for ext in doc.extractions or ():
            status = ext.alignment_status.value if ext.alignment_status else None
            if status not in ACCEPTED_ALIGNMENTS:
                continue
            interval = ext.char_interval
            raw_entities.append({
                "name": ext.extraction_text,
                "type": ext.extraction_class,
                "char_start": interval.start_pos if interval else None,
                "char_end": interval.end_pos if interval else None,
                "confidence": status,
                "page": page.page_idx,
                "source_doc": source_doc_id,
            })

    # Phase 2: deduplicate nodes and remember every page each node occurs on.
    seen: dict[tuple[str, str], int] = {}
    nodes: list[dict] = []
    node_pages: dict[int, set[int]] = defaultdict(set)
    for entity in raw_entities:
        dedup_key = (entity["name"].lower(), entity["type"])
        node_idx = seen.get(dedup_key)
        if node_idx is None:
            node_idx = len(nodes)
            seen[dedup_key] = node_idx
            type_prefix = _id_slug(entity["type"], 4)
            name_slug = _id_slug(entity["name"], 12)
            nodes.append({
                "id": f"{type_prefix}_{name_slug}_{node_idx}",
                "name": entity["name"],
                "type": entity["type"],
                "source_doc": entity["source_doc"],
                "char_start": entity["char_start"],
                "char_end": entity["char_end"],
                "confidence": entity["confidence"],
                "page": entity["page"],
            })
        node_pages[node_idx].add(entity["page"])

    # Phase 3: CO_OCCURS_IN edges.
    # node_pages is filled in ascending node index order, so each per-page
    # list below is ascending and duplicate-free.
    page_nodes: dict[int, list[int]] = defaultdict(list)
    for node_idx, page_set in node_pages.items():
        for page_idx in page_set:
            page_nodes[page_idx].append(node_idx)

    edges: list[dict] = []
    for page_idx, node_indices in sorted(page_nodes.items()):
        # Each unordered pair is visited exactly once per page and node ids
        # are unique, so no extra duplicate-edge bookkeeping is required.
        for pos, first_idx in enumerate(node_indices):
            for second_idx in node_indices[pos + 1:]:
                a = nodes[first_idx]["id"]
                b = nodes[second_idx]["id"]
                # Store the pair in lexical order so the undirected edge has
                # one canonical representation.
                src, tgt = (a, b) if a < b else (b, a)
                edges.append({
                    "source": src,
                    "target": tgt,
                    "relation": "CO_OCCURS_IN",
                    "doc_id": source_doc_id,
                    "page": page_idx,
                })
    return nodes, edges
 def extractions_to_records(
    pages: list[PageText],
    annotated_docs: list[lx.data.AnnotatedDocument],
    doc_id: str,
 ) -> list[dict]:
    """Flatten LangExtract results into one plain dict per extraction.

    Unlike the KG builder, no alignment filtering happens here: every
    extraction is kept, with ``alignment`` set to None when the extraction
    carries no alignment status.

    Args:
        pages: Page objects, paired positionally with ``annotated_docs``.
        annotated_docs: One LangExtract result per page.
        doc_id: Document id copied into every record.

    Returns:
        Records with keys text, type, char_start, char_end, alignment,
        page and doc_id, in page order then extraction order.
    """
    flattened: list[dict] = []
    for page, annotated in zip(pages, annotated_docs):
        for item in annotated.extractions or ():
            span = item.char_interval
            alignment = item.alignment_status
            flattened.append({
                "text": item.extraction_text,
                "type": item.extraction_class,
                "char_start": span.start_pos if span else None,
                "char_end": span.end_pos if span else None,
                "alignment": alignment.value if alignment else None,
                "page": page.page_idx,
                "doc_id": doc_id,
            })
    return flattened
--- a/backend/pipeline/qa_agent.py
+++ b/backend/pipeline/qa_agent.py
@@ -0,0 +1,217 @@
 """
 QA Agent — LangGraph ReAct agent over the knowledge graph.
 Independent implementation for the GraphRAG Studio backend.
 """
 from __future__ import annotations
 import os
 import re
 from pathlib import Path
 import networkx as nx
 from dotenv import load_dotenv
 from langchain.tools import tool
 from langchain_openai import ChatOpenAI
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage
 from langgraph.prebuilt import create_react_agent
 # Load backend/.env (two directories above this file). override=True makes values
 # from the file win over variables already present in the process environment.
 load_dotenv(Path(__file__).parent.parent / ".env", override=True)
 # DeepSeek credentials and endpoint, read once at import time; run_qa() rejects an empty key.
 DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
 DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
 def build_kg_graph(nodes: list[dict], edges: list[dict]) -> nx.Graph:
    """Materialize node and edge dicts as an undirected NetworkX graph.

    Each node is keyed by its "id" and carries the whole node dict as
    attributes. Each edge connects "source" to "target" and carries every
    other key of the edge dict as attributes.
    """
    endpoint_keys = ("source", "target")
    graph = nx.Graph()
    graph.add_nodes_from((node["id"], node) for node in nodes)
    graph.add_edges_from(
        (
            edge["source"],
            edge["target"],
            {key: value for key, value in edge.items() if key not in endpoint_keys},
        )
        for edge in edges
    )
    return graph
 def make_tools(G: nx.Graph) -> list:
    """Create the tools the QA agent uses to query graph *G*.

    Every tool closes over *G* and returns a plain-text report. LangChain's
    ``@tool`` decorator derives each tool's description from the inner
    function's docstring, so those docstrings are runtime text and are kept
    verbatim.

    Returns:
        [search_entities, get_neighbors, get_entities_by_type, describe_graph]
    """
    @tool
    def search_entities(query: str) -> str:
        """Search knowledge graph entities by name (case-insensitive substring).
        Args:
            query: Keyword to search for in entity names.
        """
        q = query.lower()
        matches = [data for _, data in G.nodes(data=True) if q in data.get("name", "").lower()]
        if not matches:
            # Show a few existing names so the agent can retry with a better keyword.
            sample = ", ".join(d.get("name", "") for _, d in list(G.nodes(data=True))[:8])
            return f"No entities found matching '{query}'. Sample: {sample}"
        lines = [f"Found {len(matches)} entity(ies) matching '{query}':"]
        # Report at most 15 matches to bound the tool output.
        for m in matches[:15]:
            lines.append(
                f"  [{m['type']}] \"{m['name']}\" "
                f"(confidence={m.get('confidence','?')}, page={m.get('page',0)}, id={m['id']})"
            )
        return "\n".join(lines)

    @tool
    def get_neighbors(entity_name: str, hops: int = 1) -> str:
        """Get N-hop neighbors of an entity in the knowledge graph.
        Args:
            entity_name: Entity name (partial match).
            hops: Number of hops (1-3, default 1).
        """
        hops = max(1, min(int(hops), 3))
        needle = entity_name.lower()
        candidates = [(nid, d) for nid, d in G.nodes(data=True)
                      if needle in d.get("name", "").lower()]
        if not candidates:
            return f"Entity '{entity_name}' not found. Use search_entities first."
        # Prefer an exact (case-insensitive) name match so that e.g. "AI" does
        # not resolve to "OpenAI"; otherwise fall back to the first partial hit.
        node_id, node_data = next(
            (c for c in candidates if c[1].get("name", "").lower() == needle),
            candidates[0],
        )
        reachable = nx.single_source_shortest_path_length(G, node_id, cutoff=hops)
        by_hop: dict[int, list] = {}
        for nid, dist in reachable.items():
            if dist > 0:  # distance 0 is the start node itself
                by_hop.setdefault(dist, []).append(G.nodes[nid])
        lines = [f"Neighbors of '{node_data['name']}' [{node_data['type']}] within {hops} hop(s):"]
        for hop in sorted(by_hop.keys()):
            hop_nodes = by_hop[hop]
            lines.append(f"\n  Hop {hop} — {len(hop_nodes)} related entities:")
            # List at most 20 entities per hop; summarize the remainder.
            for n in hop_nodes[:20]:
                lines.append(f"    [{n.get('type','?')}] {n.get('name','?')}")
            if len(hop_nodes) > 20:
                lines.append(f"    ... and {len(hop_nodes)-20} more")
        lines.append(f"\n  Total related entities: {sum(len(v) for v in by_hop.values())}")
        return "\n".join(lines)

    @tool
    def get_entities_by_type(entity_type: str) -> str:
        """List all entities of a specific type.
        Args:
            entity_type: TECHNOLOGY, CONCEPT, PERSON, ORGANIZATION, or LOCATION.
        """
        t_upper = entity_type.strip().upper()
        valid = {"TECHNOLOGY", "CONCEPT", "PERSON", "ORGANIZATION", "LOCATION"}
        if t_upper not in valid:
            # Tell the agent which types actually exist in the graph.
            present = sorted({d.get("type","") for _, d in G.nodes(data=True)})
            return f"Unknown type '{entity_type}'. Present: {present}"
        matches = [d for _, d in G.nodes(data=True) if d.get("type","") == t_upper]
        if not matches:
            return f"No {t_upper} entities found."
        lines = [f"Found {len(matches)} {t_upper} entities:"]
        # List at most 30 entities; summarize the remainder.
        for m in matches[:30]:
            lines.append(f"  \"{m['name']}\" (page={m.get('page',0)}, id={m['id']})")
        if len(matches) > 30:
            lines.append(f"  ... and {len(matches)-30} more")
        return "\n".join(lines)

    @tool
    def describe_graph() -> str:
        """Get an overview of the knowledge graph statistics."""
        n_nodes = G.number_of_nodes()
        n_edges = G.number_of_edges()
        type_counts: dict[str, int] = {}
        for _, d in G.nodes(data=True):
            t = d.get("type", "UNKNOWN")
            type_counts[t] = type_counts.get(t, 0) + 1
        lines = [
            "Knowledge Graph Overview:",
            f"  Nodes: {n_nodes}",
            f"  Edges: {n_edges}",
            f"  Entity types: {type_counts}",
        ]
        if n_nodes > 0:
            centrality = nx.degree_centrality(G)
            top5 = sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:5]
            lines.append("  Top 5 central nodes:")
            for nid, c in top5:
                nd = G.nodes[nid]
                lines.append(f"    [{nd.get('type','?')}] {nd.get('name','?')} (centrality={c:.3f})")
        return "\n".join(lines)

    return [search_entities, get_neighbors, get_entities_by_type, describe_graph]
 def run_qa(
    question: str,
    history: list[dict],
    nodes: list[dict],
    edges: list[dict],
 ) -> dict:
    """Run Agentic-RAG QA. Returns dict with answer, tool_calls, cited_nodes.

    Args:
        question: The user's current question.
        history: Earlier turns as dicts; only the last 8 are replayed. A turn
            whose "role" is "human" or "user" (or missing) becomes a user
            message built from "question", falling back to "content" or
            "answer". Any other role becomes an assistant message built from
            "content" or "answer".
        nodes: KG node dicts used to build the graph the tools query.
        edges: KG edge dicts used to build the graph the tools query.

    Returns:
        {"answer": final assistant text ("" if none was produced),
         "tool_calls": one entry per tool call, numbered in call order,
         "cited_nodes": sorted node ids found as ``id=...`` in tool output}.

    Raises:
        ValueError: If DEEPSEEK_API_KEY is empty.
    """
    if not DEEPSEEK_API_KEY:
        raise ValueError("DEEPSEEK_API_KEY not set in backend/.env")
    G = build_kg_graph(nodes, edges)
    tools = make_tools(G)
    llm = ChatOpenAI(
        model="deepseek-chat",
        api_key=DEEPSEEK_API_KEY,
        base_url=DEEPSEEK_BASE_URL,
        temperature=0,
    )
    system_prompt = (
        "You are a helpful assistant with access to a knowledge graph (KG) built from the user's documents.\n"
        "\n"
        "Guidelines:\n"
        "- If the question is clearly unrelated to the KG (greetings, math, general knowledge, etc.), "
        "answer directly WITHOUT using any tools.\n"
        "- If the question might be answered by the KG (topics related to entities in the documents), "
        "use the tools to search and explore before answering.\n"
        "- When you DO use the KG, cite the entity names and types you found.\n"
        "- If the KG has no relevant information, say so honestly and answer from general knowledge if possible.\n"
        "\n"
        "Available tools: search entities by name, get neighbors, list entities by type, get graph overview."
    )
    agent = create_react_agent(llm, tools, prompt=system_prompt)

    # Build messages: recent history + current question (the system prompt is
    # supplied to the agent above).
    messages: list = []
    for msg in history[-8:]:
        role = msg.get("role", "human")
        # Trailing `or ""` keeps a None value from reaching the message constructors.
        content = msg.get("content", "") or msg.get("answer", "") or ""
        # Accept the OpenAI-style "user" role as well as "human"; otherwise a
        # user turn would be replayed as if the assistant had said it.
        if role in ("human", "user"):
            messages.append(HumanMessage(content=msg.get("question", content)))
        else:
            messages.append(AIMessage(content=content))
    messages.append(HumanMessage(content=question))

    result = agent.invoke({"messages": messages})
    all_messages = result.get("messages", [])

    # The answer is the last assistant message that has text and requests no tools.
    answer = ""
    for msg in reversed(all_messages):
        if isinstance(msg, AIMessage) and msg.content and not msg.tool_calls:
            answer = msg.content
            break

    # Index tool outputs by call id in one pass (first output per id wins)
    # instead of rescanning the transcript for every tool call.
    outputs_by_call: dict = {}
    for msg in all_messages:
        if isinstance(msg, ToolMessage):
            outputs_by_call.setdefault(msg.tool_call_id, msg.content)

    # Extract tool calls and cited node IDs from message history.
    tool_calls = []
    cited_node_ids: set[str] = set()
    step = 0
    for msg in all_messages:
        if not (isinstance(msg, AIMessage) and msg.tool_calls):
            continue
        for tc in msg.tool_calls:
            step += 1
            output = outputs_by_call.get(tc.get("id"), "")
            tool_calls.append({
                "step": step,
                "tool_name": tc.get("name", ""),
                "tool_input": str(tc.get("args", {})),
                "tool_output": str(output),
            })
            # The tools print node ids as "id=<node id>"; collect them as citations.
            cited_node_ids.update(re.findall(r'\bid=([^\s,\)\]]+)', str(output)))

    return {
        "answer": answer,
        "tool_calls": tool_calls,
        # Sorted so the response is stable across runs (set order is not).
        "cited_nodes": sorted(cited_node_ids),
    }
--- a/backend/pipeline/text_assembler.py
+++ b/backend/pipeline/text_assembler.py
@@ -0,0 +1,107 @@
 """
 Text Assembler — MinerU content_list.json → per-page plain text.
 Independent implementation for the GraphRAG Studio backend.
 """
 from __future__ import annotations
 import dataclasses
 import json
 from collections import defaultdict
 from pathlib import Path
 from bs4 import BeautifulSoup
@dataclasses.dataclass
 class BlockSpan:
    """Location of one source block inside a page's assembled text."""
    # Position of the block in the original content_list.
    block_index: int
    # Value of the block's "type" field ("text" or "table" for emitted spans).
    block_type: str
    # Page the block belongs to.
    page_idx: int
    # Offset of the block's first character in the page text.
    char_start: int
    # Offset one past the block's last character in the page text.
    char_end: int
    # The block's "bbox" value; [0, 0, 0, 0] when the block has none.
    bbox: list
@dataclasses.dataclass
 class PageText:
    """Plain text assembled for one page, plus the spans of its source blocks."""
    # Page index taken from the blocks' "page_idx" field.
    page_idx: int
    # Block texts of the page joined with newlines.
    text: str
    # One span per block that contributed text, in block order.
    block_spans: list[BlockSpan]
 def html_table_to_text(table_body: str) -> str:
    """Render an HTML table as text: one line per row, cells joined by " | ".

    Both ``td`` and ``th`` cells are included and each cell's text is
    stripped of surrounding whitespace.
    """
    parsed = BeautifulSoup(table_body, "html.parser")
    return "\n".join(
        " | ".join(cell.get_text(strip=True) for cell in row.find_all(["td", "th"]))
        for row in parsed.find_all("tr")
    )
 def load_content_list(path: Path) -> list[dict]:
    """Load a MinerU content list from a JSON file or from a directory.

    Args:
        path: Either the JSON file itself, or a directory that contains it.
            In a directory, files matching ``*_content_list.json`` are
            preferred over the broader ``*content_list.json``; when several
            files match, the first in sorted order is used so the choice
            does not depend on filesystem listing order.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If *path* is a directory with no matching file.
    """
    if path.is_dir():
        matches = sorted(path.glob("*_content_list.json")) or sorted(path.glob("*content_list.json"))
        if not matches:
            raise FileNotFoundError(f"No content_list.json found in {path}")
        path = matches[0]
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)
 def assemble_pages(content_list: list[dict]) -> list[PageText]:
    """Group content blocks by page and flatten each page to plain text.

    Only "text" and "table" blocks contribute; tables are converted with
    html_table_to_text. Blocks that yield no text are skipped. Within a
    page, block texts are joined by a single newline and every contributing
    block gets a BlockSpan whose [char_start, char_end) range indexes into
    the page text.

    Args:
        content_list: Block dicts; a block without "page_idx" is assigned
            to page 0.

    Returns:
        One PageText per page, ordered by page index.
    """
    by_page: dict[int, list[tuple[int, dict]]] = defaultdict(list)
    for block_index, block in enumerate(content_list):
        by_page[block.get("page_idx", 0)].append((block_index, block))

    result = []
    for page_idx in sorted(by_page):
        parts = []
        spans = []
        cursor = 0
        for block_index, block in by_page[page_idx]:
            block_type = block.get("type", "unknown")
            # `or ""` also covers keys that are present but null.
            if block_type == "text":
                block_text = (block.get("text") or "").rstrip()
            elif block_type == "table":
                table_body = block.get("table_body") or ""
                block_text = html_table_to_text(table_body) if table_body else ""
            else:
                continue
            if not block_text:
                continue
            char_start = cursor
            char_end = char_start + len(block_text)
            parts.append(block_text)
            spans.append(BlockSpan(
                block_index=block_index,
                block_type=block_type,
                page_idx=page_idx,
                char_start=char_start,
                char_end=char_end,
                bbox=block.get("bbox", [0, 0, 0, 0]),
            ))
            cursor = char_end + 1  # account for the "\n" separator that follows
        # Join with separators instead of appending "\n" after every block and
        # stripping at the end: stripping could also remove newlines that belong
        # to the last block's own text, leaving its span past the end of the text.
        text = "\n".join(parts)
        result.append(PageText(page_idx=page_idx, text=text, block_spans=spans))
    return result
 def count_blocks_by_type(content_list: list[dict]) -> dict[str, int]:
    """Count blocks per "type" value; blocks without a type count as "unknown".

    Keys appear in the order their type is first encountered.
    """
    tally: dict[str, int] = {}
    for entry in content_list:
        kind = entry.get("type", "unknown")
        tally[kind] = tally.get(kind, 0) + 1
    return tally
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -0,0 +1,22 @@
 # Package metadata for the FastAPI backend.
 [project]
 name = "graphrag-studio-backend"
 version = "1.0.0"
 description = "GraphRAG Studio — FastAPI backend service"
 requires-python = ">=3.12"
 dependencies = [
    # Web layer
    "fastapi>=0.104.0",
    "uvicorn[standard]>=0.24.0",
    # Required by FastAPI for the multipart form/file upload endpoint
    "python-multipart>=0.0.6",
    # Entity extraction (pipeline/)
    "langextract[all]>=0.1.0",
    # QA agent (pipeline/qa_agent.py)
    "langchain>=0.2.0",
    "langchain-openai>=0.1.0",
    # NOTE(review): qa_agent.py calls create_react_agent(..., prompt=...);
    # confirm the oldest langgraph release that accepts that keyword and
    # raise this lower bound if 0.1.x does not.
    "langgraph>=0.1.0",
    "networkx>=3.0",
    "python-dotenv>=1.0.0",
    "requests>=2.31.0",
    # HTML table flattening (pipeline/text_assembler.py)
    "beautifulsoup4>=4.12.0",
 ]
 # Build backend used when packaging the project.
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
--- a/backend/routers/init.py
+++ b/backend/routers/init.py
--- a/backend/routers/documents.py
+++ b/backend/routers/documents.py
@@ -0,0 +1,71 @@
 """A 组：文档管理（4 个端点）"""
 from fastapi import APIRouter, File, Form, HTTPException, UploadFile
 from fastapi.responses import JSONResponse
 from models.schemas import APIResponse
 from services import document_service as svc
 # Router for the document endpoints; every route in this module is served under /documents.
 router = APIRouter(prefix="/documents", tags=["Documents"])
@router.post("/upload", status_code=200)
async def upload_document(
    file: UploadFile = File(...),
    language: str = Form("ch"),
    enable_formula: bool = Form(True),
    enable_table: bool = Form(True),
):
    """Validate and store an uploaded document.

    The whole upload is read into memory, checked by the document service
    (file name and size), and saved together with the parsing options.
    Responds with HTTP 400 and the service's error code/message when
    validation fails, otherwise with the stored document record.
    """
    payload = await file.read()
    is_valid, err_code, err_msg = svc.validate_upload(file.filename or "", len(payload))
    if is_valid:
        record = svc.save_upload(
            file.filename or "upload", payload, language, enable_formula, enable_table
        )
        # "upload_filename" is an internal field and is not exposed to clients.
        record.pop("upload_filename", None)
        return APIResponse.ok(record)
    return JSONResponse(
        status_code=400,
        content=APIResponse.err(err_code, err_msg).model_dump(),
    )
@router.get("/{doc_id}")
async def get_document(doc_id: str):
    """Return one document record, or HTTP 404 (error 2001) if it is unknown."""
    record = svc.get_document(doc_id)
    if record:
        # "upload_filename" is an internal field and is not exposed to clients.
        record.pop("upload_filename", None)
        return APIResponse.ok(record)
    not_found = APIResponse.err(2001, f"Document '{doc_id}' not found")
    return JSONResponse(status_code=404, content=not_found.model_dump())
@router.get("")
async def list_documents(
    page: int = 1,
    page_size: int = 20,
    status: str | None = None,
    format: str | None = None,
):
    """Return one page of document records, optionally filtered.

    Args:
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Items per page, clamped to the range 1-100.
        status: Optional status filter, passed through to the service.
        format: Optional format filter, passed through to the service.
    """
    # Clamp both ends so zero or negative values never reach the service's
    # paging arithmetic.
    page = max(1, page)
    page_size = max(1, min(page_size, 100))
    result = svc.list_documents(page, page_size, status, format)
    for item in result["items"]:
        # "upload_filename" is an internal field and is not exposed to clients.
        item.pop("upload_filename", None)
    return APIResponse.ok(result)
@router.delete("/{doc_id}")
async def delete_document(doc_id: str):
    """Delete a document and report how much of the graph went with it.

    Responds with HTTP 404 (error 2001) when the document is unknown.
    Otherwise the payload carries the service's outcome: ``deleted``
    mirrors the success flag returned by the service instead of being
    hard-coded to True, alongside the removed node and edge counts.
    """
    doc = svc.get_document(doc_id)
    if not doc:
        return JSONResponse(
            status_code=404,
            content=APIResponse.err(2001, f"Document '{doc_id}' not found").model_dump(),
        )
    ok, removed_nodes, removed_edges = svc.delete_document(doc_id)
    return APIResponse.ok({
        "deleted": bool(ok),
        "doc_id": doc_id,
        "removed_nodes": removed_nodes,
        "removed_edges": removed_edges,
    })
--- a/backend/routers/indexing.py
+++ b/backend/routers/indexing.py
@@ -0,0 +1,70 @@
 """B 组：Indexing Pipeline（4 个端点）"""
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse
 from models.schemas import APIResponse, StartIndexRequest
 from services import document_service as doc_svc
 from services import indexing_service as idx_svc
 # Router for the indexing endpoints; every route in this module is served under /index.
 router = APIRouter(prefix="/index", tags=["Indexing"])
@router.post("/start", status_code=202)
async def start_indexing(body: StartIndexRequest):
    """Start an indexing job for an existing document.

    Responds with HTTP 404 (error 2001) when the document is unknown,
    otherwise with the new job's id, document id, status, stage and
    creation time.
    """
    if not doc_svc.get_document(body.doc_id):
        missing = APIResponse.err(2001, f"Document '{body.doc_id}' not found")
        return JSONResponse(status_code=404, content=missing.model_dump())
    meta = idx_svc.start_indexing(body.doc_id)
    # Expose only these job fields to the client.
    exposed = ("job_id", "doc_id", "status", "stage", "created_at")
    return APIResponse.ok({field: meta[field] for field in exposed})
@router.get("/status/{job_id}")
async def get_job_status(job_id: str):
    """Return the metadata of an indexing job, or HTTP 404 (error 2002)."""
    meta = idx_svc.get_job_status(job_id)
    if meta:
        return APIResponse.ok(meta)
    unknown_job = APIResponse.err(2002, f"Job '{job_id}' not found")
    return JSONResponse(status_code=404, content=unknown_job.model_dump())
@router.get("/result/{job_id}")
async def get_job_result(job_id: str):
    """Return the result of an indexing job.

    Responds with HTTP 404 (error 2002) when the job is unknown and with
    HTTP 400 (error 2003) when the job is not "done" and its result holds
    no "stats" yet.
    """
    result = idx_svc.get_job_result(job_id)
    if not result:
        unknown_job = APIResponse.err(2002, f"Job '{job_id}' not found")
        return JSONResponse(status_code=404, content=unknown_job.model_dump())

    status = result.get("status")
    unfinished = status != "done" and "stats" not in result
    if unfinished:
        pending = APIResponse.err(
            2003, f"Job '{job_id}' is still running (status={status})"
        )
        return JSONResponse(status_code=400, content=pending.model_dump())
    return APIResponse.ok(result)
@router.delete("/jobs/{job_id}")
async def cancel_job(job_id: str):
    """Cancel an indexing job.

    Responds with HTTP 404 (error 2002) when the job is unknown.
    Otherwise ``cancelled`` mirrors the success flag returned by the
    service instead of being hard-coded to True, and ``previous_status``
    is the status the service reports for the job.
    """
    meta = idx_svc.get_job_status(job_id)
    if not meta:
        return JSONResponse(
            status_code=404,
            content=APIResponse.err(2002, f"Job '{job_id}' not found").model_dump(),
        )
    ok, prev_status = idx_svc.cancel_job(job_id)
    return APIResponse.ok({
        "cancelled": bool(ok),
        "job_id": job_id,
        "previous_status": prev_status,
    })
--- a/backend/routers/kg.py
+++ b/backend/routers/kg.py
@@ -0,0 +1,72 @@
 """C 组：知识图谱（6 个端点）"""
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse
 from models.schemas import APIResponse
 from services import kg_service as svc
 # Router for the knowledge-graph endpoints; every route in this module is served under /kg.
 router = APIRouter(prefix="/kg", tags=["Knowledge Graph"])
@router.get("/nodes")
async def list_nodes(
    type: str | None = None,
    doc_id: str | None = None,
    confidence: str | None = None,
    page: int = 1,
    page_size: int = 50,
):
    """Return one page of KG nodes, optionally filtered.

    Args:
        type: Optional entity-type filter.
        doc_id: Optional source-document filter.
        confidence: Optional confidence filter.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Items per page, clamped to the range 1-200.

    Responds with HTTP 400 (error 3002) when no filter is given and the
    graph holds no nodes at all.
    """
    # Clamp both ends so zero or negative values never reach the service's
    # paging arithmetic.
    page = max(1, page)
    page_size = max(1, min(page_size, 200))
    result = svc.get_nodes(page, page_size, type, doc_id, confidence)
    # An empty result only means "empty graph" when no filter narrowed it.
    if result["total"] == 0 and not any([type, doc_id, confidence]):
        return JSONResponse(
            status_code=400,
            content=APIResponse.err(3002, "Knowledge graph is empty. Index documents first.").model_dump(),
        )
    return APIResponse.ok(result)
@router.get("/edges")
async def list_edges(
    doc_id: str | None = None,
    relation: str | None = None,
    page: int = 1,
    page_size: int = 100,
):
    """Return one page of KG edges, optionally filtered.

    Args:
        doc_id: Optional source-document filter.
        relation: Optional relation-name filter.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Items per page, clamped to the range 1-500.
    """
    # Clamp both ends so zero or negative values never reach the service's
    # paging arithmetic.
    page = max(1, page)
    page_size = max(1, min(page_size, 500))
    result = svc.get_edges(page, page_size, doc_id, relation)
    return APIResponse.ok(result)
@router.get("/nodes/{node_id}")
async def get_node_detail(node_id: str):
    """Return the details of one KG node, or HTTP 404 (error 3001)."""
    detail = svc.get_node_detail(node_id)
    if detail:
        return APIResponse.ok(detail)
    missing = APIResponse.err(3001, f"Node '{node_id}' not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
@router.get("/nodes/{node_id}/neighbors")
async def get_node_neighbors(node_id: str, hops: int = 1):
    """Return the neighborhood of a node within ``hops`` hops.

    Responds with HTTP 404 (error 3001) when the service returns None for
    the node. ``hops`` is forwarded to the service unchanged.
    """
    neighborhood = svc.get_neighbors(node_id, hops)
    # Compare against None explicitly: an empty neighborhood is a valid answer.
    if neighborhood is not None:
        return APIResponse.ok(neighborhood)
    missing = APIResponse.err(3001, f"Node '{node_id}' not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
@router.get("/stats")
async def get_kg_stats():
    """Return the knowledge-graph statistics computed by the service."""
    return APIResponse.ok(svc.get_stats())
@router.get("/export")
async def export_kg(format: str = "json", doc_id: str | None = None):
    """Export the knowledge graph, optionally restricted to one document.

    ``format`` is accepted as a query parameter but is not read by this
    handler; the export is delegated to the service with ``doc_id`` only.
    """
    return APIResponse.ok(svc.export_kg(doc_id))
--- a/backend/routers/query.py
+++ b/backend/routers/query.py
@@ -0,0 +1,66 @@
 """D 组：QA 问答（4 个端点）"""
 import asyncio
 from functools import partial
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse
 from models.schemas import APIResponse, BatchQueryRequest, QueryRequest
 from services import qa_service as svc
 # Router for the QA endpoints; every route in this module is served under /query.
 router = APIRouter(prefix="/query", tags=["QA"])
@router.post("")
async def run_query(body: QueryRequest):
    """Answer a question against the knowledge graph.

    The QA service call is synchronous, so it runs in the default executor
    to keep the event loop free.

    Responds with HTTP 400 (error 3002) when the service raises a
    ValueError mentioning "KG_EMPTY", and with HTTP 500 (error 4001) for
    any other failure.
    """
    try:
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            partial(svc.run_query, body.question, [m.model_dump() for m in body.history]),
        )
        return APIResponse.ok(result)
    except ValueError as e:
        if "KG_EMPTY" in str(e):
            return JSONResponse(
                status_code=400,
                content=APIResponse.err(3002, "Knowledge graph is empty. Index documents first.").model_dump(),
            )
        return JSONResponse(
            status_code=500,
            content=APIResponse.err(4001, str(e)).model_dump(),
        )
    except Exception as e:
        # Top-level boundary: report any other failure as a QA service error.
        return JSONResponse(
            status_code=500,
            content=APIResponse.err(4001, f"QA service error: {e}").model_dump(),
        )
@router.post("/batch", status_code=202)
async def start_batch(body: BatchQueryRequest):
    """Start a batch of questions; at most 20 are accepted per request.

    Responds with HTTP 400 (error 1001) when the batch is larger.
    """
    if len(body.questions) <= 20:
        return APIResponse.ok(svc.start_batch(body.questions))
    too_many = APIResponse.err(1001, "Maximum 20 questions per batch")
    return JSONResponse(status_code=400, content=too_many.model_dump())
@router.get("/batch/{batch_id}")
async def get_batch_result(batch_id: str):
    """Return the result of a batch, or HTTP 404 (error 2002) if it is unknown."""
    batch = svc.get_batch_result(batch_id)
    if batch:
        return APIResponse.ok(batch)
    unknown_batch = APIResponse.err(2002, f"Batch '{batch_id}' not found")
    return JSONResponse(status_code=404, content=unknown_batch.model_dump())
@router.get("/history")
async def get_query_history(page: int = 1, page_size: int = 20):
    """Return one page of past queries.

    Args:
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Items per page, clamped to the range 1-50.
    """
    # Clamp both ends so zero or negative values never reach the service's
    # paging arithmetic.
    page = max(1, page)
    page_size = max(1, min(page_size, 50))
    result = svc.get_history(page, page_size)
    return APIResponse.ok(result)
--- a/backend/routers/search.py
+++ b/backend/routers/search.py
@@ -0,0 +1,43 @@
 """E 组：搜索（3 个端点）"""
 from fastapi import APIRouter, Query, Request
 from fastapi.responses import JSONResponse
 from models.schemas import APIResponse
 from services import search_service as svc
 # Router for the search endpoints; every route in this module is served under /search.
 router = APIRouter(prefix="/search", tags=["Search"])
@router.get("/entities")
async def search_entities(q: str, type: str | None = None, limit: int = 15):
    """Search entities by keyword.

    Args:
        q: Search keyword.
        type: Optional entity-type filter.
        limit: Maximum number of results, clamped to the range 1-100.
    """
    # Clamp both ends: a zero or negative limit would otherwise be passed
    # straight to the service.
    limit = max(1, min(limit, 100))
    result = svc.search_entities(q, type, limit)
    return APIResponse.ok(result)
@router.get("/path")
async def search_path(request: Request, max_hops: int = 3):
    """Find a path between two nodes given as ``from`` and ``to``.

    ``from`` is a Python keyword and cannot be a parameter name, so both
    ids are read from the raw query string. ``max_hops`` is clamped to the
    range 1-5.

    Responds with HTTP 400 (error 1001) when either id is missing and with
    HTTP 404 (error 3001) when the service returns None.
    """
    query = request.query_params
    from_id, to_id = query.get("from"), query.get("to")
    if not (from_id and to_id):
        bad_request = APIResponse.err(1001, "Parameters 'from' and 'to' are required")
        return JSONResponse(status_code=400, content=bad_request.model_dump())

    hop_limit = max(1, min(max_hops, 5))
    found = svc.search_path(from_id, to_id, hop_limit)
    # Compare against None explicitly: an empty result is still a valid answer.
    if found is not None:
        return APIResponse.ok(found)
    missing = APIResponse.err(3001, "One or both nodes not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
@router.get("/graph")
async def search_graph(q: str, include_neighbors: bool = False):
    """Search the graph by keyword, optionally including neighbors of the hits."""
    return APIResponse.ok(svc.search_graph(q, include_neighbors))
--- a/backend/routers/system.py
+++ b/backend/routers/system.py
@@ -0,0 +1,171 @@
 """F 组：系统（4 个端点）"""
 import os
 import time
 from pathlib import Path
 from fastapi import APIRouter
 from models.schemas import APIResponse
 from storage import file_store as fs
 # Router for the system endpoints; it has no prefix, so routes use their own full paths.
 router = APIRouter(tags=["System"])
 # Captured when this module is imported; health_check() reports uptime relative to it.
 _START_TIME = time.time()
@router.get("/health")
async def health_check():
    """Report service health and the state of each external dependency.

    Checks performed:
      * mineru_venv — the interpreter named by MINERU_PYTHON exists.
      * langextract_venv — the backend venv's interpreter can import
        ``langextract`` (probed in a subprocess, 10 s timeout).
      * deepseek_api — DEEPSEEK_API_KEY is configured.
      * storage — reports which KG files and the uploads directory exist;
        its status is always "ok".

    The overall status is "healthy" when every component is "ok",
    otherwise "degraded".
    """
    import asyncio
    import subprocess

    from dotenv import load_dotenv

    backend_root = Path(__file__).parent.parent
    # override=False: variables already set in the process environment win.
    load_dotenv(backend_root / ".env", override=False)
    mineru_python = Path(os.getenv("MINERU_PYTHON", "F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe"))
    # A venv keeps its interpreter in Scripts/python.exe on Windows and in
    # bin/python elsewhere.
    venv_python = ("Scripts", "python.exe") if os.name == "nt" else ("bin", "python")
    backend_python = backend_root.joinpath(".venv", *venv_python)
    deepseek_key = os.getenv("DEEPSEEK_API_KEY", "")
    deepseek_url = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")

    # Check if langextract is importable from backend's venv. The probe is a
    # blocking subprocess call, so it runs in a worker thread to avoid
    # stalling the event loop for up to the full timeout.
    try:
        probe = await asyncio.to_thread(
            subprocess.run,
            [str(backend_python), "-c", "import langextract; print('ok')"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        langextract_ok = probe.returncode == 0 and "ok" in probe.stdout
    except Exception:
        # Deliberately best-effort: a missing interpreter, a timeout or any
        # other probe failure is reported as an unhealthy component.
        langextract_ok = False

    components = {
        "mineru_venv": {
            "status": "ok" if mineru_python.exists() else "error",
            "path": str(mineru_python),
            "exists": mineru_python.exists(),
        },
        "langextract_venv": {
            "status": "ok" if langextract_ok else "error",
            "path": str(backend_python),
            "exists": backend_python.exists(),
        },
        "deepseek_api": {
            "status": "ok" if deepseek_key else "error",
            "base_url": deepseek_url,
            "key_configured": bool(deepseek_key),
        },
        "storage": {
            "status": "ok",
            "kg_nodes_exists": fs.kg_nodes_path().exists(),
            "kg_edges_exists": fs.kg_edges_path().exists(),
            "uploads_dir_exists": fs.UPLOADS_DIR.exists(),
        },
    }
    overall = "healthy" if all(c["status"] == "ok" for c in components.values()) else "degraded"
    return APIResponse.ok({
        "status": overall,
        "version": "1.0.0",
        "uptime_seconds": round(time.time() - _START_TIME, 1),
        "components": components,
    })
@router.get("/system/stats")
async def system_stats():
    """Return aggregate counters over documents, the KG, queries and jobs."""
    from services import indexing_service as idx_svc

    documents = list(fs.load_docs_index().values())
    kg_nodes = fs.load_kg_nodes()
    kg_edges = fs.load_kg_edges()
    queries = fs.load_query_history()

    # Number of nodes per entity type; nodes without a type count as UNKNOWN.
    per_type: dict[str, int] = {}
    for node in kg_nodes:
        label = node.get("type", "UNKNOWN")
        per_type[label] = per_type.get(label, 0) + 1

    doc_statuses = [doc.get("status") for doc in documents]
    return APIResponse.ok({
        "total_documents": len(documents),
        "indexed_documents": doc_statuses.count("indexed"),
        "failed_documents": doc_statuses.count("failed"),
        "total_nodes": len(kg_nodes),
        "total_edges": len(kg_edges),
        "type_distribution": per_type,
        "total_queries": len(queries),
        "active_jobs": idx_svc.count_active_jobs(),
        "storage_used_mb": fs.storage_used_mb(),
    })
@router.get("/system/formats")
async def list_formats():
    """Return the static catalogue of upload formats, OCR languages and usage notes."""

    def _format(ext: str, description: str, max_pages: int = 600, requires_ocr: bool = False) -> dict:
        # Every format shares the same 200 MB size limit.
        return {
            "ext": ext,
            "description": description,
            "max_size_mb": 200,
            "max_pages": max_pages,
            "requires_ocr": requires_ocr,
        }

    def _image(ext: str, description: str) -> dict:
        # Image formats are single-page and require OCR.
        return _format(ext, description, max_pages=1, requires_ocr=True)

    formats = [
        _format("pdf", "PDF 文档（文本型/扫描型/混合型）"),
        _format("docx", "Microsoft Word（新版）"),
        _format("doc", "Microsoft Word（旧版）"),
        _format("pptx", "PowerPoint（新版）"),
        _format("ppt", "PowerPoint（旧版）"),
        _image("png", "PNG 图片（单页）"),
        _image("jpg", "JPEG 图片（单页）"),
        _image("jpeg", "JPEG 图片（单页）"),
        _format("html", "HTML 文件"),
    ]
    language_names = [
        ("ch", "中文（默认）"),
        ("en", "英文"),
        ("japan", "日文"),
        ("korean", "韩文"),
        ("french", "法文"),
        ("german", "德文"),
    ]
    notes = [
        "language 参数默认值为 'ch'（非 'zh'），遵循 PaddleOCR v3 语言代码规范",
        "上传时不需要携带 Content-Type，服务端自动识别",
        "PNG/JPG/JPEG 单次最多处理 1 页",
    ]
    return APIResponse.ok({
        "formats": formats,
        "ocr_languages": [{"code": code, "name": name} for code, name in language_names],
        "notes": notes,
    })
@router.get("/system/demo")
async def get_demo_data():
    """Return the knowledge graph plus summary statistics for the demo view.

    The backend KG store is consulted first. When it holds no nodes, the
    legacy ``graphrag_pipeline`` output files are read instead; if the legacy
    node file is missing as well, an HTTP 400 response carrying error code
    3002 is returned.
    """
    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()
    if not nodes:
        # Fallback location written by the older standalone pipeline.
        legacy_nodes_path = Path("F:/GraphRAGAgent/graphrag_pipeline/output/kg_nodes.json")
        legacy_edges_path = Path("F:/GraphRAGAgent/graphrag_pipeline/output/kg_edges.json")
        if not legacy_nodes_path.exists():
            from fastapi.responses import JSONResponse
            failure = APIResponse.err(3002, "No demo data available. Index a document first.")
            return JSONResponse(status_code=400, content=failure.model_dump())
        import json
        nodes = json.loads(legacy_nodes_path.read_text(encoding="utf-8"))
        if legacy_edges_path.exists():
            edges = json.loads(legacy_edges_path.read_text(encoding="utf-8"))
        else:
            edges = []
    # Tally nodes per entity type; nodes without a type count as "UNKNOWN".
    type_counts: dict[str, int] = {}
    for node in nodes:
        kind = node.get("type", "UNKNOWN")
        type_counts.setdefault(kind, 0)
        type_counts[kind] += 1
    import networkx as nx
    graph = nx.Graph()
    graph.add_nodes_from(node["id"] for node in nodes)
    graph.add_edges_from((edge["source"], edge["target"]) for edge in edges)
    # Density is only computed when the graph has more than one node.
    if graph.number_of_nodes() > 1:
        density = round(nx.density(graph), 4)
    else:
        density = 0.0
    stats = {
        "nodes": len(nodes),
        "edges": len(edges),
        "type_counts": type_counts,
        "density": density,
    }
    return APIResponse.ok({"nodes": nodes, "edges": edges, "stats": stats})
--- a/backend/services/init.py
+++ b/backend/services/init.py
--- a/backend/services/document_service.py
+++ b/backend/services/document_service.py
@@ -0,0 +1,109 @@
 """Document Service — file upload, metadata CRUD."""
 from __future__ import annotations
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
 from storage import file_store as fs
 # Lower-case file extensions (without the dot) accepted for upload.
 ALLOWED_EXTENSIONS = {"pdf", "docx", "doc", "pptx", "ppt", "png", "jpg", "jpeg", "html"}
 # Largest accepted upload, in units of 1024 * 1024 bytes.
 MAX_FILE_SIZE_MB = 200
 def validate_upload(filename: str, size_bytes: int) -> tuple[bool, int, str]:
    """Check an upload's name, extension and size.

    Returns:
        ``(ok, error_code, error_msg)``. On success the tuple is
        ``(True, 0, "")``; otherwise the code is 1001 (bad filename),
        1002 (unsupported extension) or 1003 (file too large).
    """
    # Reject empty names and anything that carries a path separator.
    if not filename or any(sep in filename for sep in ("/", "\\")):
        return False, 1001, "Invalid filename"
    extension = Path(filename).suffix.lstrip(".").lower()
    if extension not in ALLOWED_EXTENSIONS:
        supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
        return False, 1002, f"Unsupported file format: .{extension}. Supported: {supported}"
    megabytes = size_bytes / (1024 * 1024)
    if megabytes > MAX_FILE_SIZE_MB:
        return False, 1003, f"File size {megabytes:.1f}MB exceeds {MAX_FILE_SIZE_MB}MB limit"
    return True, 0, ""
 def save_upload(
    filename: str,
    content: bytes,
    language: str = "ch",
    enable_formula: bool = True,
    enable_table: bool = True,
 ) -> dict:
    """Store an uploaded file and register its metadata record.

    The bytes are written under ``fs.UPLOADS_DIR`` as ``<doc_id>_<filename>``
    and the resulting record is saved through ``fs.save_doc``.

    Returns:
        The metadata dict that was saved (status ``"uploaded"``).
    """
    # First 8 hex characters of a random UUID serve as the document id.
    doc_id = uuid.uuid4().hex[:8]
    stored_name = f"{doc_id}_{filename}"
    (fs.UPLOADS_DIR / stored_name).write_bytes(content)
    record = dict(
        doc_id=doc_id,
        filename=filename,
        format=Path(filename).suffix.lower().lstrip("."),
        size_bytes=len(content),
        pages=None,
        uploaded_at=datetime.now(timezone.utc).isoformat(),
        status="uploaded",
        language=language,
        enable_formula=enable_formula,
        enable_table=enable_table,
        upload_filename=stored_name,  # internal: actual stored filename
    )
    fs.save_doc(record)
    return record
 def get_document(doc_id: str) -> dict | None:
    """Look up one document record via ``fs.get_doc``."""
    record = fs.get_doc(doc_id)
    return record
 def list_documents(
    page: int = 1,
    page_size: int = 20,
    status: str | None = None,
    fmt: str | None = None,
 ) -> dict:
    """Return one page of document records, newest upload first.

    ``status`` and ``fmt`` are optional exact-match filters (``fmt`` is
    compared lower-cased). ``total`` counts the records left after filtering.
    """
    docs = sorted(
        fs.load_docs_index().values(),
        key=lambda d: d.get("uploaded_at", ""),
        reverse=True,
    )
    if status:
        docs = [d for d in docs if d.get("status") == status]
    if fmt:
        wanted = fmt.lower()
        docs = [d for d in docs if d.get("format") == wanted]
    offset = (page - 1) * page_size
    page_items = docs[offset:offset + page_size]
    return {
        "total": len(docs),
        "page": page,
        "page_size": page_size,
        "items": page_items,
    }
 def delete_document(doc_id: str) -> tuple[bool, int, int]:
    """Delete a document together with its KG contributions, upload and jobs.

    Returns:
        ``(ok, removed_nodes, removed_edges)``; ``(False, 0, 0)`` when the
        document id is unknown.
    """
    doc = fs.get_doc(doc_id)
    if not doc:
        return False, 0, 0
    # Remove from KG
    removed_nodes, removed_edges = fs.remove_doc_from_kg(doc_id)
    # Remove upload file. An empty/missing name would resolve to the uploads
    # directory itself, and unlink() on a directory raises, so it is skipped.
    upload_filename = doc.get("upload_filename") or ""
    if upload_filename:
        upload_path = fs.UPLOADS_DIR / upload_filename
        if upload_path.is_file():
            upload_path.unlink(missing_ok=True)
    # Remove associated jobs
    for meta in fs.list_all_jobs():
        if meta.get("doc_id") == doc_id:
            fs.delete_job(meta["job_id"])
    # Remove from index
    index = fs.load_docs_index()
    index.pop(doc_id, None)
    fs.save_docs_index(index)
    return True, removed_nodes, removed_edges
 def update_doc_status(doc_id: str, status: str, pages: int | None = None) -> None:
    """Set a document's status (and page count, when given) in the index.

    Unknown document ids are ignored and nothing is written.
    """
    index = fs.load_docs_index()
    if doc_id not in index:
        return
    entry = index[doc_id]
    entry["status"] = status
    if pages is not None:
        entry["pages"] = pages
    fs.save_docs_index(index)
--- a/backend/services/indexing_service.py
+++ b/backend/services/indexing_service.py
@@ -0,0 +1,255 @@
 """Indexing Service — Pipeline orchestration (parsing → extracting → indexing)."""
 from __future__ import annotations
 import json
 import os
 import subprocess
 import threading
 import time
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
 from dotenv import load_dotenv
 from storage import file_store as fs
 from services.document_service import update_doc_status
 # Load backend/.env (the parent of this file's directory); override=True is
 # passed so values from the file replace variables already in the environment.
 load_dotenv(Path(__file__).parent.parent / ".env", override=True)
 # Interpreter and script used to launch the MinerU parser as a subprocess.
 # Both fall back to hard-coded F:/ paths when the variables are unset.
 MINERU_PYTHON = Path(os.getenv("MINERU_PYTHON", "F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe"))
 MINERU_PIPELINE = Path(os.getenv("MINERU_PIPELINE", "F:/GraphRAGAgent/mineru_mvp/pipeline.py"))
 # In-memory registry of active jobs {job_id: threading.Thread}
 _active_threads: dict[str, threading.Thread] = {}
 # Cancellation requests {job_id: bool}; polled by the pipeline worker.
 _cancel_flags: dict[str, bool] = {}
 def start_indexing(doc_id: str) -> dict | None:
    """Create an indexing job for a document and run it on a daemon thread.

    Returns:
        The initial job metadata (status ``"submitted"``), or ``None`` when
        ``doc_id`` is not present in the document index.
    """
    doc = fs.get_doc(doc_id)
    if not doc:
        return None
    job_id = f"job_{uuid.uuid4().hex[:8]}"
    now = datetime.now(timezone.utc).isoformat()
    meta = {
        "job_id": job_id,
        "doc_id": doc_id,
        "status": "submitted",
        "stage": "Job submitted",
        "progress": {"parsed_pages": 0, "total_pages": 0, "extracted_entities": 0},
        "created_at": now,
        "elapsed_seconds": 0.0,
        "error": None,
        "pdf_name": doc["filename"],
        "pdf_path": str(fs.UPLOADS_DIR / doc.get("upload_filename", "")),
    }
    # Persist the metadata before the worker starts: the worker reloads it
    # from disk as its first step.
    fs.save_job_meta(job_id, meta)
    _cancel_flags[job_id] = False
    thread = threading.Thread(target=_run_pipeline, args=(job_id,), daemon=True)
    _active_threads[job_id] = thread
    thread.start()
    return meta
 def _update_meta(job_id: str, **kwargs) -> None:
    """Merge ``kwargs`` into a job's stored metadata and refresh its elapsed time.

    When the metadata no longer exists (for example the job directory was
    removed because its document was deleted) the call is a no-op, so a
    deleted job is not recreated and the caller does not crash.
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return
    meta.update(kwargs)
    created_at = meta.get("created_at")
    if created_at:
        # created_at is stored as a timezone-aware ISO-8601 string.
        elapsed = datetime.now(timezone.utc) - datetime.fromisoformat(created_at)
        meta["elapsed_seconds"] = round(elapsed.total_seconds(), 1)
    fs.save_job_meta(job_id, meta)
 def _run_pipeline(job_id: str) -> None:
    """Worker body for one indexing job: parse, extract entities, build the KG.

    Stages:
        1. parsing    - run the MinerU script as a subprocess and locate its
                        ``*_content_list.json`` output.
        2. extracting - assemble pages and extract entities page by page.
        3. indexing   - build nodes/edges, merge them into the global KG and
                        write per-job statistics.

    Cancellation is checked before every stage and before every page, so a
    cancelled job never reaches the global-KG merge. Any exception marks both
    the job and the document as ``"failed"``. The in-memory registries are
    always cleaned up on exit.
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return
    doc_id = meta["doc_id"]
    pdf_path = Path(meta["pdf_path"])
    job_dir = fs.job_dir(job_id)
    start_time = time.time()

    def _stop_if_cancelled() -> bool:
        """Record the cancellation in the job meta when one was requested."""
        if _cancel_flags.get(job_id):
            _update_meta(job_id, status="cancelled", stage="Cancelled")
            return True
        return False

    try:
        # ── Stage 1: parsing ──────────────────────────────────────────────
        if _stop_if_cancelled():
            return
        _update_meta(job_id, status="parsing", stage="MinerU document parsing...")
        mineru_out_dir = job_dir / "mineru_output"
        mineru_out_dir.mkdir(parents=True, exist_ok=True)
        result = subprocess.run(
            [str(MINERU_PYTHON), str(MINERU_PIPELINE), str(pdf_path)],
            cwd=str(MINERU_PIPELINE.parent),
            capture_output=True,
            text=True,
            timeout=600,
        )
        if result.returncode != 0:
            raise RuntimeError(f"MinerU failed: {result.stderr[:500]}")
        # Find content_list.json in MinerU output
        # MinerU writes output to mineru_mvp/output/{stem}/
        stem = pdf_path.stem
        mineru_default_out = MINERU_PIPELINE.parent / "output" / stem
        content_list_path = None
        if mineru_default_out.exists():
            matches = list(mineru_default_out.glob("*_content_list.json"))
            if matches:
                content_list_path = matches[0]
                # Copy to our job dir
                import shutil
                shutil.copytree(str(mineru_default_out), str(mineru_out_dir), dirs_exist_ok=True)
        if not content_list_path:
            # Fallback: search job mineru_output dir
            matches = list(mineru_out_dir.glob("*_content_list.json"))
            if matches:
                content_list_path = matches[0]
        if not content_list_path or not content_list_path.exists():
            raise RuntimeError(f"MinerU output content_list.json not found. stdout: {result.stdout[:300]}")
        # ── Stage 2: extracting ───────────────────────────────────────────
        if _stop_if_cancelled():
            return
        from pipeline.text_assembler import load_content_list, assemble_pages, count_blocks_by_type
        from pipeline.entity_extractor import create_model, extract_entities
        from pipeline.kg_builder import build_kg, extractions_to_records
        content_list = load_content_list(content_list_path)
        pages = assemble_pages(content_list)
        total_pages = len(pages)
        block_types = count_blocks_by_type(content_list)
        _update_meta(
            job_id,
            status="extracting",
            stage="Extracting entities (LangExtract + DeepSeek)...",
            progress={"parsed_pages": total_pages, "total_pages": total_pages, "extracted_entities": 0},
        )
        update_doc_status(doc_id, "indexing", pages=total_pages)
        model = create_model()
        annotated_docs = []
        total_entities = 0
        for i, page in enumerate(pages):
            if _stop_if_cancelled():
                return
            _update_meta(
                job_id,
                stage=f"Extracting entities page {i+1}/{total_pages} (LangExtract + DeepSeek)...",
                progress={"parsed_pages": total_pages, "total_pages": total_pages,
                          "extracted_entities": total_entities},
            )
            ann_doc = extract_entities(page.text, model)
            annotated_docs.append(ann_doc)
            total_entities += len(ann_doc.extractions) if ann_doc.extractions else 0
        # Save raw extractions
        records = extractions_to_records(pages, annotated_docs, doc_id)
        fs.write_json(job_dir / "extractions.json", records)
        # ── Stage 3: indexing ─────────────────────────────────────────────
        # A cancel that arrived during the last page must not be overwritten
        # by "done", and must not touch the global KG.
        if _stop_if_cancelled():
            return
        _update_meta(job_id, status="indexing", stage="Building knowledge graph...")
        nodes, edges = build_kg(pages, annotated_docs, doc_id)
        fs.write_json(job_dir / "kg_nodes.json", nodes)
        fs.write_json(job_dir / "kg_edges.json", edges)
        # Merge into global KG
        fs.merge_kg(nodes, edges, doc_id)
        # Count alignment types
        alignment_counts: dict[str, int] = {}
        type_counts: dict[str, int] = {}
        for r in records:
            al = r.get("alignment") or "null"
            alignment_counts[al] = alignment_counts.get(al, 0) + 1
            t = r.get("type", "UNKNOWN")
            type_counts[t] = type_counts.get(t, 0) + 1
        elapsed = round(time.time() - start_time, 1)
        stats = {
            "blocks": len(content_list),
            "block_types": block_types,
            "pages": total_pages,
            "raw_extractions": len(records),
            "nodes": len(nodes),
            "edges": len(edges),
            "type_counts": type_counts,
            "alignment_counts": alignment_counts,
            "elapsed_seconds": elapsed,
        }
        fs.write_json(job_dir / "stats.json", stats)
        _update_meta(
            job_id,
            status="done",
            stage="Complete",
            progress={"parsed_pages": total_pages, "total_pages": total_pages,
                      "extracted_entities": len(records)},
        )
        update_doc_status(doc_id, "indexed", pages=total_pages)
    except Exception as exc:
        # Top-level boundary of the worker thread: record the failure on both
        # the job and the document instead of letting the thread die silently.
        _update_meta(job_id, status="failed", stage=f"Error: {exc}", error=str(exc))
        update_doc_status(doc_id, "failed")
    finally:
        _active_threads.pop(job_id, None)
        _cancel_flags.pop(job_id, None)
 def get_job_status(job_id: str) -> dict | None:
    """Return the stored metadata of a job as loaded by ``fs.load_job_meta``."""
    meta = fs.load_job_meta(job_id)
    return meta
 def get_job_result(job_id: str) -> dict | None:
    """Return the full result of a finished job.

    Returns ``None`` when no metadata is found, the bare metadata when the
    status is anything other than ``"done"``, and otherwise a dict with the
    stats, raw extractions, nodes and edges read from the job directory.
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return None
    if meta["status"] != "done":
        return meta
    base = fs.job_dir(job_id)
    # A missing artifact file falls back to an empty container.
    stats = fs.read_json(base / "stats.json") or {}
    listings = {
        key: fs.read_json(base / filename) or []
        for key, filename in (
            ("extractions", "extractions.json"),
            ("nodes", "kg_nodes.json"),
            ("edges", "kg_edges.json"),
        )
    }
    payload = {
        "job_id": meta["job_id"],
        "doc_id": meta["doc_id"],
        "status": "done",
        "stats": stats,
    }
    payload.update(listings)
    return payload
 def cancel_job(job_id: str) -> tuple[bool, str]:
    """Request cancellation of a job.

    Returns:
        ``(False, "not_found")`` when the job has no metadata, otherwise
        ``(True, previous_status)``.

    Jobs already in a final state (``done`` / ``failed`` / ``cancelled``) are
    left untouched, so a finished job keeps its status and results. The
    cancel flag is only raised for jobs that still have an entry in
    ``_cancel_flags`` (i.e. a worker registered in this process), which
    avoids leaving flags behind that nobody will ever clear.
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return False, "not_found"
    prev_status = meta["status"]
    if prev_status in ("done", "failed", "cancelled"):
        return True, prev_status
    if job_id in _cancel_flags:
        _cancel_flags[job_id] = True
    _update_meta(job_id, status="cancelled", stage="Cancelled by user")
    return True, prev_status
 def count_active_jobs() -> int:
    """Count the registered worker threads that are still alive."""
    alive = [worker for worker in _active_threads.values() if worker.is_alive()]
    return len(alive)
--- a/backend/services/kg_service.py
+++ b/backend/services/kg_service.py
@@ -0,0 +1,167 @@
 """KG Service — NetworkX graph operations over the global KG."""
 from __future__ import annotations
 import networkx as nx
 from storage import file_store as fs
 def _load_graph() -> nx.Graph:
    """Build an undirected NetworkX graph from the stored global KG.

    Every node dict becomes the attribute set of its node; edges carry
    ``relation``, ``doc_id`` and ``page`` with defaults for missing keys.
    """
    node_records = fs.load_kg_nodes()
    edge_records = fs.load_kg_edges()
    graph = nx.Graph()
    for record in node_records:
        graph.add_node(record["id"], **record)
    for record in edge_records:
        attrs = {
            "relation": record.get("relation", "CO_OCCURS_IN"),
            "doc_id": record.get("doc_id", ""),
            "page": record.get("page", 0),
        }
        graph.add_edge(record["source"], record["target"], **attrs)
    return graph
 def get_nodes(
    page: int = 1,
    page_size: int = 50,
    node_type: str | None = None,
    doc_id: str | None = None,
    confidence: str | None = None,
 ) -> dict:
    """Return one page of KG nodes, each annotated with its graph degree.

    Optional filters: ``node_type`` (case-insensitive), ``doc_id`` (matched
    against ``source_doc``) and ``confidence`` (exact match).
    """
    nodes = fs.load_kg_nodes()
    degree_of = dict(_load_graph().degree())
    # Attach degree to every node before any filtering.
    for node in nodes:
        node["degree"] = degree_of.get(node["id"], 0)
    if node_type:
        wanted_type = node_type.upper()
        nodes = [node for node in nodes if node.get("type", "").upper() == wanted_type]
    if doc_id:
        nodes = [node for node in nodes if node.get("source_doc") == doc_id]
    if confidence:
        nodes = [node for node in nodes if node.get("confidence") == confidence]
    offset = (page - 1) * page_size
    return {
        "total": len(nodes),
        "page": page,
        "page_size": page_size,
        "items": nodes[offset:offset + page_size],
    }
 def get_edges(
    page: int = 1,
    page_size: int = 100,
    doc_id: str | None = None,
    relation: str | None = None,
 ) -> dict:
    """Return one page of KG edges, optionally filtered by document or relation."""
    edges = fs.load_kg_edges()
    if doc_id:
        edges = [edge for edge in edges if edge.get("doc_id") == doc_id]
    if relation:
        edges = [edge for edge in edges if edge.get("relation") == relation]
    offset = (page - 1) * page_size
    return {
        "total": len(edges),
        "page": page,
        "page_size": page_size,
        "items": edges[offset:offset + page_size],
    }
 def get_node_detail(node_id: str) -> dict | None:
    """Return a node record enriched with degree statistics.

    Adds ``degree``, ``degree_centrality`` (rounded to 4 places) and
    ``neighbor_count`` to the stored record. Returns ``None`` for an
    unknown id.
    """
    node = None
    for candidate in fs.load_kg_nodes():
        if candidate["id"] == node_id:
            node = candidate
            break
    if not node:
        return None
    graph = _load_graph()
    if node_id in graph:
        degree = graph.degree(node_id)
        ratio = nx.degree_centrality(graph).get(node_id, 0.0)
        node.update(degree=degree, degree_centrality=round(ratio, 4), neighbor_count=degree)
    else:
        node.update(degree=0, degree_centrality=0.0, neighbor_count=0)
    return node
 def get_neighbors(node_id: str, hops: int = 1) -> dict | None:
    """Return the neighbours of a node grouped by hop distance.

    ``hops`` is clamped to the range 1..3 before the search. Returns
    ``None`` when the node id is unknown.
    """
    node = next((n for n in fs.load_kg_nodes() if n["id"] == node_id), None)
    if not node:
        return None
    center = {
        "id": node_id,
        "name": node["name"],
        "type": node["type"],
        "page": node.get("page", 0),
    }
    graph = _load_graph()
    if node_id not in graph:
        # Node is stored but absent from the graph: report no neighbours.
        return {"center": center, "hops": hops, "neighbors_by_hop": {}, "total_neighbors": 0}
    hops = max(1, min(hops, 3))
    distances = nx.single_source_shortest_path_length(graph, node_id, cutoff=hops)
    by_hop: dict[str, list] = {}
    total = 0
    for other_id, distance in distances.items():
        if distance == 0:
            # Distance 0 is the centre node itself.
            continue
        attrs = graph.nodes[other_id]
        entry = {
            "id": other_id,
            "name": attrs.get("name", ""),
            "type": attrs.get("type", ""),
            "page": attrs.get("page", 0),
        }
        by_hop.setdefault(str(distance), []).append(entry)
        total += 1
    return {
        "center": center,
        "hops": hops,
        "neighbors_by_hop": by_hop,
        "total_neighbors": total,
    }
 def get_stats() -> dict:
    """Summarise the global KG: sizes, density, distributions and top nodes."""
    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()
    graph = _load_graph()

    def _tally(labels) -> dict[str, int]:
        """Count occurrences of each label, keeping first-seen order."""
        counts: dict[str, int] = {}
        for label in labels:
            counts[label] = counts.get(label, 0) + 1
        return counts

    type_dist = _tally(n.get("type", "UNKNOWN") for n in nodes)
    relation_types = _tally(e.get("relation", "CO_OCCURS_IN") for e in edges)
    if graph.number_of_nodes() > 1:
        density = round(nx.density(graph), 4)
    else:
        density = 0.0
    top5: list[dict] = []
    if graph.number_of_nodes() > 0:
        ranked = sorted(nx.degree_centrality(graph).items(), key=lambda x: x[1], reverse=True)
        for nid, score in ranked[:5]:
            attrs = graph.nodes[nid]
            top5.append({
                "node_id": nid,
                "name": attrs.get("name", ""),
                "type": attrs.get("type", ""),
                "centrality": round(score, 4),
            })
    # Distinct, non-empty source documents.
    source_docs = list({n.get("source_doc", "") for n in nodes if n.get("source_doc")})
    return {
        "total_nodes": len(nodes),
        "total_edges": len(edges),
        "density": density,
        "type_distribution": type_dist,
        "relation_types": relation_types,
        "top5_central_nodes": top5,
        "source_documents": source_docs,
    }
 def export_kg(doc_id: str | None = None) -> dict:
    """Export the KG (or one document's slice of it) as a JSON-ready dict.

    Each exported node carries its degree in the full graph. When ``doc_id``
    is given, nodes are filtered on ``source_doc`` and edges on ``doc_id``.
    """
    from datetime import datetime, timezone
    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()
    degree_of = dict(_load_graph().degree())
    for node in nodes:
        node["degree"] = degree_of.get(node["id"], 0)
    if doc_id:
        nodes = [node for node in nodes if node.get("source_doc") == doc_id]
        edges = [edge for edge in edges if edge.get("doc_id") == doc_id]
    export = {"format": "json", "doc_id": doc_id}
    export["total_nodes"] = len(nodes)
    export["total_edges"] = len(edges)
    export["exported_at"] = datetime.now(timezone.utc).isoformat()
    export["nodes"] = nodes
    export["edges"] = edges
    return export
--- a/backend/services/qa_service.py
+++ b/backend/services/qa_service.py
@@ -0,0 +1,85 @@
 """QA Service — Agentic-RAG wrapper."""
 from __future__ import annotations
 import time
 import uuid
 from datetime import datetime, timezone
 from storage import file_store as fs
 def run_query(question: str, history: list[dict]) -> dict:
    """Answer one question against the global KG and log it to the history.

    Raises:
        ValueError: with message ``"KG_EMPTY"`` when the KG has no nodes.
    """
    from pipeline.qa_agent import run_qa
    kg_nodes = fs.load_kg_nodes()
    kg_edges = fs.load_kg_edges()
    if not kg_nodes:
        raise ValueError("KG_EMPTY")
    began = time.time()
    outcome = run_qa(question, history, kg_nodes, kg_edges)
    duration = round(time.time() - began, 2)
    record = {
        "id": f"q_{uuid.uuid4().hex[:10]}",
        "question": question,
        "answer": outcome["answer"],
        "tool_calls": outcome["tool_calls"],
        "cited_nodes": outcome["cited_nodes"],
        "duration_seconds": duration,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    fs.append_query_history(record)
    return record
 def get_history(page: int = 1, page_size: int = 20) -> dict:
    """Return one page of the query history in the order the store yields it."""
    records = fs.load_query_history()
    offset = (page - 1) * page_size
    window = records[offset:offset + page_size]
    return {
        "total": len(records),
        "page": page,
        "page_size": page_size,
        "items": window,
    }
 def start_batch(questions: list[str]) -> dict:
    """Run a list of questions in the background and track them as a batch.

    The batch metadata is saved immediately, after every processed question
    (so ``completed`` / ``failed`` are visible while the batch is running),
    and once more when the status switches to ``"done"``.

    Returns:
        A summary dict with ``batch_id``, ``total``, ``status`` and
        ``created_at``.
    """
    import threading
    batch_id = f"batch_{uuid.uuid4().hex[:10]}"
    now = datetime.now(timezone.utc).isoformat()
    # Snapshot the input so later changes to the caller's list cannot affect
    # the background worker.
    pending = list(questions)
    meta = {
        "batch_id": batch_id,
        "total": len(pending),
        "completed": 0,
        "failed": 0,
        "status": "submitted",
        "created_at": now,
        "results": [],
    }
    fs.save_batch_meta(batch_id, meta)
    def _run():
        for q in pending:
            try:
                res = run_query(q, [])
                meta["results"].append(res)
                meta["completed"] += 1
            except Exception as e:
                # One failing question must not abort the rest of the batch;
                # the error is recorded in the results instead.
                meta["failed"] += 1
                meta["results"].append({"question": q, "error": str(e)})
            # Persist progress so polling clients see intermediate counts.
            fs.save_batch_meta(batch_id, meta)
        meta["status"] = "done"
        fs.save_batch_meta(batch_id, meta)
    threading.Thread(target=_run, daemon=True).start()
    return {"batch_id": batch_id, "total": len(pending), "status": "submitted", "created_at": now}
 def get_batch_result(batch_id: str) -> dict | None:
    """Return the stored metadata of a batch as loaded by ``fs.load_batch_meta``."""
    batch = fs.load_batch_meta(batch_id)
    return batch
--- a/backend/services/search_service.py
+++ b/backend/services/search_service.py
@@ -0,0 +1,106 @@
 """Search Service — entity, path, and graph search."""
 from __future__ import annotations
 import networkx as nx
 from storage import file_store as fs
 def _load_graph() -> nx.Graph:
    """Load the stored global KG into an undirected NetworkX graph."""
    stored_nodes = fs.load_kg_nodes()
    stored_edges = fs.load_kg_edges()
    G = nx.Graph()
    for item in stored_nodes:
        # The whole node dict is attached as node attributes.
        G.add_node(item["id"], **item)
    for item in stored_edges:
        relation = item.get("relation", "CO_OCCURS_IN")
        origin_doc = item.get("doc_id", "")
        page_no = item.get("page", 0)
        G.add_edge(item["source"], item["target"], relation=relation, doc_id=origin_doc, page=page_no)
    return G
 def search_entities(q: str, entity_type: str | None = None, limit: int = 15) -> dict:
    """Find nodes whose name contains ``q`` (case-insensitive).

    Args:
        q: Substring to look for in node names.
        entity_type: Optional case-insensitive filter on the node type.
        limit: Maximum number of items returned.

    Returns:
        ``{"query", "total", "items"}`` where ``total`` is the number of
        matching nodes before truncation and ``items`` holds at most
        ``limit`` of them, each annotated with its graph degree.
    """
    nodes = fs.load_kg_nodes()
    G = _load_graph()
    degrees = dict(G.degree())
    q_lower = q.lower()
    matches = [n for n in nodes if q_lower in n.get("name", "").lower()]
    if entity_type:
        wanted = entity_type.upper()
        matches = [n for n in matches if n.get("type", "").upper() == wanted]
    # Count before truncating; otherwise "total" can never exceed "limit".
    total = len(matches)
    items = matches[:limit]
    for n in items:
        n["degree"] = degrees.get(n["id"], 0)
    return {"query": q, "total": total, "items": items}
 def search_path(from_id: str, to_id: str, max_hops: int = 3) -> dict | None:
    """List the simple paths between two nodes, up to ``max_hops`` edges long.

    ``max_hops`` is clamped to the range 1..5. Returns ``None`` when either
    endpoint is not a stored node.
    """
    node_map = {n["id"]: n for n in fs.load_kg_nodes()}
    if not (from_id in node_map and to_id in node_map):
        return None  # node not found
    G = _load_graph()
    max_hops = max(1, min(max_hops, 5))
    try:
        raw_paths = list(nx.all_simple_paths(G, from_id, to_id, cutoff=max_hops))
    except nx.NetworkXError:
        raw_paths = []

    def _path_node(nid) -> dict:
        """Short description of a node on a path; the name falls back to the id."""
        info = node_map.get(nid, {})
        return {"id": nid, "name": info.get("name", nid), "type": info.get("type", "")}

    def _endpoint(nid) -> dict:
        """Short description of a path endpoint."""
        info = node_map[nid]
        return {"id": nid, "name": info.get("name", ""), "type": info.get("type", "")}

    paths = []
    for hop_ids in raw_paths:
        steps = [
            {"source": s, "target": t, "relation": G.edges[s, t].get("relation", "CO_OCCURS_IN")}
            for s, t in zip(hop_ids, hop_ids[1:])
        ]
        paths.append({
            "length": len(hop_ids) - 1,
            "nodes": [_path_node(nid) for nid in hop_ids],
            "edges": steps,
        })
    return {
        "from": _endpoint(from_id),
        "to": _endpoint(to_id),
        "max_hops": max_hops,
        "paths": paths,
        "total_paths": len(paths),
    }
 def search_graph(q: str, include_neighbors: bool = False) -> dict:
    """Return the nodes matching ``q`` plus the edges among the relevant nodes.

    Relevant nodes are the matches themselves and, when
    ``include_neighbors`` is true, their direct graph neighbours. Only edges
    with both endpoints in that set are returned.
    """
    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()
    G = _load_graph()
    degree_of = dict(G.degree())
    needle = q.lower()
    matched = [node for node in nodes if needle in node.get("name", "").lower()]
    for node in matched:
        node["degree"] = degree_of.get(node["id"], 0)
    relevant = {node["id"] for node in matched}
    if include_neighbors:
        # Iterate over a snapshot so the set can grow while expanding.
        for nid in list(relevant):
            if nid in G:
                relevant.update(G.neighbors(nid))
    subgraph_edges = []
    for edge in edges:
        if edge.get("source") in relevant and edge.get("target") in relevant:
            subgraph_edges.append(edge)
    return {
        "query": q,
        "matched_nodes": matched,
        "subgraph_edges": subgraph_edges,
    }
--- a/backend/storage/init.py
+++ b/backend/storage/init.py
--- a/backend/storage/file_store.py
+++ b/backend/storage/file_store.py
@@ -0,0 +1,268 @@
 """
 File Store — unified JSON read/write for all backend data.
 All data lives under backend/data/.
 """
 from __future__ import annotations
 import json
 import os
 import shutil
 from pathlib import Path
 from typing import Any
 # Root data directory relative to this file
 _BASE = Path(__file__).parent.parent / "data"
 # Sub-directories for uploaded files, per-job working dirs and the global KG.
 UPLOADS_DIR = _BASE / "uploads"
 JOBS_DIR    = _BASE / "jobs"
 KG_DIR      = _BASE / "kg"
 # NOTE(review): same path as JOBS_DIR, while query_history_path() builds the
 # history file path from _BASE — confirm whether QUERY_DIR is still needed.
 QUERY_DIR   = _BASE / "jobs"
 # Ensure directories exist at import time
 for _d in (UPLOADS_DIR, JOBS_DIR, KG_DIR):
    _d.mkdir(parents=True, exist_ok=True)
 # ---------------------------------------------------------------------------
 # Generic helpers
 # ---------------------------------------------------------------------------
 def read_json(path: Path) -> Any:
    """Parse the UTF-8 JSON file at *path*; return ``None`` if it does not exist."""
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return None
 def write_json(path: Path, data: Any) -> None:
    """Atomically write *data* to *path* as UTF-8 JSON.

    The JSON is first written to a uniquely named temporary file in the
    target directory and then moved over *path* with ``os.replace``. A unique
    name keeps the write from clobbering an unrelated ``<stem>.tmp`` file and
    keeps two concurrent writers of the same path from sharing one scratch
    file. The temporary file is removed if serialization or the rename fails.

    Raises:
        TypeError: if *data* is not JSON-serializable (from ``json.dump``).
        OSError: if the file cannot be written or replaced.
    """
    import uuid  # local import: only this helper needs it

    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_name(f"{path.name}.{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a half-written scratch file behind.
        tmp.unlink(missing_ok=True)
        raise
 def append_jsonl(path: Path, record: dict) -> None:
    """Append *record* as one JSON line to *path*, creating parent dirs as needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    line = json.dumps(record, ensure_ascii=False)
    with path.open("a", encoding="utf-8") as handle:
        handle.write(f"{line}\n")
 def read_jsonl(path: Path) -> list[dict]:
    """Read every parseable record from a JSONL file.

    A missing file yields an empty list. Blank lines and lines that are not
    valid JSON are skipped.
    """
    if not path.exists():
        return []
    parsed = []
    with path.open("r", encoding="utf-8") as handle:
        for raw in handle:
            text = raw.strip()
            if not text:
                continue
            try:
                parsed.append(json.loads(text))
            except json.JSONDecodeError:
                # Best effort: a corrupt line does not invalidate the rest.
                continue
    return parsed
 # ---------------------------------------------------------------------------
 # Document helpers
 # ---------------------------------------------------------------------------
 def docs_index_path() -> Path:
    """Location of the documents index file inside the data directory."""
    return _BASE.joinpath("docs_index.json")
 def load_docs_index() -> dict[str, dict]:
    """Return the documents index ``{doc_id: document dict}``.

    Anything other than a JSON object (including a missing file) yields an
    empty dict.
    """
    loaded = read_json(docs_index_path())
    if isinstance(loaded, dict):
        return loaded
    return {}
 def save_docs_index(index: dict[str, dict]) -> None:
    """Write the whole documents index back to its file."""
    target = docs_index_path()
    write_json(target, index)
 def get_doc(doc_id: str) -> dict | None:
    """Return the index entry for *doc_id*, or ``None`` when it is absent."""
    index = load_docs_index()
    return index.get(doc_id)
 def save_doc(doc: dict) -> None:
    """Insert or replace *doc* in the index, keyed by its ``doc_id``."""
    updated = load_docs_index()
    updated.update({doc["doc_id"]: doc})
    save_docs_index(updated)
 def delete_doc(doc_id: str) -> bool:
    """Remove a document from the index and delete its stored upload.

    Returns:
        ``True`` when the document existed and was removed, else ``False``.
    """
    index = load_docs_index()
    if doc_id not in index:
        return False
    # Take the entry out *before* saving so its stored filename is still
    # available for the file cleanup below.
    doc_info = index.pop(doc_id) or {}
    save_docs_index(index)
    # Remove upload file. An empty name would resolve to UPLOADS_DIR itself,
    # and unlink() on a directory raises, so it is skipped.
    upload_filename = doc_info.get("upload_filename") or ""
    if upload_filename:
        upload_path = UPLOADS_DIR / upload_filename
        if upload_path.is_file():
            upload_path.unlink()
    return True
 # ---------------------------------------------------------------------------
 # Job helpers
 # ---------------------------------------------------------------------------
 def job_dir(job_id: str) -> Path:
    """Directory that holds every file belonging to *job_id*."""
    return JOBS_DIR.joinpath(job_id)
 def job_meta_path(job_id: str) -> Path:
    """Path of the ``meta.json`` file inside the job's directory."""
    directory = job_dir(job_id)
    return directory / "meta.json"
 def load_job_meta(job_id: str) -> dict | None:
    """Read the job's ``meta.json``; ``None`` when the file does not exist."""
    meta_file = job_meta_path(job_id)
    return read_json(meta_file)
 def save_job_meta(job_id: str, meta: dict) -> None:
    """Write *meta* to the job's ``meta.json``, creating the job directory first."""
    directory = job_dir(job_id)
    directory.mkdir(parents=True, exist_ok=True)
    write_json(job_meta_path(job_id), meta)
 def list_all_jobs() -> list[dict]:
    """Collect the metadata of every job directory that has a non-empty ``meta.json``."""
    collected = []
    for entry in JOBS_DIR.iterdir():
        if not entry.is_dir():
            continue
        meta = read_json(entry / "meta.json")
        if not meta:
            continue
        collected.append(meta)
    return collected
 def delete_job(job_id: str) -> None:
    """Delete the job's directory tree if it exists."""
    target = job_dir(job_id)
    if not target.exists():
        return
    shutil.rmtree(target)
 # ---------------------------------------------------------------------------
 # Global KG helpers
 # ---------------------------------------------------------------------------
 def kg_nodes_path() -> Path:
    """Path of the global KG node file."""
    return KG_DIR.joinpath("kg_nodes.json")
 def kg_edges_path() -> Path:
    """Path of the global KG edge file."""
    return KG_DIR.joinpath("kg_edges.json")
 def load_kg_nodes() -> list[dict]:
    """Load the global KG nodes; anything other than a JSON list yields ``[]``."""
    loaded = read_json(kg_nodes_path())
    if isinstance(loaded, list):
        return loaded
    return []
 def load_kg_edges() -> list[dict]:
    """Load the global KG edges; anything other than a JSON list yields ``[]``."""
    loaded = read_json(kg_edges_path())
    if isinstance(loaded, list):
        return loaded
    return []
 def save_kg_nodes(nodes: list[dict]) -> None:
    """Replace the global KG node file with *nodes*."""
    target = kg_nodes_path()
    write_json(target, nodes)
 def save_kg_edges(edges: list[dict]) -> None:
    """Replace the global KG edge file with *edges*."""
    target = kg_edges_path()
    write_json(target, edges)
 def merge_kg(new_nodes: list[dict], new_edges: list[dict], doc_id: str) -> tuple[int, int]:
    """Merge one document's KG output into the global KG.

    Everything previously attributed to *doc_id* is dropped first, so
    re-indexing a document replaces its earlier contribution. New nodes are
    then added unless a node with the same ``(name.lower(), type)`` already
    exists; new edges are added unless an edge with the same unordered
    endpoint pair and ``doc_id`` already exists.

    Returns:
        ``(total_nodes, total_edges)`` - the sizes of the global KG after
        the merge.
    """
    existing_nodes = load_kg_nodes()
    existing_edges = load_kg_edges()
    # Remove nodes/edges from this doc
    existing_nodes = [n for n in existing_nodes if n.get("source_doc") != doc_id]
    existing_edges = [e for e in existing_edges if e.get("doc_id") != doc_id]

    def _node_key(node: dict) -> tuple:
        return (node["name"].lower(), node["type"])

    def _edge_key(edge: dict) -> tuple:
        # Undirected: (a, b) and (b, a) are the same edge. ``.get`` matches
        # the filter above, which also tolerates edges without a doc_id.
        s, t = edge["source"], edge["target"]
        return (min(s, t), max(s, t), edge.get("doc_id"))

    # Merge: deduplicate nodes by (name.lower(), type)
    # NOTE(review): a new node dropped as a duplicate may still be referenced
    # by id from new_edges - confirm that node ids are stable across documents.
    node_keys: set[tuple] = {_node_key(n) for n in existing_nodes}
    for n in new_nodes:
        key = _node_key(n)
        if key not in node_keys:
            existing_nodes.append(n)
            node_keys.add(key)
    # Merge edges: deduplicate by (min(src,tgt), max(src,tgt), doc_id)
    edge_keys: set[tuple] = {_edge_key(e) for e in existing_edges}
    for e in new_edges:
        key = _edge_key(e)
        if key not in edge_keys:
            existing_edges.append(e)
            edge_keys.add(key)
    save_kg_nodes(existing_nodes)
    save_kg_edges(existing_edges)
    return len(existing_nodes), len(existing_edges)
 def remove_doc_from_kg(doc_id: str) -> tuple[int, int]:
    """Drop every node and edge attributed to *doc_id* from the global KG.

    Returns:
        ``(removed_nodes, removed_edges)``.
    """
    all_nodes = load_kg_nodes()
    all_edges = load_kg_edges()
    kept_nodes = [node for node in all_nodes if node.get("source_doc") != doc_id]
    kept_edges = [edge for edge in all_edges if edge.get("doc_id") != doc_id]
    save_kg_nodes(kept_nodes)
    save_kg_edges(kept_edges)
    removed_nodes = len(all_nodes) - len(kept_nodes)
    removed_edges = len(all_edges) - len(kept_edges)
    return removed_nodes, removed_edges
 # ---------------------------------------------------------------------------
 # Query history helpers
 # ---------------------------------------------------------------------------
 def query_history_path() -> Path:
    """Path of the JSONL file that stores the query history."""
    return _BASE.joinpath("query_history.jsonl")
 def append_query_history(result: dict) -> None:
    """Append one query record to the history file."""
    history_file = query_history_path()
    append_jsonl(history_file, result)
 def load_query_history() -> list[dict]:
    """Return all history records, most recently appended first."""
    chronological = read_jsonl(query_history_path())
    # Records are appended in order, so reversing puts the newest first.
    return chronological[::-1]
 # ---------------------------------------------------------------------------
 # Batch job helpers
 # ---------------------------------------------------------------------------
 def batch_meta_path(batch_id: str) -> Path:
    """Path of the JSON file that stores a batch's metadata."""
    batches_dir = _BASE / "batches"
    return batches_dir / f"{batch_id}.json"
 def load_batch_meta(batch_id: str) -> dict | None:
    """Read a batch's metadata file; ``None`` when it does not exist."""
    meta_file = batch_meta_path(batch_id)
    return read_json(meta_file)
 def save_batch_meta(batch_id: str, meta: dict) -> None:
    """Write a batch's metadata to its JSON file."""
    meta_file = batch_meta_path(batch_id)
    write_json(meta_file, meta)
 # ---------------------------------------------------------------------------
 # Storage usage
 # ---------------------------------------------------------------------------
 def storage_used_mb() -> float:
    """Return the total size of all files under the data directory, in MB.

    The value is rounded to two decimals. Files that disappear while the
    tree is being walked (for example a temporary file that a writer has
    just renamed) are skipped instead of aborting the whole computation.
    """
    total = 0
    for path in _BASE.rglob("*"):
        try:
            if path.is_file():
                total += path.stat().st_size
        except OSError:
            # Vanished or unreadable between listing and stat; ignore it.
            continue
    return round(total / (1024 * 1024), 2)
--- a/backend/tests/test_api.py
+++ b/backend/tests/test_api.py
@@ -0,0 +1,256 @@
 """
 API integration tests — tests all major endpoints against a running server.
 Run with: python tests/test_api.py
 Server must be running on http://localhost:8000
 """
 import json
 import sys
 import time
 import urllib.request
 import urllib.error
 from pathlib import Path
 # Root URL of the versioned REST API that every request helper prefixes to its path.
 BASE = "http://localhost:8000/api/v1"
 # Console tags wrapped in ANSI colour escapes: green PASS, red FAIL, blue INFO.
 PASS = "\033[92m[PASS]\033[0m"
 FAIL = "\033[91m[FAIL]\033[0m"
 INFO = "\033[94m[INFO]\033[0m"
 # Running tally of check() outcomes; read at the end to pick the exit status.
 results = {"passed": 0, "failed": 0}
 def req(method: str, path: str, body: dict | None = None, form: dict | None = None) -> dict:
    """Send one HTTP request to the API and return the decoded JSON payload.

    Args:
        method: HTTP verb, e.g. ``"GET"``, ``"POST"``, ``"DELETE"``.
        path: Path appended verbatim to ``BASE`` (may include a query string).
        body: Optional JSON-serialisable payload, sent with a JSON content type.
        form: Accepted for signature compatibility; it is not transmitted. A truthy
            value only routes a GET through the explicit ``Request`` branch.

    Returns:
        The parsed JSON body for success and HTTP error responses alike. When the
        body is not valid JSON, a fallback ``{"http_status": ..., "raw": ...}``
        dict is returned so callers' ``.get("code")`` checks fail cleanly instead
        of the whole run aborting.

    Raises:
        urllib.error.URLError: When the server cannot be reached at all.
    """
    url = BASE + path
    if method == "GET" and not body and not form:
        target = url
    elif body is not None:
        target = urllib.request.Request(
            url,
            data=json.dumps(body).encode(),
            method=method,
            headers={"Content-Type": "application/json"},
        )
    else:
        target = urllib.request.Request(url, method=method)

    try:
        # The context manager closes the connection; the original leaked it.
        with urllib.request.urlopen(target, timeout=30) as response:
            status = response.status
            raw = response.read()
    except urllib.error.HTTPError as e:
        # The API reports failures as JSON in the error response body.
        status = e.code
        try:
            raw = e.read()
        finally:
            e.close()

    try:
        return json.loads(raw.decode())
    except (UnicodeDecodeError, json.JSONDecodeError):
        # e.g. an HTML error page from a proxy or an unhandled server exception
        return {"http_status": status, "raw": raw.decode(errors="replace")}
 def check(name: str, condition: bool, detail: str = "") -> None:
    """Record one assertion outcome in ``results`` and print a PASS/FAIL line.

    Args:
        name: Human-readable label of the check.
        condition: Truthy when the check succeeded.
        detail: Extra text appended to the FAIL line only.
    """
    if not condition:
        results["failed"] += 1
        print(f"  {FAIL} {name}  {detail}")
        return
    results["passed"] += 1
    print(f"  {PASS} {name}")
 def wait_for_server(max_retries: int = 15) -> bool:
    """Poll the server root until it answers, pausing one second between attempts.

    Any HTTP response counts as "up", including 4xx/5xx: an error status still
    proves the process is accepting connections. Previously such responses were
    swallowed by the generic handler and the server was reported as down.

    Args:
        max_retries: Maximum number of connection attempts.

    Returns:
        ``True`` as soon as the server responds, ``False`` if every attempt failed.
    """
    print(f"{INFO} Waiting for server at {BASE}...")
    root_url = BASE.replace("/api/v1", "/")
    for _ in range(max_retries):
        try:
            with urllib.request.urlopen(root_url, timeout=3):
                pass
        except urllib.error.HTTPError as err:
            # The server replied, just not with 2xx (e.g. no route mounted at "/").
            err.close()
        except Exception:
            # Deliberate best-effort: any other failure while the server is
            # still starting means "not ready yet", so wait and retry.
            time.sleep(1)
            continue
        print(f"{INFO} Server is up.")
        return True
    return False
 # ─────────────────────────────────────────────────────────────────────────────
 # Test groups
 # ─────────────────────────────────────────────────────────────────────────────
 def test_system():
    """Exercise the system endpoints: health, stats, supported formats and demo data.

    Every ``data`` payload is normalised with ``or {}`` so that an error envelope
    carrying ``"data": null`` produces failed checks rather than an
    ``AttributeError`` that would abort the remaining test groups.
    """
    print("\n── F 组: System ──")

    r = req("GET", "/health")
    check("GET /health returns code=0", r.get("code") == 0)
    health = r.get("data") or {}
    check("health data.status exists", "status" in health)
    check("health data.components exists", "components" in health)
    print(f"  {INFO} status={health.get('status')} uptime={health.get('uptime_seconds')}s")

    r = req("GET", "/system/stats")
    check("GET /system/stats returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    check("stats has total_documents", "total_documents" in d)
    check("stats has total_nodes", "total_nodes" in d)
    print(f"  {INFO} docs={d.get('total_documents')} nodes={d.get('total_nodes')} edges={d.get('total_edges')}")

    r = req("GET", "/system/formats")
    check("GET /system/formats returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    formats = d.get("formats") or []
    check("formats list is non-empty", len(formats) > 0)
    # Tolerate malformed entries: a missing "ext" key should fail the checks below,
    # not raise KeyError.
    exts = [f.get("ext") for f in formats if isinstance(f, dict)]
    check("pdf format present", "pdf" in exts)
    check("docx format present", "docx" in exts)

    r = req("GET", "/system/demo")
    # 3002 is accepted as well: the else-branch below treats it as "no KG yet".
    check("GET /system/demo returns code=0 or 3002", r.get("code") in (0, 3002))
    if r.get("code") == 0:
        d = r.get("data") or {}
        check("demo data has nodes", "nodes" in d)
        print(f"  {INFO} demo: {len(d.get('nodes',[]))} nodes, {len(d.get('edges',[]))} edges")
    else:
        print(f"  {INFO} demo data not available (no KG yet) — code={r.get('code')}")
 def test_documents():
    """Exercise the document endpoints: listing, upload validation and unknown-id lookup.

    The upload is sent as a hand-built multipart body because ``req`` only
    supports JSON payloads. The response envelope is parsed on both the success
    and the HTTP-error path, so the ``code`` check is evaluated whichever HTTP
    status the server chooses for a rejected upload.
    """
    print("\n── A 组: Documents ──")

    r = req("GET", "/documents")
    check("GET /documents returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    check("documents list has total field", "total" in d)
    check("documents list has items field", "items" in d)
    print(f"  {INFO} total documents={d.get('total', 0)}")

    # Upload a file with an unsupported extension to trigger format validation.
    print("  Testing upload validation...")
    boundary = "boundary123"
    body_parts = (
        f"--{boundary}\r\n"
        'Content-Disposition: form-data; name="file"; filename="test.xyz"\r\n'
        "Content-Type: application/octet-stream\r\n\r\n"
        "dummy content\r\n"
        f"--{boundary}--\r\n"
    ).encode()
    req_obj = urllib.request.Request(
        BASE + "/documents/upload",
        data=body_parts,
        method="POST",
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
    )
    try:
        with urllib.request.urlopen(req_obj, timeout=10) as response:
            raw = response.read()
    except urllib.error.HTTPError as e:
        try:
            raw = e.read()
        finally:
            e.close()
    try:
        r_upload = json.loads(raw.decode())
    except (UnicodeDecodeError, json.JSONDecodeError):
        # A non-JSON body cannot carry the expected code; let the check fail.
        r_upload = {}
    if not isinstance(r_upload, dict):
        r_upload = {}
    check("upload unsupported format returns code=1002", r_upload.get("code") == 1002)

    r = req("GET", "/documents/nonexistent_id")
    check("GET /documents/nonexistent returns code=2001", r.get("code") == 2001)
 def test_indexing():
    """Verify the indexing endpoints reject unknown document and job identifiers."""
    print("\n── B 组: Indexing ──")

    started = req("POST", "/index/start", body={"doc_id": "nonexistent_doc"})
    check("start indexing nonexistent doc returns 2001", started.get("code") == 2001)

    # The three job-scoped endpoints must all answer 2002 for an unknown job id.
    unknown_job_cases = (
        ("GET", "/index/status/nonexistent_job", "get status nonexistent job returns 2002"),
        ("GET", "/index/result/nonexistent_job", "get result nonexistent job returns 2002"),
        ("DELETE", "/index/jobs/nonexistent_job", "cancel nonexistent job returns 2002"),
    )
    for verb, route, label in unknown_job_cases:
        reply = req(verb, route)
        check(label, reply.get("code") == 2002)
 def test_kg():
    """Exercise the knowledge-graph endpoints: stats, node listing/detail, edges and export.

    Node detail and neighbour lookups only run when the node listing is non-empty.
    """
    print("\n── C 组: Knowledge Graph ──")

    stats_reply = req("GET", "/kg/stats")
    check("GET /kg/stats returns code=0", stats_reply.get("code") == 0)
    stats = stats_reply.get("data") or {}
    check("stats has total_nodes", "total_nodes" in stats)
    check("stats has total_edges", "total_edges" in stats)
    print(f"  {INFO} KG: {stats.get('total_nodes')} nodes, {stats.get('total_edges')} edges")

    nodes_reply = req("GET", "/kg/nodes")
    check("GET /kg/nodes returns code 0 or 3002", nodes_reply.get("code") in (0, 3002))
    if nodes_reply.get("code") != 0:
        print(f"  {INFO} KG is empty (code=3002) — skipping node detail tests")
    else:
        listing = nodes_reply.get("data") or {}
        check("nodes data has items", "items" in listing)
        print(f"  {INFO} nodes total={listing.get('total')}")
        if listing.get("items"):
            # Use the first listed node as a known-good id for the detail endpoints.
            node_id = listing["items"][0]["id"]
            detail_reply = req("GET", f"/kg/nodes/{node_id}")
            check(f"GET /kg/nodes/{node_id} returns code=0", detail_reply.get("code") == 0)
            neighbors_reply = req("GET", f"/kg/nodes/{node_id}/neighbors?hops=1")
            check(f"GET /kg/nodes/{node_id}/neighbors returns code=0", neighbors_reply.get("code") == 0)

    missing_reply = req("GET", "/kg/nodes/definitely_not_a_real_node")
    check("GET /kg/nodes/invalid returns code=3001", missing_reply.get("code") == 3001)

    edges_reply = req("GET", "/kg/edges")
    check("GET /kg/edges returns code=0", edges_reply.get("code") == 0)

    export_reply = req("GET", "/kg/export")
    check("GET /kg/export returns code=0", export_reply.get("code") == 0)
 def test_search():
    """Exercise the search endpoints: entity search, path-search validation and graph search."""
    print("\n── E 组: Search ──")

    entity_reply = req("GET", "/search/entities?q=graph")
    check("GET /search/entities returns code=0", entity_reply.get("code") == 0)
    entity_data = entity_reply.get("data") or {}
    check("search entities has query field", "query" in entity_data)
    check("search entities has items field", "items" in entity_data)
    print(f"  {INFO} 'graph' search: {entity_data.get('total', 0)} results")

    typed_reply = req("GET", "/search/entities?q=technology&type=TECHNOLOGY")
    check("GET /search/entities with type filter returns code=0", typed_reply.get("code") == 0)

    # Omitting the from/to parameters is expected to be rejected with code 1001.
    path_reply = req("GET", "/search/path?max_hops=2")
    check("path search without from/to returns 1001", path_reply.get("code") == 1001)

    graph_reply = req("GET", "/search/graph?q=knowledge")
    check("GET /search/graph returns code=0", graph_reply.get("code") == 0)
    graph_data = graph_reply.get("data") or {}
    check("graph search has matched_nodes", "matched_nodes" in graph_data)
 def test_query():
    """Exercise the QA endpoints that work without an LLM call: history and batch jobs."""
    print("\n── D 组: QA Query ──")

    # POST /query is deliberately not called here: it needs the DeepSeek API and KG data.
    history_reply = req("GET", "/query/history")
    check("GET /query/history returns code=0", history_reply.get("code") == 0)
    history = history_reply.get("data") or {}
    check("history has total field", "total" in history)
    check("history has items field", "items" in history)
    print(f"  {INFO} query history: {history.get('total', 0)} records")

    unknown_batch_reply = req("GET", "/query/batch/nonexistent_batch")
    check("GET /query/batch/nonexistent returns 2002", unknown_batch_reply.get("code") == 2002)

    submit_reply = req("POST", "/query/batch", body={"questions": ["test question"]})
    check("POST /query/batch returns code=0", submit_reply.get("code") == 0)
    submitted = submit_reply.get("data") or {}
    check("batch has batch_id", "batch_id" in submitted)
 # ─────────────────────────────────────────────────────────────────────────────
 # Main
 # ─────────────────────────────────────────────────────────────────────────────
 if __name__ == "__main__":
    # Abort early with a hint when no server answers.
    if not wait_for_server():
        print(f"\n{FAIL} Server not responding. Start with: python main.py")
        sys.exit(1)

    # Run every test group in the original fixed order.
    test_groups = (
        test_system,
        test_documents,
        test_indexing,
        test_kg,
        test_search,
        test_query,
    )
    for run_group in test_groups:
        run_group()

    # Summarise the tally and mirror it in the process exit status.
    total = results["passed"] + results["failed"]
    all_passed = results["failed"] == 0
    print(f"\n{'='*50}")
    print(f"Results: {results['passed']}/{total} passed, {results['failed']} failed")
    if all_passed:
        print(f"{PASS} All tests passed!")
    else:
        print(f"{FAIL} {results['failed']} test(s) failed")
    print(f"{'='*50}")
    sys.exit(0 if all_passed else 1)
--- a/docs/agentic_rag_specification-v1.0.md
+++ b/docs/agentic_rag_specification-v1.0.md
@@ -0,0 +1,779 @@
 # Agentic-RAG 规范文档 v1.0
 > GraphRAG 问答阶段核心流程：Knowledge Graph → LangChain Agent → QA
 >
 > 数据来源：Bridge Pipeline 输出（`kg_nodes.json` + `kg_edges.json`）
 > 测试验证日期：2026-03-05
 > 全流程运行耗时：~40s（4 个测试查询）
 ---
 ## 目录
 - [一、完整执行思路与脚本位置](#一完整执行思路与脚本位置)
 - [二、LangChain Agent 输入输出规范](#二langchain-agent-输入输出规范)
 - [三、MinerU ↔ Agentic-RAG 对接规范与核心架构](#三mineru--agentic-rag-对接规范与核心架构)
 - [四、问答流程最终数据返回格式规范](#四问答流程最终数据返回格式规范)
 - [五、虚拟环境与依赖](#五虚拟环境与依赖)
 ---
 ## 一、完整执行思路与脚本位置
 ### 1.1 总体架构定位
 Agentic-RAG 是 GraphRAG 系统的**问答阶段**，位于 Bridge Pipeline 之后，负责将知识图谱转化为可交互的智能问答能力。
 ```
 【已完成阶段】                              【本阶段：Agentic-RAG】
 ────────────────────                      ──────────────────────────
 PDF
  ↓ MinerU Cloud API
 content_list.json
  ↓ Bridge Pipeline
 kg_nodes.json (40 nodes)    ──────────→  NetworkX Graph (内存)
 kg_edges.json (780 edges)               ↓
                                         4 个 LangChain @tool
                                         ↓
                                         LangChain v1 create_agent
                                         (DeepSeek deepseek-chat)
                                         ↓
                                         ReAct 推理循环
                                         ↓
                                         自然语言答案
 ```
 ### 1.2 执行流程（Step 0 – Step 5）
 | 步骤 | 模块 | 说明 |
 |------|------|------|
 | Step 0 | 环境 + 配置 | 加载 `.env`（DEEPSEEK_API_KEY），初始化 `ChatOpenAI` |
 | Step 1 | KG 加载 | 读取 `kg_nodes.json` + `kg_edges.json`，构建 NetworkX 无向图 |
 | Step 2 | Tool 注册 | 用 `@tool` 装饰器注册 4 个 KG 检索工具 |
 | Step 3 | Agent 构建 | `create_agent(model, tools, system_prompt)` 编译 LangGraph |
 | Step 4 | 问答调用 | `agent.invoke({"messages": [("human", question)]})` |
 | Step 5 | 结果提取 | `result["messages"][-1].content` 获取最终答案 |
 ### 1.3 测试脚本存放位置
 ```
 F:\GraphRAGAgent\graphrag_pipeline\
 ├── agentic_rag_mvp.py          ← 主测试脚本（本规范对应文件）
 ├── .env                         ← DEEPSEEK_API_KEY 配置
 └── output/
    ├── kg_nodes.json            ← Bridge Pipeline 生成（40 节点）
    └── kg_edges.json            ← Bridge Pipeline 生成（780 边）
 ```
 ### 1.4 运行命令
 ```bash
 # MVP 连通性测试（4 个预设测试查询）
 F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe \
    F:/GraphRAGAgent/graphrag_pipeline/agentic_rag_mvp.py
 ```
 ### 1.5 ReAct 推理循环详解
 Agent 使用 **ReAct（Reasoning + Acting）** 模式，每个问题的处理流如下：
 ```
 用户输入 (question: str)
    │
    ▼
 ┌─────────────────────────────────────────────────┐
 │  LLM Reasoning（DeepSeek deepseek-chat）         │
 │  决策：需要调用哪个工具？参数是什么？              │
 └─────────────────────────────────────────────────┘
    │ tool_call
    ▼
 ┌─────────────────────────────────────────────────┐
 │  Tool Execution（NetworkX 本地计算，无 API 调用）  │
 │  search_entities / get_neighbors /               │
 │  get_entities_by_type / describe_graph           │
 └─────────────────────────────────────────────────┘
    │ ToolMessage（工具返回的文本结果）
    ▼
 ┌─────────────────────────────────────────────────┐
 │  LLM Observation（观察工具结果）                  │
 │  决策：结果够用了吗？还需要调更多工具？            │
 └─────────────────────────────────────────────────┘
    │ 继续 tool_call 或输出最终答案
    ▼
 AIMessage（最终自然语言答案）
 ```
 **实测工具调用模式（4 个测试查询）：**
 | 查询类型 | 工具调用序列 | 特点 |
 |---------|------------|------|
 | 图谱整体概览 | `describe_graph` | 单次工具调用 |
 | 类型枚举 | `get_entities_by_type` | 单次工具调用 |
 | 多跳关系推理 | `search_entities` → `get_neighbors` | 两步串行调用 |
 | 概念精确查找 | `search_entities` → `get_neighbors` | 两步串行调用 |
 ---
 ## 二、LangChain Agent 输入输出规范
 ### 2.1 LLM 适配规范
 #### 2.1.1 DeepSeek → LangChain 标准组件
 LangChain v1 使用 `ChatOpenAI` 通过 `base_url` 覆盖接入任何 OpenAI 兼容 API：
 ```python
 from langchain_openai import ChatOpenAI
 llm = ChatOpenAI(
    model="deepseek-chat",                   # DeepSeek 模型名
    api_key=DEEPSEEK_API_KEY,                # 来自 graphrag_pipeline/.env
    base_url="https://api.deepseek.com",     # OpenAI 兼容端点
    temperature=0,                           # 问答场景确定性输出
 )
 ```
 | 参数 | 值 | 说明 |
 |------|-----|------|
 | `model` | `"deepseek-chat"` | DeepSeek 实际模型标识 |
 | `api_key` | `${DEEPSEEK_API_KEY}` | 从 `.env` 读取，与 Bridge Pipeline 共用 |
 | `base_url` | `"https://api.deepseek.com"` | OpenAI 兼容端点；DeepSeek 同时接受带 `/v1` 后缀的写法（SDK 不会自动补全该路径） |
 | `temperature` | `0` | 问答场景设为 0，保证可重现性 |
 #### 2.1.2 与 LangExtract 中 DeepSeek 的区别
 | 对比项 | LangExtract 中的 DeepSeek | Agentic-RAG 中的 DeepSeek |
 |--------|--------------------------|--------------------------|
 | 接入方式 | 直接实例化 `OpenAILanguageModel` | LangChain `ChatOpenAI` 标准组件 |
 | API Key 环境变量 | `OPENAI_API_KEY` | `DEEPSEEK_API_KEY` |
 | 调用方式 | `lx.extract(model=model)` | `agent.invoke({"messages": ...})` |
 | 输出格式 | JSON（实体抽取） | 自然语言（问答） |
 | Tool Calling | 不支持（单轮推理） | 支持（ReAct 多轮） |
 ### 2.2 Agent 构建规范
 #### 2.2.1 LangChain v1 create_agent
 ```python
 from langchain.agents import create_agent
 agent = create_agent(
    model=llm,              # ChatOpenAI 实例
    tools=_tools,           # List[BaseTool]，4 个工具
    system_prompt=SYSTEM_PROMPT,  # 系统提示词字符串
 )
 ```
 **版本注意事项：**
 | API | 状态 | 说明 |
 |-----|------|------|
 | `langchain.agents.create_agent` | ✅ LangChain v1 推荐 | 本项目使用 |
 | `langgraph.prebuilt.create_react_agent` | ⚠️ Deprecated in LangGraph V1.0 | 已废弃，勿用 |
 | `langchain.agents.create_react_agent` (旧版) | ❌ Legacy | 已移除 |
 #### 2.2.2 System Prompt 规范
 ```
 You are a Knowledge Graph QA assistant. You have access to a knowledge graph
 extracted from academic documents about GraphRAG and related technologies.
 The graph contains:
 - {node_count} deduplicated entities ({type_list} types)
 - {edge_count} CO_OCCURS_IN edges representing same-page co-occurrence
 Available tools:
 1. search_entities      — find entities by keyword substring
 2. get_neighbors        — explore entity relationships (N-hop BFS)
 3. get_entities_by_type — list all entities of a type
 4. describe_graph       — get graph statistics overview
 Reasoning strategy:
 - Always use at least one tool before answering a factual question
 - For relationship questions, use get_neighbors after identifying the entity with search_entities
 - For enumeration questions, use get_entities_by_type
 - Synthesize tool results into a clear, concise answer
 - Cite the entity names and types in your final answer
 ```
 ### 2.3 Agent 输入规范
 #### 2.3.1 invoke 输入格式
 ```python
 result = agent.invoke({
    "messages": [
        ("human", question)   # 用户问题（自然语言字符串）
    ]
 })
 ```
 **输入字段规范：**
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `messages` | `list[tuple[str, str]]` | 消息列表，格式 `(role, content)` |
 | `role` | `"human"` \| `"ai"` \| `"system"` | 消息角色 |
 | `content` | `str` | 消息内容 |
 **多轮对话输入（支持历史上下文）：**
 ```python
 result = agent.invoke({
    "messages": [
        ("human", "What is GraphRAG?"),
        ("ai", "GraphRAG is a knowledge graph-enhanced RAG system..."),
        ("human", "How does it relate to LLMs?"),   # 当前问题
    ]
 })
 ```
 ### 2.4 Agent 输出规范
 #### 2.4.1 invoke 原始返回
 ```python
 {
    "messages": [
        HumanMessage(content="What is GraphRAG?"),
        AIMessage(content="", tool_calls=[...]),    # 工具调用
        ToolMessage(content="...", tool_call_id="..."),  # 工具结果
        AIMessage(content="GraphRAG is an advanced...")  # 最终答案
    ]
 }
 ```
 #### 2.4.2 消息类型枚举
 | 消息类型 | 角色 | 说明 |
 |---------|------|------|
 | `HumanMessage` | `human` | 用户输入 |
 | `AIMessage`（tool_calls 非空） | `ai` | LLM 决策发起工具调用 |
 | `ToolMessage` | `tool` | 工具执行结果 |
 | `AIMessage`（tool_calls 为空） | `ai` | 最终自然语言答案 |
 #### 2.4.3 最终答案提取
 ```python
 final_msg = result["messages"][-1]
 answer = final_msg.content   # str，最终自然语言答案
 ```
 ### 2.5 四个工具输入输出规范
 #### Tool 1: `search_entities`
 | 项目 | 规范 |
 |------|------|
 | 入参 | `query: str` — 关键词（大小写不敏感子串匹配） |
 | 匹配逻辑 | `query.lower() in entity_name.lower()` |
 | 返回格式 | 多行文本，每行格式：`[{type}] "{name}" (confidence={c}, page={p}, id={id})` |
 | 无匹配时 | 返回提示 + 前 8 个样例实体名 |
 | 最多返回 | 15 条 |
 **实际调用示例：**
 ```
 输入: query="GraphRAG"
 输出:
 Found 3 entity(ies) matching 'GraphRAG':
  [TECHNOLOGY] "GraphRAG" (confidence=match_exact, page=0, id=node_0)
  [CONCEPT] "GraphRAG pipeline" (confidence=match_exact, page=0, id=node_12)
  [CONCEPT] "GraphRAG (Global)" (confidence=match_exact, page=0, id=node_15)
 ```
 #### Tool 2: `get_neighbors`
 | 项目 | 规范 |
 |------|------|
 | 入参 | `entity_name: str`，`hops: int = 1`（范围 1-3） |
 | 匹配逻辑 | 子串匹配找起始节点，取 `candidates[0]` |
 | 遍历算法 | `nx.single_source_shortest_path_length(G, node_id, cutoff=hops)` |
 | 返回格式 | 按 hop 分组，每组 `[{type}] {name}`，每组最多 20 条 |
 | 未找到时 | 返回提示，建议先用 `search_entities` |
 **实际调用示例：**
 ```
 输入: entity_name="GraphRAG", hops=1
 输出:
 Neighbors of 'GraphRAG' [TECHNOLOGY] within 1 hop(s):
  Hop 1 — 39 related entities:
    [CONCEPT] Knowledge Graph Enhanced RAG System
    [CONCEPT] retrieval-augmented generation
    ...
  Total related entities: 39
 ```
 #### Tool 3: `get_entities_by_type`
 | 项目 | 规范 |
 |------|------|
 | 入参 | `entity_type: str`（自动 `.upper()` 处理） |
 | 有效类型 | `TECHNOLOGY`, `CONCEPT`, `PERSON`, `ORGANIZATION`, `LOCATION` |
 | 返回格式 | 按 `name` 字母序排列，每行 `• {name} (confidence={c}, page={p})` |
 | 无效类型时 | 返回错误 + 图谱中实际存在的类型列表 |
 **实际调用示例：**
 ```
 输入: entity_type="TECHNOLOGY"
 输出:
 TECHNOLOGY entities (4 total):
  • GraphRAG (confidence=match_exact, page=0)
  • LLMs (confidence=match_exact, page=0)
  • LangExtract (confidence=match_exact, page=0)
  • MinerU (confidence=match_exact, page=0)
 ```
 #### Tool 4: `describe_graph`
 | 项目 | 规范 |
 |------|------|
 | 入参 | 无参数 |
 | 计算指标 | 节点数、边数、关系类型、图密度（`nx.density`）、度中心性（`nx.degree_centrality`） |
 | 返回格式 | 结构化文本，包含概览 + 类型分布 + Top-5 中心节点 |
 **实际调用示例（实测输出）：**
 ```
 === Knowledge Graph Overview ===
  Nodes (entities):  40
  Edges (relations): 780
  Relation type:     CO_OCCURS_IN (same-page co-occurrence)
  Graph density:     1.0000
  Entity type distribution:
    CONCEPT        :  36
    TECHNOLOGY     :   4
  Top-5 most connected entities (by degree centrality):
    [TECHNOLOGY] GraphRAG (centrality=1.000)
    [CONCEPT] Knowledge Graph Enhanced RAG System (centrality=1.000)
    [CONCEPT] retrieval-augmented generation (centrality=1.000)
    [CONCEPT] knowledge graphs (centrality=1.000)
    [CONCEPT] large language models (centrality=1.000)
 ```
 ---
 ## 三、MinerU ↔ Agentic-RAG 对接规范与核心架构
 ### 3.1 全链路技术架构
 ```
 ┌─────────────────────────────────────────────────────────────────────┐
 │  阶段一：文档解析（MinerU Cloud API）                                  │
 │                                                                      │
 │  PDF 文件                                                            │
 │    │ POST /file-urls/batch (enable_table=True, language="en")        │
 │    ├─ PUT {presigned_url}（裸上传，不带 Content-Type）                │
 │    └─ GET /extract-results/batch/{batch_id}（轮询 done）              │
 │         ↓                                                            │
 │  full_zip_url → 解压 → {uuid}_content_list.json                      │
 │                                                                      │
 │  关键输出字段：type, text, text_level, table_body, page_idx, bbox     │
 └─────────────────────────────────────────────────────────────────────┘
                              ↓
 ┌─────────────────────────────────────────────────────────────────────┐
 │  阶段二：知识图谱构建（Bridge Pipeline）                               │
 │                                                                      │
 │  content_list.json                                                   │
 │    │ text_assembler.py                                               │
 │    ├─ text blocks → .rstrip() 拼接                                   │
 │    ├─ table blocks → BeautifulSoup HTML → pipe 分隔文本              │
 │    └─ PageText(page_idx, text, block_spans)                          │
 │         ↓                                                            │
 │    entity_extractor.py (LangExtract + DeepSeek)                      │
 │         ↓                                                            │
 │    kg_builder.py (去重 + CO_OCCURS_IN 边)                            │
 │         ↓                                                            │
 │  kg_nodes.json (40 nodes)  +  kg_edges.json (780 edges)             │
 └─────────────────────────────────────────────────────────────────────┘
                              ↓
 ┌─────────────────────────────────────────────────────────────────────┐
 │  阶段三：Agentic-RAG 问答（LangChain + LangGraph）                    │
 │                                                                      │
 │  kg_nodes.json → NetworkX.G.add_node(**node)                         │
 │  kg_edges.json → NetworkX.G.add_edge(source, target, **edge)        │
 │                                                                      │
 │  @tool search_entities    ← 子串匹配                                  │
 │  @tool get_neighbors      ← BFS N-hop 遍历                           │
 │  @tool get_entities_by_type ← 类型过滤                               │
 │  @tool describe_graph     ← 图统计                                   │
 │         ↓                                                            │
 │  create_agent(ChatOpenAI("deepseek-chat"), tools, system_prompt)     │
 │         ↓                                                            │
 │  ReAct 推理循环（think → tool_call → observe → repeat）               │
 │         ↓                                                            │
 │  自然语言答案（AIMessage.content）                                     │
 └─────────────────────────────────────────────────────────────────────┘
 ```
 ### 3.2 MinerU → KG 关键参数对接
 | MinerU 输出字段 | Bridge Pipeline 处理 | Agentic-RAG 使用 |
 |---------------|-------------------|----------------|
 | `block["type"]` | 区分 `text`/`table`/`image` | 不直接使用（已由 Bridge 转换） |
 | `block["text"]` | `.rstrip()` 后加入 PageText | 已内化为 `node["name"]` |
 | `block["table_body"]` | BeautifulSoup → pipe 分隔文本 | 已内化为实体描述 |
 | `block["page_idx"]` | 分组依据，记入 BlockSpan | `node["page"]` 字段 |
 | `block["bbox"]` | 记录字符偏移位置 | `node["char_start"]` / `node["char_end"]` |
 | `{uuid}_content_list.json` 文件名 | UUID 作为 `source_doc_id` | `node["source_doc"]` / `edge["doc_id"]` |
 ### 3.3 NetworkX 图构建规范
 ```python
 import networkx as nx
 G = nx.Graph()   # 无向图（CO_OCCURS_IN 关系无方向）
 # 节点：来自 kg_nodes.json
 for node in kg_nodes:
    G.add_node(
        node["id"],          # 主键：node_0, node_1, ...
        **node               # 所有字段作为节点属性
    )
 # 边：来自 kg_edges.json
 for edge in kg_edges:
    G.add_edge(
        edge["source"],      # node_0
        edge["target"],      # node_1
        relation=edge["relation"],   # "CO_OCCURS_IN"
        doc_id=edge["doc_id"],       # UUID
        page=edge["page"],           # 0-indexed
    )
 ```
 **图属性：**
 | 属性 | 实测值 | 说明 |
 |------|--------|------|
 | `G.number_of_nodes()` | `40` | 去重实体数 |
 | `G.number_of_edges()` | `780` | CO_OCCURS_IN 边数 |
 | `nx.density(G)` | `1.0` | 完全图（单页文档所有节点两两连接） |
 | `G.nodes[nid]` | `dict` | 节点属性字典（id, name, type, page, confidence, ...） |
 ### 3.4 MinerU API 关键参数（与 Agentic-RAG 相关部分）
 | 参数 | 推荐值 | 影响 Agentic-RAG 的原因 |
 |------|--------|----------------------|
 | `enable_table` | `True` | 表格被解析为 HTML `<table>`，Bridge 转为文本参与实体抽取，影响 KG 节点质量 |
 | `enable_formula` | `True`（默认） | 公式以 LaTeX 内联写入文本，影响文本纯净度，可能产生噪声实体 |
 | `language` | `"en"` / `"ch"` | 影响 OCR 精度，直接影响文本质量和实体对齐率 |
 | `model_version` | `"pipeline"` | 输出 `{uuid}_content_list.json`，Bridge 通过 glob `*_content_list.json` 匹配 |
 | `page_ranges` | 按需设置 | 多页文档可分批处理，减少每批实体数和边数规模 |
 ### 3.5 Agent 系统扩展点
 当 KG 数据更新后（新文档接入），Agentic-RAG 只需**重新加载 JSON 文件**，不需要重新构建 agent：
 ```python
 # 动态重载 KG（新文档处理完成后）
 fresh = _load_kg()   # 重新读取 kg_nodes.json + kg_edges.json
 G.clear()
 G.update(fresh)      # 原地更新节点与边，保证 tools 持有的仍是同一个 G 对象
 # agent 实例无需重建，tools 引用同一 G 对象
 ```
 ---
 ## 四、问答流程最终数据返回格式规范
 ### 4.1 invoke 完整返回结构
 ```python
 result = agent.invoke({"messages": [("human", question)]})
 # result 类型: dict
 # result.keys(): ["messages"]
 ```
 `result["messages"]` 是一个有序列表，包含完整的对话历史：
 ```python
 [
    HumanMessage,          # 用户输入
    AIMessage,             # 工具调用决策（可能多轮）
    ToolMessage,           # 工具执行结果（可能多轮）
    ...                    # 可能有多轮 AIMessage + ToolMessage
    AIMessage,             # 最终答案（tool_calls=[]）
 ]
 ```
 ### 4.2 HumanMessage 格式
 ```python
 HumanMessage(
    content="What technology entities are in the knowledge graph?",
    additional_kwargs={},
    response_metadata={},
    id="uuid-string",       # 自动生成
 )
 ```
 ### 4.3 AIMessage（工具调用）格式
 ```python
 AIMessage(
    content="",             # 内容为空（LLM 决策调用工具）
    additional_kwargs={
        "tool_calls": [
            {
                "id": "call_abc123",
                "type": "function",
                "function": {
                    "name": "get_entities_by_type",
                    "arguments": "{\"entity_type\": \"TECHNOLOGY\"}"
                }
            }
        ]
    },
    tool_calls=[
        {
            "name": "get_entities_by_type",
            "args": {"entity_type": "TECHNOLOGY"},
            "id": "call_abc123",
            "type": "tool_call",
        }
    ],
    response_metadata={
        "model_name": "deepseek-chat",
        "finish_reason": "tool_calls",
        "usage": {
            "prompt_tokens": 580,
            "completion_tokens": 18,
            "total_tokens": 598,
        }
    },
 )
 ```
 ### 4.4 ToolMessage 格式
 ```python
 ToolMessage(
    content="TECHNOLOGY entities (4 total):\n  • GraphRAG ...\n  • LLMs ...",
    tool_call_id="call_abc123",     # 与 AIMessage.tool_calls[i].id 对应
    name="get_entities_by_type",    # 工具名称
    additional_kwargs={},
    response_metadata={},
 )
 ```
 ### 4.5 AIMessage（最终答案）格式
 ```python
 AIMessage(
    content="## Technology Entities in the Knowledge Graph\n\n1. **GraphRAG** ...",
    additional_kwargs={
        "tool_calls": []   # 空列表，表示无更多工具调用
    },
    tool_calls=[],
    response_metadata={
        "model_name": "deepseek-chat",
        "finish_reason": "stop",
        "usage": {
            "prompt_tokens": 820,
            "completion_tokens": 350,
            "total_tokens": 1170,
        }
    },
    id="msg-uuid-string",
 )
 ```
 ### 4.6 最终答案提取规范
 ```python
 # 标准提取方式
 final_msg = result["messages"][-1]   # 最后一条消息必为最终 AIMessage
 answer: str = final_msg.content      # 自然语言答案
 # 安全提取方式（防御性编程）
 answer = (
    final_msg.content
    if hasattr(final_msg, "content")
    else str(final_msg)
 )
 ```
 ### 4.7 推荐封装数据格式
 业务层调用时建议封装为以下结构，便于下游使用：
 ```python
 from dataclasses import dataclass
 from typing import Any
@dataclass
 class AgenticRAGResponse:
    question: str                  # 用户原始问题
    answer: str                    # 最终答案（Markdown 格式）
    tool_calls: list[dict]         # 工具调用链记录
    total_messages: int            # 对话轮次（含 human/ai/tool 全部）
    token_usage: dict[str, int]    # Token 用量统计
    kg_stats: dict[str, Any]       # KG 规模信息
 ```
 **填充示例：**
 ```python
 def run_query_with_metadata(question: str) -> AgenticRAGResponse:
    result = agent.invoke({"messages": [("human", question)]})
    messages = result["messages"]
    # 提取工具调用链
    tool_calls = []
    for msg in messages:
        if hasattr(msg, "tool_calls") and msg.tool_calls:
            for tc in msg.tool_calls:
                tool_calls.append({
                    "tool": tc["name"],
                    "args": tc["args"],
                    "call_id": tc["id"],
                })
    # Token 统计（来自最后一条 AIMessage）
    last_ai = messages[-1]
    usage = last_ai.response_metadata.get("usage", {})
    return AgenticRAGResponse(
        question=question,
        answer=messages[-1].content,
        tool_calls=tool_calls,
        total_messages=len(messages),
        token_usage={
            "prompt_tokens":     usage.get("prompt_tokens", 0),
            "completion_tokens": usage.get("completion_tokens", 0),
            "total_tokens":      usage.get("total_tokens", 0),
        },
        kg_stats={
            "nodes": G.number_of_nodes(),
            "edges": G.number_of_edges(),
            "density": nx.density(G),
        },
    )
 ```
 ### 4.8 实测问答响应样例
 #### 样例 1：T1-Overview（图谱概览类问题）
 ```
 问题：Give me an overview of the knowledge graph.
      What types of entities does it contain and which entities are most central?
 工具调用链：
  [1] describe_graph()
 最终答案（节选）：
  ## Knowledge Graph Overview
  **Structure:**
  - 40 entities (nodes) connected by 780 edges
  - All edges represent CO_OCCURS_IN relationships
  - Graph density: 1.000 — fully connected graph
  **Entity Types:**
  1. TECHNOLOGY (4): GraphRAG, LLMs, LangExtract, MinerU
  2. CONCEPT (36): retrieval-augmented generation, knowledge graphs, ...
  **Most Central Entities (centrality=1.000):**
  1. [TECHNOLOGY] GraphRAG
  2. [CONCEPT] Knowledge Graph Enhanced RAG System
  ...
 消息轮次：4 条（human + ai_tool_call + tool_result + ai_final）
 Token 用量：约 900 tokens
 ```
 #### 样例 2：T3-MultiHop（多跳推理类问题）
 ```
 问题：What concepts and technologies are most closely related to GraphRAG?
      Explore the graph neighborhood and explain the connections.
 工具调用链：
  [1] search_entities(query="GraphRAG")
  [2] get_neighbors(entity_name="GraphRAG", hops=1)
 最终答案（节选）：
  ## Concepts and Technologies Most Closely Related to GraphRAG
  ### Core Technologies (Directly Connected):
  1. LLMs — The foundation models that GraphRAG enhances
  2. LangExtract — Used for language extraction in the pipeline
  3. MinerU — Part of the data processing ecosystem
  ### Key Concepts:
  - Knowledge Graph Enhanced RAG System (overarching architecture)
  - retrieval-augmented generation (core paradigm)
  - multi-hop reasoning (key capability)
  ...
 消息轮次：6 条（human + 2×ai_tool_call + 2×tool_result + ai_final）
 Token 用量：约 1,200 tokens
 ```
 ### 4.9 错误与边界情况处理
 | 情况 | Agent 行为 | 返回内容 |
 |------|------------|---------|
 | 实体不存在 | 工具返回提示 + 样例实体名 | Agent 改写查询或给出不确定性说明 |
 | 类型不合法 | 工具返回有效类型列表 | Agent 自动纠正并重试 |
 | 问题超出 KG 范围 | 无工具调用结果支撑 | Agent 如实说明 "信息不在当前 KG 中" |
 | Token 超限 | LangChain 内部截断 | 减少 `hops` 或缩短问题 |
 ---
 ## 五、虚拟环境与依赖
 ### 5.1 运行环境
 | 项目 | 值 |
 |------|-----|
 | 虚拟环境 | `F:\GraphRAGAgent\langextract_src\.venv\`（复用 Bridge Pipeline 的 venv） |
 | Python 版本 | 3.12 |
 | 安装方式 | uv |
 ### 5.2 Agentic-RAG 新增依赖
 | 包 | 版本（实测） | 用途 |
 |----|------------|------|
 | `langchain` | 1.2.10 | `@tool` 装饰器、`create_agent` |
 | `langchain-openai` | latest | `ChatOpenAI`（DeepSeek 适配） |
 | `langgraph` | latest | `create_agent` 底层运行时 |
 | `networkx` | latest | KG 图构建、BFS 遍历、中心性计算 |
 ### 5.3 完整依赖安装
 ```bash
 uv pip install langchain langchain-openai langgraph networkx \
  --python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
 ```
 ### 5.4 环境变量
 `F:\GraphRAGAgent\graphrag_pipeline\.env`：
 ```env
 DEEPSEEK_API_KEY=sk-xxxxxxxxxxxxxxxx
 DEEPSEEK_BASE_URL=https://api.deepseek.com
 ```
 ---
 ## 附录：各阶段文件依赖速查
 | 阶段 | 输入 | 输出 | 关键脚本 |
 |------|------|------|---------|
 | MinerU 解析 | `*.pdf` | `{uuid}_content_list.json` | `mineru_mvp/pipeline.py` |
 | Bridge Pipeline | `*_content_list.json` | `kg_nodes.json` + `kg_edges.json` | `graphrag_pipeline/bridge.py` |
 | Agentic-RAG | `kg_nodes.json` + `kg_edges.json` | 自然语言答案 | `graphrag_pipeline/agentic_rag_mvp.py` |
 | 规范文档 | 覆盖范围 |
 |---------|---------|
 | `docs/mineru_specification-v1.0.md` | MinerU 解析阶段输入/输出 |
 | `docs/langextract_specification-v1.0.md` | LangExtract 实体抽取参数 |
 | `docs/bridge_pipeline_specification-v1.0.md` | Bridge Pipeline 对接规范与 KG 输出格式 |
 | `docs/agentic_rag_specification-v1.0.md` | **本文件** — Agentic-RAG 问答阶段规范 |
--- a/docs/backend_service_specification-v1.0.md
+++ b/docs/backend_service_specification-v1.0.md
--- a/docs/bridge_pipeline_specification-v1.0.md
+++ b/docs/bridge_pipeline_specification-v1.0.md
@@ -0,0 +1,481 @@
 # Bridge Pipeline Specification v1.0
 > GraphRAG 索引阶段核心流程：MinerU → LangExtract → Knowledge Graph
 ---
 ## 1. Pipeline 执行思路
 ### 1.1 整体架构
 Bridge Pipeline 是 GraphRAG 索引阶段的核心流程，负责将 MinerU 解析后的结构化 PDF 内容送入 LangExtract 完成实体抽取，最终生成知识图谱的节点（Nodes）和边（Edges）。
 ```
 MinerU output                    Bridge Pipeline                      KG output
 ─────────────                    ───────────────                      ─────────
 {uuid}_content_list.json    →    text_assembler.py
  ├─ text blocks                   ├─ 按页拼接纯文本
  └─ table blocks (HTML)           ├─ HTML表格→纯文本
                                   └─ 记录每个block的char偏移
                              →    entity_extractor.py
                                   ├─ 逐页调用 lx.extract()
                                   └─ DeepSeek via OpenAI Provider
                              →    kg_builder.py
                                   ├─ 过滤低质量对齐                  →  kg_nodes.json
                                   ├─ 节点去重 (name.lower(), type)
                                   └─ 同页实体对→CO_OCCURS_IN边       →  kg_edges.json
 ```
 ### 1.2 五步执行流程
 | 步骤 | 模块 | 说明 |
 |------|------|------|
 | Step 1 | `bridge.py` | 加载 MinerU 输出 `content_list.json`，解析输入路径和 source_doc_id |
 | Step 2 | `text_assembler.py` | 按 `page_idx` 分组，拼接纯文本，记录每个 block 的字符偏移 |
 | Step 3 | `entity_extractor.py` | 逐页调用 LangExtract + DeepSeek 完成实体抽取 |
 | Step 4 | `kg_builder.py` | 过滤低质量对齐 → 节点去重 → 同页配对生成 CO_OCCURS_IN 边 |
 | Step 5 | `bridge.py` | 保存 `kg_nodes.json` + `kg_edges.json` 到 output 目录 |
 ### 1.3 文件存放位置
 ```
 F:\GraphRAGAgent\graphrag_pipeline\
 ├── .env                     # DeepSeek API 配置
 ├── CLAUDE.md                # 组件开发规范
 ├── bridge.py                # 主入口（串联完整 Pipeline）
 ├── text_assembler.py        # MinerU JSON → 按页纯文本 + 偏移映射
 ├── entity_extractor.py      # LangExtract + DeepSeek 封装
 ├── kg_builder.py            # KG 节点去重 + 边生成
 └── output/
    ├── kg_nodes.json        # 知识图谱节点（9,851 bytes）
    └── kg_edges.json        # 知识图谱边（129,093 bytes）
 ```
 ### 1.4 运行命令
 ```bash
 # 使用默认测试输入
 F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe F:/GraphRAGAgent/graphrag_pipeline/bridge.py
 # 指定输入文件
 F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe F:/GraphRAGAgent/graphrag_pipeline/bridge.py path/to/content_list.json
 # 指定输入目录（自动查找 *_content_list.json）
 F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe F:/GraphRAGAgent/graphrag_pipeline/bridge.py path/to/output_dir/
 ```
 ---
 ## 2. 实际本地输出文档规范
 ### 2.1 测试运行结果
 - **输入文件**: `F:\GraphRAGAgent\mineru_mvp\output\test_sample\8a719db4-2b50-405b-826d-7bb27b224fa0_content_list.json`
 - **输入规模**: 10 blocks（9 text + 1 table），1 页，2102 字符
 - **抽取结果**: 45 raw extractions → 40 去重节点，780 CO_OCCURS_IN 边
 - **对齐质量**: 全部 40 节点均为 `match_exact`（1 个 `match_fuzzy` 已被过滤）
 - **执行时间**: ~22s（DeepSeek API 调用）
 ### 2.2 kg_nodes.json — 实际输出
 **文件大小**: 9,851 bytes | **节点数**: 40
 **节点类型分布**:
 | 类型 | 数量 | 示例 |
 |------|------|------|
 | TECHNOLOGY | 4 | GraphRAG, MinerU, LLMs, LangExtract |
 | CONCEPT | 36 | knowledge graphs, retrieval-augmented generation, multi-hop reasoning |
 **节点格式（实际样例）**:
 ```json
 {
  "id": "node_0",
  "name": "GraphRAG",
  "type": "TECHNOLOGY",
  "source_doc": "8a719db4-2b50-405b-826d-7bb27b224fa0",
  "char_start": 0,
  "char_end": 8,
  "confidence": "match_exact",
  "page": 0
 }
 ```
 **完整节点列表（前 10 个）**:
 | id | name | type | confidence |
 |----|------|------|-----------|
 | node_0 | GraphRAG | TECHNOLOGY | match_exact |
 | node_1 | Knowledge Graph Enhanced RAG System | CONCEPT | match_exact |
 | node_2 | retrieval-augmented generation | CONCEPT | match_exact |
 | node_3 | knowledge graphs | CONCEPT | match_exact |
 | node_4 | large language models | CONCEPT | match_exact |
 | node_5 | question answering | CONCEPT | match_exact |
 | node_6 | document collections | CONCEPT | match_exact |
 | node_7 | RAG systems | CONCEPT | match_exact |
 | node_8 | vector similarity search | CONCEPT | match_exact |
 | node_9 | hierarchical knowledge graph | CONCEPT | match_exact |
 ### 2.3 kg_edges.json — 实际输出
 **文件大小**: 129,093 bytes | **边数**: 780
 **数学验证**: 40 个节点全部在同一页 → C(40,2) = 40×39/2 = 780 条边 ✓
 **边格式（实际样例）**:
 ```json
 {
  "source": "node_0",
  "target": "node_1",
  "relation": "CO_OCCURS_IN",
  "doc_id": "8a719db4-2b50-405b-826d-7bb27b224fa0",
  "page": 0
 }
 ```
 **完整性校验结果**:
 - 自环数: 0 ✓
 - 重复边数: 0 ✓
 - 关系类型: 全部为 `CO_OCCURS_IN` ✓
 ---
 ## 3. MinerU Pipeline 关键参数规范
 ### 3.1 输入格式：content_list.json
 MinerU 解析 PDF 后输出的 `{uuid}_content_list.json` 是一个 JSON 数组，每个元素代表一个内容块。
 **text block 结构**:
 ```json
 {
  "type": "text",
  "text": "GraphRAG: Knowledge Graph Enhanced RAG System...",
  "text_level": null,
  "page_idx": 0,
  "bbox": [72, 43, 523, 57]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `type` | string | 块类型：`"text"` \| `"table"` \| `"image"` |
 | `text` | string | 文本内容（末尾可能有空格） |
 | `text_level` | int \| null | `null`=正文，`1`=一级标题 |
 | `page_idx` | int | 页码（从 0 开始） |
 | `bbox` | list[int] | 边界框坐标 `[x0, y0, x1, y1]`（归一化 0-1000） |
 **table block 结构**:
 ```json
 {
  "type": "table",
  "table_body": "<table><tr><th>Method</th><th>Score</th></tr>...</table>",
  "table_caption": [],
  "page_idx": 0,
  "bbox": [72, 400, 523, 500]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `table_body` | string | HTML `<table>` 标签完整内容 |
 | `table_caption` | list | 表格标题（通常为空数组） |
 ### 3.2 关键约束
 - 文件命名: `{uuid}_content_list.json`，UUID 用作 source_doc_id
 - block 排列顺序与 PDF 阅读顺序一致
 - `text` 字段末尾可能有多余空格，需 `.rstrip()` 处理
 - `image` 类型块不含可提取文本，Bridge 跳过处理
 ---
 ## 4. LangExtract Pipeline 关键参数规范
 ### 4.1 模型配置
 ```python
 from langextract.providers.openai import OpenAILanguageModel
 model = OpenAILanguageModel(
    model_id="deepseek-chat",
    api_key=DEEPSEEK_API_KEY,
    base_url="https://api.deepseek.com",
 )
 ```
 **重要**: 必须直接实例化 `OpenAILanguageModel`，不能使用 `model_id` 路由。LangExtract 的 `model_id` 同时用于内部路由和 API 请求参数，DeepSeek 不识别 GPT 模型名称。
 ### 4.2 抽取调用
 ```python
 result = lx.extract(
    text_or_documents=page_text,       # 纯文本字符串
    prompt_description=PROMPT,          # 实体类型描述
    examples=EXAMPLES,                  # Few-shot 示例
    model=model,                        # 直接传入模型实例
    show_progress=True,
 )
 ```
 ### 4.3 Prompt 配置
 ```
 Extract named entities from the text in order of appearance.
 Entity types:
  TECHNOLOGY — software, algorithms, models, tools
  ORGANIZATION — companies, research groups, institutions
  PERSON — individual people
  LOCATION — places, geographic entities
  CONCEPT — technical concepts, methodologies, frameworks
 ```
 ### 4.4 Few-shot 示例
 验证可用的示例（MVP 测试 94.1% match_exact）：
 ```python
 lx.data.ExampleData(
    text="LangChain is a framework created by Harrison Chase for building "
         "LLM applications. It integrates with OpenAI models and Pinecone "
         "vector database for semantic search.",
    extractions=[
        lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
        lx.data.Extraction(extraction_class="PERSON", extraction_text="Harrison Chase"),
        lx.data.Extraction(extraction_class="CONCEPT", extraction_text="LLM applications"),
        lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="OpenAI models"),
        lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="Pinecone"),
        lx.data.Extraction(extraction_class="CONCEPT", extraction_text="semantic search"),
    ],
 )
 ```
 ### 4.5 输出格式：AnnotatedDocument
 每页抽取返回一个 `AnnotatedDocument`，其 `extractions` 列表中每个元素包含：
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `extraction_text` | string | 实体名称（必须为输入文本的精确子串） |
 | `extraction_class` | string | 实体类型（TECHNOLOGY/ORGANIZATION/PERSON/LOCATION/CONCEPT） |
 | `char_interval.start_pos` | int | 在输入文本中的起始字符位置 |
 | `char_interval.end_pos` | int | 在输入文本中的结束字符位置 |
 | `alignment_status` | enum | 对齐质量：`match_exact` \| `match_greater` \| `match_lesser` \| `match_fuzzy` \| `None` |
 | `extraction_index` | int | 抽取序号（从 1 开始） |
 | `group_index` | int | 组序号（从 0 开始） |
 ### 4.6 对齐质量过滤规则
 | alignment_status | 含义 | Bridge 处理 |
 |-----------------|------|------------|
 | `match_exact` | LLM 输出与原文完全匹配 | ✅ 接受 |
 | `match_greater` | LLM 输出短于匹配到的原文片段 | ✅ 接受 |
 | `match_lesser` | LLM 输出长于匹配到的原文片段（仅部分匹配） | ✅ 接受 |
 | `match_fuzzy` | 模糊匹配，偏移不可靠 | ❌ 过滤 |
 | `None` | 无法对齐 | ❌ 过滤 |
 ---
 ## 5. MinerU ↔ LangExtract 接口对接规范
 ### 5.1 核心挑战
 MinerU 输出结构化 JSON 块（含 HTML 表格），而 LangExtract 仅接受纯文本 `str`。Bridge 的 `text_assembler` 模块负责转换和偏移映射。
 ### 5.2 对接转换规则
 | 对接点 | MinerU 规范 | LangExtract 规范 | Bridge 处理 |
 |--------|------------|-----------------|------------|
 | 输入格式 | `content_list.json`（JSON 数组） | 仅接受纯文本 `str` | `text_assembler` 拼接转换 |
 | 文本块 | `block["text"]`，末尾可能有空格 | `extraction_text` 须为原文精确子串 | `.rstrip()` 去尾部空格 |
 | 表格块 | `table_body` 是 `<table>` HTML | 不接受 HTML | BeautifulSoup 转 pipe 分隔纯文本 |
 | 标题判断 | `text_level` 缺失或为 `null`=正文，存在（如 `1`）=标题 | 不区分标题/正文 | 标题和正文一起拼入文本 |
 | 坐标系 | bbox 归一化 0-1000 | char_interval 基于输入字符 | BlockSpan 记录偏移映射 |
 | 分页 | `page_idx` 区分不同页 | 单次调用处理一段文本 | 逐页分别调用 `lx.extract()` |
 | 文件名 | `{uuid}_content_list.json` | — | glob `*_content_list.json` 匹配 |
 ### 5.3 文本拼接算法
 ```
 输入: content_list (按 page_idx 分组)
 输出: PageText 列表
 对每页:
  cursor = 0
  对每个 block (保持原顺序):
    if type == "text":
      block_text = block["text"].rstrip()
    elif type == "table":
      block_text = html_table_to_text(block["table_body"])
    else:
      跳过 (image / equation 等)
    记录 BlockSpan(char_start=cursor, char_end=cursor+len(block_text))
    buffer.append(block_text + "\n")
    cursor += len(block_text) + 1
  PageText.text = "".join(buffer).rstrip("\n")
 ```
 ### 5.4 偏移映射数据结构
 ```python
@dataclasses.dataclass
 class BlockSpan:
    block_index: int    # content_list 数组下标
    block_type: str     # "text" | "table"
    page_idx: int       # 页码
    char_start: int     # 在拼接文本中的起始位置
    char_end: int       # 在拼接文本中的结束位置（不含）
    bbox: list[int]     # MinerU 原始 bbox
@dataclasses.dataclass
 class PageText:
    page_idx: int                   # 页码
    text: str                       # 拼接后的纯文本
    block_spans: list[BlockSpan]    # 每个 block 在 text 中的位置
 ```
 ### 5.5 HTML 表格转换
 ```python
 def html_table_to_text(table_body: str) -> str:
    """Convert <table> HTML → pipe-delimited plain text"""
    soup = BeautifulSoup(table_body, "html.parser")
    rows = []
    for tr in soup.find_all("tr"):
        cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
        rows.append(" | ".join(cells))
    return "\n".join(rows)
 ```
 转换示例：
 ```html
 <table><tr><th>Method</th><th>Score</th></tr><tr><td>GraphRAG</td><td>0.85</td></tr></table>
 ```
 →
 ```
 Method | Score
 GraphRAG | 0.85
 ```
 ---
 ## 6. Bridge Pipeline 最终输出关键参数规范
 ### 6.1 kg_nodes.json
 **文件路径**: `graphrag_pipeline/output/kg_nodes.json`
 **结构**: JSON 数组，每个元素为一个去重后的实体节点。
 | 字段 | 类型 | 说明 | 示例 |
 |------|------|------|------|
 | `id` | string | 节点唯一标识，格式 `node_{index}` | `"node_0"` |
 | `name` | string | 实体名称（原文子串） | `"GraphRAG"` |
 | `type` | string | 实体类型 | `"TECHNOLOGY"` |
 | `source_doc` | string | 来源文档 UUID | `"8a719db4-2b50-405b-826d-7bb27b224fa0"` |
 | `char_start` | int | 在拼接文本中的起始字符位置 | `0` |
 | `char_end` | int | 在拼接文本中的结束字符位置 | `8` |
 | `confidence` | string | 对齐质量（仅 `match_exact`/`match_greater`/`match_lesser`） | `"match_exact"` |
 | `page` | int | 来源页码（从 0 开始） | `0` |
 **去重规则**: key = `(name.lower(), type)`，保留首次出现的实体。
 **实体类型枚举**:
 | 类型 | 说明 |
 |------|------|
 | `TECHNOLOGY` | 软件、算法、模型、工具 |
 | `ORGANIZATION` | 公司、研究机构 |
 | `PERSON` | 个人 |
 | `LOCATION` | 地理位置 |
 | `CONCEPT` | 技术概念、方法论、框架 |
 ### 6.2 kg_edges.json
 **文件路径**: `graphrag_pipeline/output/kg_edges.json`
 **结构**: JSON 数组，每个元素为一条同页共现关系边。
 | 字段 | 类型 | 说明 | 示例 |
 |------|------|------|------|
 | `source` | string | 源节点 ID | `"node_0"` |
 | `target` | string | 目标节点 ID | `"node_1"` |
 | `relation` | string | 关系类型（固定 `"CO_OCCURS_IN"`） | `"CO_OCCURS_IN"` |
 | `doc_id` | string | 来源文档 UUID | `"8a719db4-..."` |
 | `page` | int | 共现页码 | `0` |
 **边生成规则**:
 1. 按页分组所有去重后的节点 ID
 2. 同页节点两两配对 → 生成 `CO_OCCURS_IN` 边
 3. 边方向规范化: `source < target`（字典序）
 4. 去重 key: `(source, target, doc_id, page)`
 5. 无自环（source ≠ target）
 **边数公式**: 若某页有 N 个节点，则该页产生 C(N,2) = N×(N-1)/2 条边。
 ### 6.3 输出完整性约束
 | 约束 | 说明 |
 |------|------|
 | 节点 ID 唯一 | 每个节点的 `id` 字段全局唯一 |
 | 边引用合法 | 每条边的 `source` 和 `target` 必须对应存在的节点 `id` |
 | 无自环 | 不存在 `source == target` 的边 |
 | 无重复边 | 同一 `(source, target, doc_id, page)` 组合仅出现一次 |
 | 对齐质量保证 | 所有节点的 `confidence` 仅为 accepted 值（非 fuzzy/null） |
 | char 偏移有效 | `char_start < char_end`，且可定位到拼接文本中的实体子串 |
 ---
 ## 7. 虚拟环境规范
 Bridge Pipeline **复用 LangExtract 的虚拟环境**，不单独创建 venv。
 | 项目 | 值 |
 |------|------|
 | 虚拟环境路径 | `F:\GraphRAGAgent\langextract_src\.venv\` |
 | Python 版本 | 3.12 |
 | 核心依赖 | `langextract[all]`、`beautifulsoup4`、`python-dotenv` |
 | 安装新依赖 | `uv pip install <pkg> --python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe` |
 **所有 Python 命令必须使用该虚拟环境运行，禁止使用全局 Python 或其他组件的 venv。**
 ---
 ## 8. 环境配置
 ### 8.1 .env 文件
 位置: `F:\GraphRAGAgent\graphrag_pipeline\.env`
 ```env
 DEEPSEEK_API_KEY=<your-api-key>
 DEEPSEEK_BASE_URL=https://api.deepseek.com
 ```
 ### 8.2 依赖安装
 ```bash
 uv pip install beautifulsoup4 python-dotenv --python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
 ```
 ---
 ## 9. 测试验证清单
 - [x] text_assembler 正确读取 content_list.json（10 blocks: 9 text + 1 table）
 - [x] 表格 HTML 转为 pipe 分隔纯文本，无 HTML 标签残留
 - [x] 按页拼接文本长度合理（2102 字符/页）
 - [x] LangExtract 成功调用 DeepSeek 返回 AnnotatedDocument
 - [x] 抽取实体数 45，match_exact 占比 > 95%
 - [x] kg_nodes.json 节点已去重（40 个），每个节点有完整字段
 - [x] kg_edges.json 边为 CO_OCCURS_IN 关系（780 条），无自环，无重复
 - [x] match_fuzzy 对齐的实体已被过滤（1 个）
--- a/docs/frontend_design_specification-v1.0.md
+++ b/docs/frontend_design_specification-v1.0.md
--- a/docs/langextract_specification-v1.0.md
+++ b/docs/langextract_specification-v1.0.md
@@ -0,0 +1,604 @@
 # LangExtract Pipeline 规范文档 v1.0
 > 基于 [google/langextract](https://github.com/google/langextract) 源码分析 + MVP 实测验证
 > 版本基线：2026-03-04 main 分支
 > 本地源码路径：`F:\GraphRAGAgent\langextract_src\`
 > 测试脚本路径：`F:\GraphRAGAgent\langextract_src\mvp_test_deepseek.py`
 ---
 ## 目录
 - [〇、虚拟环境](#〇虚拟环境)
 - [一、Pipeline 执行流程](#一pipeline-执行流程)
  - [1.1 完整执行链路](#11-完整执行链路)
  - [1.2 MVP 测试脚本](#12-mvp-测试脚本)
  - [1.3 输入规范](#13-输入规范)
  - [1.4 不支持的输入格式](#14-不支持的输入格式)
 - [二、模型接入规范](#二模型接入规范)
  - [2.1 模型路由机制](#21-模型路由机制)
  - [2.2 DeepSeek 接入（实测验证）](#22-deepseek-接入实测验证)
  - [2.3 路由陷阱与规避方案](#23-路由陷阱与规避方案)
  - [2.4 OpenAI Provider 构造参数](#24-openai-provider-构造参数)
 - [三、关键参数规范](#三关键参数规范)
  - [3.1 extract() 核心参数](#31-extract-核心参数)
  - [3.2 ExampleData 示例数据格式](#32-exampledata-示例数据格式)
  - [3.3 Extraction 示例条目格式](#33-extraction-示例条目格式)
  - [3.4 分块参数](#34-分块参数)
  - [3.5 Resolver 对齐参数](#35-resolver-对齐参数)
 - [四、输出数据格式规范](#四输出数据格式规范)
  - [4.1 JSONL 输出文件（实际生成）](#41-jsonl-输出文件实际生成)
  - [4.2 AnnotatedDocument 顶层结构](#42-annotateddocument-顶层结构)
  - [4.3 Extraction 字段规范（实测对比）](#43-extraction-字段规范实测对比)
  - [4.4 CharInterval 字符锚点](#44-charinterval-字符锚点)
  - [4.5 AlignmentStatus 对齐状态枚举](#45-alignmentstatus-对齐状态枚举)
  - [4.6 extraction_summary.json（自定义摘要）](#46-extraction_summaryjson自定义摘要)
 - [五、本地生成文件清单](#五本地生成文件清单)
 - [附录：环境变量与常量速查](#附录环境变量与常量速查)
 ---
 ## 〇、虚拟环境
 本组件使用独立的 Python 虚拟环境，与 MinerU MVP 等其他组件隔离；仅 GraphRAG Pipeline（Bridge Pipeline、Agentic-RAG）复用本环境。
 **所有 Python 命令必须在子虚拟环境中运行，禁止使用全局 Python 或其他组件的 venv。**
 ### 环境信息
 - 虚拟环境路径：`F:\GraphRAGAgent\langextract_src\.venv\`
 - Python 版本：3.12
 - 创建工具：uv
 - 安装方式：`uv pip install -e ".[all]"` （含 openai、google-genai 等 60 个包）
 ### 运行方式
 **方式一：直接使用 venv 内的 Python 解释器（推荐）**
 ```bash
 F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe mvp_test_deepseek.py
 ```
 **方式二：先激活环境再运行**
 ```bash
 cd F:/GraphRAGAgent/langextract_src
 source .venv/Scripts/activate
 python mvp_test_deepseek.py
 ```
 ### 安装新依赖
 ```bash
 uv pip install <package> --python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
 ```
 ---
 ## 一、Pipeline 执行流程
 ### 1.1 完整执行链路
 基于 MVP 实测验证的完整 Pipeline 分为 6 个步骤（Step 0 ~ Step 5）：
 ```
 Step 0: 激活虚拟环境
  └── F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
 Step 1: 准备输入
  ├── 构造纯文本字符串（str）
  ├── 或构造 Document 对象列表
  └── LangExtract 仅接受纯文本，PDF/DOCX 等需前置解析
 Step 2: 构造 Few-shot 示例
  ├── 创建 ExampleData 对象列表
  ├── 每个 ExampleData 包含：text（示例文本） + extractions（标注实体列表）
  └── extraction_text 必须是 text 的精确子串
 Step 3: 配置模型并调用 extract()
  ├── 直接实例化 OpenAILanguageModel（DeepSeek 场景）
  ├── 传入 model_id="deepseek-chat", base_url, api_key
  └── 调用 lx.extract(text_or_documents=..., examples=..., model=model)
 Step 4: LangExtract 内部处理
  ├── 文本分块（基于句子边界，max_char_buffer=1000）
  ├── 构造 Prompt（含 prompt_description + examples）
  ├── 调用 LLM 推理（JSON 格式输出）
  ├── 解析 LLM JSON 响应为 Extraction 对象
  └── 字符级对齐（char_interval + alignment_status）
 Step 5: 保存输出
  ├── lx.io.save_annotated_documents() → JSONL 文件
  └── 自定义 JSON 摘要（可选）
 ```
 ### 1.2 MVP 测试脚本
 **文件路径：** `F:\GraphRAGAgent\langextract_src\mvp_test_deepseek.py`
 **执行命令：**
 ```bash
 F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe mvp_test_deepseek.py
 ```
 **脚本核心流程：**
 ```python
 from langextract.providers.openai import OpenAILanguageModel
 # Step 1: 直接实例化 OpenAI Provider（指向 DeepSeek）
 model = OpenAILanguageModel(
    model_id="deepseek-chat",
    api_key="sk-...",
    base_url="https://api.deepseek.com",
 )
 # Step 2: 构造示例数据
 examples = [
    lx.data.ExampleData(
        text="LangChain is a framework created by Harrison Chase...",
        extractions=[
            lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
            lx.data.Extraction(extraction_class="ORGANIZATION", extraction_text="Harrison Chase"),
            ...
        ],
    )
 ]
 # Step 3: 调用抽取
 result = lx.extract(
    text_or_documents=input_text,
    prompt_description="Extract named entities...",
    examples=examples,
    model=model,
    show_progress=True,
 )
 # Step 4: 保存结果
 lx.io.save_annotated_documents([result], output_name="graphrag_entities.jsonl", output_dir="mvp_output")
 ```
 **实测结果：**
 | 指标 | 值 |
 |------|-----|
 | 输入文本长度 | 520 字符 |
 | 模型 | deepseek-chat |
 | 耗时 | 21.6 秒 |
 | 提取实体数 | 17 |
 | 实体类型分布 | TECHNOLOGY: 9, CONCEPT: 7, ORGANIZATION: 1 |
 | 精确匹配率 | 16/17 (94.1%) — 仅 1 个 match_fuzzy |
 | 输出文件 | 2 个（JSONL + JSON 摘要） |
 ### 1.3 输入规范
 LangExtract **仅接受纯文本**作为输入，支持以下 4 种传入方式：
 | 输入方式 | 示例 | 说明 |
 |---------|------|------|
 | **纯文本字符串** | `extract("这是一段文本...")` | 直接传入文本内容（MVP 实测使用此方式） |
 | **URL** | `extract("https://example.com/article.txt")` | 自动下载 URL 文本内容（`fetch_urls=True`） |
 | **Document 对象** | `extract([Document(text="...", document_id="doc1")])` | 传入 Document 可迭代集合 |
 | **CSV 文件** | 通过 `Dataset` 类加载后传入 | 指定 text 列和 id 列 |
 ### 1.4 不支持的输入格式
 以下格式 **不被支持**，需要在 LangExtract 之前通过外部工具预处理为纯文本：
 | 格式 | 状态 | 预处理方案 |
 |------|------|-----------|
 | PDF | ❌ 不支持 | 使用 MinerU / PyMuPDF 先转文本 |
 | DOCX | ❌ 不支持 | 使用 python-docx 先转文本 |
 | HTML | ❌ 不支持 | 使用 BeautifulSoup 先提取文本 |
 | 图片 | ❌ 不支持 | 使用 OCR 工具先识别文本 |
 | Markdown（含媒体） | ❌ 不支持 | 需提取纯文本部分 |
 | Excel / JSON | ❌ 不支持 | 需序列化为纯文本 |
 ---
 ## 二、模型接入规范
 ### 2.1 模型路由机制
 文件路径：`langextract/providers/patterns.py`
 LangExtract 通过 **正则匹配 `model_id`** 自动路由到对应的 Provider：
 | Provider | 匹配模式 | 优先级 | 示例模型 |
 |----------|---------|--------|---------|
 | **Gemini** | `^gemini` | 10 | `gemini-2.5-flash`, `gemini-1.5-pro` |
 | **OpenAI** | `^gpt-4`, `^gpt4.`, `^gpt-5`, `^gpt5.` | 10 | `gpt-4o`, `gpt-4o-mini` |
 | **Ollama** | `gemma`, `llama`, `mistral`, `phi`, `qwen`, `deepseek` 等 | 10 | `gemma2:2b`, `llama3.2:1b` |
 ### 2.2 DeepSeek 接入（实测验证）
 > **重要发现：** 规范文档 v0 中描述的 `model_id="gpt-4o-mini"` + `language_model_params={"base_url": ...}` 方式 **实测不可用**，因为 `model_id` 同时用于路由和 API 调用，DeepSeek 不识别 `gpt-4o-mini` 模型名。
 **正确方式 — 直接实例化 OpenAI Provider：**
 ```python
 from langextract.providers.openai import OpenAILanguageModel
 model = OpenAILanguageModel(
    model_id="deepseek-chat",           # DeepSeek 实际模型名
    api_key="sk-your-deepseek-key",
    base_url="https://api.deepseek.com",
 )
 result = lx.extract(
    text_or_documents="...",
    examples=[...],
    model=model,                         # 通过 model 参数传入，绕过路由
    show_progress=True,
 )
 ```
 **实测验证状态：** DeepSeek `deepseek-chat` 模型通过此方式成功完成实体抽取，JSON 格式输出正常。
 ### 2.3 路由陷阱与规避方案
 | 方案 | 能否工作 | 原因 |
 |------|---------|------|
 | `model_id="gpt-4o-mini"` + `language_model_params={"base_url": "https://api.deepseek.com"}` | **不能** | `model_id` 被同时用作 API 调用的 `model` 参数，DeepSeek 返回 `400 Model Not Exist` |
 | `config=ModelConfig(model_id="deepseek-chat", provider="openai")` | **不能** | `_create_model_with_schema()` 中使用 `provider` 时未先调用 `load_builtins_once()`，导致 `No provider found` 错误（LangExtract 内部 bug） |
 | `model=OpenAILanguageModel(model_id="deepseek-chat", ...)` | **可以** | 直接实例化绕过路由，`model_id` 正确传递给 DeepSeek API |
 ### 2.4 OpenAI Provider 构造参数
 文件路径：`langextract/providers/openai.py`
 ```python
 class OpenAILanguageModel(BaseLanguageModel):
    def __init__(
        self,
        model_id: str = 'gpt-4o-mini',
        api_key: str | None = None,
        base_url: str | None = None,
        organization: str | None = None,
        format_type: FormatType = FormatType.JSON,
        temperature: float | None = None,
        max_workers: int = 10,
        **kwargs,
    )
 ```
 | 参数 | 默认值 | 说明 |
 |------|--------|------|
 | `model_id` | `gpt-4o-mini` | 模型标识（同时作为 API 调用的 model 参数） |
 | `api_key` | `None` | 环境变量：`OPENAI_API_KEY` 或 `LANGEXTRACT_API_KEY` |
 | `base_url` | `None` | 自定义 API 端点（DeepSeek 使用 `https://api.deepseek.com`） |
 | `temperature` | `None` | 采样温度 |
 | `format_type` | `JSON` | 输出格式（JSON Mode） |
 ---
 ## 三、关键参数规范
 ### 3.1 extract() 核心参数
 文件路径：`langextract/extraction.py`
 ```python
 def extract(
    text_or_documents: typing.Any,          # 必填：纯文本或 Document 列表
    prompt_description: str | None = None,  # 抽取提示词
    examples: typing.Sequence[Any] | None = None,  # 必填：Few-shot 示例
    model_id: str = "gemini-2.5-flash",     # 模型标识（用于路由）
    api_key: str | None = None,             # API Key
    model: typing.Any = None,               # 预配置的模型实例（最高优先级）
    max_char_buffer: int = 1000,            # 分块最大字符数
    temperature: float | None = None,       # 采样温度
    batch_length: int = 10,                 # 每批分块数
    max_workers: int = 10,                  # 最大并行线程
    additional_context: str | None = None,  # 附加上下文
    resolver_params: dict | None = None,    # 对齐参数
    language_model_params: dict | None = None,  # Provider 构造参数
    extraction_passes: int = 1,             # 抽取轮次
    context_window_chars: int | None = None, # 上下文窗口
    config: typing.Any = None,              # ModelConfig 实例
    model_url: str | None = None,           # 自托管端点
    show_progress: bool = True,             # 显示进度条
    ...
 ) -> list[AnnotatedDocument] | AnnotatedDocument
 ```
 **MVP 实测使用的参数组合：**
 | 参数 | 实测值 | 说明 |
 |------|--------|------|
 | `text_or_documents` | 520 字符纯文本 | GraphRAG 领域相关文本 |
 | `prompt_description` | `"Extract named entities..."` | 指定 TECHNOLOGY/ORGANIZATION/CONCEPT 三类 |
 | `examples` | 1 个 ExampleData（含 6 个 Extraction） | Few-shot 示例 |
 | `model` | `OpenAILanguageModel` 实例 | 直接实例化，指向 DeepSeek |
 | `show_progress` | `True` | 显示进度 |
 | `max_char_buffer` | 1000（默认） | 文本未超过阈值，未触发分块 |
 ### 3.2 ExampleData 示例数据格式
 文件路径：`langextract/core/data.py`
 ```python
@dataclasses.dataclass
 class ExampleData:
    text: str                                    # 示例文本（必填）
    extractions: list[Extraction]                # 标注的实体列表（必填）
 ```
 **MVP 实测示例：**
 ```python
 lx.data.ExampleData(
    text="LangChain is a framework created by Harrison Chase for building "
         "LLM applications. It integrates with OpenAI models and Pinecone "
         "vector database for semantic search.",
    extractions=[
        lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
        lx.data.Extraction(extraction_class="ORGANIZATION", extraction_text="Harrison Chase"),
        lx.data.Extraction(extraction_class="CONCEPT", extraction_text="LLM applications"),
        lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="OpenAI models"),
        lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="Pinecone"),
        lx.data.Extraction(extraction_class="CONCEPT", extraction_text="semantic search"),
    ],
 )
 ```
 **约束条件：**
 - `extraction_text` **必须是** `text` 的精确子串（否则对齐失败）
 - `extraction_class` 为自定义字符串，无预定义枚举
 - `examples` 列表不能为空（否则抛出 `ValueError`）
 - 每个 ExampleData 可包含多个不同 `extraction_class` 的条目
 ### 3.3 Extraction 示例条目格式
 ```python
@dataclasses.dataclass(init=False)
 class Extraction:
    extraction_class: str                     # 必填：实体类型
    extraction_text: str                      # 必填：实体文本（须为原文子串）
    attributes: dict[str, str | list[str]] | None = None  # 可选：附加属性
    description: str | None = None            # 可选：实体描述
 ```
 在 examples 中创建时只需要 `extraction_class` 和 `extraction_text`，其余字段由 LangExtract 在推理后自动填充。
 ### 3.4 分块参数
 文件路径：`langextract/chunking.py`
 LangExtract 使用基于 **句子边界** 的确定性分块策略：
 | 参数 | 默认值 | 说明 |
 |------|--------|------|
 | `max_char_buffer` | 1000 | 每个分块最大字符数 |
 | `context_window_chars` | `None` | 前一分块的上下文窗口（用于指代消解） |
 | `batch_length` | 10 | 每批处理的分块数 |
 **分块策略：**
 1. 如果单个句子超过 `max_char_buffer`，按换行符拆分
 2. 如果单个 token 超过 `max_char_buffer`，该 token 独占一个分块
 3. 如果多个句子可以放入 `max_char_buffer`，合并为一个分块
 > **MVP 实测：** 输入文本 520 字符 < `max_char_buffer`（1000），整段文本作为单一分块处理，未触发分块逻辑。
 ### 3.5 Resolver 对齐参数
 通过 `extract()` 的 `resolver_params` 字典传入：
 | 参数 | 类型 | 默认值 | 说明 |
 |------|------|--------|------|
 | `enable_fuzzy_alignment` | `bool` | `True` | 精确匹配失败后是否尝试模糊匹配 |
 | `fuzzy_alignment_threshold` | `float` | `0.75` | 模糊匹配最低 token 重叠比率 |
 | `accept_match_lesser` | `bool` | `True` | 是否接受部分精确匹配 |
 | `suppress_parse_errors` | `bool` | `False` | JSON 解析失败时是否继续 |
 > **MVP 实测：** 未传入 `resolver_params`，使用全部默认值。17 个抽取中 16 个 `match_exact`，1 个 `match_fuzzy`（"Microsoft Research"）。
 ---
 ## 四、输出数据格式规范
 ### 4.1 JSONL 输出文件（实际生成）
 **文件路径：** `mvp_output/graphrag_entities.jsonl`
 **文件大小：** 4,650 bytes
 **格式：** JSONL（JSON Lines），每行一个完整的 JSON 对象
 保存 API：
 ```python
 lx.io.save_annotated_documents(
    [result],
    output_name="graphrag_entities.jsonl",
    output_dir="mvp_output"
 )
 ```
 ### 4.2 AnnotatedDocument 顶层结构
 **实际 JSONL 输出的顶层字段（基于本地生成文件）：**
 | 字段 | 类型 | 实测值 | 说明 |
 |------|------|--------|------|
 | `text` | `string` | 520 字符 | 原始输入文本（完整保留） |
 | `document_id` | `string` | `"doc_8498f2b6"` | 自动生成，格式 `doc_{uuid_hex[:8]}` |
 | `extractions` | `array[Extraction]` | 17 个元素 | 抽取的实体列表 |
 > **注意：** JSONL 中字段顺序为 `extractions` → `text` → `document_id`（与 dataclass 定义顺序不同，以实际输出为准）。
 ### 4.3 Extraction 字段规范（实测对比）
 **实际输出的单条 Extraction 完整结构（摘自本地 JSONL 文件）：**
 ```json
 {
  "extraction_class": "TECHNOLOGY",
  "extraction_text": "GraphRAG",
  "char_interval": {
    "start_pos": 0,
    "end_pos": 8
  },
  "alignment_status": "match_exact",
  "extraction_index": 1,
  "group_index": 0,
  "description": null,
  "attributes": {}
 }
 ```
 **实测字段对比（官方 Schema vs 实际输出）：**
 | 字段 | 官方 Schema | 实际输出 | 差异说明 |
 |------|------------|---------|---------|
 | `extraction_class` | `string` | `string` | 一致 |
 | `extraction_text` | `string` | `string` | 一致 |
 | `char_interval` | `object \| null` | `object`（始终存在） | 实测 17 个全部有值 |
 | `alignment_status` | `string \| null` | `string`（始终存在） | 实测 17 个全部有值 |
 | `extraction_index` | `int \| null` | `int`（从 1 开始） | **实测从 1 开始，非 0** |
 | `group_index` | `int \| null` | `int`（从 0 开始） | 实测从 0 开始递增 |
 | `description` | `string \| null` | `null` | 未使用 description 提示时为 null |
 | `attributes` | `dict \| null` | `{}`（空对象） | **实测为空对象 `{}`，非 `null`** |
 | `token_interval` | `object \| null` | **不存在** | **实际 JSONL 输出中无此字段** |
 **关键差异总结：**
 1. `extraction_index` 从 **1** 开始（非 0）
 2. `attributes` 未使用时输出空对象 `{}`（非 `null`）
 3. `token_interval` 字段 **不在 JSONL 输出中**（仅存在于内存对象）
 ### 4.4 CharInterval 字符锚点
 ```json
 {
  "start_pos": 0,
  "end_pos": 8
 }
 ```
 - `start_pos`：起始位置（包含），0-indexed
 - `end_pos`：结束位置（不包含）
 - 语义：`source_text[start_pos:end_pos]` 即为实体在原文中的精确位置
 **实测验证（以 "GraphRAG" 为例）：**
 ```python
 text = "GraphRAG is an advanced..."
 text[0:8]  # → "GraphRAG"  ✓ 匹配
 ```
 ### 4.5 AlignmentStatus 对齐状态枚举
 | 状态值 | 序列化值 | 含义 | 可信度 | MVP 实测数量 |
 |--------|---------|------|--------|-------------|
 | `MATCH_EXACT` | `"match_exact"` | LLM 输出与原文完全匹配 | 最高 | **16** |
 | `MATCH_GREATER` | `"match_greater"` | LLM 输出短于匹配到的原文 | 高 | 0 |
 | `MATCH_LESSER` | `"match_lesser"` | LLM 输出长于匹配到的原文 | 中 | 0 |
 | `MATCH_FUZZY` | `"match_fuzzy"` | 模糊匹配 | 低 | **1** |
 | `None` | `null` | 未找到对齐 | 不可信 | 0 |
 > **实测精确匹配率：** 16/17 = 94.1%。唯一的 `match_fuzzy` 是 "Microsoft Research"。
 ### 4.6 extraction_summary.json（自定义摘要）
 **文件路径：** `mvp_output/extraction_summary.json`
 **文件大小：** 2,863 bytes
 此文件由 MVP 测试脚本自行生成（非 LangExtract 原生输出），结构如下：
 ```json
 {
  "total_extractions": 17,
  "extraction_classes": {
    "TECHNOLOGY": 9,
    "ORGANIZATION": 1,
    "CONCEPT": 7
  },
  "extractions": [
    {
      "class": "TECHNOLOGY",
      "text": "GraphRAG",
      "char_start": 0,
      "char_end": 8,
      "alignment": "match_exact"
    }
  ]
 }
 ```
 ---
 ## 五、本地生成文件清单
 MVP 测试后本地实际生成的文件（共 2 个输出文件）：
 ```
 langextract_src/
 ├── .env                            # DeepSeek API Key 配置
 ├── .venv/                          # 独立虚拟环境（Python 3.12）
 ├── mvp_test_deepseek.py            # MVP 测试脚本
 └── mvp_output/                     # 输出目录
    ├── graphrag_entities.jsonl     # LangExtract 原生 JSONL 输出（4,650 bytes）
    └── extraction_summary.json    # 自定义 JSON 摘要（2,863 bytes）
 ```
 | 文件 | 大小 | 来源 | 说明 |
 |------|------|------|------|
 | `graphrag_entities.jsonl` | 4,650 bytes | `lx.io.save_annotated_documents()` | LangExtract 原生输出，1 行 JSONL，含 17 个 Extraction |
 | `extraction_summary.json` | 2,863 bytes | MVP 脚本自定义 | 扁平化摘要，含类型分布统计 |
 ---
 ## 附录：环境变量与常量速查
 ### 环境变量
 | 变量名 | 适用 Provider | 说明 |
 |--------|--------------|------|
 | `LANGEXTRACT_API_KEY` | 所有 | 通用 API Key 后备 |
 | `GEMINI_API_KEY` | Gemini | Gemini API Key |
 | `OPENAI_API_KEY` | OpenAI | OpenAI / DeepSeek API Key |
 | `OLLAMA_BASE_URL` | Ollama | Ollama 服务地址（默认 `http://localhost:11434`） |
 ### .env 配置（MVP 实测）
 ```env
 OPENAI_API_KEY=<your-deepseek-api-key>
 ```
 ### 模型优先级
 ```
 model（预配置的模型实例） > config（ModelConfig 实例） > model_id + api_key
 ```
 > **MVP 实测使用 `model` 参数**（最高优先级），直接传入 `OpenAILanguageModel` 实例。
 ### 结构化输出支持
 | Provider | Schema 类型 | 结构化输出模式 |
 |----------|------------|---------------|
 | Gemini | `GeminiSchema` | 严格结构化输出 |
 | OpenAI | JSON Mode | 通过 `response_format` 约束 |
 | Ollama | `FormatModeSchema` | JSON 模式（非严格） |
 ### 17 个实测抽取实体完整列表
 | # | extraction_class | extraction_text | char_interval | alignment_status |
 |---|-----------------|-----------------|---------------|-----------------|
 | 1 | TECHNOLOGY | GraphRAG | [0, 8] | match_exact |
 | 2 | ORGANIZATION | Microsoft Research | [75, 93] | match_fuzzy |
 | 3 | CONCEPT | retrieval-augmented generation | [24, 54] | match_exact |
 | 4 | CONCEPT | knowledge graphs | [107, 123] | match_exact |
 | 5 | TECHNOLOGY | GPT-4 | [156, 161] | match_exact |
 | 6 | CONCEPT | multi-hop reasoning | [172, 191] | match_exact |
 | 7 | CONCEPT | community detection algorithms | [209, 239] | match_exact |
 | 8 | TECHNOLOGY | Leiden clustering | [248, 265] | match_exact |
 | 9 | TECHNOLOGY | MinerU | [315, 321] | match_exact |
 | 10 | TECHNOLOGY | LangExtract | [344, 355] | match_exact |
 | 11 | TECHNOLOGY | Neo4j | [383, 388] | match_exact |
 | 12 | CONCEPT | graph database | [396, 410] | match_exact |
 | 13 | CONCEPT | pipeline | [424, 432] | match_exact |
 | 14 | TECHNOLOGY | PDF documents | [443, 456] | match_exact |
 | 15 | TECHNOLOGY | OCR | [465, 468] | match_exact |
 | 16 | TECHNOLOGY | NLP | [473, 476] | match_exact |
 | 17 | CONCEPT | knowledge graph | [504, 519] | match_exact |
--- a/docs/langextract_specification.md
+++ b/docs/langextract_specification.md
@@ -0,0 +1,672 @@
 # LangExtract Pipeline 规范文档
 > 基于 [google/langextract](https://github.com/google/langextract) 源码分析
 > 版本基线：2026-03-04 main 分支
 ---
 ## 目录
 - [一、输入规范](#一输入规范)
  - [1.1 核心入口函数签名](#11-核心入口函数签名)
  - [1.2 支持的输入类型](#12-支持的输入类型)
  - [1.3 Document 数据结构](#13-document-数据结构)
  - [1.4 CSV Dataset 输入](#14-csv-dataset-输入)
  - [1.5 URL 文本下载](#15-url-文本下载)
  - [1.6 分块参数配置](#16-分块参数配置)
  - [1.7 不支持的输入格式](#17-不支持的输入格式)
 - [二、模型接入规范](#二模型接入规范)
  - [2.1 模型路由机制](#21-模型路由机制)
  - [2.2 Gemini Provider](#22-gemini-provider)
  - [2.3 OpenAI Provider](#23-openai-provider)
  - [2.4 Ollama Provider](#24-ollama-provider)
  - [2.5 OpenAI 兼容接口适配（DeepSeek 等）](#25-openai-兼容接口适配deepseek-等)
  - [2.6 模型优先级与配置覆盖关系](#26-模型优先级与配置覆盖关系)
  - [2.7 关于 Embedding 模型](#27-关于-embedding-模型)
 - [三、输出数据格式规范](#三输出数据格式规范)
  - [3.1 AnnotatedDocument 结构](#31-annotateddocument-结构)
  - [3.2 Extraction 结构](#32-extraction-结构)
  - [3.3 CharInterval 字符锚点](#33-charinterval-字符锚点)
  - [3.4 AlignmentStatus 对齐状态枚举](#34-alignmentstatus-对齐状态枚举)
  - [3.5 Resolver 对齐参数](#35-resolver-对齐参数)
  - [3.6 JSONL 输出文件格式](#36-jsonl-输出文件格式)
  - [3.7 完整输出 JSON Schema 示例](#37-完整输出-json-schema-示例)
  - [3.8 HTML 可视化输出](#38-html-可视化输出)
 - [附录：环境变量与常量速查](#附录环境变量与常量速查)
 ---
 ## 一、输入规范
 ### 1.1 核心入口函数签名
 文件路径：`langextract/extraction.py`
 ```python
 def extract(
    text_or_documents: typing.Any,
    prompt_description: str | None = None,
    examples: typing.Sequence[typing.Any] | None = None,
    model_id: str = "gemini-2.5-flash",
    api_key: str | None = None,
    language_model_type: typing.Type[typing.Any] | None = None,  # 已废弃
    format_type: typing.Any = None,
    max_char_buffer: int = 1000,
    temperature: float | None = None,
    fence_output: bool | None = None,
    use_schema_constraints: bool = True,
    batch_length: int = 10,
    max_workers: int = 10,
    additional_context: str | None = None,
    resolver_params: dict | None = None,
    language_model_params: dict | None = None,
    debug: bool = False,
    model_url: str | None = None,
    extraction_passes: int = 1,
    context_window_chars: int | None = None,
    config: typing.Any = None,
    model: typing.Any = None,
    *,
    fetch_urls: bool = True,
    prompt_validation_level: PromptValidationLevel = PromptValidationLevel.WARNING,
    prompt_validation_strict: bool = False,
    show_progress: bool = True,
    tokenizer: Tokenizer | None = None,
 ) -> list[AnnotatedDocument] | AnnotatedDocument
 ```
 **关键参数说明：**
 | 参数 | 类型 | 默认值 | 说明 |
 |------|------|--------|------|
 | `text_or_documents` | `Any` | **必填** | 纯文本字符串、URL、或 `Document` 对象的可迭代集合 |
 | `prompt_description` | `str \| None` | `None` | 抽取提示词，描述需要抽取什么实体 |
 | `examples` | `Sequence[Any] \| None` | `None` | **必填** — Few-shot 示例列表（为空则抛出 ValueError） |
 | `model_id` | `str` | `"gemini-2.5-flash"` | 模型标识符，用于自动路由到对应 Provider |
 | `api_key` | `str \| None` | `None` | LLM API Key（也可通过环境变量设置） |
 | `max_char_buffer` | `int` | `1000` | 每个文本分块的最大字符数 |
 | `temperature` | `float \| None` | `None` | 采样温度（`None` 使用模型默认值） |
 | `use_schema_constraints` | `bool` | `True` | 是否启用结构化输出约束 |
 | `batch_length` | `int` | `10` | 每批处理的文本分块数量 |
 | `max_workers` | `int` | `10` | 最大并行工作线程数 |
 | `additional_context` | `str \| None` | `None` | 附加到推理提示词中的上下文信息 |
 | `resolver_params` | `dict \| None` | `None` | 对齐解析器参数（见 [3.5 节](#35-resolver-对齐参数)） |
 | `extraction_passes` | `int` | `1` | 抽取轮次（>1 时多次抽取并合并非重叠结果） |
 | `context_window_chars` | `int \| None` | `None` | 前一分块的上下文窗口字符数（用于指代消解） |
 | `model_url` | `str \| None` | `None` | 自托管模型的 API 端点 URL |
 | `fetch_urls` | `bool` | `True` | 是否自动下载 http(s) URL 内容 |
 ---
 ### 1.2 支持的输入类型
 LangExtract **仅接受纯文本**作为输入，支持以下 4 种传入方式：
 | 输入方式 | 示例 | 说明 |
 |---------|------|------|
 | **纯文本字符串** | `extract("这是一段文本...")` | 直接传入文本内容 |
 | **URL** | `extract("https://example.com/article.txt")` | 自动下载 URL 文本内容（`fetch_urls=True`） |
 | **Document 对象** | `extract([Document(text="...", document_id="doc1")])` | 传入 Document 可迭代集合 |
 | **CSV 文件** | 通过 `Dataset` 类加载后传入 | 指定 text 列和 id 列 |
 ---
 ### 1.3 Document 数据结构
 文件路径：`langextract/core/data.py`
 ```python
@dataclasses.dataclass
 class Document:
    text: str                                    # 必填 — 原始文本内容
    additional_context: str | None = None        # 可选 — 附加上下文
    document_id: str                             # 自动生成 — 格式 "doc_{uuid_hex[:8]}"
    tokenized_text: TokenizedText                # 惰性计算 — 分词后的文本
 ```
 **字段说明：**
 - `text`：**必填**，原始文本内容，类型为 `str`
 - `additional_context`：可选，会附加到推理提示词中
 - `document_id`：通过 property 访问，未设置时自动生成格式为 `doc_{uuid_hex[:8]}` 的唯一 ID
 - `tokenized_text`：通过 property 惰性计算，使用配置的 Tokenizer 进行分词
 ---
 ### 1.4 CSV Dataset 输入
 文件路径：`langextract/io.py`
 ```python
@dataclasses.dataclass(frozen=True)
 class Dataset:
    input_path: pathlib.Path   # CSV 文件路径
    id_key: str                # 文档 ID 对应的列名
    text_key: str              # 文本内容对应的列名
    def load(self, delimiter: str = ',') -> Iterator[Document]:
        """仅支持 .csv 后缀文件，其他格式抛出 NotImplementedError"""
 ```
 **CSV 文件要求：**
 - 文件后缀必须为 `.csv`
 - 必须包含 `text_key` 指定的文本列和 `id_key` 指定的 ID 列
 - 默认分隔符为逗号（`,`），可通过 `delimiter` 参数修改
 - 其他文件格式会直接抛出 `NotImplementedError`
 ---
 ### 1.5 URL 文本下载
 文件路径：`langextract/io.py`
 ```python
 def download_text_from_url(
    url: str,
    timeout: int = 30,           # 默认超时 30 秒
    show_progress: bool = True,
    chunk_size: int = 8192,
 ) -> str
 ```
 **URL 要求：**
 - 必须以 `http://` 或 `https://` 开头
 - 仅下载文本内容（`response.text`），不解析 HTML/PDF 等
 - 需要 `fetch_urls=True`（默认开启）
 ---
 ### 1.6 分块参数配置
 文件路径：`langextract/chunking.py`
 LangExtract 使用基于**句子边界**的确定性分块策略（非语义分块），核心类为 `ChunkIterator`：
 ```python
 class ChunkIterator:
    def __init__(
        self,
        text: str | TokenizedText | None,
        max_char_buffer: int,           # 每个分块最大字符数
        tokenizer_impl: Tokenizer,      # 分词器实例
        document: Document | None = None,
    )
 ```
 **分块策略：**
 1. 如果单个句子超过 `max_char_buffer`，按换行符拆分，同时尊重 token 边界
 2. 如果单个 token 超过 `max_char_buffer`，该 token 独占一个分块
 3. 如果多个句子可以放入 `max_char_buffer`，合并为一个分块
 **TextChunk 输出结构：**
 ```python
@dataclasses.dataclass
 class TextChunk:
    token_interval: TokenInterval       # 在源文档中的 token 区间
    document: Document | None = None    # 源文档引用
    # 属性
    chunk_text: str                     # 重建的文本内容
    sanitized_chunk_text: str           # 标准化空白的文本
    char_interval: CharInterval         # 在源文档中的字符区间
    document_id: str | None             # 源文档 ID
 ```
 ---
 ### 1.7 不支持的输入格式
 以下格式 **不被支持**，需要在 LangExtract 之前通过外部工具预处理为纯文本：
 | 格式 | 状态 | 预处理方案 |
 |------|------|-----------|
 | PDF | ❌ 不支持 | 使用 MinerU / PyMuPDF 先转文本 |
 | DOCX | ❌ 不支持 | 使用 python-docx 先转文本 |
 | HTML | ❌ 不支持 | 使用 BeautifulSoup 先提取文本 |
 | 图片 | ❌ 不支持 | 使用 OCR 工具先识别文本 |
 | Markdown（含媒体） | ❌ 不支持 | 需提取纯文本部分 |
 | Excel / JSON | ❌ 不支持 | 需序列化为纯文本 |
 ---
 ## 二、模型接入规范
 ### 2.1 模型路由机制
 文件路径：`langextract/providers/patterns.py`
 LangExtract 通过 **正则匹配 `model_id`** 自动路由到对应的 Provider：
 | Provider | 匹配模式 | 优先级 | 示例模型 |
 |----------|---------|--------|---------|
 | **Gemini** | `^gemini` | 10 | `gemini-2.5-flash`, `gemini-1.5-pro` |
 | **OpenAI** | `^gpt-4`, `^gpt4.`, `^gpt-5`, `^gpt5.` | 10 | `gpt-4o`, `gpt-4o-mini` |
 | **Ollama** | `gemma`, `llama`, `mistral`, `phi`, `qwen`, `deepseek` 等 | 10 | `gemma2:2b`, `llama3.2:1b` |
 Ollama 额外支持 HuggingFace 格式的模型名：`meta-llama/Llama*`, `google/gemma*`, `mistralai/*`, `microsoft/phi*` 等。
 ---
 ### 2.2 Gemini Provider
 文件路径：`langextract/providers/gemini.py`
 ```python
 class GeminiLanguageModel(BaseLanguageModel):
    def __init__(
        self,
        model_id: str = 'gemini-2.5-flash',
        api_key: str | None = None,
        vertexai: bool = False,
        credentials: Any | None = None,
        project: str | None = None,
        location: str | None = None,
        http_options: Any | None = None,
        gemini_schema: GeminiSchema | None = None,
        format_type: FormatType = FormatType.JSON,
        temperature: float = 0.0,
        max_workers: int = 10,
        fence_output: bool = False,
        **kwargs,
    )
 ```
 | 参数 | 默认值 | 说明 |
 |------|--------|------|
 | `model_id` | `gemini-2.5-flash` | Gemini 模型标识 |
 | `api_key` | `None` | 环境变量：`GEMINI_API_KEY` 或 `LANGEXTRACT_API_KEY` |
 | `vertexai` | `False` | 是否使用 Vertex AI 企业认证 |
 | `temperature` | `0.0` | 采样温度（确定性输出） |
 | `format_type` | `JSON` | 输出格式 |
 **运行时可配参数：** `temperature`, `max_output_tokens`, `top_p`, `top_k`
 **额外参数白名单：** `response_schema`, `response_mime_type`, `safety_settings`, `system_instruction`, `tools`, `stop_sequences`, `candidate_count`
 ---
 ### 2.3 OpenAI Provider
 文件路径：`langextract/providers/openai.py`
 ```python
 class OpenAILanguageModel(BaseLanguageModel):
    def __init__(
        self,
        model_id: str = 'gpt-4o-mini',
        api_key: str | None = None,
        base_url: str | None = None,
        organization: str | None = None,
        format_type: FormatType = FormatType.JSON,
        temperature: float | None = None,
        max_workers: int = 10,
        **kwargs,
    )
 ```
 | 参数 | 默认值 | 说明 |
 |------|--------|------|
 | `model_id` | `gpt-4o-mini` | OpenAI 模型标识 |
 | `api_key` | `None` | 环境变量：`OPENAI_API_KEY` 或 `LANGEXTRACT_API_KEY` |
 | `base_url` | `None` | 自定义 API 端点（用于兼容接口） |
 | `organization` | `None` | OpenAI 组织 ID |
 | `temperature` | `None` | 采样温度 |
 **运行时可配参数：** `temperature`, `max_output_tokens`, `top_p`, `frequency_penalty`, `presence_penalty`, `seed`, `stop`, `logprobs`, `top_logprobs`, `reasoning_effort`, `reasoning`, `response_format`
 ---
 ### 2.4 Ollama Provider
 文件路径：`langextract/providers/ollama.py`
 ```python
 class OllamaLanguageModel(BaseLanguageModel):
    def __init__(
        self,
        model_id: str,                                    # 必填
        model_url: str = 'http://localhost:11434',
        base_url: str | None = None,
        format_type: FormatType | None = None,
        constraint: Constraint = Constraint(),
        timeout: int | None = None,
        **kwargs,
    )
 ```
 | 参数 | 默认值 | 说明 |
 |------|--------|------|
 | `model_id` | **必填** | Ollama 模型名（如 `gemma2:2b`） |
 | `model_url` | `http://localhost:11434` | Ollama 服务地址 |
 | `timeout` | `None`（未指定时回退到 `_DEFAULT_TIMEOUT` = `120`） | 请求超时（秒） |
 | `format_type` | `JSON` | 输出格式 |
 **内部默认常量：**
 | 常量 | 值 | 说明 |
 |------|-----|------|
 | `_DEFAULT_TEMPERATURE` | `0.1` | 默认温度 |
 | `_DEFAULT_TIMEOUT` | `120` | 默认超时（秒） |
 | `_DEFAULT_KEEP_ALIVE` | `300` | 模型保活时间（秒） |
 | `_DEFAULT_NUM_CTX` | `2048` | 默认上下文窗口大小 |
 **认证支持：** 可配置 `api_key`、`auth_scheme`（默认 `Bearer`）、`auth_header`（默认 `Authorization`）用于代理 Ollama 实例。
 ---
 ### 2.5 OpenAI 兼容接口适配（DeepSeek 等）
 LangExtract 的 OpenAI Provider 支持 `base_url` 参数，因此可以接入任何 OpenAI 兼容 API：
 ```python
 # DeepSeek 接入示例
 result = lx.extract(
    text_or_documents="...",
    model_id="gpt-4o-mini",               # 触发 OpenAI Provider 路由
    api_key="sk-your-deepseek-key",
    examples=[...],
    language_model_params={
        "base_url": "https://api.deepseek.com",
    },
 )
 ```
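 > **注意：** 由于路由基于 `model_id` 正则匹配，使用 DeepSeek 等兼容接口时 `model_id` 仍需使用 `gpt-*` 前缀来命中 OpenAI Provider，或通过 `config` 参数显式指定 Provider。但 `model_id` 同时会作为请求中的模型名发送给服务端，`gpt-*` 名称通常无法被 DeepSeek 识别；实际接入时建议通过 `config`（显式指定 `provider`）或 `model` 参数（直接传入 `OpenAILanguageModel` 实例）使用真实模型名（如 `deepseek-chat`）。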
 ---
 ### 2.6 模型优先级与配置覆盖关系
 模型配置的优先级从高到低：
 ```
 model（预配置的模型实例） > config（ModelConfig 实例） > model_id + api_key
 ```
 **ModelConfig 结构**（`langextract/factory.py`）：
 ```python
@dataclasses.dataclass(slots=True, frozen=True)
 class ModelConfig:
    model_id: str | None = None                              # 模型标识
    provider: str | None = None                              # 显式指定 Provider 名称
    provider_kwargs: dict[str, Any] = field(default_factory=dict)  # Provider 构造参数
 ```
 ---
 ### 2.7 关于 Embedding 模型
 **LangExtract 不使用也不依赖任何 Embedding 模型。**
 - 文本分块使用基于句子边界的确定性分割算法，不涉及语义相似度计算
 - 没有向量索引或向量检索功能
 - 整个代码库中没有任何 Embedding 相关的调用
 ---
 ## 三、输出数据格式规范
 ### 3.1 AnnotatedDocument 结构
 文件路径：`langextract/core/data.py`
 ```python
@dataclasses.dataclass
 class AnnotatedDocument:
    extractions: list[Extraction] | None = None    # 抽取结果列表
    text: str | None = None                        # 原始文本
    document_id: str                               # 文档唯一标识（自动生成）
    tokenized_text: TokenizedText                   # 分词后文本（惰性计算）
 ```
 **序列化后的 JSON 顶层字段：**
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `document_id` | `string` | 文档唯一标识，格式 `doc_{uuid_hex[:8]}` |
 | `text` | `string \| null` | 原始输入文本 |
 | `extractions` | `array[Extraction] \| null` | 抽取的实体列表 |
 ---
 ### 3.2 Extraction 结构
 文件路径：`langextract/core/data.py`
 ```python
@dataclasses.dataclass(init=False)
 class Extraction:
    extraction_class: str                                      # 实体类型
    extraction_text: str                                       # 实体文本
    char_interval: CharInterval | None = None                  # 字符位置锚点
    alignment_status: AlignmentStatus | None = None            # 对齐状态
    extraction_index: int | None = None                        # 抽取顺序索引
    group_index: int | None = None                             # 分组索引
    description: str | None = None                             # 实体描述
    attributes: dict[str, str | list[str]] | None = None       # 附加属性
    token_interval: TokenInterval | None = None                # Token 位置锚点
 ```
 **字段详细说明：**
 | 字段 | 类型 | 必填 | 说明 |
 |------|------|------|------|
 | `extraction_class` | `str` | 是 | 实体类型/分类名称（如 `PERSON`, `ORGANIZATION`） |
 | `extraction_text` | `str` | 是 | 抽取的文本内容（应为原文的子串） |
 | `char_interval` | `CharInterval \| null` | 否 | 在原文中的字符偏移位置 |
 | `alignment_status` | `string \| null` | 否 | 文本对齐质量（见 [3.4 节](#34-alignmentstatus-对齐状态枚举)） |
 | `extraction_index` | `int \| null` | 否 | 在结果列表中的顺序位置 |
 | `group_index` | `int \| null` | 否 | 分组归属（用于关联抽取） |
 | `description` | `string \| null` | 否 | 对该实体的补充描述 |
 | `attributes` | `dict \| null` | 否 | 键值对形式的附加属性 |
 | `token_interval` | `TokenInterval \| null` | 否 | 在原文中的 token 偏移位置 |
 ---
 ### 3.3 CharInterval 字符锚点
 文件路径：`langextract/core/data.py`
 ```python
@dataclasses.dataclass
 class CharInterval:
    start_pos: int | None = None    # 起始位置（包含），0-indexed
    end_pos: int | None = None      # 结束位置（不包含）
 ```
 **语义：** `source_text[start_pos:end_pos]` 即为抽取的文本在原文中的精确位置。
 ---
 ### 3.4 AlignmentStatus 对齐状态枚举
 文件路径：`langextract/core/data.py`
 ```python
 class AlignmentStatus(enum.Enum):
    MATCH_EXACT   = "match_exact"
    MATCH_GREATER = "match_greater"
    MATCH_LESSER  = "match_lesser"
    MATCH_FUZZY   = "match_fuzzy"
 ```
 | 状态值 | 序列化值 | 含义 | 可信度 |
 |--------|---------|------|--------|
 | `MATCH_EXACT` | `"match_exact"` | LLM 输出与原文 token 序列完全匹配 | 最高 |
 | `MATCH_GREATER` | `"match_greater"` | LLM 输出的 token 序列短于匹配到的原文（找到最佳重叠） | 高 |
 | `MATCH_LESSER` | `"match_lesser"` | LLM 输出长于匹配到的原文（部分精确匹配） | 中 |
 | `MATCH_FUZZY` | `"match_fuzzy"` | 模糊匹配，重叠率达到阈值（默认 ≥0.75） | 低 |
 | `None` | `null` | 未找到任何对齐 | 不可信 |
 **对齐流程：**
 ```
 1. 尝试精确 token 级别匹配（difflib）
   ├── 成功且长度相等 → MATCH_EXACT
   ├── 成功但 LLM 输出更长 → MATCH_LESSER
   └── 成功但匹配区域更大 → MATCH_GREATER
 2. 精确匹配失败且 enable_fuzzy_alignment=True
   ├── 最佳重叠窗口 ≥ fuzzy_alignment_threshold → MATCH_FUZZY
   └── 低于阈值 → None
 3. 精确匹配失败且 enable_fuzzy_alignment=False → None
 ```
 ---
 ### 3.5 Resolver 对齐参数
 文件路径：`langextract/resolver.py`
 通过 `extract()` 的 `resolver_params` 字典传入：
 ```python
 result = lx.extract(
    ...,
    resolver_params={
        "enable_fuzzy_alignment": True,       # 是否启用模糊对齐（默认 True）
        "fuzzy_alignment_threshold": 0.75,    # 模糊匹配最低重叠率（默认 0.75）
        "accept_match_lesser": True,          # 是否接受 MATCH_LESSER（默认 True）
        "suppress_parse_errors": False,       # 是否忽略 JSON 解析错误（默认 False）
    },
 )
 ```
 | 参数 | 类型 | 默认值 | 说明 |
 |------|------|--------|------|
 | `enable_fuzzy_alignment` | `bool` | `True` | 精确匹配失败后是否尝试模糊匹配 |
 | `fuzzy_alignment_threshold` | `float` | `0.75` | 模糊匹配的最低 token 重叠比率（0.0~1.0） |
 | `accept_match_lesser` | `bool` | `True` | 是否接受部分精确匹配结果 |
 | `suppress_parse_errors` | `bool` | `False` | JSON 解析失败时是否继续而非报错 |
 ---
 ### 3.6 JSONL 输出文件格式
 文件路径：`langextract/io.py`
 ```python
 def save_annotated_documents(
    annotated_documents: Iterator[AnnotatedDocument],
    output_dir: pathlib.Path | str | None = None,
    output_name: str = 'data.jsonl',
    show_progress: bool = True,
 ) -> None
 ```
 **输出规范：**
 - 文件格式：**JSONL**（JSON Lines），每行一个完整的 JSON 对象
 - 默认文件名：`data.jsonl`
 - 序列化规则：
  - Enum 值转为字符串（如 `AlignmentStatus.MATCH_EXACT` → `"match_exact"`）
  - NumPy / integral 数值类型转为 `int`
  - 以 `_` 开头的私有字段被排除
 ---
 ### 3.7 完整输出 JSON Schema 示例
 单条 JSONL 记录的完整结构：
 ```json
 {
  "document_id": "doc_a1b2c3d4",
  "text": "GraphRAG is a technique developed by Microsoft Research that combines knowledge graphs with retrieval-augmented generation.",
  "extractions": [
    {
      "extraction_class": "TECHNOLOGY",
      "extraction_text": "GraphRAG",
      "char_interval": {
        "start_pos": 0,
        "end_pos": 8
      },
      "alignment_status": "match_exact",
      "extraction_index": 0,
      "group_index": null,
      "description": "A technique combining knowledge graphs with RAG",
      "attributes": {
        "category": "AI/ML",
        "developer": "Microsoft Research"
      },
      "token_interval": {
        "start_index": 0,
        "end_index": 1
      }
    },
    {
      "extraction_class": "ORGANIZATION",
      "extraction_text": "Microsoft Research",
      "char_interval": {
        "start_pos": 37,
        "end_pos": 55
      },
      "alignment_status": "match_exact",
      "extraction_index": 1,
      "group_index": null,
      "description": null,
      "attributes": null,
      "token_interval": {
        "start_index": 6,
        "end_index": 8
      }
    }
  ]
 }
 ```
 ---
 ### 3.8 HTML 可视化输出
 文件路径：`langextract/visualization.py`
 ```python
 def visualize(doc: AnnotatedDocument) -> HTML
 ```
 **功能特性：**
 - 按 `extraction_class` 进行颜色编码高亮（10 色调色板）
 - 交互式 tooltip 显示实体类型和属性
 - 动画导航控件，支持多实体浏览
 - 进度滑块
 - 响应式 HTML/CSS/JavaScript 嵌入
 - 支持 Jupyter / IPython 环境直接渲染
 ---
 ## 附录：环境变量与常量速查
 ### 环境变量
 | 变量名 | 适用 Provider | 说明 |
 |--------|--------------|------|
 | `LANGEXTRACT_API_KEY` | 所有 | 通用 API Key 后备 |
 | `GEMINI_API_KEY` | Gemini | Gemini API Key |
 | `OPENAI_API_KEY` | OpenAI | OpenAI API Key |
 | `OLLAMA_BASE_URL` | Ollama | Ollama 服务地址（默认 `http://localhost:11434`） |
 ### FormatType 枚举
 ```python
 class FormatType(enum.Enum):
    YAML = 'yaml'
    JSON = 'json'
 ```
 ### 结构化输出支持
 | Provider | Schema 类型 | 结构化输出模式 |
 |----------|------------|---------------|
 | Gemini | `GeminiSchema` | 严格结构化输出 |
 | OpenAI | JSON Mode | 通过 `response_format` 约束 |
 | Ollama | `FormatModeSchema` | JSON 模式（非严格） |
 ### Fence Output 逻辑
 | Provider | 默认值 | 说明 |
 |----------|--------|------|
 | Gemini | `False` | 有 Schema 时不需要 fence |
 | OpenAI | `False` | JSON Mode 返回原始 JSON |
 | Ollama | `False` | 返回原始 JSON |
--- a/docs/mineru_specification-v1.0.md
+++ b/docs/mineru_specification-v1.0.md
@@ -0,0 +1,879 @@
 # MinerU 文档解析规范文档 v1.0
 > 基于 [opendatalab/MinerU](https://github.com/opendatalab/MinerU) 官方 API 文档 + 本地 MVP 实测验证
 > 实测后端版本：`pipeline` / `_version_name: 2.6.4`
 > 更新日期：2026-03-04
 ---
 ## 目录
 - [一、Pipeline 执行流程与测试脚本](#一pipeline-执行流程与测试脚本)
  - [1.1 虚拟环境配置（环境隔离）](#11-虚拟环境配置环境隔离)
  - [1.2 完整执行流程（本地文件 → 云端解析 → 本地存储）](#12-完整执行流程本地文件--云端解析--本地存储)
  - [1.3 测试脚本存放位置](#13-测试脚本存放位置)
  - [1.4 Pipeline 各步骤详解](#14-pipeline-各步骤详解)
 - [二、输入格式规范](#二输入格式规范)
  - [2.1 支持的文件格式](#21-支持的文件格式)
  - [2.2 输入限制](#22-输入限制)
  - [2.3 OCR 语言支持](#23-ocr-语言支持)
 - [三、输出格式规范（实测验证）](#三输出格式规范实测验证)
  - [3.1 实际输出文件清单（实测 vs 官方文档对比）](#31-实际输出文件清单实测-vs-官方文档对比)
  - [3.2 content_list.json 字段规范（实测验证）](#32-content_listjson-字段规范实测验证)
  - [3.3 layout.json 字段规范（实测验证）](#33-layoutjson-字段规范实测验证)
  - [3.4 full.md Markdown 输出规范（实测验证）](#34-fullmd-markdown-输出规范实测验证)
 - [四、布局信息规范](#四布局信息规范)
  - [4.1 坐标系定义（实测验证）](#41-坐标系定义实测验证)
  - [4.2 布局分类体系](#42-布局分类体系)
  - [4.3 内容层级与标题级别](#43-内容层级与标题级别)
  - [4.4 布局精度提取指南](#44-布局精度提取指南)
 - [五、云端 API 关键参数规范](#五云端-api-关键参数规范)
  - [5.1 认证配置](#51-认证配置)
  - [5.2 本地文件上传流程 — file-urls/batch](#52-本地文件上传流程--file-urlsbatch)
  - [5.3 URL 直传解析 — extract/task](#53-url-直传解析--extracttask)
  - [5.4 批量 URL 解析 — extract/task/batch](#54-批量-url-解析--extracttaskbatch)
  - [5.5 查询结果接口](#55-查询结果接口)
  - [5.6 通用响应包装结构](#56-通用响应包装结构)
  - [5.7 任务状态枚举（实测验证）](#57-任务状态枚举实测验证)
  - [5.8 错误码速查](#58-错误码速查)
 ---
 ## 一、Pipeline 执行流程与测试脚本
 ### 1.1 虚拟环境配置（环境隔离）
 MinerU MVP 组件使用 **独立的 Python 虚拟环境**，与项目其他组件（LangExtract、GraphRAG Pipeline 等）完全隔离，避免依赖污染。
 | 项目 | 值 |
 |------|-----|
 | 虚拟环境路径 | `F:\GraphRAGAgent\mineru_mvp\.venv\` |
 | Python 版本 | 3.12 |
 | 创建工具 | uv |
 | Python 解释器 | `F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe` |
 **启动 Pipeline 前必须切换到子虚拟环境：**
 ```bash
 # 方式一：直接指定解释器路径（推荐，无需手动激活）
 F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe pipeline.py
 # 方式二：先激活环境再运行
 cd F:/GraphRAGAgent/mineru_mvp
 source .venv/Scripts/activate
 python pipeline.py
 ```
 **安装新依赖：**
 ```bash
 uv pip install <package> --python F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe
 ```
 **已安装依赖清单：**
 | 包 | 用途 |
 |----|------|
 | `requests` | HTTP 客户端（API 调用、文件上传下载） |
 | `python-dotenv` | `.env` 配置文件加载 |
 | `reportlab` | 测试 PDF 生成 |
 ---
 ### 1.2 完整执行流程（本地文件 → 云端解析 → 本地存储）
 ```
 ┌─────────────────────────────────────────────────────────────────┐
 │  Step 0: 激活虚拟环境                                            │
 │  source .venv/Scripts/activate  或  直接使用 .venv 内 python      │
 ├─────────────────────────────────────────────────────────────────┤
 │  Step 1: 获取预签名上传 URL                                      │
 │  POST /file-urls/batch  →  返回 batch_id + file_urls[]          │
 ├─────────────────────────────────────────────────────────────────┤
 │  Step 2: 上传本地文件                                            │
 │  PUT {file_urls[0]}  ←  本地文件二进制流（不带 Content-Type）      │
 ├─────────────────────────────────────────────────────────────────┤
 │  Step 3: 轮询解析结果                                            │
 │  GET /extract-results/batch/{batch_id}                          │
 │  状态流转: waiting-file → pending → running → done/failed        │
 ├─────────────────────────────────────────────────────────────────┤
 │  Step 4: 下载解析结果 ZIP                                        │
 │  GET {full_zip_url}  →  解压到本地 output/ 目录                   │
 ├─────────────────────────────────────────────────────────────────┤
 │  Step 5: 分析解析产物                                            │
 │  读取 *content_list.json  →  统计块类型、页数、生成 summary        │
 └─────────────────────────────────────────────────────────────────┘
 ```
 > **关键发现（实测）：** 上传文件时 **不能** 携带 `Content-Type` 请求头，否则 OSS 预签名 URL 校验失败返回 403 `SignatureDoesNotMatch`。必须使用裸 `PUT` 请求。
 ### 1.3 测试脚本存放位置
 ```
 F:\GraphRAGAgent\mineru_mvp\
 ├── .env                        # API Token 配置
 ├── .venv/                      # 独立虚拟环境（Python 3.12, uv 创建）
 ├── CLAUDE.md                   # Claude Code 组件规范
 ├── create_test_pdf.py          # 测试 PDF 生成脚本（reportlab）
 ├── pipeline.py                 # 完整 Pipeline 脚本（5 步）
 ├── test_sample.pdf             # 生成的测试 PDF（1 页，含标题/段落/表格）
 └── output/
    └── test_sample/            # 解析输出结果
        ├── full.md
        ├── {uuid}_content_list.json
        ├── layout.json
        ├── {uuid}_origin.pdf
        └── images/
            └── {hash}.jpg
 ```
 ### 1.4 Pipeline 各步骤详解
 #### Step 1 — 获取预签名上传 URL
 ```python
 resp = requests.post(
    f"{API_BASE}/file-urls/batch",
    headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
    json={
        "files": [{"name": "test_sample.pdf", "data_id": "mvp_test"}],
        "enable_formula": True,
        "enable_table": True,
        "language": "en",
    },
 )
 batch_id = resp.json()["data"]["batch_id"]
 upload_url = resp.json()["data"]["file_urls"][0]
 ```
 #### Step 2 — 上传文件（裸 PUT，不带 Content-Type）
 ```python
 with open("test_sample.pdf", "rb") as f:
    requests.put(upload_url, data=f)  # 不传 headers
 ```
 #### Step 3 — 轮询结果
 ```python
 while True:
    result = requests.get(
        f"{API_BASE}/extract-results/batch/{batch_id}",
        headers={"Authorization": f"Bearer {token}"},
    ).json()
    item = result["data"]["extract_result"][0]
    state = item["state"]
    if state == "done":
        zip_url = item["full_zip_url"]
        break
    if state == "failed":
        # 解析失败时立即终止，避免无限轮询
        raise RuntimeError(f"MinerU 解析失败: {item}")
    time.sleep(5)
 #### Step 4 — 下载解压
 ```python
 zip_data = requests.get(zip_url).content
 with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
    zf.extractall("output/test_sample/")
 ```
 #### Step 5 — 分析产物
 ```python
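 # 文件名带 UUID 前缀，open() 不会展开通配符，需先用 glob 匹配出真实路径
 path = glob.glob("output/test_sample/*content_list.json")[0]
 with open(path, encoding="utf-8") as f:
    content_list = json.load(f)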
 # 按 type 分类统计、按 page_idx 分组、提取标题层级等
 ```
 ---
 ## 二、输入格式规范
 ### 2.1 支持的文件格式
 | 格式 | 扩展名 | 说明 |
 |------|--------|------|
 | **PDF** | `.pdf` | 核心能力 — 文本型 / 扫描型 / 混合型均支持 |
 | **Word** | `.doc`, `.docx` | 旧版和新版 Word 文档 |
 | **PowerPoint** | `.ppt`, `.pptx` | 旧版和新版演示文稿 |
 | **图片** | `.png`, `.jpg`, `.jpeg` | 单页图片文档，支持 EXIF 方向自动校正 |
 | **HTML** | `.html` | 须指定 `model_version: "MinerU-HTML"` |
 ### 2.2 输入限制
 | 约束项 | 限制值 |
 |--------|--------|
 | 单文件最大体积 | **200 MB** |
 | 单文件最大页数 | **600 页** |
 | 批量请求最大文件数 | **200 个** |
 | 预签名上传 URL 有效期 | **24 小时** |
 | 云端 API 每日最高优先级额度 | **2,000 页**，超出部分降低优先级 |
 ### 2.3 OCR 语言支持
 MinerU 内置 OCR 引擎支持 **109 种语言**（基于 PaddleOCR v3），可通过 `language` 参数指定文档主语言。
 > **注意（官方文档）：** `language` 的默认值为 `"ch"`（非 `"zh"`），遵循 PaddleOCR 语言代码规范。
 | 代码 | 语言 | 代码 | 语言 |
 |------|------|------|------|
 | `ch` | 中文 | `en` | 英文 |
 | `japan` | 日文 | `korean` | 韩文 |
 | `french` | 法文 | `german` | 德文 |
 ---
 ## 三、输出格式规范（实测验证）
 ### 3.1 实际输出文件清单（实测 vs 官方文档对比）
 **实测输出（ZIP 解压后，共 5 个文件）：**
 ```
 output/test_sample/
 ├── full.md                                           # Markdown 输出（单文件）
 ├── {uuid}_content_list.json                          # 扁平化内容块列表
 ├── layout.json                                       # 富元数据中间格式
 ├── {uuid}_origin.pdf                                 # 原始 PDF 副本
 └── images/
    └── {sha256_hash}.jpg                             # 表格/图片截图
 ```
 **与官方文档差异对比：**
 | 项目 | 官方文档描述 | 实测结果 | 差异说明 |
 |------|-------------|---------|---------|
 | Markdown 文件 | `auto/auto.md` + `auto_nlp/auto_nlp.md`（两个子目录） | **`full.md`**（单文件，根目录） | 云端 API 输出为合并的 `full.md`，无子目录拆分 |
 | 中间格式 | `middle.json` | **`layout.json`** | 文件名不同，结构一致 |
 | content_list | `content_list.json` | **`{uuid}_content_list.json`** | 文件名带 UUID 前缀 |
 | 原始文件副本 | 未提及 | **`{uuid}_origin.pdf`** | 云端 API 额外返回原始文件副本 |
 | 调试文件 | `layout.pdf` + `span.pdf` + `model.json` | **无** | 云端 API 不返回调试 PDF 和 model.json |
 | 图片命名 | `img_0_0.png` / `table_0_1.png` | **`{sha256}.jpg`** | 使用内容哈希命名，格式为 JPG |
 > **重要结论：** 以实测为准。对接下游系统时，文件匹配应使用 glob 模式（如 `*content_list.json`）而非固定文件名。
 ### 3.2 content_list.json 字段规范（实测验证）
 文件为 **JSON 数组**，每个元素是一个内容块，按文档阅读顺序排列。
 #### 3.2.1 公共字段
 | 字段 | 类型 | 说明 | 实测验证 |
 |------|------|------|---------|
 | `type` | `string` | 内容类型 | 实测出现：`text`, `table` |
 | `page_idx` | `int` | 所在页码（0-indexed） | 实测值：`0` |
 | `bbox` | `[int, int, int, int]` | 边界框 `[x0, y0, x1, y1]` | 实测范围：`0–1000`（归一化） |
 #### 3.2.2 文本块（type: "text"）
 **实测完整结构：**
 ```json
 {
  "type": "text",
  "text": "GraphRAG: Knowledge Graph Enhanced RAG System ",
  "text_level": 1,
  "bbox": [141, 93, 860, 151],
  "page_idx": 0
 }
 ```
 | 字段 | 类型 | 必现 | 说明 |
 |------|------|------|------|
 | `text` | `string` | 是 | 文本内容（末尾可能有空格） |
 | `text_level` | `int \| 缺失` | 否 | 标题级别：`1`=一级标题；**正文时该字段缺失而非为 `0` 或 `null`** |
 > **实测发现：** 正文段落中 `text_level` 字段 **完全不存在**（不是 `null` 或 `0`），仅标题块才携带该字段。判断标题应使用 `block.get("text_level")` 而非 `block["text_level"] >= 1`。
 #### 3.2.3 表格块（type: "table"）
 **实测完整结构：**
 ```json
 {
  "type": "table",
  "img_path": "images/e382eaafdf341d361c2567b20d9ce56456c17a7dd10ae5dadbcc3961256169c9.jpg",
  "table_caption": [],
  "table_footnote": [],
  "table_body": "<table><tr><td rowspan=1 colspan=2>Method  Comprehensiveness</td>...</table>",
  "bbox": [115, 563, 882, 708],
  "page_idx": 0
 }
 ```
 | 字段 | 类型 | 必现 | 说明 |
 |------|------|------|------|
 | `img_path` | `string` | 是 | 表格截图路径（`images/{sha256}.jpg`） |
 | `table_body` | `string` | 是 | HTML 表格（`<table>` 标签，无 `<html>/<body>` 外层包裹） |
 | `table_caption` | `string[]` | 是 | 表格标题（可为空数组 `[]`） |
 | `table_footnote` | `string[]` | 是 | 表格脚注（可为空数组 `[]`） |
 > **实测发现：** `table_body` 的 HTML 直接以 `<table>` 开头，**不含** `<html><body>` 外层包裹（官方文档示例中有外层包裹，以实测为准）。
 #### 3.2.4 图片块（type: "image"）— 官方文档
 本次测试 PDF 不含独立图片，以下为官方文档规范（待后续实测验证）：
 ```json
 {
  "type": "image",
  "img_path": "images/{hash}.jpg",
  "image_caption": ["Figure 1: ..."],
  "image_footnote": [],
  "bbox": [x0, y0, x1, y1],
  "page_idx": 0
 }
 ```
 #### 3.2.5 公式块（type: "equation"）— 官方文档
 ```json
 {
  "type": "equation",
  "text": "E = mc^2",
  "text_format": "latex",
  "img_path": "images/{hash}.jpg",
  "bbox": [x0, y0, x1, y1],
  "page_idx": 0
 }
 ```
 > **实测发现：** 测试 PDF 结论段的百分数被解析为 LaTeX 内联公式（`$7 2 . 0 \%$`），嵌入在 `text` 类型块中，而非独立的 `equation` 块。这说明 Pipeline 后端会将简单公式内联到文本块中。
 ---
 ### 3.3 layout.json 字段规范（实测验证）
 `layout.json` 对应官方文档中的 `middle.json`，是富元数据中间格式。
 #### 3.3.1 顶层结构（实测）
 ```json
 {
  "_backend": "pipeline",
  "_version_name": "2.6.4",
  "pdf_info": [ ... ]
 }
 ```
 | 字段 | 类型 | 实测值 | 说明 |
 |------|------|--------|------|
 | `_backend` | `string` | `"pipeline"` | 使用的解析后端 |
 | `_version_name` | `string` | `"2.6.4"` | MinerU 版本标识 |
 | `pdf_info` | `array` | 含 1 个元素 | 按页组织的解析结果 |
 #### 3.3.2 页级结构（实测）
 ```json
 {
  "page_idx": 0,
  "page_size": [595, 841],
  "preproc_blocks": [ ... ],
  "para_blocks": [ ... ],
  "discarded_blocks": []
 }
 ```
 | 字段 | 类型 | 实测值 | 说明 |
 |------|------|--------|------|
 | `page_idx` | `int` | `0` | 页码（0-indexed） |
 | `page_size` | `[int, int]` | `[595, 841]` | 页面尺寸 `[宽, 高]`（PDF pt 单位，A4≈595×841） |
 | `preproc_blocks` | `array` | 10 个块 | 预处理阶段的内容块 |
 | `para_blocks` | `array` | 10 个块 | 段落分段后的内容块 |
 | `discarded_blocks` | `array` | `[]` | 被过滤的内容（页眉/页脚等） |
 > **与官方文档差异：** 实测页级结构 **仅包含 3 个数组**（`preproc_blocks`、`para_blocks`、`discarded_blocks`），**不含** 官方文档提到的 `images`、`tables`、`interline_equations` 独立数组。表格和图片直接嵌入在 `preproc_blocks` / `para_blocks` 中。
 #### 3.3.3 内容块层级结构（Block → Line → Span，实测验证）
 **文本/标题块（实测）：**
 ```json
 {
  "type": "title",
  "bbox": [84, 79, 512, 127],
  "lines": [
    {
      "bbox": [80, 77, 515, 106],
      "spans": [
        {
          "bbox": [80, 77, 515, 106],
          "score": 1.0,
          "content": "GraphRAG: Knowledge Graph Enhanced",
          "type": "text"
        }
      ],
      "index": 0
    }
  ],
  "index": 0.5
 }
 ```
 **Block 字段（实测）：**
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `type` | `string` | 块类型：实测出现 `title`, `text`, `table` |
 | `bbox` | `[int, int, int, int]` | 边界框（原始 PDF pt 坐标） |
 | `lines` | `array` | 行数组（文本/标题块） |
 | `blocks` | `array` | 子块数组（仅 `table` 类型容器块） |
 | `index` | `int \| float` | 排序索引（可为小数，如 `0.5`） |
 **Line 字段（实测）：**
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `bbox` | `[int, int, int, int]` | 行边界框 |
 | `spans` | `array` | Span 数组 |
 | `index` | `int` | 行内排序索引 |
 **Span 字段（实测）：**
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `bbox` | `[int, int, int, int]` | Span 边界框 |
 | `type` | `string` | 实测出现：`text`, `table` |
 | `content` | `string` | 文本内容（`type=text` 时） |
 | `score` | `float` | 置信度（实测多为 `1.0`） |
 **表格容器块（实测）：**
 ```json
 {
  "type": "table",
  "bbox": [69, 474, 525, 596],
  "blocks": [
    {
      "type": "table_body",
      "bbox": [69, 474, 525, 596],
      "group_id": 0,
      "lines": [ ... ],
      "index": 0,
      "virtual_lines": [ ... ]
    }
  ],
  "index": 7
 }
 ```
 表格容器块内的子块额外包含：
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `group_id` | `int` | 分组 ID |
 | `virtual_lines` | `array` | 虚拟行结构（表格布局专用） |
 **`para_blocks` 额外字段（实测）：**
 部分 `para_blocks` 中的文本块额外包含 `bbox_fs` 字段（疑似字体大小相关的边界框），如：
 ```json
 {
  "type": "text",
  "bbox": [77, 198, 518, 259],
  "lines": [...],
  "index": 2,
  "bbox_fs": [77, 198, 518, 259]
 }
 ```
 ---
 ### 3.4 full.md Markdown 输出规范（实测验证）
 **实测产物：** 单个 `full.md` 文件（非官方文档描述的 `auto/auto.md` + `auto_nlp/auto_nlp.md` 双目录结构）。
 **实测特征：**
 | 特征 | 实测行为 |
 |------|---------|
 | 标题 | 使用 `# ` 前缀，所有标题均为一级（`# `） |
 | 段落 | 纯文本，段落间以空行分隔 |
 | 表格 | 直接嵌入 HTML `<table>` 标签 |
 | 公式 | 内联使用 `$...$` 定界符（如 `$7 2 . 0 \%$`） |
 | 图片引用 | 本次未出现独立图片引用 |
 **实测输出示例（节选）：**
 ```markdown
 # GraphRAG: Knowledge Graph Enhanced RAG System
 # 1. Introduction
 GraphRAG is an advanced retrieval-augmented generation technique developed by...
 # 3. Performance Comparison
 The following table compares GraphRAG with traditional RAG approaches...
 <table><tr><td rowspan=1 colspan=2>Method  Comprehensiveness</td>...</table>
 # 4. Conclusion
 ...comprehensiveness $7 2 . 0 \%$ vs $3 2 . 4 \%$...
 ```
 ---
 ## 四、布局信息规范
 ### 4.1 坐标系定义（实测验证）
 | 坐标系 | 适用文件 | 实测范围 | 原点 | 说明 |
 |--------|---------|---------|------|------|
 | **归一化整数坐标** | `*content_list.json` | `0 – 1000` | 左上角 | 页面宽高均映射到 0~1000 |
 | **原始 PDF 坐标** | `layout.json` | 实测 `[595, 841]`（A4 pt） | 左上角 | 与 PDF 页面尺寸一致 |
 **bbox 格式统一为 `[x0, y0, x1, y1]`：**
 ```
 (x0, y0) ─────────────────── (x1, y0)
    │                            │
    │       内容区域              │
    │                            │
 (x0, y1) ─────────────────── (x1, y1)
 ```
 **实测对照（标题块 "1. Introduction"）：**
 | 文件 | bbox | 坐标系 |
 |------|------|--------|
 | `content_list.json` | `[131, 200, 317, 222]` | 归一化 0-1000 |
 | `layout.json` | `[78, 169, 189, 187]` | PDF pt（页面 595×841） |
 ### 4.2 布局分类体系
 #### Pipeline 后端（实测 + 官方文档合并）
 **layout.json 中的 `type` 值（实测出现标记 ✅）：**
 | type 值 | 说明 | 实测出现 |
 |---------|------|---------|
 | `title` | 标题 | ✅ |
 | `text` | 正文段落 | ✅ |
 | `table` | 表格容器 | ✅ |
 | `table_body` | 表格主体（子块） | ✅ |
 | `table_caption` | 表格标题 | — |
 | `table_footnote` | 表格脚注 | — |
 | `image_body` | 图片主体 | — |
 | `image_caption` | 图片标题 | — |
 | `image_footnote` | 图片脚注 | — |
 | `interline_equation` | 行间公式 | — |
 | `index` | 目录项 | — |
 | `list` | 列表项 | — |
 #### VLM 后端（官方文档，未实测）
 VLM 后端额外支持：`code`, `code_caption`, `list`, `header`, `footer`, `page_number`, `aside_text`, `page_footnote`, `ref_text`, `algorithm`, `phonetic`。
 ### 4.3 内容层级与标题级别
 `content_list.json` 中 `text_level` 字段标识文档结构层级：
 | text_level | 含义 | Markdown | 实测验证 |
 |------------|------|----------|---------|
 | **字段缺失** | 正文 | 无标记 | ✅ 实测正文块不含 `text_level` 字段 |
 | `1` | 一级标题 | `# Heading` | ✅ 实测验证 |
 | `2` | 二级标题 | `## Heading` | — |
 | `3` | 三级标题 | `### Heading` | — |
 | `4+` | 更深层标题 | `####+ Heading` | — |
 > **重要纠正：** 官方文档描述正文为 `text_level: null` 或 `0`，但实测正文块中 **该字段完全不存在**。正确判断方式：
 ```python
 # 正确写法
 is_heading = block.get("text_level") is not None
 # 错误写法（会 KeyError）
 is_heading = block["text_level"] >= 1
 ```
 ### 4.4 布局精度提取指南
 #### 提取文档大纲
 ```python
 headings = [
    {"level": b["text_level"], "text": b["text"].strip(), "page": b["page_idx"]}
    for b in content_list
    if b["type"] == "text" and b.get("text_level") is not None
 ]
 ```
 #### 提取正文段落
 ```python
 paragraphs = [
    b["text"].strip()
    for b in content_list
    if b["type"] == "text" and b.get("text_level") is None
 ]
 ```
 #### 解析表格数值
 ```python
 from bs4 import BeautifulSoup
 for b in content_list:
    if b["type"] != "table":
        continue
    soup = BeautifulSoup(b["table_body"], "html.parser")
    rows = []
    for tr in soup.find_all("tr"):
        cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
        rows.append(cells)
    # rows 即为二维表格数据
 ```
 #### 按页面位置过滤
 ```python
 def is_upper_half(block):
    """Return True when the block's vertical midpoint lies above y = 500.

    ``block["bbox"]`` is read as ``[x0, y0, x1, y1]``; the fixed cut-off of
    500 is the middle of the normalized 0-1000 page coordinates used by
    ``content_list.json``.
    """
    top, bottom = block["bbox"][1], block["bbox"][3]
    return (top + bottom) / 2 < 500
 ```
 ---
 ## 五、云端 API 关键参数规范
 ### 5.1 认证配置
 | 项目 | 值 |
 |------|-----|
 | 请求头 | `Authorization: Bearer {token}` |
 | Token 获取 | [mineru.net/apiManage/token](https://mineru.net/apiManage/token) |
 | .env 配置 | `MINERU_API_TOKEN=xxx` |
 所有接口均需携带 `Authorization` 头，`Content-Type: application/json`（上传文件 PUT 请求除外）。
 ---
 ### 5.2 本地文件上传流程 — file-urls/batch
 **用途：** 本地文件场景 — 获取预签名 URL → PUT 上传 → 自动触发解析
 **接口：** `POST https://mineru.net/api/v4/file-urls/batch`
 #### 请求体
 | 字段 | 类型 | 必填 | 默认值 | 说明 |
 |------|------|------|--------|------|
 | `files` | `array[object]` | **是** | — | 文件列表（最多 200 个） |
 | `files[].name` | `string` | **是** | — | 文件名（须含正确扩展名） |
 | `files[].data_id` | `string` | 否 | — | 业务标识（最长 128 字符，支持字母数字 `_` `-` `.`） |
 | `files[].is_ocr` | `bool` | 否 | `false` | 是否强制 OCR |
 | `files[].page_ranges` | `string` | 否 | — | 页码范围（如 `"2,4-6"` 表示第 2 页及第 4–6 页；`"2--2"` 表示从第 2 页到倒数第 2 页） |
 | `model_version` | `string` | 否 | `"pipeline"` | 模型版本：`pipeline` / `vlm` / `MinerU-HTML` |
 | `enable_formula` | `bool` | 否 | `true` | 是否启用公式识别 |
 | `enable_table` | `bool` | 否 | `true` | 是否启用表格识别 |
 | `language` | `string` | 否 | `"ch"` | OCR 语言（PaddleOCR v3 语言代码） |
 | `callback` | `string` | 否 | — | 回调通知 URL（HTTP/HTTPS POST） |
 | `seed` | `string` | 否 | — | 回调签名种子（与 callback 配合，最长 64 字符） |
 | `extra_formats` | `string[]` | 否 | — | 额外输出格式：`"docx"`, `"html"`, `"latex"` |
 #### 响应体（实测验证）
 ```json
 {
  "code": 0,
  "msg": "ok",
  "trace_id": "9ef836ce2a65f46c5f54389e55a14039",
  "data": {
    "batch_id": "6ce0e838-b324-4f1d-8b06-01ddc07e4cd4",
    "file_urls": [
      "https://mineru.oss-cn-shanghai.aliyuncs.com/api-upload/extract/2026-03-04/{batch_id}/{file_uuid}.pdf?Expires=...&OSSAccessKeyId=...&Signature=..."
    ]
  }
 }
 ```
 | 响应字段 | 类型 | 说明 |
 |---------|------|------|
 | `code` | `int` | `0` 表示成功 |
 | `msg` | `string` | 状态信息 |
 | `trace_id` | `string` | 请求追踪 ID |
 | `data.batch_id` | `string` | 批次 ID（后续查询结果使用） |
 | `data.file_urls` | `string[]` | 预签名上传 URL 列表（与 `files` 一一对应） |
 #### 文件上传
 ```
 PUT {file_urls[i]}
 Body: 文件二进制流
 ```
 > **不要传任何请求头**（包括 `Content-Type`），否则 OSS 签名校验失败。
 ---
 ### 5.3 URL 直传解析 — extract/task
 **用途：** 文件已有公网 URL 时直接提交解析
 **接口：** `POST https://mineru.net/api/v4/extract/task`
 #### 请求体
 | 字段 | 类型 | 必填 | 默认值 | 说明 |
 |------|------|------|--------|------|
 | `url` | `string` | **是** | — | 文件公网 URL |
 | `model_version` | `string` | 否 | `"pipeline"` | 模型版本 |
 | `is_ocr` | `bool` | 否 | `false` | 是否强制 OCR |
 | `enable_formula` | `bool` | 否 | `true` | 是否启用公式识别 |
 | `enable_table` | `bool` | 否 | `true` | 是否启用表格识别 |
 | `language` | `string` | 否 | `"ch"` | OCR 语言 |
 | `data_id` | `string` | 否 | — | 业务标识 |
 | `callback` | `string` | 否 | — | 回调 URL |
 | `seed` | `string` | 否 | — | 回调种子 |
 | `extra_formats` | `string[]` | 否 | — | 额外输出格式 |
 | `page_ranges` | `string` | 否 | — | 页码范围 |
 | `no_cache` | `bool` | 否 | `false` | 跳过 URL 缓存 |
 | `cache_tolerance` | `int` | 否 | `900` | 缓存容忍时间（秒） |
 #### 响应体
 ```json
 {
  "code": 0,
  "msg": "ok",
  "trace_id": "string",
  "data": { "task_id": "string" }
 }
 ```
 #### 查询结果
 `GET https://mineru.net/api/v4/extract/task/{task_id}`
 ```json
 {
  "code": 0,
  "data": {
    "task_id": "string",
    "data_id": "string",
    "state": "done",
    "full_zip_url": "https://cdn-mineru.openxlab.org.cn/...",
    "err_msg": null,
    "extract_progress": {
      "extracted_pages": 1,
      "total_pages": 1,
      "start_time": "2026-03-04 12:00:00"
    }
  }
 }
 ```
 ---
 ### 5.4 批量 URL 解析 — extract/task/batch
 **接口：** `POST https://mineru.net/api/v4/extract/task/batch`
 #### 请求体
 ```json
 {
  "files": [
    {"url": "https://...", "data_id": "doc1", "is_ocr": false, "page_ranges": "1-5"}
  ],
  "model_version": "pipeline",
  "enable_formula": true,
  "enable_table": true,
  "language": "ch",
  "extra_formats": ["docx"],
  "no_cache": false,
  "cache_tolerance": 900
 }
 ```
 #### 响应体
 ```json
 {
  "code": 0,
  "data": { "batch_id": "string" }
 }
 ```
 ---
 ### 5.5 查询结果接口
 #### 单任务查询
 `GET https://mineru.net/api/v4/extract/task/{task_id}`
 #### 批量查询（实测验证）
 `GET https://mineru.net/api/v4/extract-results/batch/{batch_id}`
 **响应体（实测验证）：**
 ```json
 {
  "code": 0,
  "msg": "ok",
  "trace_id": "string",
  "data": {
    "batch_id": "3b1729e9-c833-44b4-b9c2-201164001ab0",
    "extract_result": [
      {
        "file_name": "test_sample.pdf",
        "state": "done",
        "full_zip_url": "https://cdn-mineru.openxlab.org.cn/pdf/2026-03-04/...",
        "err_msg": null,
        "data_id": "mvp_test",
        "extract_progress": {
          "extracted_pages": 1,
          "total_pages": 1,
          "start_time": "2026-03-04 ..."
        }
      }
    ]
  }
 }
 ```
 ---
 ### 5.6 通用响应包装结构
 所有 API 响应均遵循统一包装格式：
 ```json
 {
  "code": 0,        // 0 = 成功，非 0 = 失败
  "msg": "ok",       // 状态描述
  "trace_id": "...", // 请求追踪 ID
  "data": { ... }    // 业务数据
 }
 ```
 ---
 ### 5.7 任务状态枚举（实测验证）
 | state | 说明 | 实测出现 |
 |-------|------|---------|
 | `waiting-file` | 等待文件上传完成 | ✅ |
 | `pending` | 排队等待解析 | ✅ |
 | `running` | 正在解析 | — |
 | `converting` | 格式转换中 | — |
 | `done` | 解析完成 | ✅ |
 | `failed` | 解析失败 | — |
 > **实测状态流转：** `waiting-file` → `pending` → `done`（小文件跳过 `running`）
 ---
 ### 5.8 错误码速查
 | 错误码 | 含义 |
 |--------|------|
 | `A0202` | Token 无效 |
 | `A0211` | Token 过期 |
 | `-60005` | 文件超过 200MB |
 | `-60006` | 页数超过 600 页 |
 | `-60018` | 当日解析额度用尽 |
--- a/docs/mineru_specification.md
+++ b/docs/mineru_specification.md
@@ -0,0 +1,680 @@
 # MinerU 文档解析规范文档
 > 基于 [opendatalab/MinerU](https://github.com/opendatalab/MinerU) 官方文档及云端 API 调研
 > 版本基线：2026-03-04
 ---
 ## 目录
 - [一、支持的原始输入文件格式](#一支持的原始输入文件格式)
  - [1.1 支持格式清单](#11-支持格式清单)
  - [1.2 输入限制](#12-输入限制)
  - [1.3 OCR 语言支持](#13-ocr-语言支持)
 - [二、云端 API 输出格式规范](#二云端-api-输出格式规范)
  - [2.1 输出文件总览](#21-输出文件总览)
  - [2.2 content_list.json 字段规范](#22-content_listjson-字段规范)
  - [2.3 middle.json 字段规范](#23-middlejson-字段规范)
  - [2.4 Markdown 输出规范](#24-markdown-输出规范)
  - [2.5 调试与可视化文件](#25-调试与可视化文件)
 - [三、布局信息规范](#三布局信息规范)
  - [3.1 坐标系定义](#31-坐标系定义)
  - [3.2 布局分类体系（Pipeline 后端）](#32-布局分类体系pipeline-后端)
  - [3.3 布局分类体系（VLM 后端）](#33-布局分类体系vlm-后端)
  - [3.4 内容层级与标题级别](#34-内容层级与标题级别)
  - [3.5 布局精度提取指南](#35-布局精度提取指南)
 - [四、云端 API MVP 必要字段](#四云端-api-mvp-必要字段)
  - [4.1 认证配置](#41-认证配置)
  - [4.2 创建解析任务 — 请求规范](#42-创建解析任务--请求规范)
  - [4.3 查询任务结果 — 响应规范](#43-查询任务结果--响应规范)
  - [4.4 批量任务接口](#44-批量任务接口)
  - [4.5 MVP 最小可用请求示例](#45-mvp-最小可用请求示例)
 ---
 ## 一、支持的原始输入文件格式
 ### 1.1 支持格式清单
 | 格式 | 扩展名 | 说明 |
 |------|--------|------|
 | **PDF** | `.pdf` | 核心能力 — 文本型 / 扫描型 / 混合型均支持 |
 | **Word** | `.doc`, `.docx` | 旧版和新版 Word 文档 |
 | **PowerPoint** | `.ppt`, `.pptx` | 旧版和新版演示文稿 |
 | **图片** | `.png`, `.jpg`, `.jpeg` | 单页图片文档，支持 EXIF 方向自动校正 |
 | **HTML** | `.html` | 需指定 `MinerU-HTML` 模型版本 |
 ### 1.2 输入限制
 | 约束项 | 限制值 |
 |--------|--------|
 | 单文件最大体积 | **200 MB** |
 | 单文件最大页数 | **600 页** |
 | 云端 API 每日免费额度 | **2,000 页**（最高优先级），超出部分降低优先级 |
 ### 1.3 OCR 语言支持
 MinerU 内置 OCR 引擎支持 **109 种语言**，可通过 `language` 参数指定文档主语言（默认 `zh` 中文）。常用语言代码：
 | 代码 | 语言 | 代码 | 语言 |
 |------|------|------|------|
 | `zh` | 中文 | `en` | 英文 |
 | `ja` | 日文 | `ko` | 韩文 |
 | `fr` | 法文 | `de` | 德文 |
 ---
 ## 二、云端 API 输出格式规范
 ### 2.1 输出文件总览
 云端 API 任务完成后，返回一个 ZIP 压缩包（通过 `full_zip_url` 获取），解压后包含以下文件：
 ```
 output/
 ├── auto/
 │   ├── auto.md                 # 多模态 Markdown（含图片引用）
 │   └── images/                 # 提取的图片资源
 │       ├── img_0_0.png
 │       ├── table_0_1.png
 │       └── ...
 ├── auto_nlp/
 │   └── auto_nlp.md             # 纯文本 NLP Markdown（无图片）
 ├── middle.json                 # 富元数据中间格式（完整层级结构）
 ├── content_list.json           # 扁平化内容块列表（按阅读顺序）
 ├── layout.pdf                  # 布局分析可视化（调试用）
 ├── span.pdf                    # Span 级别标注（Pipeline 后端，调试用）
 └── model.json                  # 原始模型推理结果（调试用）
 ```
 | 文件 | 用途 | 推荐场景 |
 |------|------|---------|
 | `content_list.json` | 扁平化内容块，按阅读顺序 | **推荐用于下游 NLP/KG 管道对接** |
 | `middle.json` | 完整层级结构，含丰富元数据 | 需要精确布局信息或二次开发 |
 | `auto/auto.md` | 多模态 Markdown | 人工阅读、LLM 直接消费 |
 | `auto_nlp/auto_nlp.md` | 纯文本 Markdown | 纯文本 NLP 处理 |
 | `layout.pdf` | 布局可视化 | 调试、验证解析质量 |
 ---
 ### 2.2 content_list.json 字段规范
 `content_list.json` 是一个 **JSON 数组**，每个元素是一个内容块，按文档阅读顺序排列。
 #### 2.2.1 公共字段（所有类型共有）
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `type` | `string` | 内容类型：`text` / `image` / `table` / `equation` / `code` / `list` |
 | `page_idx` | `int` | 所在页码（**0-indexed**） |
 | `bbox` | `[x0, y0, x1, y1]` | 边界框坐标，归一化到 **0–1000** 范围 |
 #### 2.2.2 文本块（type: "text"）
 ```json
 {
  "type": "text",
  "text": "段落正文内容...",
  "text_level": 0,
  "page_idx": 0,
  "bbox": [72, 120, 540, 145]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `text` | `string` | 文本内容 |
 | `text_level` | `int \| null` | 标题级别：`null` 或 `0` = 正文，`1` = 一级标题，`2` = 二级标题，依此类推 |
 #### 2.2.3 图片块（type: "image"）
 ```json
 {
  "type": "image",
  "img_path": "images/img_0_0.png",
  "image_caption": ["Figure 1: System architecture"],
  "image_footnote": ["Source: internal report"],
  "page_idx": 1,
  "bbox": [100, 200, 500, 600]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `img_path` | `string` | 图片文件相对路径 |
 | `image_caption` | `string[]` | 图片标题列表 |
 | `image_footnote` | `string[]` | 图片脚注列表 |
 #### 2.2.4 表格块（type: "table"）
 ```json
 {
  "type": "table",
  "img_path": "images/table_0_1.png",
  "table_body": "<html><body><table><tr><td>...</td></tr></table></body></html>",
  "table_caption": ["Table 1: Performance comparison"],
  "table_footnote": ["* p < 0.05"],
  "page_idx": 2,
  "bbox": [50, 300, 950, 700]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `img_path` | `string` | 表格截图相对路径 |
 | `table_body` | `string` | 表格 HTML 表示（`<table>` 标签） |
 | `table_caption` | `string[]` | 表格标题列表 |
 | `table_footnote` | `string[]` | 表格脚注列表 |
 #### 2.2.5 公式块（type: "equation"）
 ```json
 {
  "type": "equation",
  "text": "E = mc^2",
  "text_format": "latex",
  "img_path": "images/eq_0_0.png",
  "page_idx": 3,
  "bbox": [200, 400, 800, 450]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `text` | `string` | 公式的 LaTeX 表示 |
 | `text_format` | `string` | 固定值 `"latex"` |
 | `img_path` | `string` | 公式截图相对路径 |
 #### 2.2.6 代码块（type: "code"）— VLM 后端
 ```json
 {
  "type": "code",
  "sub_type": "code",
  "code_body": "def hello():\n    print('hello')",
  "code_caption": ["Listing 1: Example function"],
  "page_idx": 4,
  "bbox": [80, 100, 920, 300]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `sub_type` | `string` | `"code"` 或 `"algorithm"` |
 | `code_body` | `string` | 代码文本内容 |
 | `code_caption` | `string[]` | 代码块标题（可选） |
 #### 2.2.7 列表块（type: "list"）— VLM 后端
 ```json
 {
  "type": "list",
  "sub_type": "text",
  "list_items": ["第一项", "第二项", "第三项"],
  "page_idx": 5,
  "bbox": [72, 200, 540, 350]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `sub_type` | `string` | `"text"` 或 `"ref_text"`（参考文献列表） |
 | `list_items` | `string[]` | 列表项内容 |
 ---
 ### 2.3 middle.json 字段规范
 `middle.json` 是 MinerU 的富元数据中间格式，保留完整的文档层级结构。
 #### 2.3.1 顶层结构
 ```json
 {
  "_backend": "pipeline | vlm | hybrid",
  "_version_name": "2.7.4",
  "pdf_info": [ ... ]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `_backend` | `string` | 使用的解析后端 |
 | `_version_name` | `string` | MinerU 版本标识 |
 | `pdf_info` | `array` | 按页组织的解析结果数组 |
 #### 2.3.2 页级结构（pdf_info 数组元素）
 ```json
 {
  "page_idx": 0,
  "page_size": [595.0, 842.0],
  "preproc_blocks": [ ... ],
  "para_blocks": [ ... ],
  "images": [ ... ],
  "tables": [ ... ],
  "interline_equations": [ ... ],
  "discarded_blocks": [ ... ]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `page_idx` | `int` | 页码（0-indexed） |
 | `page_size` | `[float, float]` | 页面尺寸 `[宽, 高]`（原始 PDF 坐标系，单位 pt） |
 | `preproc_blocks` | `array` | 未分段的预处理块 |
 | `para_blocks` | `array` | **已分段的内容块**（主输出） |
 | `images` | `array` | 提取的图片块 |
 | `tables` | `array` | 提取的表格块 |
 | `interline_equations` | `array` | 行间公式块 |
 | `discarded_blocks` | `array` | 被过滤的内容（页眉、页脚、页码等） |
 #### 2.3.3 内容块层级结构
 内容块采用三级层级：**Block → Line → Span**
 **一级块（Level 1）— 容器块：**
 ```json
 {
  "type": "table",
  "bbox": [x0, y0, x1, y1],
  "blocks": [ ... ]
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `type` | `string` | `"table"` 或 `"image"` |
 | `bbox` | `[x0, y0, x1, y1]` | 边界框坐标（原始 PDF 坐标系） |
 | `blocks` | `array` | 包含的二级块 |
 **二级块（Level 2）— 语义块：**
 ```json
 {
  "type": "text",
  "bbox": [x0, y0, x1, y1],
  "lines": [ ... ]
 }
 ```
 | `type` 值 | 说明 |
 |-----------|------|
 | `text` | 正文段落 |
 | `title` | 标题 |
 | `image_body` | 图片主体 |
 | `image_caption` | 图片标题 |
 | `image_footnote` | 图片脚注 |
 | `table_body` | 表格主体 |
 | `table_caption` | 表格标题 |
 | `table_footnote` | 表格脚注 |
 | `interline_equation` | 行间公式 |
 | `index` | 目录项 |
 | `list` | 列表项 |
 **行结构（Line）：**
 ```json
 {
  "bbox": [x0, y0, x1, y1],
  "spans": [ ... ]
 }
 ```
 **Span 结构（最小粒度）：**
 ```json
 {
  "bbox": [x0, y0, x1, y1],
  "type": "text",
  "content": "具体文本内容",
  "score": 0.95
 }
 ```
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `bbox` | `[x0, y0, x1, y1]` | 边界框坐标 |
 | `type` | `string` | `text` / `image` / `table` / `inline_equation` / `interline_equation` |
 | `content` | `string` | 文本内容（text 类型）|
 | `img_path` | `string` | 图片路径（image/table 类型）|
 | `score` | `float` | 模型置信度（0.0~1.0） |
 ---
 ### 2.4 Markdown 输出规范
 | 文件 | 特点 |
 |------|------|
 | `auto/auto.md` | 图片以 `![](images/img_x_x.png)` 引用；表格保留为 Markdown 表格或 HTML；公式使用 `$...$` 和 `$$...$$` 定界符 |
 | `auto_nlp/auto_nlp.md` | 纯文本，图片/表格替换为占位文本描述；适合直接送入 NLP 管道 |
 ---
 ### 2.5 调试与可视化文件
 | 文件 | 格式 | 说明 |
 |------|------|------|
 | `layout.pdf` | PDF | 每页叠加带编号的检测框，不同颜色区分内容类型，验证布局分析准确性和阅读顺序 |
 | `span.pdf` | PDF | 用不同颜色线框标注页面内容的 span 类型（仅 Pipeline 后端），排查文本丢失和公式识别问题 |
 | `model.json` | JSON | 原始模型推理结果，包含 `category_id`、`poly`（四边形坐标）、`score`（置信度） |
 ---
 ## 三、布局信息规范
 ### 3.1 坐标系定义
 MinerU 使用两套坐标系，取决于输出文件：
 | 坐标系 | 适用文件 | 范围 | 原点 | 说明 |
 |--------|---------|------|------|------|
 | **归一化坐标** | `content_list.json` | `0 – 1000` | 左上角 | 页面宽高均映射到 0~1000 |
 | **原始 PDF 坐标** | `middle.json` | 实际 pt 值 | 左上角 | 与 PDF 页面尺寸一致（如 A4 = 595×842） |
 | **归一化比例坐标** | `model.json`（VLM） | `0.0 – 1.0` | 左上角 | 宽高均映射到 0~1 |
 **bbox 格式统一为：`[x0, y0, x1, y1]`**
 ```
 (x0, y0) ─────────────────── (x1, y0)
    │                            │
    │       内容区域              │
    │                            │
 (x0, y1) ─────────────────── (x1, y1)
 ```
 - `x0, y0`：左上角坐标
 - `x1, y1`：右下角坐标
 ### 3.2 布局分类体系（Pipeline 后端）
 `model.json` 中的 `category_id` 枚举：
 | category_id | 类型 | 说明 |
 |-------------|------|------|
 | 0 | `title` | 标题 |
 | 1 | `plain_text` | 正文文本 |
 | 2 | `abandon` | 丢弃区域（页眉/页脚/页码等） |
 | 3 | `figure` | 图片 |
 | 4 | `figure_caption` | 图片标题 |
 | 5 | `table` | 表格 |
 | 6 | `table_caption` | 表格标题 |
 | 7 | `table_footnote` | 表格脚注 |
 | 8 | `isolate_formula` | 独立行间公式 |
 | 9 | `formula_caption` | 公式标题 |
 | 13 | `embedding` | 嵌入内容 |
 | 14 | `isolated` | 隔离内容 |
 | 15 | `OCR_text` | OCR 识别文本 |
 ### 3.3 布局分类体系（VLM 后端）
 VLM 后端使用字符串类型标识，分类更细：
 | type 值 | 说明 |
 |---------|------|
 | `text` | 正文 |
 | `title` | 标题 |
 | `equation` | 公式 |
 | `image` | 图片 |
 | `image_caption` | 图片标题 |
 | `image_footnote` | 图片脚注 |
 | `table` | 表格 |
 | `table_caption` | 表格标题 |
 | `table_footnote` | 表格脚注 |
 | `code` | 代码块 |
 | `code_caption` | 代码标题 |
 | `list` | 列表 |
 | `header` | 页眉（discarded） |
 | `footer` | 页脚（discarded） |
 | `page_number` | 页码（discarded） |
 | `aside_text` | 边栏文字（discarded） |
 | `page_footnote` | 页面脚注（discarded） |
 | `ref_text` | 参考文献 |
 | `algorithm` | 算法伪代码 |
 | `phonetic` | 注音 |
 ### 3.4 内容层级与标题级别
 `content_list.json` 中的 `text_level` 字段标识文档结构层级：
 | text_level | 含义 | 对应 Markdown |
 |------------|------|--------------|
 | `null` 或 `0` | 正文 | 无标记 |
 | `1` | 一级标题 | `# Heading` |
 | `2` | 二级标题 | `## Heading` |
 | `3` | 三级标题 | `### Heading` |
 | `4` | 四级标题 | `#### Heading` |
 | `5+` | 更深层标题 | `#####+ Heading` |
 ### 3.5 布局精度提取指南
 针对不同数据类型的精确提取建议：
 #### 文本提取
 ```python
 # 从 content_list.json 提取所有正文文本
 texts = [
    block for block in content_list
    if block["type"] == "text"
 ]
 # 按页过滤
 page_0_texts = [b for b in texts if b["page_idx"] == 0]
 ```
 #### 标题层级提取
 ```python
 # 提取文档大纲结构
 headings = [
    {"level": block["text_level"], "text": block["text"], "page": block["page_idx"]}
    for block in content_list
    if block["type"] == "text" and block.get("text_level") and block["text_level"] >= 1
 ]
 ```
 #### 表格数值提取
 ```python
 # 表格以 HTML 形式存储在 table_body 中，可用 BeautifulSoup 解析
 from bs4 import BeautifulSoup
 tables = [b for b in content_list if b["type"] == "table"]
 for table in tables:
    soup = BeautifulSoup(table["table_body"], "html.parser")
    rows = []
    for tr in soup.find_all("tr"):
        cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
        rows.append(cells)
 ```
 #### 空间位置定位
 ```python
 # 利用 bbox 判断内容在页面中的位置
 def get_position(bbox, threshold=500):
    """Classify a bounding box as lying in the upper or lower part of a page.

    The vertical midpoint of ``bbox`` (``[x0, y0, x1, y1]``) is compared with
    ``threshold``; the default of 500 is the middle of the normalized 0-1000
    coordinate range. Returns ``"upper"`` when the midpoint is strictly below
    the threshold value, otherwise ``"lower"``.
    """
    midpoint = (bbox[1] + bbox[3]) / 2
    if midpoint < threshold:
        return "upper"
    return "lower"
 # 判断两个块是否水平相邻（同一行）
 def is_same_row(block_a, block_b, tolerance=20):
    """Return True when two blocks start at nearly the same vertical position.

    Only the top edges (``bbox[1]``) are compared; the blocks count as being
    on the same row when those edges differ by strictly less than
    ``tolerance``.
    """
    top_a = block_a["bbox"][1]
    top_b = block_b["bbox"][1]
    return abs(top_a - top_b) < tolerance
 ```
 ---
 ## 四、云端 API MVP 必要字段
 ### 4.1 认证配置
 | 配置项 | 值 | 获取方式 |
 |--------|-----|---------|
 | Token | Bearer Token 字符串 | [mineru.net/apiManage/token](https://mineru.net/apiManage/token) 注册后获取 |
 **请求头格式（所有接口通用）：**
 ```
 Authorization: Bearer {your_token}
 Content-Type: application/json
 ```
 ---
 ### 4.2 创建解析任务 — 请求规范
 **接口：** `POST https://mineru.net/api/v4/extract/task`
 #### 请求体字段
 | 字段 | 类型 | 必填 | 默认值 | 说明 |
 |------|------|------|--------|------|
 | `url` | `string` | **是** | — | 待解析文件的公网可访问 URL |
 | `is_ocr` | `bool` | 否 | `false` | 是否强制启用 OCR（扫描件建议开启） |
 | `enable_formula` | `bool` | 否 | `true` | 是否启用公式识别 |
 | `enable_table` | `bool` | 否 | `true` | 是否启用表格识别 |
 | `language` | `string` | 否 | `"zh"` | 文档主语言代码 |
 | `model_version` | `string` | 否 | `"pipeline"` | 模型版本：`pipeline` / `vlm` / `MinerU-HTML` |
 | `data_id` | `string` | 否 | — | 自定义业务标识（用于关联追踪） |
 | `callback` | `string` | 否 | — | 任务完成后的回调通知 URL |
 #### MVP 最小必填字段
 ```json
 {
  "url": "https://example.com/document.pdf"
 }
 ```
 > 仅 `url` 为必填，其余参数均有合理默认值。
 ---
 ### 4.3 查询任务结果 — 响应规范
 **接口：** `GET https://mineru.net/api/v4/extract/task/{task_id}`
 #### 响应体字段
 > 以下字段位于响应包装结构的 `data` 对象内（外层为 `code` / `msg` / `trace_id` / `data`）。
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `task_id` | `string` | 任务唯一标识 |
 | `state` | `string` | 任务状态（见下方枚举） |
 | `err_msg` | `string \| null` | 错误信息（失败时） |
 | `full_zip_url` | `string \| null` | 完整输出 ZIP 下载地址（成功时） |
 | `file_name` | `string` | 原始文件名 |
 | `batch_id` | `string \| null` | 批量任务 ID（如有） |
 #### 任务状态枚举
 | state | 说明 |
 |-------|------|
 | `pending` | 排队等待中 |
 | `running` | 正在解析 |
 | `done` | 解析完成 |
 | `failed` | 解析失败（查看 `err_msg`） |
 ---
 ### 4.4 批量任务接口
 #### 4.4.1 批量获取上传 URL
 **接口：** `POST https://mineru.net/api/v4/file-urls/batch`
 用于获取文件上传的预签名 URL（适合本地文件上传场景）。
 #### 4.4.2 批量创建任务
 **接口：** `POST https://mineru.net/api/v4/extract/task/batch`
 请求体中 `files` 数组包含多个文件的解析参数。
 #### 4.4.3 批量查询结果
 **接口：** `GET https://mineru.net/api/v4/extract-results/batch/{batch_id}`
 ---
 ### 4.5 MVP 最小可用请求示例
 #### Python 实现
 ```python
 import io
 import os
 import time
 import zipfile
 import requests
 MINERU_API_TOKEN = os.getenv("MINERU_API_TOKEN")
 if not MINERU_API_TOKEN:
    # Fail early instead of sending "Bearer None" and getting an auth error back.
    raise SystemExit("MINERU_API_TOKEN is not set")
 BASE_URL = "https://mineru.net/api/v4"
 HEADERS = {
    "Authorization": f"Bearer {MINERU_API_TOKEN}",
    "Content-Type": "application/json",
 }
 REQUEST_TIMEOUT_SECONDS = 30   # per HTTP call
 POLL_INTERVAL_SECONDS = 5      # pause between status queries
 POLL_TIMEOUT_SECONDS = 600     # give up polling after this long
 def _unwrap(resp):
    """Return the ``data`` payload of an API response, raising on any failure.

    Every API response is wrapped as ``{"code", "msg", "trace_id", "data"}``;
    ``code == 0`` means success, so the business fields live under ``data``.
    """
    resp.raise_for_status()
    body = resp.json()
    if body.get("code") != 0:
        raise RuntimeError(f"MinerU API error {body.get('code')}: {body.get('msg')}")
    return body["data"]
 # (1) Create the parsing task (``url`` is the only required field)
 created = _unwrap(requests.post(
    f"{BASE_URL}/extract/task",
    headers=HEADERS,
    json={
        "url": "https://example.com/sample.pdf",   # required: public URL of the file
        # "is_ocr": False,                          # optional: defaults to false
        # "enable_formula": True,                   # optional: defaults to true
        # "enable_table": True,                     # optional: defaults to true
        # "language": "zh",                         # optional: defaults to Chinese
    },
    timeout=REQUEST_TIMEOUT_SECONDS,
 ))
 task_id = created["task_id"]
 print(f"Task created: {task_id}")
 # (2) Poll until the task reaches a terminal state or the deadline passes
 deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
 while True:
    result = _unwrap(requests.get(
        f"{BASE_URL}/extract/task/{task_id}",
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    ))
    state = result["state"]
    print(f"State: {state}")
    if state == "done":
        zip_url = result["full_zip_url"]
        print(f"Download: {zip_url}")
        break
    if state == "failed":
        # Stop here: there is no zip_url to download for a failed task.
        raise SystemExit(f"Error: {result['err_msg']}")
    if time.monotonic() >= deadline:
        raise SystemExit(f"Error: task {task_id} not finished after {POLL_TIMEOUT_SECONDS}s")
    time.sleep(POLL_INTERVAL_SECONDS)
 # (3) Download the result archive and extract it
 zip_resp = requests.get(zip_url, timeout=REQUEST_TIMEOUT_SECONDS)
 zip_resp.raise_for_status()
 with zipfile.ZipFile(io.BytesIO(zip_resp.content)) as zf:
    zf.extractall("./mineru_output/")
    print("Files:", zf.namelist())
 ```
 #### cURL 实现
 ```bash
 # 创建任务
 curl -X POST https://mineru.net/api/v4/extract/task \
  -H "Authorization: Bearer YOUR_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{"url": "https://example.com/sample.pdf"}'
 # 查询结果
 curl https://mineru.net/api/v4/extract/task/{task_id} \
  -H "Authorization: Bearer YOUR_TOKEN"
 ```
 #### MVP 检查清单
 - [ ] 已在 [mineru.net](https://mineru.net/) 注册账号
 - [ ] 已在 [Token 管理页](https://mineru.net/apiManage/token) 获取 API Token
 - [ ] 已将 Token 配置到 `.env` 文件：`MINERU_API_TOKEN=xxx`
 - [ ] 准备了公网可访问的测试文件 URL（PDF/DOCX/PPT/图片）
 - [ ] 安装了 `requests` 库：`pip install requests`
--- a/docs/product_requirements_document-v1.0.md
+++ b/docs/product_requirements_document-v1.0.md
--- a/frontend/.gitignore
+++ b/frontend/.gitignore
@@ -0,0 +1,2 @@
 **/.git_embedded_backup/
--- a/frontend/ATTRIBUTIONS.md
+++ b/frontend/ATTRIBUTIONS.md
@@ -0,0 +1,3 @@
 This Figma Make file includes components from [shadcn/ui](https://ui.shadcn.com/) used under [MIT license](https://github.com/shadcn-ui/ui/blob/main/LICENSE.md).
 This Figma Make file includes photos from [Unsplash](https://unsplash.com) used under [license](https://unsplash.com/license).
--- a/frontend/CLAUDE.md
+++ b/frontend/CLAUDE.md
@@ -0,0 +1,33 @@
 # Frontend — 开发说明
 ## 路径
 ```
 F:\GraphRAGAgent\frontend\
 ```
 ## 启动开发服务器
 ```bash
 cd F:/GraphRAGAgent/frontend
 pnpm dev
 ```
 启动后访问：http://localhost:5173
 ## 依赖安装
 ```bash
 cd F:/GraphRAGAgent/frontend
 pnpm install
 pnpm rebuild @tailwindcss/oxide esbuild
 ```
 > 注意：首次安装后需执行 `pnpm rebuild @tailwindcss/oxide esbuild`，否则 Vite 构建会因原生包未编译而失败。
 ## 构建生产包
 ```bash
 cd F:/GraphRAGAgent/frontend
 pnpm build
 ```
--- a/frontend/README.md
+++ b/frontend/README.md
@@ -0,0 +1,11 @@
  # 构建产品原型
  This is a code bundle for 构建产品原型. The original project is available at https://www.figma.com/design/Tt95Sj8nC3HvirV1Vw4cA7/%E6%9E%84%E5%BB%BA%E4%BA%A7%E5%93%81%E5%8E%9F%E5%9E%8B.
  ## Running the code
  Run `npm i` to install the dependencies.
  Run `npm run dev` to start the development server.
--- a/frontend/guidelines/Guidelines.md
+++ b/frontend/guidelines/Guidelines.md
@@ -0,0 +1,61 @@
 **Add your own guidelines here**
 <!--
 System Guidelines
 Use this file to provide the AI with rules and guidelines you want it to follow.
 This template outlines a few examples of things you can add. You can add your own sections and format it to suit your needs
 TIP: More context isn't always better. It can confuse the LLM. Try and add the most important rules you need
 # General guidelines
 Any general rules you want the AI to follow.
 For example:
 * Only use absolute positioning when necessary. Opt for responsive and well structured layouts that use flexbox and grid by default
 * Refactor code as you go to keep code clean
 * Keep file sizes small and put helper functions and components in their own files.
 --------------
 # Design system guidelines
 Rules for how the AI should make generations look like your company's design system
 Additionally, if you select a design system to use in the prompt box, you can reference
 your design system's components, tokens, variables and components.
 For example:
 * Use a base font-size of 14px
 * Date formats should always be in the format “Jun 10”
 * The bottom toolbar should only ever have a maximum of 4 items
 * Never use the floating action button with the bottom toolbar
 * Chips should always come in sets of 3 or more
 * Don't use a dropdown if there are 2 or fewer options
 You can also create sub sections and add more specific details
 For example:
 ## Button
 The Button component is a fundamental interactive element in our design system, designed to trigger actions or navigate
 users through the application. It provides visual feedback and clear affordances to enhance user experience.
 ### Usage
 Buttons should be used for important actions that users need to take, such as form submissions, confirming choices,
 or initiating processes. They communicate interactivity and should have clear, action-oriented labels.
 ### Variants
 * Primary Button
  * Purpose : Used for the main action in a section or page
  * Visual Style : Bold, filled with the primary brand color
  * Usage : One primary button per section to guide users toward the most important action
 * Secondary Button
  * Purpose : Used for alternative or supporting actions
  * Visual Style : Outlined with the primary color, transparent background
  * Usage : Can appear alongside a primary button for less important actions
 * Tertiary Button
  * Purpose : Used for the least important actions
  * Visual Style : Text-only with no border, using primary color
  * Usage : For actions that should be available but not emphasized
 -->
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -0,0 +1,15 @@
  <!DOCTYPE html>
  <html lang="en">
    <head>
      <meta charset="UTF-8" />
      <meta name="viewport" content="width=device-width, initial-scale=1.0" />
      <title>构建产品原型</title>
    </head>
    <body>
      <div id="root"></div>
      <script type="module" src="/src/main.tsx"></script>
    </body>
  </html>
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -0,0 +1,95 @@
 {
  "name": "@figma/my-make-file",
  "private": true,
  "version": "0.0.1",
  "type": "module",
  "scripts": {
    "build": "vite build",
    "dev": "vite"
  },
  "dependencies": {
    "@emotion/react": "11.14.0",
    "@emotion/styled": "11.14.1",
    "@mui/icons-material": "7.3.5",
    "@mui/material": "7.3.5",
    "@popperjs/core": "2.11.8",
    "@radix-ui/react-accordion": "1.2.3",
    "@radix-ui/react-alert-dialog": "1.1.6",
    "@radix-ui/react-aspect-ratio": "1.1.2",
    "@radix-ui/react-avatar": "1.1.3",
    "@radix-ui/react-checkbox": "1.1.4",
    "@radix-ui/react-collapsible": "1.1.3",
    "@radix-ui/react-context-menu": "2.2.6",
    "@radix-ui/react-dialog": "1.1.6",
    "@radix-ui/react-dropdown-menu": "2.1.6",
    "@radix-ui/react-hover-card": "1.1.6",
    "@radix-ui/react-label": "2.1.2",
    "@radix-ui/react-menubar": "1.1.6",
    "@radix-ui/react-navigation-menu": "1.2.5",
    "@radix-ui/react-popover": "1.1.6",
    "@radix-ui/react-progress": "1.1.2",
    "@radix-ui/react-radio-group": "1.2.3",
    "@radix-ui/react-scroll-area": "1.2.3",
    "@radix-ui/react-select": "2.1.6",
    "@radix-ui/react-separator": "1.1.2",
    "@radix-ui/react-slider": "1.2.3",
    "@radix-ui/react-slot": "1.1.2",
    "@radix-ui/react-switch": "1.1.3",
    "@radix-ui/react-tabs": "1.1.3",
    "@radix-ui/react-toggle": "1.1.2",
    "@radix-ui/react-toggle-group": "1.1.2",
    "@radix-ui/react-tooltip": "1.1.8",
    "@types/d3": "^7.4.3",
    "class-variance-authority": "0.7.1",
    "clsx": "2.1.1",
    "cmdk": "1.1.1",
    "d3": "^7.9.0",
    "date-fns": "3.6.0",
    "embla-carousel-react": "8.6.0",
    "input-otp": "1.4.2",
    "lucide-react": "0.487.0",
    "marked": "^17.0.4",
    "motion": "12.23.24",
    "next-themes": "0.4.6",
    "optional": "^0.1.4",
    "react": "^19.2.5",
    "react-day-picker": "8.10.1",
    "react-dnd": "16.0.1",
    "react-dnd-html5-backend": "16.0.1",
    "react-dom": "^19.2.5",
    "react-hook-form": "7.55.0",
    "react-popper": "2.3.0",
    "react-resizable-panels": "2.1.7",
    "react-responsive-masonry": "2.7.1",
    "react-router": "7.13.0",
    "react-slick": "0.31.0",
    "recharts": "2.15.2",
    "sonner": "2.0.3",
    "tailwind-merge": "3.2.0",
    "tw-animate-css": "1.3.8",
    "vaul": "1.1.2"
  },
  "devDependencies": {
    "@tailwindcss/vite": "4.1.12",
    "@vitejs/plugin-react": "4.7.0",
    "tailwindcss": "4.1.12",
    "vite": "6.3.5"
  },
  "peerDependencies": {
    "react": "18.3.1",
    "react-dom": "18.3.1"
  },
  "peerDependenciesMeta": {
    "react": {
      "optional": true
    },
    "react-dom": {
      "optional": true
    }
  },
  "pnpm": {
    "overrides": {
      "vite": "6.3.5"
    }
  }
 }
--- a/frontend/pnpm-lock.yaml
+++ b/frontend/pnpm-lock.yaml
--- a/frontend/postcss.config.mjs
+++ b/frontend/postcss.config.mjs
@@ -0,0 +1,15 @@
 /**
 * PostCSS Configuration
 *
 * Tailwind CSS v4 (via @tailwindcss/vite) automatically sets up all required
 * PostCSS plugins — you do NOT need to include `tailwindcss` or `autoprefixer` here.
 *
 * This file only exists for adding additional PostCSS plugins, if needed.
 * For example:
 *
 * import postcssNested from 'postcss-nested'
 * export default { plugins: [postcssNested()] }
 *
 * Otherwise, you can leave this file empty.
 */
 export default {}
--- a/frontend/src/app/App.tsx
+++ b/frontend/src/app/App.tsx
@@ -0,0 +1,7 @@
 import { RouterProvider } from 'react-router';
 import { router } from './routes';
 import '../styles/app.css';
 /** Application root: hands the configured `router` to react-router's provider. */
 export default function App() {
  const routedApp = <RouterProvider router={router} />;
  return routedApp;
 }
--- a/frontend/src/app/api.ts
+++ b/frontend/src/app/api.ts
@@ -0,0 +1,279 @@
 /**
 * GraphRAG Studio — Backend API Client
 * Base: http://localhost:8000/api/v1
 * All functions return the `data` field; throw ApiError on code !== 0
 */
 // URL prefix prepended to every request path by `request()`; hard-coded to a
 // backend on localhost:8000.
 const BASE = 'http://localhost:8000/api/v1';
 /**
 * Error raised by the API client when the backend reports a failure.
 *
 * `code` carries the numeric error code from the response envelope and
 * `message` carries its human-readable text.
 */
 export class ApiError extends Error {
  code: number;
  constructor(code: number, msg: string) {
    super(msg);
    // Subclassing Error loses the prototype chain when the class is compiled
    // down to ES5; restoring it keeps `instanceof ApiError` reliable.
    Object.setPrototypeOf(this, new.target.prototype);
    // Without an explicit name, logs and stack traces show a generic "Error".
    this.name = 'ApiError';
    this.code = code;
  }
 }
 /**
 * Send one HTTP request to the backend and unwrap its response envelope.
 *
 * @param method  HTTP verb, e.g. 'GET' or 'POST'.
 * @param path    Path appended to `BASE` (must start with '/').
 * @param options `params` become the query string (entries that are
 *                undefined, null or '' are dropped); `formData` is sent as-is;
 *                otherwise `body` is JSON-encoded.
 * @returns The `data` field of the response envelope.
 * @throws ApiError when the envelope's `code` is not 0, or when the response
 *         body is not a JSON envelope at all (the HTTP status is then used as
 *         the error code). Network failures from `fetch` propagate unchanged.
 */
 async function request<T>(
  method: string,
  path: string,
  options: {
    body?: unknown;
    formData?: FormData;
    params?: Record<string, string | number | boolean | undefined | null>;
  } = {}
 ): Promise<T> {
  let url = BASE + path;
  if (options.params) {
    const parts = Object.entries(options.params)
      .filter(([, v]) => v !== undefined && v !== null && v !== '')
      .map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(String(v))}`);
    if (parts.length) url += '?' + parts.join('&');
  }
  const init: RequestInit = { method };
  if (options.formData) {
    // No Content-Type header here: the browser must set the multipart boundary.
    init.body = options.formData;
  } else if (options.body !== undefined) {
    init.headers = { 'Content-Type': 'application/json' };
    init.body = JSON.stringify(options.body);
  }
  const res = await fetch(url, init);
  let json: { code?: number; msg?: string; data?: unknown; detail?: unknown } | null;
  try {
    json = await res.json();
  } catch {
    // Non-JSON body (e.g. an HTML error page from a proxy or a crashed
    // server): surface it as an ApiError instead of a raw SyntaxError.
    throw new ApiError(res.status, `HTTP ${res.status} ${res.statusText}`.trim());
  }
  if (!json || json.code !== 0) {
    // Responses that bypass the `{code, msg, data}` envelope may carry the
    // reason in `detail`; fall back to it, then to the generic message.
    const detail = typeof json?.detail === 'string' ? json.detail : undefined;
    throw new ApiError(json?.code ?? res.status, json?.msg ?? detail ?? 'Unknown error');
  }
  return json.data as T;
 }
 /** GET `path`, optionally with query-string parameters. */
 function get<T>(
  path: string,
  params?: Record<string, string | number | boolean | undefined | null>
 ): Promise<T> {
  return request<T>('GET', path, { params });
 }
 /** POST `body` to `path` as JSON. */
 function post<T>(path: string, body?: unknown): Promise<T> {
  return request<T>('POST', path, { body });
 }
 /** POST a multipart form to `path`. */
 function postForm<T>(path: string, fd: FormData): Promise<T> {
  return request<T>('POST', path, { formData: fd });
 }
 /** Issue a DELETE against `path`. */
 function del<T>(path: string): Promise<T> {
  return request<T>('DELETE', path);
 }
 // ─── Response Types ───────────────────────────────────────────────────────────
 /** A document record, as returned by `api.listDocuments` / `api.getDocument`. */
 export interface ApiDoc {
  doc_id: string;
  filename: string;
  format: string;
  pages: number | null;
  status: 'uploaded' | 'indexing' | 'indexed' | 'failed';
  upload_date: string;
  job_id?: string | null;
  file_size?: number;
  error_msg?: string | null;
 }
 /** State of one indexing job, as returned by `api.getJobStatus`. */
 export interface ApiJobStatus {
  job_id: string;
  doc_id: string;
  status: 'submitted' | 'queued' | 'parsing' | 'extracting' | 'indexing' | 'done' | 'failed' | 'cancelled';
  stage: string;
  progress: number; // 0.0–1.0
  started_at?: string;
  updated_at?: string;
  error_msg?: string | null;
 }
 /** Result summary of an indexing job, as returned by `api.getJobResult`. */
 export interface ApiIndexResult {
  job_id: string;
  doc_id: string;
  status: string;
  nodes_added: number;
  edges_added: number;
  total_nodes: number;
  total_edges: number;
  pages_processed: number;
  extractions_count: number;
  duration_seconds: number;
 }
 /** A knowledge-graph node. Note the owning document is exposed as `source_doc`. */
 export interface ApiKGNode {
  id: string;
  name: string;
  type: string;
  page: number;
  confidence: string;
  degree: number;
  source_doc: string;
  // Only present in detail endpoint:
  degree_centrality?: number;
  neighbor_count?: number;
 }
 /** A knowledge-graph edge; `source` and `target` are strings (presumably node ids — verify against the backend). */
 export interface ApiKGEdge {
  id: string;
  source: string;
  target: string;
  relation: string;
  doc_id: string;
  page: number;
 }
 /** Health report returned by `api.getHealth`, with one status per component. */
 export interface ApiHealthData {
  status: string;
  version: string;
  uptime_seconds: number;
  components: {
    mineru_venv: { status: string };
    langextract_venv: { status: string };
    deepseek_api: { status: string };
    storage: { status: string };
  };
 }
 /** Aggregate counters returned by `api.getSystemStats`. */
 export interface ApiStats {
  total_documents: number;
  indexed_documents: number;
  failed_documents: number;
  total_nodes: number;
  total_edges: number;
  total_queries: number;
  active_jobs: number;
  storage_used_mb: number;
 }
 /** One tool invocation recorded in `ApiQueryResult.tool_calls`. */
 export interface ApiToolCall {
  step: number;
  tool_name: string;
  tool_input: string;
  tool_output: string;
 }
 /** A question/answer record, as returned by `api.query` and `api.getQueryHistory`. */
 export interface ApiQueryResult {
  id: string;
  question: string;
  answer: string;
  tool_calls: ApiToolCall[];
  cited_nodes: string[]; // node IDs
  duration_seconds: number;
  timestamp: string;
 }
 /** Entity search response returned by `api.searchEntities`. */
 export interface ApiSearchResult {
  query: string;
  total: number;
  items: ApiKGNode[];
 }
 /** Path search response returned by `api.searchPath`. */
 export interface ApiPathResult {
  from: { id: string; name: string; type: string };
  to: { id: string; name: string; type: string };
  max_hops: number;
  total_paths: number;
  paths: Array<{
    length: number;
    nodes: Array<{ id: string; name: string; type: string }>;
    edges?: Array<{ source: string; target: string; relation: string }>;
  }>;
 }
 /** Graph search response returned by `api.searchGraph`. */
 export interface ApiGraphSearchResult {
  query: string;
  matched_nodes: ApiKGNode[];
  subgraph_edges: ApiKGEdge[];
  total_nodes: number;
 }
 // ─── API Functions ────────────────────────────────────────────────────────────
 /**
 * Typed wrappers around the backend endpoints, grouped by area (A–F).
 *
 * Every function resolves to the `data` payload of the response and rejects
 * with `ApiError` on failure. Identifiers interpolated into URL paths are
 * passed through `encodeURIComponent` so that ids containing reserved
 * characters ('?', '#', '%', spaces, …) cannot corrupt the request URL.
 */
 export const api = {
  // A: Documents
  listDocuments: (page = 1, pageSize = 100) =>
    get<{ total: number; page: number; page_size: number; items: ApiDoc[] }>(
      '/documents', { page, page_size: pageSize }
    ),
  getDocument: (docId: string) => get<ApiDoc>(`/documents/${encodeURIComponent(docId)}`),
  uploadDocument: (file: File) => {
    const fd = new FormData();
    fd.append('file', file);
    return postForm<{ doc_id: string; filename: string; format: string; status: string }>(
      '/documents/upload', fd
    );
  },
  deleteDocument: (docId: string) =>
    del<{ doc_id: string; removed_nodes: number; removed_edges: number }>(
      `/documents/${encodeURIComponent(docId)}`
    ),
  // B: Indexing
  startIndexing: (docId: string) =>
    post<{ job_id: string; doc_id: string; status: string }>('/index/start', { doc_id: docId }),
  getJobStatus: (jobId: string) =>
    get<ApiJobStatus>(`/index/status/${encodeURIComponent(jobId)}`),
  getJobResult: (jobId: string) =>
    get<ApiIndexResult>(`/index/result/${encodeURIComponent(jobId)}`),
  cancelJob: (jobId: string) =>
    del<{ job_id: string }>(`/index/jobs/${encodeURIComponent(jobId)}`),
  // C: Knowledge Graph
  getNodes: (params?: { page?: number; pageSize?: number; type?: string; docId?: string }) =>
    get<{ total: number; page: number; page_size: number; items: ApiKGNode[] }>('/kg/nodes', {
      page: params?.page,
      page_size: params?.pageSize ?? 500,
      type: params?.type,
      doc_id: params?.docId,
    }),
  getEdges: (params?: { page?: number; pageSize?: number; docId?: string }) =>
    get<{ total: number; page: number; page_size: number; items: ApiKGEdge[] }>('/kg/edges', {
      page: params?.page,
      page_size: params?.pageSize ?? 2000,
      doc_id: params?.docId,
    }),
  getNodeDetail: (nodeId: string) =>
    get<ApiKGNode>(`/kg/nodes/${encodeURIComponent(nodeId)}`),
  getNodeNeighbors: (nodeId: string, hops = 1) =>
    get<{
      center: ApiKGNode;
      hops: number;
      neighbors_by_hop: Record<string, ApiKGNode[]>;
      total_neighbors: number;
    }>(`/kg/nodes/${encodeURIComponent(nodeId)}/neighbors`, { hops }),
  getKGStats: () =>
    get<{ total_nodes: number; total_edges: number; type_distribution: Record<string, number> }>('/kg/stats'),
  exportKG: () => get<{ nodes: ApiKGNode[]; edges: ApiKGEdge[] }>('/kg/export'),
  // D: QA Query
  query: (question: string, history: { question: string; answer: string }[] = []) => {
    // Transform {question, answer}[] to ChatMessage format expected by backend:
    // each past turn becomes a 'human' message followed by an 'ai' message.
    const chatHistory = history.flatMap(h => [
      { role: 'human' as const, content: h.question },
      { role: 'ai' as const, content: h.answer },
    ]);
    return post<ApiQueryResult>('/query', { question, history: chatHistory });
  },
  getQueryHistory: (page = 1, pageSize = 50) =>
    get<{ total: number; page: number; page_size: number; items: ApiQueryResult[] }>(
      '/query/history', { page, page_size: pageSize }
    ),
  // E: Search
  searchEntities: (q: string, type?: string, limit = 15) =>
    get<ApiSearchResult>('/search/entities', {
      q,
      // '全部类型' is the UI's "all types" option and means "no type filter".
      type: type && type !== '全部类型' ? type : undefined,
      limit,
    }),
  searchPath: (fromId: string, toId: string, maxHops = 3) =>
    get<ApiPathResult>('/search/path', { from: fromId, to: toId, max_hops: maxHops }),
  searchGraph: (q: string, includeNeighbors = false) =>
    get<ApiGraphSearchResult>('/search/graph', { q, include_neighbors: includeNeighbors }),
  // F: System
  getHealth: () => get<ApiHealthData>('/health'),
  getSystemStats: () => get<ApiStats>('/system/stats'),
  getDemoData: () =>
    get<{ nodes: ApiKGNode[]; edges: ApiKGEdge[]; stats: Record<string, unknown> }>('/system/demo'),
 };
--- a/frontend/src/app/components/figma/ImageWithFallback.tsx
+++ b/frontend/src/app/components/figma/ImageWithFallback.tsx
@@ -0,0 +1,27 @@
 import React, { useState } from 'react'
 // Base64-encoded SVG data URI shown by ImageWithFallback in place of an image
 // that failed to load.
 const ERROR_IMG_SRC =
  'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iODgiIGhlaWdodD0iODgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgc3Ryb2tlPSIjMDAwIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBvcGFjaXR5PSIuMyIgZmlsbD0ibm9uZSIgc3Ryb2tlLXdpZHRoPSIzLjciPjxyZWN0IHg9IjE2IiB5PSIxNiIgd2lkdGg9IjU2IiBoZWlnaHQ9IjU2IiByeD0iNiIvPjxwYXRoIGQ9Im0xNiA1OCAxNi0xOCAzMiAzMiIvPjxjaXJjbGUgY3g9IjUzIiBjeT0iMzUiIHI9IjciLz48L3N2Zz4KCg=='
 /**
 * Drop-in `<img>` replacement that swaps in a placeholder graphic once the
 * browser reports a load error.
 *
 * While no error has occurred all props are forwarded to a plain `<img>`.
 * After an error, a wrapper `<div>` takes over `className`/`style`, and the
 * failed URL is kept on the placeholder as `data-original-url`.
 */
 export function ImageWithFallback(props: React.ImgHTMLAttributes<HTMLImageElement>) {
  const [didError, setDidError] = useState(false)
  const { src, alt, style, className, ...rest } = props

  if (!didError) {
    // Happy path: render the requested image and watch for a load failure.
    return (
      <img src={src} alt={alt} className={className} style={style} {...rest} onError={() => setDidError(true)} />
    )
  }

  const wrapperClass = `inline-block bg-gray-100 text-center align-middle ${className ?? ''}`
  return (
    <div className={wrapperClass} style={style}>
      <div className="flex items-center justify-center w-full h-full">
        <img src={ERROR_IMG_SRC} alt="Error loading image" {...rest} data-original-url={src} />
      </div>
    </div>
  )
 }
--- a/frontend/src/app/components/layout/AppLayout.tsx
+++ b/frontend/src/app/components/layout/AppLayout.tsx
@@ -0,0 +1,47 @@
 import React from 'react';
 import { Outlet } from 'react-router';
 import { Toaster } from 'sonner';
 import { Header } from './Header';
 import { Sidebar } from './Sidebar';
 import { StatusBar } from './StatusBar';
 import { useAppState, AppProvider } from '../../store';
 /**
 * Page shell: a CSS grid with header, sidebar, scrollable main area and
 * footer. The sidebar column narrows when the sidebar is collapsed.
 */
 function AppLayoutInner() {
  const { sidebarCollapsed } = useAppState();
  const sidebarWidth = sidebarCollapsed ? 72 : 220;

  const shellStyle: React.CSSProperties = {
    display: 'grid',
    gridTemplateAreas: '"header header" "sidebar main" "footer footer"',
    gridTemplateColumns: `${sidebarWidth}px 1fr`,
    gridTemplateRows: '56px 1fr 32px',
    height: '100vh',
    overflow: 'hidden',
    transition: 'grid-template-columns 200ms ease',
  };
  // Only the main area scrolls; the shell itself hides overflow.
  const mainStyle: React.CSSProperties = {
    gridArea: 'main',
    overflowY: 'auto',
    background: 'var(--bg-base)',
  };

  return (
    <div style={shellStyle}>
      <Header />
      <Sidebar />
      <main style={mainStyle}>
        <Outlet />
      </main>
      <StatusBar />
    </div>
  );
 }
 /**
 * Layout route element: wraps the page shell in the app-state provider and
 * mounts the toast host alongside it.
 */
 export function AppLayout() {
  const toastHost = <Toaster position="top-right" theme="dark" richColors />;
  return (
    <AppProvider>
      <AppLayoutInner />
      {toastHost}
    </AppProvider>
  );
 }
--- a/frontend/src/app/components/layout/Header.tsx
+++ b/frontend/src/app/components/layout/Header.tsx
@@ -0,0 +1,145 @@
 import React, { useState, useRef, useEffect } from 'react';
 import { useNavigate } from 'react-router';
 import { Menu, Search, X } from 'lucide-react';
 import { useAppState, type KGNode } from '../../store';
 import { api } from '../../api';
 import { TYPE_COLORS } from '../../mock-data';
 /**
 * Top application bar: sidebar toggle, product title, entity search box with
 * debounced suggestions, and an overall backend health indicator.
 *
 * Submitting the search form navigates to `/search?q=…`; picking a suggestion
 * navigates to `/graph?node=…`.
 */
 export function Header() {
  // Minimum number of typed characters before suggestions are fetched/shown.
  const MIN_QUERY_LENGTH = 2;
  // Delay between the last keystroke and the suggestion request.
  const DEBOUNCE_MS = 300;

  const { sidebarCollapsed, setSidebarCollapsed, health } = useAppState();
  const [query, setQuery] = useState('');
  const [showSuggestions, setShowSuggestions] = useState(false);
  const [suggestions, setSuggestions] = useState<KGNode[]>([]);
  const navigate = useNavigate();
  const inputRef = useRef<HTMLInputElement>(null);
  // React 19 typings require an explicit initial value for useRef.
  const timerRef = useRef<ReturnType<typeof setTimeout> | undefined>(undefined);
  useEffect(() => {
    if (query.length < MIN_QUERY_LENGTH) {
      setSuggestions([]);
      setShowSuggestions(false);
      return;
    }
    // Set by the cleanup below so that a response belonging to an outdated
    // query (or arriving after unmount) cannot overwrite newer suggestions.
    let stale = false;
    timerRef.current = setTimeout(async () => {
      try {
        const res = await api.searchEntities(query, undefined, 5);
        if (stale) return;
        setSuggestions(res.items.map(n => ({
          id: n.id, name: n.name, type: n.type as KGNode['type'],
          page: n.page, confidence: n.confidence as KGNode['confidence'],
          // ApiKGNode exposes the owning document as `source_doc`.
          degree: n.degree, centrality: 0, doc_id: n.source_doc,
        })));
        setShowSuggestions(true);
      } catch {
        if (!stale) setSuggestions([]);
      }
    }, DEBOUNCE_MS);
    return () => {
      stale = true;
      clearTimeout(timerRef.current);
    };
  }, [query]);
  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    const trimmed = query.trim();
    if (trimmed) {
      setShowSuggestions(false);
      navigate(`/search?q=${encodeURIComponent(trimmed)}`);
    }
  };
  const allOk = Object.values(health).every(v => v === 'ok');
  return (
    <header
      className="flex items-center px-4 gap-4"
      style={{
        gridArea: 'header',
        height: 56,
        background: 'var(--bg-s1)',
        borderBottom: '1px solid var(--border-main)',
        position: 'sticky',
        top: 0,
        zIndex: 100,
      }}
    >
      {/* Left */}
      <button
        onClick={() => setSidebarCollapsed(!sidebarCollapsed)}
        className="p-1.5 rounded-md hover:opacity-80 cursor-pointer"
        style={{ background: 'var(--bg-s2)', color: 'var(--text-3)' }}
        aria-label="Toggle sidebar"
      >
        <Menu size={18} />
      </button>
      <span style={{ color: 'var(--blue)', fontSize: 16, fontWeight: 600, whiteSpace: 'nowrap' }}>
        GraphRAG Studio
      </span>
      {/* Center - Search */}
      <form onSubmit={handleSubmit} className="flex-1 flex justify-center relative" style={{ maxWidth: 400, margin: '0 auto' }}>
        <div className="relative w-full">
          <Search size={14} className="absolute left-3 top-1/2 -translate-y-1/2" style={{ color: 'var(--text-4)' }} />
          <input
            ref={inputRef}
            value={query}
            onChange={e => setQuery(e.target.value)}
            // Same threshold as the fetch above, so existing results reopen on focus.
            onFocus={() => query.length >= MIN_QUERY_LENGTH && setShowSuggestions(true)}
            // Delay hiding so a press on a suggestion is handled before the list disappears.
            onBlur={() => setTimeout(() => setShowSuggestions(false), 200)}
            placeholder="搜索实体..."
            className="w-full pl-9 pr-8 py-1.5 rounded-md outline-none"
            style={{
              background: 'var(--bg-s2)',
              border: '1px solid var(--border-main)',
              color: 'var(--text-1)',
              fontSize: 13,
            }}
          />
          {query && (
            <button type="button" onClick={() => { setQuery(''); setShowSuggestions(false); }} className="absolute right-2 top-1/2 -translate-y-1/2 cursor-pointer" style={{ color: 'var(--text-4)' }}>
              <X size={14} />
            </button>
          )}
        </div>
        {showSuggestions && suggestions.length > 0 && (
          <div
            className="absolute top-full mt-1 w-full rounded-md overflow-hidden"
            style={{ background: 'var(--bg-s3)', border: '1px solid var(--border-main)', boxShadow: 'var(--shadow-md)', zIndex: 200 }}
          >
            {suggestions.map(s => (
              <button
                key={s.id}
                type="button"
                className="w-full flex items-center gap-2 px-3 py-2 hover:opacity-80 cursor-pointer text-left"
                style={{ background: 'transparent', borderBottom: '1px solid var(--border-muted)' }}
                onMouseDown={() => {
                  setShowSuggestions(false);
                  setQuery('');
                  // Encode the id: it is placed in a query string.
                  navigate(`/graph?node=${encodeURIComponent(s.id)}`);
                }}
              >
                <span style={{ color: 'var(--text-1)', fontSize: 13 }}>{s.name}</span>
                <span
                  className="px-1.5 py-0.5 rounded"
                  style={{
                    fontSize: 10, fontWeight: 600,
                    background: `${TYPE_COLORS[s.type]}20`,
                    color: TYPE_COLORS[s.type],
                  }}
                >
                  {s.type}
                </span>
              </button>
            ))}
          </div>
        )}
      </form>
      {/* Right */}
      <div className="flex items-center gap-2" style={{ whiteSpace: 'nowrap' }}>
        <span
          className="inline-block w-2 h-2 rounded-full"
          style={{ background: allOk ? 'var(--green)' : 'var(--red)' }}
        />
        <span style={{ color: 'var(--text-3)', fontSize: 12 }}>API: localhost:8000</span>
      </div>
    </header>
  );
 }
--- a/frontend/src/app/components/layout/Sidebar.tsx
+++ b/frontend/src/app/components/layout/Sidebar.tsx
@@ -0,0 +1,110 @@
 import React from 'react';
 import { useNavigate, useLocation } from 'react-router';
 import { LayoutDashboard, FileText, Share2, MessageSquare, Search, Settings } from 'lucide-react';
 import { useAppState } from '../../store';
 // Primary navigation entries rendered by Sidebar, in display order.
 // `badgeKey`, when present, names the `stats` field whose value Sidebar shows
 // as a badge beside the label; entries without it show no badge.
 const navItems = [
  { icon: LayoutDashboard, label: '仪表盘', path: '/dashboard', badge: null },
  { icon: FileText, label: '文档管理', path: '/documents', badgeKey: 'documents' as const },
  { icon: Share2, label: '知识图谱', path: '/graph', badge: null },
  { icon: MessageSquare, label: '智能问答', path: '/chat', badgeKey: 'queries' as const },
  { icon: Search, label: '搜索', path: '/search', badge: null },
 ];
 /**
 * Left navigation rail. Renders one button per `navItems` entry, highlights
 * the entry matching the current route, and shows only icons when collapsed.
 * A settings button is pinned to the bottom.
 */
 export function Sidebar() {
  const { sidebarCollapsed, stats } = useAppState();
  const navigate = useNavigate();
  const location = useLocation();
  const width = sidebarCollapsed ? 72 : 220;
  // Layout values shared by every button in the rail.
  const buttonPadding = sidebarCollapsed ? '10px 0' : '10px 12px';
  const buttonJustify = sidebarCollapsed ? 'center' : 'flex-start';
  const currentPath = location.pathname;

  const renderNavItem = (item: (typeof navItems)[number]) => {
    // The root URL counts as the dashboard.
    const isRootAsDashboard = item.path === '/dashboard' && currentPath === '/';
    const active = currentPath === item.path || isRootAsDashboard;
    const Icon = item.icon;
    const badgeValue = item.badgeKey ? stats[item.badgeKey] : null;
    // Hover feedback is applied imperatively and only to inactive entries.
    const paintBackground = (el: EventTarget, bg: string) => {
      if (!active) (el as HTMLElement).style.background = bg;
    };
    return (
      <button
        key={item.path}
        onClick={() => navigate(item.path)}
        className="flex items-center gap-3 rounded-md cursor-pointer relative"
        style={{
          padding: buttonPadding,
          justifyContent: buttonJustify,
          background: active ? 'rgba(88,166,255,0.1)' : 'transparent',
          color: active ? 'var(--blue)' : 'var(--text-3)',
          fontSize: 14,
          fontWeight: active ? 500 : 400,
          border: 'none',
          transition: 'all 150ms ease',
        }}
        onMouseEnter={e => paintBackground(e.currentTarget, 'var(--bg-s2)')}
        onMouseLeave={e => paintBackground(e.currentTarget, 'transparent')}
      >
        {active && (
          <div
            className="absolute left-0 top-2 bottom-2 rounded-r"
            style={{ width: 2, background: 'var(--blue)' }}
          />
        )}
        <Icon size={18} />
        {!sidebarCollapsed && (
          <>
            <span className="flex-1 text-left">{item.label}</span>
            {badgeValue != null && (
              <span
                className="px-1.5 py-0.5 rounded-full"
                style={{
                  fontSize: 11, fontWeight: 600,
                  background: 'var(--bg-s2)',
                  color: 'var(--text-3)',
                  minWidth: 20,
                  textAlign: 'center',
                }}
              >
                {badgeValue}
              </span>
            )}
          </>
        )}
      </button>
    );
  };

  return (
    <nav
      className="flex flex-col py-3 overflow-hidden"
      style={{
        gridArea: 'sidebar',
        width,
        background: 'var(--bg-s1)',
        borderRight: '1px solid var(--border-main)',
        transition: 'width 200ms ease',
      }}
    >
      <div className="flex flex-col gap-1 px-2">
        {navItems.map(renderNavItem)}
      </div>
      <div className="mt-auto px-2">
        <button
          className="flex items-center gap-3 rounded-md w-full cursor-pointer"
          style={{
            padding: buttonPadding,
            justifyContent: buttonJustify,
            background: 'transparent',
            color: 'var(--text-4)',
            fontSize: 14,
            border: 'none',
          }}
        >
          <Settings size={18} />
          {!sidebarCollapsed && <span>系统设置</span>}
        </button>
      </div>
    </nav>
  );
 }
--- a/frontend/src/app/components/layout/StatusBar.tsx
+++ b/frontend/src/app/components/layout/StatusBar.tsx
@@ -0,0 +1,34 @@
 import React from 'react';
 import { useAppState } from '../../store';
 /**
 * Footer bar: shows the first document currently being indexed (if any) on
 * the left, and the app version plus an overall health dot on the right.
 */
 export function StatusBar() {
  const { documents, health } = useAppState();
  const indexingDoc = documents.find(doc => doc.status === 'indexing');
  // Healthy only if no component reports anything other than 'ok'.
  const allOk = !Object.values(health).some(state => state !== 'ok');
  const healthColor = allOk ? 'var(--green)' : 'var(--red)';
  const footerStyle: React.CSSProperties = {
    gridArea: 'footer',
    height: 32,
    background: 'var(--bg-s1)',
    borderTop: '1px solid var(--border-main)',
    fontSize: 12,
    color: 'var(--text-4)',
  };
  return (
    <footer className="flex items-center justify-between px-4" style={footerStyle}>
      <div>
        {indexingDoc && (
          <span style={{ color: 'var(--yellow)' }}>
            正在索引 {indexingDoc.filename}... {indexingDoc.progress ?? 0}%
          </span>
        )}
      </div>
      <div className="flex items-center gap-3">
        <span>v1.0.0</span>
        <span className="inline-block w-1.5 h-1.5 rounded-full" style={{ background: healthColor }} />
      </div>
    </footer>
  );
 }
--- a/frontend/src/app/components/pages/Dashboard.tsx
+++ b/frontend/src/app/components/pages/Dashboard.tsx
@@ -0,0 +1,210 @@
 import React from 'react';
 import { useNavigate } from 'react-router';
 import { Share2, MessageSquare, Search, Zap, Upload, FileText, ExternalLink } from 'lucide-react';
 import { useAppState } from '../../store';
 // Summary cards shown at the top of the dashboard. `key` is used to index the
 // `stats` object from the app store; `color` tints both the icon and the value.
 const statCards = [
  { key: 'kg_nodes', label: '图谱节点', color: '#58a6ff', icon: '◈' },
  { key: 'kg_edges', label: '图谱边', color: '#8957e5', icon: '◇' },
  { key: 'documents', label: '文档数', color: '#3fb950', icon: '▤' },
  { key: 'queries', label: '查询次数', color: '#d29922', icon: '◆' },
 ] as const;
 // Badge colours (background / text) keyed by document status.
 const statusStyles: Record<string, { bg: string; color: string }> = {
  indexed: { bg: '#1a3a22', color: '#3fb950' },
  indexing: { bg: '#2d2a16', color: '#d29922' },
  uploaded: { bg: '#1c2128', color: '#8b949e' },
  failed: { bg: '#3b1a1a', color: '#f85149' },
 };
 /**
 * Dashboard page: headline statistics, per-component system health, quick
 * navigation actions, and a table of the five first documents in the store.
 */
 export function Dashboard() {
  const { stats, health, documents } = useAppState();
  const navigate = useNavigate();
  const recentDocs = documents.slice(0, 5);
  return (
    <div className="p-6" style={{ maxWidth: 1200, margin: '0 auto' }}>
      {/* Page Title + Upload Button */}
      <div className="flex items-center justify-between mb-6">
        <h1 style={{ color: 'var(--text-1)', fontSize: 20, fontWeight: 600 }}>仪表盘</h1>
        <button
          onClick={() => navigate('/documents')}
          className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
          style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, fontWeight: 500, border: 'none' }}
        >
          <Upload size={14} /> 上传 & 索引
        </button>
      </div>
      {/* Stat Cards */}
      <div className="grid grid-cols-4 gap-4 mb-6" style={{ minWidth: 0 }}>
        {statCards.map(c => (
          <div
            key={c.key}
            className="rounded-lg p-4"
            style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
          >
            <div className="flex items-center justify-between mb-2">
              <span style={{ color: 'var(--text-3)', fontSize: 13 }}>{c.label}</span>
              <span style={{ fontSize: 18, color: c.color }}>{c.icon}</span>
            </div>
            <div style={{ color: c.color, fontSize: 28, fontWeight: 700 }}>
              {stats[c.key].toLocaleString()}
            </div>
          </div>
        ))}
      </div>
      <div className="grid grid-cols-3 gap-4">
        {/* System Health */}
        <div
          className="rounded-lg p-4 col-span-1"
          style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
        >
          <h2 className="mb-4" style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>系统健康</h2>
          <div className="flex flex-col gap-3">
            {[
              { name: 'MinerU venv', status: health.mineru },
              { name: 'LangExtract venv', status: health.langextract },
              { name: 'DeepSeek API', status: health.deepseek },
              { name: 'Storage', status: health.storage },
            ].map(s => (
              <div key={s.name} className="flex items-center justify-between">
                <span style={{ color: 'var(--text-2)', fontSize: 13 }}>{s.name}</span>
                <span className="flex items-center gap-1.5">
                  <span className="inline-block w-2 h-2 rounded-full" style={{ background: s.status === 'ok' ? 'var(--green)' : 'var(--red)' }} />
                  <span style={{ color: s.status === 'ok' ? 'var(--green)' : 'var(--red)', fontSize: 12 }}>{s.status}</span>
                </span>
              </div>
            ))}
          </div>
          {/* Quick Actions */}
          <h2 className="mt-6 mb-3" style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>快捷操作</h2>
          <div className="flex flex-col gap-2">
            {[
              { icon: Share2, label: '浏览图谱', path: '/graph' },
              { icon: MessageSquare, label: '开始对话', path: '/chat' },
              { icon: Search, label: '搜索', path: '/search' },
              { icon: Zap, label: '演示', path: '/graph' },
            ].map(a => (
              <button
                key={a.label}
                onClick={() => navigate(a.path)}
                className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full"
                style={{
                  background: 'var(--bg-s2)',
                  border: '1px solid var(--border-main)',
                  color: 'var(--text-2)',
                  fontSize: 13,
                }}
              >
                <a.icon size={14} style={{ color: 'var(--blue)' }} /> {a.label}
              </button>
            ))}
          </div>
        </div>
        {/* Recent Documents */}
        <div
          className="rounded-lg p-4 col-span-2"
          style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
        >
          <div className="flex items-center justify-between mb-4">
            <h2 style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>最近文档</h2>
            <button
              onClick={() => navigate('/documents')}
              className="flex items-center gap-1 cursor-pointer"
              style={{ color: 'var(--blue)', fontSize: 12, background: 'none', border: 'none' }}
            >
              查看全部 <ExternalLink size={12} />
            </button>
          </div>
          <div className="flex flex-col">
            {/* Table header */}
            <div
              className="grid gap-4 px-3 py-2 rounded-t-md"
              style={{ gridTemplateColumns: '1fr 60px 50px 90px 130px 100px', background: 'var(--bg-s2)', fontSize: 11, fontWeight: 600, color: 'var(--text-3)', textTransform: 'uppercase', letterSpacing: '0.5px' }}
            >
              <span>文件名</span>
              <span>格式</span>
              <span>页数</span>
              <span>状态</span>
              <span>日期</span>
              <span>操作</span>
            </div>
            {recentDocs.map(doc => {
              // Fall back to the neutral style so an unexpected status value
              // cannot crash the whole page on `st.bg`.
              const st = statusStyles[doc.status] ?? statusStyles.uploaded;
              // Progress may be unset right after indexing starts; show 0 then.
              const progress = doc.progress ?? 0;
              return (
                <div
                  key={doc.id}
                  className="grid gap-4 px-3 py-2.5 items-center"
                  style={{
                    gridTemplateColumns: '1fr 60px 50px 90px 130px 100px',
                    borderBottom: '1px solid var(--border-muted)',
                    fontSize: 13,
                  }}
                >
                  <span className="flex items-center gap-2 truncate" style={{ color: 'var(--text-1)' }}>
                    <FileText size={14} style={{ color: 'var(--text-3)', flexShrink: 0 }} />
                    <span className="truncate">{doc.filename}</span>
                  </span>
                  <span style={{ color: 'var(--text-3)' }}>{doc.format}</span>
                  <span style={{ color: 'var(--text-3)' }}>{doc.pages}</span>
                  <span>
                    <span
                      className="px-2 py-0.5 rounded-full"
                      style={{ fontSize: 11, fontWeight: 600, background: st.bg, color: st.color }}
                    >
                      {doc.status}
                    </span>
                  </span>
                  <span style={{ color: 'var(--text-4)', fontSize: 12 }}>
                    {new Date(doc.upload_date).toLocaleDateString('zh-CN', { month: 'short', day: 'numeric', year: 'numeric' })}
                  </span>
                  <span>
                    {doc.status === 'indexed' && (
                      <button
                        // Encode the id: it is placed in a query string.
                        onClick={() => navigate(`/graph?doc_id=${encodeURIComponent(doc.id)}`)}
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
                      >
                        查看图谱
                      </button>
                    )}
                    {/* NOTE(review): this button has no onClick handler, so clicking it does nothing. */}
                    {doc.status === 'uploaded' && (
                      <button
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(35,134,54,0.2)', color: 'var(--green)', border: 'none' }}
                      >
                        索引
                      </button>
                    )}
                    {doc.status === 'indexing' && (
                      <div className="flex items-center gap-2">
                        <div style={{ flex: 1, height: 4, background: 'var(--bg-s2)', borderRadius: 2, overflow: 'hidden' }}>
                          <div style={{ width: `${progress}%`, height: '100%', background: 'var(--yellow)', borderRadius: 2, transition: 'width 300ms' }} />
                        </div>
                        <span style={{ fontSize: 11, color: 'var(--yellow)' }}>{progress}%</span>
                      </div>
                    )}
                    {/* NOTE(review): this button has no onClick handler, so clicking it does nothing. */}
                    {doc.status === 'failed' && (
                      <button
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(248,81,73,0.1)', color: 'var(--red)', border: 'none' }}
                      >
                        重试
                      </button>
                    )}
                  </span>
                </div>
              );
            })}
          </div>
        </div>
      </div>
    </div>
  );
 }
--- a/frontend/src/app/components/pages/Documents.tsx
+++ b/frontend/src/app/components/pages/Documents.tsx
@@ -0,0 +1,439 @@
 import React, { useState, useCallback, useRef } from 'react';
 import { useNavigate } from 'react-router';
 import { toast } from 'sonner';
 import { Upload, FileText, Trash2, Play, RotateCcw, X, ChevronDown, ChevronRight, Eye } from 'lucide-react';
 import { useAppState } from '../../store';
 import { api, ApiError } from '../../api';
 // Badge colors (background + text) for each document status shown in the
 // status column of the document table, keyed by the `doc.status` string.
 const statusStyles: Record<string, { bg: string; color: string }> = {
  indexed:  { bg: '#1a3a22', color: '#3fb950' },
  indexing: { bg: '#2d2a16', color: '#d29922' },
  uploaded: { bg: '#1c2128', color: '#8b949e' },
  failed:   { bg: '#3b1a1a', color: '#f85149' },
 };
 /**
  * Document management page.
  *
  * Renders an upload drop zone, format/status/filename filters, a table of the
  * documents held in the app store, and a delete confirmation modal. Uploading
  * a file immediately requests indexing for it; rows additionally expose
  * index / retry / cancel / delete / "view graph" actions depending on the
  * document's status.
  */
 export function Documents() {
  const { documents, setDocuments, refreshKG } = useAppState();
  const navigate = useNavigate();
  const fileInputRef = useRef<HTMLInputElement>(null);
  const [dragOver, setDragOver] = useState(false);
  const [formatFilter, setFormatFilter] = useState('All');
  const [statusFilter, setStatusFilter] = useState('All');
  const [searchTerm, setSearchTerm] = useState('');
  // Id of the row whose result details are expanded, or null when none is.
  const [expandedDoc, setExpandedDoc] = useState<string | null>(null);
  // Id of the document awaiting delete confirmation, or null when the modal is closed.
  const [showDeleteModal, setShowDeleteModal] = useState<string | null>(null);
  const [uploading, setUploading] = useState(false);
  // Apply the three toolbar filters; the filename match is case-insensitive.
  const filteredDocs = documents.filter(d => {
    if (formatFilter !== 'All' && d.format !== formatFilter) return false;
    if (statusFilter !== 'All' && d.status !== statusFilter) return false;
    if (searchTerm && !d.filename.toLowerCase().includes(searchTerm.toLowerCase())) return false;
    return true;
  });
  // Encode the id so characters such as `&` or `#` cannot break the query string.
  const openGraph = (docId: string) => navigate(`/graph?doc_id=${encodeURIComponent(docId)}`);
  // ── Upload ──────────────────────────────────────────────────────────────────
  // Uploads the files one at a time; each successful upload is followed by an
  // indexing request. A failure for one file is reported via toast and does not
  // stop the remaining files.
  const handleFiles = useCallback(async (files: FileList | File[]) => {
    // Copy synchronously: the caller may reset the <input>, which empties a live FileList.
    const fileArr = Array.from(files);
    if (fileArr.length === 0) return;
    setUploading(true);
    try {
      for (const file of fileArr) {
        try {
          toast.loading(`上传 ${file.name}...`, { id: `upload-${file.name}` });
          // 1. Upload
          const uploaded = await api.uploadDocument(file);
          const newDoc = {
            id: uploaded.doc_id,
            filename: uploaded.filename,
            format: uploaded.format,
            pages: 0,
            status: 'uploaded' as const,
            upload_date: new Date().toISOString(),
          };
          setDocuments(prev => [newDoc, ...prev]);
          toast.success(`${file.name} 上传成功`, { id: `upload-${file.name}` });
          // 2. Auto-start indexing. Failure here leaves the document in the
          //    'uploaded' state so it can be indexed manually from its row.
          try {
            toast.loading(`开始索引 ${file.name}...`, { id: `index-${uploaded.doc_id}` });
            const job = await api.startIndexing(uploaded.doc_id);
            setDocuments(prev =>
              prev.map(d => d.id === uploaded.doc_id
                ? { ...d, status: 'indexing', job_id: job.job_id, progress: 0 }
                : d
              )
            );
            toast.success(`${file.name} 开始索引`, { id: `index-${uploaded.doc_id}` });
          } catch (err) {
            const msg = err instanceof ApiError ? err.message : '启动索引失败';
            toast.error(msg, { id: `index-${uploaded.doc_id}` });
          }
        } catch (err) {
          const msg = err instanceof ApiError ? err.message : '上传失败';
          toast.error(`${file.name}: ${msg}`, { id: `upload-${file.name}` });
        }
      }
    } finally {
      // Always release the drop zone, even if something unexpected throws above;
      // otherwise it would stay disabled (pointer-events: none) until reload.
      setUploading(false);
    }
  }, [setDocuments]);
  const handleDragOver = useCallback((e: React.DragEvent) => { e.preventDefault(); setDragOver(true); }, []);
  const handleDragLeave = useCallback(() => setDragOver(false), []);
  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    setDragOver(false);
    handleFiles(e.dataTransfer.files);
  }, [handleFiles]);
  const handleBrowse = () => fileInputRef.current?.click();
  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    if (e.target.files) handleFiles(e.target.files);
    // Reset so selecting the same file again still fires a change event.
    e.target.value = '';
  };
  // ── Index / Retry ────────────────────────────────────────────────────────────
  // Requests indexing for an existing document and marks it as 'indexing',
  // clearing any error left over from a previous failed attempt.
  const handleStartIndex = useCallback(async (docId: string, filename: string) => {
    try {
      const job = await api.startIndexing(docId);
      setDocuments(prev =>
        prev.map(d => d.id === docId
          ? { ...d, status: 'indexing', job_id: job.job_id, progress: 0, error: undefined }
          : d
        )
      );
      toast.success(`${filename} 开始索引`);
    } catch (err) {
      const msg = err instanceof ApiError ? err.message : '启动索引失败';
      toast.error(msg);
    }
  }, [setDocuments]);
  // ── Cancel ───────────────────────────────────────────────────────────────────
  // Cancels a running indexing job and returns the document to 'uploaded'.
  const handleCancel = useCallback(async (docId: string, jobId: string) => {
    try {
      await api.cancelJob(jobId);
      setDocuments(prev =>
        prev.map(d => d.id === docId
          ? { ...d, status: 'uploaded', job_id: undefined, progress: undefined }
          : d
        )
      );
      toast.info('索引任务已取消');
    } catch (err) {
      const msg = err instanceof ApiError ? err.message : '取消失败';
      toast.error(msg);
    }
  }, [setDocuments]);
  // ── Delete ───────────────────────────────────────────────────────────────────
  // Deletes the document selected in the confirmation modal, then refreshes the
  // knowledge graph via the store. On failure the modal stays open.
  const handleDelete = useCallback(async () => {
    if (!showDeleteModal) return;
    try {
      await api.deleteDocument(showDeleteModal);
      setDocuments(prev => prev.filter(d => d.id !== showDeleteModal));
      setShowDeleteModal(null);
      toast.success('文档已删除');
      refreshKG();
    } catch (err) {
      const msg = err instanceof ApiError ? err.message : '删除失败';
      toast.error(msg);
    }
  }, [showDeleteModal, setDocuments, refreshKG]);
  const deleteDoc = documents.find(d => d.id === showDeleteModal);
  return (
    <div className="p-6" style={{ maxWidth: 1200, margin: '0 auto' }}>
      <h1 className="mb-6" style={{ color: 'var(--text-1)', fontSize: 20, fontWeight: 600 }}>文档管理</h1>
      {/* Hidden file input */}
      <input
        ref={fileInputRef}
        type="file"
        multiple
        accept=".pdf,.docx,.doc,.pptx,.ppt,.png,.jpg,.jpeg,.html"
        style={{ display: 'none' }}
        onChange={handleFileChange}
      />
      {/* Upload Area */}
      <div
        onDragOver={handleDragOver}
        onDragLeave={handleDragLeave}
        onDrop={handleDrop}
        onClick={handleBrowse}
        className="flex flex-col items-center justify-center gap-3 rounded-lg p-8 mb-6 cursor-pointer"
        style={{
          border: `2px dashed ${dragOver ? 'var(--blue)' : 'var(--border-main)'}`,
          background: dragOver ? 'rgba(88,166,255,0.05)' : 'var(--bg-s1)',
          transition: 'all 200ms ease',
          opacity: uploading ? 0.6 : 1,
          // Block clicks and drops while a batch is in flight.
          pointerEvents: uploading ? 'none' : 'auto',
        }}
      >
        <Upload size={32} style={{ color: dragOver ? 'var(--blue)' : 'var(--text-4)' }} />
        <div style={{ color: 'var(--text-2)', fontSize: 14 }}>
          {uploading ? '正在上传...' : (
            <>拖拽文件到此处，或{' '}<span style={{ color: 'var(--blue)' }}>浏览文件</span></>
          )}
        </div>
        <div style={{ color: 'var(--text-4)', fontSize: 12 }}>
          PDF &middot; DOCX &middot; DOC &middot; PPTX &middot; PPT &middot; PNG &middot; JPG &middot; HTML &nbsp;|&nbsp; 单文件最大 200MB
        </div>
      </div>
      {/* Toolbar */}
      <div className="flex items-center gap-3 mb-4">
        <select
          value={formatFilter}
          onChange={e => setFormatFilter(e.target.value)}
          className="px-3 py-1.5 rounded-md cursor-pointer"
          style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
        >
          <option>All</option>
          <option>PDF</option>
          <option>DOCX</option>
          <option>PPTX</option>
          <option>PNG</option>
          <option>JPG</option>
          <option>HTML</option>
        </select>
        <select
          value={statusFilter}
          onChange={e => setStatusFilter(e.target.value)}
          className="px-3 py-1.5 rounded-md cursor-pointer"
          style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
        >
          <option>All</option>
          <option>indexed</option>
          <option>indexing</option>
          <option>uploaded</option>
          <option>failed</option>
        </select>
        <input
          value={searchTerm}
          onChange={e => setSearchTerm(e.target.value)}
          placeholder="搜索文档..."
          className="px-3 py-1.5 rounded-md flex-1"
          style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13, outline: 'none' }}
        />
      </div>
      {/* Document Table */}
      <div className="rounded-lg overflow-hidden" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}>
        {/* Header */}
        <div
          className="grid gap-4 px-4 py-2.5"
          style={{
            gridTemplateColumns: '24px 1fr 70px 50px 100px 140px 160px',
            background: 'var(--bg-s2)', fontSize: 11, fontWeight: 600,
            color: 'var(--text-3)', textTransform: 'uppercase', letterSpacing: '0.5px',
          }}
        >
          <span />
          <span>文件名</span>
          <span>格式</span>
          <span>页数</span>
          <span>状态</span>
          <span>上传日期</span>
          <span>操作</span>
        </div>
        {/* Rows */}
        {filteredDocs.length === 0 ? (
          <div className="flex flex-col items-center justify-center py-12 gap-3">
            <FileText size={40} style={{ color: 'var(--text-4)' }} />
            <span style={{ color: 'var(--text-3)', fontSize: 14 }}>
              {documents.length === 0 ? '暂无文档，请上传文件' : '未找到匹配文档'}
            </span>
          </div>
        ) : (
          filteredDocs.map(doc => {
            // Fall back to the neutral 'uploaded' palette so a status missing from
            // the table cannot crash the whole list render.
            const st = statusStyles[doc.status] ?? statusStyles.uploaded;
            const isExpanded = expandedDoc === doc.id;
            // Only indexed documents show the chevron, so only they may toggle.
            const canExpand = doc.status === 'indexed';
            return (
              <React.Fragment key={doc.id}>
                <div
                  className="grid gap-4 px-4 py-3 items-center"
                  style={{
                    gridTemplateColumns: '24px 1fr 70px 50px 100px 140px 160px',
                    borderBottom: '1px solid var(--border-muted)',
                    fontSize: 13,
                  }}
                >
                  <button
                    onClick={() => setExpandedDoc(isExpanded ? null : doc.id)}
                    disabled={!canExpand}
                    aria-label={isExpanded ? '收起详情' : '展开详情'}
                    aria-expanded={isExpanded}
                    className={canExpand ? 'cursor-pointer' : undefined}
                    style={{ background: 'none', border: 'none', color: 'var(--text-4)', padding: 0 }}
                  >
                    {canExpand
                      ? (isExpanded ? <ChevronDown size={14} /> : <ChevronRight size={14} />)
                      : <span style={{ width: 14, display: 'inline-block' }} />}
                  </button>
                  <span className="flex items-center gap-2 truncate" style={{ color: 'var(--text-1)' }}>
                    <FileText size={14} style={{ color: 'var(--text-3)', flexShrink: 0 }} />
                    <span className="truncate">{doc.filename}</span>
                  </span>
                  <span style={{ color: 'var(--text-3)' }}>{doc.format}</span>
                  <span style={{ color: 'var(--text-3)' }}>{doc.pages || '—'}</span>
                  <span>
                    <span className="px-2 py-0.5 rounded-full inline-flex items-center gap-1" style={{ fontSize: 11, fontWeight: 600, background: st.bg, color: st.color }}>
                      {doc.status === 'indexing' && (
                        <span className="inline-block w-1.5 h-1.5 rounded-full animate-pulse" style={{ background: st.color }} />
                      )}
                      {doc.status}
                    </span>
                  </span>
                  <span style={{ color: 'var(--text-4)', fontSize: 12 }}>
                    {new Date(doc.upload_date).toLocaleDateString('zh-CN', { month: 'short', day: 'numeric', year: 'numeric' })}
                  </span>
                  <span className="flex items-center gap-2">
                    {doc.status === 'uploaded' && (
                      <button
                        onClick={() => handleStartIndex(doc.id, doc.filename)}
                        className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(35,134,54,0.2)', color: 'var(--green)', border: 'none' }}
                      >
                        <Play size={10} /> 索引
                      </button>
                    )}
                    {doc.status === 'indexing' && (
                      <>
                        <div className="flex items-center gap-1.5 flex-1">
                          <div style={{ flex: 1, height: 4, background: 'var(--bg-s2)', borderRadius: 2, overflow: 'hidden', minWidth: 40 }}>
                            <div style={{ width: `${doc.progress ?? 0}%`, height: '100%', background: 'var(--yellow)', borderRadius: 2, transition: 'width 300ms' }} />
                          </div>
                          <span style={{ fontSize: 10, color: 'var(--yellow)', whiteSpace: 'nowrap' }}>{doc.progress ?? 0}%</span>
                        </div>
                        {/* Cancel needs the job id, so it is only offered once one is known. */}
                        {doc.job_id && (
                          <button
                            onClick={() => handleCancel(doc.id, doc.job_id!)}
                            aria-label="取消索引"
                            title="取消索引"
                            className="cursor-pointer"
                            style={{ background: 'none', border: 'none', color: 'var(--text-4)', padding: 2 }}
                          >
                            <X size={12} />
                          </button>
                        )}
                      </>
                    )}
                    {doc.status === 'indexed' && (
                      <button
                        onClick={() => openGraph(doc.id)}
                        className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
                      >
                        <Eye size={10} /> 查看图谱
                      </button>
                    )}
                    {doc.status === 'failed' && (
                      <button
                        onClick={() => handleStartIndex(doc.id, doc.filename)}
                        className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(248,81,73,0.1)', color: 'var(--red)', border: 'none' }}
                      >
                        <RotateCcw size={10} /> 重试
                      </button>
                    )}
                    {/* Deleting is not offered while an indexing job is running. */}
                    {doc.status !== 'indexing' && (
                      <button
                        onClick={() => setShowDeleteModal(doc.id)}
                        aria-label="删除文档"
                        title="删除文档"
                        className="cursor-pointer p-1 rounded"
                        style={{ background: 'none', border: 'none', color: 'var(--text-4)' }}
                      >
                        <Trash2 size={12} />
                      </button>
                    )}
                  </span>
                </div>
                {/* Expanded Result Row */}
                {isExpanded && doc.result && (
                  <div className="px-12 py-3" style={{ background: 'var(--bg-s2)', borderBottom: '1px solid var(--border-muted)' }}>
                    <div className="flex items-center gap-4 mb-2" style={{ fontSize: 13, color: 'var(--text-2)' }}>
                      <span>{doc.result.nodes} 个节点</span>
                      <span style={{ color: 'var(--text-4)' }}>&middot;</span>
                      <span>{doc.result.edges} 条边</span>
                      <span style={{ color: 'var(--text-4)' }}>&middot;</span>
                      <span>{doc.result.pages} 页</span>
                      <span style={{ color: 'var(--text-4)' }}>&middot;</span>
                      <span>{doc.result.extractions} 次提取</span>
                      <span style={{ color: 'var(--text-4)' }}>&middot;</span>
                      <span>{doc.result.duration.toFixed(1)}秒</span>
                    </div>
                    <div className="flex items-center gap-2">
                      <button
                        onClick={() => openGraph(doc.id)}
                        className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
                      >
                        在图谱中查看
                      </button>
                      {/* "View extraction results": the backend has no dedicated API for this yet, so the feature is not implemented. */}
                      <button
                        disabled
                        title="功能未开发：后端暂无提取记录独立查询接口"
                        className="flex items-center gap-1 px-2 py-1 rounded"
                        style={{ fontSize: 11, background: 'var(--bg-s1)', color: 'var(--text-4)', border: '1px solid var(--border-muted)', cursor: 'not-allowed', opacity: 0.5 }}
                      >
                        查看提取结果 <span style={{ fontSize: 9, background: 'rgba(209,75,75,0.2)', color: '#f85149', padding: '1px 4px', borderRadius: 3, marginLeft: 4 }}>未开发</span>
                      </button>
                    </div>
                  </div>
                )}
                {/* Error message */}
                {doc.status === 'failed' && doc.error && (
                  <div className="px-12 py-2" style={{ background: 'rgba(248,81,73,0.05)', borderBottom: '1px solid var(--border-muted)' }}>
                    <span style={{ fontSize: 12, color: 'var(--red)' }}>{doc.error}</span>
                  </div>
                )}
              </React.Fragment>
            );
          })
        )}
      </div>
      {/* Delete Modal: clicking the backdrop closes it; clicks inside the card do not bubble. */}
      {showDeleteModal && deleteDoc && (
        <div
          className="fixed inset-0 flex items-center justify-center"
          style={{ background: 'rgba(0,0,0,0.6)', zIndex: 1000 }}
          onClick={() => setShowDeleteModal(null)}
        >
          <div
            className="rounded-xl p-6"
            style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', width: 360, boxShadow: 'var(--shadow-lg)' }}
            onClick={e => e.stopPropagation()}
          >
            <h3 className="mb-3" style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>
              确认删除 "{deleteDoc.filename}"？
            </h3>
            <p className="mb-4" style={{ color: 'var(--text-2)', fontSize: 13 }}>
              该文档及其关联的所有知识图谱数据将被永久删除，此操作不可撤销。
            </p>
            <div className="flex justify-end gap-2">
              <button
                onClick={() => setShowDeleteModal(null)}
                className="px-4 py-2 rounded-md cursor-pointer"
                style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
              >
                取消
              </button>
              <button
                onClick={handleDelete}
                className="px-4 py-2 rounded-md cursor-pointer"
                style={{ background: 'rgba(248,81,73,0.15)', border: '1px solid var(--red)', color: 'var(--red)', fontSize: 13, fontWeight: 500 }}
              >
                删除
              </button>
            </div>
          </div>
        </div>
      )}
    </div>
  );
 }
--- a/frontend/src/app/components/pages/KGExplorer.tsx
+++ b/frontend/src/app/components/pages/KGExplorer.tsx
@@ -0,0 +1,439 @@
 import React, { useEffect, useRef, useState } from 'react';
 import { useNavigate, useSearchParams } from 'react-router';
 import * as d3 from 'd3';
 import { ZoomIn, ZoomOut, Maximize2, Search, Download, Image, X, MessageSquare, Upload, Share2 } from 'lucide-react';
 import { useAppState, type KGNode } from '../../store';
 import { TYPE_COLORS } from '../../mock-data';
 // Entity type values offered as filter checkboxes and legend entries. The type
 // filter starts with exactly this set enabled, and a node is shown only if its
 // `type` is in the enabled set — so a node whose type is not listed here is
 // never rendered.
 const ENTITY_TYPES = ['TECHNOLOGY', 'CONCEPT', 'PERSON', 'ORGANIZATION', 'LOCATION'] as const;
 // Values matched against a node's `confidence` field by the confidence filter;
 // the UI strips the `match_` prefix when labelling the checkboxes.
 const CONFIDENCE_LEVELS = ['match_exact', 'match_greater', 'match_lesser', 'match_fuzzy'] as const;
 export function KGExplorer() {
  const { nodes, edges, documents, selectedNode, setSelectedNode, getNeighbors } = useAppState();
  const navigate = useNavigate();
  const [searchParams] = useSearchParams();
  const svgRef = useRef<SVGSVGElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);
  const simulationRef = useRef<d3.Simulation<any, any>>();
  const zoomRef = useRef<d3.ZoomBehavior<SVGSVGElement, unknown>>();
  const [filterTypes, setFilterTypes] = useState<Set<string>>(new Set(ENTITY_TYPES));
  const [filterConfidence, setFilterConfidence] = useState<Set<string>>(new Set(CONFIDENCE_LEVELS));
  const [filterDoc, setFilterDoc] = useState<string>('all');
  const [searchQuery, setSearchQuery] = useState('');
  const [showFilter, setShowFilter] = useState(true);
  const [tooltip, setTooltip] = useState<{ x: number; y: number; node: KGNode } | null>(null);
  const indexedDocs = documents.filter(d => d.status === 'indexed');
  // Filtered nodes/edges
  const visibleNodes = nodes.filter(n => {
    if (!filterTypes.has(n.type)) return false;
    if (!filterConfidence.has(n.confidence)) return false;
    if (filterDoc !== 'all' && n.doc_id !== filterDoc) return false;
    if (searchQuery && !n.name.toLowerCase().includes(searchQuery.toLowerCase())) return false;
    return true;
  });
  const visibleNodeIds = new Set(visibleNodes.map(n => n.id));
  const visibleEdges = edges.filter(e => visibleNodeIds.has(e.source as string) && visibleNodeIds.has(e.target as string));
  // Neighbors of selected
  const neighborInfo = selectedNode ? getNeighbors(selectedNode.id) : null;
  // D3 rendering
  useEffect(() => {
    if (!svgRef.current || !containerRef.current) return;
    const svg = d3.select(svgRef.current);
    svg.selectAll('*').remove();
    if (visibleNodes.length === 0) return;
    const rect = containerRef.current.getBoundingClientRect();
    const width = rect.width;
    const height = rect.height;
    svg.attr('width', width).attr('height', height);
    const g = svg.append('g');
    const zoom = d3.zoom<SVGSVGElement, unknown>()
      .scaleExtent([0.1, 8])
      .on('zoom', (event) => g.attr('transform', event.transform));
    zoomRef.current = zoom;
    svg.call(zoom);
    // Create simulation data copies
    const simNodes = visibleNodes.map(n => ({ ...n, x: width / 2 + (Math.random() - 0.5) * 200, y: height / 2 + (Math.random() - 0.5) * 200 }));
    const simEdges = visibleEdges.map(e => ({ ...e, source: e.source, target: e.target }));
    const simulation = d3.forceSimulation(simNodes)
      .force('link', d3.forceLink(simEdges).id((d: any) => d.id).distance(60).strength(0.3))
      .force('charge', d3.forceManyBody().strength(-120))
      .force('center', d3.forceCenter(width / 2, height / 2))
      .force('collide', d3.forceCollide().radius((d: any) => getRadius(d.degree) + 4))
      .alphaDecay(0.02);
    simulationRef.current = simulation;
    // Edges
    const link = g.append('g')
      .selectAll('line')
      .data(simEdges)
      .join('line')
      .attr('stroke', '#30363d')
      .attr('stroke-width', 1)
      .attr('stroke-opacity', 0.25);
    // Nodes
    const node = g.append('g')
      .selectAll('circle')
      .data(simNodes)
      .join('circle')
      .attr('r', (d: any) => getRadius(d.degree))
      .attr('fill', (d: any) => TYPE_COLORS[d.type] || '#8b949e')
      .attr('stroke', '#0f1117')
      .attr('stroke-width', 1.5)
      .attr('opacity', 0.9)
      .attr('cursor', 'pointer')
      .on('mouseover', function(event, d: any) {
        d3.select(this).attr('stroke', '#ffffff').attr('stroke-width', 2.5);
        setTooltip({ x: event.clientX + 8, y: event.clientY + 8, node: d });
      })
      .on('mouseout', function() {
        d3.select(this).attr('stroke', '#0f1117').attr('stroke-width', 1.5);
        setTooltip(null);
      })
      .on('click', (_, d: any) => {
        setSelectedNode(d);
        // Highlight logic
        node.attr('opacity', (n: any) => {
          if (n.id === d.id) return 0.9;
          const isNeighbor = simEdges.some((e: any) =>
            (e.source.id === d.id && e.target.id === n.id) ||
            (e.target.id === d.id && e.source.id === n.id)
          );
          return isNeighbor ? 0.9 : 0.1;
        });
        d3.select(node.nodes()[simNodes.indexOf(d)])
          .attr('r', getRadius(d.degree) * 1.5);
        link.attr('stroke-opacity', (e: any) =>
          e.source.id === d.id || e.target.id === d.id ? 0.8 : 0.05
        );
      })
      .call(d3.drag<SVGCircleElement, any>()
        .on('start', (event, d: any) => {
          if (!event.active) simulation.alphaTarget(0.3).restart();
          d.fx = d.x; d.fy = d.y;
        })
        .on('drag', (event, d: any) => { d.fx = event.x; d.fy = event.y; })
        .on('end', (event, d: any) => {
          if (!event.active) simulation.alphaTarget(0);
        })
      );
    // Labels for high-degree nodes
    const label = g.append('g')
      .selectAll('text')
      .data(simNodes.filter(n => n.degree >= 12))
      .join('text')
      .text((d: any) => d.name)
      .attr('font-size', 10)
      .attr('fill', 'var(--text-3)')
      .attr('text-anchor', 'middle')
      .attr('dy', (d: any) => -(getRadius(d.degree) + 6))
      .attr('pointer-events', 'none');
    // Click blank to reset
    svg.on('click', (event) => {
      if (event.target === svgRef.current) {
        setSelectedNode(null);
        node.attr('opacity', 0.9).attr('r', (d: any) => getRadius(d.degree));
        link.attr('stroke-opacity', 0.25);
      }
    });
    simulation.on('tick', () => {
      link
        .attr('x1', (d: any) => d.source.x)
        .attr('y1', (d: any) => d.source.y)
        .attr('x2', (d: any) => d.target.x)
        .attr('y2', (d: any) => d.target.y);
      node
        .attr('cx', (d: any) => d.x)
        .attr('cy', (d: any) => d.y);
      label
        .attr('x', (d: any) => d.x)
        .attr('y', (d: any) => d.y);
    });
    // Handle URL params
    const nodeParam = searchParams.get('node');
    if (nodeParam) {
      const target = simNodes.find(n => n.id === nodeParam);
      if (target) {
        setTimeout(() => {
          const nd = nodes.find(n => n.id === nodeParam);
          if (nd) setSelectedNode(nd);
        }, 500);
      }
    }
    const docParam = searchParams.get('doc_id');
    if (docParam) {
      setFilterDoc(docParam);
    }
    return () => { simulation.stop(); };
  }, [visibleNodes.length, visibleEdges.length, searchQuery, filterDoc]);
  const handleZoomIn = () => {
    if (svgRef.current && zoomRef.current) {
      d3.select(svgRef.current).transition().duration(300).call(zoomRef.current.scaleBy, 1.3);
    }
  };
  const handleZoomOut = () => {
    if (svgRef.current && zoomRef.current) {
      d3.select(svgRef.current).transition().duration(300).call(zoomRef.current.scaleBy, 0.7);
    }
  };
  const handleFitAll = () => {
    if (svgRef.current && zoomRef.current) {
      d3.select(svgRef.current).transition().duration(500).call(zoomRef.current.transform, d3.zoomIdentity);
    }
  };
  const toggleType = (t: string) => {
    const next = new Set(filterTypes);
    if (next.has(t)) next.delete(t); else next.add(t);
    setFilterTypes(next);
  };
  const toggleConfidence = (c: string) => {
    const next = new Set(filterConfidence);
    if (next.has(c)) next.delete(c); else next.add(c);
    setFilterConfidence(next);
  };
  return (
    <div className="flex h-full" style={{ background: 'var(--bg-base)' }}>
      {/* Filter Panel */}
      {showFilter && (
        <div
          className="flex flex-col p-4 overflow-y-auto"
          style={{
            width: 260,
            background: 'var(--bg-s1)',
            borderRight: '1px solid var(--border-main)',
            flexShrink: 0,
          }}
        >
          <h3 className="mb-3" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>来源文档</h3>
          <select
            value={filterDoc}
            onChange={e => setFilterDoc(e.target.value)}
            className="mb-4 px-2 py-1.5 rounded-md w-full"
            style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 12 }}
          >
            <option value="all">全部文档</option>
            {indexedDocs.map(d => (
              <option key={d.id} value={d.id}>{d.filename}</option>
            ))}
          </select>
          <h3 className="mb-2" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>实体类型</h3>
          <div className="flex flex-col gap-1.5 mb-4">
            {ENTITY_TYPES.map(t => {
              const count = nodes.filter(n => n.type === t).length;
              return (
                <label key={t} className="flex items-center gap-2 cursor-pointer" style={{ fontSize: 12, color: 'var(--text-2)' }}>
                  <input
                    type="checkbox"
                    checked={filterTypes.has(t)}
                    onChange={() => toggleType(t)}
                    className="cursor-pointer"
                    style={{ accentColor: TYPE_COLORS[t] }}
                  />
                  <span className="inline-block w-2.5 h-2.5 rounded-full" style={{ background: TYPE_COLORS[t] }} />
                  <span className="flex-1">{t}</span>
                  <span style={{ color: 'var(--text-4)', fontSize: 11 }}>{count}</span>
                </label>
              );
            })}
          </div>
          <h3 className="mb-2" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>置信度</h3>
          <div className="flex flex-col gap-1.5 mb-4">
            {CONFIDENCE_LEVELS.map(c => (
              <label key={c} className="flex items-center gap-2 cursor-pointer" style={{ fontSize: 12, color: 'var(--text-2)' }}>
                <input type="checkbox" checked={filterConfidence.has(c)} onChange={() => toggleConfidence(c)} className="cursor-pointer" />
                {c.replace('match_', '')}
              </label>
            ))}
          </div>
          <div className="mt-auto flex flex-col gap-2">
            <button className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full" style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 12 }}>
              <Image size={12} /> 导出 PNG
            </button>
            <button className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full" style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 12 }}>
              <Download size={12} /> 导出 JSON
            </button>
          </div>
        </div>
      )}
      {/* Graph Area */}
      <div ref={containerRef} className="flex-1 relative" style={{ overflow: 'hidden' }}>
        {/* Toolbar */}
        <div className="absolute top-3 left-3 flex items-center gap-1.5 rounded-md p-1" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', zIndex: 10 }}>
          <button onClick={handleZoomIn} className="p-1.5 rounded cursor-pointer" style={{ background: 'transparent', border: 'none', color: 'var(--text-3)' }}><ZoomIn size={16} /></button>
          <button onClick={handleZoomOut} className="p-1.5 rounded cursor-pointer" style={{ background: 'transparent', border: 'none', color: 'var(--text-3)' }}><ZoomOut size={16} /></button>
          <button onClick={handleFitAll} className="p-1.5 rounded cursor-pointer" style={{ background: 'transparent', border: 'none', color: 'var(--text-3)' }}><Maximize2 size={16} /></button>
          <div style={{ width: 1, height: 20, background: 'var(--border-main)' }} />
          <div className="relative">
            <Search size={12} className="absolute left-2 top-1/2 -translate-y-1/2" style={{ color: 'var(--text-4)' }} />
            <input
              value={searchQuery}
              onChange={e => setSearchQuery(e.target.value)}
              placeholder="搜索..."
              className="pl-7 pr-2 py-1 rounded"
              style={{ width: 120, background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 12, outline: 'none' }}
            />
          </div>
        </div>
        {/* Legend */}
        <div className="absolute bottom-3 left-3 flex flex-wrap gap-3 rounded-md px-3 py-2" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', zIndex: 10 }}>
          {ENTITY_TYPES.map(t => (
            <div key={t} className="flex items-center gap-1.5" style={{ fontSize: 11, color: 'var(--text-3)' }}>
              <span className="inline-block w-2.5 h-2.5 rounded-full" style={{ background: TYPE_COLORS[t] }} />
              {t}
            </div>
          ))}
        </div>
        {/* Stats */}
        <div className="absolute top-3 right-3 rounded-md px-3 py-1.5" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', zIndex: 10, fontSize: 11, color: 'var(--text-3)' }}>
          {visibleNodes.length} 个节点 &middot; {visibleEdges.length} 条边
        </div>
        {visibleNodes.length === 0 ? (
          <div className="flex flex-col items-center justify-center h-full gap-3">
            <Share2 size={48} style={{ color: 'var(--text-4)' }} />
            <span style={{ color: 'var(--text-2)', fontSize: 16 }}>暂无知识图谱</span>
            <button
              onClick={() => navigate('/documents')}
              className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
              style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, border: 'none' }}
            >
              <Upload size={14} /> 上传 & 索引
            </button>
          </div>
        ) : (
          <svg ref={svgRef} className="w-full h-full" />
        )}
        {/* Tooltip */}
        {tooltip && (
          <div
            className="fixed rounded-md px-3 py-2 pointer-events-none"
            style={{
              left: tooltip.x, top: tooltip.y,
              background: 'var(--bg-s3)', border: '1px solid var(--border-main)',
              boxShadow: 'var(--shadow-md)', zIndex: 100, fontSize: 12,
            }}
          >
            <div className="flex items-center gap-2 mb-1">
              <span style={{ color: 'var(--text-1)', fontWeight: 600 }}>{tooltip.node.name}</span>
              <span className="px-1.5 py-0.5 rounded" style={{ fontSize: 10, fontWeight: 600, background: `${TYPE_COLORS[tooltip.node.type]}20`, color: TYPE_COLORS[tooltip.node.type] }}>
                {tooltip.node.type}
              </span>
            </div>
            <div style={{ color: 'var(--text-3)' }}>页码: {tooltip.node.page}</div>
            <div style={{ color: 'var(--text-3)' }}>置信度: {tooltip.node.confidence}</div>
            <div style={{ color: 'var(--text-3)' }}>度数: {tooltip.node.degree}</div>
          </div>
        )}
      </div>
      {/* Detail Panel */}
      {selectedNode && (
        <div
          className="flex flex-col p-4 overflow-y-auto"
          style={{
            width: 300,
            background: 'var(--bg-s1)',
            borderLeft: '1px solid var(--border-main)',
            flexShrink: 0,
          }}
        >
          <div className="flex items-center justify-between mb-3">
            <h2 style={{ color: 'var(--text-1)', fontSize: 18, fontWeight: 600 }}>{selectedNode.name}</h2>
            <button onClick={() => setSelectedNode(null)} className="cursor-pointer" style={{ background: 'none', border: 'none', color: 'var(--text-4)' }}>
              <X size={16} />
            </button>
          </div>
          <span className="inline-block w-fit px-2 py-0.5 rounded mb-4" style={{ fontSize: 11, fontWeight: 600, background: `${TYPE_COLORS[selectedNode.type]}20`, color: TYPE_COLORS[selectedNode.type] }}>
            {selectedNode.type}
          </span>
          {selectedNode.description && (
            <p className="mb-4" style={{ color: 'var(--text-2)', fontSize: 13, lineHeight: 1.6 }}>
              {selectedNode.description}
            </p>
          )}
          <div className="flex flex-col gap-2 mb-4">
            {[
              { label: '页码', value: selectedNode.page },
              { label: '置信度', value: selectedNode.confidence.replace('match_', '') },
              { label: '度数', value: selectedNode.degree },
              { label: '中心性', value: selectedNode.centrality.toFixed(2) },
            ].map(p => (
              <div key={p.label} className="flex justify-between" style={{ fontSize: 13 }}>
                <span style={{ color: 'var(--text-3)' }}>{p.label}</span>
                <span style={{ color: 'var(--text-1)' }}>{p.value}</span>
              </div>
            ))}
          </div>
          <h3 className="mb-2" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>
            邻居节点 ({neighborInfo?.nodes.length ?? 0})
          </h3>
          <div className="flex flex-col gap-1 mb-4">
            {neighborInfo?.nodes.slice(0, 5).map(n => (
              <button
                key={n.id}
                onClick={() => setSelectedNode(n)}
                className="flex items-center gap-2 px-2 py-1.5 rounded cursor-pointer text-left"
                style={{ background: 'var(--bg-s2)', border: 'none', fontSize: 12, color: 'var(--text-2)' }}
              >
                <span className="inline-block w-2 h-2 rounded-full" style={{ background: TYPE_COLORS[n.type] }} />
                <span className="flex-1 truncate">{n.name}</span>
                <span style={{ color: 'var(--text-4)', fontSize: 10 }}>{n.type}</span>
              </button>
            ))}
            {(neighborInfo?.nodes.length ?? 0) > 5 && (
              <span style={{ color: 'var(--blue)', fontSize: 12, cursor: 'pointer' }}>
                查看全部 {neighborInfo?.nodes.length} 个邻居 &rarr;
              </span>
            )}
          </div>
          <button
            onClick={() => navigate(`/chat?q=${encodeURIComponent(`Tell me about ${selectedNode.name}`)}`)}
            className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full justify-center"
            style={{ background: 'rgba(88,166,255,0.1)', border: '1px solid var(--blue)', color: 'var(--blue)', fontSize: 13 }}
          >
            <MessageSquare size={14} /> 询问 AI
          </button>
        </div>
      )}
    </div>
  );
 }
 /**
  * Map a node's degree to its circle radius.
  *
  * The radius grows with the natural log of (degree + 1), scaled by 4,
  * and is never smaller than 4.
  */
 function getRadius(degree: number): number {
  const MIN_RADIUS = 4;
  const LOG_SCALE = 4;
  const scaled = LOG_SCALE * Math.log(1 + degree);
  return Math.max(MIN_RADIUS, scaled);
 }
--- a/frontend/src/app/components/pages/QAChat.tsx
+++ b/frontend/src/app/components/pages/QAChat.tsx
@@ -0,0 +1,377 @@
 import React, { useState, useRef, useEffect } from 'react';
 import { useNavigate, useSearchParams } from 'react-router';
 import { Send, Plus, ChevronRight, Clock, ExternalLink, Info } from 'lucide-react';
 import { toast } from 'sonner';
 import { useAppState, type ChatMessage, type ToolCall } from '../../store';
 import { api, ApiError } from '../../api';
 import { TYPE_COLORS } from '../../mock-data';
 /**
  * Question-answering chat page.
  *
  * Layout: a history sidebar on the left and a chat area (message list plus
  * input box) on the right. A `q` URL query parameter, when present, is copied
  * into the input box. Sending a question calls `api.query` with the locally
  * tracked conversation history and appends the human message and the AI
  * reply (or an error bubble on failure) to the shared message list.
  */
 export function QAChat() {
  const { messages, setMessages, chatHistory, suggestedPrompts, nodes, refreshHistory } = useAppState();
  const navigate = useNavigate();
  const [searchParams] = useSearchParams();
  const [input, setInput] = useState('');
  const [isThinking, setIsThinking] = useState(false);
  const [activeHistoryId, setActiveHistoryId] = useState<string | null>(null);
  // Question/answer pairs of the current session, forwarded to api.query on every send.
  const [conversationHistory, setConversationHistory] = useState<{ question: string; answer: string }[]>([]);
  const messagesEndRef = useRef<HTMLDivElement>(null);
  const inputRef = useRef<HTMLTextAreaElement>(null);
  // Prefill the input box from the `q` query parameter and focus it.
  useEffect(() => {
    const q = searchParams.get('q');
    if (q) {
      setInput(q);
      inputRef.current?.focus();
    }
  }, [searchParams]);
  // Keep the newest message (or the thinking indicator) scrolled into view.
  useEffect(() => {
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
  }, [messages, isThinking]);
  // Build cited node objects from node IDs using local KG; unknown IDs are dropped.
  function resolveCitedNodes(ids: string[]) {
    return ids
      .map(id => {
        const n = nodes.find(n => n.id === id);
        return n ? { id: n.id, name: n.name, type: n.type } : null;
      })
      .filter(Boolean) as { id: string; name: string; type: string }[];
  }
  // Submit the current input: no-op for blank input or while a request is in flight.
  const handleSend = async () => {
    if (!input.trim() || isThinking) return;
    const question = input.trim();
    setInput('');
    setIsThinking(true);
    const userMsg: ChatMessage = {
      id: `m${Date.now()}`,
      role: 'human',
      content: question,
      timestamp: new Date().toISOString(),
    };
    setMessages(prev => [...prev, userMsg]);
    try {
      const result = await api.query(question, conversationHistory);
      const aiMsg: ChatMessage = {
        id: result.id ?? `m${Date.now() + 1}`,
        role: 'ai',
        content: result.answer,
        timestamp: result.timestamp ?? new Date().toISOString(),
        // Default to an empty list (as done for cited_nodes) so a response
        // without tool calls is not turned into an error bubble by the catch below.
        toolCalls: (result.tool_calls ?? []).map((tc, i) => ({
          step: tc.step ?? i + 1,
          tool: tc.tool_name,
          input: tc.tool_input,
          output: tc.tool_output,
        })),
        citedNodes: resolveCitedNodes(result.cited_nodes ?? []),
        duration: result.duration_seconds,
      };
      setMessages(prev => [...prev, aiMsg]);
      setConversationHistory(prev => [...prev, { question, answer: result.answer }]);
      // Refresh history sidebar
      refreshHistory();
    } catch (err) {
      const msg = err instanceof ApiError ? err.message : '问答服务异常';
      toast.error(msg);
      setMessages(prev => [...prev, {
        id: `err${Date.now()}`,
        role: 'ai',
        content: `⚠️ 请求失败：${msg}\n\n请确认：\n1. 后端服务已启动（localhost:8000）\n2. 知识图谱已有数据（请先上传并索引文档）\n3. DeepSeek API Key 已配置`,
        timestamp: new Date().toISOString(),
      }]);
    } finally {
      setIsThinking(false);
    }
  };
  // Enter sends, Shift+Enter inserts a newline.
  const handleKeyDown = (e: React.KeyboardEvent) => {
    // While an IME composition is active (e.g. typing Chinese), Enter only
    // confirms the candidate text and must not submit the question.
    // keyCode 229 covers browsers that report the committing keydown with
    // isComposing already false.
    if (e.nativeEvent.isComposing || e.keyCode === 229) return;
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault();
      handleSend();
    }
  };
  // Reset the page to an empty conversation.
  const handleNewChat = () => {
    setMessages([]);
    setInput('');
    setActiveHistoryId(null);
    setConversationHistory([]);
  };
  // Load a history item as a single Q&A session
  const handleLoadHistory = (h: typeof chatHistory[0]) => {
    setActiveHistoryId(h.id);
    const msgs: ChatMessage[] = [
      { id: `${h.id}-q`, role: 'human', content: h.question, timestamp: h.timestamp },
      {
        id: `${h.id}-a`, role: 'ai', content: h.answer, timestamp: h.timestamp,
        toolCalls: h.toolCalls,
        citedNodes: resolveCitedNodes(h.citedNodeIds ?? []),
        duration: h.duration,
      },
    ];
    setMessages(msgs);
    setConversationHistory([{ question: h.question, answer: h.answer }]);
  };
  // Sidebar sections; only items whose `group` equals one of these labels are listed.
  const groupedHistory = {
    '今天': chatHistory.filter(h => h.group === '今天'),
    '昨天': chatHistory.filter(h => h.group === '昨天'),
    '更早': chatHistory.filter(h => h.group === '更早'),
  };
  return (
    <div className="flex h-full" style={{ background: 'var(--bg-base)' }}>
      {/* History Sidebar */}
      <div
        className="flex flex-col"
        style={{ width: 240, background: 'var(--bg-s1)', borderRight: '1px solid var(--border-main)', flexShrink: 0 }}
      >
        <div className="p-3">
          <button
            onClick={handleNewChat}
            className="flex items-center gap-2 w-full px-3 py-2 rounded-md cursor-pointer"
            style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
          >
            <Plus size={14} /> 新对话
          </button>
        </div>
        {/* Notice about history session management */}
        <div className="mx-3 mb-2 px-2 py-1.5 rounded-md flex items-start gap-1.5" style={{ background: 'rgba(88,166,255,0.08)', border: '1px solid rgba(88,166,255,0.2)' }}>
          <Info size={11} style={{ color: 'var(--blue)', flexShrink: 0, marginTop: 1 }} />
          <span style={{ fontSize: 10, color: 'var(--text-4)', lineHeight: 1.4 }}>
            点击历史记录查看单条问答；多轮对话会话管理
            <span style={{ background: 'rgba(248,81,73,0.15)', color: '#f85149', padding: '0 3px', borderRadius: 2, marginLeft: 2 }}>未开发</span>
          </span>
        </div>
        <div className="flex-1 overflow-y-auto px-2">
          {Object.entries(groupedHistory).map(([group, items]) => items.length > 0 && (
            <div key={group} className="mb-3">
              <div className="px-2 py-1" style={{ fontSize: 11, fontWeight: 600, color: 'var(--text-4)', textTransform: 'uppercase', letterSpacing: '0.5px' }}>
                {group}
              </div>
              {items.map(h => (
                <button
                  key={h.id}
                  onClick={() => handleLoadHistory(h)}
                  className="w-full text-left px-2 py-1.5 rounded cursor-pointer truncate block"
                  style={{
                    background: activeHistoryId === h.id ? 'var(--bg-s2)' : 'transparent',
                    color: activeHistoryId === h.id ? 'var(--text-1)' : 'var(--text-3)',
                    fontSize: 12, border: 'none',
                  }}
                >
                  {h.question.length > 28 ? h.question.slice(0, 28) + '...' : h.question}
                </button>
              ))}
            </div>
          ))}
          {chatHistory.length === 0 && (
            <div className="px-2 py-4 text-center" style={{ color: 'var(--text-4)', fontSize: 12 }}>暂无历史记录</div>
          )}
        </div>
      </div>
      {/* Chat Area */}
      <div className="flex-1 flex flex-col">
        {/* Messages */}
        <div className="flex-1 overflow-y-auto p-6">
          {messages.length === 0 ? (
            <div className="flex flex-col items-center justify-center h-full gap-4">
              <div style={{ fontSize: 32 }}>
                <span style={{ color: 'var(--blue)' }}>GraphRAG</span>{' '}
                <span style={{ color: 'var(--text-3)' }}>Studio</span>
              </div>
              <p style={{ color: 'var(--text-3)', fontSize: 14, textAlign: 'center', maxWidth: 500 }}>
                向知识图谱提问。我将使用多步推理从已索引的文档中为您找到准确答案。
              </p>
              <div className="grid grid-cols-2 gap-3 mt-4" style={{ maxWidth: 600, width: '100%' }}>
                {suggestedPrompts.map((p, i) => (
                  <button
                    key={i}
                    onClick={() => setInput(p)}
                    className="text-left p-3 rounded-lg cursor-pointer"
                    style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
                  >
                    {p}
                  </button>
                ))}
              </div>
            </div>
          ) : (
            <div className="flex flex-col gap-4 max-w-3xl mx-auto">
              {messages.map(msg => (
                <div key={msg.id}>
                  {msg.role === 'human' ? (
                    <div className="flex justify-end">
                      <div
                        className="rounded-lg px-4 py-3"
                        style={{ background: 'rgba(88,166,255,0.15)', color: 'var(--text-1)', fontSize: 14, maxWidth: '80%', lineHeight: 1.6 }}
                      >
                        {msg.content}
                      </div>
                    </div>
                  ) : (
                    <div className="flex justify-start">
                      <div
                        className="rounded-lg px-4 py-3"
                        style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 14, maxWidth: '90%', lineHeight: 1.6 }}
                      >
                        {/* AI content is injected as HTML produced by renderSimpleMarkdown */}
                        <div
                          style={{ whiteSpace: 'pre-wrap' }}
                          dangerouslySetInnerHTML={{ __html: renderSimpleMarkdown(msg.content) }}
                        />
                        {msg.toolCalls && msg.toolCalls.length > 0 && (
                          <ToolCallPanel toolCalls={msg.toolCalls} />
                        )}
                        {msg.citedNodes && msg.citedNodes.length > 0 && (
                          <div className="flex flex-wrap gap-2 mt-3 pt-3" style={{ borderTop: '1px solid var(--border-muted)' }}>
                            {msg.citedNodes.map(cn => (
                              <button
                                key={cn.id}
                                // Encode the id so characters such as '&' or '#' cannot break the query string.
                                onClick={() => navigate(`/graph?node=${encodeURIComponent(cn.id)}`)}
                                className="flex items-center gap-1.5 px-2 py-1 rounded-full cursor-pointer"
                                style={{
                                  background: `${TYPE_COLORS[cn.type] ?? '#8b949e'}15`,
                                  border: `1px solid ${TYPE_COLORS[cn.type] ?? '#8b949e'}40`,
                                  color: TYPE_COLORS[cn.type] ?? '#8b949e',
                                  fontSize: 11, fontWeight: 500,
                                }}
                              >
                                <span className="inline-block w-1.5 h-1.5 rounded-full" style={{ background: TYPE_COLORS[cn.type] ?? '#8b949e' }} />
                                {cn.name}
                                <ExternalLink size={9} />
                              </button>
                            ))}
                          </div>
                        )}
                        {msg.duration !== undefined && (
                          <div className="flex items-center gap-1 mt-2" style={{ color: 'var(--text-4)', fontSize: 11 }}>
                            <Clock size={10} /> {msg.duration.toFixed(1)}s
                          </div>
                        )}
                      </div>
                    </div>
                  )}
                </div>
              ))}
              {isThinking && (
                <div className="flex justify-start">
                  <div
                    className="rounded-lg px-4 py-3 flex items-center gap-1.5"
                    style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
                  >
                    <span className="thinking-dot" />
                    <span className="thinking-dot" />
                    <span className="thinking-dot" />
                  </div>
                </div>
              )}
              <div ref={messagesEndRef} />
            </div>
          )}
        </div>
        {/* Input Area */}
        <div className="p-4" style={{ borderTop: '1px solid var(--border-main)', background: 'var(--bg-s1)' }}>
          <div className="max-w-3xl mx-auto flex gap-2">
            <textarea
              ref={inputRef}
              value={input}
              onChange={e => setInput(e.target.value)}
              onKeyDown={handleKeyDown}
              placeholder="向知识图谱提问..."
              disabled={isThinking}
              rows={1}
              className="flex-1 resize-none rounded-lg px-4 py-2.5 outline-none"
              style={{
                background: 'var(--bg-s2)', border: '1px solid var(--border-main)',
                color: 'var(--text-1)', fontSize: 14, minHeight: 42, maxHeight: 120,
                opacity: isThinking ? 0.5 : 1,
              }}
            />
            <button
              onClick={handleSend}
              disabled={isThinking || !input.trim()}
              className="px-4 py-2 rounded-lg cursor-pointer flex items-center gap-2"
              style={{
                background: input.trim() ? 'var(--green-btn)' : 'var(--bg-s2)',
                color: input.trim() ? '#fff' : 'var(--text-4)',
                border: 'none', fontSize: 13, fontWeight: 500,
                opacity: isThinking ? 0.5 : 1,
              }}
            >
              <Send size={14} /> 发送
            </button>
          </div>
          <div className="max-w-3xl mx-auto mt-1.5">
            <span style={{ color: 'var(--text-4)', fontSize: 11 }}>
              Enter 发送，Shift+Enter 换行 &nbsp;|&nbsp; 批量问答管理
              <span style={{ background: 'rgba(248,81,73,0.15)', color: '#f85149', padding: '0 3px', borderRadius: 2, marginLeft: 4, fontSize: 10 }}>未开发</span>
            </span>
          </div>
        </div>
      </div>
    </div>
  );
 }
 /**
  * Collapsible panel listing the tool calls attached to one AI message.
  *
  * Starts collapsed; the header button toggles the list. Each entry shows the
  * step number, the tool name, and the tool's input and output.
  */
 function ToolCallPanel({ toolCalls }: { toolCalls: ToolCall[] }) {
  const [expanded, setExpanded] = useState(false);
  const toggleExpanded = () => setExpanded(!expanded);
  const chevronTransform = expanded ? 'rotate(90deg)' : 'none';
  // The "input"/"output" captions and their code boxes share these styles.
  const captionStyle = { fontSize: 11, color: 'var(--text-4)' };
  const codeStyle = { background: 'var(--bg-base)', fontSize: 11, color: 'var(--text-3)', fontFamily: 'monospace', lineHeight: 1.5 };
  // Render a single tool invocation row.
  const renderStep = ({ step, tool, input, output }: ToolCall) => (
    <div key={step} className="p-3" style={{ borderBottom: '1px solid var(--border-muted)' }}>
      <div className="flex items-center gap-2 mb-2">
        <span style={{ color: 'var(--text-4)', fontSize: 11 }}>步骤 {step}</span>
        <span style={{ color: 'var(--yellow)', fontSize: 12, fontFamily: 'monospace', fontWeight: 600 }}>{tool}</span>
      </div>
      <div className="mb-1" style={captionStyle}>输入:</div>
      <pre className="mb-2 p-2 rounded overflow-x-auto" style={codeStyle}>{input}</pre>
      <div className="mb-1" style={captionStyle}>输出:</div>
      <pre className="p-2 rounded overflow-x-auto" style={codeStyle}>{output}</pre>
    </div>
  );
  return (
    <div className="mt-3">
      <button
        onClick={toggleExpanded}
        className="flex items-center gap-1.5 cursor-pointer"
        style={{ background: 'none', border: 'none', color: 'var(--text-3)', fontSize: 12 }}
      >
        <ChevronRight size={12} style={{ transform: chevronTransform, transition: 'transform 150ms' }} />
        工具调用 ({toolCalls.length} 步)
      </button>
      {expanded ? (
        <div className="mt-2 rounded-md overflow-hidden" style={{ background: 'var(--bg-s3)', border: '1px solid var(--border-muted)' }}>
          {toolCalls.map(tc => renderStep(tc))}
        </div>
      ) : null}
    </div>
  );
 }
 /**
  * Convert a small Markdown subset into an HTML string.
  *
  * Supported syntax: `**bold**`, `## ` / `### ` headings, `> ` quotes,
  * `1. ` numbered items, `- ` bullet items; every newline becomes `<br/>`.
  *
  * The result is meant for `dangerouslySetInnerHTML`, so the input is
  * HTML-escaped before any markup is generated: text coming from the model or
  * from stored history must never be able to inject tags or attributes.
  *
  * @param text Raw message text.
  * @returns HTML in which the only tags are the ones produced here.
  */
 function renderSimpleMarkdown(text: string): string {
  // Escape '&' first so the entities introduced below are not double-escaped.
  const escaped = text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
  return escaped
    .replace(/\*\*(.*?)\*\*/g, '<strong style="color:var(--text-1)">$1</strong>')
    .replace(/^## (.*$)/gm, '<div style="font-size:16px;font-weight:600;color:var(--text-1);margin:8px 0 4px">$1</div>')
    .replace(/^### (.*$)/gm, '<div style="font-size:14px;font-weight:600;color:var(--text-1);margin:6px 0 4px">$1</div>')
    // The quote marker '>' has already been escaped to '&gt;' at this point.
    .replace(/^&gt; (.*$)/gm, '<div style="border-left:3px solid var(--blue);padding-left:12px;color:var(--text-3);margin:8px 0">$1</div>')
    .replace(/^\d+\. (.*$)/gm, '<div style="padding-left:16px;margin:2px 0">$&</div>')
    .replace(/^- (.*$)/gm, '<div style="padding-left:16px;margin:2px 0">&bull; $1</div>')
    .replace(/\n/g, '<br/>');
 }
--- a/frontend/src/app/components/pages/SearchPage.tsx
+++ b/frontend/src/app/components/pages/SearchPage.tsx
@@ -0,0 +1,469 @@
 import React, { useState, useEffect, useRef } from 'react';
 import { useNavigate, useSearchParams } from 'react-router';
 import * as d3 from 'd3';
 import { Search, ExternalLink, MessageSquare, ArrowRight } from 'lucide-react';
 import { useAppState, mapApiNode, mapApiEdge, type KGNode } from '../../store';
 import { api, ApiError } from '../../api';
 import { TYPE_COLORS } from '../../mock-data';
 const ENTITY_TYPES_OPTIONS = ['全部类型', 'TECHNOLOGY', 'CONCEPT', 'PERSON', 'ORGANIZATION', 'LOCATION'];
 export function SearchPage() {
  const { nodes, edges, getNeighbors } = useAppState();
  const navigate = useNavigate();
  const [searchParams, setSearchParams] = useSearchParams();
  const [query, setQuery] = useState(searchParams.get('q') || '');
  const [typeFilter, setTypeFilter] = useState(searchParams.get('type') || '全部类型');
  const [activeTab, setActiveTab] = useState<'entity' | 'path' | 'graph'>(
    (searchParams.get('tab') as 'entity' | 'path' | 'graph') || 'entity'
  );
  const [results, setResults] = useState<KGNode[]>([]);
  const [selectedResult, setSelectedResult] = useState<KGNode | null>(null);
  const [hasSearched, setHasSearched] = useState(false);
  const [searching, setSearching] = useState(false);
  // Path search
  const [pathFrom, setPathFrom] = useState('');
  const [pathTo, setPathTo] = useState('');
  const [maxHops, setMaxHops] = useState(3);
  const [pathResult, setPathResult] = useState<KGNode[] | null>(null);
  const [pathSearching, setPathSearching] = useState(false);
  const [pathError, setPathError] = useState('');
  // Graph search
  const [graphQuery, setGraphQuery] = useState('');
  const [includeNeighbors, setIncludeNeighbors] = useState(true);
  const [graphResults, setGraphResults] = useState<KGNode[]>([]);
  const [graphSearching, setGraphSearching] = useState(false);
  const previewRef = useRef<SVGSVGElement>(null);
  // Auto-search from URL
  useEffect(() => {
    const q = searchParams.get('q');
    if (q) {
      setQuery(q);
      doEntitySearch(q, typeFilter);
    }
  }, []); // eslint-disable-line react-hooks/exhaustive-deps
  // ── Entity Search ─────────────────────────────────────────────────────────
  const doEntitySearch = async (q: string, type: string) => {
    if (!q.trim()) return;
    setSearching(true);
    setHasSearched(true);
    try {
      const res = await api.searchEntities(q.trim(), type !== '全部类型' ? type : undefined, 50);
      const mapped = res.items.map(mapApiNode);
      setResults(mapped);
      setSelectedResult(mapped[0] ?? null);
      setSearchParams({ q: q.trim(), type, tab: 'entity' });
    } catch {
      setResults([]);
    } finally {
      setSearching(false);
    }
  };
  const handleEntitySearch = () => doEntitySearch(query, typeFilter);
  // ── Preview graph for selected entity ────────────────────────────────────
  useEffect(() => {
    if (!selectedResult || !previewRef.current) return;
    const svg = d3.select(previewRef.current);
    svg.selectAll('*').remove();
    // Use local KG for preview (already loaded)
    const { nodes: neighbors, edges: nEdges } = getNeighbors(selectedResult.id);
    const allNodes = [selectedResult, ...neighbors];
    const width = 380;
    const height = 280;
    svg.attr('width', width).attr('height', height);
    const g = svg.append('g');
    const simNodes = allNodes.map(n => ({ ...n, x: width / 2 + (Math.random() - 0.5) * 100, y: height / 2 + (Math.random() - 0.5) * 100 }));
    const simEdges = nEdges.map(e => ({ ...e }));
    const simulation = d3.forceSimulation(simNodes)
      .force('link', d3.forceLink(simEdges).id((d: any) => d.id).distance(50).strength(0.5))
      .force('charge', d3.forceManyBody().strength(-80))
      .force('center', d3.forceCenter(width / 2, height / 2))
      .alphaDecay(0.05);
    const link = g.selectAll('line').data(simEdges).join('line')
      .attr('stroke', '#30363d').attr('stroke-width', 1).attr('stroke-opacity', 0.4);
    const node = g.selectAll('circle').data(simNodes).join('circle')
      .attr('r', (d: any) => d.id === selectedResult.id ? 8 : 5)
      .attr('fill', (d: any) => TYPE_COLORS[d.type] ?? '#8b949e')
      .attr('stroke', (d: any) => d.id === selectedResult.id ? '#fff' : '#0f1117')
      .attr('stroke-width', (d: any) => d.id === selectedResult.id ? 2 : 1);
    g.selectAll('text').data(simNodes.filter(n => n.id === selectedResult.id || n.degree >= 10)).join('text')
      .text((d: any) => d.name).attr('font-size', 9).attr('fill', 'var(--text-3)')
      .attr('text-anchor', 'middle').attr('dy', -12).attr('pointer-events', 'none');
    simulation.on('tick', () => {
      link.attr('x1', (d: any) => d.source.x).attr('y1', (d: any) => d.source.y)
        .attr('x2', (d: any) => d.target.x).attr('y2', (d: any) => d.target.y);
      node.attr('cx', (d: any) => d.x).attr('cy', (d: any) => d.y);
    });
    return () => simulation.stop();
  }, [selectedResult, getNeighbors]);
  // ── Path Search ───────────────────────────────────────────────────────────
  const handlePathSearch = async () => {
    if (!pathFrom.trim() || !pathTo.trim()) return;
    setPathError('');
    setPathResult(null);
    // Resolve names to node IDs from local KG
    const fromNode = nodes.find(n => n.name.toLowerCase().includes(pathFrom.toLowerCase()));
    const toNode = nodes.find(n => n.name.toLowerCase().includes(pathTo.toLowerCase()));
    if (!fromNode) { setPathError(`未找到起点实体"${pathFrom}"，请检查名称是否正确`); return; }
    if (!toNode) { setPathError(`未找到终点实体"${pathTo}"，请检查名称是否正确`); return; }
    setPathSearching(true);
    try {
      const res = await api.searchPath(fromNode.id, toNode.id, maxHops);
      if (!res.paths || res.paths.length === 0) {
        setPathResult([]);
      } else {
        // Use the shortest path (first result)
        const firstPath = res.paths[0];
        const pathNodes = firstPath.nodes
          .map(n => {
            const local = nodes.find(ln => ln.id === n.id);
            return local ?? { id: n.id, name: n.name, type: n.type as KGNode['type'], page: 0, confidence: 'match_exact' as const, degree: 0, centrality: 0, doc_id: '' };
          });
        setPathResult(pathNodes);
      }
    } catch (err) {
      if (err instanceof ApiError && err.code === 3001) {
        setPathResult([]);
      } else {
        setPathError(err instanceof ApiError ? err.message : '路径查找失败');
      }
    } finally {
      setPathSearching(false);
    }
  };
  // ── Graph Search ──────────────────────────────────────────────────────────
  const handleGraphSearch = async () => {
    if (!graphQuery.trim()) return;
    setGraphSearching(true);
    try {
      const res = await api.searchGraph(graphQuery.trim(), includeNeighbors);
      setGraphResults(res.matched_nodes.map(mapApiNode));
    } catch {
      setGraphResults([]);
    } finally {
      setGraphSearching(false);
    }
  };
  return (
    <div className="p-6" style={{ maxWidth: 1200, margin: '0 auto' }}>
      <h1 className="mb-6" style={{ color: 'var(--text-1)', fontSize: 20, fontWeight: 600 }}>搜索</h1>
      {/* Search Header */}
      <div className="flex gap-3 mb-4">
        <div className="relative flex-1">
          <Search size={14} className="absolute left-3 top-1/2 -translate-y-1/2" style={{ color: 'var(--text-4)' }} />
          <input
            value={query}
            onChange={e => setQuery(e.target.value)}
            onKeyDown={e => e.key === 'Enter' && handleEntitySearch()}
            placeholder="搜索实体..."
            className="w-full pl-9 pr-4 py-2.5 rounded-lg outline-none"
            style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 14 }}
          />
        </div>
        <select
          value={typeFilter}
          onChange={e => setTypeFilter(e.target.value)}
          className="px-3 py-2 rounded-lg cursor-pointer"
          style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
        >
          {ENTITY_TYPES_OPTIONS.map(t => <option key={t}>{t}</option>)}
        </select>
        <button
          onClick={handleEntitySearch}
          disabled={searching}
          className="flex items-center gap-2 px-5 py-2 rounded-lg cursor-pointer"
          style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, fontWeight: 500, border: 'none', opacity: searching ? 0.7 : 1 }}
        >
          <Search size={14} /> {searching ? '搜索中...' : '搜索'}
        </button>
      </div>
      {/* Tabs */}
      <div className="flex gap-0 mb-6" style={{ borderBottom: '1px solid var(--border-main)' }}>
        {([
          { key: 'entity' as const, label: '实体搜索' },
          { key: 'path' as const, label: '路径搜索' },
          { key: 'graph' as const, label: '图谱搜索' },
        ]).map(tab => (
          <button
            key={tab.key}
            onClick={() => setActiveTab(tab.key)}
            className="px-4 py-2.5 cursor-pointer relative"
            style={{
              background: 'transparent', border: 'none',
              color: activeTab === tab.key ? 'var(--blue)' : 'var(--text-3)',
              fontSize: 13, fontWeight: activeTab === tab.key ? 600 : 400,
            }}
          >
            {tab.label}
            {activeTab === tab.key && (
              <div className="absolute bottom-0 left-0 right-0 h-0.5" style={{ background: 'var(--blue)' }} />
            )}
          </button>
        ))}
      </div>
      {/* Entity Search Tab */}
      {activeTab === 'entity' && (
        <div className="flex gap-4">
          <div className="flex-1" style={{ minWidth: 0 }}>
            {!hasSearched ? (
              <div className="flex flex-col items-center justify-center py-16 gap-3">
                <Search size={36} style={{ color: 'var(--text-4)' }} />
                <span style={{ color: 'var(--text-3)', fontSize: 14 }}>输入查询以搜索实体</span>
              </div>
            ) : searching ? (
              <div className="flex flex-col items-center justify-center py-16 gap-3">
                <span style={{ color: 'var(--text-3)', fontSize: 14 }}>搜索中...</span>
              </div>
            ) : results.length === 0 ? (
              <div className="flex flex-col items-center justify-center py-16 gap-3">
                <span style={{ color: 'var(--text-3)', fontSize: 14 }}>未找到实体 "{query}"</span>
                <button
                  onClick={() => navigate('/graph')}
                  className="flex items-center gap-1 cursor-pointer"
                  style={{ color: 'var(--blue)', fontSize: 13, background: 'none', border: 'none' }}
                >
                  探索知识图谱 <ExternalLink size={12} />
                </button>
              </div>
            ) : (
              <div className="flex flex-col gap-2">
                <div style={{ color: 'var(--text-4)', fontSize: 12, marginBottom: 4 }}>
                  找到 {results.length} 个结果
                </div>
                {results.map(r => (
                  <button
                    key={r.id}
                    onClick={() => setSelectedResult(r)}
                    className="flex items-center gap-3 p-3 rounded-lg cursor-pointer text-left w-full"
                    style={{
                      background: selectedResult?.id === r.id ? 'var(--bg-s2)' : 'var(--bg-s1)',
                      border: `1px solid ${selectedResult?.id === r.id ? 'var(--blue)' : 'var(--border-main)'}`,
                    }}
                  >
                    <span className="inline-block w-3 h-3 rounded-full flex-shrink-0" style={{ background: TYPE_COLORS[r.type] ?? '#8b949e' }} />
                    <div className="flex-1 min-w-0">
                      <div className="flex items-center gap-2 mb-0.5">
                        <span style={{ color: 'var(--text-1)', fontSize: 14, fontWeight: 500 }}>{r.name}</span>
                        <span className="px-1.5 py-0.5 rounded" style={{ fontSize: 10, fontWeight: 600, background: `${TYPE_COLORS[r.type] ?? '#8b949e'}20`, color: TYPE_COLORS[r.type] ?? '#8b949e' }}>
                          {r.type}
                        </span>
                      </div>
                      <div className="flex items-center gap-3" style={{ fontSize: 11, color: 'var(--text-4)' }}>
                        <span>页码 {r.page}</span>
                        <span>度数 {r.degree}</span>
                        <span>{r.confidence.replace('match_', '')}</span>
                      </div>
                    </div>
                    <div className="flex items-center gap-1.5 flex-shrink-0">
                      <button
                        onClick={e => { e.stopPropagation(); navigate(`/graph?node=${r.id}`); }}
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 10, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
                      >
                        查看图谱
                      </button>
                      <button
                        onClick={e => { e.stopPropagation(); navigate(`/chat?q=${encodeURIComponent(`What is ${r.name}`)}`); }}
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 10, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
                      >
                        <MessageSquare size={10} />
                      </button>
                    </div>
                  </button>
                ))}
              </div>
            )}
          </div>
          {/* Preview Graph */}
          {selectedResult && (
            <div
              className="rounded-lg p-3 flex-shrink-0"
              style={{ width: 400, background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
            >
              <div className="flex items-center justify-between mb-2">
                <span style={{ color: 'var(--text-1)', fontSize: 13, fontWeight: 600 }}>
                  预览: {selectedResult.name}
                </span>
                <span style={{ fontSize: 11, color: 'var(--text-4)' }}>1 跳邻居</span>
              </div>
              <svg ref={previewRef} className="w-full" style={{ height: 280, background: 'var(--bg-base)', borderRadius: 6 }} />
            </div>
          )}
        </div>
      )}
      {/* Path Search Tab */}
      {activeTab === 'path' && (
        <div>
          <div className="flex items-end gap-3 mb-6">
            <div className="flex-1">
              <label style={{ fontSize: 12, color: 'var(--text-3)', display: 'block', marginBottom: 4 }}>起点实体名称</label>
              <input
                value={pathFrom}
                onChange={e => setPathFrom(e.target.value)}
                placeholder="如: GraphRAG"
                className="w-full px-3 py-2 rounded-md outline-none"
                style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13 }}
              />
            </div>
            <div className="flex-1">
              <label style={{ fontSize: 12, color: 'var(--text-3)', display: 'block', marginBottom: 4 }}>终点实体名称</label>
              <input
                value={pathTo}
                onChange={e => setPathTo(e.target.value)}
                placeholder="如: LLM"
                className="w-full px-3 py-2 rounded-md outline-none"
                style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13 }}
              />
            </div>
            <div>
              <label style={{ fontSize: 12, color: 'var(--text-3)', display: 'block', marginBottom: 4 }}>最大跳数</label>
              <select
                value={maxHops}
                onChange={e => setMaxHops(Number(e.target.value))}
                className="px-3 py-2 rounded-md cursor-pointer"
                style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
              >
                {[1, 2, 3, 4, 5].map(n => <option key={n} value={n}>{n}</option>)}
              </select>
            </div>
            <button
              onClick={handlePathSearch}
              disabled={pathSearching}
              className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
              style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, border: 'none', opacity: pathSearching ? 0.7 : 1 }}
            >
              {pathSearching ? '查找中...' : '查找路径'}
            </button>
          </div>
          {pathError && (
            <div className="mb-4 px-4 py-2 rounded-md" style={{ background: 'rgba(248,81,73,0.1)', border: '1px solid rgba(248,81,73,0.3)', color: 'var(--red)', fontSize: 13 }}>
              {pathError}
            </div>
          )}
          {pathResult !== null && (
            pathResult.length === 0 ? (
              <div className="text-center py-12" style={{ color: 'var(--text-3)', fontSize: 14 }}>
                这两个实体之间没有路径（在 {maxHops} 跳内）
              </div>
            ) : (
              <div className="flex items-center gap-2 flex-wrap p-6 rounded-lg" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}>
                <span style={{ fontSize: 12, color: 'var(--text-4)', marginBottom: 8, display: 'block', width: '100%' }}>
                  路径长度 {pathResult.length - 1} 跳
                </span>
                {pathResult.map((n, i) => (
                  <React.Fragment key={n.id}>
                    <button
                      onClick={() => navigate(`/graph?node=${n.id}`)}
                      className="flex items-center gap-2 px-3 py-2 rounded-lg cursor-pointer"
                      style={{ background: 'var(--bg-s2)', border: `1px solid ${TYPE_COLORS[n.type] ?? '#8b949e'}40` }}
                    >
                      <span className="w-2.5 h-2.5 rounded-full" style={{ background: TYPE_COLORS[n.type] ?? '#8b949e' }} />
                      <span style={{ color: 'var(--text-1)', fontSize: 13 }}>{n.name}</span>
                      <span style={{ fontSize: 10, color: TYPE_COLORS[n.type] ?? '#8b949e' }}>{n.type}</span>
                    </button>
                    {i < pathResult.length - 1 && (
                      <ArrowRight size={16} style={{ color: 'var(--text-4)' }} />
                    )}
                  </React.Fragment>
                ))}
              </div>
            )
          )}
        </div>
      )}
      {/* Graph Search Tab */}
      {activeTab === 'graph' && (
        <div>
          <div className="flex items-end gap-3 mb-6">
            <div className="flex-1">
              <input
                value={graphQuery}
                onChange={e => setGraphQuery(e.target.value)}
                onKeyDown={e => e.key === 'Enter' && handleGraphSearch()}
                placeholder="搜索关键词..."
                className="w-full px-3 py-2 rounded-md outline-none"
                style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13 }}
              />
            </div>
            <label className="flex items-center gap-2 cursor-pointer px-3 py-2">
              <input
                type="checkbox"
                checked={includeNeighbors}
                onChange={e => setIncludeNeighbors(e.target.checked)}
                style={{ accentColor: 'var(--blue)' }}
              />
              <span style={{ fontSize: 12, color: 'var(--text-2)' }}>包含邻居</span>
            </label>
            <button
              onClick={handleGraphSearch}
              disabled={graphSearching}
              className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
              style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, border: 'none', opacity: graphSearching ? 0.7 : 1 }}
            >
              {graphSearching ? '搜索中...' : '搜索'}
            </button>
          </div>
          {graphResults.length > 0 && (
            <>
              <div style={{ color: 'var(--text-4)', fontSize: 12, marginBottom: 8 }}>
                找到 {graphResults.length} 个节点
              </div>
              <div className="flex flex-wrap gap-2">
                {graphResults.map(n => (
                  <button
                    key={n.id}
                    onClick={() => navigate(`/graph?node=${n.id}`)}
                    className="flex items-center gap-2 px-3 py-1.5 rounded-full cursor-pointer"
                    style={{ background: `${TYPE_COLORS[n.type] ?? '#8b949e'}15`, border: `1px solid ${TYPE_COLORS[n.type] ?? '#8b949e'}40`, color: TYPE_COLORS[n.type] ?? '#8b949e', fontSize: 12 }}
                  >
                    <span className="w-2 h-2 rounded-full" style={{ background: TYPE_COLORS[n.type] ?? '#8b949e' }} />
                    {n.name}
                  </button>
                ))}
              </div>
            </>
          )}
          {graphSearching === false && graphQuery && graphResults.length === 0 && (
            <div className="text-center py-12" style={{ color: 'var(--text-3)', fontSize: 14 }}>
              未找到包含 "{graphQuery}" 的节点
            </div>
          )}
        </div>
      )}
    </div>
  );
 }
--- a/frontend/src/app/components/ui/accordion.tsx
+++ b/frontend/src/app/components/ui/accordion.tsx
@@ -0,0 +1,66 @@
 "use client";
 import * as React from "react";
 import * as AccordionPrimitive from "@radix-ui/react-accordion";
 import { ChevronDownIcon } from "lucide-react";
 import { cn } from "./utils";
 /**
  * Root accordion wrapper. Tags the Radix root with `data-slot="accordion"`
  * and forwards every received prop to it unchanged.
  */
 function Accordion(
  props: React.ComponentProps<typeof AccordionPrimitive.Root>,
 ) {
  return <AccordionPrimitive.Root data-slot="accordion" {...props} />;
 }
 /**
  * Single accordion section. Applies a bottom border (dropped on the last
  * item) and merges any caller-supplied `className` after the defaults.
  */
 function AccordionItem(
  props: React.ComponentProps<typeof AccordionPrimitive.Item>,
 ) {
  const { className, ...itemProps } = props;
  const itemClass = cn("border-b last:border-b-0", className);
  return (
    <AccordionPrimitive.Item
      data-slot="accordion-item"
      className={itemClass}
      {...itemProps}
    />
  );
 }
 /**
  * Accordion header plus its trigger. Renders `children` followed by a
  * chevron icon inside the Radix trigger; caller `className` is merged after
  * the default trigger classes.
  */
 function AccordionTrigger(
  props: React.ComponentProps<typeof AccordionPrimitive.Trigger>,
 ) {
  const { className, children, ...triggerProps } = props;
  const triggerClass = cn(
    "focus-visible:border-ring focus-visible:ring-ring/50 flex flex-1 items-start justify-between gap-4 rounded-md py-4 text-left text-sm font-medium transition-all outline-none hover:underline focus-visible:ring-[3px] disabled:pointer-events-none disabled:opacity-50 [&[data-state=open]>svg]:rotate-180",
    className,
  );
  return (
    <AccordionPrimitive.Header className="flex">
      <AccordionPrimitive.Trigger
        data-slot="accordion-trigger"
        className={triggerClass}
        {...triggerProps}
      >
        {children}
        <ChevronDownIcon className="text-muted-foreground pointer-events-none size-4 shrink-0 translate-y-0.5 transition-transform duration-200" />
      </AccordionPrimitive.Trigger>
    </AccordionPrimitive.Header>
  );
 }
 /**
  * Collapsible body of an accordion item. The Radix content element always
  * gets the fixed animation classes; the caller's `className` is applied to
  * the inner padding `<div>` that wraps `children`, not to the content root.
  */
 function AccordionContent(
  props: React.ComponentProps<typeof AccordionPrimitive.Content>,
 ) {
  const { className, children, ...contentProps } = props;
  const innerClass = cn("pt-0 pb-4", className);
  return (
    <AccordionPrimitive.Content
      data-slot="accordion-content"
      className="data-[state=closed]:animate-accordion-up data-[state=open]:animate-accordion-down overflow-hidden text-sm"
      {...contentProps}
    >
      <div className={innerClass}>{children}</div>
    </AccordionPrimitive.Content>
  );
 }
 export { Accordion, AccordionItem, AccordionTrigger, AccordionContent };
--- a/frontend/src/app/components/ui/alert-dialog.tsx
+++ b/frontend/src/app/components/ui/alert-dialog.tsx
@@ -0,0 +1,157 @@
 "use client";
 import * as React from "react";
 import * as AlertDialogPrimitive from "@radix-ui/react-alert-dialog";
 import { cn } from "./utils";
 import { buttonVariants } from "./button";
 /**
  * Root of the alert dialog. Forwards all props to the Radix root and tags it
  * with `data-slot="alert-dialog"`.
  */
 function AlertDialog(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Root>,
 ) {
  return <AlertDialogPrimitive.Root data-slot="alert-dialog" {...props} />;
 }
 /**
  * Trigger element for the alert dialog. Pure pass-through to the Radix
  * trigger with a `data-slot` marker added.
  */
 function AlertDialogTrigger(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Trigger>,
 ) {
  return (
    <AlertDialogPrimitive.Trigger data-slot="alert-dialog-trigger" {...props} />
  );
 }
 /**
  * Portal wrapper for alert dialog content. Pure pass-through to the Radix
  * portal with a `data-slot` marker added.
  */
 function AlertDialogPortal(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Portal>,
 ) {
  return (
    <AlertDialogPrimitive.Portal data-slot="alert-dialog-portal" {...props} />
  );
 }
 /**
  * Backdrop behind the alert dialog. Applies the fixed full-screen overlay
  * and fade classes, then merges any caller `className` on top.
  */
 function AlertDialogOverlay(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Overlay>,
 ) {
  const { className, ...overlayProps } = props;
  const overlayClass = cn(
    "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-black/50",
    className,
  );
  return (
    <AlertDialogPrimitive.Overlay
      data-slot="alert-dialog-overlay"
      className={overlayClass}
      {...overlayProps}
    />
  );
 }
 /**
  * Dialog panel. Always renders inside `AlertDialogPortal` together with an
  * `AlertDialogOverlay`, so callers only need to supply the panel children.
  * Caller `className` is merged after the default positioning classes.
  */
 function AlertDialogContent(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Content>,
 ) {
  const { className, ...contentProps } = props;
  const panelClass = cn(
    "bg-background data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg",
    className,
  );
  return (
    <AlertDialogPortal>
      <AlertDialogOverlay />
      <AlertDialogPrimitive.Content
        data-slot="alert-dialog-content"
        className={panelClass}
        {...contentProps}
      />
    </AlertDialogPortal>
  );
 }
 /**
  * Header region of the alert dialog: a plain `<div>` laid out as a column,
  * with caller `className` merged after the defaults.
  */
 function AlertDialogHeader(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const headerClass = cn(
    "flex flex-col gap-2 text-center sm:text-left",
    className,
  );
  return (
    <div data-slot="alert-dialog-header" className={headerClass} {...divProps} />
  );
 }
 /**
  * Footer region of the alert dialog: a plain `<div>` holding the action
  * buttons, with caller `className` merged after the defaults.
  */
 function AlertDialogFooter(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const footerClass = cn(
    "flex flex-col-reverse gap-2 sm:flex-row sm:justify-end",
    className,
  );
  return (
    <div data-slot="alert-dialog-footer" className={footerClass} {...divProps} />
  );
 }
 /**
  * Title of the alert dialog. Wraps the Radix title with default heading
  * typography; caller `className` is merged after it.
  */
 function AlertDialogTitle(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Title>,
 ) {
  const { className, ...titleProps } = props;
  const titleClass = cn("text-lg font-semibold", className);
  return (
    <AlertDialogPrimitive.Title
      data-slot="alert-dialog-title"
      className={titleClass}
      {...titleProps}
    />
  );
 }
 /**
  * Descriptive text of the alert dialog. Wraps the Radix description with
  * muted small text; caller `className` is merged after it.
  */
 function AlertDialogDescription(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Description>,
 ) {
  const { className, ...descriptionProps } = props;
  const descriptionClass = cn("text-muted-foreground text-sm", className);
  return (
    <AlertDialogPrimitive.Description
      data-slot="alert-dialog-description"
      className={descriptionClass}
      {...descriptionProps}
    />
  );
 }
 /**
  * Confirming action of the alert dialog. Styled with the default
  * `buttonVariants()` classes; caller `className` is merged after them.
  */
 function AlertDialogAction(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Action>,
 ) {
  const { className, ...actionProps } = props;
  const actionClass = cn(buttonVariants(), className);
  return <AlertDialogPrimitive.Action className={actionClass} {...actionProps} />;
 }
 /**
  * Cancelling action of the alert dialog. Styled with the `outline` button
  * variant; caller `className` is merged after it.
  */
 function AlertDialogCancel(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Cancel>,
 ) {
  const { className, ...cancelProps } = props;
  const cancelClass = cn(buttonVariants({ variant: "outline" }), className);
  return <AlertDialogPrimitive.Cancel className={cancelClass} {...cancelProps} />;
 }
 export {
  AlertDialog,
  AlertDialogPortal,
  AlertDialogOverlay,
  AlertDialogTrigger,
  AlertDialogContent,
  AlertDialogHeader,
  AlertDialogFooter,
  AlertDialogTitle,
  AlertDialogDescription,
  AlertDialogAction,
  AlertDialogCancel,
 };
--- a/frontend/src/app/components/ui/alert.tsx
+++ b/frontend/src/app/components/ui/alert.tsx
@@ -0,0 +1,66 @@
 import * as React from "react";
 import { cva, type VariantProps } from "class-variance-authority";
 import { cn } from "./utils";
 // Class generator for <Alert>: shared base classes plus one `variant` axis
 // ("default" | "destructive"), falling back to "default" when none is given.
 const alertVariants = cva(
  "relative w-full rounded-lg border px-4 py-3 text-sm grid has-[>svg]:grid-cols-[calc(var(--spacing)*4)_1fr] grid-cols-[0_1fr] has-[>svg]:gap-x-3 gap-y-0.5 items-start [&>svg]:size-4 [&>svg]:translate-y-0.5 [&>svg]:text-current",
  {
    defaultVariants: { variant: "default" },
    variants: {
      variant: {
        default: "bg-card text-card-foreground",
        destructive: "text-destructive bg-card [&>svg]:text-current *:data-[slot=alert-description]:text-destructive/90",
      },
    },
  },
 );
 /**
  * Alert container rendered as a `<div role="alert">`. The `variant` prop
  * selects classes from `alertVariants`; caller `className` is merged last.
  */
 function Alert(
  props: React.ComponentProps<"div"> & VariantProps<typeof alertVariants>,
 ) {
  const { className, variant, ...divProps } = props;
  const alertClass = cn(alertVariants({ variant }), className);
  return (
    <div data-slot="alert" role="alert" className={alertClass} {...divProps} />
  );
 }
 /**
  * Title line of an alert: a `<div>` placed in the second grid column and
  * clamped to one line, with caller `className` merged after the defaults.
  */
 function AlertTitle(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const titleClass = cn(
    "col-start-2 line-clamp-1 min-h-4 font-medium tracking-tight",
    className,
  );
  return <div data-slot="alert-title" className={titleClass} {...divProps} />;
 }
 /**
  * Body text of an alert: a muted `<div>` placed in the second grid column,
  * with caller `className` merged after the defaults.
  */
 function AlertDescription(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const descriptionClass = cn(
    "text-muted-foreground col-start-2 grid justify-items-start gap-1 text-sm [&_p]:leading-relaxed",
    className,
  );
  return (
    <div
      data-slot="alert-description"
      className={descriptionClass}
      {...divProps}
    />
  );
 }
 export { Alert, AlertTitle, AlertDescription };
--- a/frontend/src/app/components/ui/aspect-ratio.tsx
+++ b/frontend/src/app/components/ui/aspect-ratio.tsx
@@ -0,0 +1,11 @@
 "use client";
 import * as AspectRatioPrimitive from "@radix-ui/react-aspect-ratio";
 /**
  * Aspect-ratio box. Forwards every prop to the Radix root and tags it with
  * `data-slot="aspect-ratio"`.
  */
 function AspectRatio(
  props: React.ComponentProps<typeof AspectRatioPrimitive.Root>,
 ) {
  return <AspectRatioPrimitive.Root data-slot="aspect-ratio" {...props} />;
 }
 export { AspectRatio };
--- a/frontend/src/app/components/ui/avatar.tsx
+++ b/frontend/src/app/components/ui/avatar.tsx
@@ -0,0 +1,53 @@
 "use client";
 import * as React from "react";
 import * as AvatarPrimitive from "@radix-ui/react-avatar";
 import { cn } from "./utils";
 /**
  * Avatar frame: a round, clipped `size-10` container around the Radix
  * avatar root. Caller `className` is merged after the defaults.
  */
 function Avatar(
  props: React.ComponentProps<typeof AvatarPrimitive.Root>,
 ) {
  const { className, ...rootProps } = props;
  const frameClass = cn(
    "relative flex size-10 shrink-0 overflow-hidden rounded-full",
    className,
  );
  return (
    <AvatarPrimitive.Root data-slot="avatar" className={frameClass} {...rootProps} />
  );
 }
 /**
  * Image inside an avatar, sized to fill its square frame. Caller
  * `className` is merged after the defaults.
  */
 function AvatarImage(
  props: React.ComponentProps<typeof AvatarPrimitive.Image>,
 ) {
  const { className, ...imageProps } = props;
  const imageClass = cn("aspect-square size-full", className);
  return (
    <AvatarPrimitive.Image
      data-slot="avatar-image"
      className={imageClass}
      {...imageProps}
    />
  );
 }
 /**
  * Fallback content for an avatar: a centered, muted, round block wrapping
  * the Radix fallback. Caller `className` is merged after the defaults.
  */
 function AvatarFallback(
  props: React.ComponentProps<typeof AvatarPrimitive.Fallback>,
 ) {
  const { className, ...fallbackProps } = props;
  const fallbackClass = cn(
    "bg-muted flex size-full items-center justify-center rounded-full",
    className,
  );
  return (
    <AvatarPrimitive.Fallback
      data-slot="avatar-fallback"
      className={fallbackClass}
      {...fallbackProps}
    />
  );
 }
 export { Avatar, AvatarImage, AvatarFallback };
--- a/frontend/src/app/components/ui/badge.tsx
+++ b/frontend/src/app/components/ui/badge.tsx
@@ -0,0 +1,46 @@
 import * as React from "react";
 import { Slot } from "@radix-ui/react-slot";
 import { cva, type VariantProps } from "class-variance-authority";
 import { cn } from "./utils";
 // Class generator for <Badge>: shared base classes plus one `variant` axis
 // (default / secondary / destructive / outline), defaulting to "default".
 const badgeVariants = cva(
  "inline-flex items-center justify-center rounded-md border px-2 py-0.5 text-xs font-medium w-fit whitespace-nowrap shrink-0 [&>svg]:size-3 gap-1 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive transition-[color,box-shadow] overflow-hidden",
  {
    defaultVariants: { variant: "default" },
    variants: {
      variant: {
        default: "border-transparent bg-primary text-primary-foreground [a&]:hover:bg-primary/90",
        secondary: "border-transparent bg-secondary text-secondary-foreground [a&]:hover:bg-secondary/90",
        destructive: "border-transparent bg-destructive text-white [a&]:hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
        outline: "text-foreground [a&]:hover:bg-accent [a&]:hover:text-accent-foreground",
      },
    },
  },
 );
 /**
  * Small status label. Renders a `<span>` by default; with `asChild` it
  * renders through Radix `Slot` instead. `variant` selects classes from
  * `badgeVariants`, and caller `className` is merged last.
  */
 function Badge(
  props: React.ComponentProps<"span"> &
    VariantProps<typeof badgeVariants> & { asChild?: boolean },
 ) {
  const { className, variant, asChild = false, ...rest } = props;
  const Comp = asChild ? Slot : "span";
  const badgeClass = cn(badgeVariants({ variant }), className);
  return <Comp data-slot="badge" className={badgeClass} {...rest} />;
 }
 export { Badge, badgeVariants };
--- a/frontend/src/app/components/ui/breadcrumb.tsx
+++ b/frontend/src/app/components/ui/breadcrumb.tsx
@@ -0,0 +1,109 @@
 import * as React from "react";
 import { Slot } from "@radix-ui/react-slot";
 import { ChevronRight, MoreHorizontal } from "lucide-react";
 import { cn } from "./utils";
 /**
  * Breadcrumb landmark: a `<nav aria-label="breadcrumb">` that forwards all
  * received props (which may override the defaults set before the spread).
  */
 function Breadcrumb(props: React.ComponentProps<"nav">) {
  return <nav aria-label="breadcrumb" data-slot="breadcrumb" {...props} />;
 }
 /**
  * Ordered list holding the breadcrumb items, laid out as a wrapping flex
  * row. Caller `className` is merged after the defaults.
  */
 function BreadcrumbList(props: React.ComponentProps<"ol">) {
  const { className, ...listProps } = props;
  const listClass = cn(
    "text-muted-foreground flex flex-wrap items-center gap-1.5 text-sm break-words sm:gap-2.5",
    className,
  );
  return <ol data-slot="breadcrumb-list" className={listClass} {...listProps} />;
 }
 /**
  * One entry in the breadcrumb list, rendered as an inline-flex `<li>`.
  * Caller `className` is merged after the defaults.
  */
 function BreadcrumbItem(props: React.ComponentProps<"li">) {
  const { className, ...itemProps } = props;
  const itemClass = cn("inline-flex items-center gap-1.5", className);
  return <li data-slot="breadcrumb-item" className={itemClass} {...itemProps} />;
 }
 /**
  * Navigable breadcrumb entry. Renders an `<a>` unless `asChild` is truthy,
  * in which case it renders through Radix `Slot`. Caller `className` is
  * merged after the hover styling.
  */
 function BreadcrumbLink(
  props: React.ComponentProps<"a"> & {
    asChild?: boolean;
  },
 ) {
  const { asChild, className, ...linkProps } = props;
  const Comp = asChild ? Slot : "a";
  const linkClass = cn("hover:text-foreground transition-colors", className);
  return <Comp data-slot="breadcrumb-link" className={linkClass} {...linkProps} />;
 }
 /**
  * Current-page breadcrumb entry: a `<span>` exposed with `role="link"`,
  * `aria-disabled="true"` and `aria-current="page"`. Props spread last, so a
  * caller can override any of these attributes.
  */
 function BreadcrumbPage(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const pageClass = cn("text-foreground font-normal", className);
  return (
    <span
      data-slot="breadcrumb-page"
      role="link"
      aria-disabled="true"
      aria-current="page"
      className={pageClass}
      {...spanProps}
    />
  );
 }
 /**
  * Decorative separator between breadcrumb items, marked `aria-hidden`.
  * Shows `children` when provided; when `children` is null or undefined it
  * falls back to a right-chevron icon.
  */
 function BreadcrumbSeparator(props: React.ComponentProps<"li">) {
  const { children, className, ...itemProps } = props;
  const separatorClass = cn("[&>svg]:size-3.5", className);
  // `??` keeps falsy-but-defined children (e.g. "" or 0) instead of the icon.
  const glyph = children ?? <ChevronRight />;
  return (
    <li
      data-slot="breadcrumb-separator"
      role="presentation"
      aria-hidden="true"
      className={separatorClass}
      {...itemProps}
    >
      {glyph}
    </li>
  );
 }
 /**
  * Ellipsis placeholder for collapsed breadcrumb items: a centered
  * horizontal-dots icon plus an `sr-only` "More" label. The wrapper itself
  * is marked `aria-hidden="true"`.
  */
 function BreadcrumbEllipsis(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const ellipsisClass = cn("flex size-9 items-center justify-center", className);
  return (
    <span
      data-slot="breadcrumb-ellipsis"
      role="presentation"
      aria-hidden="true"
      className={ellipsisClass}
      {...spanProps}
    >
      <MoreHorizontal className="size-4" />
      <span className="sr-only">More</span>
    </span>
  );
 }
 export {
  Breadcrumb,
  BreadcrumbList,
  BreadcrumbItem,
  BreadcrumbLink,
  BreadcrumbPage,
  BreadcrumbSeparator,
  BreadcrumbEllipsis,
 };
--- a/frontend/src/app/components/ui/button.tsx
+++ b/frontend/src/app/components/ui/button.tsx
@@ -0,0 +1,58 @@
 import * as React from "react";
 import { Slot } from "@radix-ui/react-slot";
 import { cva, type VariantProps } from "class-variance-authority";
 import { cn } from "./utils";
 // Class generator for <Button> (also reused by other components in this
 // folder): shared base classes plus two axes, `variant` and `size`, both
 // defaulting to "default".
 const buttonVariants = cva(
  "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4 shrink-0 [&_svg]:shrink-0 outline-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
  {
    defaultVariants: { variant: "default", size: "default" },
    variants: {
      // Visual treatment.
      variant: {
        default: "bg-primary text-primary-foreground hover:bg-primary/90",
        destructive: "bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
        outline: "border bg-background text-foreground hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50",
        secondary: "bg-secondary text-secondary-foreground hover:bg-secondary/80",
        ghost: "hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50",
        link: "text-primary underline-offset-4 hover:underline",
      },
      // Height and padding presets.
      size: {
        default: "h-9 px-4 py-2 has-[>svg]:px-3",
        sm: "h-8 rounded-md gap-1.5 px-3 has-[>svg]:px-2.5",
        lg: "h-10 rounded-md px-6 has-[>svg]:px-4",
        icon: "size-9 rounded-md",
      },
    },
  },
 );
 /**
  * Themed button. Renders a `<button>` by default; with `asChild` it renders
  * through Radix `Slot` instead. `variant`, `size` and the caller's
  * `className` are all resolved by `buttonVariants` and then passed through
  * `cn`.
  */
 function Button(
  props: React.ComponentProps<"button"> &
    VariantProps<typeof buttonVariants> & {
      asChild?: boolean;
    },
 ) {
  const { className, variant, size, asChild = false, ...buttonProps } = props;
  const Comp = asChild ? Slot : "button";
  const buttonClass = cn(buttonVariants({ variant, size, className }));
  return <Comp data-slot="button" className={buttonClass} {...buttonProps} />;
 }
 export { Button, buttonVariants };
--- a/frontend/src/app/components/ui/calendar.tsx
+++ b/frontend/src/app/components/ui/calendar.tsx
@@ -0,0 +1,75 @@
 "use client";
 import * as React from "react";
 import { ChevronLeft, ChevronRight } from "lucide-react";
 import { DayPicker } from "react-day-picker";
 import { cn } from "./utils";
 import { buttonVariants } from "./button";
 /**
  * Themed wrapper around react-day-picker's `DayPicker`.
  *
  * - `showOutsideDays` defaults to `true`.
  * - A default `classNames` map is built here; entries in the caller's
  *   `classNames` override the defaults key by key.
  * - Navigation icons are replaced with chevrons.
  * - Remaining props are spread last, so they win over everything set here.
  */
 function Calendar(
  calendarProps: React.ComponentProps<typeof DayPicker>,
 ) {
  const {
    className,
    classNames,
    showOutsideDays = true,
    ...props
  } = calendarProps;

  // Corner rounding of selected cells depends on whether a range is picked.
  const selectedCellRounding =
    props.mode === "range"
      ? "[&:has(>.day-range-end)]:rounded-r-md [&:has(>.day-range-start)]:rounded-l-md first:[&:has([aria-selected])]:rounded-l-md last:[&:has([aria-selected])]:rounded-r-md"
      : "[&:has([aria-selected])]:rounded-md";

  const navButtonClass = cn(
    buttonVariants({ variant: "outline" }),
    "size-7 bg-transparent p-0 opacity-50 hover:opacity-100",
  );
  const cellClass = cn(
    "relative p-0 text-center text-sm focus-within:relative focus-within:z-20 [&:has([aria-selected])]:bg-accent [&:has([aria-selected].day-range-end)]:rounded-r-md",
    selectedCellRounding,
  );
  const dayClass = cn(
    buttonVariants({ variant: "ghost" }),
    "size-8 p-0 font-normal aria-selected:opacity-100",
  );

  // Caller overrides are spread last so they replace the defaults above them.
  const mergedClassNames = {
    months: "flex flex-col sm:flex-row gap-2",
    month: "flex flex-col gap-4",
    caption: "flex justify-center pt-1 relative items-center w-full",
    caption_label: "text-sm font-medium",
    nav: "flex items-center gap-1",
    nav_button: navButtonClass,
    nav_button_previous: "absolute left-1",
    nav_button_next: "absolute right-1",
    table: "w-full border-collapse space-x-1",
    head_row: "flex",
    head_cell: "text-muted-foreground rounded-md w-8 font-normal text-[0.8rem]",
    row: "flex w-full mt-2",
    cell: cellClass,
    day: dayClass,
    day_range_start: "day-range-start aria-selected:bg-primary aria-selected:text-primary-foreground",
    day_range_end: "day-range-end aria-selected:bg-primary aria-selected:text-primary-foreground",
    day_selected: "bg-primary text-primary-foreground hover:bg-primary hover:text-primary-foreground focus:bg-primary focus:text-primary-foreground",
    day_today: "bg-accent text-accent-foreground",
    day_outside: "day-outside text-muted-foreground aria-selected:text-muted-foreground",
    day_disabled: "text-muted-foreground opacity-50",
    day_range_middle: "aria-selected:bg-accent aria-selected:text-accent-foreground",
    day_hidden: "invisible",
    ...classNames,
  };

  return (
    <DayPicker
      showOutsideDays={showOutsideDays}
      className={cn("p-3", className)}
      classNames={mergedClassNames}
      components={{
        IconLeft: ({ className: iconClass, ...iconProps }) => (
          <ChevronLeft className={cn("size-4", iconClass)} {...iconProps} />
        ),
        IconRight: ({ className: iconClass, ...iconProps }) => (
          <ChevronRight className={cn("size-4", iconClass)} {...iconProps} />
        ),
      }}
      {...props}
    />
  );
 }
 export { Calendar };
--- a/frontend/src/app/components/ui/card.tsx
+++ b/frontend/src/app/components/ui/card.tsx
@@ -0,0 +1,92 @@
 import * as React from "react";
 import { cn } from "./utils";
 /**
  * Card root container.
  *
  * Renders a `<div data-slot="card">`; the base classes are combined with the
  * caller's `className` through `cn`, and every remaining prop is spread onto
  * the div after the built-in attributes.
  */
 function Card(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-card text-card-foreground flex flex-col gap-6 rounded-xl border",
    className,
  );
  return <div data-slot="card" className={classes} {...rest} />;
 }
 /**
  * Header section of a card.
  *
  * Renders a `<div data-slot="card-header">` with the grid/padding classes
  * below merged with `className` via `cn`; other props are spread onto the div.
  */
 function CardHeader(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn(
    "@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-1.5 px-6 pt-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6",
    className,
  );
  return <div data-slot="card-header" className={classes} {...rest} />;
 }
 /**
  * Title of a card, rendered as an `<h4 data-slot="card-title">`.
  *
  * The props type follows the element that is actually rendered (`h4`), so
  * refs and event handlers are typed against a heading element rather than a
  * div. `className` is merged with the base class via `cn`; remaining props
  * are spread onto the heading.
  */
 function CardTitle({ className, ...props }: React.ComponentProps<"h4">) {
  return (
    <h4
      data-slot="card-title"
      className={cn("leading-none", className)}
      {...props}
    />
  );
 }
 /**
  * Secondary text of a card, rendered as a `<p data-slot="card-description">`.
  *
  * The props type follows the element that is actually rendered (`p`), so refs
  * and event handlers are typed against a paragraph element rather than a div.
  * `className` is merged with the base class via `cn`; remaining props are
  * spread onto the paragraph.
  */
 function CardDescription({ className, ...props }: React.ComponentProps<"p">) {
  return (
    <p
      data-slot="card-description"
      className={cn("text-muted-foreground", className)}
      {...props}
    />
  );
 }
 /**
  * Action slot of a card.
  *
  * Renders a `<div data-slot="card-action">` with the grid-placement classes
  * below merged with `className` via `cn`; other props are spread onto the div.
  */
 function CardAction(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn(
    "col-start-2 row-span-2 row-start-1 self-start justify-self-end",
    className,
  );
  return <div data-slot="card-action" className={classes} {...rest} />;
 }
 /**
  * Body section of a card.
  *
  * Renders a `<div data-slot="card-content">`; base classes are merged with
  * `className` via `cn` and the remaining props are spread onto the div.
  */
 function CardContent(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("px-6 [&:last-child]:pb-6", className);
  return <div data-slot="card-content" className={classes} {...rest} />;
 }
 /**
  * Footer section of a card.
  *
  * Renders a `<div data-slot="card-footer">`; base classes are merged with
  * `className` via `cn` and the remaining props are spread onto the div.
  */
 function CardFooter(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn(
    "flex items-center px-6 pb-6 [.border-t]:pt-6",
    className,
  );
  return <div data-slot="card-footer" className={classes} {...rest} />;
 }
 export {
  Card,
  CardHeader,
  CardFooter,
  CardTitle,
  CardAction,
  CardDescription,
  CardContent,
 };
--- a/frontend/src/app/components/ui/carousel.tsx
+++ b/frontend/src/app/components/ui/carousel.tsx
@@ -0,0 +1,241 @@
 "use client";
 import * as React from "react";
 import useEmblaCarousel, {
  type UseEmblaCarouselType,
 } from "embla-carousel-react";
 import { ArrowLeft, ArrowRight } from "lucide-react";
 import { cn } from "./utils";
 import { Button } from "./button";
 // Second element of the tuple returned by `useEmblaCarousel` (the API handle).
 type CarouselApi = UseEmblaCarouselType[1];
 // Argument tuple accepted by the `useEmblaCarousel` hook.
 type UseCarouselParameters = Parameters<typeof useEmblaCarousel>;
 // First hook argument: the options object.
 type CarouselOptions = UseCarouselParameters[0];
 // Second hook argument: the plugins.
 type CarouselPlugin = UseCarouselParameters[1];
 // Carousel-specific props accepted by <Carousel /> in addition to div props.
 type CarouselProps = {
  // Forwarded to `useEmblaCarousel`; <Carousel /> overrides `axis` from `orientation`.
  opts?: CarouselOptions;
  // Forwarded to `useEmblaCarousel` as its second argument.
  plugins?: CarouselPlugin;
  // Scroll direction; <Carousel /> defaults this to "horizontal".
  orientation?: "horizontal" | "vertical";
  // Called by <Carousel /> with the API handle once it is available.
  setApi?: (api: CarouselApi) => void;
 };
 // Value shared through CarouselContext with the carousel sub-components.
 type CarouselContextProps = {
  // Ref returned by `useEmblaCarousel`; attached by <CarouselContent />.
  carouselRef: ReturnType<typeof useEmblaCarousel>[0];
  api: ReturnType<typeof useEmblaCarousel>[1];
  scrollPrev: () => void;
  scrollNext: () => void;
  // Mirrors of `api.canScrollPrev()` / `api.canScrollNext()` kept in React state.
  canScrollPrev: boolean;
  canScrollNext: boolean;
 } & CarouselProps;
 // `null` outside of a <Carousel />; `useCarousel` turns that into an error.
 const CarouselContext = React.createContext<CarouselContextProps | null>(null);
 /**
  * Reads the value provided by the enclosing `<Carousel />`.
  *
  * @returns the current carousel context value.
  * @throws Error when the context value is missing (no provider above).
  */
 function useCarousel() {
  const carousel = React.useContext(CarouselContext);
  if (carousel) {
    return carousel;
  }
  throw new Error("useCarousel must be used within a <Carousel />");
 }
 /**
  * Carousel root: initialises Embla and shares its state through context.
  *
  * - `opts` / `plugins` are passed to `useEmblaCarousel`; `axis` is always
  *   derived from `orientation` ("horizontal" -> "x", otherwise "y").
  * - `setApi`, when given, receives the Embla API once it exists.
  * - ArrowLeft / ArrowRight key presses (capture phase) scroll to the previous
  *   / next slide and have their default action prevented.
  * - Remaining props are spread onto the wrapping `<div>` after the built-in
  *   attributes.
  */
 function Carousel({
  orientation = "horizontal",
  opts,
  setApi,
  plugins,
  className,
  children,
  ...props
 }: React.ComponentProps<"div"> & CarouselProps) {
  const [carouselRef, api] = useEmblaCarousel(
    {
      ...opts,
      axis: orientation === "horizontal" ? "x" : "y",
    },
    plugins,
  );
  const [canScrollPrev, setCanScrollPrev] = React.useState(false);
  const [canScrollNext, setCanScrollNext] = React.useState(false);
  // Copies the API's scrollability flags into React state so the prev/next
  // buttons re-render when they change.
  const onSelect = React.useCallback((api: CarouselApi) => {
    if (!api) return;
    setCanScrollPrev(api.canScrollPrev());
    setCanScrollNext(api.canScrollNext());
  }, []);
  const scrollPrev = React.useCallback(() => {
    api?.scrollPrev();
  }, [api]);
  const scrollNext = React.useCallback(() => {
    api?.scrollNext();
  }, [api]);
  const handleKeyDown = React.useCallback(
    (event: React.KeyboardEvent<HTMLDivElement>) => {
      if (event.key === "ArrowLeft") {
        event.preventDefault();
        scrollPrev();
      } else if (event.key === "ArrowRight") {
        event.preventDefault();
        scrollNext();
      }
    },
    [scrollPrev, scrollNext],
  );
  // Hand the API to the caller once it is available.
  React.useEffect(() => {
    if (!api || !setApi) return;
    setApi(api);
  }, [api, setApi]);
  React.useEffect(() => {
    if (!api) return;
    // Sync immediately, then on every re-initialisation and selection change.
    onSelect(api);
    api.on("reInit", onSelect);
    api.on("select", onSelect);
    return () => {
      // Remove BOTH listeners registered above; leaving "reInit" attached
      // would keep calling the state setters after unmount / API change.
      api?.off("reInit", onSelect);
      api?.off("select", onSelect);
    };
  }, [api, onSelect]);
  return (
    <CarouselContext.Provider
      value={{
        carouselRef,
        api: api,
        opts,
        orientation:
          orientation || (opts?.axis === "y" ? "vertical" : "horizontal"),
        scrollPrev,
        scrollNext,
        canScrollPrev,
        canScrollNext,
      }}
    >
      <div
        onKeyDownCapture={handleKeyDown}
        className={cn("relative", className)}
        role="region"
        aria-roledescription="carousel"
        data-slot="carousel"
        {...props}
      >
        {children}
      </div>
    </CarouselContext.Provider>
  );
 }
 /**
  * Viewport and track of the carousel.
  *
  * The outer `<div data-slot="carousel-content">` receives the carousel ref
  * from context; the inner flex `<div>` receives the caller's props and an
  * orientation-dependent negative margin (`-ml-4` or `-mt-4 flex-col`).
  */
 function CarouselContent(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const carousel = useCarousel();
  const axisClasses =
    carousel.orientation === "horizontal" ? "-ml-4" : "-mt-4 flex-col";
  const trackClasses = cn("flex", axisClasses, className);
  return (
    <div
      ref={carousel.carouselRef}
      className="overflow-hidden"
      data-slot="carousel-content"
    >
      <div className={trackClasses} {...rest} />
    </div>
  );
 }
 /**
  * A single slide.
  *
  * Renders a `<div role="group" aria-roledescription="slide">` whose padding
  * side depends on the carousel orientation read from context (`pl-4` when
  * horizontal, `pt-4` otherwise). Remaining props are spread onto the div.
  */
 function CarouselItem(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const { orientation } = useCarousel();
  const spacing = orientation === "horizontal" ? "pl-4" : "pt-4";
  const classes = cn("min-w-0 shrink-0 grow-0 basis-full", spacing, className);
  return (
    <div
      role="group"
      aria-roledescription="slide"
      data-slot="carousel-item"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * "Previous slide" button.
  *
  * Defaults to the `outline` variant and `icon` size. It is disabled while the
  * context reports `canScrollPrev === false` and calls `scrollPrev` on click;
  * props passed by the caller are spread last and therefore take precedence.
  */
 function CarouselPrevious(props: React.ComponentProps<typeof Button>) {
  const { className, variant = "outline", size = "icon", ...rest } = props;
  const carousel = useCarousel();
  const placement =
    carousel.orientation === "horizontal"
      ? "top-1/2 -left-12 -translate-y-1/2"
      : "-top-12 left-1/2 -translate-x-1/2 rotate-90";
  const classes = cn("absolute size-8 rounded-full", placement, className);
  return (
    <Button
      data-slot="carousel-previous"
      variant={variant}
      size={size}
      className={classes}
      disabled={!carousel.canScrollPrev}
      onClick={carousel.scrollPrev}
      {...rest}
    >
      <ArrowLeft />
      <span className="sr-only">Previous slide</span>
    </Button>
  );
 }
 /**
  * "Next slide" button.
  *
  * Defaults to the `outline` variant and `icon` size. It is disabled while the
  * context reports `canScrollNext === false` and calls `scrollNext` on click;
  * props passed by the caller are spread last and therefore take precedence.
  */
 function CarouselNext(props: React.ComponentProps<typeof Button>) {
  const { className, variant = "outline", size = "icon", ...rest } = props;
  const carousel = useCarousel();
  const placement =
    carousel.orientation === "horizontal"
      ? "top-1/2 -right-12 -translate-y-1/2"
      : "-bottom-12 left-1/2 -translate-x-1/2 rotate-90";
  const classes = cn("absolute size-8 rounded-full", placement, className);
  return (
    <Button
      data-slot="carousel-next"
      variant={variant}
      size={size}
      className={classes}
      disabled={!carousel.canScrollNext}
      onClick={carousel.scrollNext}
      {...rest}
    >
      <ArrowRight />
      <span className="sr-only">Next slide</span>
    </Button>
  );
 }
 export {
  type CarouselApi,
  Carousel,
  CarouselContent,
  CarouselItem,
  CarouselPrevious,
  CarouselNext,
 };
--- a/frontend/src/app/components/ui/chart.tsx
+++ b/frontend/src/app/components/ui/chart.tsx
@@ -0,0 +1,353 @@
 "use client";
 import * as React from "react";
 import * as RechartsPrimitive from "recharts";
 import { cn } from "./utils";
 // Format: { THEME_NAME: CSS_SELECTOR }
 // The selector is used by ChartStyle as a prefix in front of `[data-chart=…]`;
 // the empty string for `light` means "no prefix".
 const THEMES = { light: "", dark: ".dark" } as const;
 // Per-series chart configuration, keyed by an arbitrary string.
 // Each entry may carry a label and an icon component, plus EITHER a single
 // `color` OR a `theme` map with one color per THEMES key — never both.
 export type ChartConfig = {
  [k in string]: {
    label?: React.ReactNode;
    icon?: React.ComponentType;
  } & (
    | { color?: string; theme?: never }
    | { color?: never; theme: Record<keyof typeof THEMES, string> }
  );
 };
 // Value shared through ChartContext.
 type ChartContextProps = {
  config: ChartConfig;
 };
 // `null` outside of a <ChartContainer />; `useChart` turns that into an error.
 const ChartContext = React.createContext<ChartContextProps | null>(null);
 /**
  * Reads the value provided by the enclosing `<ChartContainer />`.
  *
  * @returns the current chart context value.
  * @throws Error when the context value is missing (no provider above).
  */
 function useChart() {
  const chart = React.useContext(ChartContext);
  if (chart) {
    return chart;
  }
  throw new Error("useChart must be used within a <ChartContainer />");
 }
 /**
  * Wrapper that provides `config` through ChartContext and hosts the chart.
  *
  * The wrapping `<div>` gets a `data-chart` attribute of the form
  * `chart-<id>`; when `id` is falsy, a `React.useId()` value with its colons
  * removed is used instead. Inside the div a `<ChartStyle>` for the same id and
  * config is rendered, followed by `children` in a Recharts
  * `ResponsiveContainer`. Remaining props are spread onto the div.
  */
 function ChartContainer({
  id,
  className,
  children,
  config,
  ...props
 }: React.ComponentProps<"div"> & {
  config: ChartConfig;
  children: React.ComponentProps<
    typeof RechartsPrimitive.ResponsiveContainer
  >["children"];
 }) {
  const generatedId = React.useId();
  const chartId = "chart-" + (id || generatedId.replace(/:/g, ""));
  const containerClasses = cn(
    "[&_.recharts-cartesian-axis-tick_text]:fill-muted-foreground [&_.recharts-cartesian-grid_line[stroke='#ccc']]:stroke-border/50 [&_.recharts-curve.recharts-tooltip-cursor]:stroke-border [&_.recharts-polar-grid_[stroke='#ccc']]:stroke-border [&_.recharts-radial-bar-background-sector]:fill-muted [&_.recharts-rectangle.recharts-tooltip-cursor]:fill-muted [&_.recharts-reference-line_[stroke='#ccc']]:stroke-border flex aspect-video justify-center text-xs [&_.recharts-dot[stroke='#fff']]:stroke-transparent [&_.recharts-layer]:outline-hidden [&_.recharts-sector]:outline-hidden [&_.recharts-sector[stroke='#fff']]:stroke-transparent [&_.recharts-surface]:outline-hidden",
    className,
  );
  const contextValue = { config };
  return (
    <ChartContext.Provider value={contextValue}>
      <div
        data-slot="chart"
        data-chart={chartId}
        className={containerClasses}
        {...props}
      >
        <ChartStyle id={chartId} config={config} />
        <RechartsPrimitive.ResponsiveContainer>
          {children}
        </RechartsPrimitive.ResponsiveContainer>
      </div>
    </ChartContext.Provider>
  );
 }
 // Emits a <style> element defining one `--color-<key>` CSS custom property
 // per config entry that has a `color` or a `theme`. One rule is generated per
 // THEMES entry, scoped to `[data-chart=<id>]` and prefixed with that theme's
 // selector. Returns null when no entry carries color information.
 // NOTE(review): keys and color values are interpolated into the CSS text
 // unescaped — this assumes `config` is developer-controlled; confirm.
 const ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => {
  // Keep only the entries that can contribute a color.
  const colorConfig = Object.entries(config).filter(
    ([, config]) => config.theme || config.color,
  );
  if (!colorConfig.length) {
    return null;
  }
  return (
    <style
      dangerouslySetInnerHTML={{
        __html: Object.entries(THEMES)
          .map(
            ([theme, prefix]) => `
 ${prefix} [data-chart=${id}] {
 ${colorConfig
  .map(([key, itemConfig]) => {
    // The per-theme value wins; otherwise fall back to the plain `color`.
    const color =
      itemConfig.theme?.[theme as keyof typeof itemConfig.theme] ||
      itemConfig.color;
    return color ? `  --color-${key}: ${color};` : null;
  })
  .join("\n")}
 }
 `,
          )
          .join("\n"),
      }}
    />
  );
 };
 // Alias of the Recharts `Tooltip` component, exported under the chart naming scheme.
 const ChartTooltip = RechartsPrimitive.Tooltip;
 /**
  * Tooltip box for a chart: an optional label plus one row per payload entry.
  *
  * Must be rendered below a `<ChartContainer />` because it reads the chart
  * config through `useChart()`.
  *
  * - Returns `null` unless `active` is truthy and `payload` is non-empty.
  * - `hideLabel` suppresses the label; `labelFormatter`, when given, renders it.
  * - `formatter`, when given, renders a row instead of the default layout for
  *   entries that have both a defined `value` and a `name`.
  * - `indicator` selects the marker shape; `hideIndicator` removes it. An
  *   entry's configured `icon` replaces the marker.
  * - `color` overrides the marker color, which otherwise comes from
  *   `item.payload.fill` or `item.color`.
  * - `nameKey` / `labelKey` choose which payload field is used to look up the
  *   config entry for the row / the label.
  */
 function ChartTooltipContent({
  active,
  payload,
  className,
  indicator = "dot",
  hideLabel = false,
  hideIndicator = false,
  label,
  labelFormatter,
  labelClassName,
  formatter,
  color,
  nameKey,
  labelKey,
 }: React.ComponentProps<typeof RechartsPrimitive.Tooltip> &
  React.ComponentProps<"div"> & {
    hideLabel?: boolean;
    hideIndicator?: boolean;
    indicator?: "line" | "dot" | "dashed";
    nameKey?: string;
    labelKey?: string;
  }) {
  const { config } = useChart();
  // The label is derived from the FIRST payload entry only.
  const tooltipLabel = React.useMemo(() => {
    if (hideLabel || !payload?.length) {
      return null;
    }
    const [item] = payload;
    const key = `${labelKey || item?.dataKey || item?.name || "value"}`;
    const itemConfig = getPayloadConfigFromPayload(config, item, key);
    // Without a labelKey, a string `label` is looked up in the config first
    // and used verbatim when the config has no label for it.
    const value =
      !labelKey && typeof label === "string"
        ? config[label as keyof typeof config]?.label || label
        : itemConfig?.label;
    if (labelFormatter) {
      return (
        <div className={cn("font-medium", labelClassName)}>
          {labelFormatter(value, payload)}
        </div>
      );
    }
    if (!value) {
      return null;
    }
    return <div className={cn("font-medium", labelClassName)}>{value}</div>;
  }, [
    label,
    labelFormatter,
    payload,
    hideLabel,
    labelClassName,
    config,
    labelKey,
  ]);
  if (!active || !payload?.length) {
    return null;
  }
  // With a single entry and a non-dot indicator the label is rendered inside
  // the row, next to the series name, instead of above the rows.
  const nestLabel = payload.length === 1 && indicator !== "dot";
  return (
    <div
      className={cn(
        "border-border/50 bg-background grid min-w-[8rem] items-start gap-1.5 rounded-lg border px-2.5 py-1.5 text-xs shadow-xl",
        className,
      )}
    >
      {!nestLabel ? tooltipLabel : null}
      <div className="grid gap-1.5">
        {payload.map((item, index) => {
          const key = `${nameKey || item.name || item.dataKey || "value"}`;
          const itemConfig = getPayloadConfigFromPayload(config, item, key);
          // `item.payload` may be absent; optional chaining avoids a TypeError.
          const indicatorColor = color || item.payload?.fill || item.color;
          return (
            <div
              key={item.dataKey}
              className={cn(
                "[&>svg]:text-muted-foreground flex w-full flex-wrap items-stretch gap-2 [&>svg]:h-2.5 [&>svg]:w-2.5",
                indicator === "dot" && "items-center",
              )}
            >
              {formatter && item?.value !== undefined && item.name ? (
                formatter(item.value, item.name, item, index, item.payload)
              ) : (
                <>
                  {itemConfig?.icon ? (
                    <itemConfig.icon />
                  ) : (
                    !hideIndicator && (
                      <div
                        className={cn(
                          "shrink-0 rounded-[2px] border-(--color-border) bg-(--color-bg)",
                          {
                            "h-2.5 w-2.5": indicator === "dot",
                            "w-1": indicator === "line",
                            "w-0 border-[1.5px] border-dashed bg-transparent":
                              indicator === "dashed",
                            "my-0.5": nestLabel && indicator === "dashed",
                          },
                        )}
                        style={
                          {
                            "--color-bg": indicatorColor,
                            "--color-border": indicatorColor,
                          } as React.CSSProperties
                        }
                      />
                    )
                  )}
                  <div
                    className={cn(
                      "flex flex-1 justify-between leading-none",
                      nestLabel ? "items-end" : "items-center",
                    )}
                  >
                    <div className="grid gap-1.5">
                      {nestLabel ? tooltipLabel : null}
                      <span className="text-muted-foreground">
                        {itemConfig?.label || item.name}
                      </span>
                    </div>
                    {/* Explicit null/undefined check: a plain `item.value &&` would
                        make React print a bare, unstyled 0 for zero values. */}
                    {item.value !== undefined && item.value !== null && (
                      <span className="text-foreground font-mono font-medium tabular-nums">
                        {item.value.toLocaleString()}
                      </span>
                    )}
                  </div>
                </>
              )}
            </div>
          );
        })}
      </div>
    </div>
  );
 }
 // Alias of the Recharts `Legend` component, exported under the chart naming scheme.
 const ChartLegend = RechartsPrimitive.Legend;
 /**
  * Legend row for a chart: one marker + label per payload entry.
  *
  * Must be rendered below a `<ChartContainer />` because it reads the chart
  * config through `useChart()`. Returns `null` for an empty/missing payload.
  * An entry's configured `icon` is shown unless `hideIcon` is set; otherwise a
  * small square filled with `item.color` is drawn. `verticalAlign === "top"`
  * puts the padding below the legend, any other value above it.
  */
 function ChartLegendContent({
  className,
  hideIcon = false,
  payload,
  verticalAlign = "bottom",
  nameKey,
 }: React.ComponentProps<"div"> &
  Pick<RechartsPrimitive.LegendProps, "payload" | "verticalAlign"> & {
    hideIcon?: boolean;
    nameKey?: string;
  }) {
  const { config } = useChart();
  if (!payload?.length) {
    return null;
  }
  const spacing = verticalAlign === "top" ? "pb-3" : "pt-3";
  const entries = payload.map((item) => {
    const key = `${nameKey || item.dataKey || "value"}`;
    const itemConfig = getPayloadConfigFromPayload(config, item, key);
    const showIcon = itemConfig?.icon && !hideIcon;
    return (
      <div
        key={item.value}
        className={cn(
          "[&>svg]:text-muted-foreground flex items-center gap-1.5 [&>svg]:h-3 [&>svg]:w-3",
        )}
      >
        {showIcon ? (
          <itemConfig.icon />
        ) : (
          <div
            className="h-2 w-2 shrink-0 rounded-[2px]"
            style={{
              backgroundColor: item.color,
            }}
          />
        )}
        {itemConfig?.label}
      </div>
    );
  });
  return (
    <div
      className={cn("flex items-center justify-center gap-4", spacing, className)}
    >
      {entries}
    </div>
  );
 }
 // Helper that picks the config entry belonging to a tooltip/legend payload item.
 // Lookup name resolution, in order:
 //   1. `payload[key]` when that is a string,
 //   2. `payload.payload[key]` when the nested payload is an object and that is a string,
 //   3. `key` itself.
 // Returns `config[name]` when `name` is in `config`, otherwise `config[key]`;
 // returns undefined when `payload` is not a non-null object.
 function getPayloadConfigFromPayload(
  config: ChartConfig,
  payload: unknown,
  key: string,
 ) {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;
  if (!isRecord(payload)) {
    return undefined;
  }
  const innerCandidate = payload["payload"];
  const inner = isRecord(innerCandidate) ? innerCandidate : undefined;
  let configLabelKey: string = key;
  const direct = payload[key];
  if (typeof direct === "string") {
    configLabelKey = direct;
  } else if (inner) {
    const nested = inner[key];
    if (typeof nested === "string") {
      configLabelKey = nested;
    }
  }
  if (configLabelKey in config) {
    return config[configLabelKey];
  }
  return config[key as keyof typeof config];
 }
 export {
  ChartContainer,
  ChartTooltip,
  ChartTooltipContent,
  ChartLegend,
  ChartLegendContent,
  ChartStyle,
 };
--- a/frontend/src/app/components/ui/checkbox.tsx
+++ b/frontend/src/app/components/ui/checkbox.tsx
@@ -0,0 +1,32 @@
 "use client";
 import * as React from "react";
 import * as CheckboxPrimitive from "@radix-ui/react-checkbox";
 import { CheckIcon } from "lucide-react";
 import { cn } from "./utils";
 /**
  * Styled checkbox built on the Radix `Checkbox.Root` primitive.
  *
  * The base classes are merged with `className` via `cn`; all other props are
  * spread onto the root. The indicator child renders a check icon.
  */
 function Checkbox(props: React.ComponentProps<typeof CheckboxPrimitive.Root>) {
  const { className, ...rest } = props;
  const rootClasses = cn(
    "peer border bg-input-background dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );
  return (
    <CheckboxPrimitive.Root
      data-slot="checkbox"
      className={rootClasses}
      {...rest}
    >
      <CheckboxPrimitive.Indicator
        data-slot="checkbox-indicator"
        className="flex items-center justify-center text-current transition-none"
      >
        <CheckIcon className="size-3.5" />
      </CheckboxPrimitive.Indicator>
    </CheckboxPrimitive.Root>
  );
 }
 export { Checkbox };
--- a/frontend/src/app/components/ui/collapsible.tsx
+++ b/frontend/src/app/components/ui/collapsible.tsx
@@ -0,0 +1,33 @@
 "use client";
 import * as CollapsiblePrimitive from "@radix-ui/react-collapsible";
 /** Radix `Collapsible.Root` tagged with `data-slot="collapsible"`; all props pass through. */
 function Collapsible(
  props: React.ComponentProps<typeof CollapsiblePrimitive.Root>,
 ) {
  return <CollapsiblePrimitive.Root data-slot="collapsible" {...props} />;
 }
 /** Radix `CollapsibleTrigger` tagged with `data-slot="collapsible-trigger"`; all props pass through. */
 function CollapsibleTrigger(
  props: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleTrigger>,
 ) {
  const Trigger = CollapsiblePrimitive.CollapsibleTrigger;
  return <Trigger data-slot="collapsible-trigger" {...props} />;
 }
 /** Radix `CollapsibleContent` tagged with `data-slot="collapsible-content"`; all props pass through. */
 function CollapsibleContent(
  props: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleContent>,
 ) {
  const Content = CollapsiblePrimitive.CollapsibleContent;
  return <Content data-slot="collapsible-content" {...props} />;
 }
 export { Collapsible, CollapsibleTrigger, CollapsibleContent };
--- a/frontend/src/app/components/ui/command.tsx
+++ b/frontend/src/app/components/ui/command.tsx
@@ -0,0 +1,177 @@
 "use client";
 import * as React from "react";
 import { Command as CommandPrimitive } from "cmdk";
 import { SearchIcon } from "lucide-react";
 import { cn } from "./utils";
 import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogHeader,
  DialogTitle,
 } from "./dialog";
 /**
  * Styled wrapper around the `cmdk` `Command` root.
  *
  * Base classes are merged with `className` via `cn`; other props are spread
  * onto the primitive.
  */
 function Command(props: React.ComponentProps<typeof CommandPrimitive>) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground flex h-full w-full flex-col overflow-hidden rounded-md",
    className,
  );
  return <CommandPrimitive data-slot="command" className={classes} {...rest} />;
 }
 /**
  * Command palette hosted in a `Dialog`.
  *
  * `title` and `description` (with the defaults below) are rendered in a
  * header that carries the `sr-only` class; `children` are placed inside a
  * `Command` within `DialogContent`. All remaining props go to `Dialog`.
  */
 function CommandDialog(
  props: React.ComponentProps<typeof Dialog> & {
    title?: string;
    description?: string;
  },
 ) {
  const {
    title = "Command Palette",
    description = "Search for a command to run...",
    children,
    ...dialogProps
  } = props;
  const paletteClasses =
    "[&_[cmdk-group-heading]]:text-muted-foreground **:data-[slot=command-input-wrapper]:h-12 [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:font-medium [&_[cmdk-group]]:px-2 [&_[cmdk-group]:not([hidden])_~[cmdk-group]]:pt-0 [&_[cmdk-input-wrapper]_svg]:h-5 [&_[cmdk-input-wrapper]_svg]:w-5 [&_[cmdk-input]]:h-12 [&_[cmdk-item]]:px-2 [&_[cmdk-item]]:py-3 [&_[cmdk-item]_svg]:h-5 [&_[cmdk-item]_svg]:w-5";
  return (
    <Dialog {...dialogProps}>
      <DialogHeader className="sr-only">
        <DialogTitle>{title}</DialogTitle>
        <DialogDescription>{description}</DialogDescription>
      </DialogHeader>
      <DialogContent className="overflow-hidden p-0">
        <Command className={paletteClasses}>{children}</Command>
      </DialogContent>
    </Dialog>
  );
 }
 /**
  * Search field of the command palette.
  *
  * Renders a wrapper `<div data-slot="command-input-wrapper">` containing a
  * search icon and the `cmdk` input. `className` and the remaining props apply
  * to the input, not to the wrapper.
  */
 function CommandInput(
  props: React.ComponentProps<typeof CommandPrimitive.Input>,
 ) {
  const { className, ...rest } = props;
  const inputClasses = cn(
    "placeholder:text-muted-foreground flex h-10 w-full rounded-md bg-transparent py-3 text-sm outline-hidden disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );
  return (
    <div
      data-slot="command-input-wrapper"
      className="flex h-9 items-center gap-2 border-b px-3"
    >
      <SearchIcon className="size-4 shrink-0 opacity-50" />
      <CommandPrimitive.Input
        data-slot="command-input"
        className={inputClasses}
        {...rest}
      />
    </div>
  );
 }
 /**
  * Scrollable list region of the command palette (`cmdk` `List`).
  *
  * Base classes are merged with `className` via `cn`; other props pass through.
  */
 function CommandList(
  props: React.ComponentProps<typeof CommandPrimitive.List>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "max-h-[300px] scroll-py-1 overflow-x-hidden overflow-y-auto",
    className,
  );
  return (
    <CommandPrimitive.List
      data-slot="command-list"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Empty-state element of the command palette (`cmdk` `Empty`).
  *
  * Props are spread after the default `className`, so a caller-supplied
  * `className` replaces the default rather than being merged with it.
  */
 function CommandEmpty(
  props: React.ComponentProps<typeof CommandPrimitive.Empty>,
 ) {
  const Empty = CommandPrimitive.Empty;
  return (
    <Empty
      data-slot="command-empty"
      className="py-6 text-center text-sm"
      {...props}
    />
  );
 }
 /**
  * Group of command items (`cmdk` `Group`).
  *
  * Base classes are merged with `className` via `cn`; other props pass through.
  */
 function CommandGroup(
  props: React.ComponentProps<typeof CommandPrimitive.Group>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "text-foreground [&_[cmdk-group-heading]]:text-muted-foreground overflow-hidden p-1 [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1.5 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium",
    className,
  );
  return (
    <CommandPrimitive.Group
      data-slot="command-group"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Separator line between command groups (`cmdk` `Separator`).
  *
  * Base classes are merged with `className` via `cn`; other props pass through.
  */
 function CommandSeparator(
  props: React.ComponentProps<typeof CommandPrimitive.Separator>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("bg-border -mx-1 h-px", className);
  return (
    <CommandPrimitive.Separator
      data-slot="command-separator"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Single selectable command (`cmdk` `Item`).
  *
  * Base classes are merged with `className` via `cn`; other props pass through.
  */
 function CommandItem(
  props: React.ComponentProps<typeof CommandPrimitive.Item>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "data-[selected=true]:bg-accent data-[selected=true]:text-accent-foreground [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled=true]:pointer-events-none data-[disabled=true]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <CommandPrimitive.Item
      data-slot="command-item"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Keyboard-shortcut hint shown inside a command item.
  *
  * Renders a `<span data-slot="command-shortcut">`; base classes are merged
  * with `className` via `cn` and other props are spread onto the span.
  */
 function CommandShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;
  const classes = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );
  return <span data-slot="command-shortcut" className={classes} {...rest} />;
 }
 export {
  Command,
  CommandDialog,
  CommandInput,
  CommandList,
  CommandEmpty,
  CommandGroup,
  CommandItem,
  CommandShortcut,
  CommandSeparator,
 };
--- a/frontend/src/app/components/ui/context-menu.tsx
+++ b/frontend/src/app/components/ui/context-menu.tsx
@@ -0,0 +1,252 @@
 "use client";
 import * as React from "react";
 import * as ContextMenuPrimitive from "@radix-ui/react-context-menu";
 import { CheckIcon, ChevronRightIcon, CircleIcon } from "lucide-react";
 import { cn } from "./utils";
 /** Radix `ContextMenu.Root` tagged with `data-slot="context-menu"`; all props pass through. */
 function ContextMenu(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Root>,
 ) {
  return <ContextMenuPrimitive.Root data-slot="context-menu" {...props} />;
 }
 /** Radix `ContextMenu.Trigger` tagged with `data-slot="context-menu-trigger"`; all props pass through. */
 function ContextMenuTrigger(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Trigger>,
 ) {
  const Trigger = ContextMenuPrimitive.Trigger;
  return <Trigger data-slot="context-menu-trigger" {...props} />;
 }
 /** Radix `ContextMenu.Group` tagged with `data-slot="context-menu-group"`; all props pass through. */
 function ContextMenuGroup(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Group>,
 ) {
  const Group = ContextMenuPrimitive.Group;
  return <Group data-slot="context-menu-group" {...props} />;
 }
 /** Radix `ContextMenu.Portal` given a `data-slot="context-menu-portal"` prop; all props pass through. */
 function ContextMenuPortal(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Portal>,
 ) {
  const Portal = ContextMenuPrimitive.Portal;
  return <Portal data-slot="context-menu-portal" {...props} />;
 }
 /** Radix `ContextMenu.Sub` given a `data-slot="context-menu-sub"` prop; all props pass through. */
 function ContextMenuSub(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Sub>,
 ) {
  return <ContextMenuPrimitive.Sub data-slot="context-menu-sub" {...props} />;
 }
 /** Radix `ContextMenu.RadioGroup` tagged with `data-slot="context-menu-radio-group"`; all props pass through. */
 function ContextMenuRadioGroup(
  props: React.ComponentProps<typeof ContextMenuPrimitive.RadioGroup>,
 ) {
  const RadioGroup = ContextMenuPrimitive.RadioGroup;
  return <RadioGroup data-slot="context-menu-radio-group" {...props} />;
 }
 /**
  * Item that opens a sub-menu (Radix `ContextMenu.SubTrigger`).
  *
  * `inset` is exposed as the `data-inset` attribute, which the class list uses
  * for extra left padding. `children` are followed by a right-aligned chevron
  * icon. Other props are spread onto the primitive.
  */
 function ContextMenuSubTrigger(
  props: React.ComponentProps<typeof ContextMenuPrimitive.SubTrigger> & {
    inset?: boolean;
  },
 ) {
  const { className, inset, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex cursor-default items-center rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <ContextMenuPrimitive.SubTrigger
      data-slot="context-menu-sub-trigger"
      data-inset={inset}
      className={classes}
      {...rest}
    >
      {children}
      <ChevronRightIcon className="ml-auto" />
    </ContextMenuPrimitive.SubTrigger>
  );
 }
 /**
  * Popup panel of a sub-menu (Radix `ContextMenu.SubContent`).
  *
  * Base classes are merged with `className` via `cn`; other props pass through.
  */
 function ContextMenuSubContent(
  props: React.ComponentProps<typeof ContextMenuPrimitive.SubContent>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[8rem] origin-(--radix-context-menu-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-lg",
    className,
  );
  return (
    <ContextMenuPrimitive.SubContent
      data-slot="context-menu-sub-content"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Popup panel of the context menu.
  *
  * Wraps Radix `ContextMenu.Content` in a `ContextMenu.Portal`. Base classes
  * are merged with `className` via `cn`; other props are spread onto Content.
  */
 function ContextMenuContent(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Content>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 max-h-(--radix-context-menu-content-available-height) min-w-[8rem] origin-(--radix-context-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border p-1 shadow-md",
    className,
  );
  return (
    <ContextMenuPrimitive.Portal>
      <ContextMenuPrimitive.Content
        data-slot="context-menu-content"
        className={classes}
        {...rest}
      />
    </ContextMenuPrimitive.Portal>
  );
 }
 /**
  * Regular context-menu entry (Radix `ContextMenu.Item`).
  *
  * `inset` and `variant` (default `"default"`) are exposed as the `data-inset`
  * and `data-variant` attributes that the class list keys on. Base classes are
  * merged with `className` via `cn`; other props pass through.
  */
 function ContextMenuItem(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
 ) {
  const { className, inset, variant = "default", ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <ContextMenuPrimitive.Item
      data-slot="context-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Checkable context-menu entry (Radix `ContextMenu.CheckboxItem`).
  *
  * `checked` is forwarded to the primitive. A check icon wrapped in an
  * `ItemIndicator` sits in an absolutely positioned slot on the left, followed
  * by `children`. Base classes are merged with `className` via `cn`.
  */
 function ContextMenuCheckboxItem(
  props: React.ComponentProps<typeof ContextMenuPrimitive.CheckboxItem>,
 ) {
  const { className, children, checked, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <ContextMenuPrimitive.CheckboxItem
      data-slot="context-menu-checkbox-item"
      className={classes}
      checked={checked}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <ContextMenuPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </ContextMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </ContextMenuPrimitive.CheckboxItem>
  );
 }
 /**
  * Radio-style context-menu entry (Radix `ContextMenu.RadioItem`).
  *
  * A filled circle icon wrapped in an `ItemIndicator` sits in an absolutely
  * positioned slot on the left, followed by `children`. Base classes are
  * merged with `className` via `cn`; other props pass through.
  */
 function ContextMenuRadioItem(
  props: React.ComponentProps<typeof ContextMenuPrimitive.RadioItem>,
 ) {
  const { className, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <ContextMenuPrimitive.RadioItem
      data-slot="context-menu-radio-item"
      className={classes}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <ContextMenuPrimitive.ItemIndicator>
          <CircleIcon className="size-2 fill-current" />
        </ContextMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </ContextMenuPrimitive.RadioItem>
  );
 }
 /**
  * Non-interactive heading inside the menu (Radix `ContextMenu.Label`).
  *
  * `inset` is exposed as the `data-inset` attribute, which the class list uses
  * for extra left padding. Base classes are merged with `className` via `cn`.
  */
 function ContextMenuLabel(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Label> & {
    inset?: boolean;
  },
 ) {
  const { className, inset, ...rest } = props;
  const classes = cn(
    "text-foreground px-2 py-1.5 text-sm font-medium data-[inset]:pl-8",
    className,
  );
  return (
    <ContextMenuPrimitive.Label
      data-slot="context-menu-label"
      data-inset={inset}
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Divider line between menu sections (Radix `ContextMenu.Separator`).
  *
  * Base classes are merged with `className` via `cn`; other props pass through.
  */
 function ContextMenuSeparator(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Separator>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("bg-border -mx-1 my-1 h-px", className);
  return (
    <ContextMenuPrimitive.Separator
      data-slot="context-menu-separator"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Keyboard-shortcut hint shown inside a menu item.
  *
  * Renders a `<span data-slot="context-menu-shortcut">`; base classes are
  * merged with `className` via `cn` and other props are spread onto the span.
  */
 function ContextMenuShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;
  const classes = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );
  return (
    <span data-slot="context-menu-shortcut" className={classes} {...rest} />
  );
 }
 export {
  ContextMenu,
  ContextMenuTrigger,
  ContextMenuContent,
  ContextMenuItem,
  ContextMenuCheckboxItem,
  ContextMenuRadioItem,
  ContextMenuLabel,
  ContextMenuSeparator,
  ContextMenuShortcut,
  ContextMenuGroup,
  ContextMenuPortal,
  ContextMenuSub,
  ContextMenuSubContent,
  ContextMenuSubTrigger,
  ContextMenuRadioGroup,
 };
--- a/frontend/src/app/components/ui/dialog.tsx
+++ b/frontend/src/app/components/ui/dialog.tsx
@@ -0,0 +1,135 @@
 "use client";
 import * as React from "react";
 import * as DialogPrimitive from "@radix-ui/react-dialog";
 import { XIcon } from "lucide-react";
 import { cn } from "./utils";
 /** Dialog root: DialogPrimitive.Root tagged with data-slot="dialog"; all props are forwarded. */
 function Dialog(props: React.ComponentProps<typeof DialogPrimitive.Root>) {
  return <DialogPrimitive.Root data-slot="dialog" {...props} />;
 }
 /** DialogPrimitive.Trigger tagged with data-slot="dialog-trigger"; all props are forwarded. */
 function DialogTrigger(
  props: React.ComponentProps<typeof DialogPrimitive.Trigger>,
 ) {
  return <DialogPrimitive.Trigger data-slot="dialog-trigger" {...props} />;
 }
 /** DialogPrimitive.Portal tagged with data-slot="dialog-portal"; all props are forwarded. */
 function DialogPortal(
  props: React.ComponentProps<typeof DialogPrimitive.Portal>,
 ) {
  return <DialogPrimitive.Portal data-slot="dialog-portal" {...props} />;
 }
 /** DialogPrimitive.Close tagged with data-slot="dialog-close"; all props are forwarded. */
 function DialogClose(
  props: React.ComponentProps<typeof DialogPrimitive.Close>,
 ) {
  return <DialogPrimitive.Close data-slot="dialog-close" {...props} />;
 }
 /**
  * Backdrop rendered behind the dialog content.
  *
  * Applies a fixed, full-viewport, half-transparent black layer with
  * state-driven fade classes; the caller's className is merged last.
  */
 function DialogOverlay(
  props: React.ComponentProps<typeof DialogPrimitive.Overlay>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-black/50",
    className,
  );
  return (
    <DialogPrimitive.Overlay
      data-slot="dialog-overlay"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Dialog panel.
  *
  * Renders, inside a DialogPortal, the DialogOverlay followed by
  * DialogPrimitive.Content. The content receives the caller's children plus
  * a built-in close button (X icon with a screen-reader-only "Close" label)
  * positioned in the top-right corner.
  */
 function DialogContent(
  props: React.ComponentProps<typeof DialogPrimitive.Content>,
 ) {
  const { className, children, ...rest } = props;
  const contentClasses = cn(
    "bg-background data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg",
    className,
  );
  // Static classes for the built-in close button; not overridable by callers.
  const closeClasses =
    "ring-offset-background focus:ring-ring data-[state=open]:bg-accent data-[state=open]:text-muted-foreground absolute top-4 right-4 rounded-xs opacity-70 transition-opacity hover:opacity-100 focus:ring-2 focus:ring-offset-2 focus:outline-hidden disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4";
  return (
    <DialogPortal data-slot="dialog-portal">
      <DialogOverlay />
      <DialogPrimitive.Content
        data-slot="dialog-content"
        className={contentClasses}
        {...rest}
      >
        {children}
        <DialogPrimitive.Close className={closeClasses}>
          <XIcon />
          <span className="sr-only">Close</span>
        </DialogPrimitive.Close>
      </DialogPrimitive.Content>
    </DialogPortal>
  );
 }
 /** Column-layout <div> for the top of a dialog; text is centered by default and left-aligned from the `sm` breakpoint. */
 function DialogHeader(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("flex flex-col gap-2 text-center sm:text-left", className);
  return <div data-slot="dialog-header" className={classes} {...rest} />;
 }
 /**
  * <div> for the bottom of a dialog. Children stack in reverse column order
  * by default and switch to a right-aligned row from the `sm` breakpoint.
  */
 function DialogFooter(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn(
    "flex flex-col-reverse gap-2 sm:flex-row sm:justify-end",
    className,
  );
  return <div data-slot="dialog-footer" className={classes} {...rest} />;
 }
 /** DialogPrimitive.Title with the default large, semibold heading classes; caller className is merged last. */
 function DialogTitle(
  props: React.ComponentProps<typeof DialogPrimitive.Title>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("text-lg leading-none font-semibold", className);
  return (
    <DialogPrimitive.Title
      data-slot="dialog-title"
      className={classes}
      {...rest}
    />
  );
 }
 /** DialogPrimitive.Description with small, muted text classes; caller className is merged last. */
 function DialogDescription(
  props: React.ComponentProps<typeof DialogPrimitive.Description>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("text-muted-foreground text-sm", className);
  return (
    <DialogPrimitive.Description
      data-slot="dialog-description"
      className={classes}
      {...rest}
    />
  );
 }
 export {
  Dialog,
  DialogClose,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogOverlay,
  DialogPortal,
  DialogTitle,
  DialogTrigger,
 };
--- a/frontend/src/app/components/ui/drawer.tsx
+++ b/frontend/src/app/components/ui/drawer.tsx
@@ -0,0 +1,132 @@
 "use client";
 import * as React from "react";
 import { Drawer as DrawerPrimitive } from "vaul";
 import { cn } from "./utils";
 /** Drawer root: DrawerPrimitive.Root (from vaul) tagged with data-slot="drawer"; all props are forwarded. */
 function Drawer(props: React.ComponentProps<typeof DrawerPrimitive.Root>) {
  return <DrawerPrimitive.Root data-slot="drawer" {...props} />;
 }
 /** DrawerPrimitive.Trigger tagged with data-slot="drawer-trigger"; all props are forwarded. */
 function DrawerTrigger(
  props: React.ComponentProps<typeof DrawerPrimitive.Trigger>,
 ) {
  return <DrawerPrimitive.Trigger data-slot="drawer-trigger" {...props} />;
 }
 /** DrawerPrimitive.Portal tagged with data-slot="drawer-portal"; all props are forwarded. */
 function DrawerPortal(
  props: React.ComponentProps<typeof DrawerPrimitive.Portal>,
 ) {
  return <DrawerPrimitive.Portal data-slot="drawer-portal" {...props} />;
 }
 /** DrawerPrimitive.Close tagged with data-slot="drawer-close"; all props are forwarded. */
 function DrawerClose(
  props: React.ComponentProps<typeof DrawerPrimitive.Close>,
 ) {
  return <DrawerPrimitive.Close data-slot="drawer-close" {...props} />;
 }
 /**
  * Backdrop rendered behind the drawer content: a fixed, full-viewport,
  * half-transparent black layer with state-driven fade classes. The caller's
  * className is merged last.
  */
 function DrawerOverlay(
  props: React.ComponentProps<typeof DrawerPrimitive.Overlay>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-black/50",
    className,
  );
  return (
    <DrawerPrimitive.Overlay
      data-slot="drawer-overlay"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Drawer panel.
  *
  * Renders, inside a DrawerPortal, the DrawerOverlay followed by
  * DrawerPrimitive.Content. Positioning, sizing and border classes are keyed
  * off the `data-vaul-drawer-direction` attribute (top / bottom / right /
  * left). A small rounded bar is rendered before the children; it is hidden
  * by default and only displayed when the direction is "bottom".
  */
 function DrawerContent(
  props: React.ComponentProps<typeof DrawerPrimitive.Content>,
 ) {
  const { className, children, ...rest } = props;
  const classes = cn(
    "group/drawer-content bg-background fixed z-50 flex h-auto flex-col",
    "data-[vaul-drawer-direction=top]:inset-x-0 data-[vaul-drawer-direction=top]:top-0 data-[vaul-drawer-direction=top]:mb-24 data-[vaul-drawer-direction=top]:max-h-[80vh] data-[vaul-drawer-direction=top]:rounded-b-lg data-[vaul-drawer-direction=top]:border-b",
    "data-[vaul-drawer-direction=bottom]:inset-x-0 data-[vaul-drawer-direction=bottom]:bottom-0 data-[vaul-drawer-direction=bottom]:mt-24 data-[vaul-drawer-direction=bottom]:max-h-[80vh] data-[vaul-drawer-direction=bottom]:rounded-t-lg data-[vaul-drawer-direction=bottom]:border-t",
    "data-[vaul-drawer-direction=right]:inset-y-0 data-[vaul-drawer-direction=right]:right-0 data-[vaul-drawer-direction=right]:w-3/4 data-[vaul-drawer-direction=right]:border-l data-[vaul-drawer-direction=right]:sm:max-w-sm",
    "data-[vaul-drawer-direction=left]:inset-y-0 data-[vaul-drawer-direction=left]:left-0 data-[vaul-drawer-direction=left]:w-3/4 data-[vaul-drawer-direction=left]:border-r data-[vaul-drawer-direction=left]:sm:max-w-sm",
    className,
  );
  return (
    <DrawerPortal data-slot="drawer-portal">
      <DrawerOverlay />
      <DrawerPrimitive.Content
        data-slot="drawer-content"
        className={classes}
        {...rest}
      >
        {/* Bar shown only for bottom drawers (via the group-data variant). */}
        <div className="bg-muted mx-auto mt-4 hidden h-2 w-[100px] shrink-0 rounded-full group-data-[vaul-drawer-direction=bottom]/drawer-content:block" />
        {children}
      </DrawerPrimitive.Content>
    </DrawerPortal>
  );
 }
 /** Padded column-layout <div> for the top of a drawer. */
 function DrawerHeader(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("flex flex-col gap-1.5 p-4", className);
  return <div data-slot="drawer-header" className={classes} {...rest} />;
 }
 /** Padded column-layout <div> pushed to the end of the drawer's flex column via `mt-auto`. */
 function DrawerFooter(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("mt-auto flex flex-col gap-2 p-4", className);
  return <div data-slot="drawer-footer" className={classes} {...rest} />;
 }
 /** DrawerPrimitive.Title with semibold foreground text; caller className is merged last. */
 function DrawerTitle(
  props: React.ComponentProps<typeof DrawerPrimitive.Title>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("text-foreground font-semibold", className);
  return (
    <DrawerPrimitive.Title
      data-slot="drawer-title"
      className={classes}
      {...rest}
    />
  );
 }
 /** DrawerPrimitive.Description with small, muted text classes; caller className is merged last. */
 function DrawerDescription(
  props: React.ComponentProps<typeof DrawerPrimitive.Description>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("text-muted-foreground text-sm", className);
  return (
    <DrawerPrimitive.Description
      data-slot="drawer-description"
      className={classes}
      {...rest}
    />
  );
 }
 export {
  Drawer,
  DrawerPortal,
  DrawerOverlay,
  DrawerTrigger,
  DrawerClose,
  DrawerContent,
  DrawerHeader,
  DrawerFooter,
  DrawerTitle,
  DrawerDescription,
 };
--- a/frontend/src/app/components/ui/dropdown-menu.tsx
+++ b/frontend/src/app/components/ui/dropdown-menu.tsx
@@ -0,0 +1,257 @@
 "use client";
 import * as React from "react";
 import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu";
 import { CheckIcon, ChevronRightIcon, CircleIcon } from "lucide-react";
 import { cn } from "./utils";
 /** Dropdown root: DropdownMenuPrimitive.Root tagged with data-slot="dropdown-menu"; all props are forwarded. */
 function DropdownMenu(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Root>,
 ) {
  return <DropdownMenuPrimitive.Root data-slot="dropdown-menu" {...props} />;
 }
 /** DropdownMenuPrimitive.Portal tagged with data-slot="dropdown-menu-portal"; all props are forwarded. */
 function DropdownMenuPortal(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Portal>,
 ) {
  return (
    <DropdownMenuPrimitive.Portal data-slot="dropdown-menu-portal" {...props} />
  );
 }
 /** DropdownMenuPrimitive.Trigger tagged with data-slot="dropdown-menu-trigger"; all props are forwarded. */
 function DropdownMenuTrigger(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Trigger>,
 ) {
  return (
    <DropdownMenuPrimitive.Trigger data-slot="dropdown-menu-trigger" {...props} />
  );
 }
 /**
  * Popup panel of a dropdown menu.
  *
  * Always rendered inside DropdownMenuPrimitive.Portal. `sideOffset`
  * defaults to 4 when the caller does not supply it. The class list caps the
  * height with the `--radix-dropdown-menu-content-available-height` CSS
  * variable and scrolls vertically on overflow.
  */
 function DropdownMenuContent(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Content>,
 ) {
  const { className, sideOffset = 4, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 max-h-(--radix-dropdown-menu-content-available-height) min-w-[8rem] origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border p-1 shadow-md",
    className,
  );
  return (
    <DropdownMenuPrimitive.Portal>
      <DropdownMenuPrimitive.Content
        data-slot="dropdown-menu-content"
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </DropdownMenuPrimitive.Portal>
  );
 }
 /** DropdownMenuPrimitive.Group tagged with data-slot="dropdown-menu-group"; all props are forwarded. */
 function DropdownMenuGroup(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Group>,
 ) {
  return (
    <DropdownMenuPrimitive.Group data-slot="dropdown-menu-group" {...props} />
  );
 }
 /**
  * Selectable row of a dropdown menu.
  *
  * `inset` and `variant` are not passed to the primitive as props; they are
  * exposed as `data-inset` / `data-variant` attributes that the class list
  * targets (extra left padding, destructive colouring). `variant` defaults
  * to "default".
  */
 function DropdownMenuItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
 ) {
  const { className, inset, variant = "default", ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <DropdownMenuPrimitive.Item
      data-slot="dropdown-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Checkbox row of a dropdown menu.
  *
  * Reserves left padding for an absolutely positioned indicator slot that
  * holds a check icon wrapped in DropdownMenuPrimitive.ItemIndicator, then
  * renders the caller's children.
  */
 function DropdownMenuCheckboxItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.CheckboxItem>,
 ) {
  const { className, children, checked, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <DropdownMenuPrimitive.CheckboxItem
      data-slot="dropdown-menu-checkbox-item"
      className={classes}
      checked={checked}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.CheckboxItem>
  );
 }
 /** DropdownMenuPrimitive.RadioGroup tagged with data-slot="dropdown-menu-radio-group"; all props are forwarded. */
 function DropdownMenuRadioGroup(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.RadioGroup>,
 ) {
  return (
    <DropdownMenuPrimitive.RadioGroup
      data-slot="dropdown-menu-radio-group"
      {...props}
    />
  );
 }
 /**
  * Radio row of a dropdown menu.
  *
  * Same layout as the checkbox row, but the indicator slot holds a small
  * filled circle icon wrapped in DropdownMenuPrimitive.ItemIndicator.
  */
 function DropdownMenuRadioItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.RadioItem>,
 ) {
  const { className, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <DropdownMenuPrimitive.RadioItem
      data-slot="dropdown-menu-radio-item"
      className={classes}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <CircleIcon className="size-2 fill-current" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.RadioItem>
  );
 }
 /**
  * Label row of a dropdown menu. The optional `inset` flag is mirrored onto
  * `data-inset`, which the class list uses to add extra left padding.
  */
 function DropdownMenuLabel(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Label> & {
    inset?: boolean;
  },
 ) {
  const { className, inset, ...rest } = props;
  const classes = cn(
    "px-2 py-1.5 text-sm font-medium data-[inset]:pl-8",
    className,
  );
  return (
    <DropdownMenuPrimitive.Label
      data-slot="dropdown-menu-label"
      data-inset={inset}
      className={classes}
      {...rest}
    />
  );
 }
 /** One-pixel horizontal rule between dropdown menu sections; caller className is merged last. */
 function DropdownMenuSeparator(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Separator>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("bg-border -mx-1 my-1 h-px", className);
  return (
    <DropdownMenuPrimitive.Separator
      data-slot="dropdown-menu-separator"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Right-aligned, muted <span> placed inside a dropdown menu item
  * (the `ml-auto` class pushes it to the end of the row).
  */
 function DropdownMenuShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;
  const classes = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );
  return (
    <span data-slot="dropdown-menu-shortcut" className={classes} {...rest} />
  );
 }
 /** DropdownMenuPrimitive.Sub tagged with data-slot="dropdown-menu-sub"; all props are forwarded. */
 function DropdownMenuSub(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Sub>,
 ) {
  return <DropdownMenuPrimitive.Sub data-slot="dropdown-menu-sub" {...props} />;
 }
 /**
  * Row that opens a nested dropdown menu.
  *
  * Renders the caller's children followed by a right-aligned chevron icon.
  * The optional `inset` flag is mirrored onto `data-inset` for extra left
  * padding.
  */
 function DropdownMenuSubTrigger(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.SubTrigger> & {
    inset?: boolean;
  },
 ) {
  const { className, inset, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex cursor-default items-center rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[inset]:pl-8",
    className,
  );
  return (
    <DropdownMenuPrimitive.SubTrigger
      data-slot="dropdown-menu-sub-trigger"
      data-inset={inset}
      className={classes}
      {...rest}
    >
      {children}
      <ChevronRightIcon className="ml-auto size-4" />
    </DropdownMenuPrimitive.SubTrigger>
  );
 }
 /**
  * Popup panel of a nested dropdown menu. Unlike DropdownMenuContent it is
  * rendered directly (no Portal wrapper in this component) and hides
  * overflow instead of scrolling.
  */
 function DropdownMenuSubContent(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.SubContent>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[8rem] origin-(--radix-dropdown-menu-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-lg",
    className,
  );
  return (
    <DropdownMenuPrimitive.SubContent
      data-slot="dropdown-menu-sub-content"
      className={classes}
      {...rest}
    />
  );
 }
 export {
  DropdownMenu,
  DropdownMenuPortal,
  DropdownMenuTrigger,
  DropdownMenuContent,
  DropdownMenuGroup,
  DropdownMenuLabel,
  DropdownMenuItem,
  DropdownMenuCheckboxItem,
  DropdownMenuRadioGroup,
  DropdownMenuRadioItem,
  DropdownMenuSeparator,
  DropdownMenuShortcut,
  DropdownMenuSub,
  DropdownMenuSubTrigger,
  DropdownMenuSubContent,
 };
--- a/frontend/src/app/components/ui/form.tsx
+++ b/frontend/src/app/components/ui/form.tsx
@@ -0,0 +1,168 @@
 "use client";
 import * as React from "react";
 import * as LabelPrimitive from "@radix-ui/react-label";
 import { Slot } from "@radix-ui/react-slot";
 import {
  Controller,
  FormProvider,
  useFormContext,
  useFormState,
  type ControllerProps,
  type FieldPath,
  type FieldValues,
 } from "react-hook-form";
 import { cn } from "./utils";
 import { Label } from "./label";
 // `Form` is a plain alias of react-hook-form's FormProvider (no wrapper component).
 const Form = FormProvider;
 // Value carried by FormFieldContext: just the field's `name` path.
 type FormFieldContextValue<
  TFieldValues extends FieldValues = FieldValues,
  TName extends FieldPath<TFieldValues> = FieldPath<TFieldValues>,
 > = {
  name: TName;
 };
 // Context populated by <FormField>. The default is an empty object cast to
 // the value type, so a consumer outside a provider reads `name` as undefined.
 const FormFieldContext = React.createContext<FormFieldContextValue>(
  {} as FormFieldContextValue,
 );
 /**
  * Wraps react-hook-form's <Controller>, publishing the field's `name`
  * through FormFieldContext so descendants can look it up via useFormField.
  * All props are forwarded to Controller unchanged.
  */
 const FormField = <
  TFieldValues extends FieldValues = FieldValues,
  TName extends FieldPath<TFieldValues> = FieldPath<TFieldValues>,
 >(
  props: ControllerProps<TFieldValues, TName>,
 ) => (
  <FormFieldContext.Provider value={{ name: props.name }}>
    <Controller {...props} />
  </FormFieldContext.Provider>
 );
 /**
  * Hook that resolves the current field's name, ids and validation state.
  *
  * Reads the field name from FormFieldContext and the item id from
  * FormItemContext, then asks react-hook-form for that field's state.
  *
  * @returns The item `id`, the field `name`, three derived element ids
  *   (`formItemId`, `formDescriptionId`, `formMessageId`) and the spread
  *   result of `getFieldState` for the field.
  * @throws Error when no <FormField> ancestor has provided a field name.
  */
 const useFormField = () => {
  const fieldContext = React.useContext(FormFieldContext);
  const itemContext = React.useContext(FormItemContext);
  // FormFieldContext defaults to `{}`, which is always truthy, so a bare
  // `!fieldContext` test can never fire. Check for the missing `name`
  // instead, and do it before the name is handed to react-hook-form.
  if (!fieldContext || fieldContext.name === undefined) {
    throw new Error("useFormField should be used within <FormField>");
  }
  const { getFieldState } = useFormContext();
  const formState = useFormState({ name: fieldContext.name });
  const fieldState = getFieldState(fieldContext.name, formState);
  const { id } = itemContext;
  return {
    id,
    name: fieldContext.name,
    formItemId: `${id}-form-item`,
    formDescriptionId: `${id}-form-item-description`,
    formMessageId: `${id}-form-item-message`,
    ...fieldState,
  };
 };
 // Value carried by FormItemContext: the id generated by <FormItem>.
 type FormItemContextValue = {
  id: string;
 };
 // Context populated by <FormItem>. The default is an empty object cast to
 // the value type, so a consumer outside a provider reads `id` as undefined.
 const FormItemContext = React.createContext<FormItemContextValue>(
  {} as FormItemContextValue,
 );
 /**
  * Grid <div> grouping one field's label, control, description and message.
  * Generates an id with React.useId and shares it through FormItemContext.
  */
 function FormItem(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const id = React.useId();
  const classes = cn("grid gap-2", className);
  return (
    <FormItemContext.Provider value={{ id }}>
      <div data-slot="form-item" className={classes} {...rest} />
    </FormItemContext.Provider>
  );
 }
 /**
  * Label bound to the current field's control through `htmlFor`.
  * Sets `data-error` to a boolean reflecting whether the field has an error;
  * the class list turns the text destructive when it is true.
  */
 function FormLabel(
  props: React.ComponentProps<typeof LabelPrimitive.Root>,
 ) {
  const { className, ...rest } = props;
  const { error, formItemId } = useFormField();
  const hasError = !!error;
  return (
    <Label
      data-slot="form-label"
      data-error={hasError}
      className={cn("data-[error=true]:text-destructive", className)}
      htmlFor={formItemId}
      {...rest}
    />
  );
 }
 /**
  * Slot that attaches the field's id and ARIA attributes to its child.
  *
  * `aria-describedby` lists the description id, plus the message id when
  * the field has an error; `aria-invalid` mirrors the error state.
  */
 function FormControl(props: React.ComponentProps<typeof Slot>) {
  const { error, formItemId, formDescriptionId, formMessageId } =
    useFormField();
  const describedBy = error
    ? `${formDescriptionId} ${formMessageId}`
    : `${formDescriptionId}`;
  return (
    <Slot
      data-slot="form-control"
      id={formItemId}
      aria-describedby={describedBy}
      aria-invalid={!!error}
      {...props}
    />
  );
 }
 /** Muted helper <p> whose id is the field's description id (the one FormControl puts in aria-describedby). */
 function FormDescription(props: React.ComponentProps<"p">) {
  const { className, ...rest } = props;
  const { formDescriptionId } = useFormField();
  const classes = cn("text-muted-foreground text-sm", className);
  return (
    <p
      data-slot="form-description"
      id={formDescriptionId}
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Validation message for the current field.
  *
  * When the field has an error, shows the stringified error message (empty
  * string if the message is null/undefined); otherwise shows the caller's
  * children. Renders nothing when the resulting body is falsy.
  */
 function FormMessage({ className, ...props }: React.ComponentProps<"p">) {
  const { error, formMessageId } = useFormField();
  const body = error ? String(error.message ?? "") : props.children;
  if (body) {
    const classes = cn("text-destructive text-sm", className);
    return (
      <p
        data-slot="form-message"
        id={formMessageId}
        className={classes}
        {...props}
      >
        {body}
      </p>
    );
  }
  return null;
 }
 export {
  useFormField,
  Form,
  FormItem,
  FormLabel,
  FormControl,
  FormDescription,
  FormMessage,
  FormField,
 };
--- a/frontend/src/app/components/ui/hover-card.tsx
+++ b/frontend/src/app/components/ui/hover-card.tsx
@@ -0,0 +1,44 @@
 "use client";
 import * as React from "react";
 import * as HoverCardPrimitive from "@radix-ui/react-hover-card";
 import { cn } from "./utils";
 /** Hover card root: HoverCardPrimitive.Root tagged with data-slot="hover-card"; all props are forwarded. */
 function HoverCard(
  props: React.ComponentProps<typeof HoverCardPrimitive.Root>,
 ) {
  return <HoverCardPrimitive.Root data-slot="hover-card" {...props} />;
 }
 /** HoverCardPrimitive.Trigger tagged with data-slot="hover-card-trigger"; all props are forwarded. */
 function HoverCardTrigger(
  props: React.ComponentProps<typeof HoverCardPrimitive.Trigger>,
 ) {
  return (
    <HoverCardPrimitive.Trigger data-slot="hover-card-trigger" {...props} />
  );
 }
 /**
  * Popup panel of a hover card, always rendered inside
  * HoverCardPrimitive.Portal. `align` defaults to "center" and `sideOffset`
  * to 4 when the caller does not supply them.
  */
 function HoverCardContent(
  props: React.ComponentProps<typeof HoverCardPrimitive.Content>,
 ) {
  const { className, align = "center", sideOffset = 4, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-64 origin-(--radix-hover-card-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden",
    className,
  );
  return (
    <HoverCardPrimitive.Portal data-slot="hover-card-portal">
      <HoverCardPrimitive.Content
        data-slot="hover-card-content"
        align={align}
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </HoverCardPrimitive.Portal>
  );
 }
 export { HoverCard, HoverCardTrigger, HoverCardContent };
--- a/frontend/src/app/components/ui/input-otp.tsx
+++ b/frontend/src/app/components/ui/input-otp.tsx
@@ -0,0 +1,77 @@
 "use client";
 import * as React from "react";
 import { OTPInput, OTPInputContext } from "input-otp";
 import { MinusIcon } from "lucide-react";
 import { cn } from "./utils";
 /**
  * One-time-password input built on `OTPInput` from the input-otp package.
  *
  * `containerClassName` is merged into the container's flex layout classes
  * and `className` into the input's own classes; everything else is
  * forwarded to OTPInput.
  */
 function InputOTP(
  props: React.ComponentProps<typeof OTPInput> & {
    containerClassName?: string;
  },
 ) {
  const { className, containerClassName, ...rest } = props;
  const containerClasses = cn(
    "flex items-center gap-2 has-disabled:opacity-50",
    containerClassName,
  );
  const inputClasses = cn("disabled:cursor-not-allowed", className);
  return (
    <OTPInput
      data-slot="input-otp"
      containerClassName={containerClasses}
      className={inputClasses}
      {...rest}
    />
  );
 }
 /** Flex row <div> that groups adjacent OTP slots. */
 function InputOTPGroup(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("flex items-center gap-1", className);
  return <div data-slot="input-otp-group" className={classes} {...rest} />;
 }
 /**
  * Single character cell of the OTP input.
  *
  * Looks up slot `index` in OTPInputContext and renders its character,
  * exposes `isActive` as `data-active`, and overlays a blinking caret when
  * `hasFakeCaret` is set. If the context or the slot is missing, all three
  * values are undefined and an empty cell is rendered.
  */
 function InputOTPSlot(
  props: React.ComponentProps<"div"> & {
    index: number;
  },
 ) {
  const { index, className, ...rest } = props;
  const inputOTPContext = React.useContext(OTPInputContext);
  const { char, hasFakeCaret, isActive } = inputOTPContext?.slots[index] ?? {};
  const classes = cn(
    "data-[active=true]:border-ring data-[active=true]:ring-ring/50 data-[active=true]:aria-invalid:ring-destructive/20 dark:data-[active=true]:aria-invalid:ring-destructive/40 aria-invalid:border-destructive data-[active=true]:aria-invalid:border-destructive dark:bg-input/30 border-input relative flex h-9 w-9 items-center justify-center border-y border-r text-sm bg-input-background transition-all outline-none first:rounded-l-md first:border-l last:rounded-r-md data-[active=true]:z-10 data-[active=true]:ring-[3px]",
    className,
  );
  return (
    <div
      data-slot="input-otp-slot"
      data-active={isActive}
      className={classes}
      {...rest}
    >
      {char}
      {hasFakeCaret && (
        <div className="pointer-events-none absolute inset-0 flex items-center justify-center">
          <div className="animate-caret-blink bg-foreground h-4 w-px duration-1000" />
        </div>
      )}
    </div>
  );
 }
 /** <div role="separator"> containing a minus icon, placed between OTP groups; all props are forwarded. */
 function InputOTPSeparator(props: React.ComponentProps<"div">) {
  return (
    <div data-slot="input-otp-separator" role="separator" {...props}>
      <MinusIcon />
    </div>
  );
 }
 export { InputOTP, InputOTPGroup, InputOTPSlot, InputOTPSeparator };
--- a/frontend/src/app/components/ui/input.tsx
+++ b/frontend/src/app/components/ui/input.tsx
@@ -0,0 +1,21 @@
 import * as React from "react";
 import { cn } from "./utils";
 /**
  * Styled native <input>.
  *
  * Class groups, in order: base layout/colours (including file-input and
  * disabled states), focus-visible ring, aria-invalid styling, then the
  * caller's className.
  */
 function Input(props: React.ComponentProps<"input">) {
  const { className, type, ...rest } = props;
  const classes = cn(
    "file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input flex h-9 w-full min-w-0 rounded-md border px-3 py-1 text-base bg-input-background transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
    "focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]",
    "aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
    className,
  );
  return <input type={type} data-slot="input" className={classes} {...rest} />;
 }
 export { Input };
--- a/frontend/src/app/components/ui/label.tsx
+++ b/frontend/src/app/components/ui/label.tsx
@@ -0,0 +1,24 @@
 "use client";
 import * as React from "react";
 import * as LabelPrimitive from "@radix-ui/react-label";
 import { cn } from "./utils";
 /**
  * Styled LabelPrimitive.Root. The class list dims the label and disables
  * pointer interaction when a `group` ancestor has data-disabled="true" or a
  * `peer` sibling is disabled.
  */
 function Label(props: React.ComponentProps<typeof LabelPrimitive.Root>) {
  const { className, ...rest } = props;
  const classes = cn(
    "flex items-center gap-2 text-sm leading-none font-medium select-none group-data-[disabled=true]:pointer-events-none group-data-[disabled=true]:opacity-50 peer-disabled:cursor-not-allowed peer-disabled:opacity-50",
    className,
  );
  return <LabelPrimitive.Root data-slot="label" className={classes} {...rest} />;
 }
 export { Label };
--- a/frontend/src/app/components/ui/menubar.tsx
+++ b/frontend/src/app/components/ui/menubar.tsx
@@ -0,0 +1,276 @@
 "use client";
 import * as React from "react";
 import * as MenubarPrimitive from "@radix-ui/react-menubar";
 import { CheckIcon, ChevronRightIcon, CircleIcon } from "lucide-react";
 import { cn } from "./utils";
 /** Menubar root: MenubarPrimitive.Root styled as a bordered, rounded flex row; caller className is merged last. */
 function Menubar(props: React.ComponentProps<typeof MenubarPrimitive.Root>) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-background flex h-9 items-center gap-1 rounded-md border p-1 shadow-xs",
    className,
  );
  return (
    <MenubarPrimitive.Root data-slot="menubar" className={classes} {...rest} />
  );
 }
 /** MenubarPrimitive.Menu tagged with data-slot="menubar-menu"; all props are forwarded. */
 function MenubarMenu(
  props: React.ComponentProps<typeof MenubarPrimitive.Menu>,
 ) {
  return <MenubarPrimitive.Menu data-slot="menubar-menu" {...props} />;
 }
 /** MenubarPrimitive.Group tagged with data-slot="menubar-group"; all props are forwarded. */
 function MenubarGroup(
  props: React.ComponentProps<typeof MenubarPrimitive.Group>,
 ) {
  return <MenubarPrimitive.Group data-slot="menubar-group" {...props} />;
 }
 /** MenubarPrimitive.Portal tagged with data-slot="menubar-portal"; all props are forwarded. */
 function MenubarPortal(
  props: React.ComponentProps<typeof MenubarPrimitive.Portal>,
 ) {
  return <MenubarPrimitive.Portal data-slot="menubar-portal" {...props} />;
 }
 /** MenubarPrimitive.RadioGroup tagged with data-slot="menubar-radio-group"; all props are forwarded. */
 function MenubarRadioGroup(
  props: React.ComponentProps<typeof MenubarPrimitive.RadioGroup>,
 ) {
  return (
    <MenubarPrimitive.RadioGroup data-slot="menubar-radio-group" {...props} />
  );
 }
 /** Top-level menubar button; the class list gives it accent colours while focused or while its menu is open. */
 function MenubarTrigger(
  props: React.ComponentProps<typeof MenubarPrimitive.Trigger>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex items-center rounded-sm px-2 py-1 text-sm font-medium outline-hidden select-none",
    className,
  );
  return (
    <MenubarPrimitive.Trigger
      data-slot="menubar-trigger"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Popup panel of a menubar menu, always rendered inside MenubarPortal.
  *
  * Defaults applied when the caller does not supply them: `align` "start",
  * `alignOffset` -4, `sideOffset` 8. The caller's className is merged after
  * the default classes and remaining props are forwarded to the primitive.
  */
 function MenubarContent({
  className,
  align = "start",
  alignOffset = -4,
  sideOffset = 8,
  ...props
 }: React.ComponentProps<typeof MenubarPrimitive.Content>) {
  return (
    <MenubarPortal>
      <MenubarPrimitive.Content
        data-slot="menubar-content"
        align={align}
        alignOffset={alignOffset}
        sideOffset={sideOffset}
        className={cn(
          // `data-[state=closed]:animate-out` is required for the closed-state
          // fade-out/zoom-out modifiers to take effect; it matches
          // MenubarSubContent and the other content panels in this UI kit.
          "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[12rem] origin-(--radix-menubar-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-md",
          className,
        )}
        {...props}
      />
    </MenubarPortal>
  );
 }
 /**
  * Selectable row of a menubar menu.
  *
  * `inset` and `variant` are exposed as `data-inset` / `data-variant`
  * attributes that the class list targets (extra left padding, destructive
  * colouring). `variant` defaults to "default".
  */
 function MenubarItem(
  props: React.ComponentProps<typeof MenubarPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
 ) {
  const { className, inset, variant = "default", ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <MenubarPrimitive.Item
      data-slot="menubar-item"
      data-inset={inset}
      data-variant={variant}
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Checkbox row of a menubar menu.
  *
  * Reserves left padding for an absolutely positioned indicator slot that
  * holds a check icon wrapped in MenubarPrimitive.ItemIndicator, then
  * renders the caller's children.
  */
 function MenubarCheckboxItem(
  props: React.ComponentProps<typeof MenubarPrimitive.CheckboxItem>,
 ) {
  const { className, children, checked, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-xs py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <MenubarPrimitive.CheckboxItem
      data-slot="menubar-checkbox-item"
      className={classes}
      checked={checked}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <MenubarPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </MenubarPrimitive.ItemIndicator>
      </span>
      {children}
    </MenubarPrimitive.CheckboxItem>
  );
 }
 /**
  * Radio row of a menubar menu.
  *
  * Same layout as the checkbox row, but the indicator slot holds a small
  * filled circle icon wrapped in MenubarPrimitive.ItemIndicator.
  */
 function MenubarRadioItem(
  props: React.ComponentProps<typeof MenubarPrimitive.RadioItem>,
 ) {
  const { className, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-xs py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );
  return (
    <MenubarPrimitive.RadioItem
      data-slot="menubar-radio-item"
      className={classes}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <MenubarPrimitive.ItemIndicator>
          <CircleIcon className="size-2 fill-current" />
        </MenubarPrimitive.ItemIndicator>
      </span>
      {children}
    </MenubarPrimitive.RadioItem>
  );
 }
 /**
  * Label row of a menubar menu. The optional `inset` flag is mirrored onto
  * `data-inset`, which the class list uses to add extra left padding.
  */
 function MenubarLabel(
  props: React.ComponentProps<typeof MenubarPrimitive.Label> & {
    inset?: boolean;
  },
 ) {
  const { className, inset, ...rest } = props;
  const classes = cn(
    "px-2 py-1.5 text-sm font-medium data-[inset]:pl-8",
    className,
  );
  return (
    <MenubarPrimitive.Label
      data-slot="menubar-label"
      data-inset={inset}
      className={classes}
      {...rest}
    />
  );
 }
 /** One-pixel horizontal rule between menubar menu sections; caller className is merged last. */
 function MenubarSeparator(
  props: React.ComponentProps<typeof MenubarPrimitive.Separator>,
 ) {
  const { className, ...rest } = props;
  const classes = cn("bg-border -mx-1 my-1 h-px", className);
  return (
    <MenubarPrimitive.Separator
      data-slot="menubar-separator"
      className={classes}
      {...rest}
    />
  );
 }
 /**
  * Right-aligned, muted <span> placed inside a menubar item
  * (the `ml-auto` class pushes it to the end of the row).
  */
 function MenubarShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;
  const classes = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );
  return <span data-slot="menubar-shortcut" className={classes} {...rest} />;
 }
 /** MenubarPrimitive.Sub tagged with data-slot="menubar-sub"; all props are forwarded. */
 function MenubarSub(props: React.ComponentProps<typeof MenubarPrimitive.Sub>) {
  return <MenubarPrimitive.Sub data-slot="menubar-sub" {...props} />;
 }
 /**
  * Row that opens a nested menubar menu.
  *
  * Renders the caller's children followed by a right-aligned chevron icon.
  * The optional `inset` flag is mirrored onto `data-inset` for extra left
  * padding.
  */
 function MenubarSubTrigger(
  props: React.ComponentProps<typeof MenubarPrimitive.SubTrigger> & {
    inset?: boolean;
  },
 ) {
  const { className, inset, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex cursor-default items-center rounded-sm px-2 py-1.5 text-sm outline-none select-none data-[inset]:pl-8",
    className,
  );
  return (
    <MenubarPrimitive.SubTrigger
      data-slot="menubar-sub-trigger"
      data-inset={inset}
      className={classes}
      {...rest}
    >
      {children}
      <ChevronRightIcon className="ml-auto h-4 w-4" />
    </MenubarPrimitive.SubTrigger>
  );
 }
 /**
  * Popup panel of a nested menubar menu. Rendered directly (no Portal
  * wrapper in this component); caller className is merged last.
  */
 function MenubarSubContent(
  props: React.ComponentProps<typeof MenubarPrimitive.SubContent>,
 ) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[8rem] origin-(--radix-menubar-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-lg",
    className,
  );
  return (
    <MenubarPrimitive.SubContent
      data-slot="menubar-sub-content"
      className={classes}
      {...rest}
    />
  );
 }
 export {
  Menubar,
  MenubarPortal,
  MenubarMenu,
  MenubarTrigger,
  MenubarContent,
  MenubarGroup,
  MenubarSeparator,
  MenubarLabel,
  MenubarItem,
  MenubarShortcut,
  MenubarCheckboxItem,
  MenubarRadioGroup,
  MenubarRadioItem,
  MenubarSub,
  MenubarSubTrigger,
  MenubarSubContent,
 };
--- a/frontend/src/app/components/ui/navigation-menu.tsx
+++ b/frontend/src/app/components/ui/navigation-menu.tsx
@@ -0,0 +1,168 @@
 import * as React from "react";
 import * as NavigationMenuPrimitive from "@radix-ui/react-navigation-menu";
 import { cva } from "class-variance-authority";
 import { ChevronDownIcon } from "lucide-react";
 import { cn } from "./utils";
 /**
  * Root of the navigation menu.
  *
  * Wraps `NavigationMenuPrimitive.Root`. `viewport` defaults to `true`; it is
  * exposed as `data-viewport` and, when truthy, a `NavigationMenuViewport` is
  * rendered after `children`.
  */
 function NavigationMenu(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Root> & {
    viewport?: boolean;
  },
 ) {
  const { className, children, viewport = true, ...props } = allProps;
  const rootClasses = cn(
    "group/navigation-menu relative flex max-w-max flex-1 items-center justify-center",
    className,
  );

  return (
    <NavigationMenuPrimitive.Root
      data-slot="navigation-menu"
      data-viewport={viewport}
      className={rootClasses}
      {...props}
    >
      {children}
      {viewport && <NavigationMenuViewport />}
    </NavigationMenuPrimitive.Root>
  );
 }
 /**
  * Styled wrapper around `NavigationMenuPrimitive.List`; lays items out in a
  * centered flex row and forwards all other props.
  */
 function NavigationMenuList(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.List>,
 ) {
  const { className, ...props } = allProps;
  const listClasses = cn(
    "group flex flex-1 list-none items-center justify-center gap-1",
    className,
  );

  return (
    <NavigationMenuPrimitive.List
      data-slot="navigation-menu-list"
      className={listClasses}
      {...props}
    />
  );
 }
 /**
  * Wrapper around `NavigationMenuPrimitive.Item` that adds the `relative`
  * class ahead of the caller's `className`.
  */
 function NavigationMenuItem(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Item>,
 ) {
  const { className, ...props } = allProps;
  const itemClasses = cn("relative", className);

  return (
    <NavigationMenuPrimitive.Item
      data-slot="navigation-menu-item"
      className={itemClasses}
      {...props}
    />
  );
 }
 /**
  * Class-name generator (built with `cva`) for navigation-menu triggers.
  * It defines only a base class list (no variants) and is exported so other
  * elements can reuse the same trigger styling.
  */
 const navigationMenuTriggerStyle = cva(
  "group inline-flex h-9 w-max items-center justify-center rounded-md bg-background px-4 py-2 text-sm font-medium hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground disabled:pointer-events-none disabled:opacity-50 data-[state=open]:hover:bg-accent data-[state=open]:text-accent-foreground data-[state=open]:focus:bg-accent data-[state=open]:bg-accent/50 focus-visible:ring-ring/50 outline-none transition-[color,box-shadow] focus-visible:ring-[3px] focus-visible:outline-1",
 );
 /**
  * Trigger button for a navigation-menu item.
  *
  * Applies `navigationMenuTriggerStyle()` and renders `children` followed by
  * a decorative (aria-hidden) chevron that rotates 180° while the enclosing
  * group has `data-state="open"`.
  */
 function NavigationMenuTrigger(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Trigger>,
 ) {
  const { className, children, ...props } = allProps;
  const triggerClasses = cn(navigationMenuTriggerStyle(), "group", className);

  return (
    <NavigationMenuPrimitive.Trigger
      data-slot="navigation-menu-trigger"
      className={triggerClasses}
      {...props}
    >
      {children}{" "}
      <ChevronDownIcon
        className="relative top-[1px] ml-1 size-3 transition duration-300 group-data-[state=open]:rotate-180"
        aria-hidden="true"
      />
    </NavigationMenuPrimitive.Trigger>
  );
 }
 /**
  * Styled wrapper around `NavigationMenuPrimitive.Content`.
  *
  * Two class groups are merged with the caller's `className`: motion-based
  * enter/exit animations, and popover-style chrome that only applies when the
  * `group/navigation-menu` ancestor has `data-viewport="false"`.
  */
 function NavigationMenuContent(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Content>,
 ) {
  const { className, ...props } = allProps;
  const contentClasses = cn(
    "data-[motion^=from-]:animate-in data-[motion^=to-]:animate-out data-[motion^=from-]:fade-in data-[motion^=to-]:fade-out data-[motion=from-end]:slide-in-from-right-52 data-[motion=from-start]:slide-in-from-left-52 data-[motion=to-end]:slide-out-to-right-52 data-[motion=to-start]:slide-out-to-left-52 top-0 left-0 w-full p-2 pr-2.5 md:absolute md:w-auto",
    "group-data-[viewport=false]/navigation-menu:bg-popover group-data-[viewport=false]/navigation-menu:text-popover-foreground group-data-[viewport=false]/navigation-menu:data-[state=open]:animate-in group-data-[viewport=false]/navigation-menu:data-[state=closed]:animate-out group-data-[viewport=false]/navigation-menu:data-[state=closed]:zoom-out-95 group-data-[viewport=false]/navigation-menu:data-[state=open]:zoom-in-95 group-data-[viewport=false]/navigation-menu:data-[state=open]:fade-in-0 group-data-[viewport=false]/navigation-menu:data-[state=closed]:fade-out-0 group-data-[viewport=false]/navigation-menu:top-full group-data-[viewport=false]/navigation-menu:mt-1.5 group-data-[viewport=false]/navigation-menu:overflow-hidden group-data-[viewport=false]/navigation-menu:rounded-md group-data-[viewport=false]/navigation-menu:border group-data-[viewport=false]/navigation-menu:shadow group-data-[viewport=false]/navigation-menu:duration-200 **:data-[slot=navigation-menu-link]:focus:ring-0 **:data-[slot=navigation-menu-link]:focus:outline-none",
    className,
  );

  return (
    <NavigationMenuPrimitive.Content
      data-slot="navigation-menu-content"
      className={contentClasses}
      {...props}
    />
  );
 }
 /**
  * Renders `NavigationMenuPrimitive.Viewport` inside an absolutely positioned
  * wrapper `div` placed below the menu (`top-full`).
  *
  * The caller's `className` and remaining props go to the primitive, not to
  * the wrapper.
  */
 function NavigationMenuViewport(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Viewport>,
 ) {
  const { className, ...props } = allProps;
  const wrapperClasses = cn(
    "absolute top-full left-0 isolate z-50 flex justify-center",
  );
  const viewportClasses = cn(
    "origin-top-center bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-90 relative mt-1.5 h-[var(--radix-navigation-menu-viewport-height)] w-full overflow-hidden rounded-md border shadow md:w-[var(--radix-navigation-menu-viewport-width)]",
    className,
  );

  return (
    <div className={wrapperClasses}>
      <NavigationMenuPrimitive.Viewport
        data-slot="navigation-menu-viewport"
        className={viewportClasses}
        {...props}
      />
    </div>
  );
 }
 /**
  * Styled wrapper around `NavigationMenuPrimitive.Link`, including the
  * `data-[active=true]` highlight classes and default sizing/colouring for
  * nested SVG icons.
  */
 function NavigationMenuLink(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Link>,
 ) {
  const { className, ...props } = allProps;
  const linkClasses = cn(
    "data-[active=true]:focus:bg-accent data-[active=true]:hover:bg-accent data-[active=true]:bg-accent/50 data-[active=true]:text-accent-foreground hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground focus-visible:ring-ring/50 [&_svg:not([class*='text-'])]:text-muted-foreground flex flex-col gap-1 rounded-sm p-2 text-sm transition-all outline-none focus-visible:ring-[3px] focus-visible:outline-1 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <NavigationMenuPrimitive.Link
      data-slot="navigation-menu-link"
      className={linkClasses}
      {...props}
    />
  );
 }
 /**
  * Styled wrapper around `NavigationMenuPrimitive.Indicator`; its only child
  * is a small rotated square that serves as the arrow.
  */
 function NavigationMenuIndicator(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Indicator>,
 ) {
  const { className, ...props } = allProps;
  const indicatorClasses = cn(
    "data-[state=visible]:animate-in data-[state=hidden]:animate-out data-[state=hidden]:fade-out data-[state=visible]:fade-in top-full z-[1] flex h-1.5 items-end justify-center overflow-hidden",
    className,
  );

  return (
    <NavigationMenuPrimitive.Indicator
      data-slot="navigation-menu-indicator"
      className={indicatorClasses}
      {...props}
    >
      <div className="bg-border relative top-[60%] h-2 w-2 rotate-45 rounded-tl-sm shadow-md" />
    </NavigationMenuPrimitive.Indicator>
  );
 }
 export {
  NavigationMenu,
  NavigationMenuList,
  NavigationMenuItem,
  NavigationMenuContent,
  NavigationMenuTrigger,
  NavigationMenuLink,
  NavigationMenuIndicator,
  NavigationMenuViewport,
  navigationMenuTriggerStyle,
 };
--- a/frontend/src/app/components/ui/pagination.tsx
+++ b/frontend/src/app/components/ui/pagination.tsx
@@ -0,0 +1,127 @@
 import * as React from "react";
 import {
  ChevronLeftIcon,
  ChevronRightIcon,
  MoreHorizontalIcon,
 } from "lucide-react";
 import { cn } from "./utils";
 import { Button, buttonVariants } from "./button";
 /**
  * Pagination landmark: a centered `<nav>` labelled "pagination". Props placed
  * after the defaults (via the spread) can override them.
  */
 function Pagination(allProps: React.ComponentProps<"nav">) {
  const { className, ...props } = allProps;
  const navClasses = cn("mx-auto flex w-full justify-center", className);

  return (
    <nav
      role="navigation"
      aria-label="pagination"
      data-slot="pagination"
      className={navClasses}
      {...props}
    />
  );
 }
 /**
  * `<ul>` container that lays pagination items out in a horizontal row.
  */
 function PaginationContent(allProps: React.ComponentProps<"ul">) {
  const { className, ...props } = allProps;
  const listClasses = cn("flex flex-row items-center gap-1", className);

  return (
    <ul
      data-slot="pagination-content"
      className={listClasses}
      {...props}
    />
  );
 }
 /** `<li>` wrapper for a single pagination entry; forwards all props. */
 function PaginationItem(props: React.ComponentProps<"li">) {
  return <li data-slot="pagination-item" {...props} />;
 }
 /**
  * Props accepted by `PaginationLink`: every native `<a>` prop, the `size`
  * option of `Button`, and an optional `isActive` flag.
  */
 type PaginationLinkProps = {
  isActive?: boolean;
 } & Pick<React.ComponentProps<typeof Button>, "size"> &
  React.ComponentProps<"a">;
 /**
  * Anchor styled with `buttonVariants`.
  *
  * When `isActive` is truthy the link uses the "outline" variant and sets
  * `aria-current="page"`; otherwise it uses the "ghost" variant. `size`
  * defaults to "icon".
  */
 function PaginationLink(allProps: PaginationLinkProps) {
  const { className, isActive, size = "icon", ...props } = allProps;
  const variant = isActive ? "outline" : "ghost";
  const linkClasses = cn(buttonVariants({ variant, size }), className);

  return (
    <a
      aria-current={isActive ? "page" : undefined}
      data-slot="pagination-link"
      data-active={isActive}
      className={linkClasses}
      {...props}
    />
  );
 }
 /**
  * "Previous page" link: a left chevron plus a "Previous" label that is
  * hidden below the `sm` breakpoint.
  */
 function PaginationPrevious(
  allProps: React.ComponentProps<typeof PaginationLink>,
 ) {
  const { className, ...props } = allProps;
  const linkClasses = cn("gap-1 px-2.5 sm:pl-2.5", className);

  return (
    <PaginationLink
      aria-label="Go to previous page"
      size="default"
      className={linkClasses}
      {...props}
    >
      <ChevronLeftIcon />
      <span className="hidden sm:block">Previous</span>
    </PaginationLink>
  );
 }
 /**
  * "Next page" link: a "Next" label (hidden below the `sm` breakpoint)
  * followed by a right chevron.
  */
 function PaginationNext(
  allProps: React.ComponentProps<typeof PaginationLink>,
 ) {
  const { className, ...props } = allProps;
  const linkClasses = cn("gap-1 px-2.5 sm:pr-2.5", className);

  return (
    <PaginationLink
      aria-label="Go to next page"
      size="default"
      className={linkClasses}
      {...props}
    >
      <span className="hidden sm:block">Next</span>
      <ChevronRightIcon />
    </PaginationLink>
  );
 }
 /**
  * Ellipsis placeholder for skipped pages: an `aria-hidden` span containing a
  * horizontal-dots icon and an `sr-only` "More pages" label.
  */
 function PaginationEllipsis(allProps: React.ComponentProps<"span">) {
  const { className, ...props } = allProps;
  const ellipsisClasses = cn(
    "flex size-9 items-center justify-center",
    className,
  );

  return (
    <span
      aria-hidden
      data-slot="pagination-ellipsis"
      className={ellipsisClasses}
      {...props}
    >
      <MoreHorizontalIcon className="size-4" />
      <span className="sr-only">More pages</span>
    </span>
  );
 }
 export {
  Pagination,
  PaginationContent,
  PaginationLink,
  PaginationItem,
  PaginationPrevious,
  PaginationNext,
  PaginationEllipsis,
 };
--- a/frontend/src/app/components/ui/popover.tsx
+++ b/frontend/src/app/components/ui/popover.tsx
@@ -0,0 +1,48 @@
 "use client";
 import * as React from "react";
 import * as PopoverPrimitive from "@radix-ui/react-popover";
 import { cn } from "./utils";
 /** Thin wrapper around `PopoverPrimitive.Root` that tags it with `data-slot="popover"`. */
 function Popover(props: React.ComponentProps<typeof PopoverPrimitive.Root>) {
  return <PopoverPrimitive.Root data-slot="popover" {...props} />;
 }
 /** Thin wrapper around `PopoverPrimitive.Trigger` that tags it with `data-slot="popover-trigger"`. */
 function PopoverTrigger(
  props: React.ComponentProps<typeof PopoverPrimitive.Trigger>,
 ) {
  return <PopoverPrimitive.Trigger data-slot="popover-trigger" {...props} />;
 }
 /**
  * Popover body rendered inside `PopoverPrimitive.Portal`.
  *
  * `align` defaults to "center" and `sideOffset` to 4; both are passed to
  * `PopoverPrimitive.Content` together with the merged class list.
  */
 function PopoverContent(
  allProps: React.ComponentProps<typeof PopoverPrimitive.Content>,
 ) {
  const { className, align = "center", sideOffset = 4, ...props } = allProps;
  const contentClasses = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-72 origin-(--radix-popover-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden",
    className,
  );

  return (
    <PopoverPrimitive.Portal>
      <PopoverPrimitive.Content
        data-slot="popover-content"
        align={align}
        sideOffset={sideOffset}
        className={contentClasses}
        {...props}
      />
    </PopoverPrimitive.Portal>
  );
 }
 /** Thin wrapper around `PopoverPrimitive.Anchor` that tags it with `data-slot="popover-anchor"`. */
 function PopoverAnchor(
  props: React.ComponentProps<typeof PopoverPrimitive.Anchor>,
 ) {
  return <PopoverPrimitive.Anchor data-slot="popover-anchor" {...props} />;
 }
 export { Popover, PopoverTrigger, PopoverContent, PopoverAnchor };
--- a/frontend/src/app/components/ui/progress.tsx
+++ b/frontend/src/app/components/ui/progress.tsx
@@ -0,0 +1,31 @@
 "use client";
 import * as React from "react";
 import * as ProgressPrimitive from "@radix-ui/react-progress";
 import { cn } from "./utils";
 /**
  * Horizontal progress bar built on `ProgressPrimitive.Root`.
  *
  * `value` is forwarded to the root primitive so the primitive itself knows
  * the current progress (previously it was destructured here and dropped, so
  * the root never received it). The same value drives the indicator, which is
  * slid left by the unfilled percentage.
  *
  * NOTE(review): the indicator maths assumes a 0–100 scale; a custom `max`
  * passed through `props` is not reflected in the fill width — confirm no
  * caller relies on a different `max`.
  */
 function Progress({
  className,
  value,
  ...props
 }: React.ComponentProps<typeof ProgressPrimitive.Root>) {
  // Clamp to [0, 100] so an out-of-range value cannot produce an invalid
  // transform such as `translateX(--5%)` or push the bar past its track.
  // A missing/null value is treated as 0 (empty bar), as before.
  const percent = Math.min(100, Math.max(0, value || 0));

  return (
    <ProgressPrimitive.Root
      data-slot="progress"
      value={value}
      className={cn(
        "bg-primary/20 relative h-2 w-full overflow-hidden rounded-full",
        className,
      )}
      {...props}
    >
      <ProgressPrimitive.Indicator
        data-slot="progress-indicator"
        className="bg-primary h-full w-full flex-1 transition-all"
        style={{ transform: `translateX(-${100 - percent}%)` }}
      />
    </ProgressPrimitive.Root>
  );
 }
 export { Progress };
--- a/frontend/src/app/components/ui/radio-group.tsx
+++ b/frontend/src/app/components/ui/radio-group.tsx
@@ -0,0 +1,45 @@
 "use client";
 import * as React from "react";
 import * as RadioGroupPrimitive from "@radix-ui/react-radio-group";
 import { CircleIcon } from "lucide-react";
 import { cn } from "./utils";
 /**
  * Wrapper around `RadioGroupPrimitive.Root` that arranges its items in a
  * grid with a fixed gap.
  */
 function RadioGroup(
  allProps: React.ComponentProps<typeof RadioGroupPrimitive.Root>,
 ) {
  const { className, ...props } = allProps;
  const groupClasses = cn("grid gap-3", className);

  return (
    <RadioGroupPrimitive.Root
      data-slot="radio-group"
      className={groupClasses}
      {...props}
    />
  );
 }
 /**
  * Single radio button: a styled `RadioGroupPrimitive.Item` whose
  * `RadioGroupPrimitive.Indicator` contains a centered filled circle icon.
  */
 function RadioGroupItem(
  allProps: React.ComponentProps<typeof RadioGroupPrimitive.Item>,
 ) {
  const { className, ...props } = allProps;
  const itemClasses = cn(
    "border-input text-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 aspect-square size-4 shrink-0 rounded-full border shadow-xs transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  return (
    <RadioGroupPrimitive.Item
      data-slot="radio-group-item"
      className={itemClasses}
      {...props}
    >
      <RadioGroupPrimitive.Indicator
        data-slot="radio-group-indicator"
        className="relative flex items-center justify-center"
      >
        <CircleIcon className="fill-primary absolute top-1/2 left-1/2 size-2 -translate-x-1/2 -translate-y-1/2" />
      </RadioGroupPrimitive.Indicator>
    </RadioGroupPrimitive.Item>
  );
 }
 export { RadioGroup, RadioGroupItem };
--- a/frontend/src/app/components/ui/resizable.tsx
+++ b/frontend/src/app/components/ui/resizable.tsx
@@ -0,0 +1,56 @@
 "use client";
 import * as React from "react";
 import { GripVerticalIcon } from "lucide-react";
 import * as ResizablePrimitive from "react-resizable-panels";
 import { cn } from "./utils";
 /**
  * Full-size flex wrapper around `ResizablePrimitive.PanelGroup`; switches to
  * a column layout when `data-panel-group-direction` is "vertical".
  */
 function ResizablePanelGroup(
  allProps: React.ComponentProps<typeof ResizablePrimitive.PanelGroup>,
 ) {
  const { className, ...props } = allProps;
  const groupClasses = cn(
    "flex h-full w-full data-[panel-group-direction=vertical]:flex-col",
    className,
  );

  return (
    <ResizablePrimitive.PanelGroup
      data-slot="resizable-panel-group"
      className={groupClasses}
      {...props}
    />
  );
 }
 /** Thin wrapper around `ResizablePrimitive.Panel` that tags it with `data-slot="resizable-panel"`. */
 function ResizablePanel(
  props: React.ComponentProps<typeof ResizablePrimitive.Panel>,
 ) {
  return <ResizablePrimitive.Panel data-slot="resizable-panel" {...props} />;
 }
 /**
  * Drag handle between resizable panels, wrapping
  * `ResizablePrimitive.PanelResizeHandle`.
  *
  * When `withHandle` is truthy a small grip icon box is rendered inside the
  * handle.
  */
 function ResizableHandle(
  allProps: React.ComponentProps<
    typeof ResizablePrimitive.PanelResizeHandle
  > & {
    withHandle?: boolean;
  },
 ) {
  const { withHandle, className, ...props } = allProps;
  const handleClasses = cn(
    "bg-border focus-visible:ring-ring relative flex w-px items-center justify-center after:absolute after:inset-y-0 after:left-1/2 after:w-1 after:-translate-x-1/2 focus-visible:ring-1 focus-visible:ring-offset-1 focus-visible:outline-hidden data-[panel-group-direction=vertical]:h-px data-[panel-group-direction=vertical]:w-full data-[panel-group-direction=vertical]:after:left-0 data-[panel-group-direction=vertical]:after:h-1 data-[panel-group-direction=vertical]:after:w-full data-[panel-group-direction=vertical]:after:-translate-y-1/2 data-[panel-group-direction=vertical]:after:translate-x-0 [&[data-panel-group-direction=vertical]>div]:rotate-90",
    className,
  );

  return (
    <ResizablePrimitive.PanelResizeHandle
      data-slot="resizable-handle"
      className={handleClasses}
      {...props}
    >
      {withHandle && (
        <div className="bg-border z-10 flex h-4 w-3 items-center justify-center rounded-xs border">
          <GripVerticalIcon className="size-2.5" />
        </div>
      )}
    </ResizablePrimitive.PanelResizeHandle>
  );
 }
 export { ResizablePanelGroup, ResizablePanel, ResizableHandle };
--- a/frontend/src/app/components/ui/scroll-area.tsx
+++ b/frontend/src/app/components/ui/scroll-area.tsx
@@ -0,0 +1,58 @@
 "use client";
 import * as React from "react";
 import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area";
 import { cn } from "./utils";
 /**
  * Scrollable container built on `ScrollAreaPrimitive.Root`.
  *
  * `children` are placed inside the viewport; a default `ScrollBar` and the
  * primitive's corner element are rendered after it.
  */
 function ScrollArea(
  allProps: React.ComponentProps<typeof ScrollAreaPrimitive.Root>,
 ) {
  const { className, children, ...props } = allProps;
  const rootClasses = cn("relative", className);

  return (
    <ScrollAreaPrimitive.Root
      data-slot="scroll-area"
      className={rootClasses}
      {...props}
    >
      <ScrollAreaPrimitive.Viewport
        data-slot="scroll-area-viewport"
        className="focus-visible:ring-ring/50 size-full rounded-[inherit] transition-[color,box-shadow] outline-none focus-visible:ring-[3px] focus-visible:outline-1"
      >
        {children}
      </ScrollAreaPrimitive.Viewport>
      <ScrollBar />
      <ScrollAreaPrimitive.Corner />
    </ScrollAreaPrimitive.Root>
  );
 }
 /**
  * Scrollbar track and thumb for `ScrollArea`.
  *
  * `orientation` defaults to "vertical" and selects which set of sizing and
  * border classes is applied.
  */
 function ScrollBar(
  allProps: React.ComponentProps<
    typeof ScrollAreaPrimitive.ScrollAreaScrollbar
  >,
 ) {
  const { className, orientation = "vertical", ...props } = allProps;
  const verticalClasses =
    orientation === "vertical" && "h-full w-2.5 border-l border-l-transparent";
  const horizontalClasses =
    orientation === "horizontal" &&
    "h-2.5 flex-col border-t border-t-transparent";
  const barClasses = cn(
    "flex touch-none p-px transition-colors select-none",
    verticalClasses,
    horizontalClasses,
    className,
  );

  return (
    <ScrollAreaPrimitive.ScrollAreaScrollbar
      data-slot="scroll-area-scrollbar"
      orientation={orientation}
      className={barClasses}
      {...props}
    >
      <ScrollAreaPrimitive.ScrollAreaThumb
        data-slot="scroll-area-thumb"
        className="bg-border relative flex-1 rounded-full"
      />
    </ScrollAreaPrimitive.ScrollAreaScrollbar>
  );
 }
 export { ScrollArea, ScrollBar };
--- a/frontend/src/app/components/ui/select.tsx
+++ b/frontend/src/app/components/ui/select.tsx
@@ -0,0 +1,189 @@
 "use client";
 import * as React from "react";
 import * as SelectPrimitive from "@radix-ui/react-select";
 import {
  CheckIcon,
  ChevronDownIcon,
  ChevronUpIcon,
 } from "lucide-react";
 import { cn } from "./utils";
 /** Thin wrapper around `SelectPrimitive.Root` that tags it with `data-slot="select"`. */
 function Select(props: React.ComponentProps<typeof SelectPrimitive.Root>) {
  return <SelectPrimitive.Root data-slot="select" {...props} />;
 }
 /** Thin wrapper around `SelectPrimitive.Group` that tags it with `data-slot="select-group"`. */
 function SelectGroup(
  props: React.ComponentProps<typeof SelectPrimitive.Group>,
 ) {
  return <SelectPrimitive.Group data-slot="select-group" {...props} />;
 }
 /** Thin wrapper around `SelectPrimitive.Value` that tags it with `data-slot="select-value"`. */
 function SelectValue(
  props: React.ComponentProps<typeof SelectPrimitive.Value>,
 ) {
  return <SelectPrimitive.Value data-slot="select-value" {...props} />;
 }
 /**
  * Button that opens the select, wrapping `SelectPrimitive.Trigger`.
  *
  * `size` ("sm" | "default", default "default") is exposed as `data-size`,
  * which the height classes target. A down chevron is appended after
  * `children`.
  */
 function SelectTrigger(
  allProps: React.ComponentProps<typeof SelectPrimitive.Trigger> & {
    size?: "sm" | "default";
  },
 ) {
  const { className, size = "default", children, ...props } = allProps;
  const triggerClasses = cn(
    "border-input data-[placeholder]:text-muted-foreground [&_svg:not([class*='text-'])]:text-muted-foreground focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 dark:hover:bg-input/50 flex w-full items-center justify-between gap-2 rounded-md border bg-input-background px-3 py-2 text-sm whitespace-nowrap transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50 data-[size=default]:h-9 data-[size=sm]:h-8 *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center *:data-[slot=select-value]:gap-2 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <SelectPrimitive.Trigger
      data-slot="select-trigger"
      data-size={size}
      className={triggerClasses}
      {...props}
    >
      {children}
      <SelectPrimitive.Icon asChild>
        <ChevronDownIcon className="size-4 opacity-50" />
      </SelectPrimitive.Icon>
    </SelectPrimitive.Trigger>
  );
 }
 /**
  * Dropdown panel of the select, rendered inside `SelectPrimitive.Portal`.
  *
  * `position` defaults to "popper"; in that mode extra offset classes are
  * added to the content and the viewport is sized from the trigger's CSS
  * variables. Scroll-up/down buttons surround the viewport that holds
  * `children`.
  */
 function SelectContent(
  allProps: React.ComponentProps<typeof SelectPrimitive.Content>,
 ) {
  const { className, children, position = "popper", ...props } = allProps;
  const isPopper = position === "popper";
  const contentClasses = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 relative z-50 max-h-(--radix-select-content-available-height) min-w-[8rem] origin-(--radix-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border shadow-md",
    isPopper &&
      "data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
    className,
  );
  const viewportClasses = cn(
    "p-1",
    isPopper &&
      "h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)] scroll-my-1",
  );

  return (
    <SelectPrimitive.Portal>
      <SelectPrimitive.Content
        data-slot="select-content"
        className={contentClasses}
        position={position}
        {...props}
      >
        <SelectScrollUpButton />
        <SelectPrimitive.Viewport className={viewportClasses}>
          {children}
        </SelectPrimitive.Viewport>
        <SelectScrollDownButton />
      </SelectPrimitive.Content>
    </SelectPrimitive.Portal>
  );
 }
 /** Small muted label, wrapping `SelectPrimitive.Label`. */
 function SelectLabel(
  allProps: React.ComponentProps<typeof SelectPrimitive.Label>,
 ) {
  const { className, ...props } = allProps;
  const labelClasses = cn(
    "text-muted-foreground px-2 py-1.5 text-xs",
    className,
  );

  return (
    <SelectPrimitive.Label
      data-slot="select-label"
      className={labelClasses}
      {...props}
    />
  );
 }
 /**
  * One selectable option, wrapping `SelectPrimitive.Item`.
  *
  * A check icon inside `SelectPrimitive.ItemIndicator` is positioned at the
  * right edge; `children` are rendered as the item text.
  */
 function SelectItem(
  allProps: React.ComponentProps<typeof SelectPrimitive.Item>,
 ) {
  const { className, children, ...props } = allProps;
  const itemClasses = cn(
    "focus:bg-accent focus:text-accent-foreground [&_svg:not([class*='text-'])]:text-muted-foreground relative flex w-full cursor-default items-center gap-2 rounded-sm py-1.5 pr-8 pl-2 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2",
    className,
  );

  return (
    <SelectPrimitive.Item
      data-slot="select-item"
      className={itemClasses}
      {...props}
    >
      <span className="absolute right-2 flex size-3.5 items-center justify-center">
        <SelectPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </SelectPrimitive.ItemIndicator>
      </span>
      <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
    </SelectPrimitive.Item>
  );
 }
 /** Thin horizontal divider, wrapping `SelectPrimitive.Separator`. */
 function SelectSeparator(
  allProps: React.ComponentProps<typeof SelectPrimitive.Separator>,
 ) {
  const { className, ...props } = allProps;
  const separatorClasses = cn(
    "bg-border pointer-events-none -mx-1 my-1 h-px",
    className,
  );

  return (
    <SelectPrimitive.Separator
      data-slot="select-separator"
      className={separatorClasses}
      {...props}
    />
  );
 }
 /** Scroll-up control for the select list: `SelectPrimitive.ScrollUpButton` with an up chevron. */
 function SelectScrollUpButton(
  allProps: React.ComponentProps<typeof SelectPrimitive.ScrollUpButton>,
 ) {
  const { className, ...props } = allProps;
  const buttonClasses = cn(
    "flex cursor-default items-center justify-center py-1",
    className,
  );

  return (
    <SelectPrimitive.ScrollUpButton
      data-slot="select-scroll-up-button"
      className={buttonClasses}
      {...props}
    >
      <ChevronUpIcon className="size-4" />
    </SelectPrimitive.ScrollUpButton>
  );
 }
 /** Scroll-down control for the select list: `SelectPrimitive.ScrollDownButton` with a down chevron. */
 function SelectScrollDownButton(
  allProps: React.ComponentProps<typeof SelectPrimitive.ScrollDownButton>,
 ) {
  const { className, ...props } = allProps;
  const buttonClasses = cn(
    "flex cursor-default items-center justify-center py-1",
    className,
  );

  return (
    <SelectPrimitive.ScrollDownButton
      data-slot="select-scroll-down-button"
      className={buttonClasses}
      {...props}
    >
      <ChevronDownIcon className="size-4" />
    </SelectPrimitive.ScrollDownButton>
  );
 }
 export {
  Select,
  SelectContent,
  SelectGroup,
  SelectItem,
  SelectLabel,
  SelectScrollDownButton,
  SelectScrollUpButton,
  SelectSeparator,
  SelectTrigger,
  SelectValue,
 };
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1,3 @@`
							`This Figma Make file includes components from [shadcn/ui](https://ui.shadcn.com/) used under [MIT license](https://github.com/shadcn-ui/ui/blob/main/LICENSE.md).`

							`This Figma Make file includes photos from [Unsplash](https://unsplash.com) used under [license](https://unsplash.com/license).`