GraphRAG Studio — initial commit: multimodal RAG system with KG visualization
Full-stack application for document-to-knowledge-graph pipeline: - Backend: FastAPI + LangGraph ReAct agent + DeepSeek + MinerU parsing - Frontend: React 19 + Vite + D3.js + shadcn/ui - Pipeline: MinerU parsing → LangExtract entity extraction → KG building Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
39
.gitignore
vendored
Normal file
39
.gitignore
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
# IDE / Editor
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Dependencies
|
||||
node_modules/
|
||||
.pnpm-store/
|
||||
|
||||
# Build output
|
||||
dist/
|
||||
build/
|
||||
|
||||
# Environment & secrets
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
.venv/
|
||||
*.egg-info/
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
# OMC
|
||||
.omc/
|
||||
**/.git_embedded_backup/
|
||||
|
||||
# Claude Code personal config
|
||||
settings.json
|
||||
31
CLAUDE.md
Normal file
31
CLAUDE.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# GraphRAG Studio — Project Conventions
|
||||
|
||||
## 1. 目录结构
|
||||
|
||||
- **前端代码** 统一放在 `frontend/` 目录下
|
||||
- **后端代码** 统一放在 `backend/` 目录下
|
||||
|
||||
```
|
||||
GraphRAGAgent/
|
||||
├── frontend/ # 所有前端代码(React + Vite)
|
||||
├── backend/ # 所有后端代码(FastAPI 服务)
|
||||
└── docs/ # 规范文档
|
||||
```
|
||||
|
||||
## 2. 环境变量与敏感配置
|
||||
|
||||
- 所有外部配置(API Key、Base URL、Token 等)统一在 `backend/.env` 中管理
|
||||
- `.env` 文件**禁止提交到 Git**,必须在 `.gitignore` 中忽略
|
||||
- 提供 `backend/.env.example` 作为配置模板(不含真实值)
|
||||
|
||||
## 3. 后端虚拟环境
|
||||
|
||||
- 后端服务必须使用 `uv` 创建独立虚拟环境:
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
uv venv
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
- 虚拟环境目录 `.venv/` 不提交到 Git
|
||||
10
backend/.env.example
Normal file
10
backend/.env.example
Normal file
@@ -0,0 +1,10 @@
|
||||
# DeepSeek API (required for entity extraction + QA)
|
||||
DEEPSEEK_API_KEY=your_deepseek_api_key_here
|
||||
DEEPSEEK_BASE_URL=https://api.deepseek.com
|
||||
|
||||
# MinerU (required for document parsing)
|
||||
MINERU_API_TOKEN=your_mineru_api_token_here
|
||||
|
||||
# MinerU integration: absolute paths to the MinerU venv's python.exe and to its pipeline.py script
|
||||
MINERU_PYTHON=F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe
|
||||
MINERU_PIPELINE=F:/GraphRAGAgent/mineru_mvp/pipeline.py
|
||||
10
backend/.gitignore
vendored
Normal file
10
backend/.gitignore
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
.env
|
||||
.venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
data/uploads/
|
||||
data/jobs/
|
||||
data/kg/
|
||||
*.egg-info/
|
||||
dist/
|
||||
28
backend/CLAUDE.md
Normal file
28
backend/CLAUDE.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Backend — GraphRAG Studio API
|
||||
|
||||
## 路径
|
||||
|
||||
```
|
||||
F:\GraphRAGAgent\backend\
|
||||
```
|
||||
|
||||
## 启动命令
|
||||
|
||||
```bash
|
||||
cd F:/GraphRAGAgent/backend
|
||||
.venv/Scripts/python.exe -m uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
||||
```
|
||||
|
||||
## 接口测试
|
||||
|
||||
服务启动后,运行:
|
||||
|
||||
```bash
|
||||
.venv/Scripts/python.exe tests/test_api.py
|
||||
```
|
||||
|
||||
## API 文档
|
||||
|
||||
- Swagger UI:http://localhost:8000/docs
|
||||
- ReDoc:http://localhost:8000/redoc
|
||||
- 健康检查:http://localhost:8000/api/v1/health
|
||||
58
backend/main.py
Normal file
58
backend/main.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""
|
||||
GraphRAG Studio — FastAPI Backend
|
||||
Entry point: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
||||
"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure backend/ is in sys.path for absolute imports
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
load_dotenv(Path(__file__).parent / ".env", override=True)
|
||||
|
||||
from routers import documents, indexing, kg, query, search, system
|
||||
|
||||
import os  # needed only by the CORS configuration below

# The ASGI application object; uvicorn is pointed at it as "main:app".
app = FastAPI(
    title="GraphRAG Studio API",
    description="Multimodal RAG Q&A system backend — MinerU + LangExtract + Agentic-RAG",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS policy.
# A wildcard origin must not be combined with credentialed requests: the CORS
# spec forbids it, and permitting it effectively lets any website call this API
# with the user's cookies. Allowed origins are therefore read from the optional
# CORS_ALLOW_ORIGINS variable (comma-separated, e.g.
# "http://localhost:5173,https://studio.example.com"). When it is unset or
# empty the API stays open to every origin, but without credentials.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentials are only enabled for an explicit origin allow-list.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
# Every router is mounted below the shared /api/v1 prefix; each router module
# contributes its own sub-prefix on top of it:
#   documents.router  prefix="/documents" → /api/v1/documents
#   indexing.router   prefix="/index"     → /api/v1/index
#   kg.router         prefix="/kg"        → /api/v1/kg
#   query.router      prefix="/query"     → /api/v1/query
#   search.router     prefix="/search"    → /api/v1/search
#   system.router     no prefix           → /api/v1/health, /api/v1/system/...
PREFIX = "/api/v1"

# Registration order is kept identical to the original explicit call sequence.
for _router_module in (documents, indexing, kg, query, search, system):
    app.include_router(_router_module.router, prefix=PREFIX)
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    # Landing payload: API banner plus the paths of the docs and health endpoints.
    landing = {
        "msg": "GraphRAG Studio API v1.0.0",
        "docs": "/docs",
        "health": "/api/v1/health",
    }
    return landing
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Direct execution (python main.py): start uvicorn with the same settings
    # as the command documented in the module docstring.
    from uvicorn import run as _serve

    _serve("main:app", host="0.0.0.0", port=8000, reload=True)
|
||||
0
backend/models/__init__.py
Normal file
0
backend/models/__init__.py
Normal file
360
backend/models/schemas.py
Normal file
360
backend/models/schemas.py
Normal file
@@ -0,0 +1,360 @@
|
||||
"""
|
||||
Pydantic v2 schemas — all API data objects per backend_service_specification-v1.0.md
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import Any, Generic, Optional, TypeVar
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Universal response envelope
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class APIResponse(BaseModel, Generic[T]):
    # Generic response envelope: a numeric code, a message, a request id and an
    # optional payload of type T.
    code: int = 0  # ok() always sets 0; err() sets whatever code the caller passes
    msg: str = "success"
    # A fresh UUID4 string is generated for each instance that does not supply one.
    request_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    data: Optional[T] = None

    @classmethod
    def ok(cls, data: Any = None) -> "APIResponse":
        """Build a success envelope (code 0, msg "success") wrapping *data*."""
        return cls(code=0, msg="success", data=data)

    @classmethod
    def err(cls, code: int, msg: str) -> "APIResponse":
        """Build an error envelope with the given code and message and no payload."""
        return cls(code=code, msg=msg, data=None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# A. Document schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class DocumentInfo(BaseModel):
    # Metadata record describing one document.
    doc_id: str
    filename: str
    format: str
    size_bytes: int
    pages: Optional[int] = None  # None when no page count is supplied
    uploaded_at: str  # carried as a string; the timestamp format is not fixed by this schema
    status: str  # uploaded | indexed | failed
    language: str = "ch"
    enable_formula: bool = True
    enable_table: bool = True
|
||||
|
||||
|
||||
class DocumentListData(BaseModel):
    # Paged list of DocumentInfo records.
    total: int
    page: int
    page_size: int
    items: list[DocumentInfo]
|
||||
|
||||
|
||||
class DeleteDocumentData(BaseModel):
    # Outcome of a document deletion, including how many KG nodes and edges
    # were removed with it.
    deleted: bool
    doc_id: str
    removed_nodes: int
    removed_edges: int
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# B. Indexing job schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class IndexingProgress(BaseModel):
    # Progress counters for an indexing job; every counter starts at 0.
    parsed_pages: int = 0
    total_pages: int = 0
    extracted_entities: int = 0
|
||||
|
||||
|
||||
class IndexingJobStatus(BaseModel):
    # Status snapshot of one indexing job.
    job_id: str
    doc_id: str
    status: str  # submitted|queued|parsing|extracting|indexing|done|failed|cancelled
    stage: str = ""
    # default_factory gives each instance its own IndexingProgress object.
    progress: IndexingProgress = Field(default_factory=IndexingProgress)
    created_at: str
    elapsed_seconds: float = 0.0
    error: Optional[str] = None  # None unless an error message is supplied
|
||||
|
||||
|
||||
class StartIndexRequest(BaseModel):
    # Request body naming the document to index.
    doc_id: str
|
||||
|
||||
|
||||
class CancelJobData(BaseModel):
    # Outcome of a job cancellation request.
    cancelled: bool
    job_id: str
    previous_status: str  # presumably the job's status before cancellation — confirm against the router
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# C. KG schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class KGNode(BaseModel):
    # One knowledge-graph node (an extracted entity).
    id: str
    name: str
    type: str
    source_doc: str
    # Character offsets of the entity; optional because an extraction may carry
    # no character interval.
    char_start: Optional[int] = None
    char_end: Optional[int] = None
    confidence: Optional[str] = None  # a string label, not a numeric score
    page: int = 0
    degree: int = 0
|
||||
|
||||
|
||||
class KGNodeDetail(KGNode):
    # KGNode extended with two graph metrics.
    degree_centrality: float = 0.0
    neighbor_count: int = 0
|
||||
|
||||
|
||||
class KGEdge(BaseModel):
    # One knowledge-graph edge between two nodes.
    source: str
    target: str
    relation: str = "CO_OCCURS_IN"  # default relation label when none is given
    doc_id: str
    page: int = 0
|
||||
|
||||
|
||||
class KGNodeListData(BaseModel):
    # Paged list of KGNode records.
    total: int
    page: int
    page_size: int
    items: list[KGNode]
|
||||
|
||||
|
||||
class KGEdgeListData(BaseModel):
    # Paged list of KGEdge records.
    total: int
    page: int
    page_size: int
    items: list[KGEdge]
|
||||
|
||||
|
||||
class KGStatsData(BaseModel):
    # Aggregate statistics about the knowledge graph.
    total_nodes: int
    total_edges: int
    density: float
    type_distribution: dict[str, int]  # string key -> count
    relation_types: dict[str, int]  # string key -> count
    top5_central_nodes: list[dict]  # untyped dicts; their keys are not fixed by this schema
    source_documents: list[str]
|
||||
|
||||
|
||||
class KGExportData(BaseModel):
    # Export payload: the exported nodes and edges plus summary fields.
    format: str
    # Defaults to None like every other Optional field in this module. Under
    # Pydantic v2 an Optional annotation without a default is still a required
    # field, so the model could not be built without passing doc_id explicitly.
    doc_id: Optional[str] = None
    total_nodes: int
    total_edges: int
    exported_at: str
    nodes: list[KGNode]
    edges: list[KGEdge]
|
||||
|
||||
|
||||
class NeighborInfo(BaseModel):
    # Compact node summary used in neighbor queries; all four fields are required.
    id: str
    name: str
    type: str
    page: int
|
||||
|
||||
|
||||
class NeighborsData(BaseModel):
    # Result of a neighbor query around a center node.
    center: NeighborInfo
    hops: int
    # Keys are strings; presumably the hop distance rendered as text — confirm
    # against the router that fills this in.
    neighbors_by_hop: dict[str, list[NeighborInfo]]
    total_neighbors: int
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# D. QA schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ChatMessage(BaseModel):
    # One conversation turn.
    role: str  # human | ai
    content: str
|
||||
|
||||
|
||||
class QueryRequest(BaseModel):
    # Request body for a question, with optional prior conversation turns.
    question: str
    # default_factory gives each request its own empty list when history is omitted.
    history: list[ChatMessage] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ToolCallRecord(BaseModel):
    # Record of one tool invocation: tool name, input arguments and textual output.
    tool: str
    input: dict
    output: str
|
||||
|
||||
|
||||
class QAResult(BaseModel):
    # Result of one answered question.
    query_id: str
    question: str
    answer: str
    # Both lists default to a fresh empty list per instance.
    tool_calls: list[ToolCallRecord] = Field(default_factory=list)
    cited_nodes: list[str] = Field(default_factory=list)
    elapsed_seconds: float
    created_at: str
|
||||
|
||||
|
||||
class QAHistoryData(BaseModel):
    # Paged list of QAResult records.
    total: int
    page: int
    page_size: int
    items: list[QAResult]
|
||||
|
||||
|
||||
class BatchQueryRequest(BaseModel):
    # Request body for a batch of questions.
    # Required field, capped at 20 entries; there is no lower bound, so an
    # empty list passes validation.
    questions: list[str] = Field(..., max_length=20)
|
||||
|
||||
|
||||
class BatchQueryData(BaseModel):
    # Summary of a batch query: its id, size, status and creation time.
    batch_id: str
    total: int
    status: str
    created_at: str
|
||||
|
||||
|
||||
class BatchResultData(BaseModel):
    # Results of a batch query with completed/failed counters.
    batch_id: str
    total: int
    completed: int
    failed: int
    status: str
    results: list[QAResult]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# E. Search schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class EntitySearchData(BaseModel):
    # Result of an entity search: the query string, a total count and the
    # nodes returned.
    query: str
    total: int
    items: list[KGNode]
|
||||
|
||||
|
||||
class PathNode(BaseModel):
    # Minimal node description used inside path results.
    id: str
    name: str
    type: str
|
||||
|
||||
|
||||
class PathEdge(BaseModel):
    # Minimal edge description used inside path results; unlike KGEdge, the
    # relation has no default here.
    source: str
    target: str
    relation: str
|
||||
|
||||
|
||||
class PathInfo(BaseModel):
    # One path: its length plus the nodes and edges it consists of.
    length: int
    nodes: list[PathNode]
    edges: list[PathEdge]
|
||||
|
||||
|
||||
class PathSearchData(BaseModel):
    # Result of a path search between two nodes.
    # "from" is a Python keyword and cannot be a field name, so the endpoints are
    # declared as from_node / to_node and aliased to "from" / "to".
    from_node: PathNode = Field(alias="from")
    to_node: PathNode = Field(alias="to")
    max_hops: int
    paths: list[PathInfo]
    total_paths: int

    # populate_by_name lets the model be constructed with either the field names
    # or the aliases.
    model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class GraphSearchData(BaseModel):
    # Result of a graph search: the query, the matched nodes and a list of
    # subgraph edges.
    query: str
    matched_nodes: list[KGNode]
    subgraph_edges: list[KGEdge]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# F. System schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ComponentHealth(BaseModel):
    # Health report for one component. Only `status` is required; every other
    # field is optional and defaults to None.
    status: str  # ok | error
    path: Optional[str] = None
    exists: Optional[bool] = None
    base_url: Optional[str] = None
    key_configured: Optional[bool] = None
    kg_nodes_exists: Optional[bool] = None
    kg_edges_exists: Optional[bool] = None
    uploads_dir_exists: Optional[bool] = None
|
||||
|
||||
|
||||
class HealthData(BaseModel):
    # Overall health payload with a per-component breakdown.
    status: str
    version: str
    uptime_seconds: float
    components: dict[str, ComponentHealth]  # string key -> that component's report
|
||||
|
||||
|
||||
class SystemStatsData(BaseModel):
    # System-wide counters covering documents, graph size, queries, jobs and storage.
    total_documents: int
    indexed_documents: int
    failed_documents: int
    total_nodes: int
    total_edges: int
    type_distribution: dict[str, int]  # string key -> count
    total_queries: int
    active_jobs: int
    storage_used_mb: float
|
||||
|
||||
|
||||
class FormatInfo(BaseModel):
    # Description of one file format and its limits.
    ext: str
    description: str
    max_size_mb: int
    max_pages: int
    requires_ocr: bool
|
||||
|
||||
|
||||
class FormatsData(BaseModel):
    # Payload listing file formats, OCR languages and free-text notes.
    formats: list[FormatInfo]
    ocr_languages: list[dict]  # untyped dicts; their keys are not fixed by this schema
    notes: list[str]
|
||||
|
||||
|
||||
class DemoData(BaseModel):
    # Bundle of nodes, edges and a stats dict.
    nodes: list[KGNode]
    edges: list[KGEdge]
    stats: dict  # untyped dict; its keys are not fixed by this schema
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# B3 index result
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class IndexResultStats(BaseModel):
    # Summary counters for an indexing run. Every field has a default: counters
    # start at 0 and each dict field gets its own empty dict.
    blocks: int = 0
    block_types: dict[str, int] = Field(default_factory=dict)
    pages: int = 0
    raw_extractions: int = 0
    nodes: int = 0
    edges: int = 0
    type_counts: dict[str, int] = Field(default_factory=dict)
    alignment_counts: dict[str, int] = Field(default_factory=dict)
    elapsed_seconds: float = 0.0
|
||||
|
||||
|
||||
class ExtractionRecord(BaseModel):
    # One extraction: its text, its type and where it was found.
    text: str
    type: str
    # Offsets and alignment are optional and default to None.
    char_start: Optional[int] = None
    char_end: Optional[int] = None
    alignment: Optional[str] = None
    page: int = 0
    doc_id: str
|
||||
|
||||
|
||||
class IndexResultData(BaseModel):
    # Result of an indexing job. The identifiers and status are required; the
    # four result sections are optional and default to None.
    job_id: str
    doc_id: str
    status: str
    stats: Optional[IndexResultStats] = None
    extractions: Optional[list[ExtractionRecord]] = None
    nodes: Optional[list[KGNode]] = None
    edges: Optional[list[KGEdge]] = None
|
||||
@@ -0,0 +1,367 @@
|
||||
[
|
||||
{
|
||||
"type": "text",
|
||||
"text": "GraphRAG System ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
344,
|
||||
175,
|
||||
655,
|
||||
204
|
||||
],
|
||||
"page_idx": 0
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Technical Architecture Overview ",
|
||||
"bbox": [
|
||||
289,
|
||||
234,
|
||||
710,
|
||||
254
|
||||
],
|
||||
"page_idx": 0
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Version 1.0 | March 2026 ",
|
||||
"bbox": [
|
||||
364,
|
||||
272,
|
||||
633,
|
||||
290
|
||||
],
|
||||
"page_idx": 0
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "1. Abstract ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
52,
|
||||
42,
|
||||
200,
|
||||
61
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "This document presents the technical architecture of a Multimodal GraphRAG System designed for intelligent document parsing and knowledge graph construction. The system integrates MinerU for document parsing, LangExtract for structured entity extraction, and a graph database for knowledge storage and retrieval. ",
|
||||
"bbox": [
|
||||
48,
|
||||
83,
|
||||
951,
|
||||
171
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "The pipeline supports multiple document formats including PDF, DOCX, PPTX, and image files. Extracted entities and relations are stored as graph nodes and edges, enabling semantic search and question answering over large document collections. ",
|
||||
"bbox": [
|
||||
48,
|
||||
200,
|
||||
949,
|
||||
265
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "2. System Components ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
50,
|
||||
299,
|
||||
321,
|
||||
318
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "2.1 Document Parsing Module ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
50,
|
||||
343,
|
||||
349,
|
||||
361
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "MinerU Cloud API (v4) serves as the document parsing backend. It accepts PDF, DOCX, PPTX, PNG, JPG, and HTML files. Output includes Markdown text, structured content_list.json, and extracted images. ",
|
||||
"bbox": [
|
||||
48,
|
||||
373,
|
||||
951,
|
||||
436
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "2.2 Entity Extraction Module ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
50,
|
||||
461,
|
||||
357,
|
||||
479
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "LangExtract (v1.1.1) performs structured information extraction from plain text using few-shot prompting with LLM backends (Gemini, OpenAI, or local Ollama). Each extraction includes character-level position anchoring. ",
|
||||
"bbox": [
|
||||
48,
|
||||
492,
|
||||
949,
|
||||
555
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "2.3 Knowledge Graph Module ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
50,
|
||||
580,
|
||||
337,
|
||||
596
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Extracted entities and relationships are stored in a graph database. Node types include: Person, Organization, Location, Event, Concept. Edge types include: RELATED_TO, BELONGS_TO, CAUSED_BY, LOCATED_IN. ",
|
||||
"bbox": [
|
||||
48,
|
||||
608,
|
||||
949,
|
||||
674
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "2.4 Retrieval Module ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
50,
|
||||
697,
|
||||
272,
|
||||
715
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "The retrieval layer supports hybrid search combining vector similarity and graph traversal. \nQuery results are ranked by relevance score and returned with source document references. ",
|
||||
"bbox": [
|
||||
48,
|
||||
727,
|
||||
944,
|
||||
766
|
||||
],
|
||||
"page_idx": 1
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "3. Data Pipeline ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
50,
|
||||
42,
|
||||
268,
|
||||
61
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "The end-to-end data pipeline consists of the following stages: ",
|
||||
"bbox": [
|
||||
50,
|
||||
83,
|
||||
623,
|
||||
99
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Stage 1: Document Ingestion ",
|
||||
"bbox": [
|
||||
68,
|
||||
130,
|
||||
322,
|
||||
146
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "- Accept raw documents (PDF, DOCX, images, HTML) - Submit to MinerU API for parsing - Poll task status until state = done ",
|
||||
"bbox": [
|
||||
85,
|
||||
153,
|
||||
531,
|
||||
217
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Stage 2: Content Extraction ",
|
||||
"bbox": [
|
||||
68,
|
||||
249,
|
||||
322,
|
||||
263
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "- Download and decompress full_zip_url - Parse content_list.json into Document objects - Separate text blocks, tables, images, equations ",
|
||||
"bbox": [
|
||||
85,
|
||||
272,
|
||||
542,
|
||||
335
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Stage 3: Entity & Relation Extraction ",
|
||||
"bbox": [
|
||||
67,
|
||||
367,
|
||||
415,
|
||||
381
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "- Feed text blocks to LangExtract - Extract entities with char_interval positions - Extract relationships between entities ",
|
||||
"bbox": [
|
||||
85,
|
||||
390,
|
||||
526,
|
||||
454
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Stage 4: Graph Construction ",
|
||||
"bbox": [
|
||||
68,
|
||||
485,
|
||||
322,
|
||||
500
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "- Map extractions to graph nodes and edges - Store with source provenance (page_idx, bbox) - Build vector embeddings for semantic search ",
|
||||
"bbox": [
|
||||
85,
|
||||
508,
|
||||
522,
|
||||
571
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "4. Supported File Formats ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
50,
|
||||
604,
|
||||
326,
|
||||
620
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "table",
|
||||
"img_path": "images/1ed7aacecd20fecef8dc27ee2fe76dc1ae7fa93c44f7d10878d17a41f21a6bef.jpg",
|
||||
"table_caption": [],
|
||||
"table_footnote": [],
|
||||
"table_body": "<table><tr><td rowspan=1 colspan=1>Format</td><td rowspan=1 colspan=1>Extension</td><td rowspan=1 colspan=1>OCR Required</td><td rowspan=1 colspan=1>Model</td></tr><tr><td rowspan=1 colspan=1>PDF (text)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline / vlm</td></tr><tr><td rowspan=1 colspan=1>PDF (scan)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>Yes</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>Word</td><td rowspan=1 colspan=1>.docx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>PowerPoint</td><td rowspan=1 colspan=1>.pptx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>Image</td><td rowspan=1 colspan=1>.png / .jpg</td><td rowspan=1 colspan=1>Auto</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>HTML</td><td rowspan=1 colspan=1>.html</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>MinerU-HTML</td></tr></table>",
|
||||
"bbox": [
|
||||
45,
|
||||
634,
|
||||
882,
|
||||
806
|
||||
],
|
||||
"page_idx": 2
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "5. API Configuration Reference ",
|
||||
"text_level": 1,
|
||||
"bbox": [
|
||||
48,
|
||||
42,
|
||||
457,
|
||||
63
|
||||
],
|
||||
"page_idx": 3
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "The following environment variables must be configured before running the MinerU parsing service: ",
|
||||
"bbox": [
|
||||
48,
|
||||
83,
|
||||
952,
|
||||
123
|
||||
],
|
||||
"page_idx": 3
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "MINERU_API_TOKEN : Bearer token for API authentication \nMINERU_USER_UID : User UUID for quota management \nMINERU_BASE_URL : https://mineru.net/api/v4 \nMINERU_MODEL_VERSION : pipeline (default) | vlm | MinerU-HTML \nMINERU_LANGUAGE : ch (Chinese) | en (English) \nMINERU_IS_OCR : false (text PDF) | true (scanned PDF) \nMINERU_ENABLE_FORMULA: true | false \nMINERU_ENABLE_TABLE : true | false ",
|
||||
"bbox": [
|
||||
65,
|
||||
152,
|
||||
636,
|
||||
337
|
||||
],
|
||||
"page_idx": 3
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Rate Limits: ",
|
||||
"bbox": [
|
||||
48,
|
||||
367,
|
||||
161,
|
||||
381
|
||||
],
|
||||
"page_idx": 3
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "- Max file size : 200 MB per file - Max pages : 600 pages per file - Daily quota : 2000 pages (high priority) - Batch limit : 200 files per request ",
|
||||
"bbox": [
|
||||
65,
|
||||
388,
|
||||
504,
|
||||
478
|
||||
],
|
||||
"page_idx": 3
|
||||
}
|
||||
]
|
||||
Binary file not shown.
71
backend/output/8456b615_sample_graphrag_overview/full.md
Normal file
71
backend/output/8456b615_sample_graphrag_overview/full.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# GraphRAG System
|
||||
|
||||
Technical Architecture Overview
|
||||
|
||||
Version 1.0 | March 2026
|
||||
|
||||
# 1. Abstract
|
||||
|
||||
This document presents the technical architecture of a Multimodal GraphRAG System designed for intelligent document parsing and knowledge graph construction. The system integrates MinerU for document parsing, LangExtract for structured entity extraction, and a graph database for knowledge storage and retrieval.
|
||||
|
||||
The pipeline supports multiple document formats including PDF, DOCX, PPTX, and image files. Extracted entities and relations are stored as graph nodes and edges, enabling semantic search and question answering over large document collections.
|
||||
|
||||
# 2. System Components
|
||||
|
||||
# 2.1 Document Parsing Module
|
||||
|
||||
MinerU Cloud API (v4) serves as the document parsing backend. It accepts PDF, DOCX, PPTX, PNG, JPG, and HTML files. Output includes Markdown text, structured content_list.json, and extracted images.
|
||||
|
||||
# 2.2 Entity Extraction Module
|
||||
|
||||
LangExtract (v1.1.1) performs structured information extraction from plain text using few-shot prompting with LLM backends (Gemini, OpenAI, or local Ollama). Each extraction includes character-level position anchoring.
|
||||
|
||||
# 2.3 Knowledge Graph Module
|
||||
|
||||
Extracted entities and relationships are stored in a graph database. Node types include: Person, Organization, Location, Event, Concept. Edge types include: RELATED_TO, BELONGS_TO, CAUSED_BY, LOCATED_IN.
|
||||
|
||||
# 2.4 Retrieval Module
|
||||
|
||||
The retrieval layer supports hybrid search combining vector similarity and graph traversal.
|
||||
Query results are ranked by relevance score and returned with source document references.
|
||||
|
||||
# 3. Data Pipeline
|
||||
|
||||
The end-to-end data pipeline consists of the following stages:
|
||||
|
||||
Stage 1: Document Ingestion
|
||||
|
||||
- Accept raw documents (PDF, DOCX, images, HTML) - Submit to MinerU API for parsing - Poll task status until state = done
|
||||
|
||||
Stage 2: Content Extraction
|
||||
|
||||
- Download and decompress full_zip_url - Parse content_list.json into Document objects - Separate text blocks, tables, images, equations
|
||||
|
||||
Stage 3: Entity & Relation Extraction
|
||||
|
||||
- Feed text blocks to LangExtract - Extract entities with char_interval positions - Extract relationships between entities
|
||||
|
||||
Stage 4: Graph Construction
|
||||
|
||||
- Map extractions to graph nodes and edges - Store with source provenance (page_idx, bbox) - Build vector embeddings for semantic search
|
||||
|
||||
# 4. Supported File Formats
|
||||
|
||||
<table><tr><td rowspan=1 colspan=1>Format</td><td rowspan=1 colspan=1>Extension</td><td rowspan=1 colspan=1>OCR Required</td><td rowspan=1 colspan=1>Model</td></tr><tr><td rowspan=1 colspan=1>PDF (text)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline / vlm</td></tr><tr><td rowspan=1 colspan=1>PDF (scan)</td><td rowspan=1 colspan=1>.pdf</td><td rowspan=1 colspan=1>Yes</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>Word</td><td rowspan=1 colspan=1>.docx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>PowerPoint</td><td rowspan=1 colspan=1>.pptx</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>pipeline</td></tr><tr><td rowspan=1 colspan=1>Image</td><td rowspan=1 colspan=1>.png / .jpg</td><td rowspan=1 colspan=1>Auto</td><td rowspan=1 colspan=1>vlm</td></tr><tr><td rowspan=1 colspan=1>HTML</td><td rowspan=1 colspan=1>.html</td><td rowspan=1 colspan=1>No</td><td rowspan=1 colspan=1>MinerU-HTML</td></tr></table>
|
||||
|
||||
# 5. API Configuration Reference
|
||||
|
||||
The following environment variables must be configured before running the MinerU parsing service:
|
||||
|
||||
MINERU_API_TOKEN : Bearer token for API authentication
|
||||
MINERU_USER_UID : User UUID for quota management
|
||||
MINERU_BASE_URL : https://mineru.net/api/v4
|
||||
MINERU_MODEL_VERSION : pipeline (default) | vlm | MinerU-HTML
|
||||
MINERU_LANGUAGE : ch (Chinese) | en (English)
|
||||
MINERU_IS_OCR : false (text PDF) | true (scanned PDF)
|
||||
MINERU_ENABLE_FORMULA: true | false
|
||||
MINERU_ENABLE_TABLE : true | false
|
||||
|
||||
Rate Limits:
|
||||
|
||||
- Max file size : 200 MB per file - Max pages : 600 pages per file - Daily quota : 2000 pages (high priority) - Batch limit : 200 files per request
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
4063
backend/output/8456b615_sample_graphrag_overview/layout.json
Normal file
4063
backend/output/8456b615_sample_graphrag_overview/layout.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_blocks": 32,
|
||||
"type_distribution": {
|
||||
"text": 31,
|
||||
"table": 1
|
||||
},
|
||||
"total_pages": 4,
|
||||
"text_block_count": 31,
|
||||
"table_block_count": 1
|
||||
}
|
||||
0
backend/pipeline/__init__.py
Normal file
0
backend/pipeline/__init__.py
Normal file
66
backend/pipeline/entity_extractor.py
Normal file
66
backend/pipeline/entity_extractor.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""
|
||||
Entity Extractor — LangExtract + DeepSeek entity extraction.
|
||||
Independent implementation for the GraphRAG Studio backend.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
import langextract as lx
|
||||
from langextract.providers.openai import OpenAILanguageModel
|
||||
|
||||
load_dotenv(Path(__file__).parent.parent / ".env", override=True)
|
||||
|
||||
# Connection settings, read from the process environment once at import time
# (after the load_dotenv call above has loaded backend/.env).
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")  # "" when unset; create_model() rejects an empty key
DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
MODEL_ID = "deepseek-chat"  # model identifier handed to OpenAILanguageModel

# Task instructions passed to lx.extract() as prompt_description. The five
# entity types named here are the labels the model is asked to produce.
PROMPT_DESCRIPTION = (
    "Extract named entities from the text in order of appearance. "
    "Entity types: TECHNOLOGY (software, algorithms, models, tools), "
    "ORGANIZATION (companies, research groups, institutions), "
    "PERSON (individual people), "
    "LOCATION (places, geographic entities), "
    "CONCEPT (technical concepts, methodologies, frameworks)."
)
|
||||
|
||||
# Example passed to lx.extract() as `examples`: one sample sentence together
# with the extractions expected from it, listed in order of appearance.
# It covers the TECHNOLOGY, PERSON and CONCEPT types only.
EXAMPLES = [
    lx.data.ExampleData(
        text=(
            "LangChain is a framework created by Harrison Chase for building "
            "LLM applications. It integrates with OpenAI models and Pinecone "
            "vector database for semantic search."
        ),
        extractions=[
            lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
            lx.data.Extraction(extraction_class="PERSON", extraction_text="Harrison Chase"),
            lx.data.Extraction(extraction_class="CONCEPT", extraction_text="LLM applications"),
            lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="OpenAI models"),
            lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="Pinecone"),
            lx.data.Extraction(extraction_class="CONCEPT", extraction_text="semantic search"),
        ],
    )
]
|
||||
|
||||
|
||||
def create_model() -> OpenAILanguageModel:
    """Build the LangExtract OpenAI-compatible client pointed at DeepSeek.

    Returns:
        An ``OpenAILanguageModel`` configured with the module-level model id,
        API key and base URL.

    Raises:
        ValueError: If ``DEEPSEEK_API_KEY`` is empty.
    """
    if DEEPSEEK_API_KEY:
        settings = {
            "model_id": MODEL_ID,
            "api_key": DEEPSEEK_API_KEY,
            "base_url": DEEPSEEK_BASE_URL,
        }
        return OpenAILanguageModel(**settings)
    raise ValueError("DEEPSEEK_API_KEY not set in backend/.env")
|
||||
|
||||
|
||||
def extract_entities(page_text: str, model: OpenAILanguageModel) -> lx.data.AnnotatedDocument:
    """Run LangExtract entity extraction over one page of text.

    Args:
        page_text: Plain text to extract entities from.
        model: Language-model client forwarded to ``lx.extract``.

    Returns:
        The result of ``lx.extract`` for ``page_text``, using the module-level
        prompt description and examples, with the progress display disabled.
    """
    request = {
        "text_or_documents": page_text,
        "prompt_description": PROMPT_DESCRIPTION,
        "examples": EXAMPLES,
        "model": model,
        "show_progress": False,
    }
    return lx.extract(**request)
|
||||
123
backend/pipeline/kg_builder.py
Normal file
123
backend/pipeline/kg_builder.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
KG Builder — node deduplication + CO_OCCURS_IN edge generation.
|
||||
Independent implementation for the GraphRAG Studio backend.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import langextract as lx
|
||||
|
||||
from pipeline.text_assembler import PageText
|
||||
|
||||
# Alignment status values (``ext.alignment_status.value``) that are accepted
# as KG nodes; extractions with any other status, or none, are dropped.
ACCEPTED_ALIGNMENTS = {"match_exact", "match_greater", "match_lesser"}

# Punctuation allowed in node ids in addition to letters and digits.
_ID_SAFE_PUNCTUATION = "-_."


def _id_fragment(text: str, limit: int) -> str:
    """Return a lower-cased, id-safe prefix of ``text`` at most ``limit`` long.

    Only alphanumeric characters (any script) and ``-``, ``_``, ``.`` survive.
    Characters such as ``/``, ``,``, ``(``, ``)`` and whitespace are removed so
    the id can be used as a URL path segment and parsed back out of
    ``id=<node id>`` text.
    """
    kept = (ch for ch in text.lower() if ch.isalnum() or ch in _ID_SAFE_PUNCTUATION)
    return "".join(kept)[:limit]


def build_kg(
    pages: list[PageText],
    annotated_docs: list[lx.data.AnnotatedDocument],
    source_doc_id: str,
) -> tuple[list[dict], list[dict]]:
    """Build KG nodes and edges from LangExtract results.

    Args:
        pages: Page objects; only ``page_idx`` is read.
        annotated_docs: Extraction results, paired positionally with ``pages``
            (extra items on either side are ignored, as with ``zip``).
        source_doc_id: Document id stamped on every node and edge.

    Returns:
        (nodes, edges) — deduplicated node list and edge list. Nodes are
        deduplicated on (lower-cased name, type) and keep the attributes of
        their first occurrence. One ``CO_OCCURS_IN`` edge is emitted per page
        for every pair of distinct nodes seen on that page, with ``source``
        being the lexicographically smaller node id.
    """
    from itertools import combinations  # not imported at file level

    # Phase 1: collect raw entities
    raw_entities = []
    for page, doc in zip(pages, annotated_docs):
        for ext in doc.extractions or ():
            status = ext.alignment_status.value if ext.alignment_status else None
            if status not in ACCEPTED_ALIGNMENTS:
                continue
            # A node without a name or a class cannot be keyed or given an id.
            if not ext.extraction_text or not ext.extraction_class:
                continue
            interval = ext.char_interval
            raw_entities.append({
                "name": ext.extraction_text,
                "type": ext.extraction_class,
                "char_start": interval.start_pos if interval else None,
                "char_end": interval.end_pos if interval else None,
                "confidence": status,
                "page": page.page_idx,
                "source_doc": source_doc_id,
            })

    # Phase 2: deduplicate nodes
    seen: dict[tuple[str, str], int] = {}
    nodes: list[dict] = []
    node_pages: dict[int, set[int]] = defaultdict(set)

    for entity in raw_entities:
        dedup_key = (entity["name"].lower(), entity["type"])
        if dedup_key not in seen:
            node_idx = len(nodes)
            seen[dedup_key] = node_idx
            type_prefix = _id_fragment(entity["type"], 4)
            name_slug = _id_fragment(entity["name"], 12)
            nodes.append({
                # The trailing index keeps ids unique even when slugs collide
                # or come out empty.
                "id": f"{type_prefix}_{name_slug}_{node_idx}",
                "name": entity["name"],
                "type": entity["type"],
                "source_doc": entity["source_doc"],
                "char_start": entity["char_start"],
                "char_end": entity["char_end"],
                "confidence": entity["confidence"],
                "page": entity["page"],
            })
        node_pages[seen[dedup_key]].add(entity["page"])

    # Phase 3: CO_OCCURS_IN edges
    page_nodes: dict[int, list[int]] = defaultdict(list)
    for node_idx, page_set in node_pages.items():
        for page_idx in page_set:
            page_nodes[page_idx].append(node_idx)

    edges: list[dict] = []
    for page_idx in sorted(page_nodes):
        # Each node index appears once per page, so every pair is unique and
        # no extra de-duplication of edges is needed.
        for first, second in combinations(page_nodes[page_idx], 2):
            a = nodes[first]["id"]
            b = nodes[second]["id"]
            src, tgt = (a, b) if a < b else (b, a)
            edges.append({
                "source": src,
                "target": tgt,
                "relation": "CO_OCCURS_IN",
                "doc_id": source_doc_id,
                "page": page_idx,
            })

    return nodes, edges
|
||||
|
||||
|
||||
def extractions_to_records(
    pages: list[PageText],
    annotated_docs: list[lx.data.AnnotatedDocument],
    doc_id: str,
) -> list[dict]:
    """Flatten LangExtract results to ExtractionRecord dicts.

    Pages and annotated documents are paired positionally. Documents without
    extractions contribute nothing. Unlike node building, no alignment filter
    is applied here: every extraction becomes one record.
    """

    def _as_record(page, ext) -> dict:
        # Alignment status and character interval are both optional.
        alignment = ext.alignment_status
        span = ext.char_interval
        return {
            "text": ext.extraction_text,
            "type": ext.extraction_class,
            "char_start": span.start_pos if span else None,
            "char_end": span.end_pos if span else None,
            "alignment": alignment.value if alignment else None,
            "page": page.page_idx,
            "doc_id": doc_id,
        }

    return [
        _as_record(page, ext)
        for page, doc in zip(pages, annotated_docs)
        if doc.extractions
        for ext in doc.extractions
    ]
|
||||
217
backend/pipeline/qa_agent.py
Normal file
217
backend/pipeline/qa_agent.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""
|
||||
QA Agent — LangGraph ReAct agent over the knowledge graph.
|
||||
Independent implementation for the GraphRAG Studio backend.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import networkx as nx
|
||||
from dotenv import load_dotenv
|
||||
from langchain.tools import tool
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage
|
||||
from langgraph.prebuilt import create_react_agent
|
||||
|
||||
# Load backend/.env (the directory above this package). override=True lets
# values from that file replace variables already present in the environment.
load_dotenv(Path(__file__).parent.parent / ".env", override=True)

# DeepSeek credentials and endpoint, read once when this module is imported.
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
|
||||
|
||||
|
||||
def build_kg_graph(nodes: list[dict], edges: list[dict]) -> nx.Graph:
    """Load KG node and edge dicts into an undirected NetworkX graph.

    Each node is keyed by its ``"id"`` and carries every field of its dict as
    node attributes. Each edge connects ``"source"`` to ``"target"`` and carries
    the remaining fields as edge attributes.
    """
    endpoint_keys = ("source", "target")
    graph = nx.Graph()
    for node in nodes:
        graph.add_node(node["id"], **node)
    for edge in edges:
        attributes = {key: value for key, value in edge.items() if key not in endpoint_keys}
        graph.add_edge(edge["source"], edge["target"], **attributes)
    return graph
|
||||
|
||||
|
||||
def make_tools(G: nx.Graph) -> list:
    """Create the LangChain tools the QA agent uses to query graph ``G``.

    Every tool closes over ``G`` and returns a plain-text report. LangChain's
    ``@tool`` uses each function's docstring as the tool description shown to
    the model, so those docstrings are runtime text and are kept verbatim.

    Returns:
        ``[search_entities, get_neighbors, get_entities_by_type, describe_graph]``.
    """

    @tool
    def search_entities(query: str) -> str:
        """Search knowledge graph entities by name (case-insensitive substring).
        Args:
            query: Keyword to search for in entity names.
        """
        q = query.lower()
        matches = [data for _, data in G.nodes(data=True) if q in data.get("name", "").lower()]
        if not matches:
            # Offer a few existing names so the model can retry with a real one.
            sample = ", ".join(d.get("name", "") for _, d in list(G.nodes(data=True))[:8])
            return f"No entities found matching '{query}'. Sample: {sample}"
        lines = [f"Found {len(matches)} entity(ies) matching '{query}':"]
        for m in matches[:15]:
            lines.append(
                f" [{m['type']}] \"{m['name']}\" "
                f"(confidence={m.get('confidence','?')}, page={m.get('page',0)}, id={m['id']})"
            )
        return "\n".join(lines)

    @tool
    def get_neighbors(entity_name: str, hops: int = 1) -> str:
        """Get N-hop neighbors of an entity in the knowledge graph.
        Args:
            entity_name: Entity name (partial match).
            hops: Number of hops (1-3, default 1).
        """
        hops = max(1, min(int(hops), 3))
        needle = entity_name.lower()
        candidates = [(nid, d) for nid, d in G.nodes(data=True)
                      if needle in d.get("name", "").lower()]
        if not candidates:
            return f"Entity '{entity_name}' not found. Use search_entities first."
        # Prefer an exact (case-insensitive) name match; otherwise fall back to
        # the first substring hit. Without this, a short query such as "AI"
        # would resolve to whichever longer name containing it came first.
        node_id, node_data = next(
            (c for c in candidates if c[1].get("name", "").lower() == needle),
            candidates[0],
        )
        reachable = nx.single_source_shortest_path_length(G, node_id, cutoff=hops)
        by_hop: dict[int, list] = {}
        for nid, dist in reachable.items():
            if dist > 0:  # distance 0 is the start node itself
                by_hop.setdefault(dist, []).append(G.nodes[nid])
        lines = [f"Neighbors of '{node_data['name']}' [{node_data['type']}] within {hops} hop(s):"]
        for hop in sorted(by_hop):
            hop_nodes = by_hop[hop]
            lines.append(f"\n Hop {hop} — {len(hop_nodes)} related entities:")
            for n in hop_nodes[:20]:
                lines.append(f" [{n.get('type','?')}] {n.get('name','?')}")
            if len(hop_nodes) > 20:
                lines.append(f" ... and {len(hop_nodes)-20} more")
        lines.append(f"\n Total related entities: {sum(len(v) for v in by_hop.values())}")
        return "\n".join(lines)

    @tool
    def get_entities_by_type(entity_type: str) -> str:
        """List all entities of a specific type.
        Args:
            entity_type: TECHNOLOGY, CONCEPT, PERSON, ORGANIZATION, or LOCATION.
        """
        t_upper = entity_type.strip().upper()
        valid = {"TECHNOLOGY", "CONCEPT", "PERSON", "ORGANIZATION", "LOCATION"}
        if t_upper not in valid:
            present = sorted({d.get("type","") for _, d in G.nodes(data=True)})
            return f"Unknown type '{entity_type}'. Present: {present}"
        matches = [d for _, d in G.nodes(data=True) if d.get("type","") == t_upper]
        if not matches:
            return f"No {t_upper} entities found."
        lines = [f"Found {len(matches)} {t_upper} entities:"]
        for m in matches[:30]:
            lines.append(f" \"{m['name']}\" (page={m.get('page',0)}, id={m['id']})")
        if len(matches) > 30:
            lines.append(f" ... and {len(matches)-30} more")
        return "\n".join(lines)

    @tool
    def describe_graph() -> str:
        """Get an overview of the knowledge graph statistics."""
        n_nodes = G.number_of_nodes()
        n_edges = G.number_of_edges()
        type_counts: dict[str, int] = {}
        for _, d in G.nodes(data=True):
            t = d.get("type", "UNKNOWN")
            type_counts[t] = type_counts.get(t, 0) + 1
        lines = [
            "Knowledge Graph Overview:",
            f" Nodes: {n_nodes}",
            f" Edges: {n_edges}",
            f" Entity types: {type_counts}",
        ]
        if n_nodes > 0:
            centrality = nx.degree_centrality(G)
            top5 = sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:5]
            lines.append(" Top 5 central nodes:")
            for nid, c in top5:
                nd = G.nodes[nid]
                lines.append(f" [{nd.get('type','?')}] {nd.get('name','?')} (centrality={c:.3f})")
        return "\n".join(lines)

    return [search_entities, get_neighbors, get_entities_by_type, describe_graph]
|
||||
|
||||
|
||||
def run_qa(
    question: str,
    history: list[dict],
    nodes: list[dict],
    edges: list[dict],
) -> dict:
    """Run Agentic-RAG QA. Returns dict with answer, tool_calls, cited_nodes.

    Args:
        question: The user's current question.
        history: Earlier turns; only the last 8 entries are replayed. An entry
            whose ``role`` is ``"human"`` or ``"user"`` (or missing) is replayed
            as a human message, anything else as an AI message.
        nodes: KG node dicts used to build the graph the tools query.
        edges: KG edge dicts used to build the graph the tools query.

    Returns:
        ``{"answer": str, "tool_calls": list[dict], "cited_nodes": list[str]}``.
        ``cited_nodes`` holds the node ids found in tool outputs, in first-seen
        order.

    Raises:
        ValueError: If ``DEEPSEEK_API_KEY`` is empty.
    """
    if not DEEPSEEK_API_KEY:
        raise ValueError("DEEPSEEK_API_KEY not set in backend/.env")

    G = build_kg_graph(nodes, edges)
    tools = make_tools(G)

    llm = ChatOpenAI(
        model="deepseek-chat",
        api_key=DEEPSEEK_API_KEY,
        base_url=DEEPSEEK_BASE_URL,
        temperature=0,
    )

    system_prompt = (
        "You are a helpful assistant with access to a knowledge graph (KG) built from the user's documents.\n"
        "\n"
        "Guidelines:\n"
        "- If the question is clearly unrelated to the KG (greetings, math, general knowledge, etc.), "
        "answer directly WITHOUT using any tools.\n"
        "- If the question might be answered by the KG (topics related to entities in the documents), "
        "use the tools to search and explore before answering.\n"
        "- When you DO use the KG, cite the entity names and types you found.\n"
        "- If the KG has no relevant information, say so honestly and answer from general knowledge if possible.\n"
        "\n"
        "Available tools: search entities by name, get neighbors, list entities by type, get graph overview."
    )

    agent = create_react_agent(llm, tools, prompt=system_prompt)

    # Build messages: history + current question (the system prompt is
    # supplied to the agent above).
    messages: list = []
    for msg in history[-8:]:
        role = msg.get("role", "human")
        content = msg.get("content", "") or msg.get("answer", "")
        # "user" is accepted alongside "human" so OpenAI-style history is not
        # replayed as if the assistant had said it.
        if role in ("human", "user"):
            messages.append(HumanMessage(content=msg.get("question", content)))
        else:
            messages.append(AIMessage(content=content))
    messages.append(HumanMessage(content=question))

    result = agent.invoke({"messages": messages})
    all_messages = result.get("messages", [])

    # The answer is the last AI message that has text and requests no tools.
    answer = next(
        (
            msg.content
            for msg in reversed(all_messages)
            if isinstance(msg, AIMessage) and msg.content and not msg.tool_calls
        ),
        "",
    )

    # Index tool outputs by call id once instead of rescanning per tool call.
    tool_outputs: dict = {}
    for msg in all_messages:
        if isinstance(msg, ToolMessage):
            tool_outputs.setdefault(msg.tool_call_id, msg.content)

    tool_calls = []
    cited_node_ids: dict[str, None] = {}  # dict keeps first-seen order
    for msg in all_messages:
        if not (isinstance(msg, AIMessage) and msg.tool_calls):
            continue
        for tc in msg.tool_calls:
            output = tool_outputs.get(tc.get("id"), "")
            tool_calls.append({
                "step": len(tool_calls) + 1,
                "tool_name": tc.get("name", ""),
                "tool_input": str(tc.get("args", {})),
                "tool_output": str(output),
            })
            # Tools print node ids as "id=<node id>"; collect them as citations.
            for node_id in re.findall(r'\bid=([^\s,\)\]]+)', str(output)):
                cited_node_ids.setdefault(node_id, None)

    return {
        "answer": answer,
        "tool_calls": tool_calls,
        "cited_nodes": list(cited_node_ids),
    }
|
||||
107
backend/pipeline/text_assembler.py
Normal file
107
backend/pipeline/text_assembler.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
Text Assembler — MinerU content_list.json → per-page plain text.
|
||||
Independent implementation for the GraphRAG Studio backend.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
@dataclasses.dataclass
class BlockSpan:
    """Location of one content block inside its page's assembled text."""

    # Position of the block in the original content list.
    block_index: int
    # Block type string; assemble_pages only creates spans for "text" and "table".
    block_type: str
    # Page the block belongs to.
    page_idx: int
    # Offset of the block's first character in the page text.
    char_start: int
    # Offset just past the block's last character (exclusive end).
    char_end: int
    # The block's "bbox" entry; assemble_pages falls back to [0, 0, 0, 0].
    bbox: list
|
||||
|
||||
|
||||
@dataclasses.dataclass
class PageText:
    """Plain text assembled for one page, plus the spans of its blocks."""

    # Page index taken from the blocks' "page_idx" field.
    page_idx: int
    # Block texts joined with newlines.
    text: str
    # One BlockSpan per block that contributed text, in assembly order.
    block_spans: list[BlockSpan]
|
||||
|
||||
|
||||
def html_table_to_text(table_body: str) -> str:
    """Render an HTML table as plain text, one line per ``<tr>``.

    Within a row, the stripped text of each ``<td>``/``<th>`` cell is joined
    with ``" | "``; rows are joined with newlines.
    """
    document = BeautifulSoup(table_body, "html.parser")
    return "\n".join(
        " | ".join(cell.get_text(strip=True) for cell in row.find_all(["td", "th"]))
        for row in document.find_all("tr")
    )
|
||||
|
||||
|
||||
def load_content_list(path: Path) -> list[dict]:
    """Load a MinerU content list from a JSON file or from a directory.

    Args:
        path: Either the JSON file itself, or a directory to search. In a
            directory, files matching ``*_content_list.json`` are preferred,
            then ``*content_list.json``.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If ``path`` is a directory with no matching file.
    """
    if path.is_dir():
        # glob order is filesystem-dependent; sort so that the same file is
        # picked on every run when several candidates exist.
        matches = sorted(path.glob("*_content_list.json")) or sorted(
            path.glob("*content_list.json")
        )
        if not matches:
            raise FileNotFoundError(f"No content_list.json found in {path}")
        path = matches[0]
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
|
||||
def assemble_pages(content_list: list[dict]) -> list[PageText]:
    """Group content blocks by page and join their text into one string per page.

    Only ``"text"`` and ``"table"`` blocks contribute (tables are converted via
    ``html_table_to_text``); blocks that yield no text are skipped. Blocks are
    separated by a single newline, and each contributing block gets a
    ``BlockSpan`` with its character offsets in the page text. Pages are
    returned in ascending ``page_idx`` order; a block without ``page_idx``
    counts as page 0.
    """
    grouped: dict[int, list[tuple[int, dict]]] = defaultdict(list)
    for position, item in enumerate(content_list):
        grouped[item.get("page_idx", 0)].append((position, item))

    assembled = []
    for page_no in sorted(grouped):
        pieces = []
        spans = []
        offset = 0  # where the next block starts in the page text

        for position, item in grouped[page_no]:
            kind = item.get("type", "unknown")
            if kind == "text":
                content = item.get("text", "").rstrip()
            elif kind == "table":
                markup = item.get("table_body", "")
                content = html_table_to_text(markup) if markup else ""
            else:
                continue
            if not content:
                continue

            end = offset + len(content)
            spans.append(BlockSpan(
                block_index=position,
                block_type=kind,
                page_idx=page_no,
                char_start=offset,
                char_end=end,
                bbox=item.get("bbox", [0, 0, 0, 0]),
            ))
            pieces.append(content)
            offset = end + 1  # +1 for the newline separating blocks

        page_text = "\n".join(pieces).rstrip("\n")
        assembled.append(PageText(page_idx=page_no, text=page_text, block_spans=spans))

    return assembled
|
||||
|
||||
|
||||
def count_blocks_by_type(content_list: list[dict]) -> dict[str, int]:
    """Count blocks per ``"type"`` value; blocks without one count as "unknown".

    Returns:
        A plain dict mapping type to count, in first-seen order.
    """
    tally: dict[str, int] = {}
    for entry in content_list:
        kind = entry.get("type", "unknown")
        tally[kind] = tally.get(kind, 0) + 1
    return tally
|
||||
22
backend/pyproject.toml
Normal file
22
backend/pyproject.toml
Normal file
@@ -0,0 +1,22 @@
|
||||
[project]
|
||||
name = "graphrag-studio-backend"
|
||||
version = "1.0.0"
|
||||
description = "GraphRAG Studio — FastAPI backend service"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"fastapi>=0.104.0",
|
||||
"uvicorn[standard]>=0.24.0",
|
||||
"python-multipart>=0.0.6",
|
||||
"langextract[all]>=0.1.0",
|
||||
"langchain>=0.2.0",
|
||||
"langchain-openai>=0.1.0",
|
||||
"langgraph>=0.1.0",
|
||||
"networkx>=3.0",
|
||||
"python-dotenv>=1.0.0",
|
||||
"requests>=2.31.0",
|
||||
"beautifulsoup4>=4.12.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
0
backend/routers/__init__.py
Normal file
0
backend/routers/__init__.py
Normal file
71
backend/routers/documents.py
Normal file
71
backend/routers/documents.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""A 组:文档管理(4 个端点)"""
|
||||
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from models.schemas import APIResponse
|
||||
from services import document_service as svc
|
||||
|
||||
router = APIRouter(prefix="/documents", tags=["Documents"])
|
||||
|
||||
|
||||
@router.post("/upload", status_code=200)
async def upload_document(
    file: UploadFile = File(...),
    language: str = Form("ch"),
    enable_formula: bool = Form(True),
    enable_table: bool = Form(True),
):
    """Validate and store an uploaded document.

    The file is read fully, then checked by ``svc.validate_upload`` on its name
    and size. A rejected upload yields HTTP 400 with the code and message the
    service returned. Otherwise the upload is saved and its record returned
    without the internal ``upload_filename`` field.
    """
    content = await file.read()
    ok, code, msg = svc.validate_upload(file.filename or "", len(content))
    if not ok:
        return JSONResponse(
            status_code=400,
            content=APIResponse.err(code, msg).model_dump(),
        )
    doc = svc.save_upload(file.filename or "upload", content, language, enable_formula, enable_table)
    # Filter into a new dict instead of ``pop``-ing, so the record handed back
    # by the service is never modified by the response layer.
    public_doc = {key: value for key, value in doc.items() if key != "upload_filename"}
    return APIResponse.ok(public_doc)
|
||||
|
||||
|
||||
@router.get("/{doc_id}")
async def get_document(doc_id: str):
    """Return the stored record for one document.

    Responds with HTTP 404 / error code 2001 when the service has no record
    for ``doc_id``. The internal ``upload_filename`` field is left out of the
    response.
    """
    doc = svc.get_document(doc_id)
    if not doc:
        return JSONResponse(
            status_code=404,
            content=APIResponse.err(2001, f"Document '{doc_id}' not found").model_dump(),
        )
    # Filter into a new dict instead of ``pop``-ing, so the record handed back
    # by the service is never modified by the response layer.
    public_doc = {key: value for key, value in doc.items() if key != "upload_filename"}
    return APIResponse.ok(public_doc)
|
||||
|
||||
|
||||
@router.get("")
async def list_documents(
    page: int = 1,
    page_size: int = 20,
    status: str | None = None,
    format: str | None = None,
):
    """Return one page of document records, optionally filtered.

    ``page`` is raised to at least 1 and ``page_size`` is kept within 1..100
    before being passed to the service. The internal ``upload_filename`` field
    is left out of every returned item.
    """
    # Zero or negative values from the query string would otherwise reach the
    # service's paging logic unchecked.
    page = max(1, page)
    page_size = max(1, min(page_size, 100))
    result = svc.list_documents(page, page_size, status, format)
    # Build filtered copies rather than ``pop``-ing from the service's records.
    public_items = [
        {key: value for key, value in item.items() if key != "upload_filename"}
        for item in result["items"]
    ]
    return APIResponse.ok({**result, "items": public_items})
|
||||
|
||||
|
||||
@router.delete("/{doc_id}")
async def delete_document(doc_id: str):
    """Delete a document and report what was removed from the graph.

    Responds with HTTP 404 / error code 2001 when the document is unknown.
    Otherwise returns the counts reported by the service. ``deleted`` reflects
    the first value returned by ``svc.delete_document`` instead of always
    being ``True``.
    """
    doc = svc.get_document(doc_id)
    if not doc:
        return JSONResponse(
            status_code=404,
            content=APIResponse.err(2001, f"Document '{doc_id}' not found").model_dump(),
        )
    ok, removed_nodes, removed_edges = svc.delete_document(doc_id)
    return APIResponse.ok({
        # NOTE(review): assumes the first element is a success flag — confirm
        # against document_service.delete_document.
        "deleted": bool(ok),
        "doc_id": doc_id,
        "removed_nodes": removed_nodes,
        "removed_edges": removed_edges,
    })
|
||||
70
backend/routers/indexing.py
Normal file
70
backend/routers/indexing.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""B 组:Indexing Pipeline(4 个端点)"""
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from models.schemas import APIResponse, StartIndexRequest
|
||||
from services import document_service as doc_svc
|
||||
from services import indexing_service as idx_svc
|
||||
|
||||
router = APIRouter(prefix="/index", tags=["Indexing"])
|
||||
|
||||
|
||||
@router.post("/start", status_code=202)
async def start_indexing(body: StartIndexRequest):
    """Start an indexing job for an existing document.

    Responds with HTTP 404 / error code 2001 when the document is unknown.
    Otherwise returns the job's id, document id, status, stage and creation
    time as reported by the indexing service.
    """
    if not doc_svc.get_document(body.doc_id):
        missing = APIResponse.err(2001, f"Document '{body.doc_id}' not found")
        return JSONResponse(status_code=404, content=missing.model_dump())

    job = idx_svc.start_indexing(body.doc_id)
    exposed_fields = ("job_id", "doc_id", "status", "stage", "created_at")
    return APIResponse.ok({field: job[field] for field in exposed_fields})
|
||||
|
||||
|
||||
@router.get("/status/{job_id}")
async def get_job_status(job_id: str):
    """Return the status record of an indexing job, or 404 / code 2002."""
    job = idx_svc.get_job_status(job_id)
    if job:
        return APIResponse.ok(job)
    missing = APIResponse.err(2002, f"Job '{job_id}' not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
|
||||
|
||||
|
||||
@router.get("/result/{job_id}")
async def get_job_result(job_id: str):
    """Return the result of an indexing job.

    Responds with 404 / code 2002 when the job is unknown, and with 400 / code
    2003 when the job is not ``"done"`` and its record has no ``"stats"`` key.
    """
    outcome = idx_svc.get_job_result(job_id)
    if not outcome:
        missing = APIResponse.err(2002, f"Job '{job_id}' not found")
        return JSONResponse(status_code=404, content=missing.model_dump())

    # A record counts as available once it is done or already carries stats.
    available = outcome.get("status") == "done" or "stats" in outcome
    if available:
        return APIResponse.ok(outcome)

    pending = APIResponse.err(2003, f"Job '{job_id}' is still running (status={outcome.get('status')})")
    return JSONResponse(status_code=400, content=pending.model_dump())
|
||||
|
||||
|
||||
@router.delete("/jobs/{job_id}")
async def cancel_job(job_id: str):
    """Cancel an indexing job and report its previous status.

    Responds with HTTP 404 / error code 2002 when the job is unknown.
    ``cancelled`` reflects the first value returned by ``idx_svc.cancel_job``
    instead of always being ``True``.
    """
    meta = idx_svc.get_job_status(job_id)
    if not meta:
        return JSONResponse(
            status_code=404,
            content=APIResponse.err(2002, f"Job '{job_id}' not found").model_dump(),
        )
    ok, prev_status = idx_svc.cancel_job(job_id)
    return APIResponse.ok({
        # NOTE(review): assumes the first element is a success flag — confirm
        # against indexing_service.cancel_job.
        "cancelled": bool(ok),
        "job_id": job_id,
        "previous_status": prev_status,
    })
|
||||
72
backend/routers/kg.py
Normal file
72
backend/routers/kg.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""C 组:知识图谱(6 个端点)"""
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from models.schemas import APIResponse
|
||||
from services import kg_service as svc
|
||||
|
||||
router = APIRouter(prefix="/kg", tags=["Knowledge Graph"])
|
||||
|
||||
|
||||
@router.get("/nodes")
async def list_nodes(
    type: str | None = None,
    doc_id: str | None = None,
    confidence: str | None = None,
    page: int = 1,
    page_size: int = 50,
):
    """Return one page of KG nodes, optionally filtered.

    ``page`` is raised to at least 1 and ``page_size`` is kept within 1..200.
    When no filter is given and the service reports zero nodes in total, the
    response is HTTP 400 / error code 3002 (empty knowledge graph).
    """
    # Zero or negative values from the query string would otherwise reach the
    # service's paging logic unchecked.
    page = max(1, page)
    page_size = max(1, min(page_size, 200))
    result = svc.get_nodes(page, page_size, type, doc_id, confidence)
    if result["total"] == 0 and not any([type, doc_id, confidence]):
        return JSONResponse(
            status_code=400,
            content=APIResponse.err(3002, "Knowledge graph is empty. Index documents first.").model_dump(),
        )
    return APIResponse.ok(result)
|
||||
|
||||
|
||||
@router.get("/edges")
async def list_edges(
    doc_id: str | None = None,
    relation: str | None = None,
    page: int = 1,
    page_size: int = 100,
):
    """Return one page of KG edges, optionally filtered by document or relation.

    ``page`` is raised to at least 1 and ``page_size`` is kept within 1..500.
    """
    # Zero or negative values from the query string would otherwise reach the
    # service's paging logic unchecked.
    page = max(1, page)
    page_size = max(1, min(page_size, 500))
    result = svc.get_edges(page, page_size, doc_id, relation)
    return APIResponse.ok(result)
|
||||
|
||||
|
||||
@router.get("/nodes/{node_id}")
async def get_node_detail(node_id: str):
    """Return the detail record of one KG node, or 404 / code 3001."""
    detail = svc.get_node_detail(node_id)
    if detail:
        return APIResponse.ok(detail)
    missing = APIResponse.err(3001, f"Node '{node_id}' not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
|
||||
|
||||
|
||||
@router.get("/nodes/{node_id}/neighbors")
async def get_node_neighbors(node_id: str, hops: int = 1):
    """Return the neighbours of a node within ``hops`` hops.

    A ``None`` result from the service is reported as 404 / code 3001. Any
    other result, including an empty one, is returned as-is.
    """
    neighbourhood = svc.get_neighbors(node_id, hops)
    if neighbourhood is not None:
        return APIResponse.ok(neighbourhood)
    missing = APIResponse.err(3001, f"Node '{node_id}' not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
|
||||
|
||||
|
||||
@router.get("/stats")
async def get_kg_stats():
    """Return the knowledge-graph statistics computed by the KG service."""
    return APIResponse.ok(svc.get_stats())
|
||||
|
||||
|
||||
@router.get("/export")
async def export_kg(format: str = "json", doc_id: str | None = None):
    """Export the knowledge graph, optionally restricted to one document.

    The ``format`` query parameter is accepted but not used here: the service
    is called with ``doc_id`` only.
    """
    exported = svc.export_kg(doc_id)
    return APIResponse.ok(exported)
|
||||
66
backend/routers/query.py
Normal file
66
backend/routers/query.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""D 组:QA 问答(4 个端点)"""
|
||||
import asyncio
|
||||
from functools import partial
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from models.schemas import APIResponse, BatchQueryRequest, QueryRequest
|
||||
from services import qa_service as svc
|
||||
|
||||
router = APIRouter(prefix="/query", tags=["QA"])
|
||||
|
||||
|
||||
@router.post("")
async def run_query(body: QueryRequest):
    """Answer one question with the QA service.

    The blocking ``svc.run_query`` call runs in the default executor so the
    event loop stays free. A ``ValueError`` whose text contains ``KG_EMPTY``
    becomes HTTP 400 / code 3002; any other error becomes HTTP 500 / code 4001.
    """
    try:
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            partial(svc.run_query, body.question, [m.model_dump() for m in body.history]),
        )
        return APIResponse.ok(result)
    except ValueError as e:
        if "KG_EMPTY" in str(e):
            return JSONResponse(
                status_code=400,
                content=APIResponse.err(3002, "Knowledge graph is empty. Index documents first.").model_dump(),
            )
        return JSONResponse(
            status_code=500,
            content=APIResponse.err(4001, str(e)).model_dump(),
        )
    except Exception as e:
        # Top-level API boundary: report the failure rather than let it escape.
        return JSONResponse(
            status_code=500,
            content=APIResponse.err(4001, f"QA service error: {e}").model_dump(),
        )
|
||||
|
||||
|
||||
@router.post("/batch", status_code=202)
async def start_batch(body: BatchQueryRequest):
    """Start a batch of questions; more than 20 is rejected with 400 / code 1001."""
    questions = body.questions
    if len(questions) <= 20:
        return APIResponse.ok(svc.start_batch(questions))
    too_many = APIResponse.err(1001, "Maximum 20 questions per batch")
    return JSONResponse(status_code=400, content=too_many.model_dump())
|
||||
|
||||
|
||||
@router.get("/batch/{batch_id}")
async def get_batch_result(batch_id: str):
    """Return the stored result of a batch, or 404 / code 2002 if unknown."""
    batch = svc.get_batch_result(batch_id)
    if batch:
        return APIResponse.ok(batch)
    missing = APIResponse.err(2002, f"Batch '{batch_id}' not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
|
||||
|
||||
|
||||
@router.get("/history")
async def get_query_history(page: int = 1, page_size: int = 20):
    """Return one page of past queries.

    ``page`` is raised to at least 1 and ``page_size`` is kept within 1..50.
    """
    # Zero or negative values from the query string would otherwise reach the
    # service's paging logic unchecked.
    page = max(1, page)
    page_size = max(1, min(page_size, 50))
    result = svc.get_history(page, page_size)
    return APIResponse.ok(result)
|
||||
43
backend/routers/search.py
Normal file
43
backend/routers/search.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""E 组:搜索(3 个端点)"""
|
||||
from fastapi import APIRouter, Query, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from models.schemas import APIResponse
|
||||
from services import search_service as svc
|
||||
|
||||
router = APIRouter(prefix="/search", tags=["Search"])
|
||||
|
||||
|
||||
@router.get("/entities")
async def search_entities(q: str, type: str | None = None, limit: int = 15):
    """Search entities by query string, optionally restricted to one type.

    ``limit`` is kept within 1..100 before being passed to the service.
    """
    # Mirror the two-sided clamp used for ``max_hops`` in this router so a zero
    # or negative limit never reaches the service.
    limit = max(1, min(limit, 100))
    result = svc.search_entities(q, type, limit)
    return APIResponse.ok(result)
|
||||
|
||||
|
||||
@router.get("/path")
async def search_path(request: Request, max_hops: int = 3):
    """Find a path between two nodes given as ``from`` and ``to`` query params.

    Missing endpoints yield 400 / code 1001. ``max_hops`` is kept within 1..5.
    A ``None`` result from the service is reported as 404 / code 3001.
    """
    # 'from' is a Python keyword, so both endpoints are read from the raw query.
    raw_query = dict(request.query_params)
    from_id, to_id = raw_query.get("from"), raw_query.get("to")

    if not (from_id and to_id):
        invalid = APIResponse.err(1001, "Parameters 'from' and 'to' are required")
        return JSONResponse(status_code=400, content=invalid.model_dump())

    hop_limit = min(max(max_hops, 1), 5)
    found = svc.search_path(from_id, to_id, hop_limit)
    if found is not None:
        return APIResponse.ok(found)

    missing = APIResponse.err(3001, "One or both nodes not found")
    return JSONResponse(status_code=404, content=missing.model_dump())
|
||||
|
||||
|
||||
@router.get("/graph")
async def search_graph(q: str, include_neighbors: bool = False):
    """Return the search service's graph result for query ``q``.

    ``include_neighbors`` is forwarded to the service unchanged.
    """
    return APIResponse.ok(svc.search_graph(q, include_neighbors))
|
||||
171
backend/routers/system.py
Normal file
171
backend/routers/system.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""F 组:系统(4 个端点)"""
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
from models.schemas import APIResponse
|
||||
from storage import file_store as fs
|
||||
|
||||
router = APIRouter(tags=["System"])
|
||||
|
||||
_START_TIME = time.time()
|
||||
|
||||
|
||||
@router.get("/health")
async def health_check():
    """Report the health of the backend's external dependencies.

    Checks that the MinerU interpreter exists, that ``langextract`` can be
    imported by the backend virtualenv's interpreter, that a DeepSeek API key
    is configured, and which storage files exist. The overall status is
    ``"healthy"`` only when every component reports ``"ok"``, otherwise
    ``"degraded"``.
    """
    import asyncio
    import subprocess

    from dotenv import load_dotenv

    backend_root = Path(__file__).parent.parent
    load_dotenv(backend_root / ".env", override=False)

    mineru_python = Path(os.getenv("MINERU_PYTHON", "F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe"))
    # A venv keeps its interpreter under Scripts/ on Windows and bin/ elsewhere.
    venv_python = "Scripts/python.exe" if os.name == "nt" else "bin/python"
    backend_python = backend_root / ".venv" / venv_python
    deepseek_key = os.getenv("DEEPSEEK_API_KEY", "")
    deepseek_url = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")

    # Check if langextract is importable from backend's venv. The probe can
    # take up to 10 s, so it runs in a worker thread to keep the event loop free.
    try:
        probe = await asyncio.to_thread(
            subprocess.run,
            [str(backend_python), "-c", "import langextract; print('ok')"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        langextract_ok = probe.returncode == 0 and "ok" in probe.stdout
    except Exception:
        # Deliberate best effort: a missing interpreter or a timeout is
        # reported as an unhealthy component, not as a failed request.
        langextract_ok = False

    components = {
        "mineru_venv": {
            "status": "ok" if mineru_python.exists() else "error",
            "path": str(mineru_python),
            "exists": mineru_python.exists(),
        },
        "langextract_venv": {
            "status": "ok" if langextract_ok else "error",
            "path": str(backend_python),
            "exists": backend_python.exists(),
        },
        "deepseek_api": {
            "status": "ok" if deepseek_key else "error",
            "base_url": deepseek_url,
            "key_configured": bool(deepseek_key),
        },
        "storage": {
            "status": "ok",
            "kg_nodes_exists": fs.kg_nodes_path().exists(),
            "kg_edges_exists": fs.kg_edges_path().exists(),
            "uploads_dir_exists": fs.UPLOADS_DIR.exists(),
        },
    }

    overall = "healthy" if all(c["status"] == "ok" for c in components.values()) else "degraded"

    return APIResponse.ok({
        "status": overall,
        "version": "1.0.0",
        "uptime_seconds": round(time.time() - _START_TIME, 1),
        "components": components,
    })
|
||||
|
||||
|
||||
@router.get("/system/stats")
async def system_stats():
    """Return aggregate counts over documents, the KG, queries and jobs.

    Document counts are split by ``status`` (``"indexed"`` / ``"failed"``), and
    nodes are tallied per ``type`` (``"UNKNOWN"`` when the field is missing).
    """
    from services import indexing_service as idx_svc

    documents = list(fs.load_docs_index().values())
    kg_nodes = fs.load_kg_nodes()
    kg_edges = fs.load_kg_edges()
    queries = fs.load_query_history()

    per_type: dict[str, int] = {}
    for node in kg_nodes:
        label = node.get("type", "UNKNOWN")
        per_type[label] = per_type.get(label, 0) + 1

    statuses = [document.get("status") for document in documents]
    summary = {
        "total_documents": len(documents),
        "indexed_documents": statuses.count("indexed"),
        "failed_documents": statuses.count("failed"),
        "total_nodes": len(kg_nodes),
        "total_edges": len(kg_edges),
        "type_distribution": per_type,
        "total_queries": len(queries),
        "active_jobs": idx_svc.count_active_jobs(),
        "storage_used_mb": fs.storage_used_mb(),
    }
    return APIResponse.ok(summary)
|
||||
|
||||
|
||||
@router.get("/system/formats")
async def list_formats():
    """Describe the upload formats, OCR languages and usage notes exposed by the API.

    The payload is static: nothing is read from disk or from the environment.
    """

    def _format(ext: str, description: str, *, image: bool = False) -> dict:
        # Image formats are limited to a single page and flagged as requiring OCR;
        # every other format allows 600 pages. All share the 200 MB size cap.
        return {
            "ext": ext,
            "description": description,
            "max_size_mb": 200,
            "max_pages": 1 if image else 600,
            "requires_ocr": image,
        }

    formats = [
        _format("pdf", "PDF 文档(文本型/扫描型/混合型)"),
        _format("docx", "Microsoft Word(新版)"),
        _format("doc", "Microsoft Word(旧版)"),
        _format("pptx", "PowerPoint(新版)"),
        _format("ppt", "PowerPoint(旧版)"),
        _format("png", "PNG 图片(单页)", image=True),
        _format("jpg", "JPEG 图片(单页)", image=True),
        _format("jpeg", "JPEG 图片(单页)", image=True),
        _format("html", "HTML 文件"),
    ]

    languages = (
        ("ch", "中文(默认)"),
        ("en", "英文"),
        ("japan", "日文"),
        ("korean", "韩文"),
        ("french", "法文"),
        ("german", "德文"),
    )

    notes = [
        "language 参数默认值为 'ch'(非 'zh'),遵循 PaddleOCR v3 语言代码规范",
        "上传时不需要携带 Content-Type,服务端自动识别",
        "PNG/JPG/JPEG 单次最多处理 1 页",
    ]

    return APIResponse.ok({
        "formats": formats,
        "ocr_languages": [{"code": code, "name": name} for code, name in languages],
        "notes": notes,
    })
|
||||
|
||||
|
||||
@router.get("/system/demo")
async def get_demo_data():
    """Return KG nodes and edges with summary statistics for the demo view.

    The backend's own KG is used when it has nodes. Otherwise the legacy
    graphrag_pipeline output files are loaded; if those are missing too, a
    400 JSON response carrying error code 3002 is returned.
    """
    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()

    if not nodes:
        # Fallback: load from existing graphrag_pipeline output
        legacy_nodes_path = Path("F:/GraphRAGAgent/graphrag_pipeline/output/kg_nodes.json")
        legacy_edges_path = Path("F:/GraphRAGAgent/graphrag_pipeline/output/kg_edges.json")

        if not legacy_nodes_path.exists():
            from fastapi.responses import JSONResponse

            failure = APIResponse.err(3002, "No demo data available. Index a document first.")
            return JSONResponse(status_code=400, content=failure.model_dump())

        import json

        nodes = json.loads(legacy_nodes_path.read_text(encoding="utf-8"))
        if legacy_edges_path.exists():
            edges = json.loads(legacy_edges_path.read_text(encoding="utf-8"))
        else:
            edges = []

    # Per-type node tally; nodes without a "type" key count as UNKNOWN.
    type_counts: dict[str, int] = {}
    for item in nodes:
        label = item.get("type", "UNKNOWN")
        type_counts[label] = 1 + type_counts.get(label, 0)

    import networkx as nx

    # An undirected graph of ids only is enough to compute the density.
    graph = nx.Graph()
    graph.add_nodes_from(item["id"] for item in nodes)
    graph.add_edges_from((link["source"], link["target"]) for link in edges)

    if graph.number_of_nodes() > 1:
        density = round(nx.density(graph), 4)
    else:
        density = 0.0

    return APIResponse.ok({
        "nodes": nodes,
        "edges": edges,
        "stats": {
            "nodes": len(nodes),
            "edges": len(edges),
            "type_counts": type_counts,
            "density": density,
        },
    })
|
||||
0
backend/services/__init__.py
Normal file
0
backend/services/__init__.py
Normal file
109
backend/services/document_service.py
Normal file
109
backend/services/document_service.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""Document Service — file upload, metadata CRUD."""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from storage import file_store as fs
|
||||
|
||||
# Lower-case file extensions (without the leading dot) accepted by validate_upload().
ALLOWED_EXTENSIONS = {"pdf", "docx", "doc", "pptx", "ppt", "png", "jpg", "jpeg", "html"}
# Upload size cap enforced by validate_upload(); compared against size_bytes / (1024 * 1024).
MAX_FILE_SIZE_MB = 200
|
||||
|
||||
|
||||
def validate_upload(filename: str, size_bytes: int) -> tuple[bool, int, str]:
    """Check an upload's name, extension and size.

    Returns:
        ``(ok, error_code, error_msg)``. On success this is ``(True, 0, "")``.
        Error codes: 1001 for an empty name or one containing a path
        separator, 1002 for an extension outside ALLOWED_EXTENSIONS, 1003 for
        a size above MAX_FILE_SIZE_MB.
    """
    if not filename or any(sep in filename for sep in ("/", "\\")):
        return False, 1001, "Invalid filename"

    extension = Path(filename).suffix.lower().lstrip(".")
    if extension not in ALLOWED_EXTENSIONS:
        supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
        return False, 1002, f"Unsupported file format: .{extension}. Supported: {supported}"

    megabytes = size_bytes / (1024 * 1024)
    if megabytes > MAX_FILE_SIZE_MB:
        return False, 1003, f"File size {megabytes:.1f}MB exceeds {MAX_FILE_SIZE_MB}MB limit"

    return True, 0, ""
|
||||
|
||||
|
||||
def save_upload(filename: str, content: bytes, language: str = "ch",
                enable_formula: bool = True, enable_table: bool = True) -> dict:
    """Write an uploaded file to the uploads directory and register its metadata.

    The file is stored as ``<doc_id>_<filename>`` where ``doc_id`` is the first
    eight hex characters of a random UUID. The metadata record is saved through
    ``fs.save_doc`` and returned.
    """
    doc_id = uuid.uuid4().hex[:8]
    extension = Path(filename).suffix.lower().lstrip(".")
    stored_name = f"{doc_id}_{filename}"
    (fs.UPLOADS_DIR / stored_name).write_bytes(content)

    record = {
        "doc_id": doc_id,
        "filename": filename,
        "format": extension,
        "size_bytes": len(content),
        "pages": None,
        "uploaded_at": datetime.now(timezone.utc).isoformat(),
        "status": "uploaded",
        "language": language,
        "enable_formula": enable_formula,
        "enable_table": enable_table,
        # internal: actual stored filename
        "upload_filename": stored_name,
    }
    fs.save_doc(record)
    return record
|
||||
|
||||
|
||||
def get_document(doc_id: str) -> dict | None:
    """Look up a document's metadata by id; returns whatever ``fs.get_doc`` yields."""
    record = fs.get_doc(doc_id)
    return record
|
||||
|
||||
|
||||
def list_documents(page: int = 1, page_size: int = 20,
                   status: str | None = None, fmt: str | None = None) -> dict:
    """Return one page of documents, newest upload first.

    Args:
        page: 1-based page number.
        page_size: Number of items per page.
        status: When truthy, keep only documents with this exact status.
        fmt: When truthy, keep only documents whose format equals ``fmt.lower()``.

    Returns:
        A dict with ``total`` (count after filtering), ``page``, ``page_size``
        and ``items`` (the requested slice).
    """
    documents = sorted(
        fs.load_docs_index().values(),
        key=lambda d: d.get("uploaded_at", ""),
        reverse=True,
    )
    if status:
        documents = [d for d in documents if d.get("status") == status]
    if fmt:
        wanted_format = fmt.lower()
        documents = [d for d in documents if d.get("format") == wanted_format]

    offset = (page - 1) * page_size
    return {
        "total": len(documents),
        "page": page,
        "page_size": page_size,
        "items": documents[offset: offset + page_size],
    }
|
||||
|
||||
|
||||
def delete_document(doc_id: str) -> tuple[bool, int, int]:
    """Delete a document together with its KG contributions, upload file and jobs.

    Returns:
        ``(ok, removed_nodes, removed_edges)``. ``ok`` is False (with zero
        counts) when the document id is not in the index.
    """
    doc = fs.get_doc(doc_id)
    if not doc:
        return False, 0, 0

    # Remove from KG
    removed_nodes, removed_edges = fs.remove_doc_from_kg(doc_id)

    # Remove upload file. An empty or missing name would resolve to the uploads
    # directory itself, and unlink() on a directory raises, so only a real
    # file is ever removed.
    upload_filename = doc.get("upload_filename")
    if upload_filename:
        upload_path = fs.UPLOADS_DIR / upload_filename
        if upload_path.is_file():
            upload_path.unlink(missing_ok=True)

    # Remove associated jobs
    for meta in fs.list_all_jobs():
        if meta.get("doc_id") == doc_id:
            fs.delete_job(meta["job_id"])

    # Remove from index
    index = fs.load_docs_index()
    index.pop(doc_id, None)
    fs.save_docs_index(index)

    return True, removed_nodes, removed_edges
|
||||
|
||||
|
||||
def update_doc_status(doc_id: str, status: str, pages: int | None = None) -> None:
    """Set a document's status (and page count, when given) in the docs index.

    Unknown document ids are ignored.
    """
    index = fs.load_docs_index()
    if doc_id not in index:
        return

    entry = index[doc_id]
    entry["status"] = status
    if pages is not None:
        entry["pages"] = pages
    fs.save_docs_index(index)
|
||||
255
backend/services/indexing_service.py
Normal file
255
backend/services/indexing_service.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""Indexing Service — Pipeline orchestration (parsing → extracting → indexing)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from storage import file_store as fs
|
||||
from services.document_service import update_doc_status
|
||||
|
||||
# Load the .env file two directories above this module at import time.
# override=True lets values from the file replace variables already set in the
# process environment.
load_dotenv(Path(__file__).parent.parent / ".env", override=True)

# Interpreter and script used to launch the MinerU subprocess in _run_pipeline();
# both can be overridden through environment variables of the same name.
MINERU_PYTHON = Path(os.getenv("MINERU_PYTHON", "F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe"))
MINERU_PIPELINE = Path(os.getenv("MINERU_PIPELINE", "F:/GraphRAGAgent/mineru_mvp/pipeline.py"))

# In-memory registry of active jobs {job_id: threading.Thread}
_active_threads: dict[str, threading.Thread] = {}
# Cancellation flags {job_id: bool}: set by cancel_job(), polled by _run_pipeline().
_cancel_flags: dict[str, bool] = {}
|
||||
|
||||
|
||||
def start_indexing(doc_id: str) -> dict:
    """Create a job record for a document and start the pipeline in a daemon thread.

    Returns the initial job meta dict, or ``None`` when the document id is
    unknown (the annotation is kept as ``dict`` for existing callers).
    """
    doc = fs.get_doc(doc_id)
    if not doc:
        return None  # type: ignore

    job_id = f"job_{uuid.uuid4().hex[:8]}"
    created_at = datetime.now(timezone.utc).isoformat()
    source_path = fs.UPLOADS_DIR / doc.get("upload_filename", "")

    meta = {
        "job_id": job_id,
        "doc_id": doc_id,
        "status": "submitted",
        "stage": "Job submitted",
        "progress": {"parsed_pages": 0, "total_pages": 0, "extracted_entities": 0},
        "created_at": created_at,
        "elapsed_seconds": 0.0,
        "error": None,
        "pdf_name": doc["filename"],
        "pdf_path": str(source_path),
    }
    fs.save_job_meta(job_id, meta)

    # Register the flag and the thread before starting so the worker finds both.
    _cancel_flags[job_id] = False
    worker = threading.Thread(target=_run_pipeline, args=(job_id,), daemon=True)
    _active_threads[job_id] = worker
    worker.start()

    return meta
|
||||
|
||||
|
||||
def _update_meta(job_id: str, **kwargs) -> None:
    """Merge ``kwargs`` into a job's stored meta and refresh ``elapsed_seconds``.

    If the job has no stored meta (for example its directory was removed while
    the pipeline was still running) the call is a no-op. Previously this case
    raised ``KeyError`` on ``created_at``, including from inside the pipeline's
    own error handler.
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return

    meta.update(kwargs)

    # Elapsed time is measured from the job's creation timestamp; skip it when
    # the record carries none rather than failing the whole update.
    created_at = meta.get("created_at")
    if created_at:
        started = datetime.fromisoformat(created_at)
        meta["elapsed_seconds"] = round(
            (datetime.now(timezone.utc) - started).total_seconds(), 1
        )

    fs.save_job_meta(job_id, meta)
|
||||
|
||||
|
||||
def _cancel_requested(job_id: str) -> bool:
    """Mark the job meta as cancelled and return True if cancellation was requested."""
    if not _cancel_flags.get(job_id):
        return False
    _update_meta(job_id, status="cancelled", stage="Cancelled")
    return True


def _run_pipeline(job_id: str) -> None:
    """Run the parsing -> extracting -> indexing pipeline for one job (thread target).

    Stage 1 runs the MinerU script as a subprocess and locates its
    ``*_content_list.json`` output. Stage 2 assembles pages and extracts
    entities page by page. Stage 3 builds the KG, writes the per-job artefacts
    and merges the result into the global KG.

    Cancellation is cooperative: the flag is polled before each stage and
    before each page. Any exception marks both the job and the document as
    failed. The job is always removed from the in-memory registries on exit
    from the ``try`` block.
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return

    doc_id = meta["doc_id"]
    pdf_path = Path(meta["pdf_path"])
    job_dir = fs.job_dir(job_id)
    start_time = time.time()

    try:
        # ── Stage 1: parsing ──────────────────────────────────────────────
        if _cancel_requested(job_id):
            return

        _update_meta(job_id, status="parsing", stage="MinerU document parsing...")
        mineru_out_dir = job_dir / "mineru_output"
        mineru_out_dir.mkdir(parents=True, exist_ok=True)

        result = subprocess.run(
            [str(MINERU_PYTHON), str(MINERU_PIPELINE), str(pdf_path)],
            cwd=str(MINERU_PIPELINE.parent),
            capture_output=True,
            text=True,
            timeout=600,
        )

        if result.returncode != 0:
            raise RuntimeError(f"MinerU failed: {result.stderr[:500]}")

        # Find content_list.json in MinerU output
        # MinerU writes output to mineru_mvp/output/{stem}/
        stem = pdf_path.stem
        mineru_default_out = MINERU_PIPELINE.parent / "output" / stem
        content_list_path = None

        if mineru_default_out.exists():
            matches = list(mineru_default_out.glob("*_content_list.json"))
            if matches:
                content_list_path = matches[0]
                # Copy to our job dir
                import shutil
                shutil.copytree(str(mineru_default_out), str(mineru_out_dir), dirs_exist_ok=True)

        if not content_list_path:
            # Fallback: search job mineru_output dir
            matches = list(mineru_out_dir.glob("*_content_list.json"))
            if matches:
                content_list_path = matches[0]

        if not content_list_path or not content_list_path.exists():
            raise RuntimeError(f"MinerU output content_list.json not found. stdout: {result.stdout[:300]}")

        # ── Stage 2: extracting ───────────────────────────────────────────
        if _cancel_requested(job_id):
            return

        from pipeline.text_assembler import load_content_list, assemble_pages, count_blocks_by_type
        from pipeline.entity_extractor import create_model, extract_entities
        from pipeline.kg_builder import build_kg, extractions_to_records

        content_list = load_content_list(content_list_path)
        pages = assemble_pages(content_list)
        total_pages = len(pages)
        block_types = count_blocks_by_type(content_list)

        _update_meta(
            job_id,
            status="extracting",
            stage="Extracting entities (LangExtract + DeepSeek)...",
            progress={"parsed_pages": total_pages, "total_pages": total_pages, "extracted_entities": 0},
        )
        update_doc_status(doc_id, "indexing", pages=total_pages)

        model = create_model()
        annotated_docs = []
        total_entities = 0

        for i, page in enumerate(pages):
            if _cancel_requested(job_id):
                return

            _update_meta(
                job_id,
                stage=f"Extracting entities page {i+1}/{total_pages} (LangExtract + DeepSeek)...",
                progress={"parsed_pages": total_pages, "total_pages": total_pages,
                          "extracted_entities": total_entities},
            )
            ann_doc = extract_entities(page.text, model)
            annotated_docs.append(ann_doc)
            total_entities += len(ann_doc.extractions) if ann_doc.extractions else 0

        # Save raw extractions
        records = extractions_to_records(pages, annotated_docs, doc_id)
        fs.write_json(job_dir / "extractions.json", records)

        # ── Stage 3: indexing ─────────────────────────────────────────────
        # A cancel issued while the last page was being extracted must not be
        # overwritten by "indexing"/"done", nor merged into the global KG.
        if _cancel_requested(job_id):
            return

        _update_meta(job_id, status="indexing", stage="Building knowledge graph...")

        nodes, edges = build_kg(pages, annotated_docs, doc_id)
        fs.write_json(job_dir / "kg_nodes.json", nodes)
        fs.write_json(job_dir / "kg_edges.json", edges)

        # Merge into global KG
        fs.merge_kg(nodes, edges, doc_id)

        # Count alignment types
        alignment_counts: dict[str, int] = {}
        type_counts: dict[str, int] = {}
        for r in records:
            al = r.get("alignment") or "null"
            alignment_counts[al] = alignment_counts.get(al, 0) + 1
            t = r.get("type", "UNKNOWN")
            type_counts[t] = type_counts.get(t, 0) + 1

        elapsed = round(time.time() - start_time, 1)
        stats = {
            "blocks": len(content_list),
            "block_types": block_types,
            "pages": total_pages,
            "raw_extractions": len(records),
            "nodes": len(nodes),
            "edges": len(edges),
            "type_counts": type_counts,
            "alignment_counts": alignment_counts,
            "elapsed_seconds": elapsed,
        }
        fs.write_json(job_dir / "stats.json", stats)

        _update_meta(
            job_id,
            status="done",
            stage="Complete",
            progress={"parsed_pages": total_pages, "total_pages": total_pages,
                      "extracted_entities": len(records)},
        )
        update_doc_status(doc_id, "indexed", pages=total_pages)

    except Exception as exc:
        # Top-level boundary of the worker thread: record the failure on both
        # the job and the document instead of letting the thread die silently.
        _update_meta(job_id, status="failed", stage=f"Error: {exc}", error=str(exc))
        update_doc_status(doc_id, "failed")
    finally:
        _active_threads.pop(job_id, None)
        _cancel_flags.pop(job_id, None)
|
||||
|
||||
|
||||
def get_job_status(job_id: str) -> dict | None:
    """Return the stored meta dict for a job, as loaded by ``fs.load_job_meta``."""
    meta = fs.load_job_meta(job_id)
    return meta
|
||||
|
||||
|
||||
def get_job_result(job_id: str) -> dict | None:
    """Return the artefacts of a finished job.

    Returns ``None`` for an unknown job, the bare meta dict while the job's
    status is anything other than ``"done"``, and otherwise a dict combining
    the job ids with the stats, extractions, nodes and edges read from the
    job directory (each defaulting to an empty container when absent).
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return None
    if meta["status"] != "done":
        return meta

    base_dir = fs.job_dir(job_id)

    def _artefact(name: str, fallback):
        return fs.read_json(base_dir / name) or fallback

    return {
        "job_id": meta["job_id"],
        "doc_id": meta["doc_id"],
        "status": "done",
        "stats": _artefact("stats.json", {}),
        "extractions": _artefact("extractions.json", []),
        "nodes": _artefact("kg_nodes.json", []),
        "edges": _artefact("kg_edges.json", []),
    }
|
||||
|
||||
|
||||
def cancel_job(job_id: str) -> tuple[bool, str]:
    """Request cancellation of a job.

    Returns:
        ``(False, "not_found")`` when the job has no stored meta, otherwise
        ``(True, previous_status)``.

    A job that already reached a final state ("done", "failed" or
    "cancelled") is left untouched: previously its status was overwritten with
    "cancelled" and a flag that nothing would ever clear was left in
    ``_cancel_flags``.
    """
    meta = fs.load_job_meta(job_id)
    if not meta:
        return False, "not_found"

    prev_status = meta["status"]
    if prev_status in ("done", "failed", "cancelled"):
        # Nothing is running any more; keep the stored outcome intact.
        return True, prev_status

    _cancel_flags[job_id] = True
    _update_meta(job_id, status="cancelled", stage="Cancelled by user")
    return True, prev_status
|
||||
|
||||
|
||||
def count_active_jobs() -> int:
    """Count the registered pipeline threads that are still alive."""
    alive = [worker for worker in _active_threads.values() if worker.is_alive()]
    return len(alive)
|
||||
167
backend/services/kg_service.py
Normal file
167
backend/services/kg_service.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""KG Service — NetworkX graph operations over the global KG."""
|
||||
from __future__ import annotations
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from storage import file_store as fs
|
||||
|
||||
|
||||
def _load_graph() -> nx.Graph:
    """Build an undirected NetworkX graph from the stored KG nodes and edges.

    Every node keeps all of its stored fields as node attributes; every edge
    carries ``relation``, ``doc_id`` and ``page`` (defaulting to
    ``"CO_OCCURS_IN"``, ``""`` and ``0``).
    """
    stored_nodes = fs.load_kg_nodes()
    stored_edges = fs.load_kg_edges()

    graph = nx.Graph()
    graph.add_nodes_from((item["id"], item) for item in stored_nodes)
    graph.add_edges_from(
        (
            link["source"],
            link["target"],
            {
                "relation": link.get("relation", "CO_OCCURS_IN"),
                "doc_id": link.get("doc_id", ""),
                "page": link.get("page", 0),
            },
        )
        for link in stored_edges
    )
    return graph
|
||||
|
||||
|
||||
def get_nodes(page: int = 1, page_size: int = 50,
              node_type: str | None = None,
              doc_id: str | None = None,
              confidence: str | None = None) -> dict:
    """Return one page of KG nodes, each annotated with its graph degree.

    Args:
        page: 1-based page number.
        page_size: Number of nodes per page.
        node_type: When truthy, keep nodes whose type matches case-insensitively.
        doc_id: When truthy, keep nodes whose ``source_doc`` equals it.
        confidence: When truthy, keep nodes whose ``confidence`` equals it.

    Returns:
        A dict with ``total`` (count after filtering), ``page``, ``page_size``
        and ``items``.
    """
    nodes = fs.load_kg_nodes()

    # Attach degree
    degree_of = dict(_load_graph().degree())
    for item in nodes:
        item["degree"] = degree_of.get(item["id"], 0)

    if node_type:
        wanted_type = node_type.upper()
        nodes = [item for item in nodes if item.get("type", "").upper() == wanted_type]
    if doc_id:
        nodes = [item for item in nodes if item.get("source_doc") == doc_id]
    if confidence:
        nodes = [item for item in nodes if item.get("confidence") == confidence]

    offset = (page - 1) * page_size
    return {
        "total": len(nodes),
        "page": page,
        "page_size": page_size,
        "items": nodes[offset: offset + page_size],
    }
|
||||
|
||||
|
||||
def get_edges(page: int = 1, page_size: int = 100,
              doc_id: str | None = None,
              relation: str | None = None) -> dict:
    """Return one page of KG edges, optionally filtered by document and relation.

    Returns a dict with ``total`` (count after filtering), ``page``,
    ``page_size`` and ``items``.
    """
    selected = fs.load_kg_edges()
    if doc_id:
        selected = [link for link in selected if link.get("doc_id") == doc_id]
    if relation:
        selected = [link for link in selected if link.get("relation") == relation]

    offset = (page - 1) * page_size
    return {
        "total": len(selected),
        "page": page,
        "page_size": page_size,
        "items": selected[offset: offset + page_size],
    }
|
||||
|
||||
|
||||
def get_node_detail(node_id: str) -> dict | None:
    """Return one stored node with ``degree``, ``degree_centrality`` and ``neighbor_count``.

    Returns ``None`` when no stored node has the given id. A node that is
    missing from the built graph gets zero for all three metrics.
    """
    node = None
    for candidate in fs.load_kg_nodes():
        if candidate["id"] == node_id:
            node = candidate
            break
    if not node:
        return None

    graph = _load_graph()
    if node_id in graph:
        degree = graph.degree(node_id)
        centrality = round(nx.degree_centrality(graph).get(node_id, 0.0), 4)
    else:
        degree = 0
        centrality = 0.0

    node["degree"] = degree
    node["degree_centrality"] = centrality
    # neighbor_count mirrors the degree value.
    node["neighbor_count"] = degree
    return node
|
||||
|
||||
|
||||
def get_neighbors(node_id: str, hops: int = 1) -> dict | None:
    """Return the nodes reachable from ``node_id`` within ``hops`` steps, grouped by distance.

    Returns ``None`` when no stored node has the given id. ``hops`` is clamped
    to the range 1..3 before the search; the clamped value is echoed in the
    result. ``neighbors_by_hop`` maps the distance (as a string) to brief node
    descriptions.
    """
    center_node = next(
        (item for item in fs.load_kg_nodes() if item["id"] == node_id), None
    )
    if not center_node:
        return None

    center = {
        "id": node_id,
        "name": center_node["name"],
        "type": center_node["type"],
        "page": center_node.get("page", 0),
    }

    graph = _load_graph()
    if node_id not in graph:
        # This branch reports the requested hop count as given (not clamped).
        return {
            "center": center,
            "hops": hops, "neighbors_by_hop": {}, "total_neighbors": 0,
        }

    hops = max(1, min(hops, 3))
    distances = nx.single_source_shortest_path_length(graph, node_id, cutoff=hops)

    grouped: dict[str, list] = {}
    for other_id, distance in distances.items():
        if distance == 0:
            # Distance 0 is the center node itself.
            continue
        attrs = graph.nodes[other_id]
        brief = {
            "id": other_id,
            "name": attrs.get("name", ""),
            "type": attrs.get("type", ""),
            "page": attrs.get("page", 0),
        }
        grouped.setdefault(str(distance), []).append(brief)

    return {
        "center": center,
        "hops": hops,
        "neighbors_by_hop": grouped,
        "total_neighbors": sum(len(group) for group in grouped.values()),
    }
|
||||
|
||||
|
||||
def get_stats() -> dict:
    """Summarise the global KG.

    Reports node and edge counts, density, per-type and per-relation tallies,
    the five nodes with the highest degree centrality, and the distinct
    non-empty ``source_doc`` values.
    """
    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()
    graph = _load_graph()

    type_dist: dict[str, int] = {}
    for item in nodes:
        label = item.get("type", "UNKNOWN")
        type_dist[label] = 1 + type_dist.get(label, 0)

    relation_types: dict[str, int] = {}
    for link in edges:
        label = link.get("relation", "CO_OCCURS_IN")
        relation_types[label] = 1 + relation_types.get(label, 0)

    if graph.number_of_nodes() > 1:
        density = round(nx.density(graph), 4)
    else:
        density = 0.0

    top5: list[dict] = []
    if graph.number_of_nodes() > 0:
        centrality = nx.degree_centrality(graph)
        ranked_ids = sorted(centrality, key=centrality.get, reverse=True)[:5]
        for nid in ranked_ids:
            attrs = graph.nodes[nid]
            top5.append({
                "node_id": nid,
                "name": attrs.get("name", ""),
                "type": attrs.get("type", ""),
                "centrality": round(centrality[nid], 4),
            })

    source_docs = list({item.get("source_doc", "") for item in nodes if item.get("source_doc")})

    return {
        "total_nodes": len(nodes),
        "total_edges": len(edges),
        "density": density,
        "type_distribution": type_dist,
        "relation_types": relation_types,
        "top5_central_nodes": top5,
        "source_documents": source_docs,
    }
|
||||
|
||||
|
||||
def export_kg(doc_id: str | None = None) -> dict:
    """Export the KG (or one document's part of it) as a JSON-ready dict.

    Degrees are computed on the full graph before any filtering. When
    ``doc_id`` is truthy, nodes are filtered on ``source_doc`` and edges on
    ``doc_id``.
    """
    from datetime import datetime, timezone

    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()

    degree_of = dict(_load_graph().degree())
    for item in nodes:
        item["degree"] = degree_of.get(item["id"], 0)

    if doc_id:
        nodes = [item for item in nodes if item.get("source_doc") == doc_id]
        edges = [link for link in edges if link.get("doc_id") == doc_id]

    export = {
        "format": "json",
        "doc_id": doc_id,
        "total_nodes": len(nodes),
        "total_edges": len(edges),
        "exported_at": datetime.now(timezone.utc).isoformat(),
        "nodes": nodes,
        "edges": edges,
    }
    return export
|
||||
85
backend/services/qa_service.py
Normal file
85
backend/services/qa_service.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""QA Service — Agentic-RAG wrapper."""
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from storage import file_store as fs
|
||||
|
||||
|
||||
def run_query(question: str, history: list[dict]) -> dict:
    """Answer a question against the global KG and append the result to the query history.

    Raises:
        ValueError: with message ``"KG_EMPTY"`` when the KG has no nodes.

    Returns:
        The stored history record (id, question, answer, tool calls, cited
        nodes, duration in seconds, UTC timestamp).
    """
    from pipeline.qa_agent import run_qa

    kg_nodes = fs.load_kg_nodes()
    kg_edges = fs.load_kg_edges()
    if not kg_nodes:
        raise ValueError("KG_EMPTY")

    began = time.time()
    outcome = run_qa(question, history, kg_nodes, kg_edges)
    duration = round(time.time() - began, 2)

    query_id = f"q_{uuid.uuid4().hex[:10]}"
    timestamp = datetime.now(timezone.utc).isoformat()

    record = {
        "id": query_id,
        "question": question,
        "answer": outcome["answer"],
        "tool_calls": outcome["tool_calls"],
        "cited_nodes": outcome["cited_nodes"],
        "duration_seconds": duration,
        "timestamp": timestamp,
    }
    fs.append_query_history(record)
    return record
|
||||
|
||||
|
||||
def get_history(page: int = 1, page_size: int = 20) -> dict:
    """Return one page of the stored query history in stored order."""
    records = fs.load_query_history()
    offset = (page - 1) * page_size
    window = records[offset: offset + page_size]
    return {
        "total": len(records),
        "page": page,
        "page_size": page_size,
        "items": window,
    }
|
||||
|
||||
|
||||
def start_batch(questions: list[str]) -> dict:
    """Run a list of questions in a background daemon thread.

    The batch meta is saved once on submission, again after every question so
    that ``get_batch_result`` reflects progress while the batch is running,
    and a final time with status ``"done"``. Previously the counters and
    results were persisted only at the very end, so polling showed 0/N until
    completion and a crash mid-batch lost every result.

    Returns:
        A submission receipt with ``batch_id``, ``total``, ``status`` and
        ``created_at``.
    """
    import threading

    batch_id = f"batch_{uuid.uuid4().hex[:10]}"
    now = datetime.now(timezone.utc).isoformat()
    # Snapshot the questions so later mutation of the caller's list cannot
    # change what the worker processes.
    pending = list(questions)
    meta = {
        "batch_id": batch_id,
        "total": len(pending),
        "completed": 0,
        "failed": 0,
        "status": "submitted",
        "created_at": now,
        "results": [],
    }
    fs.save_batch_meta(batch_id, meta)

    def _run():
        for q in pending:
            try:
                res = run_query(q, [])
            except Exception as e:
                # One failing question must not abort the rest of the batch;
                # the error text is recorded in place of a result.
                meta["failed"] += 1
                meta["results"].append({"question": q, "error": str(e)})
            else:
                meta["results"].append(res)
                meta["completed"] += 1
            # Persist progress after each question.
            fs.save_batch_meta(batch_id, meta)
        meta["status"] = "done"
        fs.save_batch_meta(batch_id, meta)

    threading.Thread(target=_run, daemon=True).start()
    return {"batch_id": batch_id, "total": len(pending), "status": "submitted", "created_at": now}
|
||||
|
||||
|
||||
def get_batch_result(batch_id: str) -> dict | None:
    """Return the stored meta for a batch, as loaded by ``fs.load_batch_meta``."""
    meta = fs.load_batch_meta(batch_id)
    return meta
|
||||
106
backend/services/search_service.py
Normal file
106
backend/services/search_service.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Search Service — entity, path, and graph search."""
|
||||
from __future__ import annotations
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from storage import file_store as fs
|
||||
|
||||
|
||||
def _load_graph() -> nx.Graph:
    """Build an undirected NetworkX graph from the stored KG nodes and edges.

    Nodes keep all stored fields as attributes; edges carry ``relation``,
    ``doc_id`` and ``page`` with defaults ``"CO_OCCURS_IN"``, ``""`` and ``0``.
    """
    stored_nodes = fs.load_kg_nodes()
    stored_edges = fs.load_kg_edges()

    graph = nx.Graph()
    graph.add_nodes_from((item["id"], item) for item in stored_nodes)
    graph.add_edges_from(
        (
            link["source"],
            link["target"],
            {
                "relation": link.get("relation", "CO_OCCURS_IN"),
                "doc_id": link.get("doc_id", ""),
                "page": link.get("page", 0),
            },
        )
        for link in stored_edges
    )
    return graph
|
||||
|
||||
|
||||
def search_entities(q: str, entity_type: str | None = None, limit: int = 15) -> dict:
    """Find nodes whose name contains ``q`` (case-insensitive substring match).

    Args:
        q: Search text.
        entity_type: When truthy, keep only nodes of this type (case-insensitive).
        limit: Maximum number of nodes returned.

    Returns:
        A dict with ``query``, ``total`` (the number of returned items, i.e.
        counted after truncation to ``limit``) and ``items`` (each with its
        ``degree`` attached).
    """
    nodes = fs.load_kg_nodes()
    degree_of = dict(_load_graph().degree())
    needle = q.lower()

    hits = [item for item in nodes if needle in item.get("name", "").lower()]
    if entity_type:
        wanted_type = entity_type.upper()
        hits = [item for item in hits if item.get("type", "").upper() == wanted_type]

    for item in hits:
        item["degree"] = degree_of.get(item["id"], 0)

    hits = hits[:limit]
    return {"query": q, "total": len(hits), "items": hits}
|
||||
|
||||
|
||||
def search_path(from_id: str, to_id: str, max_hops: int = 3) -> dict | None:
    """List every simple path between two nodes, up to ``max_hops`` edges long.

    Returns ``None`` when either endpoint is not a stored node. ``max_hops``
    is clamped to the range 1..5. A ``NetworkXError`` raised by the path
    search yields an empty path list.
    """
    node_map = {item["id"]: item for item in fs.load_kg_nodes()}
    if from_id not in node_map or to_id not in node_map:
        return None  # node not found

    graph = _load_graph()
    max_hops = max(1, min(max_hops, 5))

    try:
        raw_paths = list(nx.all_simple_paths(graph, from_id, to_id, cutoff=max_hops))
    except nx.NetworkXError:
        raw_paths = []

    def _brief(nid) -> dict:
        # Falls back to the id as the display name for ids missing from the map.
        info = node_map.get(nid, {})
        return {"id": nid, "name": info.get("name", nid), "type": info.get("type", "")}

    def _hop(src, dst) -> dict:
        return {
            "source": src,
            "target": dst,
            "relation": graph.edges[src, dst].get("relation", "CO_OCCURS_IN"),
        }

    paths = []
    for sequence in raw_paths:
        hops = [_hop(src, dst) for src, dst in zip(sequence, sequence[1:])]
        paths.append({
            "length": len(sequence) - 1,
            "nodes": [_brief(nid) for nid in sequence],
            "edges": hops,
        })

    origin = node_map[from_id]
    destination = node_map[to_id]
    return {
        "from": {"id": from_id, "name": origin.get("name", ""), "type": origin.get("type", "")},
        "to": {"id": to_id, "name": destination.get("name", ""), "type": destination.get("type", "")},
        "max_hops": max_hops,
        "paths": paths,
        "total_paths": len(paths),
    }
|
||||
|
||||
|
||||
def search_graph(q: str, include_neighbors: bool = False) -> dict:
    """Return the nodes whose name contains ``q`` and the edges among the relevant nodes.

    The relevant node set is the matched nodes, plus their direct neighbours
    when ``include_neighbors`` is true. Only edges with both endpoints in that
    set are returned. ``matched_nodes`` contains the matches only (each with
    its ``degree``), never the neighbours.
    """
    nodes = fs.load_kg_nodes()
    edges = fs.load_kg_edges()
    graph = _load_graph()
    degree_of = dict(graph.degree())
    needle = q.lower()

    matched = [item for item in nodes if needle in item.get("name", "").lower()]
    matched_ids = {item["id"] for item in matched}
    for item in matched:
        item["degree"] = degree_of.get(item["id"], 0)

    relevant = matched_ids
    if include_neighbors:
        adjacent = set()
        for nid in matched_ids:
            if nid in graph:
                adjacent.update(graph.neighbors(nid))
        relevant = matched_ids | adjacent

    subgraph_edges = []
    for link in edges:
        if link.get("source") in relevant and link.get("target") in relevant:
            subgraph_edges.append(link)

    return {
        "query": q,
        "matched_nodes": matched,
        "subgraph_edges": subgraph_edges,
    }
|
||||
0
backend/storage/__init__.py
Normal file
0
backend/storage/__init__.py
Normal file
268
backend/storage/file_store.py
Normal file
268
backend/storage/file_store.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""
|
||||
File Store — unified JSON read/write for all backend data.
|
||||
All data lives under backend/data/.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Root data directory relative to this file
_BASE = Path(__file__).parent.parent / "data"

# Sub-directories for uploaded files, per-job artefacts and the global KG.
UPLOADS_DIR = _BASE / "uploads"
JOBS_DIR = _BASE / "jobs"
KG_DIR = _BASE / "kg"
QUERY_DIR = _BASE / "jobs"  # query_history.jsonl lives here

# Ensure directories exist at import time
# (QUERY_DIR is the same path as JOBS_DIR, so it is covered as well.)
for _d in (UPLOADS_DIR, JOBS_DIR, KG_DIR):
    _d.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generic helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def read_json(path: Path) -> Any:
    """Parse the JSON file at ``path``; return ``None`` when the file does not exist."""
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return None
|
||||
|
||||
|
||||
def write_json(path: Path, data: Any) -> None:
    """Atomically write ``data`` as JSON to ``path``.

    The payload is written to a uniquely named temporary file in the target
    directory and then moved over ``path`` with ``os.replace``, so readers see
    either the old file or the complete new one. Missing parent directories
    are created.

    Compared with a fixed ``<stem>.tmp`` name, the unique temporary file
    cannot collide between concurrent writers of the same target, and it is
    removed again when serialisation or the rename fails.

    Raises:
        TypeError, ValueError: if ``data`` is not JSON-serialisable (the
            existing file, if any, is left untouched).
        OSError: if the file cannot be written or replaced.
    """
    import tempfile

    path.parent.mkdir(parents=True, exist_ok=True)
    # mkstemp creates the file readable and writable only by the owner; the
    # final file keeps those permissions after os.replace.
    fd, tmp_name = tempfile.mkstemp(dir=path.parent, prefix=f"{path.name}.", suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_name, path)
    except BaseException:
        # Best-effort cleanup; the original error is what the caller needs.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
def append_jsonl(path: Path, record: dict) -> None:
    """Append ``record`` as one JSON line to ``path``, creating parent directories if needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialised = json.dumps(record, ensure_ascii=False)
    with path.open("a", encoding="utf-8") as sink:
        sink.write(f"{serialised}\n")
|
||||
|
||||
|
||||
def read_jsonl(path: Path) -> list[dict]:
    """Read every record from a JSONL file.

    Returns an empty list when the file does not exist. Blank lines and lines
    that are not valid JSON are skipped.
    """
    if not path.exists():
        return []

    parsed = []
    with path.open("r", encoding="utf-8") as source:
        for raw in source:
            text = raw.strip()
            if not text:
                continue
            try:
                parsed.append(json.loads(text))
            except json.JSONDecodeError:
                # Malformed lines are skipped on purpose.
                continue
    return parsed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def docs_index_path() -> Path:
    """Return the location of the documents index file under the data root."""
    return _BASE.joinpath("docs_index.json")
|
||||
|
||||
|
||||
def load_docs_index() -> dict[str, dict]:
    """Load the documents index ``{doc_id: DocumentInfo dict}``.

    Falls back to an empty dict when the index file is missing or its
    top-level value is not a JSON object.
    """
    loaded = read_json(docs_index_path())
    if not isinstance(loaded, dict):
        return {}
    return loaded
|
||||
|
||||
|
||||
def save_docs_index(index: dict[str, dict]) -> None:
    """Persist the whole documents index, replacing the stored copy."""
    target = docs_index_path()
    write_json(target, index)
|
||||
|
||||
|
||||
def get_doc(doc_id: str) -> dict | None:
    """Look up one document record by id; ``None`` when it is not indexed."""
    index = load_docs_index()
    return index.get(doc_id)
|
||||
|
||||
|
||||
def save_doc(doc: dict) -> None:
    """Insert or overwrite ``doc`` in the documents index, keyed by ``doc["doc_id"]``."""
    # The index is loaded first, then the entry is set on top of it.
    updated = {**load_docs_index(), doc["doc_id"]: doc}
    save_docs_index(updated)
|
||||
|
||||
|
||||
def delete_doc(doc_id: str) -> bool:
    """Remove a document from the index and delete its uploaded file.

    Args:
        doc_id: Identifier of the document to delete.

    Returns:
        ``True`` if the document was in the index and has been removed,
        ``False`` if no such document exists.
    """
    index = load_docs_index()
    if doc_id not in index:
        return False

    # Capture the record BEFORE dropping it from the index. Looking it up
    # afterwards always yields {}, which made the upload path resolve to the
    # uploads directory itself and unlink() fail on a directory.
    doc_info = index.pop(doc_id)
    save_docs_index(index)

    # Remove the upload file, if the record names one.
    upload_name = doc_info.get("upload_filename") if isinstance(doc_info, dict) else None
    if upload_name:
        upload_path = UPLOADS_DIR / upload_name
        # is_file() guards against ever unlinking a directory.
        if upload_path.is_file():
            upload_path.unlink()
    return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Job helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def job_dir(job_id: str) -> Path:
    """Return the directory that holds all files of job ``job_id``."""
    return JOBS_DIR.joinpath(job_id)
|
||||
|
||||
|
||||
def job_meta_path(job_id: str) -> Path:
    """Return the path of the ``meta.json`` file inside a job's directory."""
    return job_dir(job_id).joinpath("meta.json")
|
||||
|
||||
|
||||
def load_job_meta(job_id: str) -> dict | None:
    """Load a job's metadata; yields whatever ``read_json`` returns for its meta file."""
    meta_file = job_meta_path(job_id)
    return read_json(meta_file)
|
||||
|
||||
|
||||
def save_job_meta(job_id: str, meta: dict) -> None:
    """Write ``meta`` to the job's ``meta.json``, creating the job directory if needed."""
    target_dir = job_dir(job_id)
    target_dir.mkdir(parents=True, exist_ok=True)
    write_json(job_meta_path(job_id), meta)
|
||||
|
||||
|
||||
def list_all_jobs() -> list[dict]:
    """Collect the metadata of every job directory under ``JOBS_DIR``.

    Non-directory entries are ignored, as are job directories whose
    ``meta.json`` is missing or decodes to a falsy value.
    """
    return [
        meta
        for entry in JOBS_DIR.iterdir()
        if entry.is_dir() and (meta := read_json(entry / "meta.json"))
    ]
|
||||
|
||||
|
||||
def delete_job(job_id: str) -> None:
    """Recursively delete a job's directory; a no-op when it does not exist."""
    target = job_dir(job_id)
    if not target.exists():
        return
    shutil.rmtree(target)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Global KG helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def kg_nodes_path() -> Path:
    """Return the path of the global KG node file."""
    return KG_DIR.joinpath("kg_nodes.json")
|
||||
|
||||
|
||||
def kg_edges_path() -> Path:
    """Return the path of the global KG edge file."""
    return KG_DIR.joinpath("kg_edges.json")
|
||||
|
||||
|
||||
def load_kg_nodes() -> list[dict]:
    """Load the global KG nodes; empty list when the file is missing or not a JSON array."""
    loaded = read_json(kg_nodes_path())
    if not isinstance(loaded, list):
        return []
    return loaded
|
||||
|
||||
|
||||
def load_kg_edges() -> list[dict]:
    """Load the global KG edges; empty list when the file is missing or not a JSON array."""
    loaded = read_json(kg_edges_path())
    if not isinstance(loaded, list):
        return []
    return loaded
|
||||
|
||||
|
||||
def save_kg_nodes(nodes: list[dict]) -> None:
    """Persist the full node list of the global KG, replacing the stored copy."""
    target = kg_nodes_path()
    write_json(target, nodes)
|
||||
|
||||
|
||||
def save_kg_edges(edges: list[dict]) -> None:
    """Persist the full edge list of the global KG, replacing the stored copy."""
    target = kg_edges_path()
    write_json(target, edges)
|
||||
|
||||
|
||||
def merge_kg(new_nodes: list[dict], new_edges: list[dict], doc_id: str) -> tuple[int, int]:
    """Merge one document's KG output into the global KG.

    Any nodes (``source_doc == doc_id``) and edges (``doc_id == doc_id``)
    previously stored for this document are dropped first, so re-indexing a
    document replaces its earlier contribution. New nodes are then added
    unless a node with the same ``(name.lower(), type)`` already exists, and
    new edges unless an edge with the same unordered endpoint pair and
    ``doc_id`` already exists.

    Args:
        new_nodes: Nodes produced for the document; each needs ``name`` and ``type``.
        new_edges: Edges produced for the document; each needs ``source``,
            ``target`` and ``doc_id``.
        doc_id: Identifier of the document being merged.

    Returns:
        ``(total_nodes, total_edges)`` — the sizes of the global KG after the
        merge (not the number of removed/added items).

    Raises:
        KeyError: If a node or edge lacks one of the required keys.
    """

    def node_key(node: dict) -> tuple:
        # Case-insensitive name plus type identifies a node.
        return (node["name"].lower(), node["type"])

    def edge_key(edge: dict) -> tuple:
        # Order the endpoints so A-B and B-A count as the same edge.
        src, tgt = edge["source"], edge["target"]
        return (min(src, tgt), max(src, tgt), edge["doc_id"])

    existing_nodes = load_kg_nodes()
    existing_edges = load_kg_edges()

    # Remove nodes/edges previously contributed by this document.
    nodes = [n for n in existing_nodes if n.get("source_doc") != doc_id]
    edges = [e for e in existing_edges if e.get("doc_id") != doc_id]

    seen_nodes = {node_key(n) for n in nodes}
    for node in new_nodes:
        key = node_key(node)
        if key not in seen_nodes:
            seen_nodes.add(key)
            nodes.append(node)

    # NOTE(review): a new node dropped as a duplicate keeps no id mapping, so
    # new edges that reference its id are stored unchanged — confirm whether
    # edge endpoints should be remapped to the surviving node.
    seen_edges = {edge_key(e) for e in edges}
    for edge in new_edges:
        key = edge_key(edge)
        if key not in seen_edges:
            seen_edges.add(key)
            edges.append(edge)

    save_kg_nodes(nodes)
    save_kg_edges(edges)
    return len(nodes), len(edges)
|
||||
|
||||
|
||||
def remove_doc_from_kg(doc_id: str) -> tuple[int, int]:
    """Drop every node and edge attributed to ``doc_id`` from the global KG.

    Nodes are matched on ``source_doc`` and edges on ``doc_id``.

    Returns:
        ``(removed_nodes, removed_edges)``.
    """
    all_nodes = load_kg_nodes()
    all_edges = load_kg_edges()
    kept_nodes = [node for node in all_nodes if node.get("source_doc") != doc_id]
    kept_edges = [edge for edge in all_edges if edge.get("doc_id") != doc_id]
    save_kg_nodes(kept_nodes)
    save_kg_edges(kept_edges)
    return len(all_nodes) - len(kept_nodes), len(all_edges) - len(kept_edges)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Query history helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def query_history_path() -> Path:
    """Return the path of the query history JSONL file under the data root."""
    return _BASE.joinpath("query_history.jsonl")
|
||||
|
||||
|
||||
def append_query_history(result: dict) -> None:
    """Append one query result record to the query history file."""
    history_file = query_history_path()
    append_jsonl(history_file, result)
|
||||
|
||||
|
||||
def load_query_history() -> list[dict]:
    """Return all stored query records, newest first."""
    oldest_first = read_jsonl(query_history_path())
    return oldest_first[::-1]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Batch job helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def batch_meta_path(batch_id: str) -> Path:
    """Return the path of the metadata file for batch ``batch_id``."""
    return _BASE.joinpath("batches", f"{batch_id}.json")
|
||||
|
||||
|
||||
def load_batch_meta(batch_id: str) -> dict | None:
    """Load a batch's metadata; yields whatever ``read_json`` returns for its file."""
    meta_file = batch_meta_path(batch_id)
    return read_json(meta_file)
|
||||
|
||||
|
||||
def save_batch_meta(batch_id: str, meta: dict) -> None:
    """Write ``meta`` to the metadata file of batch ``batch_id``."""
    meta_file = batch_meta_path(batch_id)
    write_json(meta_file, meta)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Storage usage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def storage_used_mb() -> float:
    """Return the combined size of all files under the data root, in MiB (2 decimals)."""
    byte_count = sum(
        entry.stat().st_size for entry in _BASE.rglob("*") if entry.is_file()
    )
    return round(byte_count / (1024 * 1024), 2)
|
||||
256
backend/tests/test_api.py
Normal file
256
backend/tests/test_api.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
API integration tests — tests all major endpoints against a running server.
|
||||
Run with: python tests/test_api.py
|
||||
Server must be running on http://localhost:8000
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from pathlib import Path
|
||||
|
||||
# Root URL of the API under test; request paths are appended to it.
BASE = "http://localhost:8000/api/v1"

# Status tags wrapped in ANSI color escape sequences for console output.
PASS = "\033[92m[PASS]\033[0m"
FAIL = "\033[91m[FAIL]\033[0m"
INFO = "\033[94m[INFO]\033[0m"

# Running tally of check() outcomes; decides the process exit code.
results = {"passed": 0, "failed": 0}
|
||||
|
||||
|
||||
def req(method: str, path: str, body: dict | None = None, form: dict | None = None) -> dict:
    """Send one HTTP request to the API and return the decoded JSON object.

    Args:
        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
        path: Path (and query string) appended to ``BASE``.
        body: Optional JSON payload. It is sent whenever it is not ``None``,
            except for a GET with an empty body and no ``form``.
        form: Accepted for interface compatibility; only used to decide
            whether a GET counts as payload-free. It is never transmitted.

    Returns:
        The response body as a dict, for success and HTTP error statuses
        alike. If the body is not a JSON object, a dict with ``http_status``
        and ``raw`` keys is returned instead, so callers' ``.get("code")``
        checks fail cleanly rather than crashing the run.

    Raises:
        urllib.error.URLError: If the server cannot be reached.
    """
    url = BASE + path
    plain_get = method == "GET" and not body and not form
    if body is not None and not plain_get:
        request = urllib.request.Request(
            url,
            data=json.dumps(body).encode(),
            method=method,
            headers={"Content-Type": "application/json"},
        )
    else:
        request = urllib.request.Request(url, method=method)

    try:
        # Context manager closes the connection once the body is read.
        with urllib.request.urlopen(request, timeout=30) as resp:
            status = resp.status
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # Error statuses still carry the API's JSON envelope in the body.
        status = e.code
        try:
            raw = e.read()
        finally:
            e.close()

    try:
        payload = json.loads(raw.decode())
    except ValueError:  # covers UnicodeDecodeError and json.JSONDecodeError
        payload = None
    if isinstance(payload, dict):
        return payload
    return {"http_status": status, "raw": raw.decode(errors="replace")}
|
||||
|
||||
|
||||
def check(name: str, condition: bool, detail: str = "") -> None:
    """Record one assertion outcome in ``results`` and print a PASS/FAIL line.

    ``detail`` is only printed when the assertion fails.
    """
    if not condition:
        results["failed"] += 1
        print(f" {FAIL} {name} {detail}")
        return
    results["passed"] += 1
    print(f" {PASS} {name}")
|
||||
|
||||
|
||||
def wait_for_server(max_retries: int = 15) -> bool:
    """Poll the server root until it answers.

    Makes up to ``max_retries`` attempts, sleeping one second after each
    failure. Returns ``True`` on the first successful request, ``False`` when
    every attempt failed.
    """
    print(f"{INFO} Waiting for server at {BASE}...")
    root_url = BASE.replace("/api/v1", "/")
    for _ in range(max_retries):
        try:
            urllib.request.urlopen(root_url, timeout=3)
            print(f"{INFO} Server is up.")
            return True
        except Exception:
            # Any failure (refused connection, timeout, HTTP error) means
            # "not ready yet"; wait and retry.
            time.sleep(1)
    return False
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Test groups
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_system():
    """Exercise the system endpoints: health, stats, formats and demo data.

    Every ``data`` payload is read as ``r.get("data") or {}`` so an error
    envelope with ``"data": null`` produces failed checks instead of an
    AttributeError that would abort the whole run.
    """
    print("\n── F 组: System ──")

    r = req("GET", "/health")
    d = r.get("data") or {}
    check("GET /health returns code=0", r.get("code") == 0)
    check("health data.status exists", "status" in d)
    check("health data.components exists", "components" in d)
    print(f" {INFO} status={d.get('status')} uptime={d.get('uptime_seconds')}s")

    r = req("GET", "/system/stats")
    check("GET /system/stats returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    check("stats has total_documents", "total_documents" in d)
    check("stats has total_nodes", "total_nodes" in d)
    print(f" {INFO} docs={d.get('total_documents')} nodes={d.get('total_nodes')} edges={d.get('total_edges')}")

    r = req("GET", "/system/formats")
    check("GET /system/formats returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    formats = d.get("formats") or []
    check("formats list is non-empty", len(formats) > 0)
    # Tolerate malformed entries: a missing "ext" fails the checks below
    # rather than raising KeyError.
    exts = [f.get("ext") for f in formats if isinstance(f, dict)]
    check("pdf format present", "pdf" in exts)
    check("docx format present", "docx" in exts)

    r = req("GET", "/system/demo")
    check("GET /system/demo returns code=0 or 3002", r.get("code") in (0, 3002))
    if r.get("code") == 0:
        d = r.get("data") or {}
        check("demo data has nodes", "nodes" in d)
        print(f" {INFO} demo: {len(d.get('nodes') or [])} nodes, {len(d.get('edges') or [])} edges")
    else:
        print(f" {INFO} demo data not available (no KG yet) — code={r.get('code')}")
|
||||
|
||||
|
||||
def test_documents():
    """Exercise the document endpoints: listing, upload validation, missing id."""
    print("\n── A 组: Documents ──")

    r = req("GET", "/documents")
    check("GET /documents returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    check("documents list has total field", "total" in d)
    check("documents list has items field", "items" in d)
    print(f" {INFO} total documents={d.get('total', 0)}")

    # Upload a file with an unsupported extension to test validation.
    # req() cannot send multipart bodies, so the request is built by hand.
    print(" Testing upload validation...")
    boundary = "boundary123"
    body_parts = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="file"; filename="test.xyz"\r\n'
        f"Content-Type: application/octet-stream\r\n\r\n"
        f"dummy content\r\n"
        f"--{boundary}--\r\n"
    ).encode()
    req_obj = urllib.request.Request(
        BASE + "/documents/upload",
        data=body_parts,
        method="POST",
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
    )
    try:
        # Read the body on success too: the API may report the rejection in
        # the JSON envelope of a 2xx response, as req() assumes elsewhere.
        with urllib.request.urlopen(req_obj, timeout=10) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        try:
            raw = e.read()
        finally:
            e.close()
    try:
        r_upload = json.loads(raw.decode())
    except ValueError:  # non-JSON body: treat as "no code reported"
        r_upload = {}
    if not isinstance(r_upload, dict):
        r_upload = {}
    check("upload unsupported format returns code=1002", r_upload.get("code") == 1002)

    r = req("GET", "/documents/nonexistent_id")
    check("GET /documents/nonexistent returns code=2001", r.get("code") == 2001)
|
||||
|
||||
|
||||
def test_indexing():
    """Check that the indexing endpoints report the expected codes for unknown ids."""
    print("\n── B 组: Indexing ──")

    # (method, path, JSON body, check label, expected code)
    cases = (
        ("POST", "/index/start", {"doc_id": "nonexistent_doc"},
         "start indexing nonexistent doc returns 2001", 2001),
        ("GET", "/index/status/nonexistent_job", None,
         "get status nonexistent job returns 2002", 2002),
        ("GET", "/index/result/nonexistent_job", None,
         "get result nonexistent job returns 2002", 2002),
        ("DELETE", "/index/jobs/nonexistent_job", None,
         "cancel nonexistent job returns 2002", 2002),
    )
    for method, path, payload, label, expected in cases:
        resp = req(method, path, body=payload)
        check(label, resp.get("code") == expected)
|
||||
|
||||
|
||||
def test_kg():
    """Exercise the knowledge-graph endpoints: stats, nodes, node detail, edges, export."""
    print("\n── C 组: Knowledge Graph ──")

    stats_resp = req("GET", "/kg/stats")
    check("GET /kg/stats returns code=0", stats_resp.get("code") == 0)
    stats = stats_resp.get("data") or {}
    check("stats has total_nodes", "total_nodes" in stats)
    check("stats has total_edges", "total_edges" in stats)
    print(f" {INFO} KG: {stats.get('total_nodes')} nodes, {stats.get('total_edges')} edges")

    nodes_resp = req("GET", "/kg/nodes")
    check("GET /kg/nodes returns code 0 or 3002", nodes_resp.get("code") in (0, 3002))
    if not nodes_resp.get("code") == 0:
        print(f" {INFO} KG is empty (code=3002) — skipping node detail tests")
    else:
        listing = nodes_resp.get("data") or {}
        check("nodes data has items", "items" in listing)
        print(f" {INFO} nodes total={listing.get('total')}")

        # Probe the detail endpoints with the first listed node, if any.
        if listing.get("items"):
            node_id = listing["items"][0]["id"]
            detail_resp = req("GET", f"/kg/nodes/{node_id}")
            check(f"GET /kg/nodes/{node_id} returns code=0", detail_resp.get("code") == 0)

            neighbors_resp = req("GET", f"/kg/nodes/{node_id}/neighbors?hops=1")
            check(f"GET /kg/nodes/{node_id}/neighbors returns code=0", neighbors_resp.get("code") == 0)

    # (path, check label, expected code)
    trailing_cases = (
        ("/kg/nodes/definitely_not_a_real_node", "GET /kg/nodes/invalid returns code=3001", 3001),
        ("/kg/edges", "GET /kg/edges returns code=0", 0),
        ("/kg/export", "GET /kg/export returns code=0", 0),
    )
    for path, label, expected in trailing_cases:
        resp = req("GET", path)
        check(label, resp.get("code") == expected)
|
||||
|
||||
|
||||
def test_search():
    """Exercise the search endpoints: entity search, path validation, graph search."""
    print("\n── E 组: Search ──")

    entities_resp = req("GET", "/search/entities?q=graph")
    check("GET /search/entities returns code=0", entities_resp.get("code") == 0)
    entities = entities_resp.get("data") or {}
    for label, field in (
        ("search entities has query field", "query"),
        ("search entities has items field", "items"),
    ):
        check(label, field in entities)
    print(f" {INFO} 'graph' search: {entities.get('total', 0)} results")

    typed_resp = req("GET", "/search/entities?q=technology&type=TECHNOLOGY")
    check("GET /search/entities with type filter returns code=0", typed_resp.get("code") == 0)

    # Omitting from/to must be rejected as a parameter error.
    path_resp = req("GET", "/search/path?max_hops=2")
    check("path search without from/to returns 1001", path_resp.get("code") == 1001)

    graph_resp = req("GET", "/search/graph?q=knowledge")
    check("GET /search/graph returns code=0", graph_resp.get("code") == 0)
    graph = graph_resp.get("data") or {}
    check("graph search has matched_nodes", "matched_nodes" in graph)
|
||||
|
||||
|
||||
def test_query():
    """Exercise the QA endpoints that do not need the LLM: history and batch."""
    print("\n── D 组: QA Query ──")

    # POST /query is skipped in basic tests: it needs the DeepSeek API and KG data.
    history_resp = req("GET", "/query/history")
    check("GET /query/history returns code=0", history_resp.get("code") == 0)
    history = history_resp.get("data") or {}
    for label, field in (
        ("history has total field", "total"),
        ("history has items field", "items"),
    ):
        check(label, field in history)
    print(f" {INFO} query history: {history.get('total', 0)} records")

    missing_resp = req("GET", "/query/batch/nonexistent_batch")
    check("GET /query/batch/nonexistent returns 2002", missing_resp.get("code") == 2002)

    batch_resp = req("POST", "/query/batch", body={"questions": ["test question"]})
    check("POST /query/batch returns code=0", batch_resp.get("code") == 0)
    batch = batch_resp.get("data") or {}
    check("batch has batch_id", "batch_id" in batch)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Main
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Abort early when the server never comes up.
    if not wait_for_server():
        print(f"\n{FAIL} Server not responding. Start with: python main.py")
        sys.exit(1)

    # Run every test group in a fixed order.
    for run_group in (
        test_system,
        test_documents,
        test_indexing,
        test_kg,
        test_search,
        test_query,
    ):
        run_group()

    # Summarize and turn the tally into the process exit code.
    passed, failed = results["passed"], results["failed"]
    separator = "=" * 50
    print(f"\n{separator}")
    print(f"Results: {passed}/{passed + failed} passed, {failed} failed")
    if failed == 0:
        print(f"{PASS} All tests passed!")
    else:
        print(f"{FAIL} {failed} test(s) failed")
    print(f"{separator}")
    sys.exit(0 if failed == 0 else 1)
|
||||
779
docs/agentic_rag_specification-v1.0.md
Normal file
779
docs/agentic_rag_specification-v1.0.md
Normal file
@@ -0,0 +1,779 @@
|
||||
# Agentic-RAG 规范文档 v1.0
|
||||
|
||||
> GraphRAG 问答阶段核心流程:Knowledge Graph → LangChain Agent → QA
|
||||
>
|
||||
> 数据来源:Bridge Pipeline 输出(`kg_nodes.json` + `kg_edges.json`)
|
||||
> 测试验证日期:2026-03-05
|
||||
> 全流程运行耗时:~40s(4 个测试查询)
|
||||
|
||||
---
|
||||
|
||||
## 目录
|
||||
|
||||
- [一、完整执行思路与脚本位置](#一完整执行思路与脚本位置)
|
||||
- [二、LangChain Agent 输入输出规范](#二langchain-agent-输入输出规范)
|
||||
- [三、MinerU ↔ Agentic-RAG 对接规范与核心架构](#三mineru--agentic-rag-对接规范与核心架构)
|
||||
- [四、问答流程最终数据返回格式规范](#四问答流程最终数据返回格式规范)
|
||||
- [五、虚拟环境与依赖](#五虚拟环境与依赖)
|
||||
|
||||
---
|
||||
|
||||
## 一、完整执行思路与脚本位置
|
||||
|
||||
### 1.1 总体架构定位
|
||||
|
||||
Agentic-RAG 是 GraphRAG 系统的**问答阶段**,位于 Bridge Pipeline 之后,负责将知识图谱转化为可交互的智能问答能力。
|
||||
|
||||
```
|
||||
【已完成阶段】 【本阶段:Agentic-RAG】
|
||||
──────────────────── ──────────────────────────
|
||||
PDF
|
||||
↓ MinerU Cloud API
|
||||
content_list.json
|
||||
↓ Bridge Pipeline
|
||||
kg_nodes.json (40 nodes) ──────────→ NetworkX Graph (内存)
|
||||
kg_edges.json (780 edges) ↓
|
||||
4 个 LangChain @tool
|
||||
↓
|
||||
LangChain v1 create_agent
|
||||
(DeepSeek deepseek-chat)
|
||||
↓
|
||||
ReAct 推理循环
|
||||
↓
|
||||
自然语言答案
|
||||
```
|
||||
|
||||
### 1.2 五步执行流程
|
||||
|
||||
| 步骤 | 模块 | 说明 |
|
||||
|------|------|------|
|
||||
| Step 0 | 环境 + 配置 | 加载 `.env`(DEEPSEEK_API_KEY),初始化 `ChatOpenAI` |
|
||||
| Step 1 | KG 加载 | 读取 `kg_nodes.json` + `kg_edges.json`,构建 NetworkX 无向图 |
|
||||
| Step 2 | Tool 注册 | 用 `@tool` 装饰器注册 4 个 KG 检索工具 |
|
||||
| Step 3 | Agent 构建 | `create_agent(model, tools, system_prompt)` 编译 LangGraph |
|
||||
| Step 4 | 问答调用 | `agent.invoke({"messages": [("human", question)]})` |
|
||||
| Step 5 | 结果提取 | `result["messages"][-1].content` 获取最终答案 |
|
||||
|
||||
### 1.3 测试脚本存放位置
|
||||
|
||||
```
|
||||
F:\GraphRAGAgent\graphrag_pipeline\
|
||||
├── agentic_rag_mvp.py ← 主测试脚本(本规范对应文件)
|
||||
├── .env ← DEEPSEEK_API_KEY 配置
|
||||
└── output/
|
||||
├── kg_nodes.json ← Bridge Pipeline 生成(40 节点)
|
||||
└── kg_edges.json ← Bridge Pipeline 生成(780 边)
|
||||
```
|
||||
|
||||
### 1.4 运行命令
|
||||
|
||||
```bash
|
||||
# MVP 连通性测试(4 个预设测试查询)
|
||||
F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe \
|
||||
F:/GraphRAGAgent/graphrag_pipeline/agentic_rag_mvp.py
|
||||
```
|
||||
|
||||
### 1.5 ReAct 推理循环详解
|
||||
|
||||
Agent 使用 **ReAct(Reasoning + Acting)** 模式,每个问题的处理流如下:
|
||||
|
||||
```
|
||||
用户输入 (question: str)
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ LLM Reasoning(DeepSeek deepseek-chat) │
|
||||
│ 决策:需要调用哪个工具?参数是什么? │
|
||||
└─────────────────────────────────────────────────┘
|
||||
│ tool_call
|
||||
▼
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ Tool Execution(NetworkX 本地计算,无 API 调用) │
|
||||
│ search_entities / get_neighbors / │
|
||||
│ get_entities_by_type / describe_graph │
|
||||
└─────────────────────────────────────────────────┘
|
||||
│ ToolMessage(工具返回的文本结果)
|
||||
▼
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ LLM Observation(观察工具结果) │
|
||||
│ 决策:结果够用了吗?还需要调更多工具? │
|
||||
└─────────────────────────────────────────────────┘
|
||||
│ 继续 tool_call 或输出最终答案
|
||||
▼
|
||||
AIMessage(最终自然语言答案)
|
||||
```
|
||||
|
||||
**实测工具调用模式(4 个测试查询):**
|
||||
|
||||
| 查询类型 | 工具调用序列 | 特点 |
|
||||
|---------|------------|------|
|
||||
| 图谱整体概览 | `describe_graph` | 单次工具调用 |
|
||||
| 类型枚举 | `get_entities_by_type` | 单次工具调用 |
|
||||
| 多跳关系推理 | `search_entities` → `get_neighbors` | 两步串行调用 |
|
||||
| 概念精确查找 | `search_entities` → `get_neighbors` | 两步串行调用 |
|
||||
|
||||
---
|
||||
|
||||
## 二、LangChain Agent 输入输出规范
|
||||
|
||||
### 2.1 LLM 适配规范
|
||||
|
||||
#### 2.1.1 DeepSeek → LangChain 标准组件
|
||||
|
||||
LangChain v1 使用 `ChatOpenAI` 通过 `base_url` 覆盖接入任何 OpenAI 兼容 API:
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
llm = ChatOpenAI(
|
||||
model="deepseek-chat", # DeepSeek 模型名
|
||||
api_key=DEEPSEEK_API_KEY, # 来自 graphrag_pipeline/.env
|
||||
base_url="https://api.deepseek.com", # OpenAI 兼容端点
|
||||
temperature=0, # 问答场景确定性输出
|
||||
)
|
||||
```
|
||||
|
||||
| 参数 | 值 | 说明 |
|
||||
|------|-----|------|
|
||||
| `model` | `"deepseek-chat"` | DeepSeek 实际模型标识 |
|
||||
| `api_key` | `${DEEPSEEK_API_KEY}` | 从 `.env` 读取,与 Bridge Pipeline 共用 |
|
||||
| `base_url` | `"https://api.deepseek.com"` | DeepSeek 的 OpenAI 兼容端点;也可写成 `https://api.deepseek.com/v1`(两者等价,SDK 不会自动补全 `/v1`,且此处 `v1` 与模型版本无关) |
|
||||
| `temperature` | `0` | 问答场景设为 0,保证可重现性 |
|
||||
|
||||
#### 2.1.2 与 LangExtract 中 DeepSeek 的区别
|
||||
|
||||
| 对比项 | LangExtract 中的 DeepSeek | Agentic-RAG 中的 DeepSeek |
|
||||
|--------|--------------------------|--------------------------|
|
||||
| 接入方式 | 直接实例化 `OpenAILanguageModel` | LangChain `ChatOpenAI` 标准组件 |
|
||||
| API Key 环境变量 | `OPENAI_API_KEY` | `DEEPSEEK_API_KEY` |
|
||||
| 调用方式 | `lx.extract(model=model)` | `agent.invoke({"messages": ...})` |
|
||||
| 输出格式 | JSON(实体抽取) | 自然语言(问答) |
|
||||
| Tool Calling | 不支持(单轮推理) | 支持(ReAct 多轮) |
|
||||
|
||||
### 2.2 Agent 构建规范
|
||||
|
||||
#### 2.2.1 LangChain v1 create_agent
|
||||
|
||||
```python
|
||||
from langchain.agents import create_agent
|
||||
|
||||
agent = create_agent(
|
||||
model=llm, # ChatOpenAI 实例
|
||||
tools=_tools, # List[BaseTool],4 个工具
|
||||
system_prompt=SYSTEM_PROMPT, # 系统提示词字符串
|
||||
)
|
||||
```
|
||||
|
||||
**版本注意事项:**
|
||||
|
||||
| API | 状态 | 说明 |
|
||||
|-----|------|------|
|
||||
| `langchain.agents.create_agent` | ✅ LangChain v1 推荐 | 本项目使用 |
|
||||
| `langgraph.prebuilt.create_react_agent` | ⚠️ Deprecated in LangGraph V1.0 | 已废弃,勿用 |
|
||||
| `langchain.agents.create_react_agent` (旧版) | ❌ Legacy | 已移除 |
|
||||
|
||||
#### 2.2.2 System Prompt 规范
|
||||
|
||||
```
|
||||
You are a Knowledge Graph QA assistant. You have access to a knowledge graph
|
||||
extracted from academic documents about GraphRAG and related technologies.
|
||||
|
||||
The graph contains:
|
||||
- {node_count} deduplicated entities ({type_list} types)
|
||||
- {edge_count} CO_OCCURS_IN edges representing same-page co-occurrence
|
||||
|
||||
Available tools:
|
||||
1. search_entities — find entities by keyword substring
|
||||
2. get_neighbors — explore entity relationships (N-hop BFS)
|
||||
3. get_entities_by_type — list all entities of a type
|
||||
4. describe_graph — get graph statistics overview
|
||||
|
||||
Reasoning strategy:
|
||||
- Always use at least one tool before answering a factual question
|
||||
- For relationship questions, use get_neighbors after identifying the entity with search_entities
|
||||
- For enumeration questions, use get_entities_by_type
|
||||
- Synthesize tool results into a clear, concise answer
|
||||
- Cite the entity names and types in your final answer
|
||||
```
|
||||
|
||||
### 2.3 Agent 输入规范
|
||||
|
||||
#### 2.3.1 invoke 输入格式
|
||||
|
||||
```python
|
||||
result = agent.invoke({
|
||||
"messages": [
|
||||
("human", question) # 用户问题(自然语言字符串)
|
||||
]
|
||||
})
|
||||
```
|
||||
|
||||
**输入字段规范:**
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `messages` | `list[tuple[str, str]]` | 消息列表,格式 `(role, content)` |
|
||||
| `role` | `"human"` \| `"ai"` \| `"system"` | 消息角色 |
|
||||
| `content` | `str` | 消息内容 |
|
||||
|
||||
**多轮对话输入(支持历史上下文):**
|
||||
|
||||
```python
|
||||
result = agent.invoke({
|
||||
"messages": [
|
||||
("human", "What is GraphRAG?"),
|
||||
("ai", "GraphRAG is a knowledge graph-enhanced RAG system..."),
|
||||
("human", "How does it relate to LLMs?"), # 当前问题
|
||||
]
|
||||
})
|
||||
```
|
||||
|
||||
### 2.4 Agent 输出规范
|
||||
|
||||
#### 2.4.1 invoke 原始返回
|
||||
|
||||
```python
|
||||
{
|
||||
"messages": [
|
||||
HumanMessage(content="What is GraphRAG?"),
|
||||
AIMessage(content="", tool_calls=[...]), # 工具调用
|
||||
ToolMessage(content="...", tool_call_id="..."), # 工具结果
|
||||
AIMessage(content="GraphRAG is an advanced...") # 最终答案
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### 2.4.2 消息类型枚举
|
||||
|
||||
| 消息类型 | 角色 | 说明 |
|
||||
|---------|------|------|
|
||||
| `HumanMessage` | `human` | 用户输入 |
|
||||
| `AIMessage`(tool_calls 非空) | `ai` | LLM 决策发起工具调用 |
|
||||
| `ToolMessage` | `tool` | 工具执行结果 |
|
||||
| `AIMessage`(tool_calls 为空) | `ai` | 最终自然语言答案 |
|
||||
|
||||
#### 2.4.3 最终答案提取
|
||||
|
||||
```python
|
||||
final_msg = result["messages"][-1]
|
||||
answer = final_msg.content # str,最终自然语言答案
|
||||
```
|
||||
|
||||
### 2.5 四个工具输入输出规范
|
||||
|
||||
#### Tool 1: `search_entities`
|
||||
|
||||
| 项目 | 规范 |
|
||||
|------|------|
|
||||
| 入参 | `query: str` — 关键词(大小写不敏感子串匹配) |
|
||||
| 匹配逻辑 | `query.lower() in entity_name.lower()` |
|
||||
| 返回格式 | 多行文本,每行格式:`[{type}] "{name}" (confidence={c}, page={p}, id={id})` |
|
||||
| 无匹配时 | 返回提示 + 前 8 个样例实体名 |
|
||||
| 最多返回 | 15 条 |
|
||||
|
||||
**实际调用示例:**
|
||||
|
||||
```
|
||||
输入: query="GraphRAG"
|
||||
输出:
|
||||
Found 3 entity(ies) matching 'GraphRAG':
|
||||
[TECHNOLOGY] "GraphRAG" (confidence=match_exact, page=0, id=node_0)
|
||||
[CONCEPT] "GraphRAG pipeline" (confidence=match_exact, page=0, id=node_12)
|
||||
[CONCEPT] "GraphRAG (Global)" (confidence=match_exact, page=0, id=node_15)
|
||||
```
|
||||
|
||||
#### Tool 2: `get_neighbors`
|
||||
|
||||
| 项目 | 规范 |
|
||||
|------|------|
|
||||
| 入参 | `entity_name: str`,`hops: int = 1`(范围 1-3) |
|
||||
| 匹配逻辑 | 子串匹配找起始节点,取 `candidates[0]` |
|
||||
| 遍历算法 | `nx.single_source_shortest_path_length(G, node_id, cutoff=hops)` |
|
||||
| 返回格式 | 按 hop 分组,每组 `[{type}] {name}`,每组最多 20 条 |
|
||||
| 未找到时 | 返回提示,建议先用 `search_entities` |
|
||||
|
||||
**实际调用示例:**
|
||||
|
||||
```
|
||||
输入: entity_name="GraphRAG", hops=1
|
||||
输出:
|
||||
Neighbors of 'GraphRAG' [TECHNOLOGY] within 1 hop(s):
|
||||
|
||||
Hop 1 — 39 related entities:
|
||||
[CONCEPT] Knowledge Graph Enhanced RAG System
|
||||
[CONCEPT] retrieval-augmented generation
|
||||
...
|
||||
Total related entities: 39
|
||||
```
|
||||
|
||||
#### Tool 3: `get_entities_by_type`
|
||||
|
||||
| 项目 | 规范 |
|
||||
|------|------|
|
||||
| 入参 | `entity_type: str`(自动 `.upper()` 处理) |
|
||||
| 有效类型 | `TECHNOLOGY`, `CONCEPT`, `PERSON`, `ORGANIZATION`, `LOCATION` |
|
||||
| 返回格式 | 按 `name` 字母序排列,每行 `• {name} (confidence={c}, page={p})` |
|
||||
| 无效类型时 | 返回错误 + 图谱中实际存在的类型列表 |
|
||||
|
||||
**实际调用示例:**
|
||||
|
||||
```
|
||||
输入: entity_type="TECHNOLOGY"
|
||||
输出:
|
||||
TECHNOLOGY entities (4 total):
|
||||
• GraphRAG (confidence=match_exact, page=0)
|
||||
• LLMs (confidence=match_exact, page=0)
|
||||
• LangExtract (confidence=match_exact, page=0)
|
||||
• MinerU (confidence=match_exact, page=0)
|
||||
```
|
||||
|
||||
#### Tool 4: `describe_graph`
|
||||
|
||||
| 项目 | 规范 |
|
||||
|------|------|
|
||||
| 入参 | 无参数 |
|
||||
| 计算指标 | 节点数、边数、关系类型、图密度(`nx.density`)、度中心性(`nx.degree_centrality`) |
|
||||
| 返回格式 | 结构化文本,包含概览 + 类型分布 + Top-5 中心节点 |
|
||||
|
||||
**实际调用示例(实测输出):**
|
||||
|
||||
```
|
||||
=== Knowledge Graph Overview ===
|
||||
Nodes (entities): 40
|
||||
Edges (relations): 780
|
||||
Relation type: CO_OCCURS_IN (same-page co-occurrence)
|
||||
Graph density: 1.0000
|
||||
|
||||
Entity type distribution:
|
||||
CONCEPT : 36
|
||||
TECHNOLOGY : 4
|
||||
|
||||
Top-5 most connected entities (by degree centrality):
|
||||
[TECHNOLOGY] GraphRAG (centrality=1.000)
|
||||
[CONCEPT] Knowledge Graph Enhanced RAG System (centrality=1.000)
|
||||
[CONCEPT] retrieval-augmented generation (centrality=1.000)
|
||||
[CONCEPT] knowledge graphs (centrality=1.000)
|
||||
[CONCEPT] large language models (centrality=1.000)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 三、MinerU ↔ Agentic-RAG 对接规范与核心架构
|
||||
|
||||
### 3.1 全链路技术架构
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ 阶段一:文档解析(MinerU Cloud API) │
|
||||
│ │
|
||||
│ PDF 文件 │
|
||||
│ │ POST /file-urls/batch (enable_table=True, language="en") │
|
||||
│ ├─ PUT {presigned_url}(裸上传,不带 Content-Type) │
|
||||
│ └─ GET /extract-results/batch/{batch_id}(轮询 done) │
|
||||
│ ↓ │
|
||||
│ full_zip_url → 解压 → {uuid}_content_list.json │
|
||||
│ │
|
||||
│ 关键输出字段:type, text, text_level, table_body, page_idx, bbox │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
↓
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ 阶段二:知识图谱构建(Bridge Pipeline) │
|
||||
│ │
|
||||
│ content_list.json │
|
||||
│ │ text_assembler.py │
|
||||
│ ├─ text blocks → .rstrip() 拼接 │
|
||||
│ ├─ table blocks → BeautifulSoup HTML → pipe 分隔文本 │
|
||||
│ └─ PageText(page_idx, text, block_spans) │
|
||||
│ ↓ │
|
||||
│ entity_extractor.py (LangExtract + DeepSeek) │
|
||||
│ ↓ │
|
||||
│ kg_builder.py (去重 + CO_OCCURS_IN 边) │
|
||||
│ ↓ │
|
||||
│ kg_nodes.json (40 nodes) + kg_edges.json (780 edges) │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
↓
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ 阶段三:Agentic-RAG 问答(LangChain + LangGraph) │
|
||||
│ │
|
||||
│ kg_nodes.json → NetworkX.G.add_node(**node) │
|
||||
│ kg_edges.json → NetworkX.G.add_edge(source, target, **edge) │
|
||||
│ │
|
||||
│ @tool search_entities ← 子串匹配 │
|
||||
│ @tool get_neighbors ← BFS N-hop 遍历 │
|
||||
│ @tool get_entities_by_type ← 类型过滤 │
|
||||
│ @tool describe_graph ← 图统计 │
|
||||
│ ↓ │
|
||||
│ create_agent(ChatOpenAI("deepseek-chat"), tools, system_prompt) │
|
||||
│ ↓ │
|
||||
│ ReAct 推理循环(think → tool_call → observe → repeat) │
|
||||
│ ↓ │
|
||||
│ 自然语言答案(AIMessage.content) │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 3.2 MinerU → KG 关键参数对接
|
||||
|
||||
| MinerU 输出字段 | Bridge Pipeline 处理 | Agentic-RAG 使用 |
|
||||
|---------------|-------------------|----------------|
|
||||
| `block["type"]` | 区分 `text`/`table`/`image` | 不直接使用(已由 Bridge 转换) |
|
||||
| `block["text"]` | `.rstrip()` 后加入 PageText | 已内化为 `node["name"]` |
|
||||
| `block["table_body"]` | BeautifulSoup → pipe 分隔文本 | 已内化为实体描述 |
|
||||
| `block["page_idx"]` | 分组依据,记入 BlockSpan | `node["page"]` 字段 |
|
||||
| `block["bbox"]` | 记录字符偏移位置 | `node["char_start"]` / `node["char_end"]` |
|
||||
| `{uuid}_content_list.json` 文件名 | UUID 作为 `source_doc_id` | `node["source_doc"]` / `edge["doc_id"]` |
|
||||
|
||||
### 3.3 NetworkX 图构建规范
|
||||
|
||||
```python
|
||||
import networkx as nx
|
||||
|
||||
G = nx.Graph() # 无向图(CO_OCCURS_IN 关系无方向)
|
||||
|
||||
# 节点:来自 kg_nodes.json
|
||||
for node in kg_nodes:
|
||||
G.add_node(
|
||||
node["id"], # 主键:node_0, node_1, ...
|
||||
**node # 所有字段作为节点属性
|
||||
)
|
||||
|
||||
# 边:来自 kg_edges.json
|
||||
for edge in kg_edges:
|
||||
G.add_edge(
|
||||
edge["source"], # node_0
|
||||
edge["target"], # node_1
|
||||
relation=edge["relation"], # "CO_OCCURS_IN"
|
||||
doc_id=edge["doc_id"], # UUID
|
||||
page=edge["page"], # 0-indexed
|
||||
)
|
||||
```
|
||||
|
||||
**图属性:**
|
||||
|
||||
| 属性 | 实测值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `G.number_of_nodes()` | `40` | 去重实体数 |
|
||||
| `G.number_of_edges()` | `780` | CO_OCCURS_IN 边数 |
|
||||
| `nx.density(G)` | `1.0` | 完全图(单页文档所有节点两两连接) |
|
||||
| `G.nodes[nid]` | `dict` | 节点属性字典(id, name, type, page, confidence, ...) |
|
||||
|
||||
### 3.4 MinerU API 关键参数(与 Agentic-RAG 相关部分)
|
||||
|
||||
| 参数 | 推荐值 | 影响 Agentic-RAG 的原因 |
|
||||
|------|--------|----------------------|
|
||||
| `enable_table` | `True` | 表格被解析为 HTML `<table>`,Bridge 转为文本参与实体抽取,影响 KG 节点质量 |
|
||||
| `enable_formula` | `True`(默认) | 公式以 LaTeX 内联写入文本,影响文本纯净度,可能产生噪声实体 |
|
||||
| `language` | `"en"` / `"ch"` | 影响 OCR 精度,直接影响文本质量和实体对齐率 |
|
||||
| `model_version` | `"pipeline"` | 输出 `{uuid}_content_list.json`,Bridge 通过 glob `*_content_list.json` 匹配 |
|
||||
| `page_ranges` | 按需设置 | 多页文档可分批处理,减少每批实体数和边数规模 |
|
||||
|
||||
### 3.5 Agent 系统扩展点
|
||||
|
||||
当 KG 数据更新后(新文档接入),Agentic-RAG 只需**重新加载 JSON 文件**,不需要重新构建 agent:
|
||||
|
||||
```python
|
||||
# 动态重载 KG(新文档处理完成后)
|
||||
G.clear()
|
||||
G.update(_load_kg()) # 就地写入重新读取的 kg_nodes.json + kg_edges.json(不重新绑定 G)
|
||||
# agent 实例无需重建,tools 引用同一 G 对象
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、问答流程最终数据返回格式规范
|
||||
|
||||
### 4.1 invoke 完整返回结构
|
||||
|
||||
```python
|
||||
result = agent.invoke({"messages": [("human", question)]})
|
||||
# result 类型: dict
|
||||
# result.keys(): ["messages"]
|
||||
```
|
||||
|
||||
`result["messages"]` 是一个有序列表,包含完整的对话历史:
|
||||
|
||||
```python
|
||||
[
|
||||
HumanMessage, # 用户输入
|
||||
AIMessage, # 工具调用决策(可能多轮)
|
||||
ToolMessage, # 工具执行结果(可能多轮)
|
||||
... # 可能有多轮 AIMessage + ToolMessage
|
||||
AIMessage, # 最终答案(tool_calls=[])
|
||||
]
|
||||
```
|
||||
|
||||
### 4.2 HumanMessage 格式
|
||||
|
||||
```python
|
||||
HumanMessage(
|
||||
content="What technology entities are in the knowledge graph?",
|
||||
additional_kwargs={},
|
||||
response_metadata={},
|
||||
id="uuid-string", # 自动生成
|
||||
)
|
||||
```
|
||||
|
||||
### 4.3 AIMessage(工具调用)格式
|
||||
|
||||
```python
|
||||
AIMessage(
|
||||
content="", # 内容为空(LLM 决策调用工具)
|
||||
additional_kwargs={
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_abc123",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_entities_by_type",
|
||||
"arguments": "{\"entity_type\": \"TECHNOLOGY\"}"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
tool_calls=[
|
||||
{
|
||||
"name": "get_entities_by_type",
|
||||
"args": {"entity_type": "TECHNOLOGY"},
|
||||
"id": "call_abc123",
|
||||
"type": "tool_call",
|
||||
}
|
||||
],
|
||||
response_metadata={
|
||||
"model_name": "deepseek-chat",
|
||||
"finish_reason": "tool_calls",
|
||||
"usage": {
|
||||
"prompt_tokens": 580,
|
||||
"completion_tokens": 18,
|
||||
"total_tokens": 598,
|
||||
}
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
### 4.4 ToolMessage 格式
|
||||
|
||||
```python
|
||||
ToolMessage(
|
||||
content="TECHNOLOGY entities (4 total):\n • GraphRAG ...\n • LLMs ...",
|
||||
tool_call_id="call_abc123", # 与 AIMessage.tool_calls[i].id 对应
|
||||
name="get_entities_by_type", # 工具名称
|
||||
additional_kwargs={},
|
||||
response_metadata={},
|
||||
)
|
||||
```
|
||||
|
||||
### 4.5 AIMessage(最终答案)格式
|
||||
|
||||
```python
|
||||
AIMessage(
|
||||
content="## Technology Entities in the Knowledge Graph\n\n1. **GraphRAG** ...",
|
||||
additional_kwargs={
|
||||
"tool_calls": [] # 空列表,表示无更多工具调用
|
||||
},
|
||||
tool_calls=[],
|
||||
response_metadata={
|
||||
"model_name": "deepseek-chat",
|
||||
"finish_reason": "stop",
|
||||
"usage": {
|
||||
"prompt_tokens": 820,
|
||||
"completion_tokens": 350,
|
||||
"total_tokens": 1170,
|
||||
}
|
||||
},
|
||||
id="msg-uuid-string",
|
||||
)
|
||||
```
|
||||
|
||||
### 4.6 最终答案提取规范
|
||||
|
||||
```python
|
||||
# 标准提取方式
|
||||
final_msg = result["messages"][-1] # 最后一条消息必为最终 AIMessage
|
||||
answer: str = final_msg.content # 自然语言答案
|
||||
|
||||
# 安全提取方式(防御性编程)
|
||||
answer = (
|
||||
final_msg.content
|
||||
if hasattr(final_msg, "content")
|
||||
else str(final_msg)
|
||||
)
|
||||
```
|
||||
|
||||
### 4.7 推荐封装数据格式
|
||||
|
||||
业务层调用时建议封装为以下结构,便于下游使用:
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
@dataclass
|
||||
class AgenticRAGResponse:
|
||||
question: str # 用户原始问题
|
||||
answer: str # 最终答案(Markdown 格式)
|
||||
tool_calls: list[dict] # 工具调用链记录
|
||||
total_messages: int # 消息总数(含 human/ai/tool 全部消息,即 len(messages))
|
||||
token_usage: dict[str, int] # Token 用量统计
|
||||
kg_stats: dict[str, Any] # KG 规模信息
|
||||
```
|
||||
|
||||
**填充示例:**
|
||||
|
||||
```python
|
||||
def run_query_with_metadata(question: str) -> AgenticRAGResponse:
|
||||
result = agent.invoke({"messages": [("human", question)]})
|
||||
messages = result["messages"]
|
||||
|
||||
# 提取工具调用链
|
||||
tool_calls = []
|
||||
for msg in messages:
|
||||
if hasattr(msg, "tool_calls") and msg.tool_calls:
|
||||
for tc in msg.tool_calls:
|
||||
tool_calls.append({
|
||||
"tool": tc["name"],
|
||||
"args": tc["args"],
|
||||
"call_id": tc["id"],
|
||||
})
|
||||
|
||||
# Token 统计(仅取最后一条 AIMessage 的用量,不含此前各轮工具调用决策的消耗)
|
||||
last_ai = messages[-1]
|
||||
usage = last_ai.response_metadata.get("usage", {})
|
||||
|
||||
return AgenticRAGResponse(
|
||||
question=question,
|
||||
answer=messages[-1].content,
|
||||
tool_calls=tool_calls,
|
||||
total_messages=len(messages),
|
||||
token_usage={
|
||||
"prompt_tokens": usage.get("prompt_tokens", 0),
|
||||
"completion_tokens": usage.get("completion_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
},
|
||||
kg_stats={
|
||||
"nodes": G.number_of_nodes(),
|
||||
"edges": G.number_of_edges(),
|
||||
"density": nx.density(G),
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
### 4.8 实测问答响应样例
|
||||
|
||||
#### 样例 1:T1-Overview(图谱概览类问题)
|
||||
|
||||
```
|
||||
问题:Give me an overview of the knowledge graph.
|
||||
What types of entities does it contain and which entities are most central?
|
||||
|
||||
工具调用链:
|
||||
[1] describe_graph()
|
||||
|
||||
最终答案(节选):
|
||||
## Knowledge Graph Overview
|
||||
**Structure:**
|
||||
- 40 entities (nodes) connected by 780 edges
|
||||
- All edges represent CO_OCCURS_IN relationships
|
||||
- Graph density: 1.000 — fully connected graph
|
||||
|
||||
**Entity Types:**
|
||||
1. TECHNOLOGY (4): GraphRAG, LLMs, LangExtract, MinerU
|
||||
2. CONCEPT (36): retrieval-augmented generation, knowledge graphs, ...
|
||||
|
||||
**Most Central Entities (centrality=1.000):**
|
||||
1. [TECHNOLOGY] GraphRAG
|
||||
2. [CONCEPT] Knowledge Graph Enhanced RAG System
|
||||
...
|
||||
|
||||
消息轮次:4 条(human + ai_tool_call + tool_result + ai_final)
|
||||
Token 用量:约 900 tokens
|
||||
```
|
||||
|
||||
#### 样例 2:T3-MultiHop(多跳推理类问题)
|
||||
|
||||
```
|
||||
问题:What concepts and technologies are most closely related to GraphRAG?
|
||||
Explore the graph neighborhood and explain the connections.
|
||||
|
||||
工具调用链:
|
||||
[1] search_entities(query="GraphRAG")
|
||||
[2] get_neighbors(entity_name="GraphRAG", hops=1)
|
||||
|
||||
最终答案(节选):
|
||||
## Concepts and Technologies Most Closely Related to GraphRAG
|
||||
|
||||
### Core Technologies (Directly Connected):
|
||||
1. LLMs — The foundation models that GraphRAG enhances
|
||||
2. LangExtract — Used for language extraction in the pipeline
|
||||
3. MinerU — Part of the data processing ecosystem
|
||||
|
||||
### Key Concepts:
|
||||
- Knowledge Graph Enhanced RAG System (overarching architecture)
|
||||
- retrieval-augmented generation (core paradigm)
|
||||
- multi-hop reasoning (key capability)
|
||||
...
|
||||
|
||||
消息轮次:6 条(human + 2×ai_tool_call + 2×tool_result + ai_final)
|
||||
Token 用量:约 1,200 tokens
|
||||
```
|
||||
|
||||
### 4.9 错误与边界情况处理
|
||||
|
||||
| 情况 | Agent 行为 | 返回内容 |
|
||||
|------|------------|---------|
|
||||
| 实体不存在 | 工具返回提示 + 样例实体名 | Agent 改写查询或给出不确定性说明 |
|
||||
| 类型不合法 | 工具返回有效类型列表 | Agent 自动纠正并重试 |
|
||||
| 问题超出 KG 范围 | 无工具调用结果支撑 | Agent 如实说明 "信息不在当前 KG 中" |
|
||||
| Token 超限 | LangChain 内部截断 | 减少 `hops` 或缩短问题 |
|
||||
|
||||
---
|
||||
|
||||
## 五、虚拟环境与依赖
|
||||
|
||||
### 5.1 运行环境
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| 虚拟环境 | `F:\GraphRAGAgent\langextract_src\.venv\`(复用 Bridge Pipeline 的 venv) |
|
||||
| Python 版本 | 3.12 |
|
||||
| 安装方式 | uv |
|
||||
|
||||
### 5.2 Agentic-RAG 新增依赖
|
||||
|
||||
| 包 | 版本(实测) | 用途 |
|
||||
|----|------------|------|
|
||||
| `langchain` | 1.2.10 | `@tool` 装饰器、`create_agent` |
|
||||
| `langchain-openai` | latest | `ChatOpenAI`(DeepSeek 适配) |
|
||||
| `langgraph` | latest | `create_agent` 底层运行时 |
|
||||
| `networkx` | latest | KG 图构建、BFS 遍历、中心性计算 |
|
||||
|
||||
### 5.3 完整依赖安装
|
||||
|
||||
```bash
|
||||
uv pip install langchain langchain-openai langgraph networkx \
|
||||
--python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
|
||||
```
|
||||
|
||||
### 5.4 环境变量
|
||||
|
||||
`F:\GraphRAGAgent\graphrag_pipeline\.env`:
|
||||
|
||||
```env
|
||||
DEEPSEEK_API_KEY=sk-xxxxxxxxxxxxxxxx
|
||||
DEEPSEEK_BASE_URL=https://api.deepseek.com
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 附录:各阶段文件依赖速查
|
||||
|
||||
| 阶段 | 输入 | 输出 | 关键脚本 |
|
||||
|------|------|------|---------|
|
||||
| MinerU 解析 | `*.pdf` | `{uuid}_content_list.json` | `mineru_mvp/pipeline.py` |
|
||||
| Bridge Pipeline | `*_content_list.json` | `kg_nodes.json` + `kg_edges.json` | `graphrag_pipeline/bridge.py` |
|
||||
| Agentic-RAG | `kg_nodes.json` + `kg_edges.json` | 自然语言答案 | `graphrag_pipeline/agentic_rag_mvp.py` |
|
||||
|
||||
| 规范文档 | 覆盖范围 |
|
||||
|---------|---------|
|
||||
| `docs/mineru_specification-v1.0.md` | MinerU 解析阶段输入/输出 |
|
||||
| `docs/langextract_specification-v1.0.md` | LangExtract 实体抽取参数 |
|
||||
| `docs/bridge_pipeline_specification-v1.0.md` | Bridge Pipeline 对接规范与 KG 输出格式 |
|
||||
| `docs/agentic_rag_specification-v1.0.md` | **本文件** — Agentic-RAG 问答阶段规范 |
|
||||
1757
docs/backend_service_specification-v1.0.md
Normal file
1757
docs/backend_service_specification-v1.0.md
Normal file
File diff suppressed because it is too large
Load Diff
481
docs/bridge_pipeline_specification-v1.0.md
Normal file
481
docs/bridge_pipeline_specification-v1.0.md
Normal file
@@ -0,0 +1,481 @@
|
||||
# Bridge Pipeline Specification v1.0
|
||||
|
||||
> GraphRAG 索引阶段核心流程:MinerU → LangExtract → Knowledge Graph
|
||||
|
||||
---
|
||||
|
||||
## 1. Pipeline 执行思路
|
||||
|
||||
### 1.1 整体架构
|
||||
|
||||
Bridge Pipeline 是 GraphRAG 索引阶段的核心流程,负责将 MinerU 解析后的结构化 PDF 内容送入 LangExtract 完成实体抽取,最终生成知识图谱的节点(Nodes)和边(Edges)。
|
||||
|
||||
```
|
||||
MinerU output Bridge Pipeline KG output
|
||||
───────────── ─────────────── ─────────
|
||||
{uuid}_content_list.json → text_assembler.py
|
||||
├─ text blocks ├─ 按页拼接纯文本
|
||||
└─ table blocks (HTML) ├─ HTML表格→纯文本
|
||||
└─ 记录每个block的char偏移
|
||||
→ entity_extractor.py
|
||||
├─ 逐页调用 lx.extract()
|
||||
└─ DeepSeek via OpenAI Provider
|
||||
→ kg_builder.py
|
||||
├─ 过滤低质量对齐 → kg_nodes.json
|
||||
├─ 节点去重 (name.lower(), type)
|
||||
└─ 同页实体对→CO_OCCURS_IN边 → kg_edges.json
|
||||
```
|
||||
|
||||
### 1.2 五步执行流程
|
||||
|
||||
| 步骤 | 模块 | 说明 |
|
||||
|------|------|------|
|
||||
| Step 1 | `bridge.py` | 加载 MinerU 输出 `content_list.json`,解析输入路径和 source_doc_id |
|
||||
| Step 2 | `text_assembler.py` | 按 `page_idx` 分组,拼接纯文本,记录每个 block 的字符偏移 |
|
||||
| Step 3 | `entity_extractor.py` | 逐页调用 LangExtract + DeepSeek 完成实体抽取 |
|
||||
| Step 4 | `kg_builder.py` | 过滤低质量对齐 → 节点去重 → 同页配对生成 CO_OCCURS_IN 边 |
|
||||
| Step 5 | `bridge.py` | 保存 `kg_nodes.json` + `kg_edges.json` 到 output 目录 |
|
||||
|
||||
### 1.3 文件存放位置
|
||||
|
||||
```
|
||||
F:\GraphRAGAgent\graphrag_pipeline\
|
||||
├── .env # DeepSeek API 配置
|
||||
├── CLAUDE.md # 组件开发规范
|
||||
├── bridge.py # 主入口(串联完整 Pipeline)
|
||||
├── text_assembler.py # MinerU JSON → 按页纯文本 + 偏移映射
|
||||
├── entity_extractor.py # LangExtract + DeepSeek 封装
|
||||
├── kg_builder.py # KG 节点去重 + 边生成
|
||||
└── output/
|
||||
├── kg_nodes.json # 知识图谱节点(9,851 bytes)
|
||||
└── kg_edges.json # 知识图谱边(129,093 bytes)
|
||||
```
|
||||
|
||||
### 1.4 运行命令
|
||||
|
||||
```bash
|
||||
# 使用默认测试输入
|
||||
F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe F:/GraphRAGAgent/graphrag_pipeline/bridge.py
|
||||
|
||||
# 指定输入文件
|
||||
F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe F:/GraphRAGAgent/graphrag_pipeline/bridge.py path/to/content_list.json
|
||||
|
||||
# 指定输入目录(自动查找 *_content_list.json)
|
||||
F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe F:/GraphRAGAgent/graphrag_pipeline/bridge.py path/to/output_dir/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. 实际本地输出文档规范
|
||||
|
||||
### 2.1 测试运行结果
|
||||
|
||||
- **输入文件**: `F:\GraphRAGAgent\mineru_mvp\output\test_sample\8a719db4-2b50-405b-826d-7bb27b224fa0_content_list.json`
|
||||
- **输入规模**: 10 blocks(9 text + 1 table),1 页,2102 字符
|
||||
- **抽取结果**: 45 raw extractions → 40 去重节点,780 CO_OCCURS_IN 边
|
||||
- **对齐质量**: 全部 40 节点均为 `match_exact`(1 个 `match_fuzzy` 已被过滤)
|
||||
- **执行时间**: ~22s(DeepSeek API 调用)
|
||||
|
||||
### 2.2 kg_nodes.json — 实际输出
|
||||
|
||||
**文件大小**: 9,851 bytes | **节点数**: 40
|
||||
|
||||
**节点类型分布**:
|
||||
|
||||
| 类型 | 数量 | 示例 |
|
||||
|------|------|------|
|
||||
| TECHNOLOGY | 4 | GraphRAG, MinerU, LLMs, LangExtract |
|
||||
| CONCEPT | 36 | knowledge graphs, retrieval-augmented generation, multi-hop reasoning |
|
||||
|
||||
**节点格式(实际样例)**:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "node_0",
|
||||
"name": "GraphRAG",
|
||||
"type": "TECHNOLOGY",
|
||||
"source_doc": "8a719db4-2b50-405b-826d-7bb27b224fa0",
|
||||
"char_start": 0,
|
||||
"char_end": 8,
|
||||
"confidence": "match_exact",
|
||||
"page": 0
|
||||
}
|
||||
```
|
||||
|
||||
**完整节点列表(前 10 个)**:
|
||||
|
||||
| id | name | type | confidence |
|
||||
|----|------|------|-----------|
|
||||
| node_0 | GraphRAG | TECHNOLOGY | match_exact |
|
||||
| node_1 | Knowledge Graph Enhanced RAG System | CONCEPT | match_exact |
|
||||
| node_2 | retrieval-augmented generation | CONCEPT | match_exact |
|
||||
| node_3 | knowledge graphs | CONCEPT | match_exact |
|
||||
| node_4 | large language models | CONCEPT | match_exact |
|
||||
| node_5 | question answering | CONCEPT | match_exact |
|
||||
| node_6 | document collections | CONCEPT | match_exact |
|
||||
| node_7 | RAG systems | CONCEPT | match_exact |
|
||||
| node_8 | vector similarity search | CONCEPT | match_exact |
|
||||
| node_9 | hierarchical knowledge graph | CONCEPT | match_exact |
|
||||
|
||||
### 2.3 kg_edges.json — 实际输出
|
||||
|
||||
**文件大小**: 129,093 bytes | **边数**: 780
|
||||
|
||||
**数学验证**: 40 个节点全部在同一页 → C(40,2) = 40×39/2 = 780 条边 ✓
|
||||
|
||||
**边格式(实际样例)**:
|
||||
|
||||
```json
|
||||
{
|
||||
"source": "node_0",
|
||||
"target": "node_1",
|
||||
"relation": "CO_OCCURS_IN",
|
||||
"doc_id": "8a719db4-2b50-405b-826d-7bb27b224fa0",
|
||||
"page": 0
|
||||
}
|
||||
```
|
||||
|
||||
**完整性校验结果**:
|
||||
- 自环数: 0 ✓
|
||||
- 重复边数: 0 ✓
|
||||
- 关系类型: 全部为 `CO_OCCURS_IN` ✓
|
||||
|
||||
---
|
||||
|
||||
## 3. MinerU Pipeline 关键参数规范
|
||||
|
||||
### 3.1 输入格式:content_list.json
|
||||
|
||||
MinerU 解析 PDF 后输出的 `{uuid}_content_list.json` 是一个 JSON 数组,每个元素代表一个内容块。
|
||||
|
||||
**text block 结构**:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "text",
|
||||
"text": "GraphRAG: Knowledge Graph Enhanced RAG System...",
|
||||
"text_level": null,
|
||||
"page_idx": 0,
|
||||
"bbox": [72, 43, 523, 57]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `type` | string | 块类型:`"text"` \| `"table"` \| `"image"` |
|
||||
| `text` | string | 文本内容(末尾可能有空格) |
|
||||
| `text_level` | int \| null | `null`=正文,`1`=一级标题 |
|
||||
| `page_idx` | int | 页码(从 0 开始) |
|
||||
| `bbox` | list[int] | 边界框坐标 `[x0, y0, x1, y1]`(归一化 0-1000) |
|
||||
|
||||
**table block 结构**:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "table",
|
||||
"table_body": "<table><tr><th>Method</th><th>Score</th></tr>...</table>",
|
||||
"table_caption": [],
|
||||
"page_idx": 0,
|
||||
"bbox": [72, 400, 523, 500]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `table_body` | string | HTML `<table>` 标签完整内容 |
|
||||
| `table_caption` | list | 表格标题(通常为空数组) |
|
||||
|
||||
### 3.2 关键约束
|
||||
|
||||
- 文件命名: `{uuid}_content_list.json`,UUID 用作 source_doc_id
|
||||
- block 排列顺序与 PDF 阅读顺序一致
|
||||
- `text` 字段末尾可能有多余空格,需 `.rstrip()` 处理
|
||||
- `image` 类型块不含可提取文本,Bridge 跳过处理
|
||||
|
||||
---
|
||||
|
||||
## 4. LangExtract Pipeline 关键参数规范
|
||||
|
||||
### 4.1 模型配置
|
||||
|
||||
```python
|
||||
from langextract.providers.openai import OpenAILanguageModel
|
||||
|
||||
model = OpenAILanguageModel(
|
||||
model_id="deepseek-chat",
|
||||
api_key=DEEPSEEK_API_KEY,
|
||||
base_url="https://api.deepseek.com",
|
||||
)
|
||||
```
|
||||
|
||||
**重要**: 必须直接实例化 `OpenAILanguageModel`,不能使用 `model_id` 路由。LangExtract 的 `model_id` 同时用于内部路由和 API 请求参数,DeepSeek 不识别 GPT 模型名称。
|
||||
|
||||
### 4.2 抽取调用
|
||||
|
||||
```python
|
||||
result = lx.extract(
|
||||
text_or_documents=page_text, # 纯文本字符串
|
||||
prompt_description=PROMPT, # 实体类型描述
|
||||
examples=EXAMPLES, # Few-shot 示例
|
||||
model=model, # 直接传入模型实例
|
||||
show_progress=True,
|
||||
)
|
||||
```
|
||||
|
||||
### 4.3 Prompt 配置
|
||||
|
||||
```
|
||||
Extract named entities from the text in order of appearance.
|
||||
Entity types:
|
||||
TECHNOLOGY — software, algorithms, models, tools
|
||||
ORGANIZATION — companies, research groups, institutions
|
||||
PERSON — individual people
|
||||
LOCATION — places, geographic entities
|
||||
CONCEPT — technical concepts, methodologies, frameworks
|
||||
```
|
||||
|
||||
### 4.4 Few-shot 示例
|
||||
|
||||
验证可用的示例(MVP 测试 94.1% match_exact):
|
||||
|
||||
```python
|
||||
lx.data.ExampleData(
|
||||
text="LangChain is a framework created by Harrison Chase for building "
|
||||
"LLM applications. It integrates with OpenAI models and Pinecone "
|
||||
"vector database for semantic search.",
|
||||
extractions=[
|
||||
lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
|
||||
lx.data.Extraction(extraction_class="PERSON", extraction_text="Harrison Chase"),
|
||||
lx.data.Extraction(extraction_class="CONCEPT", extraction_text="LLM applications"),
|
||||
lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="OpenAI models"),
|
||||
lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="Pinecone"),
|
||||
lx.data.Extraction(extraction_class="CONCEPT", extraction_text="semantic search"),
|
||||
],
|
||||
)
|
||||
```
|
||||
|
||||
### 4.5 输出格式:AnnotatedDocument
|
||||
|
||||
每页抽取返回一个 `AnnotatedDocument`,其 `extractions` 列表中每个元素包含:
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `extraction_text` | string | 实体名称(必须为输入文本的精确子串) |
|
||||
| `extraction_class` | string | 实体类型(TECHNOLOGY/ORGANIZATION/PERSON/LOCATION/CONCEPT) |
|
||||
| `char_interval.start_pos` | int | 在输入文本中的起始字符位置 |
|
||||
| `char_interval.end_pos` | int | 在输入文本中的结束字符位置 |
|
||||
| `alignment_status` | enum | 对齐质量:`match_exact` \| `match_greater` \| `match_lesser` \| `match_fuzzy` \| `None` |
|
||||
| `extraction_index` | int | 抽取序号(从 1 开始) |
|
||||
| `group_index` | int | 组序号(从 0 开始) |
|
||||
|
||||
### 4.6 对齐质量过滤规则
|
||||
|
||||
| alignment_status | 含义 | Bridge 处理 |
|
||||
|-----------------|------|------------|
|
||||
| `match_exact` | LLM 输出与原文完全匹配 | ✅ 接受 |
|
||||
| `match_greater` | LLM 输出是原文子串的超集 | ✅ 接受 |
|
||||
| `match_lesser` | LLM 输出是原文子串的子集 | ✅ 接受 |
|
||||
| `match_fuzzy` | 模糊匹配,偏移不可靠 | ❌ 过滤 |
|
||||
| `None` | 无法对齐 | ❌ 过滤 |
|
||||
|
||||
---
|
||||
|
||||
## 5. MinerU ↔ LangExtract 接口对接规范
|
||||
|
||||
### 5.1 核心挑战
|
||||
|
||||
MinerU 输出结构化 JSON 块(含 HTML 表格),而 LangExtract 仅接受纯文本 `str`。Bridge 的 `text_assembler` 模块负责转换和偏移映射。
|
||||
|
||||
### 5.2 对接转换规则
|
||||
|
||||
| 对接点 | MinerU 规范 | LangExtract 规范 | Bridge 处理 |
|
||||
|--------|------------|-----------------|------------|
|
||||
| 输入格式 | `content_list.json`(JSON 数组) | 仅接受纯文本 `str` | `text_assembler` 拼接转换 |
|
||||
| 文本块 | `block["text"]`,末尾可能有空格 | `extraction_text` 须为原文精确子串 | `.rstrip()` 去尾部空格 |
|
||||
| 表格块 | `table_body` 是 `<table>` HTML | 不接受 HTML | BeautifulSoup 转 pipe 分隔纯文本 |
|
||||
| 标题判断 | `text_level` 为 `null` 或缺失=正文,有值(如 `1`)=标题 | 不区分标题/正文 | 标题和正文一起拼入文本 |
|
||||
| 坐标系 | bbox 归一化 0-1000 | char_interval 基于输入字符 | BlockSpan 记录偏移映射 |
|
||||
| 分页 | `page_idx` 区分不同页 | 单次调用处理一段文本 | 逐页分别调用 `lx.extract()` |
|
||||
| 文件名 | `{uuid}_content_list.json` | — | glob `*_content_list.json` 匹配 |
|
||||
|
||||
### 5.3 文本拼接算法
|
||||
|
||||
```
|
||||
输入: content_list (按 page_idx 分组)
|
||||
输出: PageText 列表
|
||||
|
||||
对每页:
|
||||
cursor = 0
|
||||
对每个 block (保持原顺序):
|
||||
if type == "text":
|
||||
block_text = block["text"].rstrip()
|
||||
elif type == "table":
|
||||
block_text = html_table_to_text(block["table_body"])
|
||||
else:
|
||||
跳过 (image / equation 等)
|
||||
|
||||
记录 BlockSpan(char_start=cursor, char_end=cursor+len(block_text))
|
||||
buffer.append(block_text + "\n")
|
||||
cursor += len(block_text) + 1
|
||||
|
||||
PageText.text = "".join(buffer).rstrip("\n")
|
||||
```
|
||||
|
||||
### 5.4 偏移映射数据结构
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass
|
||||
class BlockSpan:
|
||||
block_index: int # content_list 数组下标
|
||||
block_type: str # "text" | "table"
|
||||
page_idx: int # 页码
|
||||
char_start: int # 在拼接文本中的起始位置
|
||||
char_end: int # 在拼接文本中的结束位置(不含)
|
||||
bbox: list[int] # MinerU 原始 bbox
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PageText:
|
||||
page_idx: int # 页码
|
||||
text: str # 拼接后的纯文本
|
||||
block_spans: list[BlockSpan] # 每个 block 在 text 中的位置
|
||||
```
|
||||
|
||||
### 5.5 HTML 表格转换
|
||||
|
||||
```python
|
||||
def html_table_to_text(table_body: str) -> str:
|
||||
"""Convert <table> HTML → pipe-delimited plain text"""
|
||||
soup = BeautifulSoup(table_body, "html.parser")
|
||||
rows = []
|
||||
for tr in soup.find_all("tr"):
|
||||
cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
|
||||
rows.append(" | ".join(cells))
|
||||
return "\n".join(rows)
|
||||
```
|
||||
|
||||
转换示例:
|
||||
|
||||
```html
|
||||
<table><tr><th>Method</th><th>Score</th></tr><tr><td>GraphRAG</td><td>0.85</td></tr></table>
|
||||
```
|
||||
|
||||
→
|
||||
|
||||
```
|
||||
Method | Score
|
||||
GraphRAG | 0.85
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Bridge Pipeline 最终输出关键参数规范
|
||||
|
||||
### 6.1 kg_nodes.json
|
||||
|
||||
**文件路径**: `graphrag_pipeline/output/kg_nodes.json`
|
||||
|
||||
**结构**: JSON 数组,每个元素为一个去重后的实体节点。
|
||||
|
||||
| 字段 | 类型 | 说明 | 示例 |
|
||||
|------|------|------|------|
|
||||
| `id` | string | 节点唯一标识,格式 `node_{index}` | `"node_0"` |
|
||||
| `name` | string | 实体名称(原文子串) | `"GraphRAG"` |
|
||||
| `type` | string | 实体类型 | `"TECHNOLOGY"` |
|
||||
| `source_doc` | string | 来源文档 UUID | `"8a719db4-2b50-405b-826d-7bb27b224fa0"` |
|
||||
| `char_start` | int | 在拼接文本中的起始字符位置 | `0` |
|
||||
| `char_end` | int | 在拼接文本中的结束字符位置 | `8` |
|
||||
| `confidence` | string | 对齐质量(仅 `match_exact`/`match_greater`/`match_lesser`) | `"match_exact"` |
|
||||
| `page` | int | 来源页码(从 0 开始) | `0` |
|
||||
|
||||
**去重规则**: key = `(name.lower(), type)`,保留首次出现的实体。
|
||||
|
||||
**实体类型枚举**:
|
||||
|
||||
| 类型 | 说明 |
|
||||
|------|------|
|
||||
| `TECHNOLOGY` | 软件、算法、模型、工具 |
|
||||
| `ORGANIZATION` | 公司、研究机构 |
|
||||
| `PERSON` | 个人 |
|
||||
| `LOCATION` | 地理位置 |
|
||||
| `CONCEPT` | 技术概念、方法论、框架 |
|
||||
|
||||
### 6.2 kg_edges.json
|
||||
|
||||
**文件路径**: `graphrag_pipeline/output/kg_edges.json`
|
||||
|
||||
**结构**: JSON 数组,每个元素为一条同页共现关系边。
|
||||
|
||||
| 字段 | 类型 | 说明 | 示例 |
|
||||
|------|------|------|------|
|
||||
| `source` | string | 源节点 ID | `"node_0"` |
|
||||
| `target` | string | 目标节点 ID | `"node_1"` |
|
||||
| `relation` | string | 关系类型(固定 `"CO_OCCURS_IN"`) | `"CO_OCCURS_IN"` |
|
||||
| `doc_id` | string | 来源文档 UUID | `"8a719db4-..."` |
|
||||
| `page` | int | 共现页码 | `0` |
|
||||
|
||||
**边生成规则**:
|
||||
1. 按页分组所有去重后的节点 ID
|
||||
2. 同页节点两两配对 → 生成 `CO_OCCURS_IN` 边
|
||||
3. 边方向规范化: `source < target`(字典序)
|
||||
4. 去重 key: `(source, target, doc_id, page)`
|
||||
5. 无自环(source ≠ target)
|
||||
|
||||
**边数公式**: 若某页有 N 个节点,则该页产生 C(N,2) = N×(N-1)/2 条边。
|
||||
|
||||
### 6.3 输出完整性约束
|
||||
|
||||
| 约束 | 说明 |
|
||||
|------|------|
|
||||
| 节点 ID 唯一 | 每个节点的 `id` 字段全局唯一 |
|
||||
| 边引用合法 | 每条边的 `source` 和 `target` 必须对应存在的节点 `id` |
|
||||
| 无自环 | 不存在 `source == target` 的边 |
|
||||
| 无重复边 | 同一 `(source, target, doc_id, page)` 组合仅出现一次 |
|
||||
| 对齐质量保证 | 所有节点的 `confidence` 仅为 accepted 值(非 fuzzy/null) |
|
||||
| char 偏移有效 | `char_start < char_end`,且可定位到拼接文本中的实体子串 |
|
||||
|
||||
---
|
||||
|
||||
## 7. 虚拟环境规范
|
||||
|
||||
Bridge Pipeline **复用 LangExtract 的虚拟环境**,不单独创建 venv。
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|------|
|
||||
| 虚拟环境路径 | `F:\GraphRAGAgent\langextract_src\.venv\` |
|
||||
| Python 版本 | 3.12 |
|
||||
| 核心依赖 | `langextract[all]`、`beautifulsoup4`、`python-dotenv` |
|
||||
| 安装新依赖 | `uv pip install <pkg> --python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe` |
|
||||
|
||||
**所有 Python 命令必须使用该虚拟环境运行,禁止使用全局 Python 或其他组件的 venv。**
|
||||
|
||||
---
|
||||
|
||||
## 8. 环境配置
|
||||
|
||||
### 8.1 .env 文件
|
||||
|
||||
位置: `F:\GraphRAGAgent\graphrag_pipeline\.env`
|
||||
|
||||
```env
|
||||
DEEPSEEK_API_KEY=<your-api-key>
|
||||
DEEPSEEK_BASE_URL=https://api.deepseek.com
|
||||
```
|
||||
|
||||
### 8.2 依赖安装
|
||||
|
||||
```bash
|
||||
uv pip install beautifulsoup4 python-dotenv --python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. 测试验证清单
|
||||
|
||||
- [x] text_assembler 正确读取 content_list.json(10 blocks: 9 text + 1 table)
|
||||
- [x] 表格 HTML 转为 pipe 分隔纯文本,无 HTML 标签残留
|
||||
- [x] 按页拼接文本长度合理(2102 字符/页)
|
||||
- [x] LangExtract 成功调用 DeepSeek 返回 AnnotatedDocument
|
||||
- [x] 抽取实体数 45,match_exact 占比 > 95%
|
||||
- [x] kg_nodes.json 节点已去重(40 个),每个节点有完整字段
|
||||
- [x] kg_edges.json 边为 CO_OCCURS_IN 关系(780 条),无自环,无重复
|
||||
- [x] match_fuzzy 对齐的实体已被过滤(1 个)
|
||||
1232
docs/frontend_design_specification-v1.0.md
Normal file
1232
docs/frontend_design_specification-v1.0.md
Normal file
File diff suppressed because it is too large
Load Diff
604
docs/langextract_specification-v1.0.md
Normal file
604
docs/langextract_specification-v1.0.md
Normal file
@@ -0,0 +1,604 @@
|
||||
# LangExtract Pipeline 规范文档 v1.0
|
||||
|
||||
> 基于 [google/langextract](https://github.com/google/langextract) 源码分析 + MVP 实测验证
|
||||
> 版本基线:2026-03-04 main 分支
|
||||
> 本地源码路径:`F:\GraphRAGAgent\langextract_src\`
|
||||
> 测试脚本路径:`F:\GraphRAGAgent\langextract_src\mvp_test_deepseek.py`
|
||||
|
||||
---
|
||||
|
||||
## 目录
|
||||
|
||||
- [〇、虚拟环境](#〇虚拟环境)
|
||||
- [一、Pipeline 执行流程](#一pipeline-执行流程)
|
||||
- [1.1 完整执行链路](#11-完整执行链路)
|
||||
- [1.2 MVP 测试脚本](#12-mvp-测试脚本)
|
||||
- [1.3 输入规范](#13-输入规范)
|
||||
- [1.4 不支持的输入格式](#14-不支持的输入格式)
|
||||
- [二、模型接入规范](#二模型接入规范)
|
||||
- [2.1 模型路由机制](#21-模型路由机制)
|
||||
- [2.2 DeepSeek 接入(实测验证)](#22-deepseek-接入实测验证)
|
||||
- [2.3 路由陷阱与规避方案](#23-路由陷阱与规避方案)
|
||||
- [2.4 OpenAI Provider 构造参数](#24-openai-provider-构造参数)
|
||||
- [三、关键参数规范](#三关键参数规范)
|
||||
- [3.1 extract() 核心参数](#31-extract-核心参数)
|
||||
- [3.2 ExampleData 示例数据格式](#32-exampledata-示例数据格式)
|
||||
- [3.3 Extraction 示例条目格式](#33-extraction-示例条目格式)
|
||||
- [3.4 分块参数](#34-分块参数)
|
||||
- [3.5 Resolver 对齐参数](#35-resolver-对齐参数)
|
||||
- [四、输出数据格式规范](#四输出数据格式规范)
|
||||
- [4.1 JSONL 输出文件(实际生成)](#41-jsonl-输出文件实际生成)
|
||||
- [4.2 AnnotatedDocument 顶层结构](#42-annotateddocument-顶层结构)
|
||||
- [4.3 Extraction 字段规范(实测对比)](#43-extraction-字段规范实测对比)
|
||||
- [4.4 CharInterval 字符锚点](#44-charinterval-字符锚点)
|
||||
- [4.5 AlignmentStatus 对齐状态枚举](#45-alignmentstatus-对齐状态枚举)
|
||||
- [4.6 extraction_summary.json(自定义摘要)](#46-extraction_summaryjson自定义摘要)
|
||||
- [五、本地生成文件清单](#五本地生成文件清单)
|
||||
- [附录:环境变量与常量速查](#附录环境变量与常量速查)
|
||||
|
||||
---
|
||||
|
||||
## 〇、虚拟环境
|
||||
|
||||
本组件使用独立的 Python 虚拟环境,与项目其他组件(MinerU MVP、GraphRAG Pipeline 等)完全隔离。
|
||||
|
||||
**所有 Python 命令必须在本组件的虚拟环境中运行,禁止使用全局 Python 或其他组件的 venv。**
|
||||
|
||||
### 环境信息
|
||||
|
||||
- 虚拟环境路径:`F:\GraphRAGAgent\langextract_src\.venv\`
|
||||
- Python 版本:3.12
|
||||
- 创建工具:uv
|
||||
- 安装方式:`uv pip install -e ".[all]"` (含 openai、google-genai 等 60 个包)
|
||||
|
||||
### 运行方式
|
||||
|
||||
**方式一:直接使用 venv 内的 Python 解释器(推荐)**
|
||||
|
||||
```bash
|
||||
F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe mvp_test_deepseek.py
|
||||
```
|
||||
|
||||
**方式二:先激活环境再运行**
|
||||
|
||||
```bash
|
||||
cd F:/GraphRAGAgent/langextract_src
|
||||
source .venv/Scripts/activate
|
||||
python mvp_test_deepseek.py
|
||||
```
|
||||
|
||||
### 安装新依赖
|
||||
|
||||
```bash
|
||||
uv pip install <package> --python F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 一、Pipeline 执行流程
|
||||
|
||||
### 1.1 完整执行链路
|
||||
|
||||
基于 MVP 实测验证的完整 Pipeline 分为 6 个步骤(Step 0 ~ Step 5):
|
||||
|
||||
```
|
||||
Step 0: 激活虚拟环境
|
||||
└── F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe
|
||||
|
||||
Step 1: 准备输入
|
||||
├── 构造纯文本字符串(str)
|
||||
├── 或构造 Document 对象列表
|
||||
└── LangExtract 仅接受纯文本,PDF/DOCX 等需前置解析
|
||||
|
||||
Step 2: 构造 Few-shot 示例
|
||||
├── 创建 ExampleData 对象列表
|
||||
├── 每个 ExampleData 包含:text(示例文本) + extractions(标注实体列表)
|
||||
└── extraction_text 必须是 text 的精确子串
|
||||
|
||||
Step 3: 配置模型并调用 extract()
|
||||
├── 直接实例化 OpenAILanguageModel(DeepSeek 场景)
|
||||
├── 传入 model_id="deepseek-chat", base_url, api_key
|
||||
└── 调用 lx.extract(text_or_documents=..., examples=..., model=model)
|
||||
|
||||
Step 4: LangExtract 内部处理
|
||||
├── 文本分块(基于句子边界,max_char_buffer=1000)
|
||||
├── 构造 Prompt(含 prompt_description + examples)
|
||||
├── 调用 LLM 推理(JSON 格式输出)
|
||||
├── 解析 LLM JSON 响应为 Extraction 对象
|
||||
└── 字符级对齐(char_interval + alignment_status)
|
||||
|
||||
Step 5: 保存输出
|
||||
├── lx.io.save_annotated_documents() → JSONL 文件
|
||||
└── 自定义 JSON 摘要(可选)
|
||||
```
|
||||
|
||||
### 1.2 MVP 测试脚本
|
||||
|
||||
**文件路径:** `F:\GraphRAGAgent\langextract_src\mvp_test_deepseek.py`
|
||||
|
||||
**执行命令:**
|
||||
|
||||
```bash
|
||||
F:/GraphRAGAgent/langextract_src/.venv/Scripts/python.exe mvp_test_deepseek.py
|
||||
```
|
||||
|
||||
**脚本核心流程:**
|
||||
|
||||
```python
|
||||
from langextract.providers.openai import OpenAILanguageModel
|
||||
|
||||
# Step 1: 直接实例化 OpenAI Provider(指向 DeepSeek)
|
||||
model = OpenAILanguageModel(
|
||||
model_id="deepseek-chat",
|
||||
api_key="sk-...",
|
||||
base_url="https://api.deepseek.com",
|
||||
)
|
||||
|
||||
# Step 2: 构造示例数据
|
||||
examples = [
|
||||
lx.data.ExampleData(
|
||||
text="LangChain is a framework created by Harrison Chase...",
|
||||
extractions=[
|
||||
lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
|
||||
lx.data.Extraction(extraction_class="PERSON", extraction_text="Harrison Chase"),
|
||||
...
|
||||
],
|
||||
)
|
||||
]
|
||||
|
||||
# Step 3: 调用抽取
|
||||
result = lx.extract(
|
||||
text_or_documents=input_text,
|
||||
prompt_description="Extract named entities...",
|
||||
examples=examples,
|
||||
model=model,
|
||||
show_progress=True,
|
||||
)
|
||||
|
||||
# Step 4: 保存结果
|
||||
lx.io.save_annotated_documents([result], output_name="graphrag_entities.jsonl", output_dir="mvp_output")
|
||||
```
|
||||
|
||||
**实测结果:**
|
||||
|
||||
| 指标 | 值 |
|
||||
|------|-----|
|
||||
| 输入文本长度 | 520 字符 |
|
||||
| 模型 | deepseek-chat |
|
||||
| 耗时 | 21.6 秒 |
|
||||
| 提取实体数 | 17 |
|
||||
| 实体类型分布 | TECHNOLOGY: 9, CONCEPT: 7, ORGANIZATION: 1 |
|
||||
| 精确匹配率 | 16/17 (94.1%) — 仅 1 个 match_fuzzy |
|
||||
| 输出文件 | 2 个(JSONL + JSON 摘要) |
|
||||
|
||||
### 1.3 输入规范
|
||||
|
||||
LangExtract **仅接受纯文本**作为输入,支持以下 4 种传入方式:
|
||||
|
||||
| 输入方式 | 示例 | 说明 |
|
||||
|---------|------|------|
|
||||
| **纯文本字符串** | `extract("这是一段文本...")` | 直接传入文本内容(MVP 实测使用此方式) |
|
||||
| **URL** | `extract("https://example.com/article.txt")` | 自动下载 URL 文本内容(`fetch_urls=True`) |
|
||||
| **Document 对象** | `extract([Document(text="...", document_id="doc1")])` | 传入 Document 可迭代集合 |
|
||||
| **CSV 文件** | 通过 `Dataset` 类加载后传入 | 指定 text 列和 id 列 |
|
||||
|
||||
### 1.4 不支持的输入格式
|
||||
|
||||
以下格式 **不被支持**,需要在 LangExtract 之前通过外部工具预处理为纯文本:
|
||||
|
||||
| 格式 | 状态 | 预处理方案 |
|
||||
|------|------|-----------|
|
||||
| PDF | ❌ 不支持 | 使用 MinerU / PyMuPDF 先转文本 |
|
||||
| DOCX | ❌ 不支持 | 使用 python-docx 先转文本 |
|
||||
| HTML | ❌ 不支持 | 使用 BeautifulSoup 先提取文本 |
|
||||
| 图片 | ❌ 不支持 | 使用 OCR 工具先识别文本 |
|
||||
| Markdown(含媒体) | ❌ 不支持 | 需提取纯文本部分 |
|
||||
| Excel / JSON | ❌ 不支持 | 需序列化为纯文本 |
|
||||
|
||||
---
|
||||
|
||||
## 二、模型接入规范
|
||||
|
||||
### 2.1 模型路由机制
|
||||
|
||||
文件路径:`langextract/providers/patterns.py`
|
||||
|
||||
LangExtract 通过 **正则匹配 `model_id`** 自动路由到对应的 Provider:
|
||||
|
||||
| Provider | 匹配模式 | 优先级 | 示例模型 |
|
||||
|----------|---------|--------|---------|
|
||||
| **Gemini** | `^gemini` | 10 | `gemini-2.5-flash`, `gemini-1.5-pro` |
|
||||
| **OpenAI** | `^gpt-4`, `^gpt4.`, `^gpt-5`, `^gpt5.` | 10 | `gpt-4o`, `gpt-4o-mini` |
|
||||
| **Ollama** | `gemma`, `llama`, `mistral`, `phi`, `qwen`, `deepseek` 等 | 10 | `gemma2:2b`, `llama3.2:1b` |
|
||||
|
||||
### 2.2 DeepSeek 接入(实测验证)
|
||||
|
||||
> **重要发现:** 规范文档 v0 中描述的 `model_id="gpt-4o-mini"` + `language_model_params={"base_url": ...}` 方式 **实测不可用**,因为 `model_id` 同时用于路由和 API 调用,DeepSeek 不识别 `gpt-4o-mini` 模型名。
|
||||
|
||||
**正确方式 — 直接实例化 OpenAI Provider:**
|
||||
|
||||
```python
|
||||
import langextract as lx
|
||||
from langextract.providers.openai import OpenAILanguageModel
|
||||
|
||||
model = OpenAILanguageModel(
|
||||
model_id="deepseek-chat", # DeepSeek 实际模型名
|
||||
api_key="sk-your-deepseek-key",
|
||||
base_url="https://api.deepseek.com",
|
||||
)
|
||||
|
||||
result = lx.extract(
|
||||
text_or_documents="...",
|
||||
examples=[...],
|
||||
model=model, # 通过 model 参数传入,绕过路由
|
||||
show_progress=True,
|
||||
)
|
||||
```
|
||||
|
||||
**实测验证状态:** DeepSeek `deepseek-chat` 模型通过此方式成功完成实体抽取,JSON 格式输出正常。
|
||||
|
||||
### 2.3 路由陷阱与规避方案
|
||||
|
||||
| 方案 | 能否工作 | 原因 |
|
||||
|------|---------|------|
|
||||
| `model_id="gpt-4o-mini"` + `language_model_params={"base_url": "https://api.deepseek.com"}` | **不能** | `model_id` 被同时用作 API 调用的 `model` 参数,DeepSeek 返回 `400 Model Not Exist` |
|
||||
| `config=ModelConfig(model_id="deepseek-chat", provider="openai")` | **不能** | `_create_model_with_schema()` 中使用 `provider` 时未先调用 `load_builtins_once()`,导致 `No provider found` 错误(LangExtract 内部 bug) |
|
||||
| `model=OpenAILanguageModel(model_id="deepseek-chat", ...)` | **可以** | 直接实例化绕过路由,`model_id` 正确传递给 DeepSeek API |
|
||||
|
||||
### 2.4 OpenAI Provider 构造参数
|
||||
|
||||
文件路径:`langextract/providers/openai.py`
|
||||
|
||||
```python
|
||||
class OpenAILanguageModel(BaseLanguageModel):
|
||||
def __init__(
|
||||
self,
|
||||
model_id: str = 'gpt-4o-mini',
|
||||
api_key: str | None = None,
|
||||
base_url: str | None = None,
|
||||
organization: str | None = None,
|
||||
format_type: FormatType = FormatType.JSON,
|
||||
temperature: float | None = None,
|
||||
max_workers: int = 10,
|
||||
**kwargs,
|
||||
)
|
||||
```
|
||||
|
||||
| 参数 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `model_id` | `gpt-4o-mini` | 模型标识(同时作为 API 调用的 model 参数) |
|
||||
| `api_key` | `None` | 环境变量:`OPENAI_API_KEY` 或 `LANGEXTRACT_API_KEY` |
|
||||
| `base_url` | `None` | 自定义 API 端点(DeepSeek 使用 `https://api.deepseek.com`) |
|
||||
| `temperature` | `None` | 采样温度 |
|
||||
| `format_type` | `JSON` | 输出格式(JSON Mode) |
|
||||
|
||||
---
|
||||
|
||||
## 三、关键参数规范
|
||||
|
||||
### 3.1 extract() 核心参数
|
||||
|
||||
文件路径:`langextract/extraction.py`
|
||||
|
||||
```python
|
||||
def extract(
|
||||
text_or_documents: typing.Any, # 必填:纯文本或 Document 列表
|
||||
prompt_description: str | None = None, # 抽取提示词
|
||||
examples: typing.Sequence[Any] | None = None, # 必填:Few-shot 示例
|
||||
model_id: str = "gemini-2.5-flash", # 模型标识(用于路由)
|
||||
api_key: str | None = None, # API Key
|
||||
model: typing.Any = None, # 预配置的模型实例(最高优先级)
|
||||
max_char_buffer: int = 1000, # 分块最大字符数
|
||||
temperature: float | None = None, # 采样温度
|
||||
batch_length: int = 10, # 每批分块数
|
||||
max_workers: int = 10, # 最大并行线程
|
||||
additional_context: str | None = None, # 附加上下文
|
||||
resolver_params: dict | None = None, # 对齐参数
|
||||
language_model_params: dict | None = None, # Provider 构造参数
|
||||
extraction_passes: int = 1, # 抽取轮次
|
||||
context_window_chars: int | None = None, # 上下文窗口
|
||||
config: typing.Any = None, # ModelConfig 实例
|
||||
model_url: str | None = None, # 自托管端点
|
||||
show_progress: bool = True, # 显示进度条
|
||||
...
|
||||
) -> list[AnnotatedDocument] | AnnotatedDocument
|
||||
```
|
||||
|
||||
**MVP 实测使用的参数组合:**
|
||||
|
||||
| 参数 | 实测值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `text_or_documents` | 520 字符纯文本 | GraphRAG 领域相关文本 |
|
||||
| `prompt_description` | `"Extract named entities..."` | 指定 TECHNOLOGY/ORGANIZATION/CONCEPT 三类 |
|
||||
| `examples` | 1 个 ExampleData(含 6 个 Extraction) | Few-shot 示例 |
|
||||
| `model` | `OpenAILanguageModel` 实例 | 直接实例化,指向 DeepSeek |
|
||||
| `show_progress` | `True` | 显示进度 |
|
||||
| `max_char_buffer` | 1000(默认) | 文本未超过阈值,未触发分块 |
|
||||
|
||||
### 3.2 ExampleData 示例数据格式
|
||||
|
||||
文件路径:`langextract/core/data.py`
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass
|
||||
class ExampleData:
|
||||
text: str # 示例文本(必填)
|
||||
extractions: list[Extraction] # 标注的实体列表(必填)
|
||||
```
|
||||
|
||||
**MVP 实测示例:**
|
||||
|
||||
```python
|
||||
lx.data.ExampleData(
|
||||
text="LangChain is a framework created by Harrison Chase for building "
|
||||
"LLM applications. It integrates with OpenAI models and Pinecone "
|
||||
"vector database for semantic search.",
|
||||
extractions=[
|
||||
lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="LangChain"),
|
||||
lx.data.Extraction(extraction_class="ORGANIZATION", extraction_text="Harrison Chase"),
|
||||
lx.data.Extraction(extraction_class="CONCEPT", extraction_text="LLM applications"),
|
||||
lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="OpenAI models"),
|
||||
lx.data.Extraction(extraction_class="TECHNOLOGY", extraction_text="Pinecone"),
|
||||
lx.data.Extraction(extraction_class="CONCEPT", extraction_text="semantic search"),
|
||||
],
|
||||
)
|
||||
```
|
||||
|
||||
**约束条件:**
|
||||
- `extraction_text` **必须是** `text` 的精确子串(否则对齐失败)
|
||||
- `extraction_class` 为自定义字符串,无预定义枚举
|
||||
- `examples` 列表不能为空(否则抛出 `ValueError`)
|
||||
- 每个 ExampleData 可包含多个不同 `extraction_class` 的条目
|
||||
|
||||
### 3.3 Extraction 示例条目格式
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass(init=False)
|
||||
class Extraction:
|
||||
extraction_class: str # 必填:实体类型
|
||||
extraction_text: str # 必填:实体文本(须为原文子串)
|
||||
attributes: dict[str, str | list[str]] | None = None # 可选:附加属性
|
||||
description: str | None = None # 可选:实体描述
|
||||
```
|
||||
|
||||
在 examples 中创建时只需要 `extraction_class` 和 `extraction_text`,其余字段由 LangExtract 在推理后自动填充。
|
||||
|
||||
### 3.4 分块参数
|
||||
|
||||
文件路径:`langextract/chunking.py`
|
||||
|
||||
LangExtract 使用基于 **句子边界** 的确定性分块策略:
|
||||
|
||||
| 参数 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `max_char_buffer` | 1000 | 每个分块最大字符数 |
|
||||
| `context_window_chars` | `None` | 前一分块的上下文窗口(用于指代消解) |
|
||||
| `batch_length` | 10 | 每批处理的分块数 |
|
||||
|
||||
**分块策略:**
|
||||
1. 如果单个句子超过 `max_char_buffer`,按换行符拆分
|
||||
2. 如果单个 token 超过 `max_char_buffer`,该 token 独占一个分块
|
||||
3. 如果多个句子可以放入 `max_char_buffer`,合并为一个分块
|
||||
|
||||
> **MVP 实测:** 输入文本 520 字符 < `max_char_buffer`(1000),整段文本作为单一分块处理,未触发分块逻辑。
|
||||
|
||||
### 3.5 Resolver 对齐参数
|
||||
|
||||
通过 `extract()` 的 `resolver_params` 字典传入:
|
||||
|
||||
| 参数 | 类型 | 默认值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `enable_fuzzy_alignment` | `bool` | `True` | 精确匹配失败后是否尝试模糊匹配 |
|
||||
| `fuzzy_alignment_threshold` | `float` | `0.75` | 模糊匹配最低 token 重叠比率 |
|
||||
| `accept_match_lesser` | `bool` | `True` | 是否接受部分精确匹配 |
|
||||
| `suppress_parse_errors` | `bool` | `False` | JSON 解析失败时是否继续 |
|
||||
|
||||
> **MVP 实测:** 未传入 `resolver_params`,使用全部默认值。17 个抽取中 16 个 `match_exact`,1 个 `match_fuzzy`("Microsoft Research")。
|
||||
|
||||
---
|
||||
|
||||
## 四、输出数据格式规范
|
||||
|
||||
### 4.1 JSONL 输出文件(实际生成)
|
||||
|
||||
**文件路径:** `mvp_output/graphrag_entities.jsonl`
|
||||
**文件大小:** 4,650 bytes
|
||||
**格式:** JSONL(JSON Lines),每行一个完整的 JSON 对象
|
||||
|
||||
保存 API:
|
||||
|
||||
```python
|
||||
lx.io.save_annotated_documents(
|
||||
[result],
|
||||
output_name="graphrag_entities.jsonl",
|
||||
output_dir="mvp_output"
|
||||
)
|
||||
```
|
||||
|
||||
### 4.2 AnnotatedDocument 顶层结构
|
||||
|
||||
**实际 JSONL 输出的顶层字段(基于本地生成文件):**
|
||||
|
||||
| 字段 | 类型 | 实测值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `text` | `string` | 520 字符 | 原始输入文本(完整保留) |
|
||||
| `document_id` | `string` | `"doc_8498f2b6"` | 自动生成,格式 `doc_{uuid_hex[:8]}` |
|
||||
| `extractions` | `array[Extraction]` | 17 个元素 | 抽取的实体列表 |
|
||||
|
||||
> **注意:** JSONL 中字段顺序为 `extractions` → `text` → `document_id`(与 dataclass 定义顺序不同,以实际输出为准)。
|
||||
|
||||
### 4.3 Extraction 字段规范(实测对比)
|
||||
|
||||
**实际输出的单条 Extraction 完整结构(摘自本地 JSONL 文件):**
|
||||
|
||||
```json
|
||||
{
|
||||
"extraction_class": "TECHNOLOGY",
|
||||
"extraction_text": "GraphRAG",
|
||||
"char_interval": {
|
||||
"start_pos": 0,
|
||||
"end_pos": 8
|
||||
},
|
||||
"alignment_status": "match_exact",
|
||||
"extraction_index": 1,
|
||||
"group_index": 0,
|
||||
"description": null,
|
||||
"attributes": {}
|
||||
}
|
||||
```
|
||||
|
||||
**实测字段对比(官方 Schema vs 实际输出):**
|
||||
|
||||
| 字段 | 官方 Schema | 实际输出 | 差异说明 |
|
||||
|------|------------|---------|---------|
|
||||
| `extraction_class` | `string` | `string` | 一致 |
|
||||
| `extraction_text` | `string` | `string` | 一致 |
|
||||
| `char_interval` | `object \| null` | `object`(始终存在) | 实测 17 个全部有值 |
|
||||
| `alignment_status` | `string \| null` | `string`(始终存在) | 实测 17 个全部有值 |
|
||||
| `extraction_index` | `int \| null` | `int`(从 1 开始) | **实测从 1 开始,非 0** |
|
||||
| `group_index` | `int \| null` | `int`(从 0 开始) | 实测从 0 开始递增 |
|
||||
| `description` | `string \| null` | `null` | 未使用 description 提示时为 null |
|
||||
| `attributes` | `dict \| null` | `{}`(空对象) | **实测为空对象 `{}`,非 `null`** |
|
||||
| `token_interval` | `object \| null` | **不存在** | **实际 JSONL 输出中无此字段** |
|
||||
|
||||
**关键差异总结:**
|
||||
|
||||
1. `extraction_index` 从 **1** 开始(非 0)
|
||||
2. `attributes` 未使用时输出空对象 `{}`(非 `null`)
|
||||
3. `token_interval` 字段 **不在 JSONL 输出中**(仅存在于内存对象)
|
||||
|
||||
### 4.4 CharInterval 字符锚点
|
||||
|
||||
```json
|
||||
{
|
||||
"start_pos": 0,
|
||||
"end_pos": 8
|
||||
}
|
||||
```
|
||||
|
||||
- `start_pos`:起始位置(包含),0-indexed
|
||||
- `end_pos`:结束位置(不包含)
|
||||
- 语义:`source_text[start_pos:end_pos]` 即为实体在原文中的精确位置
|
||||
|
||||
**实测验证(以 "GraphRAG" 为例):**
|
||||
|
||||
```python
|
||||
text = "GraphRAG is an advanced..."
|
||||
text[0:8] # → "GraphRAG" ✓ 匹配
|
||||
```
|
||||
|
||||
### 4.5 AlignmentStatus 对齐状态枚举
|
||||
|
||||
| 状态值 | 序列化值 | 含义 | 可信度 | MVP 实测数量 |
|
||||
|--------|---------|------|--------|-------------|
|
||||
| `MATCH_EXACT` | `"match_exact"` | LLM 输出与原文完全匹配 | 最高 | **16** |
|
||||
| `MATCH_GREATER` | `"match_greater"` | LLM 输出短于匹配到的原文 | 高 | 0 |
|
||||
| `MATCH_LESSER` | `"match_lesser"` | LLM 输出长于匹配到的原文 | 中 | 0 |
|
||||
| `MATCH_FUZZY` | `"match_fuzzy"` | 模糊匹配 | 低 | **1** |
|
||||
| `None` | `null` | 未找到对齐 | 不可信 | 0 |
|
||||
|
||||
> **实测精确匹配率:** 16/17 = 94.1%。唯一的 `match_fuzzy` 是 "Microsoft Research"。
|
||||
|
||||
### 4.6 extraction_summary.json(自定义摘要)
|
||||
|
||||
**文件路径:** `mvp_output/extraction_summary.json`
|
||||
**文件大小:** 2,863 bytes
|
||||
|
||||
此文件由 MVP 测试脚本自行生成(非 LangExtract 原生输出),结构如下:
|
||||
|
||||
```json
|
||||
{
|
||||
"total_extractions": 17,
|
||||
"extraction_classes": {
|
||||
"TECHNOLOGY": 9,
|
||||
"ORGANIZATION": 1,
|
||||
"CONCEPT": 7
|
||||
},
|
||||
"extractions": [
|
||||
{
|
||||
"class": "TECHNOLOGY",
|
||||
"text": "GraphRAG",
|
||||
"char_start": 0,
|
||||
"char_end": 8,
|
||||
"alignment": "match_exact"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、本地生成文件清单
|
||||
|
||||
MVP 测试后本地实际生成的文件(共 2 个输出文件):
|
||||
|
||||
```
|
||||
langextract_src/
|
||||
├── .env # DeepSeek API Key 配置
|
||||
├── .venv/ # 独立虚拟环境(Python 3.12)
|
||||
├── mvp_test_deepseek.py # MVP 测试脚本
|
||||
└── mvp_output/ # 输出目录
|
||||
├── graphrag_entities.jsonl # LangExtract 原生 JSONL 输出(4,650 bytes)
|
||||
└── extraction_summary.json # 自定义 JSON 摘要(2,863 bytes)
|
||||
```
|
||||
|
||||
| 文件 | 大小 | 来源 | 说明 |
|
||||
|------|------|------|------|
|
||||
| `graphrag_entities.jsonl` | 4,650 bytes | `lx.io.save_annotated_documents()` | LangExtract 原生输出,1 行 JSONL,含 17 个 Extraction |
|
||||
| `extraction_summary.json` | 2,863 bytes | MVP 脚本自定义 | 扁平化摘要,含类型分布统计 |
|
||||
|
||||
---
|
||||
|
||||
## 附录:环境变量与常量速查
|
||||
|
||||
### 环境变量
|
||||
|
||||
| 变量名 | 适用 Provider | 说明 |
|
||||
|--------|--------------|------|
|
||||
| `LANGEXTRACT_API_KEY` | 所有 | 通用 API Key 后备 |
|
||||
| `GEMINI_API_KEY` | Gemini | Gemini API Key |
|
||||
| `OPENAI_API_KEY` | OpenAI | OpenAI / DeepSeek API Key |
|
||||
| `OLLAMA_BASE_URL` | Ollama | Ollama 服务地址(默认 `http://localhost:11434`) |
|
||||
|
||||
### .env 配置(MVP 实测)
|
||||
|
||||
```env
|
||||
OPENAI_API_KEY=sk-your-deepseek-key
|
||||
```
|
||||
|
||||
### 模型优先级
|
||||
|
||||
```
|
||||
model(预配置的模型实例) > config(ModelConfig 实例) > model_id + api_key
|
||||
```
|
||||
|
||||
> **MVP 实测使用 `model` 参数**(最高优先级),直接传入 `OpenAILanguageModel` 实例。
|
||||
|
||||
### 结构化输出支持
|
||||
|
||||
| Provider | Schema 类型 | 结构化输出模式 |
|
||||
|----------|------------|---------------|
|
||||
| Gemini | `GeminiSchema` | 严格结构化输出 |
|
||||
| OpenAI | JSON Mode | 通过 `response_format` 约束 |
|
||||
| Ollama | `FormatModeSchema` | JSON 模式(非严格) |
|
||||
|
||||
### 17 个实测抽取实体完整列表
|
||||
|
||||
| # | extraction_class | extraction_text | char_interval | alignment_status |
|
||||
|---|-----------------|-----------------|---------------|-----------------|
|
||||
| 1 | TECHNOLOGY | GraphRAG | [0, 8] | match_exact |
|
||||
| 2 | ORGANIZATION | Microsoft Research | [75, 93] | match_fuzzy |
|
||||
| 3 | CONCEPT | retrieval-augmented generation | [24, 54] | match_exact |
|
||||
| 4 | CONCEPT | knowledge graphs | [107, 123] | match_exact |
|
||||
| 5 | TECHNOLOGY | GPT-4 | [156, 161] | match_exact |
|
||||
| 6 | CONCEPT | multi-hop reasoning | [172, 191] | match_exact |
|
||||
| 7 | CONCEPT | community detection algorithms | [209, 239] | match_exact |
|
||||
| 8 | TECHNOLOGY | Leiden clustering | [248, 265] | match_exact |
|
||||
| 9 | TECHNOLOGY | MinerU | [315, 321] | match_exact |
|
||||
| 10 | TECHNOLOGY | LangExtract | [344, 355] | match_exact |
|
||||
| 11 | TECHNOLOGY | Neo4j | [383, 388] | match_exact |
|
||||
| 12 | CONCEPT | graph database | [396, 410] | match_exact |
|
||||
| 13 | CONCEPT | pipeline | [424, 432] | match_exact |
|
||||
| 14 | TECHNOLOGY | PDF documents | [443, 456] | match_exact |
|
||||
| 15 | TECHNOLOGY | OCR | [465, 468] | match_exact |
|
||||
| 16 | TECHNOLOGY | NLP | [473, 476] | match_exact |
|
||||
| 17 | CONCEPT | knowledge graph | [504, 519] | match_exact |
|
||||
672
docs/langextract_specification.md
Normal file
672
docs/langextract_specification.md
Normal file
@@ -0,0 +1,672 @@
|
||||
# LangExtract Pipeline 规范文档
|
||||
|
||||
> 基于 [google/langextract](https://github.com/google/langextract) 源码分析
|
||||
> 版本基线:2026-03-04 main 分支
|
||||
|
||||
---
|
||||
|
||||
## 目录
|
||||
|
||||
- [一、输入规范](#一输入规范)
|
||||
- [1.1 核心入口函数签名](#11-核心入口函数签名)
|
||||
- [1.2 支持的输入类型](#12-支持的输入类型)
|
||||
- [1.3 Document 数据结构](#13-document-数据结构)
|
||||
- [1.4 CSV Dataset 输入](#14-csv-dataset-输入)
|
||||
- [1.5 URL 文本下载](#15-url-文本下载)
|
||||
- [1.6 分块参数配置](#16-分块参数配置)
|
||||
- [1.7 不支持的输入格式](#17-不支持的输入格式)
|
||||
- [二、模型接入规范](#二模型接入规范)
|
||||
- [2.1 模型路由机制](#21-模型路由机制)
|
||||
- [2.2 Gemini Provider](#22-gemini-provider)
|
||||
- [2.3 OpenAI Provider](#23-openai-provider)
|
||||
- [2.4 Ollama Provider](#24-ollama-provider)
|
||||
- [2.5 OpenAI 兼容接口适配(DeepSeek 等)](#25-openai-兼容接口适配deepseek-等)
|
||||
- [2.6 模型优先级与配置覆盖关系](#26-模型优先级与配置覆盖关系)
|
||||
- [2.7 关于 Embedding 模型](#27-关于-embedding-模型)
|
||||
- [三、输出数据格式规范](#三输出数据格式规范)
|
||||
- [3.1 AnnotatedDocument 结构](#31-annotateddocument-结构)
|
||||
- [3.2 Extraction 结构](#32-extraction-结构)
|
||||
- [3.3 CharInterval 字符锚点](#33-charinterval-字符锚点)
|
||||
- [3.4 AlignmentStatus 对齐状态枚举](#34-alignmentstatus-对齐状态枚举)
|
||||
- [3.5 Resolver 对齐参数](#35-resolver-对齐参数)
|
||||
- [3.6 JSONL 输出文件格式](#36-jsonl-输出文件格式)
|
||||
- [3.7 完整输出 JSON Schema 示例](#37-完整输出-json-schema-示例)
|
||||
- [3.8 HTML 可视化输出](#38-html-可视化输出)
|
||||
- [附录:环境变量与常量速查](#附录环境变量与常量速查)
|
||||
|
||||
---
|
||||
|
||||
## 一、输入规范
|
||||
|
||||
### 1.1 核心入口函数签名
|
||||
|
||||
文件路径:`langextract/extraction.py`
|
||||
|
||||
```python
|
||||
def extract(
|
||||
text_or_documents: typing.Any,
|
||||
prompt_description: str | None = None,
|
||||
examples: typing.Sequence[typing.Any] | None = None,
|
||||
model_id: str = "gemini-2.5-flash",
|
||||
api_key: str | None = None,
|
||||
language_model_type: typing.Type[typing.Any] | None = None, # 已废弃
|
||||
format_type: typing.Any = None,
|
||||
max_char_buffer: int = 1000,
|
||||
temperature: float | None = None,
|
||||
fence_output: bool | None = None,
|
||||
use_schema_constraints: bool = True,
|
||||
batch_length: int = 10,
|
||||
max_workers: int = 10,
|
||||
additional_context: str | None = None,
|
||||
resolver_params: dict | None = None,
|
||||
language_model_params: dict | None = None,
|
||||
debug: bool = False,
|
||||
model_url: str | None = None,
|
||||
extraction_passes: int = 1,
|
||||
context_window_chars: int | None = None,
|
||||
config: typing.Any = None,
|
||||
model: typing.Any = None,
|
||||
*,
|
||||
fetch_urls: bool = True,
|
||||
prompt_validation_level: PromptValidationLevel = PromptValidationLevel.WARNING,
|
||||
prompt_validation_strict: bool = False,
|
||||
show_progress: bool = True,
|
||||
tokenizer: Tokenizer | None = None,
|
||||
) -> list[AnnotatedDocument] | AnnotatedDocument
|
||||
```
|
||||
|
||||
**关键参数说明:**
|
||||
|
||||
| 参数 | 类型 | 默认值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `text_or_documents` | `Any` | **必填** | 纯文本字符串、URL、或 `Document` 对象的可迭代集合 |
|
||||
| `prompt_description` | `str \| None` | `None` | 抽取提示词,描述需要抽取什么实体 |
|
||||
| `examples` | `Sequence[Any] \| None` | `None` | **必填** — Few-shot 示例列表(为空则抛出 ValueError) |
|
||||
| `model_id` | `str` | `"gemini-2.5-flash"` | 模型标识符,用于自动路由到对应 Provider |
|
||||
| `api_key` | `str \| None` | `None` | LLM API Key(也可通过环境变量设置) |
|
||||
| `max_char_buffer` | `int` | `1000` | 每个文本分块的最大字符数 |
|
||||
| `temperature` | `float \| None` | `None` | 采样温度(`None` 使用模型默认值) |
|
||||
| `use_schema_constraints` | `bool` | `True` | 是否启用结构化输出约束 |
|
||||
| `batch_length` | `int` | `10` | 每批处理的文本分块数量 |
|
||||
| `max_workers` | `int` | `10` | 最大并行工作线程数 |
|
||||
| `additional_context` | `str \| None` | `None` | 附加到推理提示词中的上下文信息 |
|
||||
| `resolver_params` | `dict \| None` | `None` | 对齐解析器参数(见 [3.5 节](#35-resolver-对齐参数)) |
|
||||
| `extraction_passes` | `int` | `1` | 抽取轮次(>1 时多次抽取并合并非重叠结果) |
|
||||
| `context_window_chars` | `int \| None` | `None` | 前一分块的上下文窗口字符数(用于指代消解) |
|
||||
| `model_url` | `str \| None` | `None` | 自托管模型的 API 端点 URL |
|
||||
| `fetch_urls` | `bool` | `True` | 是否自动下载 http(s) URL 内容 |
|
||||
|
||||
---
|
||||
|
||||
### 1.2 支持的输入类型
|
||||
|
||||
LangExtract **仅接受纯文本**作为输入,支持以下 4 种传入方式:
|
||||
|
||||
| 输入方式 | 示例 | 说明 |
|
||||
|---------|------|------|
|
||||
| **纯文本字符串** | `extract("这是一段文本...")` | 直接传入文本内容 |
|
||||
| **URL** | `extract("https://example.com/article.txt")` | 自动下载 URL 文本内容(`fetch_urls=True`) |
|
||||
| **Document 对象** | `extract([Document(text="...", document_id="doc1")])` | 传入 Document 可迭代集合 |
|
||||
| **CSV 文件** | 通过 `Dataset` 类加载后传入 | 指定 text 列和 id 列 |
|
||||
|
||||
---
|
||||
|
||||
### 1.3 Document 数据结构
|
||||
|
||||
文件路径:`langextract/core/data.py`
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass
|
||||
class Document:
|
||||
text: str # 必填 — 原始文本内容
|
||||
additional_context: str | None = None # 可选 — 附加上下文
|
||||
document_id: str # 自动生成 — 格式 "doc_{uuid_hex[:8]}"
|
||||
tokenized_text: TokenizedText # 惰性计算 — 分词后的文本
|
||||
```
|
||||
|
||||
**字段说明:**
|
||||
|
||||
- `text`:**必填**,原始文本内容,类型为 `str`
|
||||
- `additional_context`:可选,会附加到推理提示词中
|
||||
- `document_id`:通过 property 访问,未设置时自动生成格式为 `doc_{uuid_hex[:8]}` 的唯一 ID
|
||||
- `tokenized_text`:通过 property 惰性计算,使用配置的 Tokenizer 进行分词
|
||||
|
||||
---
|
||||
|
||||
### 1.4 CSV Dataset 输入
|
||||
|
||||
文件路径:`langextract/io.py`
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Dataset:
|
||||
input_path: pathlib.Path # CSV 文件路径
|
||||
id_key: str # 文档 ID 对应的列名
|
||||
text_key: str # 文本内容对应的列名
|
||||
|
||||
def load(self, delimiter: str = ',') -> Iterator[Document]:
|
||||
"""仅支持 .csv 后缀文件,其他格式抛出 NotImplementedError"""
|
||||
```
|
||||
|
||||
**CSV 文件要求:**
|
||||
- 文件后缀必须为 `.csv`
|
||||
- 必须包含 `text_key` 指定的文本列和 `id_key` 指定的 ID 列
|
||||
- 默认分隔符为逗号(`,`),可通过 `delimiter` 参数修改
|
||||
- 其他文件格式会直接抛出 `NotImplementedError`
|
||||
|
||||
---
|
||||
|
||||
### 1.5 URL 文本下载
|
||||
|
||||
文件路径:`langextract/io.py`
|
||||
|
||||
```python
|
||||
def download_text_from_url(
|
||||
url: str,
|
||||
timeout: int = 30, # 默认超时 30 秒
|
||||
show_progress: bool = True,
|
||||
chunk_size: int = 8192,
|
||||
) -> str
|
||||
```
|
||||
|
||||
**URL 要求:**
|
||||
- 必须以 `http://` 或 `https://` 开头
|
||||
- 仅下载文本内容(`response.text`),不解析 HTML/PDF 等
|
||||
- 需要 `fetch_urls=True`(默认开启)
|
||||
|
||||
---
|
||||
|
||||
### 1.6 分块参数配置
|
||||
|
||||
文件路径:`langextract/chunking.py`
|
||||
|
||||
LangExtract 使用基于**句子边界**的确定性分块策略(非语义分块),核心类为 `ChunkIterator`:
|
||||
|
||||
```python
|
||||
class ChunkIterator:
|
||||
def __init__(
|
||||
self,
|
||||
text: str | TokenizedText | None,
|
||||
max_char_buffer: int, # 每个分块最大字符数
|
||||
tokenizer_impl: Tokenizer, # 分词器实例
|
||||
document: Document | None = None,
|
||||
)
|
||||
```
|
||||
|
||||
**分块策略:**
|
||||
|
||||
1. 如果单个句子超过 `max_char_buffer`,按换行符拆分,同时尊重 token 边界
|
||||
2. 如果单个 token 超过 `max_char_buffer`,该 token 独占一个分块
|
||||
3. 如果多个句子可以放入 `max_char_buffer`,合并为一个分块
|
||||
|
||||
**TextChunk 输出结构:**
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass
|
||||
class TextChunk:
|
||||
token_interval: TokenInterval # 在源文档中的 token 区间
|
||||
document: Document | None = None # 源文档引用
|
||||
|
||||
# 属性
|
||||
chunk_text: str # 重建的文本内容
|
||||
sanitized_chunk_text: str # 标准化空白的文本
|
||||
char_interval: CharInterval # 在源文档中的字符区间
|
||||
document_id: str | None # 源文档 ID
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 1.7 不支持的输入格式
|
||||
|
||||
以下格式 **不被支持**,需要在 LangExtract 之前通过外部工具预处理为纯文本:
|
||||
|
||||
| 格式 | 状态 | 预处理方案 |
|
||||
|------|------|-----------|
|
||||
| PDF | ❌ 不支持 | 使用 MinerU / PyMuPDF 先转文本 |
|
||||
| DOCX | ❌ 不支持 | 使用 python-docx 先转文本 |
|
||||
| HTML | ❌ 不支持 | 使用 BeautifulSoup 先提取文本 |
|
||||
| 图片 | ❌ 不支持 | 使用 OCR 工具先识别文本 |
|
||||
| Markdown(含媒体) | ❌ 不支持 | 需提取纯文本部分 |
|
||||
| Excel / JSON | ❌ 不支持 | 需序列化为纯文本 |
|
||||
|
||||
---
|
||||
|
||||
## 二、模型接入规范
|
||||
|
||||
### 2.1 模型路由机制
|
||||
|
||||
文件路径:`langextract/providers/patterns.py`
|
||||
|
||||
LangExtract 通过 **正则匹配 `model_id`** 自动路由到对应的 Provider:
|
||||
|
||||
| Provider | 匹配模式 | 优先级 | 示例模型 |
|
||||
|----------|---------|--------|---------|
|
||||
| **Gemini** | `^gemini` | 10 | `gemini-2.5-flash`, `gemini-1.5-pro` |
|
||||
| **OpenAI** | `^gpt-4`, `^gpt4.`, `^gpt-5`, `^gpt5.` | 10 | `gpt-4o`, `gpt-4o-mini` |
|
||||
| **Ollama** | `gemma`, `llama`, `mistral`, `phi`, `qwen`, `deepseek` 等 | 10 | `gemma2:2b`, `llama3.2:1b` |
|
||||
|
||||
Ollama 额外支持 HuggingFace 格式的模型名:`meta-llama/Llama*`, `google/gemma*`, `mistralai/*`, `microsoft/phi*` 等。
|
||||
|
||||
---
|
||||
|
||||
### 2.2 Gemini Provider
|
||||
|
||||
文件路径:`langextract/providers/gemini.py`
|
||||
|
||||
```python
|
||||
class GeminiLanguageModel(BaseLanguageModel):
|
||||
def __init__(
|
||||
self,
|
||||
model_id: str = 'gemini-2.5-flash',
|
||||
api_key: str | None = None,
|
||||
vertexai: bool = False,
|
||||
credentials: Any | None = None,
|
||||
project: str | None = None,
|
||||
location: str | None = None,
|
||||
http_options: Any | None = None,
|
||||
gemini_schema: GeminiSchema | None = None,
|
||||
format_type: FormatType = FormatType.JSON,
|
||||
temperature: float = 0.0,
|
||||
max_workers: int = 10,
|
||||
fence_output: bool = False,
|
||||
**kwargs,
|
||||
)
|
||||
```
|
||||
|
||||
| 参数 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `model_id` | `gemini-2.5-flash` | Gemini 模型标识 |
|
||||
| `api_key` | `None` | 环境变量:`GEMINI_API_KEY` 或 `LANGEXTRACT_API_KEY` |
|
||||
| `vertexai` | `False` | 是否使用 Vertex AI 企业认证 |
|
||||
| `temperature` | `0.0` | 采样温度(确定性输出) |
|
||||
| `format_type` | `JSON` | 输出格式 |
|
||||
|
||||
**运行时可配参数:** `temperature`, `max_output_tokens`, `top_p`, `top_k`
|
||||
|
||||
**额外参数白名单:** `response_schema`, `response_mime_type`, `safety_settings`, `system_instruction`, `tools`, `stop_sequences`, `candidate_count`
|
||||
|
||||
---
|
||||
|
||||
### 2.3 OpenAI Provider
|
||||
|
||||
文件路径:`langextract/providers/openai.py`
|
||||
|
||||
```python
|
||||
class OpenAILanguageModel(BaseLanguageModel):
|
||||
def __init__(
|
||||
self,
|
||||
model_id: str = 'gpt-4o-mini',
|
||||
api_key: str | None = None,
|
||||
base_url: str | None = None,
|
||||
organization: str | None = None,
|
||||
format_type: FormatType = FormatType.JSON,
|
||||
temperature: float | None = None,
|
||||
max_workers: int = 10,
|
||||
**kwargs,
|
||||
)
|
||||
```
|
||||
|
||||
| 参数 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `model_id` | `gpt-4o-mini` | OpenAI 模型标识 |
|
||||
| `api_key` | `None` | 环境变量:`OPENAI_API_KEY` 或 `LANGEXTRACT_API_KEY` |
|
||||
| `base_url` | `None` | 自定义 API 端点(用于兼容接口) |
|
||||
| `organization` | `None` | OpenAI 组织 ID |
|
||||
| `temperature` | `None` | 采样温度 |
|
||||
|
||||
**运行时可配参数:** `temperature`, `max_output_tokens`, `top_p`, `frequency_penalty`, `presence_penalty`, `seed`, `stop`, `logprobs`, `top_logprobs`, `reasoning_effort`, `reasoning`, `response_format`
|
||||
|
||||
---
|
||||
|
||||
### 2.4 Ollama Provider
|
||||
|
||||
文件路径:`langextract/providers/ollama.py`
|
||||
|
||||
```python
|
||||
class OllamaLanguageModel(BaseLanguageModel):
|
||||
def __init__(
|
||||
self,
|
||||
model_id: str, # 必填
|
||||
model_url: str = 'http://localhost:11434',
|
||||
base_url: str | None = None,
|
||||
format_type: FormatType | None = None,
|
||||
constraint: Constraint = Constraint(),
|
||||
timeout: int | None = None,
|
||||
**kwargs,
|
||||
)
|
||||
```
|
||||
|
||||
| 参数 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `model_id` | **必填** | Ollama 模型名(如 `gemma2:2b`) |
|
||||
| `model_url` | `http://localhost:11434` | Ollama 服务地址 |
|
||||
| `timeout` | `120` | 请求超时(秒) |
|
||||
| `format_type` | `JSON` | 输出格式 |
|
||||
|
||||
**内部默认常量:**
|
||||
|
||||
| 常量 | 值 | 说明 |
|
||||
|------|-----|------|
|
||||
| `_DEFAULT_TEMPERATURE` | `0.1` | 默认温度 |
|
||||
| `_DEFAULT_TIMEOUT` | `120` | 默认超时(秒) |
|
||||
| `_DEFAULT_KEEP_ALIVE` | `300` | 模型保活时间(秒) |
|
||||
| `_DEFAULT_NUM_CTX` | `2048` | 默认上下文窗口大小 |
|
||||
|
||||
**认证支持:** 可配置 `api_key`、`auth_scheme`(默认 `Bearer`)、`auth_header`(默认 `Authorization`)用于代理 Ollama 实例。
|
||||
|
||||
---
|
||||
|
||||
### 2.5 OpenAI 兼容接口适配(DeepSeek 等)
|
||||
|
||||
LangExtract 的 OpenAI Provider 支持 `base_url` 参数,因此可以接入任何 OpenAI 兼容 API:
|
||||
|
||||
```python
|
||||
# DeepSeek 接入示例
|
||||
result = lx.extract(
|
||||
text_or_documents="...",
|
||||
model_id="gpt-4o-mini", # 触发 OpenAI Provider 路由
|
||||
api_key="sk-your-deepseek-key",
|
||||
examples=[...],
|
||||
language_model_params={
|
||||
"base_url": "https://api.deepseek.com",
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
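> **注意:** 由于路由基于 `model_id` 正则匹配,使用 DeepSeek 等兼容接口时 `model_id` 仍需使用 `gpt-*` 前缀来命中 OpenAI Provider,或通过 `config` 参数显式指定 Provider。但需注意:`model_id` 同时会作为 API 请求的 `model` 参数发送,若兼容服务不识别该模型名(例如 DeepSeek 不识别 `gpt-4o-mini`,会返回 `400 Model Not Exist`),上述示例将无法工作;此时应直接实例化 `OpenAILanguageModel(model_id="deepseek-chat", api_key=..., base_url="https://api.deepseek.com")`,并通过 `extract()` 的 `model` 参数传入以绕过路由。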
|
||||
|
||||
---
|
||||
|
||||
### 2.6 模型优先级与配置覆盖关系
|
||||
|
||||
模型配置的优先级从高到低:
|
||||
|
||||
```
|
||||
model(预配置的模型实例) > config(ModelConfig 实例) > model_id + api_key
|
||||
```
|
||||
|
||||
**ModelConfig 结构**(`langextract/factory.py`):
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass(slots=True, frozen=True)
|
||||
class ModelConfig:
|
||||
model_id: str | None = None # 模型标识
|
||||
provider: str | None = None # 显式指定 Provider 名称
|
||||
provider_kwargs: dict[str, Any] = field(default_factory=dict) # Provider 构造参数
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2.7 关于 Embedding 模型
|
||||
|
||||
**LangExtract 不使用也不依赖任何 Embedding 模型。**
|
||||
|
||||
- 文本分块使用基于句子边界的确定性分割算法,不涉及语义相似度计算
|
||||
- 没有向量索引或向量检索功能
|
||||
- 整个代码库中没有任何 Embedding 相关的调用
|
||||
|
||||
---
|
||||
|
||||
## 三、输出数据格式规范
|
||||
|
||||
### 3.1 AnnotatedDocument 结构
|
||||
|
||||
文件路径:`langextract/core/data.py`
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass
|
||||
class AnnotatedDocument:
|
||||
extractions: list[Extraction] | None = None # 抽取结果列表
|
||||
text: str | None = None # 原始文本
|
||||
document_id: str # 文档唯一标识(自动生成)
|
||||
tokenized_text: TokenizedText # 分词后文本(惰性计算)
|
||||
```
|
||||
|
||||
**序列化后的 JSON 顶层字段:**
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `document_id` | `string` | 文档唯一标识,格式 `doc_{uuid_hex[:8]}` |
|
||||
| `text` | `string \| null` | 原始输入文本 |
|
||||
| `extractions` | `array[Extraction] \| null` | 抽取的实体列表 |
|
||||
|
||||
---
|
||||
|
||||
### 3.2 Extraction 结构
|
||||
|
||||
文件路径:`langextract/core/data.py`
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass(init=False)
|
||||
class Extraction:
|
||||
extraction_class: str # 实体类型
|
||||
extraction_text: str # 实体文本
|
||||
char_interval: CharInterval | None = None # 字符位置锚点
|
||||
alignment_status: AlignmentStatus | None = None # 对齐状态
|
||||
extraction_index: int | None = None # 抽取顺序索引
|
||||
group_index: int | None = None # 分组索引
|
||||
description: str | None = None # 实体描述
|
||||
attributes: dict[str, str | list[str]] | None = None # 附加属性
|
||||
token_interval: TokenInterval | None = None # Token 位置锚点
|
||||
```
|
||||
|
||||
**字段详细说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| `extraction_class` | `str` | 是 | 实体类型/分类名称(如 `PERSON`, `ORGANIZATION`) |
|
||||
| `extraction_text` | `str` | 是 | 抽取的文本内容(应为原文的子串) |
|
||||
| `char_interval` | `CharInterval \| null` | 否 | 在原文中的字符偏移位置 |
|
||||
| `alignment_status` | `string \| null` | 否 | 文本对齐质量(见 [3.4 节](#34-alignmentstatus-对齐状态枚举)) |
|
||||
| `extraction_index` | `int \| null` | 否 | 在结果列表中的顺序位置 |
|
||||
| `group_index` | `int \| null` | 否 | 分组归属(用于关联抽取) |
|
||||
| `description` | `string \| null` | 否 | 对该实体的补充描述 |
|
||||
| `attributes` | `dict \| null` | 否 | 键值对形式的附加属性 |
|
||||
| `token_interval` | `TokenInterval \| null` | 否 | 在原文中的 token 偏移位置 |
|
||||
|
||||
---
|
||||
|
||||
### 3.3 CharInterval 字符锚点
|
||||
|
||||
文件路径:`langextract/core/data.py`
|
||||
|
||||
```python
|
||||
@dataclasses.dataclass
|
||||
class CharInterval:
|
||||
start_pos: int | None = None # 起始位置(包含),0-indexed
|
||||
end_pos: int | None = None # 结束位置(不包含)
|
||||
```
|
||||
|
||||
**语义:** `source_text[start_pos:end_pos]` 即为抽取的文本在原文中的精确位置。
|
||||
|
||||
---
|
||||
|
||||
### 3.4 AlignmentStatus 对齐状态枚举
|
||||
|
||||
文件路径:`langextract/core/data.py`
|
||||
|
||||
```python
|
||||
class AlignmentStatus(enum.Enum):
|
||||
MATCH_EXACT = "match_exact"
|
||||
MATCH_GREATER = "match_greater"
|
||||
MATCH_LESSER = "match_lesser"
|
||||
MATCH_FUZZY = "match_fuzzy"
|
||||
```
|
||||
|
||||
| 状态值 | 序列化值 | 含义 | 可信度 |
|
||||
|--------|---------|------|--------|
|
||||
| `MATCH_EXACT` | `"match_exact"` | LLM 输出与原文 token 序列完全匹配 | 最高 |
|
||||
| `MATCH_GREATER` | `"match_greater"` | LLM 输出的 token 序列短于匹配到的原文(找到最佳重叠) | 高 |
|
||||
| `MATCH_LESSER` | `"match_lesser"` | LLM 输出长于匹配到的原文(部分精确匹配) | 中 |
|
||||
| `MATCH_FUZZY` | `"match_fuzzy"` | 模糊匹配,重叠率达到阈值(默认 ≥0.75) | 低 |
|
||||
| `None` | `null` | 未找到任何对齐 | 不可信 |
|
||||
|
||||
**对齐流程:**
|
||||
|
||||
```
|
||||
1. 尝试精确 token 级别匹配(difflib)
|
||||
├── 成功且长度相等 → MATCH_EXACT
|
||||
├── 成功但 LLM 输出更长 → MATCH_LESSER
|
||||
└── 成功但匹配区域更大 → MATCH_GREATER
|
||||
2. 精确匹配失败且 enable_fuzzy_alignment=True
|
||||
├── 最佳重叠窗口 ≥ fuzzy_alignment_threshold → MATCH_FUZZY
|
||||
└── 低于阈值 → None
|
||||
3. 精确匹配失败且 enable_fuzzy_alignment=False → None
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3.5 Resolver 对齐参数
|
||||
|
||||
文件路径:`langextract/resolver.py`
|
||||
|
||||
通过 `extract()` 的 `resolver_params` 字典传入:
|
||||
|
||||
```python
|
||||
result = lx.extract(
|
||||
...,
|
||||
resolver_params={
|
||||
"enable_fuzzy_alignment": True, # 是否启用模糊对齐(默认 True)
|
||||
"fuzzy_alignment_threshold": 0.75, # 模糊匹配最低重叠率(默认 0.75)
|
||||
"accept_match_lesser": True, # 是否接受 MATCH_LESSER(默认 True)
|
||||
"suppress_parse_errors": False, # 是否忽略 JSON 解析错误(默认 False)
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
| 参数 | 类型 | 默认值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `enable_fuzzy_alignment` | `bool` | `True` | 精确匹配失败后是否尝试模糊匹配 |
|
||||
| `fuzzy_alignment_threshold` | `float` | `0.75` | 模糊匹配的最低 token 重叠比率(0.0~1.0) |
|
||||
| `accept_match_lesser` | `bool` | `True` | 是否接受部分精确匹配结果 |
|
||||
| `suppress_parse_errors` | `bool` | `False` | JSON 解析失败时是否继续而非报错 |
|
||||
|
||||
---
|
||||
|
||||
### 3.6 JSONL 输出文件格式
|
||||
|
||||
文件路径:`langextract/io.py`
|
||||
|
||||
```python
|
||||
def save_annotated_documents(
|
||||
annotated_documents: Iterator[AnnotatedDocument],
|
||||
output_dir: pathlib.Path | str | None = None,
|
||||
output_name: str = 'data.jsonl',
|
||||
show_progress: bool = True,
|
||||
) -> None
|
||||
```
|
||||
|
||||
**输出规范:**
|
||||
- 文件格式:**JSONL**(JSON Lines),每行一个完整的 JSON 对象
|
||||
- 默认文件名:`data.jsonl`
|
||||
- 序列化规则:
|
||||
- Enum 值转为字符串(如 `AlignmentStatus.MATCH_EXACT` → `"match_exact"`)
|
||||
- NumPy / integral 数值类型转为 `int`
|
||||
- 以 `_` 开头的私有字段被排除
|
||||
|
||||
---
|
||||
|
||||
### 3.7 完整输出 JSON Schema 示例
|
||||
|
||||
单条 JSONL 记录的完整结构:
|
||||
|
||||
```json
|
||||
{
|
||||
"document_id": "doc_a1b2c3d4",
|
||||
"text": "GraphRAG is a technique developed by Microsoft Research that combines knowledge graphs with retrieval-augmented generation.",
|
||||
"extractions": [
|
||||
{
|
||||
"extraction_class": "TECHNOLOGY",
|
||||
"extraction_text": "GraphRAG",
|
||||
"char_interval": {
|
||||
"start_pos": 0,
|
||||
"end_pos": 8
|
||||
},
|
||||
"alignment_status": "match_exact",
|
||||
"extraction_index": 0,
|
||||
"group_index": null,
|
||||
"description": "A technique combining knowledge graphs with RAG",
|
||||
"attributes": {
|
||||
"category": "AI/ML",
|
||||
"developer": "Microsoft Research"
|
||||
},
|
||||
"token_interval": {
|
||||
"start_index": 0,
|
||||
"end_index": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"extraction_class": "ORGANIZATION",
|
||||
"extraction_text": "Microsoft Research",
|
||||
"char_interval": {
|
||||
"start_pos": 46,
|
||||
"end_pos": 64
|
||||
},
|
||||
"alignment_status": "match_exact",
|
||||
"extraction_index": 1,
|
||||
"group_index": null,
|
||||
"description": null,
|
||||
"attributes": null,
|
||||
"token_interval": {
|
||||
"start_index": 7,
|
||||
"end_index": 9
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3.8 HTML 可视化输出
|
||||
|
||||
文件路径:`langextract/visualization.py`
|
||||
|
||||
```python
|
||||
def visualize(doc: AnnotatedDocument) -> HTML
|
||||
```
|
||||
|
||||
**功能特性:**
|
||||
- 按 `extraction_class` 进行颜色编码高亮(10 色调色板)
|
||||
- 交互式 tooltip 显示实体类型和属性
|
||||
- 动画导航控件,支持多实体浏览
|
||||
- 进度滑块
|
||||
- 响应式 HTML/CSS/JavaScript 嵌入
|
||||
- 支持 Jupyter / IPython 环境直接渲染
|
||||
|
||||
---
|
||||
|
||||
## 附录:环境变量与常量速查
|
||||
|
||||
### 环境变量
|
||||
|
||||
| 变量名 | 适用 Provider | 说明 |
|
||||
|--------|--------------|------|
|
||||
| `LANGEXTRACT_API_KEY` | 所有 | 通用 API Key 后备 |
|
||||
| `GEMINI_API_KEY` | Gemini | Gemini API Key |
|
||||
| `OPENAI_API_KEY` | OpenAI | OpenAI API Key |
|
||||
| `OLLAMA_BASE_URL` | Ollama | Ollama 服务地址(默认 `http://localhost:11434`) |
|
||||
|
||||
### FormatType 枚举
|
||||
|
||||
```python
|
||||
class FormatType(enum.Enum):
|
||||
YAML = 'yaml'
|
||||
JSON = 'json'
|
||||
```
|
||||
|
||||
### 结构化输出支持
|
||||
|
||||
| Provider | Schema 类型 | 结构化输出模式 |
|
||||
|----------|------------|---------------|
|
||||
| Gemini | `GeminiSchema` | 严格结构化输出 |
|
||||
| OpenAI | JSON Mode | 通过 `response_format` 约束 |
|
||||
| Ollama | `FormatModeSchema` | JSON 模式(非严格) |
|
||||
|
||||
### Fence Output 逻辑
|
||||
|
||||
| Provider | 默认值 | 说明 |
|
||||
|----------|--------|------|
|
||||
| Gemini | `False` | 有 Schema 时不需要 fence |
|
||||
| OpenAI | `False` | JSON Mode 返回原始 JSON |
|
||||
| Ollama | `False` | 返回原始 JSON |
|
||||
879
docs/mineru_specification-v1.0.md
Normal file
879
docs/mineru_specification-v1.0.md
Normal file
@@ -0,0 +1,879 @@
|
||||
# MinerU 文档解析规范文档 v1.0
|
||||
|
||||
> 基于 [opendatalab/MinerU](https://github.com/opendatalab/MinerU) 官方 API 文档 + 本地 MVP 实测验证
|
||||
> 实测后端版本:`pipeline` / `_version_name: 2.6.4`
|
||||
> 更新日期:2026-03-04
|
||||
|
||||
---
|
||||
|
||||
## 目录
|
||||
|
||||
- [一、Pipeline 执行流程与测试脚本](#一pipeline-执行流程与测试脚本)
|
||||
- [1.1 虚拟环境配置(环境隔离)](#11-虚拟环境配置环境隔离)
|
||||
- [1.2 完整执行流程(本地文件 → 云端解析 → 本地存储)](#12-完整执行流程本地文件--云端解析--本地存储)
|
||||
- [1.3 测试脚本存放位置](#13-测试脚本存放位置)
|
||||
- [1.4 Pipeline 各步骤详解](#14-pipeline-各步骤详解)
|
||||
- [二、输入格式规范](#二输入格式规范)
|
||||
- [2.1 支持的文件格式](#21-支持的文件格式)
|
||||
- [2.2 输入限制](#22-输入限制)
|
||||
- [2.3 OCR 语言支持](#23-ocr-语言支持)
|
||||
- [三、输出格式规范(实测验证)](#三输出格式规范实测验证)
|
||||
- [3.1 实际输出文件清单(实测 vs 官方文档对比)](#31-实际输出文件清单实测-vs-官方文档对比)
|
||||
- [3.2 content_list.json 字段规范(实测验证)](#32-content_listjson-字段规范实测验证)
|
||||
- [3.3 layout.json 字段规范(实测验证)](#33-layoutjson-字段规范实测验证)
|
||||
- [3.4 full.md Markdown 输出规范(实测验证)](#34-fullmd-markdown-输出规范实测验证)
|
||||
- [四、布局信息规范](#四布局信息规范)
|
||||
- [4.1 坐标系定义(实测验证)](#41-坐标系定义实测验证)
|
||||
- [4.2 布局分类体系](#42-布局分类体系)
|
||||
- [4.3 内容层级与标题级别](#43-内容层级与标题级别)
|
||||
- [4.4 布局精度提取指南](#44-布局精度提取指南)
|
||||
- [五、云端 API 关键参数规范](#五云端-api-关键参数规范)
|
||||
- [5.1 认证配置](#51-认证配置)
|
||||
- [5.2 本地文件上传流程 — file-urls/batch](#52-本地文件上传流程--file-urlsbatch)
|
||||
- [5.3 URL 直传解析 — extract/task](#53-url-直传解析--extracttask)
|
||||
- [5.4 批量 URL 解析 — extract/task/batch](#54-批量-url-解析--extracttaskbatch)
|
||||
- [5.5 查询结果接口](#55-查询结果接口)
|
||||
- [5.6 通用响应包装结构](#56-通用响应包装结构)
|
||||
- [5.7 任务状态枚举(实测验证)](#57-任务状态枚举实测验证)
|
||||
- [5.8 错误码速查](#58-错误码速查)
|
||||
|
||||
---
|
||||
|
||||
## 一、Pipeline 执行流程与测试脚本
|
||||
|
||||
### 1.1 虚拟环境配置(环境隔离)
|
||||
|
||||
MinerU MVP 组件使用 **独立的 Python 虚拟环境**,与项目其他组件(LangExtract、GraphRAG Pipeline 等)完全隔离,避免依赖污染。
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| 虚拟环境路径 | `F:\GraphRAGAgent\mineru_mvp\.venv\` |
|
||||
| Python 版本 | 3.12 |
|
||||
| 创建工具 | uv |
|
||||
| Python 解释器 | `F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe` |
|
||||
|
||||
**启动 Pipeline 前必须切换到子虚拟环境:**
|
||||
|
||||
```bash
|
||||
# 方式一:直接指定解释器路径(推荐,无需手动激活)
|
||||
F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe pipeline.py
|
||||
|
||||
# 方式二:先激活环境再运行
|
||||
cd F:/GraphRAGAgent/mineru_mvp
|
||||
source .venv/Scripts/activate
|
||||
python pipeline.py
|
||||
```
|
||||
|
||||
**安装新依赖:**
|
||||
|
||||
```bash
|
||||
uv pip install <package> --python F:/GraphRAGAgent/mineru_mvp/.venv/Scripts/python.exe
|
||||
```
|
||||
|
||||
**已安装依赖清单:**
|
||||
|
||||
| 包 | 用途 |
|
||||
|----|------|
|
||||
| `requests` | HTTP 客户端(API 调用、文件上传下载) |
|
||||
| `python-dotenv` | `.env` 配置文件加载 |
|
||||
| `reportlab` | 测试 PDF 生成 |
|
||||
|
||||
---
|
||||
|
||||
### 1.2 完整执行流程(本地文件 → 云端解析 → 本地存储)
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Step 0: 激活虚拟环境 │
|
||||
│ source .venv/Scripts/activate 或 直接使用 .venv 内 python │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ Step 1: 获取预签名上传 URL │
|
||||
│ POST /file-urls/batch → 返回 batch_id + file_urls[] │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ Step 2: 上传本地文件 │
|
||||
│ PUT {file_urls[0]} ← 本地文件二进制流(不带 Content-Type) │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ Step 3: 轮询解析结果 │
|
||||
│ GET /extract-results/batch/{batch_id} │
|
||||
│ 状态流转: waiting-file → pending → running → done/failed │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ Step 4: 下载解析结果 ZIP │
|
||||
│ GET {full_zip_url} → 解压到本地 output/ 目录 │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ Step 5: 分析解析产物 │
|
||||
│ 读取 *content_list.json → 统计块类型、页数、生成 summary │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
> **关键发现(实测):** 上传文件时 **不能** 携带 `Content-Type` 请求头,否则 OSS 预签名 URL 校验失败返回 403 `SignatureDoesNotMatch`。必须使用裸 `PUT` 请求。
|
||||
|
||||
### 1.3 测试脚本存放位置
|
||||
|
||||
```
|
||||
F:\GraphRAGAgent\mineru_mvp\
|
||||
├── .env # API Token 配置
|
||||
├── .venv/ # 独立虚拟环境(Python 3.12, uv 创建)
|
||||
├── CLAUDE.md # Claude Code 组件规范
|
||||
├── create_test_pdf.py # 测试 PDF 生成脚本(reportlab)
|
||||
├── pipeline.py # 完整 Pipeline 脚本(5 步)
|
||||
├── test_sample.pdf # 生成的测试 PDF(1 页,含标题/段落/表格)
|
||||
└── output/
|
||||
└── test_sample/ # 解析输出结果
|
||||
├── full.md
|
||||
├── {uuid}_content_list.json
|
||||
├── layout.json
|
||||
├── {uuid}_origin.pdf
|
||||
└── images/
|
||||
└── {hash}.jpg
|
||||
```
|
||||
|
||||
### 1.4 Pipeline 各步骤详解
|
||||
|
||||
#### Step 1 — 获取预签名上传 URL
|
||||
|
||||
```python
|
||||
resp = requests.post(
|
||||
f"{API_BASE}/file-urls/batch",
|
||||
headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
|
||||
json={
|
||||
"files": [{"name": "test_sample.pdf", "data_id": "mvp_test"}],
|
||||
"enable_formula": True,
|
||||
"enable_table": True,
|
||||
"language": "en",
|
||||
},
|
||||
)
|
||||
batch_id = resp.json()["data"]["batch_id"]
|
||||
upload_url = resp.json()["data"]["file_urls"][0]
|
||||
```
|
||||
|
||||
#### Step 2 — 上传文件(裸 PUT,不带 Content-Type)
|
||||
|
||||
```python
|
||||
with open("test_sample.pdf", "rb") as f:
|
||||
requests.put(upload_url, data=f) # 不传 headers
|
||||
```
|
||||
|
||||
#### Step 3 — 轮询结果
|
||||
|
||||
```python
|
||||
while True:
|
||||
result = requests.get(
|
||||
f"{API_BASE}/extract-results/batch/{batch_id}",
|
||||
headers=headers,
|
||||
).json()
|
||||
state = result["data"]["extract_result"][0]["state"]
|
||||
if state == "done":
|
||||
zip_url = result["data"]["extract_result"][0]["full_zip_url"]
|
||||
break
|
||||
time.sleep(5)
|
||||
```
|
||||
|
||||
#### Step 4 — 下载解压
|
||||
|
||||
```python
|
||||
zip_data = requests.get(zip_url).content
|
||||
with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
|
||||
zf.extractall("output/test_sample/")
|
||||
```
|
||||
|
||||
#### Step 5 — 分析产物
|
||||
|
||||
```python
|
||||
content_list = json.load(open(glob.glob("output/test_sample/*content_list.json")[0], encoding="utf-8"))  # open() 不会展开通配符,需先用 glob 匹配实际文件名
|
||||
# 按 type 分类统计、按 page_idx 分组、提取标题层级等
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 二、输入格式规范
|
||||
|
||||
### 2.1 支持的文件格式
|
||||
|
||||
| 格式 | 扩展名 | 说明 |
|
||||
|------|--------|------|
|
||||
| **PDF** | `.pdf` | 核心能力 — 文本型 / 扫描型 / 混合型均支持 |
|
||||
| **Word** | `.doc`, `.docx` | 旧版和新版 Word 文档 |
|
||||
| **PowerPoint** | `.ppt`, `.pptx` | 旧版和新版演示文稿 |
|
||||
| **图片** | `.png`, `.jpg`, `.jpeg` | 单页图片文档,支持 EXIF 方向自动校正 |
|
||||
| **HTML** | `.html` | 须指定 `model_version: "MinerU-HTML"` |
|
||||
|
||||
### 2.2 输入限制
|
||||
|
||||
| 约束项 | 限制值 |
|
||||
|--------|--------|
|
||||
| 单文件最大体积 | **200 MB** |
|
||||
| 单文件最大页数 | **600 页** |
|
||||
| 批量请求最大文件数 | **200 个** |
|
||||
| 预签名上传 URL 有效期 | **24 小时** |
|
||||
| 云端 API 每日最高优先级额度 | **2,000 页**,超出部分降低优先级 |
|
||||
|
||||
### 2.3 OCR 语言支持
|
||||
|
||||
MinerU 内置 OCR 引擎支持 **109 种语言**(基于 PaddleOCR v3),可通过 `language` 参数指定文档主语言。
|
||||
|
||||
> **注意(官方文档):** `language` 的默认值为 `"ch"`(非 `"zh"`),遵循 PaddleOCR 语言代码规范。
|
||||
|
||||
| 代码 | 语言 | 代码 | 语言 |
|
||||
|------|------|------|------|
|
||||
| `ch` | 中文 | `en` | 英文 |
|
||||
| `japan` | 日文 | `korean` | 韩文 |
|
||||
| `french` | 法文 | `german` | 德文 |
|
||||
|
||||
---
|
||||
|
||||
## 三、输出格式规范(实测验证)
|
||||
|
||||
### 3.1 实际输出文件清单(实测 vs 官方文档对比)
|
||||
|
||||
**实测输出(ZIP 解压后,共 5 个文件):**
|
||||
|
||||
```
|
||||
output/test_sample/
|
||||
├── full.md # Markdown 输出(单文件)
|
||||
├── {uuid}_content_list.json # 扁平化内容块列表
|
||||
├── layout.json # 富元数据中间格式
|
||||
├── {uuid}_origin.pdf # 原始 PDF 副本
|
||||
└── images/
|
||||
└── {sha256_hash}.jpg # 表格/图片截图
|
||||
```
|
||||
|
||||
**与官方文档差异对比:**
|
||||
|
||||
| 项目 | 官方文档描述 | 实测结果 | 差异说明 |
|
||||
|------|-------------|---------|---------|
|
||||
| Markdown 文件 | `auto/auto.md` + `auto_nlp/auto_nlp.md`(两个子目录) | **`full.md`**(单文件,根目录) | 云端 API 输出为合并的 `full.md`,无子目录拆分 |
|
||||
| 中间格式 | `middle.json` | **`layout.json`** | 文件名不同,结构一致 |
|
||||
| content_list | `content_list.json` | **`{uuid}_content_list.json`** | 文件名带 UUID 前缀 |
|
||||
| 原始文件副本 | 未提及 | **`{uuid}_origin.pdf`** | 云端 API 额外返回原始文件副本 |
|
||||
| 调试文件 | `layout.pdf` + `span.pdf` + `model.json` | **无** | 云端 API 不返回调试 PDF 和 model.json |
|
||||
| 图片命名 | `img_0_0.png` / `table_0_1.png` | **`{sha256}.jpg`** | 使用内容哈希命名,格式为 JPG |
|
||||
|
||||
> **重要结论:** 以实测为准。对接下游系统时,文件匹配应使用 glob 模式(如 `*content_list.json`)而非固定文件名。
|
||||
|
||||
### 3.2 content_list.json 字段规范(实测验证)
|
||||
|
||||
文件为 **JSON 数组**,每个元素是一个内容块,按文档阅读顺序排列。
|
||||
|
||||
#### 3.2.1 公共字段
|
||||
|
||||
| 字段 | 类型 | 说明 | 实测验证 |
|
||||
|------|------|------|---------|
|
||||
| `type` | `string` | 内容类型 | 实测出现:`text`, `table` |
|
||||
| `page_idx` | `int` | 所在页码(0-indexed) | 实测值:`0` |
|
||||
| `bbox` | `[int, int, int, int]` | 边界框 `[x0, y0, x1, y1]` | 实测范围:`0–1000`(归一化) |
|
||||
|
||||
#### 3.2.2 文本块(type: "text")
|
||||
|
||||
**实测完整结构:**
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "text",
|
||||
"text": "GraphRAG: Knowledge Graph Enhanced RAG System ",
|
||||
"text_level": 1,
|
||||
"bbox": [141, 93, 860, 151],
|
||||
"page_idx": 0
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必现 | 说明 |
|
||||
|------|------|------|------|
|
||||
| `text` | `string` | 是 | 文本内容(末尾可能有空格) |
|
||||
| `text_level` | `int \| 缺失` | 否 | 标题级别:`1`=一级标题;**正文时该字段缺失而非为 `0` 或 `null`** |
|
||||
|
||||
> **实测发现:** 正文段落中 `text_level` 字段 **完全不存在**(不是 `null` 或 `0`),仅标题块才携带该字段。判断标题应使用 `block.get("text_level")` 而非 `block["text_level"] >= 1`。
|
||||
|
||||
#### 3.2.3 表格块(type: "table")
|
||||
|
||||
**实测完整结构:**
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "table",
|
||||
"img_path": "images/e382eaafdf341d361c2567b20d9ce56456c17a7dd10ae5dadbcc3961256169c9.jpg",
|
||||
"table_caption": [],
|
||||
"table_footnote": [],
|
||||
"table_body": "<table><tr><td rowspan=1 colspan=2>Method Comprehensiveness</td>...</table>",
|
||||
"bbox": [115, 563, 882, 708],
|
||||
"page_idx": 0
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必现 | 说明 |
|
||||
|------|------|------|------|
|
||||
| `img_path` | `string` | 是 | 表格截图路径(`images/{sha256}.jpg`) |
|
||||
| `table_body` | `string` | 是 | HTML 表格(`<table>` 标签,无 `<html>/<body>` 外层包裹) |
|
||||
| `table_caption` | `string[]` | 是 | 表格标题(可为空数组 `[]`) |
|
||||
| `table_footnote` | `string[]` | 是 | 表格脚注(可为空数组 `[]`) |
|
||||
|
||||
> **实测发现:** `table_body` 的 HTML 直接以 `<table>` 开头,**不含** `<html><body>` 外层包裹(官方文档示例中有外层包裹,以实测为准)。
|
||||
|
||||
#### 3.2.4 图片块(type: "image")— 官方文档
|
||||
|
||||
本次测试 PDF 不含独立图片,以下为官方文档规范(待后续实测验证):
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "image",
|
||||
"img_path": "images/{hash}.jpg",
|
||||
"image_caption": ["Figure 1: ..."],
|
||||
"image_footnote": [],
|
||||
"bbox": [x0, y0, x1, y1],
|
||||
"page_idx": 0
|
||||
}
|
||||
```
|
||||
|
||||
#### 3.2.5 公式块(type: "equation")— 官方文档
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "equation",
|
||||
"text": "E = mc^2",
|
||||
"text_format": "latex",
|
||||
"img_path": "images/{hash}.jpg",
|
||||
"bbox": [x0, y0, x1, y1],
|
||||
"page_idx": 0
|
||||
}
|
||||
```
|
||||
|
||||
> **实测发现:** 测试 PDF 结论段的百分数被解析为 LaTeX 内联公式(`$7 2 . 0 \%$`),嵌入在 `text` 类型块中,而非独立的 `equation` 块。这说明 Pipeline 后端会将简单公式内联到文本块中。
|
||||
|
||||
---
|
||||
|
||||
### 3.3 layout.json 字段规范(实测验证)
|
||||
|
||||
`layout.json` 对应官方文档中的 `middle.json`,是富元数据中间格式。
|
||||
|
||||
#### 3.3.1 顶层结构(实测)
|
||||
|
||||
```json
|
||||
{
|
||||
"_backend": "pipeline",
|
||||
"_version_name": "2.6.4",
|
||||
"pdf_info": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 实测值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `_backend` | `string` | `"pipeline"` | 使用的解析后端 |
|
||||
| `_version_name` | `string` | `"2.6.4"` | MinerU 版本标识 |
|
||||
| `pdf_info` | `array` | 含 1 个元素 | 按页组织的解析结果 |
|
||||
|
||||
#### 3.3.2 页级结构(实测)
|
||||
|
||||
```json
|
||||
{
|
||||
"page_idx": 0,
|
||||
"page_size": [595, 841],
|
||||
"preproc_blocks": [ ... ],
|
||||
"para_blocks": [ ... ],
|
||||
"discarded_blocks": []
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 实测值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `page_idx` | `int` | `0` | 页码(0-indexed) |
|
||||
| `page_size` | `[int, int]` | `[595, 841]` | 页面尺寸 `[宽, 高]`(PDF pt 单位,A4≈595×841) |
|
||||
| `preproc_blocks` | `array` | 10 个块 | 预处理阶段的内容块 |
|
||||
| `para_blocks` | `array` | 10 个块 | 段落分段后的内容块 |
|
||||
| `discarded_blocks` | `array` | `[]` | 被过滤的内容(页眉/页脚等) |
|
||||
|
||||
> **与官方文档差异:** 实测页级结构 **仅包含 3 个数组**(`preproc_blocks`、`para_blocks`、`discarded_blocks`),**不含** 官方文档提到的 `images`、`tables`、`interline_equations` 独立数组。表格和图片直接嵌入在 `preproc_blocks` / `para_blocks` 中。
|
||||
|
||||
#### 3.3.3 内容块层级结构(Block → Line → Span,实测验证)
|
||||
|
||||
**文本/标题块(实测):**
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "title",
|
||||
"bbox": [84, 79, 512, 127],
|
||||
"lines": [
|
||||
{
|
||||
"bbox": [80, 77, 515, 106],
|
||||
"spans": [
|
||||
{
|
||||
"bbox": [80, 77, 515, 106],
|
||||
"score": 1.0,
|
||||
"content": "GraphRAG: Knowledge Graph Enhanced",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
"index": 0.5
|
||||
}
|
||||
```
|
||||
|
||||
**Block 字段(实测):**
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `type` | `string` | 块类型:实测出现 `title`, `text`, `table` |
|
||||
| `bbox` | `[int, int, int, int]` | 边界框(原始 PDF pt 坐标) |
|
||||
| `lines` | `array` | 行数组(文本/标题块) |
|
||||
| `blocks` | `array` | 子块数组(仅 `table` 类型容器块) |
|
||||
| `index` | `int \| float` | 排序索引(可为小数,如 `0.5`) |
|
||||
|
||||
**Line 字段(实测):**
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `bbox` | `[int, int, int, int]` | 行边界框 |
|
||||
| `spans` | `array` | Span 数组 |
|
||||
| `index` | `int` | 行在所属块内的排序索引 |
|
||||
|
||||
**Span 字段(实测):**
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `bbox` | `[int, int, int, int]` | Span 边界框 |
|
||||
| `type` | `string` | 实测出现:`text`, `table` |
|
||||
| `content` | `string` | 文本内容(`type=text` 时) |
|
||||
| `score` | `float` | 置信度(实测多为 `1.0`) |
|
||||
|
||||
**表格容器块(实测):**
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "table",
|
||||
"bbox": [69, 474, 525, 596],
|
||||
"blocks": [
|
||||
{
|
||||
"type": "table_body",
|
||||
"bbox": [69, 474, 525, 596],
|
||||
"group_id": 0,
|
||||
"lines": [ ... ],
|
||||
"index": 0,
|
||||
"virtual_lines": [ ... ]
|
||||
}
|
||||
],
|
||||
"index": 7
|
||||
}
|
||||
```
|
||||
|
||||
表格容器块内的子块额外包含:
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `group_id` | `int` | 分组 ID |
|
||||
| `virtual_lines` | `array` | 虚拟行结构(表格布局专用) |
|
||||
|
||||
**`para_blocks` 额外字段(实测):**
|
||||
|
||||
部分 `para_blocks` 中的文本块额外包含 `bbox_fs` 字段(疑似字体大小相关的边界框),如:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "text",
|
||||
"bbox": [77, 198, 518, 259],
|
||||
"lines": [...],
|
||||
"index": 2,
|
||||
"bbox_fs": [77, 198, 518, 259]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3.4 full.md Markdown 输出规范(实测验证)
|
||||
|
||||
**实测产物:** 单个 `full.md` 文件(非官方文档描述的 `auto/auto.md` + `auto_nlp/auto_nlp.md` 双目录结构)。
|
||||
|
||||
**实测特征:**
|
||||
|
||||
| 特征 | 实测行为 |
|
||||
|------|---------|
|
||||
| 标题 | 使用 `# ` 前缀,所有标题均为一级(`# `) |
|
||||
| 段落 | 纯文本,段落间以空行分隔 |
|
||||
| 表格 | 直接嵌入 HTML `<table>` 标签 |
|
||||
| 公式 | 内联使用 `$...$` 定界符(如 `$7 2 . 0 \%$`) |
|
||||
| 图片引用 | 本次未出现独立图片引用 |
|
||||
|
||||
**实测输出示例(节选):**
|
||||
|
||||
```markdown
|
||||
# GraphRAG: Knowledge Graph Enhanced RAG System
|
||||
|
||||
# 1. Introduction
|
||||
|
||||
GraphRAG is an advanced retrieval-augmented generation technique developed by...
|
||||
|
||||
# 3. Performance Comparison
|
||||
|
||||
The following table compares GraphRAG with traditional RAG approaches...
|
||||
|
||||
<table><tr><td rowspan=1 colspan=2>Method Comprehensiveness</td>...</table>
|
||||
|
||||
# 4. Conclusion
|
||||
|
||||
...comprehensiveness $7 2 . 0 \%$ vs $3 2 . 4 \%$...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、布局信息规范
|
||||
|
||||
### 4.1 坐标系定义(实测验证)
|
||||
|
||||
| 坐标系 | 适用文件 | 实测范围 | 原点 | 说明 |
|
||||
|--------|---------|---------|------|------|
|
||||
| **归一化整数坐标** | `*content_list.json` | `0 – 1000` | 左上角 | 页面宽高均映射到 0~1000 |
|
||||
| **原始 PDF 坐标** | `layout.json` | 实测 `[595, 841]`(A4 pt) | 左上角 | 与 PDF 页面尺寸一致 |
|
||||
|
||||
**bbox 格式统一为 `[x0, y0, x1, y1]`:**
|
||||
|
||||
```
|
||||
(x0, y0) ─────────────────── (x1, y0)
|
||||
│ │
|
||||
│ 内容区域 │
|
||||
│ │
|
||||
(x0, y1) ─────────────────── (x1, y1)
|
||||
```
|
||||
|
||||
**实测对照(标题块 "1. Introduction"):**
|
||||
|
||||
| 文件 | bbox | 坐标系 |
|
||||
|------|------|--------|
|
||||
| `content_list.json` | `[131, 200, 317, 222]` | 归一化 0-1000 |
|
||||
| `layout.json` | `[78, 169, 189, 187]` | PDF pt(页面 595×841) |
|
||||
|
||||
### 4.2 布局分类体系
|
||||
|
||||
#### Pipeline 后端(实测 + 官方文档合并)
|
||||
|
||||
**layout.json 中的 `type` 值(实测出现标记 ✅):**
|
||||
|
||||
| type 值 | 说明 | 实测出现 |
|
||||
|---------|------|---------|
|
||||
| `title` | 标题 | ✅ |
|
||||
| `text` | 正文段落 | ✅ |
|
||||
| `table` | 表格容器 | ✅ |
|
||||
| `table_body` | 表格主体(子块) | ✅ |
|
||||
| `table_caption` | 表格标题 | — |
|
||||
| `table_footnote` | 表格脚注 | — |
|
||||
| `image_body` | 图片主体 | — |
|
||||
| `image_caption` | 图片标题 | — |
|
||||
| `image_footnote` | 图片脚注 | — |
|
||||
| `interline_equation` | 行间公式 | — |
|
||||
| `index` | 目录项 | — |
|
||||
| `list` | 列表项 | — |
|
||||
|
||||
#### VLM 后端(官方文档,未实测)
|
||||
|
||||
VLM 后端额外支持:`code`, `code_caption`, `list`, `header`, `footer`, `page_number`, `aside_text`, `page_footnote`, `ref_text`, `algorithm`, `phonetic`。
|
||||
|
||||
### 4.3 内容层级与标题级别
|
||||
|
||||
`content_list.json` 中 `text_level` 字段标识文档结构层级:
|
||||
|
||||
| text_level | 含义 | Markdown | 实测验证 |
|
||||
|------------|------|----------|---------|
|
||||
| **字段缺失** | 正文 | 无标记 | ✅ 实测正文块不含 `text_level` 字段 |
|
||||
| `1` | 一级标题 | `# Heading` | ✅ 实测验证 |
|
||||
| `2` | 二级标题 | `## Heading` | — |
|
||||
| `3` | 三级标题 | `### Heading` | — |
|
||||
| `4+` | 更深层标题 | `####+ Heading` | — |
|
||||
|
||||
> **重要纠正:** 官方文档描述正文为 `text_level: null` 或 `0`,但实测正文块中 **该字段完全不存在**。正确判断方式:
|
||||
|
||||
```python
|
||||
# 正确写法
|
||||
is_heading = block.get("text_level") is not None
|
||||
|
||||
# 错误写法(会 KeyError)
|
||||
is_heading = block["text_level"] >= 1
|
||||
```
|
||||
|
||||
### 4.4 布局精度提取指南
|
||||
|
||||
#### 提取文档大纲
|
||||
|
||||
```python
|
||||
headings = [
|
||||
{"level": b["text_level"], "text": b["text"].strip(), "page": b["page_idx"]}
|
||||
for b in content_list
|
||||
if b["type"] == "text" and b.get("text_level") is not None
|
||||
]
|
||||
```
|
||||
|
||||
#### 提取正文段落
|
||||
|
||||
```python
|
||||
paragraphs = [
|
||||
b["text"].strip()
|
||||
for b in content_list
|
||||
if b["type"] == "text" and b.get("text_level") is None
|
||||
]
|
||||
```
|
||||
|
||||
#### 解析表格数值
|
||||
|
||||
```python
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
for b in content_list:
|
||||
if b["type"] != "table":
|
||||
continue
|
||||
soup = BeautifulSoup(b["table_body"], "html.parser")
|
||||
rows = []
|
||||
for tr in soup.find_all("tr"):
|
||||
cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
|
||||
rows.append(cells)
|
||||
# rows 即为二维表格数据
|
||||
```
|
||||
|
||||
#### 按页面位置过滤
|
||||
|
||||
```python
|
||||
def is_upper_half(block):
|
||||
"""判断内容块是否在页面上半部分(归一化坐标 0-1000)"""
|
||||
y_center = (block["bbox"][1] + block["bbox"][3]) / 2
|
||||
return y_center < 500
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、云端 API 关键参数规范
|
||||
|
||||
### 5.1 认证配置
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| 请求头 | `Authorization: Bearer {token}` |
|
||||
| Token 获取 | [mineru.net/apiManage/token](https://mineru.net/apiManage/token) |
|
||||
| .env 配置 | `MINERU_API_TOKEN=xxx` |
|
||||
|
||||
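除向预签名 URL 上传文件的 PUT 请求外,所有接口均需携带 `Authorization` 头;带 JSON 请求体的 POST 接口还需设置 `Content-Type: application/json`。上传文件的 PUT 请求不得携带任何请求头(包括 `Authorization` 与 `Content-Type`),否则 OSS 签名校验失败。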
|
||||
|
||||
---
|
||||
|
||||
### 5.2 本地文件上传流程 — file-urls/batch
|
||||
|
||||
**用途:** 本地文件场景 — 获取预签名 URL → PUT 上传 → 自动触发解析
|
||||
|
||||
**接口:** `POST https://mineru.net/api/v4/file-urls/batch`
|
||||
|
||||
#### 请求体
|
||||
|
||||
| 字段 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| `files` | `array[object]` | **是** | — | 文件列表(最多 200 个) |
|
||||
| `files[].name` | `string` | **是** | — | 文件名(须含正确扩展名) |
|
||||
| `files[].data_id` | `string` | 否 | — | 业务标识(最长 128 字符,支持字母数字 `_` `-` `.`) |
|
||||
| `files[].is_ocr` | `bool` | 否 | `false` | 是否强制 OCR |
|
||||
| `files[].page_ranges` | `string` | 否 | — | 页码范围,逗号分隔(如 `"2,4-6"` 表示第 2 页及第 4 至 6 页;`"2--2"` 表示从第 2 页到倒数第 2 页) |
|
||||
| `model_version` | `string` | 否 | `"pipeline"` | 模型版本:`pipeline` / `vlm` / `MinerU-HTML` |
|
||||
| `enable_formula` | `bool` | 否 | `true` | 是否启用公式识别 |
|
||||
| `enable_table` | `bool` | 否 | `true` | 是否启用表格识别 |
|
||||
| `language` | `string` | 否 | `"ch"` | OCR 语言(PaddleOCR v3 语言代码) |
|
||||
| `callback` | `string` | 否 | — | 回调通知 URL(HTTP/HTTPS POST) |
|
||||
| `seed` | `string` | 否 | — | 回调签名种子(与 callback 配合,最长 64 字符) |
|
||||
| `extra_formats` | `string[]` | 否 | — | 额外输出格式:`"docx"`, `"html"`, `"latex"` |
|
||||
|
||||
#### 响应体(实测验证)
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"msg": "ok",
|
||||
"trace_id": "9ef836ce2a65f46c5f54389e55a14039",
|
||||
"data": {
|
||||
"batch_id": "6ce0e838-b324-4f1d-8b06-01ddc07e4cd4",
|
||||
"file_urls": [
|
||||
"https://mineru.oss-cn-shanghai.aliyuncs.com/api-upload/extract/2026-03-04/{batch_id}/{file_uuid}.pdf?Expires=...&OSSAccessKeyId=...&Signature=..."
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 响应字段 | 类型 | 说明 |
|
||||
|---------|------|------|
|
||||
| `code` | `int` | `0` 表示成功 |
|
||||
| `msg` | `string` | 状态信息 |
|
||||
| `trace_id` | `string` | 请求追踪 ID |
|
||||
| `data.batch_id` | `string` | 批次 ID(后续查询结果使用) |
|
||||
| `data.file_urls` | `string[]` | 预签名上传 URL 列表(与 `files` 一一对应) |
|
||||
|
||||
#### 文件上传
|
||||
|
||||
```
|
||||
PUT {file_urls[i]}
|
||||
Body: 文件二进制流
|
||||
```
|
||||
|
||||
> **不要传任何请求头**(包括 `Content-Type`),否则 OSS 签名校验失败。
|
||||
|
||||
---
|
||||
|
||||
### 5.3 URL 直传解析 — extract/task
|
||||
|
||||
**用途:** 文件已有公网 URL 时直接提交解析
|
||||
|
||||
**接口:** `POST https://mineru.net/api/v4/extract/task`
|
||||
|
||||
#### 请求体
|
||||
|
||||
| 字段 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| `url` | `string` | **是** | — | 文件公网 URL |
|
||||
| `model_version` | `string` | 否 | `"pipeline"` | 模型版本 |
|
||||
| `is_ocr` | `bool` | 否 | `false` | 是否强制 OCR |
|
||||
| `enable_formula` | `bool` | 否 | `true` | 是否启用公式识别 |
|
||||
| `enable_table` | `bool` | 否 | `true` | 是否启用表格识别 |
|
||||
| `language` | `string` | 否 | `"ch"` | OCR 语言 |
|
||||
| `data_id` | `string` | 否 | — | 业务标识 |
|
||||
| `callback` | `string` | 否 | — | 回调 URL |
|
||||
| `seed` | `string` | 否 | — | 回调种子 |
|
||||
| `extra_formats` | `string[]` | 否 | — | 额外输出格式 |
|
||||
| `page_ranges` | `string` | 否 | — | 页码范围 |
|
||||
| `no_cache` | `bool` | 否 | `false` | 跳过 URL 缓存 |
|
||||
| `cache_tolerance` | `int` | 否 | `900` | 缓存容忍时间(秒) |
|
||||
|
||||
#### 响应体
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"msg": "ok",
|
||||
"trace_id": "string",
|
||||
"data": { "task_id": "string" }
|
||||
}
|
||||
```
|
||||
|
||||
#### 查询结果
|
||||
|
||||
`GET https://mineru.net/api/v4/extract/task/{task_id}`
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"data": {
|
||||
"task_id": "string",
|
||||
"data_id": "string",
|
||||
"state": "done",
|
||||
"full_zip_url": "https://cdn-mineru.openxlab.org.cn/...",
|
||||
"err_msg": null,
|
||||
"extract_progress": {
|
||||
"extracted_pages": 1,
|
||||
"total_pages": 1,
|
||||
"start_time": "2026-03-04 12:00:00"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5.4 批量 URL 解析 — extract/task/batch
|
||||
|
||||
**接口:** `POST https://mineru.net/api/v4/extract/task/batch`
|
||||
|
||||
#### 请求体
|
||||
|
||||
```json
|
||||
{
|
||||
"files": [
|
||||
{"url": "https://...", "data_id": "doc1", "is_ocr": false, "page_ranges": "1-5"}
|
||||
],
|
||||
"model_version": "pipeline",
|
||||
"enable_formula": true,
|
||||
"enable_table": true,
|
||||
"language": "ch",
|
||||
"extra_formats": ["docx"],
|
||||
"no_cache": false,
|
||||
"cache_tolerance": 900
|
||||
}
|
||||
```
|
||||
|
||||
#### 响应体
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"data": { "batch_id": "string" }
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5.5 查询结果接口
|
||||
|
||||
#### 单任务查询
|
||||
|
||||
`GET https://mineru.net/api/v4/extract/task/{task_id}`
|
||||
|
||||
#### 批量查询(实测验证)
|
||||
|
||||
`GET https://mineru.net/api/v4/extract-results/batch/{batch_id}`
|
||||
|
||||
**响应体(实测验证):**
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"msg": "ok",
|
||||
"trace_id": "string",
|
||||
"data": {
|
||||
"batch_id": "3b1729e9-c833-44b4-b9c2-201164001ab0",
|
||||
"extract_result": [
|
||||
{
|
||||
"file_name": "test_sample.pdf",
|
||||
"state": "done",
|
||||
"full_zip_url": "https://cdn-mineru.openxlab.org.cn/pdf/2026-03-04/...",
|
||||
"err_msg": null,
|
||||
"data_id": "mvp_test",
|
||||
"extract_progress": {
|
||||
"extracted_pages": 1,
|
||||
"total_pages": 1,
|
||||
"start_time": "2026-03-04 ..."
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5.6 通用响应包装结构
|
||||
|
||||
所有 API 响应均遵循统一包装格式:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0, // 0 = 成功,非 0 = 失败
|
||||
"msg": "ok", // 状态描述
|
||||
"trace_id": "...", // 请求追踪 ID
|
||||
"data": { ... } // 业务数据
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5.7 任务状态枚举(实测验证)
|
||||
|
||||
| state | 说明 | 实测出现 |
|
||||
|-------|------|---------|
|
||||
| `waiting-file` | 等待文件上传完成 | ✅ |
|
||||
| `pending` | 排队等待解析 | ✅ |
|
||||
| `running` | 正在解析 | — |
|
||||
| `converting` | 格式转换中 | — |
|
||||
| `done` | 解析完成 | ✅ |
|
||||
| `failed` | 解析失败 | — |
|
||||
|
||||
> **实测状态流转:** `waiting-file` → `pending` → `done`(小文件解析耗时很短,轮询间隔内未观测到 `running`,不代表该状态被跳过)
|
||||
|
||||
---
|
||||
|
||||
### 5.8 错误码速查
|
||||
|
||||
| 错误码 | 含义 |
|
||||
|--------|------|
|
||||
| `A0202` | Token 无效 |
|
||||
| `A0211` | Token 过期 |
|
||||
| `-60005` | 文件超过 200MB |
|
||||
| `-60006` | 页数超过 600 页 |
|
||||
| `-60018` | 当日解析额度用尽 |
|
||||
680
docs/mineru_specification.md
Normal file
680
docs/mineru_specification.md
Normal file
@@ -0,0 +1,680 @@
|
||||
# MinerU 文档解析规范文档
|
||||
|
||||
> 基于 [opendatalab/MinerU](https://github.com/opendatalab/MinerU) 官方文档及云端 API 调研
|
||||
> 版本基线:2026-03-04
|
||||
|
||||
---
|
||||
|
||||
## 目录
|
||||
|
||||
- [一、支持的原始输入文件格式](#一支持的原始输入文件格式)
|
||||
- [1.1 支持格式清单](#11-支持格式清单)
|
||||
- [1.2 输入限制](#12-输入限制)
|
||||
- [1.3 OCR 语言支持](#13-ocr-语言支持)
|
||||
- [二、云端 API 输出格式规范](#二云端-api-输出格式规范)
|
||||
- [2.1 输出文件总览](#21-输出文件总览)
|
||||
- [2.2 content_list.json 字段规范](#22-content_listjson-字段规范)
|
||||
- [2.3 middle.json 字段规范](#23-middlejson-字段规范)
|
||||
- [2.4 Markdown 输出规范](#24-markdown-输出规范)
|
||||
- [2.5 调试与可视化文件](#25-调试与可视化文件)
|
||||
- [三、布局信息规范](#三布局信息规范)
|
||||
- [3.1 坐标系定义](#31-坐标系定义)
|
||||
- [3.2 布局分类体系(Pipeline 后端)](#32-布局分类体系pipeline-后端)
|
||||
- [3.3 布局分类体系(VLM 后端)](#33-布局分类体系vlm-后端)
|
||||
- [3.4 内容层级与标题级别](#34-内容层级与标题级别)
|
||||
- [3.5 布局精度提取指南](#35-布局精度提取指南)
|
||||
- [四、云端 API MVP 必要字段](#四云端-api-mvp-必要字段)
|
||||
- [4.1 认证配置](#41-认证配置)
|
||||
- [4.2 创建解析任务 — 请求规范](#42-创建解析任务--请求规范)
|
||||
- [4.3 查询任务结果 — 响应规范](#43-查询任务结果--响应规范)
|
||||
- [4.4 批量任务接口](#44-批量任务接口)
|
||||
- [4.5 MVP 最小可用请求示例](#45-mvp-最小可用请求示例)
|
||||
|
||||
---
|
||||
|
||||
## 一、支持的原始输入文件格式
|
||||
|
||||
### 1.1 支持格式清单
|
||||
|
||||
| 格式 | 扩展名 | 说明 |
|
||||
|------|--------|------|
|
||||
| **PDF** | `.pdf` | 核心能力 — 文本型 / 扫描型 / 混合型均支持 |
|
||||
| **Word** | `.doc`, `.docx` | 旧版和新版 Word 文档 |
|
||||
| **PowerPoint** | `.ppt`, `.pptx` | 旧版和新版演示文稿 |
|
||||
| **图片** | `.png`, `.jpg`, `.jpeg` | 单页图片文档,支持 EXIF 方向自动校正 |
|
||||
| **HTML** | `.html` | 需指定 `MinerU-HTML` 模型版本 |
|
||||
|
||||
### 1.2 输入限制
|
||||
|
||||
| 约束项 | 限制值 |
|
||||
|--------|--------|
|
||||
| 单文件最大体积 | **200 MB** |
|
||||
| 单文件最大页数 | **600 页** |
|
||||
| 云端 API 每日免费额度 | **2,000 页**(最高优先级),超出部分降低优先级 |
|
||||
|
||||
### 1.3 OCR 语言支持
|
||||
|
||||
MinerU 内置 OCR 引擎支持 **109 种语言**,可通过 `language` 参数指定文档主语言(默认 `zh` 中文)。常用语言代码:
|
||||
|
||||
| 代码 | 语言 | 代码 | 语言 |
|
||||
|------|------|------|------|
|
||||
| `zh` | 中文 | `en` | 英文 |
|
||||
| `ja` | 日文 | `ko` | 韩文 |
|
||||
| `fr` | 法文 | `de` | 德文 |
|
||||
|
||||
---
|
||||
|
||||
## 二、云端 API 输出格式规范
|
||||
|
||||
### 2.1 输出文件总览
|
||||
|
||||
云端 API 任务完成后,返回一个 ZIP 压缩包(通过 `full_zip_url` 获取),解压后包含以下文件:
|
||||
|
||||
```
|
||||
output/
|
||||
├── auto/
|
||||
│ ├── auto.md # 多模态 Markdown(含图片引用)
|
||||
│ └── images/ # 提取的图片资源
|
||||
│ ├── img_0_0.png
|
||||
│ ├── table_0_1.png
|
||||
│ └── ...
|
||||
├── auto_nlp/
|
||||
│ └── auto_nlp.md # 纯文本 NLP Markdown(无图片)
|
||||
├── middle.json # 富元数据中间格式(完整层级结构)
|
||||
├── content_list.json # 扁平化内容块列表(按阅读顺序)
|
||||
├── layout.pdf # 布局分析可视化(调试用)
|
||||
├── span.pdf # Span 级别标注(Pipeline 后端,调试用)
|
||||
└── model.json # 原始模型推理结果(调试用)
|
||||
```
|
||||
|
||||
| 文件 | 用途 | 推荐场景 |
|
||||
|------|------|---------|
|
||||
| `content_list.json` | 扁平化内容块,按阅读顺序 | **推荐用于下游 NLP/KG 管道对接** |
|
||||
| `middle.json` | 完整层级结构,含丰富元数据 | 需要精确布局信息或二次开发 |
|
||||
| `auto/auto.md` | 多模态 Markdown | 人工阅读、LLM 直接消费 |
|
||||
| `auto_nlp/auto_nlp.md` | 纯文本 Markdown | 纯文本 NLP 处理 |
|
||||
| `layout.pdf` | 布局可视化 | 调试、验证解析质量 |
|
||||
|
||||
---
|
||||
|
||||
### 2.2 content_list.json 字段规范
|
||||
|
||||
`content_list.json` 是一个 **JSON 数组**,每个元素是一个内容块,按文档阅读顺序排列。
|
||||
|
||||
#### 2.2.1 公共字段(所有类型共有)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `type` | `string` | 内容类型:`text` / `image` / `table` / `equation` / `code` / `list` |
|
||||
| `page_idx` | `int` | 所在页码(**0-indexed**) |
|
||||
| `bbox` | `[x0, y0, x1, y1]` | 边界框坐标,归一化到 **0–1000** 范围 |
|
||||
|
||||
#### 2.2.2 文本块(type: "text")
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "text",
|
||||
"text": "段落正文内容...",
|
||||
"text_level": 0,
|
||||
"page_idx": 0,
|
||||
"bbox": [72, 120, 540, 145]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `text` | `string` | 文本内容 |
|
||||
| `text_level` | `int \| null` | 标题级别:`null` 或 `0` = 正文,`1` = 一级标题,`2` = 二级标题,依此类推 |
|
||||
|
||||
#### 2.2.3 图片块(type: "image")
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "image",
|
||||
"img_path": "images/img_0_0.png",
|
||||
"image_caption": ["Figure 1: System architecture"],
|
||||
"image_footnote": ["Source: internal report"],
|
||||
"page_idx": 1,
|
||||
"bbox": [100, 200, 500, 600]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `img_path` | `string` | 图片文件相对路径 |
|
||||
| `image_caption` | `string[]` | 图片标题列表 |
|
||||
| `image_footnote` | `string[]` | 图片脚注列表 |
|
||||
|
||||
#### 2.2.4 表格块(type: "table")
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "table",
|
||||
"img_path": "images/table_0_1.png",
|
||||
"table_body": "<html><body><table><tr><td>...</td></tr></table></body></html>",
|
||||
"table_caption": ["Table 1: Performance comparison"],
|
||||
"table_footnote": ["* p < 0.05"],
|
||||
"page_idx": 2,
|
||||
"bbox": [50, 300, 950, 700]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `img_path` | `string` | 表格截图相对路径 |
|
||||
| `table_body` | `string` | 表格 HTML 表示(`<table>` 标签) |
|
||||
| `table_caption` | `string[]` | 表格标题列表 |
|
||||
| `table_footnote` | `string[]` | 表格脚注列表 |
|
||||
|
||||
#### 2.2.5 公式块(type: "equation")
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "equation",
|
||||
"text": "E = mc^2",
|
||||
"text_format": "latex",
|
||||
"img_path": "images/eq_0_0.png",
|
||||
"page_idx": 3,
|
||||
"bbox": [200, 400, 800, 450]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `text` | `string` | 公式的 LaTeX 表示 |
|
||||
| `text_format` | `string` | 固定值 `"latex"` |
|
||||
| `img_path` | `string` | 公式截图相对路径 |
|
||||
|
||||
#### 2.2.6 代码块(type: "code")— VLM 后端
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "code",
|
||||
"sub_type": "code",
|
||||
"code_body": "def hello():\n print('hello')",
|
||||
"code_caption": ["Listing 1: Example function"],
|
||||
"page_idx": 4,
|
||||
"bbox": [80, 100, 920, 300]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `sub_type` | `string` | `"code"` 或 `"algorithm"` |
|
||||
| `code_body` | `string` | 代码文本内容 |
|
||||
| `code_caption` | `string[]` | 代码块标题(可选) |
|
||||
|
||||
#### 2.2.7 列表块(type: "list")— VLM 后端
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "list",
|
||||
"sub_type": "text",
|
||||
"list_items": ["第一项", "第二项", "第三项"],
|
||||
"page_idx": 5,
|
||||
"bbox": [72, 200, 540, 350]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `sub_type` | `string` | `"text"` 或 `"ref_text"`(参考文献列表) |
|
||||
| `list_items` | `string[]` | 列表项内容 |
|
||||
|
||||
---
|
||||
|
||||
### 2.3 middle.json 字段规范
|
||||
|
||||
`middle.json` 是 MinerU 的富元数据中间格式,保留完整的文档层级结构。
|
||||
|
||||
#### 2.3.1 顶层结构
|
||||
|
||||
```json
|
||||
{
|
||||
"_backend": "pipeline | vlm | hybrid",
|
||||
"_version_name": "2.7.4",
|
||||
"pdf_info": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `_backend` | `string` | 使用的解析后端 |
|
||||
| `_version_name` | `string` | MinerU 版本标识 |
|
||||
| `pdf_info` | `array` | 按页组织的解析结果数组 |
|
||||
|
||||
#### 2.3.2 页级结构(pdf_info 数组元素)
|
||||
|
||||
```json
|
||||
{
|
||||
"page_idx": 0,
|
||||
"page_size": [595.0, 842.0],
|
||||
"preproc_blocks": [ ... ],
|
||||
"para_blocks": [ ... ],
|
||||
"images": [ ... ],
|
||||
"tables": [ ... ],
|
||||
"interline_equations": [ ... ],
|
||||
"discarded_blocks": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `page_idx` | `int` | 页码(0-indexed) |
|
||||
| `page_size` | `[float, float]` | 页面尺寸 `[宽, 高]`(原始 PDF 坐标系,单位 pt) |
|
||||
| `preproc_blocks` | `array` | 未分段的预处理块 |
|
||||
| `para_blocks` | `array` | **已分段的内容块**(主输出) |
|
||||
| `images` | `array` | 提取的图片块 |
|
||||
| `tables` | `array` | 提取的表格块 |
|
||||
| `interline_equations` | `array` | 行间公式块 |
|
||||
| `discarded_blocks` | `array` | 被过滤的内容(页眉、页脚、页码等) |
|
||||
|
||||
#### 2.3.3 内容块层级结构
|
||||
|
||||
内容块采用三级层级:**Block → Line → Span**
|
||||
|
||||
**一级块(Level 1)— 容器块:**
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "table",
|
||||
"bbox": [x0, y0, x1, y1],
|
||||
"blocks": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `type` | `string` | `"table"` 或 `"image"` |
|
||||
| `bbox` | `[x0, y0, x1, y1]` | 边界框坐标(原始 PDF 坐标系) |
|
||||
| `blocks` | `array` | 包含的二级块 |
|
||||
|
||||
**二级块(Level 2)— 语义块:**
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "text",
|
||||
"bbox": [x0, y0, x1, y1],
|
||||
"lines": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
| `type` 值 | 说明 |
|
||||
|-----------|------|
|
||||
| `text` | 正文段落 |
|
||||
| `title` | 标题 |
|
||||
| `image_body` | 图片主体 |
|
||||
| `image_caption` | 图片标题 |
|
||||
| `image_footnote` | 图片脚注 |
|
||||
| `table_body` | 表格主体 |
|
||||
| `table_caption` | 表格标题 |
|
||||
| `table_footnote` | 表格脚注 |
|
||||
| `interline_equation` | 行间公式 |
|
||||
| `index` | 目录项 |
|
||||
| `list` | 列表项 |
|
||||
|
||||
**行结构(Line):**
|
||||
|
||||
```json
|
||||
{
|
||||
"bbox": [x0, y0, x1, y1],
|
||||
"spans": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
**Span 结构(最小粒度):**
|
||||
|
||||
```json
|
||||
{
|
||||
"bbox": [x0, y0, x1, y1],
|
||||
"type": "text",
|
||||
"content": "具体文本内容",
|
||||
"score": 0.95
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `bbox` | `[x0, y0, x1, y1]` | 边界框坐标 |
|
||||
| `type` | `string` | `text` / `image` / `table` / `inline_equation` / `interline_equation` |
|
||||
| `content` | `string` | 文本内容(text 类型)|
|
||||
| `img_path` | `string` | 图片路径(image/table 类型)|
|
||||
| `score` | `float` | 模型置信度(0.0~1.0) |
|
||||
|
||||
---
|
||||
|
||||
### 2.4 Markdown 输出规范
|
||||
|
||||
| 文件 | 特点 |
|
||||
|------|------|
|
||||
| `auto/auto.md` | 图片以 `![](images/img_0_0.png)` 形式引用;表格保留为 Markdown 表格或 HTML;公式使用 `$...$` 和 `$$...$$` 定界符 |
|
||||
| `auto_nlp/auto_nlp.md` | 纯文本,图片/表格替换为占位文本描述;适合直接送入 NLP 管道 |
|
||||
|
||||
---
|
||||
|
||||
### 2.5 调试与可视化文件
|
||||
|
||||
| 文件 | 格式 | 说明 |
|
||||
|------|------|------|
|
||||
| `layout.pdf` | PDF | 每页叠加带编号的检测框,不同颜色区分内容类型,验证布局分析准确性和阅读顺序 |
|
||||
| `span.pdf` | PDF | 用不同颜色线框标注页面内容的 span 类型(仅 Pipeline 后端),排查文本丢失和公式识别问题 |
|
||||
| `model.json` | JSON | 原始模型推理结果,包含 `category_id`、`poly`(四边形坐标)、`score`(置信度) |
|
||||
|
||||
---
|
||||
|
||||
## 三、布局信息规范
|
||||
|
||||
### 3.1 坐标系定义
|
||||
|
||||
MinerU 使用三套坐标系,取决于输出文件:
|
||||
|
||||
| 坐标系 | 适用文件 | 范围 | 原点 | 说明 |
|
||||
|--------|---------|------|------|------|
|
||||
| **归一化坐标** | `content_list.json` | `0 – 1000` | 左上角 | 页面宽高均映射到 0~1000 |
|
||||
| **原始 PDF 坐标** | `middle.json` | 实际 pt 值 | 左上角 | 与 PDF 页面尺寸一致(如 A4 = 595×842) |
|
||||
| **归一化比例坐标** | `model.json`(VLM) | `0.0 – 1.0` | 左上角 | 宽高均映射到 0~1 |
|
||||
|
||||
**bbox 格式统一为:`[x0, y0, x1, y1]`**
|
||||
|
||||
```
|
||||
(x0, y0) ─────────────────── (x1, y0)
|
||||
│ │
|
||||
│ 内容区域 │
|
||||
│ │
|
||||
(x0, y1) ─────────────────── (x1, y1)
|
||||
```
|
||||
|
||||
- `x0, y0`:左上角坐标
|
||||
- `x1, y1`:右下角坐标
|
||||
|
||||
### 3.2 布局分类体系(Pipeline 后端)
|
||||
|
||||
`model.json` 中的 `category_id` 枚举:
|
||||
|
||||
| category_id | 类型 | 说明 |
|
||||
|-------------|------|------|
|
||||
| 0 | `title` | 标题 |
|
||||
| 1 | `plain_text` | 正文文本 |
|
||||
| 2 | `abandon` | 丢弃区域(页眉/页脚/页码等) |
|
||||
| 3 | `figure` | 图片 |
|
||||
| 4 | `figure_caption` | 图片标题 |
|
||||
| 5 | `table` | 表格 |
|
||||
| 6 | `table_caption` | 表格标题 |
|
||||
| 7 | `table_footnote` | 表格脚注 |
|
||||
| 8 | `isolate_formula` | 独立行间公式 |
|
||||
| 9 | `formula_caption` | 公式标题 |
|
||||
| 13 | `embedding` | 嵌入内容 |
|
||||
| 14 | `isolated` | 隔离内容 |
|
||||
| 15 | `OCR_text` | OCR 识别文本 |
|
||||
|
||||
### 3.3 布局分类体系(VLM 后端)
|
||||
|
||||
VLM 后端使用字符串类型标识,分类更细:
|
||||
|
||||
| type 值 | 说明 |
|
||||
|---------|------|
|
||||
| `text` | 正文 |
|
||||
| `title` | 标题 |
|
||||
| `equation` | 公式 |
|
||||
| `image` | 图片 |
|
||||
| `image_caption` | 图片标题 |
|
||||
| `image_footnote` | 图片脚注 |
|
||||
| `table` | 表格 |
|
||||
| `table_caption` | 表格标题 |
|
||||
| `table_footnote` | 表格脚注 |
|
||||
| `code` | 代码块 |
|
||||
| `code_caption` | 代码标题 |
|
||||
| `list` | 列表 |
|
||||
| `header` | 页眉(discarded) |
|
||||
| `footer` | 页脚(discarded) |
|
||||
| `page_number` | 页码(discarded) |
|
||||
| `aside_text` | 边栏文字(discarded) |
|
||||
| `page_footnote` | 页面脚注(discarded) |
|
||||
| `ref_text` | 参考文献 |
|
||||
| `algorithm` | 算法伪代码 |
|
||||
| `phonetic` | 注音 |
|
||||
|
||||
### 3.4 内容层级与标题级别
|
||||
|
||||
`content_list.json` 中的 `text_level` 字段标识文档结构层级:
|
||||
|
||||
| text_level | 含义 | 对应 Markdown |
|
||||
|------------|------|--------------|
|
||||
| `null` 或 `0` | 正文 | 无标记 |
|
||||
| `1` | 一级标题 | `# Heading` |
|
||||
| `2` | 二级标题 | `## Heading` |
|
||||
| `3` | 三级标题 | `### Heading` |
|
||||
| `4` | 四级标题 | `#### Heading` |
|
||||
| `5+` | 更深层标题 | `#####+ Heading` |
|
||||
|
||||
### 3.5 布局精度提取指南
|
||||
|
||||
针对不同数据类型的精确提取建议:
|
||||
|
||||
#### 文本提取
|
||||
|
||||
```python
|
||||
# Collect every body-text block from content_list.json. Headings share
# type "text" but carry text_level >= 1, so they are filtered out here;
# a missing, null or 0 text_level marks body text.
texts = [
    block for block in content_list
    if block["type"] == "text" and not block.get("text_level")
]
# Narrow the result to a single page (page_idx is 0-indexed).
page_0_texts = [b for b in texts if b["page_idx"] == 0]
|
||||
```
|
||||
|
||||
#### 标题层级提取
|
||||
|
||||
```python
|
||||
# Build the document outline: one entry per heading block, in list order.
headings = []
for block in content_list:
    if block["type"] != "text":
        continue
    level = block.get("text_level")
    # A missing, null or 0 level marks body text rather than a heading.
    if not level or level < 1:
        continue
    headings.append(
        {"level": level, "text": block["text"], "page": block["page_idx"]}
    )
|
||||
```
|
||||
|
||||
#### 表格数值提取
|
||||
|
||||
```python
|
||||
# Tables are stored as HTML in table_body and can be parsed with BeautifulSoup.
from bs4 import BeautifulSoup

tables = [b for b in content_list if b["type"] == "table"]
# One entry per table; each entry is a list of rows, each row a list of
# cell strings. Keeping a per-table list stops later tables from
# overwriting the rows of earlier ones.
table_rows = []
for table in tables:
    soup = BeautifulSoup(table["table_body"], "html.parser")
    rows = [
        [cell.get_text(strip=True) for cell in tr.find_all(["td", "th"])]
        for tr in soup.find_all("tr")
    ]
    table_rows.append(rows)
|
||||
```
|
||||
|
||||
#### 空间位置定位
|
||||
|
||||
```python
|
||||
# 利用 bbox 判断内容在页面中的位置
|
||||
def get_position(bbox, threshold=500):
    """Classify a block as lying in the upper or lower part of the page.

    The vertical midpoint of ``bbox`` (``[x0, y0, x1, y1]``) is compared
    with ``threshold``; the default of 500 is the middle of the 0-1000
    normalized coordinate range.

    Returns:
        "upper" when the midpoint is strictly less than ``threshold``,
        otherwise "lower".
    """
    top, bottom = bbox[1], bbox[3]
    if (top + bottom) / 2 < threshold:
        return "upper"
    return "lower"
|
||||
|
||||
# 判断两个块是否水平相邻(同一行)
|
||||
def is_same_row(block_a, block_b, tolerance=20):
    """Return True when the top edges of two blocks differ by less than ``tolerance``.

    Only the ``y0`` coordinate (index 1 of each block's ``bbox``) is compared.
    """
    top_a = block_a["bbox"][1]
    top_b = block_b["bbox"][1]
    return abs(top_a - top_b) < tolerance
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、云端 API MVP 必要字段
|
||||
|
||||
### 4.1 认证配置
|
||||
|
||||
| 配置项 | 值 | 获取方式 |
|
||||
|--------|-----|---------|
|
||||
| Token | Bearer Token 字符串 | [mineru.net/apiManage/token](https://mineru.net/apiManage/token) 注册后获取 |
|
||||
|
||||
**请求头格式(所有接口通用):**
|
||||
|
||||
```
|
||||
Authorization: Bearer {your_token}
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 创建解析任务 — 请求规范
|
||||
|
||||
**接口:** `POST https://mineru.net/api/v4/extract/task`
|
||||
|
||||
#### 请求体字段
|
||||
|
||||
| 字段 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| `url` | `string` | **是** | — | 待解析文件的公网可访问 URL |
|
||||
| `is_ocr` | `bool` | 否 | `false` | 是否强制启用 OCR(扫描件建议开启) |
|
||||
| `enable_formula` | `bool` | 否 | `true` | 是否启用公式识别 |
|
||||
| `enable_table` | `bool` | 否 | `true` | 是否启用表格识别 |
|
||||
| `language` | `string` | 否 | `"zh"` | 文档主语言代码 |
|
||||
| `model_version` | `string` | 否 | 自动选择 | 模型版本:`pipeline` / `vlm` / `MinerU-HTML` |
|
||||
| `data_id` | `string` | 否 | — | 自定义业务标识(用于关联追踪) |
|
||||
| `callback_url` | `string` | 否 | — | 任务完成后的回调通知 URL |
|
||||
|
||||
#### MVP 最小必填字段
|
||||
|
||||
```json
|
||||
{
|
||||
"url": "https://example.com/document.pdf"
|
||||
}
|
||||
```
|
||||
|
||||
> 仅 `url` 为必填,其余参数均有合理默认值。
|
||||
|
||||
---
|
||||
|
||||
### 4.3 查询任务结果 — 响应规范
|
||||
|
||||
**接口:** `GET https://mineru.net/api/v4/extract/task/{task_id}`
|
||||
|
||||
#### 响应体字段(位于统一响应包装的 `data` 对象内)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `task_id` | `string` | 任务唯一标识 |
|
||||
| `state` | `string` | 任务状态(见下方枚举) |
|
||||
| `err_msg` | `string \| null` | 错误信息(失败时) |
|
||||
| `full_zip_url` | `string \| null` | 完整输出 ZIP 下载地址(成功时) |
|
||||
| `file_name` | `string` | 原始文件名 |
|
||||
| `batch_id` | `string \| null` | 批量任务 ID(如有) |
|
||||
|
||||
#### 任务状态枚举
|
||||
|
||||
| state | 说明 |
|
||||
|-------|------|
|
||||
| `pending` | 排队等待中 |
|
||||
| `running` | 正在解析 |
|
||||
| `done` | 解析完成 |
|
||||
| `failed` | 解析失败(查看 `err_msg`) |
|
||||
|
||||
---
|
||||
|
||||
### 4.4 批量任务接口
|
||||
|
||||
#### 4.4.1 批量获取上传 URL
|
||||
|
||||
**接口:** `POST https://mineru.net/api/v4/file-urls/batch`
|
||||
|
||||
用于获取文件上传的预签名 URL(适合本地文件上传场景)。
|
||||
|
||||
#### 4.4.2 批量创建任务
|
||||
|
||||
**接口:** `POST https://mineru.net/api/v4/extract/task/batch`
|
||||
|
||||
请求体中 `files` 数组包含多个文件的解析参数。
|
||||
|
||||
#### 4.4.3 批量查询结果
|
||||
|
||||
**接口:** `GET https://mineru.net/api/v4/extract-results/batch/{batch_id}`
|
||||
|
||||
---
|
||||
|
||||
### 4.5 MVP 最小可用请求示例
|
||||
|
||||
#### Python 实现
|
||||
|
||||
```python
|
||||
import io
import os
import time
import zipfile

import requests

MINERU_API_TOKEN = os.getenv("MINERU_API_TOKEN")
BASE_URL = "https://mineru.net/api/v4"
HEADERS = {
    "Authorization": f"Bearer {MINERU_API_TOKEN}",
    "Content-Type": "application/json",
}
POLL_INTERVAL_SECONDS = 5
REQUEST_TIMEOUT_SECONDS = 30

if not MINERU_API_TOKEN:
    raise SystemExit("MINERU_API_TOKEN is not set")


def unwrap(resp):
    """Return the ``data`` payload of an API response.

    Raises:
        requests.HTTPError: the HTTP status is 4xx/5xx.
        RuntimeError: the response wrapper reports a non-zero ``code``.
    """
    resp.raise_for_status()
    payload = resp.json()
    # Responses are wrapped as {"code", "msg", "trace_id", "data"};
    # code 0 means success and the business fields live under "data".
    if payload.get("code") != 0:
        raise RuntimeError(
            f"MinerU API error {payload.get('code')}: {payload.get('msg')}"
        )
    return payload["data"]


# (1) Create the parsing task; "url" is the only required field.
resp = requests.post(
    f"{BASE_URL}/extract/task",
    headers=HEADERS,
    json={
        "url": "https://example.com/sample.pdf",  # required: public file URL
        # "is_ocr": False,          # optional, defaults to false
        # "enable_formula": True,   # optional, defaults to true
        # "enable_table": True,     # optional, defaults to true
        # "language": "zh",         # optional, defaults to Chinese
    },
    timeout=REQUEST_TIMEOUT_SECONDS,
)
task_id = unwrap(resp)["task_id"]
print(f"Task created: {task_id}")

# (2) Poll until the task reaches a terminal state.
zip_url = None  # stays None when the task fails, so step (3) is skipped
while True:
    result = unwrap(
        requests.get(
            f"{BASE_URL}/extract/task/{task_id}",
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    )

    state = result["state"]
    print(f"State: {state}")

    if state == "done":
        zip_url = result["full_zip_url"]
        print(f"Download: {zip_url}")
        break
    if state == "failed":
        print(f"Error: {result['err_msg']}")
        break

    time.sleep(POLL_INTERVAL_SECONDS)

# (3) Download and unpack the result archive (successful tasks only).
if zip_url:
    zip_resp = requests.get(zip_url, timeout=REQUEST_TIMEOUT_SECONDS)
    zip_resp.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(zip_resp.content)) as zf:
        zf.extractall("./mineru_output/")
        print("Files:", zf.namelist())
|
||||
```
|
||||
|
||||
#### cURL 实现
|
||||
|
||||
```bash
|
||||
# 创建任务
|
||||
curl -X POST https://mineru.net/api/v4/extract/task \
|
||||
-H "Authorization: Bearer YOUR_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"url": "https://example.com/sample.pdf"}'
|
||||
|
||||
# 查询结果
|
||||
curl https://mineru.net/api/v4/extract/task/{task_id} \
|
||||
-H "Authorization: Bearer YOUR_TOKEN"
|
||||
```
|
||||
|
||||
#### MVP 检查清单
|
||||
|
||||
- [ ] 已在 [mineru.net](https://mineru.net/) 注册账号
|
||||
- [ ] 已在 [Token 管理页](https://mineru.net/apiManage/token) 获取 API Token
|
||||
- [ ] 已将 Token 配置到 `.env` 文件:`MINERU_API_TOKEN=xxx`
|
||||
- [ ] 准备了公网可访问的测试文件 URL(PDF/DOCX/PPT/图片)
|
||||
- [ ] 安装了 `requests` 库:`pip install requests`
|
||||
1442
docs/product_requirements_document-v1.0.md
Normal file
1442
docs/product_requirements_document-v1.0.md
Normal file
File diff suppressed because it is too large
Load Diff
2
frontend/.gitignore
vendored
Normal file
2
frontend/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
|
||||
**/.git_embedded_backup/
|
||||
3
frontend/ATTRIBUTIONS.md
Normal file
3
frontend/ATTRIBUTIONS.md
Normal file
@@ -0,0 +1,3 @@
|
||||
This Figma Make file includes components from [shadcn/ui](https://ui.shadcn.com/) used under [MIT license](https://github.com/shadcn-ui/ui/blob/main/LICENSE.md).
|
||||
|
||||
This Figma Make file includes photos from [Unsplash](https://unsplash.com) used under [license](https://unsplash.com/license).
|
||||
33
frontend/CLAUDE.md
Normal file
33
frontend/CLAUDE.md
Normal file
@@ -0,0 +1,33 @@
|
||||
# Frontend — 开发说明
|
||||
|
||||
## 路径
|
||||
|
||||
```
|
||||
F:\GraphRAGAgent\frontend\
|
||||
```
|
||||
|
||||
## 启动开发服务器
|
||||
|
||||
```bash
|
||||
cd F:/GraphRAGAgent/frontend
|
||||
pnpm dev
|
||||
```
|
||||
|
||||
启动后访问:http://localhost:5173
|
||||
|
||||
## 依赖安装
|
||||
|
||||
```bash
|
||||
cd F:/GraphRAGAgent/frontend
|
||||
pnpm install
|
||||
pnpm rebuild @tailwindcss/oxide esbuild
|
||||
```
|
||||
|
||||
> 注意:首次安装后需执行 `pnpm rebuild @tailwindcss/oxide esbuild`,否则 Vite 构建会因原生包未编译而失败。
|
||||
|
||||
## 构建生产包
|
||||
|
||||
```bash
|
||||
cd F:/GraphRAGAgent/frontend
|
||||
pnpm build
|
||||
```
|
||||
11
frontend/README.md
Normal file
11
frontend/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
# 构建产品原型
|
||||
|
||||
This is a code bundle for 构建产品原型. The original project is available at https://www.figma.com/design/Tt95Sj8nC3HvirV1Vw4cA7/%E6%9E%84%E5%BB%BA%E4%BA%A7%E5%93%81%E5%8E%9F%E5%9E%8B.
|
||||
|
||||
## Running the code
|
||||
|
||||
Run `npm i` to install the dependencies.
|
||||
|
||||
Run `npm run dev` to start the development server.
|
||||
|
||||
61
frontend/guidelines/Guidelines.md
Normal file
61
frontend/guidelines/Guidelines.md
Normal file
@@ -0,0 +1,61 @@
|
||||
**Add your own guidelines here**
|
||||
<!--
|
||||
|
||||
System Guidelines
|
||||
|
||||
Use this file to provide the AI with rules and guidelines you want it to follow.
|
||||
This template outlines a few examples of things you can add. You can add your own sections and format it to suit your needs
|
||||
|
||||
TIP: More context isn't always better. It can confuse the LLM. Try and add the most important rules you need
|
||||
|
||||
# General guidelines
|
||||
|
||||
Any general rules you want the AI to follow.
|
||||
For example:
|
||||
|
||||
* Only use absolute positioning when necessary. Opt for responsive and well structured layouts that use flexbox and grid by default
|
||||
* Refactor code as you go to keep code clean
|
||||
* Keep file sizes small and put helper functions and components in their own files.
|
||||
|
||||
--------------
|
||||
|
||||
# Design system guidelines
|
||||
Rules for how the AI should make generations look like your company's design system
|
||||
|
||||
Additionally, if you select a design system to use in the prompt box, you can reference
|
||||
your design system's components, tokens, and variables.
|
||||
For example:
|
||||
|
||||
* Use a base font-size of 14px
|
||||
* Date formats should always be in the format “Jun 10”
|
||||
* The bottom toolbar should only ever have a maximum of 4 items
|
||||
* Never use the floating action button with the bottom toolbar
|
||||
* Chips should always come in sets of 3 or more
|
||||
* Don't use a dropdown if there are 2 or fewer options
|
||||
|
||||
You can also create sub sections and add more specific details
|
||||
For example:
|
||||
|
||||
|
||||
## Button
|
||||
The Button component is a fundamental interactive element in our design system, designed to trigger actions or navigate
|
||||
users through the application. It provides visual feedback and clear affordances to enhance user experience.
|
||||
|
||||
### Usage
|
||||
Buttons should be used for important actions that users need to take, such as form submissions, confirming choices,
|
||||
or initiating processes. They communicate interactivity and should have clear, action-oriented labels.
|
||||
|
||||
### Variants
|
||||
* Primary Button
|
||||
* Purpose : Used for the main action in a section or page
|
||||
* Visual Style : Bold, filled with the primary brand color
|
||||
* Usage : One primary button per section to guide users toward the most important action
|
||||
* Secondary Button
|
||||
* Purpose : Used for alternative or supporting actions
|
||||
* Visual Style : Outlined with the primary color, transparent background
|
||||
* Usage : Can appear alongside a primary button for less important actions
|
||||
* Tertiary Button
|
||||
* Purpose : Used for the least important actions
|
||||
* Visual Style : Text-only with no border, using primary color
|
||||
* Usage : For actions that should be available but not emphasized
|
||||
-->
|
||||
15
frontend/index.html
Normal file
15
frontend/index.html
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>构建产品原型</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
6034
frontend/package-lock.json
generated
Normal file
6034
frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
95
frontend/package.json
Normal file
95
frontend/package.json
Normal file
@@ -0,0 +1,95 @@
|
||||
{
|
||||
"name": "@figma/my-make-file",
|
||||
"private": true,
|
||||
"version": "0.0.1",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"build": "vite build",
|
||||
"dev": "vite"
|
||||
},
|
||||
"dependencies": {
|
||||
"@emotion/react": "11.14.0",
|
||||
"@emotion/styled": "11.14.1",
|
||||
"@mui/icons-material": "7.3.5",
|
||||
"@mui/material": "7.3.5",
|
||||
"@popperjs/core": "2.11.8",
|
||||
"@radix-ui/react-accordion": "1.2.3",
|
||||
"@radix-ui/react-alert-dialog": "1.1.6",
|
||||
"@radix-ui/react-aspect-ratio": "1.1.2",
|
||||
"@radix-ui/react-avatar": "1.1.3",
|
||||
"@radix-ui/react-checkbox": "1.1.4",
|
||||
"@radix-ui/react-collapsible": "1.1.3",
|
||||
"@radix-ui/react-context-menu": "2.2.6",
|
||||
"@radix-ui/react-dialog": "1.1.6",
|
||||
"@radix-ui/react-dropdown-menu": "2.1.6",
|
||||
"@radix-ui/react-hover-card": "1.1.6",
|
||||
"@radix-ui/react-label": "2.1.2",
|
||||
"@radix-ui/react-menubar": "1.1.6",
|
||||
"@radix-ui/react-navigation-menu": "1.2.5",
|
||||
"@radix-ui/react-popover": "1.1.6",
|
||||
"@radix-ui/react-progress": "1.1.2",
|
||||
"@radix-ui/react-radio-group": "1.2.3",
|
||||
"@radix-ui/react-scroll-area": "1.2.3",
|
||||
"@radix-ui/react-select": "2.1.6",
|
||||
"@radix-ui/react-separator": "1.1.2",
|
||||
"@radix-ui/react-slider": "1.2.3",
|
||||
"@radix-ui/react-slot": "1.1.2",
|
||||
"@radix-ui/react-switch": "1.1.3",
|
||||
"@radix-ui/react-tabs": "1.1.3",
|
||||
"@radix-ui/react-toggle": "1.1.2",
|
||||
"@radix-ui/react-toggle-group": "1.1.2",
|
||||
"@radix-ui/react-tooltip": "1.1.8",
|
||||
"@types/d3": "^7.4.3",
|
||||
"class-variance-authority": "0.7.1",
|
||||
"clsx": "2.1.1",
|
||||
"cmdk": "1.1.1",
|
||||
"d3": "^7.9.0",
|
||||
"date-fns": "3.6.0",
|
||||
"embla-carousel-react": "8.6.0",
|
||||
"input-otp": "1.4.2",
|
||||
"lucide-react": "0.487.0",
|
||||
"marked": "^17.0.4",
|
||||
"motion": "12.23.24",
|
||||
"next-themes": "0.4.6",
|
||||
"optional": "^0.1.4",
|
||||
"react": "^19.2.5",
|
||||
"react-day-picker": "8.10.1",
|
||||
"react-dnd": "16.0.1",
|
||||
"react-dnd-html5-backend": "16.0.1",
|
||||
"react-dom": "^19.2.5",
|
||||
"react-hook-form": "7.55.0",
|
||||
"react-popper": "2.3.0",
|
||||
"react-resizable-panels": "2.1.7",
|
||||
"react-responsive-masonry": "2.7.1",
|
||||
"react-router": "7.13.0",
|
||||
"react-slick": "0.31.0",
|
||||
"recharts": "2.15.2",
|
||||
"sonner": "2.0.3",
|
||||
"tailwind-merge": "3.2.0",
|
||||
"tw-animate-css": "1.3.8",
|
||||
"vaul": "1.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/vite": "4.1.12",
|
||||
"@vitejs/plugin-react": "4.7.0",
|
||||
"tailwindcss": "4.1.12",
|
||||
"vite": "6.3.5"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"react": {
|
||||
"optional": true
|
||||
},
|
||||
"react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"pnpm": {
|
||||
"overrides": {
|
||||
"vite": "6.3.5"
|
||||
}
|
||||
}
|
||||
}
|
||||
4676
frontend/pnpm-lock.yaml
generated
Normal file
4676
frontend/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
15
frontend/postcss.config.mjs
Normal file
15
frontend/postcss.config.mjs
Normal file
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* PostCSS Configuration
|
||||
*
|
||||
* Tailwind CSS v4 (via @tailwindcss/vite) automatically sets up all required
|
||||
* PostCSS plugins — you do NOT need to include `tailwindcss` or `autoprefixer` here.
|
||||
*
|
||||
* This file only exists for adding additional PostCSS plugins, if needed.
|
||||
* For example:
|
||||
*
|
||||
* import postcssNested from 'postcss-nested'
|
||||
* export default { plugins: [postcssNested()] }
|
||||
*
|
||||
* Otherwise, you can leave this file empty.
|
||||
*/
|
||||
export default {}
|
||||
7
frontend/src/app/App.tsx
Normal file
7
frontend/src/app/App.tsx
Normal file
@@ -0,0 +1,7 @@
|
||||
import { RouterProvider } from 'react-router';
|
||||
import { router } from './routes';
|
||||
import '../styles/app.css';
|
||||
|
||||
export default function App() {
|
||||
return <RouterProvider router={router} />;
|
||||
}
|
||||
279
frontend/src/app/api.ts
Normal file
279
frontend/src/app/api.ts
Normal file
@@ -0,0 +1,279 @@
|
||||
/**
|
||||
* GraphRAG Studio — Backend API Client
|
||||
* Base: http://localhost:8000/api/v1
|
||||
* All functions return the `data` field; throw ApiError on code !== 0
|
||||
*/
|
||||
|
||||
const BASE = 'http://localhost:8000/api/v1';
|
||||
|
||||
/**
 * Error raised by the API client; `code` carries the numeric code passed
 * to the constructor and `message` carries the accompanying text.
 */
export class ApiError extends Error {
  code: number;

  constructor(code: number, msg: string) {
    super(msg);
    // Without this, `name` stays "Error" and logs cannot tell API failures
    // apart from other errors.
    this.name = 'ApiError';
    this.code = code;
  }
}
|
||||
|
||||
/**
 * Send one HTTP request to `BASE + path` and unwrap the `{ code, msg, data }`
 * response envelope.
 *
 * @param method  HTTP method passed to `fetch`.
 * @param path    Path appended to `BASE`.
 * @param options `params` become the query string (entries that are
 *                `undefined`, `null` or `''` are dropped); `formData` is sent
 *                as-is; otherwise `body` is JSON-encoded.
 * @returns The envelope's `data` field, cast to `T`.
 * @throws ApiError when the envelope reports `code !== 0`, or, with the HTTP
 *         status as `code`, when the body is not JSON or lacks a numeric
 *         `code`.
 */
async function request<T>(
  method: string,
  path: string,
  options: {
    body?: unknown;
    formData?: FormData;
    params?: Record<string, string | number | boolean | undefined | null>;
  } = {}
): Promise<T> {
  let url = BASE + path;

  if (options.params) {
    const query = Object.entries(options.params)
      .filter(([, v]) => v !== undefined && v !== null && v !== '')
      .map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(String(v))}`);
    if (query.length) url += '?' + query.join('&');
  }

  const init: RequestInit = { method };
  if (options.formData) {
    // No Content-Type header is set here; the FormData body is passed through untouched.
    init.body = options.formData;
  } else if (options.body !== undefined) {
    init.headers = { 'Content-Type': 'application/json' };
    init.body = JSON.stringify(options.body);
  }

  const res = await fetch(url, init);

  let json: { code?: unknown; msg?: string; data?: unknown } | null;
  try {
    json = await res.json();
  } catch {
    // A non-JSON body (e.g. an HTML error page) would otherwise surface as a
    // raw SyntaxError that callers catching ApiError never see.
    throw new ApiError(res.status, `HTTP ${res.status}: response body is not valid JSON`);
  }

  if (json === null || typeof json !== 'object' || typeof json.code !== 'number') {
    // The body parsed but is not the expected envelope; report the HTTP status
    // instead of an ApiError whose code is undefined.
    throw new ApiError(res.status, `HTTP ${res.status}: ${res.statusText || 'unexpected response shape'}`);
  }

  if (json.code !== 0) throw new ApiError(json.code, json.msg ?? 'Unknown error');
  return json.data as T;
}
|
||||
|
||||
/** Issue a GET request; `params` are forwarded to `request` as query parameters. */
function get<T>(
  path: string,
  params?: Record<string, string | number | boolean | undefined | null>
): Promise<T> {
  return request<T>('GET', path, { params });
}

/** Issue a POST request, forwarding `body` to `request` for JSON encoding. */
function post<T>(path: string, body?: unknown): Promise<T> {
  return request<T>('POST', path, { body });
}

/** Issue a POST request whose body is the given FormData. */
function postForm<T>(path: string, fd: FormData): Promise<T> {
  return request<T>('POST', path, { formData: fd });
}

/** Issue a DELETE request. */
function del<T>(path: string): Promise<T> {
  return request<T>('DELETE', path);
}
|
||||
|
||||
// ─── Response Types ───────────────────────────────────────────────────────────
|
||||
|
||||
export interface ApiDoc {
|
||||
doc_id: string;
|
||||
filename: string;
|
||||
format: string;
|
||||
pages: number | null;
|
||||
status: 'uploaded' | 'indexing' | 'indexed' | 'failed';
|
||||
upload_date: string;
|
||||
job_id?: string | null;
|
||||
file_size?: number;
|
||||
error_msg?: string | null;
|
||||
}
|
||||
|
||||
export interface ApiJobStatus {
|
||||
job_id: string;
|
||||
doc_id: string;
|
||||
status: 'submitted' | 'queued' | 'parsing' | 'extracting' | 'indexing' | 'done' | 'failed' | 'cancelled';
|
||||
stage: string;
|
||||
progress: number; // 0.0–1.0
|
||||
started_at?: string;
|
||||
updated_at?: string;
|
||||
error_msg?: string | null;
|
||||
}
|
||||
|
||||
/** Result summary of an indexing job, as returned by `api.getJobResult`. */
export interface ApiIndexResult {
  job_id: string;
  doc_id: string;
  status: string;
  // Graph counters reported for the job and for the graph as a whole.
  nodes_added: number;
  edges_added: number;
  total_nodes: number;
  total_edges: number;
  // Processing counters.
  pages_processed: number;
  extractions_count: number;
  duration_seconds: number;
}
|
||||
|
||||
/** Knowledge-graph node as exchanged with the `/kg` and `/search` endpoints. */
export interface ApiKGNode {
  id: string;
  name: string;
  type: string;
  page: number;
  confidence: string;
  degree: number;
  /** Note: nodes use `source_doc`, whereas `ApiKGEdge` uses `doc_id`. */
  source_doc: string;
  // Only present in detail endpoint:
  degree_centrality?: number;
  neighbor_count?: number;
}
|
||||
|
||||
/** Knowledge-graph edge; `source` and `target` are strings (presumably node IDs — confirm against the backend). */
export interface ApiKGEdge {
  id: string;
  source: string;
  target: string;
  relation: string;
  doc_id: string;
  page: number;
}
|
||||
|
||||
/** Payload of `api.getHealth`: an overall status plus one status entry per component. */
export interface ApiHealthData {
  status: string;
  version: string;
  uptime_seconds: number;
  /** Every listed component reports an object of the shape `{ status }`. */
  components: Record<
    'mineru_venv' | 'langextract_venv' | 'deepseek_api' | 'storage',
    { status: string }
  >;
}
|
||||
|
||||
/** Aggregate counters returned by `api.getSystemStats`. */
export interface ApiStats {
  // Documents
  total_documents: number;
  indexed_documents: number;
  failed_documents: number;
  // Knowledge graph
  total_nodes: number;
  total_edges: number;
  // Activity and storage
  total_queries: number;
  active_jobs: number;
  storage_used_mb: number;
}
|
||||
|
||||
/** One entry of `ApiQueryResult.tool_calls`. */
export interface ApiToolCall {
  step: number;
  tool_name: string;
  tool_input: string;
  tool_output: string;
}
|
||||
|
||||
/** A question/answer record, as returned by `api.query` and listed by `api.getQueryHistory`. */
export interface ApiQueryResult {
  id: string;
  question: string;
  answer: string;
  tool_calls: ApiToolCall[];
  /** Node IDs. */
  cited_nodes: string[];
  duration_seconds: number;
  timestamp: string;
}
|
||||
|
||||
/** Payload of `api.searchEntities`. */
export interface ApiSearchResult {
  query: string;
  total: number;
  items: ApiKGNode[];
}
|
||||
|
||||
/** Payload of `api.searchPath`: the two endpoints plus the paths found between them. */
export interface ApiPathResult {
  from: { id: string; name: string; type: string };
  to: { id: string; name: string; type: string };
  max_hops: number;
  total_paths: number;
  paths: {
    length: number;
    nodes: { id: string; name: string; type: string }[];
    /** Optional in the type, so consumers must handle its absence. */
    edges?: { source: string; target: string; relation: string }[];
  }[];
}
|
||||
|
||||
/** Payload of `api.searchGraph`: the matched nodes and the edges of the resulting subgraph. */
export interface ApiGraphSearchResult {
  query: string;
  matched_nodes: ApiKGNode[];
  subgraph_edges: ApiKGEdge[];
  total_nodes: number;
}
|
||||
|
||||
// ─── API Functions ────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Typed client for the backend REST API, grouped by feature area (A–F).
 *
 * Every method delegates to the `get` / `post` / `postForm` / `del` helpers and
 * resolves with the typed payload they return.
 *
 * Caller-supplied identifiers that are interpolated into a URL path are passed
 * through `encodeURIComponent`, so an ID containing `/`, `?`, `#`, `%` or
 * whitespace still addresses the intended resource instead of changing the
 * shape of the URL. IDs made only of unreserved characters are unaffected.
 */
export const api = {
  // A: Documents
  listDocuments: (page = 1, pageSize = 100) =>
    get<{ total: number; page: number; page_size: number; items: ApiDoc[] }>(
      '/documents', { page, page_size: pageSize }
    ),

  getDocument: (docId: string) => get<ApiDoc>(`/documents/${encodeURIComponent(docId)}`),

  uploadDocument: (file: File) => {
    const fd = new FormData();
    fd.append('file', file);
    return postForm<{ doc_id: string; filename: string; format: string; status: string }>(
      '/documents/upload', fd
    );
  },

  deleteDocument: (docId: string) =>
    del<{ doc_id: string; removed_nodes: number; removed_edges: number }>(
      `/documents/${encodeURIComponent(docId)}`
    ),

  // B: Indexing
  startIndexing: (docId: string) =>
    post<{ job_id: string; doc_id: string; status: string }>('/index/start', { doc_id: docId }),

  getJobStatus: (jobId: string) =>
    get<ApiJobStatus>(`/index/status/${encodeURIComponent(jobId)}`),

  getJobResult: (jobId: string) =>
    get<ApiIndexResult>(`/index/result/${encodeURIComponent(jobId)}`),

  cancelJob: (jobId: string) =>
    del<{ job_id: string }>(`/index/jobs/${encodeURIComponent(jobId)}`),

  // C: Knowledge Graph
  getNodes: (params?: { page?: number; pageSize?: number; type?: string; docId?: string }) =>
    get<{ total: number; page: number; page_size: number; items: ApiKGNode[] }>('/kg/nodes', {
      page: params?.page,
      page_size: params?.pageSize ?? 500,
      type: params?.type,
      doc_id: params?.docId,
    }),

  getEdges: (params?: { page?: number; pageSize?: number; docId?: string }) =>
    get<{ total: number; page: number; page_size: number; items: ApiKGEdge[] }>('/kg/edges', {
      page: params?.page,
      page_size: params?.pageSize ?? 2000,
      doc_id: params?.docId,
    }),

  getNodeDetail: (nodeId: string) => get<ApiKGNode>(`/kg/nodes/${encodeURIComponent(nodeId)}`),

  getNodeNeighbors: (nodeId: string, hops = 1) =>
    get<{
      center: ApiKGNode;
      hops: number;
      neighbors_by_hop: Record<string, ApiKGNode[]>;
      total_neighbors: number;
    }>(`/kg/nodes/${encodeURIComponent(nodeId)}/neighbors`, { hops }),

  getKGStats: () =>
    get<{ total_nodes: number; total_edges: number; type_distribution: Record<string, number> }>('/kg/stats'),

  exportKG: () => get<{ nodes: ApiKGNode[]; edges: ApiKGEdge[] }>('/kg/export'),

  // D: QA Query
  query: (question: string, history: { question: string; answer: string }[] = []) => {
    // Transform {question, answer}[] to ChatMessage format expected by backend:
    // each past exchange becomes a 'human' message followed by an 'ai' message.
    const chatHistory = history.flatMap(h => [
      { role: 'human' as const, content: h.question },
      { role: 'ai' as const, content: h.answer },
    ]);
    return post<ApiQueryResult>('/query', { question, history: chatHistory });
  },

  getQueryHistory: (page = 1, pageSize = 50) =>
    get<{ total: number; page: number; page_size: number; items: ApiQueryResult[] }>(
      '/query/history', { page, page_size: pageSize }
    ),

  // E: Search
  searchEntities: (q: string, type?: string, limit = 15) =>
    get<ApiSearchResult>('/search/entities', {
      q,
      // '全部类型' is treated as "no type filter" and is not sent to the backend.
      type: type && type !== '全部类型' ? type : undefined,
      limit,
    }),

  searchPath: (fromId: string, toId: string, maxHops = 3) =>
    get<ApiPathResult>('/search/path', { from: fromId, to: toId, max_hops: maxHops }),

  searchGraph: (q: string, includeNeighbors = false) =>
    get<ApiGraphSearchResult>('/search/graph', { q, include_neighbors: includeNeighbors }),

  // F: System
  getHealth: () => get<ApiHealthData>('/health'),

  getSystemStats: () => get<ApiStats>('/system/stats'),

  getDemoData: () =>
    get<{ nodes: ApiKGNode[]; edges: ApiKGEdge[]; stats: Record<string, unknown> }>('/system/demo'),
};
|
||||
27
frontend/src/app/components/figma/ImageWithFallback.tsx
Normal file
27
frontend/src/app/components/figma/ImageWithFallback.tsx
Normal file
@@ -0,0 +1,27 @@
|
||||
import React, { useState } from 'react'
|
||||
|
||||
// Inline base64-encoded SVG data URI that ImageWithFallback shows when the requested image fails to load.
const ERROR_IMG_SRC = 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iODgiIGhlaWdodD0iODgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgc3Ryb2tlPSIjMDAwIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBvcGFjaXR5PSIuMyIgZmlsbD0ibm9uZSIgc3Ryb2tlLXdpZHRoPSIzLjciPjxyZWN0IHg9IjE2IiB5PSIxNiIgd2lkdGg9IjU2IiBoZWlnaHQ9IjU2IiByeD0iNiIvPjxwYXRoIGQ9Im0xNiA1OCAxNi0xOCAzMiAzMiIvPjxjaXJjbGUgY3g9IjUzIiBjeT0iMzUiIHI9IjciLz48L3N2Zz4KCg=='
|
||||
|
||||
/**
 * `<img>` wrapper that swaps in a placeholder graphic when the image fails to load.
 *
 * Accepts the standard `<img>` attributes. While the image has not failed, they are
 * forwarded to the real `<img>`. After a load error, a grey box (using the caller's
 * `className` and `style`) is rendered with the placeholder inside it, and the
 * original `src` is exposed as `data-original-url`.
 *
 * Two defects of the previous version are fixed here:
 *  - The failure is remembered per `src`, so giving the component a different `src`
 *    triggers a fresh load attempt instead of showing the placeholder forever.
 *  - A caller-supplied `onError` is now invoked when the real image fails; it used
 *    to be silently overridden by the internal handler.
 */
export function ImageWithFallback(props: React.ImgHTMLAttributes<HTMLImageElement>) {
  const { src, alt, style, className, onError, ...rest } = props

  // Store WHICH src failed rather than a bare boolean; wrapping it in an object
  // distinguishes "nothing failed yet" (null) from "an undefined src failed".
  const [failedSrc, setFailedSrc] = useState<{ value: typeof src } | null>(null)
  const didError = failedSrc !== null && failedSrc.value === src

  const handleError = (event: React.SyntheticEvent<HTMLImageElement, Event>) => {
    setFailedSrc({ value: src })
    onError?.(event)
  }

  if (didError) {
    return (
      <div
        className={`inline-block bg-gray-100 text-center align-middle ${className ?? ''}`}
        style={style}
      >
        <div className="flex items-center justify-center w-full h-full">
          <img src={ERROR_IMG_SRC} alt="Error loading image" {...rest} data-original-url={src} />
        </div>
      </div>
    )
  }

  return (
    <img src={src} alt={alt} className={className} style={style} {...rest} onError={handleError} />
  )
}
|
||||
47
frontend/src/app/components/layout/AppLayout.tsx
Normal file
47
frontend/src/app/components/layout/AppLayout.tsx
Normal file
@@ -0,0 +1,47 @@
|
||||
import React from 'react';
|
||||
import { Outlet } from 'react-router';
|
||||
import { Toaster } from 'sonner';
|
||||
import { Header } from './Header';
|
||||
import { Sidebar } from './Sidebar';
|
||||
import { StatusBar } from './StatusBar';
|
||||
import { useAppState, AppProvider } from '../../store';
|
||||
|
||||
/**
 * Application shell: a CSS grid with a header row, a sidebar + main row and a footer row.
 * The first column is 72px wide when the sidebar is collapsed and 220px otherwise;
 * the routed page is rendered through `<Outlet />` inside the scrollable main area.
 */
function AppLayoutInner() {
  const { sidebarCollapsed } = useAppState();
  const sidebarColumn = sidebarCollapsed ? 72 : 220;

  const shellStyle: React.CSSProperties = {
    display: 'grid',
    gridTemplateAreas: '"header header" "sidebar main" "footer footer"',
    gridTemplateColumns: `${sidebarColumn}px 1fr`,
    gridTemplateRows: '56px 1fr 32px',
    height: '100vh',
    overflow: 'hidden',
    transition: 'grid-template-columns 200ms ease',
  };

  const mainStyle: React.CSSProperties = {
    gridArea: 'main',
    overflowY: 'auto',
    background: 'var(--bg-base)',
  };

  return (
    <div style={shellStyle}>
      <Header />
      <Sidebar />
      <main style={mainStyle}>
        <Outlet />
      </main>
      <StatusBar />
    </div>
  );
}
|
||||
|
||||
/**
 * Root layout component: wraps the shell and the toast container in `AppProvider`,
 * so `AppLayoutInner` (and whatever it renders) can call `useAppState`.
 */
export function AppLayout() {
  const shell = <AppLayoutInner />;
  const toasts = <Toaster position="top-right" theme="dark" richColors />;

  return (
    <AppProvider>
      {shell}
      {toasts}
    </AppProvider>
  );
}
|
||||
145
frontend/src/app/components/layout/Header.tsx
Normal file
145
frontend/src/app/components/layout/Header.tsx
Normal file
@@ -0,0 +1,145 @@
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useNavigate } from 'react-router';
|
||||
import { Menu, Search, X } from 'lucide-react';
|
||||
import { useAppState, type KGNode } from '../../store';
|
||||
import { api } from '../../api';
|
||||
import { TYPE_COLORS } from '../../mock-data';
|
||||
|
||||
// Minimum number of typed characters before entity suggestions are fetched or
// re-shown on focus. One constant keeps both checks in agreement.
const MIN_SUGGEST_QUERY_LENGTH = 2;

/**
 * Top bar: sidebar toggle, product title, entity search box with debounced
 * suggestions, and an API health indicator.
 *
 * Behaviour:
 *  - Typing at least `MIN_SUGGEST_QUERY_LENGTH` characters schedules a suggestion
 *    lookup 300 ms later (up to 5 entities via `api.searchEntities`).
 *  - Submitting the form navigates to `/search?q=…` with the trimmed query.
 *  - Picking a suggestion navigates to `/graph?node=…` and clears the box.
 *  - The indicator dot is green only when every value in `health` equals 'ok'.
 *
 * Fixes relative to the previous version:
 *  - A response that arrives after the query changed (or after unmount) is ignored,
 *    so stale results can no longer overwrite newer ones or re-open the list.
 *  - The debounce timer lives inside the effect; the former `useRef<…>()` call
 *    without an initial value is rejected by the React 19 typings.
 *  - The focus handler used a threshold of 3 while fetching used 2, so a
 *    two-character query lost its suggestions after blur/refocus.
 *  - `doc_id` is read from `ApiKGNode.source_doc`; `ApiKGNode` declares no `doc_id`.
 *  - The node ID is URL-encoded in the navigation target.
 */
export function Header() {
  const { sidebarCollapsed, setSidebarCollapsed, health } = useAppState();
  const [query, setQuery] = useState('');
  const [showSuggestions, setShowSuggestions] = useState(false);
  const [suggestions, setSuggestions] = useState<KGNode[]>([]);
  const navigate = useNavigate();
  const inputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    if (query.length < MIN_SUGGEST_QUERY_LENGTH) {
      setSuggestions([]);
      setShowSuggestions(false);
      return;
    }

    // Set by the cleanup below: once the query changes or the component unmounts,
    // the result of this lookup must not touch state any more.
    let superseded = false;

    const timer = setTimeout(async () => {
      try {
        const res = await api.searchEntities(query, undefined, 5);
        if (superseded) return;
        setSuggestions(res.items.map(n => ({
          id: n.id, name: n.name, type: n.type as KGNode['type'],
          page: n.page, confidence: n.confidence as KGNode['confidence'],
          degree: n.degree, centrality: 0, doc_id: n.source_doc,
        })));
        setShowSuggestions(true);
      } catch {
        // Best effort: a failed lookup simply yields no suggestions.
        if (!superseded) setSuggestions([]);
      }
    }, 300);

    return () => {
      superseded = true;
      clearTimeout(timer);
    };
  }, [query]);

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    const trimmed = query.trim();
    if (trimmed) {
      setShowSuggestions(false);
      navigate(`/search?q=${encodeURIComponent(trimmed)}`);
    }
  };

  const allOk = Object.values(health).every(v => v === 'ok');

  return (
    <header
      className="flex items-center px-4 gap-4"
      style={{
        gridArea: 'header',
        height: 56,
        background: 'var(--bg-s1)',
        borderBottom: '1px solid var(--border-main)',
        position: 'sticky',
        top: 0,
        zIndex: 100,
      }}
    >
      {/* Left */}
      <button
        onClick={() => setSidebarCollapsed(!sidebarCollapsed)}
        className="p-1.5 rounded-md hover:opacity-80 cursor-pointer"
        style={{ background: 'var(--bg-s2)', color: 'var(--text-3)' }}
        aria-label="Toggle sidebar"
      >
        <Menu size={18} />
      </button>
      <span style={{ color: 'var(--blue)', fontSize: 16, fontWeight: 600, whiteSpace: 'nowrap' }}>
        GraphRAG Studio
      </span>

      {/* Center - Search */}
      <form onSubmit={handleSubmit} className="flex-1 flex justify-center relative" style={{ maxWidth: 400, margin: '0 auto' }}>
        <div className="relative w-full">
          <Search size={14} className="absolute left-3 top-1/2 -translate-y-1/2" style={{ color: 'var(--text-4)' }} />
          <input
            ref={inputRef}
            value={query}
            onChange={e => setQuery(e.target.value)}
            onFocus={() => query.length >= MIN_SUGGEST_QUERY_LENGTH && setShowSuggestions(true)}
            // Hiding is delayed so the list does not disappear at the instant focus leaves the input.
            onBlur={() => setTimeout(() => setShowSuggestions(false), 200)}
            placeholder="搜索实体..."
            className="w-full pl-9 pr-8 py-1.5 rounded-md outline-none"
            style={{
              background: 'var(--bg-s2)',
              border: '1px solid var(--border-main)',
              color: 'var(--text-1)',
              fontSize: 13,
            }}
          />
          {query && (
            <button
              type="button"
              aria-label="Clear search"
              onClick={() => { setQuery(''); setShowSuggestions(false); }}
              className="absolute right-2 top-1/2 -translate-y-1/2 cursor-pointer"
              style={{ color: 'var(--text-4)' }}
            >
              <X size={14} />
            </button>
          )}
        </div>
        {showSuggestions && suggestions.length > 0 && (
          <div
            className="absolute top-full mt-1 w-full rounded-md overflow-hidden"
            style={{ background: 'var(--bg-s3)', border: '1px solid var(--border-main)', boxShadow: 'var(--shadow-md)', zIndex: 200 }}
          >
            {suggestions.map(s => (
              <button
                key={s.id}
                type="button"
                className="w-full flex items-center gap-2 px-3 py-2 hover:opacity-80 cursor-pointer text-left"
                style={{ background: 'transparent', borderBottom: '1px solid var(--border-muted)' }}
                // onMouseDown (not onClick) so the choice registers before the input's blur handler runs.
                onMouseDown={() => {
                  setShowSuggestions(false);
                  setQuery('');
                  navigate(`/graph?node=${encodeURIComponent(s.id)}`);
                }}
              >
                <span style={{ color: 'var(--text-1)', fontSize: 13 }}>{s.name}</span>
                <span
                  className="px-1.5 py-0.5 rounded"
                  style={{
                    fontSize: 10, fontWeight: 600,
                    // The "20" suffix appended to the colour yields a translucent badge background.
                    background: `${TYPE_COLORS[s.type]}20`,
                    color: TYPE_COLORS[s.type],
                  }}
                >
                  {s.type}
                </span>
              </button>
            ))}
          </div>
        )}
      </form>

      {/* Right */}
      <div className="flex items-center gap-2" style={{ whiteSpace: 'nowrap' }}>
        <span
          className="inline-block w-2 h-2 rounded-full"
          style={{ background: allOk ? 'var(--green)' : 'var(--red)' }}
        />
        <span style={{ color: 'var(--text-3)', fontSize: 12 }}>API: localhost:8000</span>
      </div>
    </header>
  );
}
|
||||
110
frontend/src/app/components/layout/Sidebar.tsx
Normal file
110
frontend/src/app/components/layout/Sidebar.tsx
Normal file
@@ -0,0 +1,110 @@
|
||||
import React from 'react';
|
||||
import { useNavigate, useLocation } from 'react-router';
|
||||
import { LayoutDashboard, FileText, Share2, MessageSquare, Search, Settings } from 'lucide-react';
|
||||
import { useAppState } from '../../store';
|
||||
|
||||
// Primary navigation entries rendered by Sidebar, in display order.
// Entries with a `badgeKey` show the matching counter from `stats`; the others carry `badge: null`.
const navItems = [
  {
    icon: LayoutDashboard,
    label: '仪表盘',
    path: '/dashboard',
    badge: null,
  },
  {
    icon: FileText,
    label: '文档管理',
    path: '/documents',
    badgeKey: 'documents' as const,
  },
  {
    icon: Share2,
    label: '知识图谱',
    path: '/graph',
    badge: null,
  },
  {
    icon: MessageSquare,
    label: '智能问答',
    path: '/chat',
    badgeKey: 'queries' as const,
  },
  {
    icon: Search,
    label: '搜索',
    path: '/search',
    badge: null,
  },
];
|
||||
|
||||
/**
 * Left navigation rail.
 *
 * Renders one button per `navItems` entry plus a settings button pinned to the
 * bottom. The rail is 72px wide when collapsed (icons only) and 220px otherwise
 * (icon, label and optional counter badge taken from `stats`). An entry is
 * active when its path equals the current pathname; '/' counts as '/dashboard'.
 *
 * Accessibility fix: in collapsed mode the buttons contain only an icon, so they
 * had no accessible name and no tooltip. They now get `aria-label` and `title`
 * while collapsed, and the active entry is marked with `aria-current="page"`.
 */
export function Sidebar() {
  const { sidebarCollapsed, stats } = useAppState();
  const navigate = useNavigate();
  const location = useLocation();

  const width = sidebarCollapsed ? 72 : 220;

  return (
    <nav
      className="flex flex-col py-3 overflow-hidden"
      style={{
        gridArea: 'sidebar',
        width,
        background: 'var(--bg-s1)',
        borderRight: '1px solid var(--border-main)',
        transition: 'width 200ms ease',
      }}
    >
      <div className="flex flex-col gap-1 px-2">
        {navItems.map(item => {
          const active = location.pathname === item.path ||
            (item.path === '/dashboard' && location.pathname === '/');
          const Icon = item.icon;
          const badgeValue = item.badgeKey ? stats[item.badgeKey] : null;

          return (
            <button
              key={item.path}
              type="button"
              onClick={() => navigate(item.path)}
              // Only needed while collapsed: when expanded the visible label (and badge) name the button.
              aria-label={sidebarCollapsed ? item.label : undefined}
              title={sidebarCollapsed ? item.label : undefined}
              aria-current={active ? 'page' : undefined}
              className="flex items-center gap-3 rounded-md cursor-pointer relative"
              style={{
                padding: sidebarCollapsed ? '10px 0' : '10px 12px',
                justifyContent: sidebarCollapsed ? 'center' : 'flex-start',
                background: active ? 'rgba(88,166,255,0.1)' : 'transparent',
                color: active ? 'var(--blue)' : 'var(--text-3)',
                fontSize: 14,
                fontWeight: active ? 500 : 400,
                border: 'none',
                transition: 'all 150ms ease',
              }}
              // Hover highlight is applied imperatively and only to inactive entries,
              // so the active entry keeps its own background.
              onMouseEnter={e => {
                if (!active) (e.currentTarget as HTMLElement).style.background = 'var(--bg-s2)';
              }}
              onMouseLeave={e => {
                if (!active) (e.currentTarget as HTMLElement).style.background = 'transparent';
              }}
            >
              {active && (
                <div
                  className="absolute left-0 top-2 bottom-2 rounded-r"
                  style={{ width: 2, background: 'var(--blue)' }}
                />
              )}
              <Icon size={18} />
              {!sidebarCollapsed && (
                <>
                  <span className="flex-1 text-left">{item.label}</span>
                  {badgeValue != null && (
                    <span
                      className="px-1.5 py-0.5 rounded-full"
                      style={{
                        fontSize: 11, fontWeight: 600,
                        background: 'var(--bg-s2)',
                        color: 'var(--text-3)',
                        minWidth: 20,
                        textAlign: 'center',
                      }}
                    >
                      {badgeValue}
                    </span>
                  )}
                </>
              )}
            </button>
          );
        })}
      </div>

      <div className="mt-auto px-2">
        {/* NOTE(review): this button has no click handler yet, so it is purely visual. */}
        <button
          type="button"
          aria-label={sidebarCollapsed ? '系统设置' : undefined}
          title={sidebarCollapsed ? '系统设置' : undefined}
          className="flex items-center gap-3 rounded-md w-full cursor-pointer"
          style={{
            padding: sidebarCollapsed ? '10px 0' : '10px 12px',
            justifyContent: sidebarCollapsed ? 'center' : 'flex-start',
            background: 'transparent',
            color: 'var(--text-4)',
            fontSize: 14,
            border: 'none',
          }}
        >
          <Settings size={18} />
          {!sidebarCollapsed && <span>系统设置</span>}
        </button>
      </div>
    </nav>
  );
}
|
||||
34
frontend/src/app/components/layout/StatusBar.tsx
Normal file
34
frontend/src/app/components/layout/StatusBar.tsx
Normal file
@@ -0,0 +1,34 @@
|
||||
import React from 'react';
|
||||
import { useAppState } from '../../store';
|
||||
|
||||
/**
 * Footer bar. The left side shows the first document whose status is 'indexing'
 * (file name and progress, defaulting to 0); the right side shows the version
 * label and a dot that is green only when every `health` value equals 'ok'.
 */
export function StatusBar() {
  const { documents, health } = useAppState();

  const activeIndexing = documents.find(doc => doc.status === 'indexing');
  const healthy = !Object.values(health).some(state => state !== 'ok');

  const barStyle: React.CSSProperties = {
    gridArea: 'footer',
    height: 32,
    background: 'var(--bg-s1)',
    borderTop: '1px solid var(--border-main)',
    fontSize: 12,
    color: 'var(--text-4)',
  };
  const dotStyle: React.CSSProperties = {
    background: healthy ? 'var(--green)' : 'var(--red)',
  };

  return (
    <footer className="flex items-center justify-between px-4" style={barStyle}>
      <div>
        {activeIndexing ? (
          <span style={{ color: 'var(--yellow)' }}>
            正在索引 {activeIndexing.filename}... {activeIndexing.progress ?? 0}%
          </span>
        ) : null}
      </div>
      <div className="flex items-center gap-3">
        <span>v1.0.0</span>
        <span className="inline-block w-1.5 h-1.5 rounded-full" style={dotStyle} />
      </div>
    </footer>
  );
}
|
||||
210
frontend/src/app/components/pages/Dashboard.tsx
Normal file
210
frontend/src/app/components/pages/Dashboard.tsx
Normal file
@@ -0,0 +1,210 @@
|
||||
import React from 'react';
|
||||
import { useNavigate } from 'react-router';
|
||||
import { Share2, MessageSquare, Search, Zap, Upload, FileText, ExternalLink } from 'lucide-react';
|
||||
import { useAppState } from '../../store';
|
||||
|
||||
// Summary cards shown at the top of the dashboard; `key` selects the counter read from `stats`.
const statCards = [
  {
    key: 'kg_nodes',
    label: '图谱节点',
    color: '#58a6ff',
    icon: '◈',
  },
  {
    key: 'kg_edges',
    label: '图谱边',
    color: '#8957e5',
    icon: '◇',
  },
  {
    key: 'documents',
    label: '文档数',
    color: '#3fb950',
    icon: '▤',
  },
  {
    key: 'queries',
    label: '查询次数',
    color: '#d29922',
    icon: '◆',
  },
] as const;
|
||||
|
||||
// Badge colours (background / text) per document status.
const statusStyles: Record<string, { bg: string; color: string }> = {
  indexed: {
    bg: '#1a3a22',
    color: '#3fb950',
  },
  indexing: {
    bg: '#2d2a16',
    color: '#d29922',
  },
  uploaded: {
    bg: '#1c2128',
    color: '#8b949e',
  },
  failed: {
    bg: '#3b1a1a',
    color: '#f85149',
  },
};
|
||||
|
||||
/**
 * Dashboard page: four counter cards, a system-health panel with quick-action
 * shortcuts, and a table of the first five entries of `documents`.
 *
 * Fixes relative to the previous version:
 *  - An indexing document without a `progress` value rendered "undefined%" and an
 *    invalid bar width; it now falls back to 0, matching `StatusBar`.
 *  - A status with no entry in `statusStyles` crashed the page on `st.bg`; such
 *    rows now fall back to the neutral 'uploaded' colours.
 *  - A missing counter in `stats` crashed on `.toLocaleString()`; it now shows 0.
 *  - The document ID is URL-encoded in the "查看图谱" navigation target.
 *  - The "索引" and "重试" row buttons had no click handler. They now open the
 *    documents page; this file has no access to the indexing API itself.
 */
export function Dashboard() {
  const { stats, health, documents } = useAppState();
  const navigate = useNavigate();
  const recentDocs = documents.slice(0, 5);

  // Shared by the table header and every row so the columns always line up.
  const docGridColumns = '1fr 60px 50px 90px 130px 100px';

  return (
    <div className="p-6" style={{ maxWidth: 1200, margin: '0 auto' }}>
      {/* Page Title + Upload Button */}
      <div className="flex items-center justify-between mb-6">
        <h1 style={{ color: 'var(--text-1)', fontSize: 20, fontWeight: 600 }}>仪表盘</h1>
        <button
          onClick={() => navigate('/documents')}
          className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
          style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, fontWeight: 500, border: 'none' }}
        >
          <Upload size={14} /> 上传 & 索引
        </button>
      </div>

      {/* Stat Cards */}
      <div className="grid grid-cols-4 gap-4 mb-6" style={{ minWidth: 0 }}>
        {statCards.map(c => (
          <div
            key={c.key}
            className="rounded-lg p-4"
            style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
          >
            <div className="flex items-center justify-between mb-2">
              <span style={{ color: 'var(--text-3)', fontSize: 13 }}>{c.label}</span>
              <span style={{ fontSize: 18, color: c.color }}>{c.icon}</span>
            </div>
            <div style={{ color: c.color, fontSize: 28, fontWeight: 700 }}>
              {(stats[c.key] ?? 0).toLocaleString()}
            </div>
          </div>
        ))}
      </div>

      <div className="grid grid-cols-3 gap-4">
        {/* System Health */}
        <div
          className="rounded-lg p-4 col-span-1"
          style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
        >
          <h2 className="mb-4" style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>系统健康</h2>
          <div className="flex flex-col gap-3">
            {[
              { name: 'MinerU venv', status: health.mineru },
              { name: 'LangExtract venv', status: health.langextract },
              { name: 'DeepSeek API', status: health.deepseek },
              { name: 'Storage', status: health.storage },
            ].map(s => (
              <div key={s.name} className="flex items-center justify-between">
                <span style={{ color: 'var(--text-2)', fontSize: 13 }}>{s.name}</span>
                <span className="flex items-center gap-1.5">
                  <span className="inline-block w-2 h-2 rounded-full" style={{ background: s.status === 'ok' ? 'var(--green)' : 'var(--red)' }} />
                  <span style={{ color: s.status === 'ok' ? 'var(--green)' : 'var(--red)', fontSize: 12 }}>{s.status}</span>
                </span>
              </div>
            ))}
          </div>

          {/* Quick Actions */}
          <h2 className="mt-6 mb-3" style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>快捷操作</h2>
          <div className="flex flex-col gap-2">
            {[
              { icon: Share2, label: '浏览图谱', path: '/graph' },
              { icon: MessageSquare, label: '开始对话', path: '/chat' },
              { icon: Search, label: '搜索', path: '/search' },
              { icon: Zap, label: '演示', path: '/graph' },
            ].map(a => (
              <button
                key={a.label}
                onClick={() => navigate(a.path)}
                className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full"
                style={{
                  background: 'var(--bg-s2)',
                  border: '1px solid var(--border-main)',
                  color: 'var(--text-2)',
                  fontSize: 13,
                }}
              >
                <a.icon size={14} style={{ color: 'var(--blue)' }} /> {a.label}
              </button>
            ))}
          </div>
        </div>

        {/* Recent Documents */}
        <div
          className="rounded-lg p-4 col-span-2"
          style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
        >
          <div className="flex items-center justify-between mb-4">
            <h2 style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>最近文档</h2>
            <button
              onClick={() => navigate('/documents')}
              className="flex items-center gap-1 cursor-pointer"
              style={{ color: 'var(--blue)', fontSize: 12, background: 'none', border: 'none' }}
            >
              查看全部 <ExternalLink size={12} />
            </button>
          </div>

          <div className="flex flex-col">
            {/* Table header */}
            <div
              className="grid gap-4 px-3 py-2 rounded-t-md"
              style={{ gridTemplateColumns: docGridColumns, background: 'var(--bg-s2)', fontSize: 11, fontWeight: 600, color: 'var(--text-3)', textTransform: 'uppercase', letterSpacing: '0.5px' }}
            >
              <span>文件名</span>
              <span>格式</span>
              <span>页数</span>
              <span>状态</span>
              <span>日期</span>
              <span>操作</span>
            </div>

            {recentDocs.map(doc => {
              // Unknown statuses fall back to the neutral 'uploaded' colours instead of crashing.
              const st = statusStyles[doc.status] ?? statusStyles.uploaded;
              // `progress` may be unset on a document that only just entered 'indexing'.
              const progress = doc.progress ?? 0;
              return (
                <div
                  key={doc.id}
                  className="grid gap-4 px-3 py-2.5 items-center"
                  style={{
                    gridTemplateColumns: docGridColumns,
                    borderBottom: '1px solid var(--border-muted)',
                    fontSize: 13,
                  }}
                >
                  <span className="flex items-center gap-2 truncate" style={{ color: 'var(--text-1)' }}>
                    <FileText size={14} style={{ color: 'var(--text-3)', flexShrink: 0 }} />
                    <span className="truncate">{doc.filename}</span>
                  </span>
                  <span style={{ color: 'var(--text-3)' }}>{doc.format}</span>
                  <span style={{ color: 'var(--text-3)' }}>{doc.pages}</span>
                  <span>
                    <span
                      className="px-2 py-0.5 rounded-full"
                      style={{ fontSize: 11, fontWeight: 600, background: st.bg, color: st.color }}
                    >
                      {doc.status}
                    </span>
                  </span>
                  <span style={{ color: 'var(--text-4)', fontSize: 12 }}>
                    {new Date(doc.upload_date).toLocaleDateString('zh-CN', { month: 'short', day: 'numeric', year: 'numeric' })}
                  </span>
                  <span>
                    {doc.status === 'indexed' && (
                      <button
                        onClick={() => navigate(`/graph?doc_id=${encodeURIComponent(doc.id)}`)}
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
                      >
                        查看图谱
                      </button>
                    )}
                    {doc.status === 'uploaded' && (
                      <button
                        // Indexing is started from the documents page; send the user there.
                        onClick={() => navigate('/documents')}
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(35,134,54,0.2)', color: 'var(--green)', border: 'none' }}
                      >
                        索引
                      </button>
                    )}
                    {doc.status === 'indexing' && (
                      <div className="flex items-center gap-2">
                        <div style={{ flex: 1, height: 4, background: 'var(--bg-s2)', borderRadius: 2, overflow: 'hidden' }}>
                          <div style={{ width: `${progress}%`, height: '100%', background: 'var(--yellow)', borderRadius: 2, transition: 'width 300ms' }} />
                        </div>
                        <span style={{ fontSize: 11, color: 'var(--yellow)' }}>{progress}%</span>
                      </div>
                    )}
                    {doc.status === 'failed' && (
                      <button
                        // Retrying is done from the documents page; send the user there.
                        onClick={() => navigate('/documents')}
                        className="px-2 py-1 rounded cursor-pointer"
                        style={{ fontSize: 11, background: 'rgba(248,81,73,0.1)', color: 'var(--red)', border: 'none' }}
                      >
                        重试
                      </button>
                    )}
                  </span>
                </div>
              );
            })}
          </div>
        </div>
      </div>
    </div>
  );
}
|
||||
439
frontend/src/app/components/pages/Documents.tsx
Normal file
439
frontend/src/app/components/pages/Documents.tsx
Normal file
@@ -0,0 +1,439 @@
|
||||
import React, { useState, useCallback, useRef } from 'react';
|
||||
import { useNavigate } from 'react-router';
|
||||
import { toast } from 'sonner';
|
||||
import { Upload, FileText, Trash2, Play, RotateCcw, X, ChevronDown, ChevronRight, Eye } from 'lucide-react';
|
||||
import { useAppState } from '../../store';
|
||||
import { api, ApiError } from '../../api';
|
||||
|
||||
// Badge colours (background / text) per document status, used by the documents page.
const statusStyles: Record<string, { bg: string; color: string }> = {
  indexed: {
    bg: '#1a3a22',
    color: '#3fb950',
  },
  indexing: {
    bg: '#2d2a16',
    color: '#d29922',
  },
  uploaded: {
    bg: '#1c2128',
    color: '#8b949e',
  },
  failed: {
    bg: '#3b1a1a',
    color: '#f85149',
  },
};
|
||||
|
||||
export function Documents() {
|
||||
const { documents, setDocuments, refreshDocuments, refreshKG } = useAppState();
|
||||
const navigate = useNavigate();
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const [dragOver, setDragOver] = useState(false);
|
||||
const [formatFilter, setFormatFilter] = useState('All');
|
||||
const [statusFilter, setStatusFilter] = useState('All');
|
||||
const [searchTerm, setSearchTerm] = useState('');
|
||||
const [expandedDoc, setExpandedDoc] = useState<string | null>(null);
|
||||
const [showDeleteModal, setShowDeleteModal] = useState<string | null>(null);
|
||||
const [uploading, setUploading] = useState(false);
|
||||
|
||||
const filteredDocs = documents.filter(d => {
|
||||
if (formatFilter !== 'All' && d.format !== formatFilter) return false;
|
||||
if (statusFilter !== 'All' && d.status !== statusFilter) return false;
|
||||
if (searchTerm && !d.filename.toLowerCase().includes(searchTerm.toLowerCase())) return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
// ── Upload ──────────────────────────────────────────────────────────────────
|
||||
|
||||
const handleFiles = useCallback(async (files: FileList | File[]) => {
|
||||
const fileArr = Array.from(files);
|
||||
if (fileArr.length === 0) return;
|
||||
|
||||
setUploading(true);
|
||||
for (const file of fileArr) {
|
||||
try {
|
||||
toast.loading(`上传 ${file.name}...`, { id: `upload-${file.name}` });
|
||||
|
||||
// 1. Upload
|
||||
const uploaded = await api.uploadDocument(file);
|
||||
const newDoc = {
|
||||
id: uploaded.doc_id,
|
||||
filename: uploaded.filename,
|
||||
format: uploaded.format,
|
||||
pages: 0,
|
||||
status: 'uploaded' as const,
|
||||
upload_date: new Date().toISOString(),
|
||||
};
|
||||
setDocuments(prev => [newDoc, ...prev]);
|
||||
toast.success(`${file.name} 上传成功`, { id: `upload-${file.name}` });
|
||||
|
||||
// 2. Auto-start indexing
|
||||
try {
|
||||
toast.loading(`开始索引 ${file.name}...`, { id: `index-${uploaded.doc_id}` });
|
||||
const job = await api.startIndexing(uploaded.doc_id);
|
||||
setDocuments(prev =>
|
||||
prev.map(d => d.id === uploaded.doc_id
|
||||
? { ...d, status: 'indexing', job_id: job.job_id, progress: 0 }
|
||||
: d
|
||||
)
|
||||
);
|
||||
toast.success(`${file.name} 开始索引`, { id: `index-${uploaded.doc_id}` });
|
||||
} catch (err) {
|
||||
const msg = err instanceof ApiError ? err.message : '启动索引失败';
|
||||
toast.error(msg, { id: `index-${uploaded.doc_id}` });
|
||||
}
|
||||
} catch (err) {
|
||||
const msg = err instanceof ApiError ? err.message : '上传失败';
|
||||
toast.error(`${file.name}: ${msg}`, { id: `upload-${file.name}` });
|
||||
}
|
||||
}
|
||||
setUploading(false);
|
||||
}, [setDocuments]);
|
||||
|
||||
// Drag-and-drop handlers for the upload area.

// Calling preventDefault on dragover is what allows the element to receive a drop.
const handleDragOver = useCallback((e: React.DragEvent) => {
  e.preventDefault();
  setDragOver(true);
}, []);

// Clear the highlight once the pointer leaves the upload area.
const handleDragLeave = useCallback(() => {
  setDragOver(false);
}, []);

// Hand the dropped files over to the shared upload pipeline.
const handleDrop = useCallback((e: React.DragEvent) => {
  e.preventDefault();
  setDragOver(false);
  const dropped = e.dataTransfer.files;
  handleFiles(dropped);
}, [handleFiles]);
|
||||
|
||||
// Open the native file picker by clicking the hidden <input type="file">.
const handleBrowse = () => {
  fileInputRef.current?.click();
};

// Forward the picked files to the upload pipeline, then reset the input so
// that choosing the same file again still fires a change event.
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
  const input = e.target;
  // handleFiles copies the list with Array.from before its first await, so
  // clearing the input on the next line does not lose the selection.
  if (input.files) handleFiles(input.files);
  input.value = '';
};
|
||||
|
||||
// ── Index / Retry ────────────────────────────────────────────────────────────
|
||||
|
||||
// Start (or retry) indexing for one document: ask the backend for a job, then
// mark the row as "indexing" with that job id, zero progress and no error.
const handleStartIndex = useCallback(async (docId: string, filename: string) => {
  try {
    const job = await api.startIndexing(docId);
    setDocuments(current =>
      current.map(doc => {
        if (doc.id !== docId) return doc;
        return {
          ...doc,
          status: 'indexing' as const,
          job_id: job.job_id,
          progress: 0,
          error: undefined,
        };
      })
    );
    toast.success(`${filename} 开始索引`);
  } catch (err) {
    // Prefer the API-provided message; otherwise show a generic failure text.
    toast.error(err instanceof ApiError ? err.message : '启动索引失败');
  }
}, [setDocuments]);
|
||||
|
||||
// ── Cancel ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Cancel a running indexing job and put the document back into the
// "uploaded" state, dropping its job id and progress.
const handleCancel = useCallback(async (docId: string, jobId: string) => {
  try {
    await api.cancelJob(jobId);
    setDocuments(current =>
      current.map(doc => {
        if (doc.id !== docId) return doc;
        return { ...doc, status: 'uploaded' as const, job_id: undefined, progress: undefined };
      })
    );
    toast.info('索引任务已取消');
  } catch (err) {
    // Prefer the API-provided message; otherwise show a generic failure text.
    toast.error(err instanceof ApiError ? err.message : '取消失败');
  }
}, [setDocuments]);
|
||||
|
||||
// ── Delete ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Delete the document whose id is currently held in `showDeleteModal`.
// On success: remove the row, close the modal, notify, and refresh the
// knowledge graph. On failure the modal stays open and an error toast is shown.
const handleDelete = useCallback(async () => {
  const targetId = showDeleteModal;
  if (!targetId) return;

  try {
    await api.deleteDocument(targetId);
    setDocuments(current => current.filter(entry => entry.id !== targetId));
    setShowDeleteModal(null);
    toast.success('文档已删除');
    refreshKG();
  } catch (err) {
    toast.error(err instanceof ApiError ? err.message : '删除失败');
  }
}, [showDeleteModal, setDocuments, refreshKG]);

// Document currently targeted by the delete confirmation modal, if any.
const deleteDoc = documents.find(entry => entry.id === showDeleteModal);
|
||||
|
||||
return (
|
||||
<div className="p-6" style={{ maxWidth: 1200, margin: '0 auto' }}>
|
||||
<h1 className="mb-6" style={{ color: 'var(--text-1)', fontSize: 20, fontWeight: 600 }}>文档管理</h1>
|
||||
|
||||
{/* Hidden file input */}
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
multiple
|
||||
accept=".pdf,.docx,.doc,.pptx,.ppt,.png,.jpg,.jpeg,.html"
|
||||
style={{ display: 'none' }}
|
||||
onChange={handleFileChange}
|
||||
/>
|
||||
|
||||
{/* Upload Area */}
|
||||
<div
|
||||
onDragOver={handleDragOver}
|
||||
onDragLeave={handleDragLeave}
|
||||
onDrop={handleDrop}
|
||||
onClick={handleBrowse}
|
||||
className="flex flex-col items-center justify-center gap-3 rounded-lg p-8 mb-6 cursor-pointer"
|
||||
style={{
|
||||
border: `2px dashed ${dragOver ? 'var(--blue)' : 'var(--border-main)'}`,
|
||||
background: dragOver ? 'rgba(88,166,255,0.05)' : 'var(--bg-s1)',
|
||||
transition: 'all 200ms ease',
|
||||
opacity: uploading ? 0.6 : 1,
|
||||
pointerEvents: uploading ? 'none' : 'auto',
|
||||
}}
|
||||
>
|
||||
<Upload size={32} style={{ color: dragOver ? 'var(--blue)' : 'var(--text-4)' }} />
|
||||
<div style={{ color: 'var(--text-2)', fontSize: 14 }}>
|
||||
{uploading ? '正在上传...' : (
|
||||
<>拖拽文件到此处,或{' '}<span style={{ color: 'var(--blue)' }}>浏览文件</span></>
|
||||
)}
|
||||
</div>
|
||||
<div style={{ color: 'var(--text-4)', fontSize: 12 }}>
|
||||
PDF · DOCX · DOC · PPTX · PPT · PNG · JPG · HTML | 单文件最大 200MB
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Toolbar */}
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<select
|
||||
value={formatFilter}
|
||||
onChange={e => setFormatFilter(e.target.value)}
|
||||
className="px-3 py-1.5 rounded-md cursor-pointer"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
|
||||
>
|
||||
<option>All</option>
|
||||
<option>PDF</option>
|
||||
<option>DOCX</option>
|
||||
<option>PPTX</option>
|
||||
<option>PNG</option>
|
||||
<option>JPG</option>
|
||||
<option>HTML</option>
|
||||
</select>
|
||||
<select
|
||||
value={statusFilter}
|
||||
onChange={e => setStatusFilter(e.target.value)}
|
||||
className="px-3 py-1.5 rounded-md cursor-pointer"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
|
||||
>
|
||||
<option>All</option>
|
||||
<option>indexed</option>
|
||||
<option>indexing</option>
|
||||
<option>uploaded</option>
|
||||
<option>failed</option>
|
||||
</select>
|
||||
<input
|
||||
value={searchTerm}
|
||||
onChange={e => setSearchTerm(e.target.value)}
|
||||
placeholder="搜索文档..."
|
||||
className="px-3 py-1.5 rounded-md flex-1"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13, outline: 'none' }}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Document Table */}
|
||||
<div className="rounded-lg overflow-hidden" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}>
|
||||
{/* Header */}
|
||||
<div
|
||||
className="grid gap-4 px-4 py-2.5"
|
||||
style={{
|
||||
gridTemplateColumns: '24px 1fr 70px 50px 100px 140px 160px',
|
||||
background: 'var(--bg-s2)', fontSize: 11, fontWeight: 600,
|
||||
color: 'var(--text-3)', textTransform: 'uppercase', letterSpacing: '0.5px',
|
||||
}}
|
||||
>
|
||||
<span />
|
||||
<span>文件名</span>
|
||||
<span>格式</span>
|
||||
<span>页数</span>
|
||||
<span>状态</span>
|
||||
<span>上传日期</span>
|
||||
<span>操作</span>
|
||||
</div>
|
||||
|
||||
{/* Rows */}
|
||||
{filteredDocs.length === 0 ? (
|
||||
<div className="flex flex-col items-center justify-center py-12 gap-3">
|
||||
<FileText size={40} style={{ color: 'var(--text-4)' }} />
|
||||
<span style={{ color: 'var(--text-3)', fontSize: 14 }}>
|
||||
{documents.length === 0 ? '暂无文档,请上传文件' : '未找到匹配文档'}
|
||||
</span>
|
||||
</div>
|
||||
) : (
|
||||
filteredDocs.map(doc => {
|
||||
const st = statusStyles[doc.status];
|
||||
const isExpanded = expandedDoc === doc.id;
|
||||
return (
|
||||
<React.Fragment key={doc.id}>
|
||||
<div
|
||||
className="grid gap-4 px-4 py-3 items-center"
|
||||
style={{
|
||||
gridTemplateColumns: '24px 1fr 70px 50px 100px 140px 160px',
|
||||
borderBottom: '1px solid var(--border-muted)',
|
||||
fontSize: 13,
|
||||
}}
|
||||
>
|
||||
<button
|
||||
onClick={() => setExpandedDoc(isExpanded ? null : doc.id)}
|
||||
className="cursor-pointer"
|
||||
style={{ background: 'none', border: 'none', color: 'var(--text-4)', padding: 0 }}
|
||||
>
|
||||
{doc.status === 'indexed'
|
||||
? (isExpanded ? <ChevronDown size={14} /> : <ChevronRight size={14} />)
|
||||
: <span style={{ width: 14, display: 'inline-block' }} />}
|
||||
</button>
|
||||
<span className="flex items-center gap-2 truncate" style={{ color: 'var(--text-1)' }}>
|
||||
<FileText size={14} style={{ color: 'var(--text-3)', flexShrink: 0 }} />
|
||||
<span className="truncate">{doc.filename}</span>
|
||||
</span>
|
||||
<span style={{ color: 'var(--text-3)' }}>{doc.format}</span>
|
||||
<span style={{ color: 'var(--text-3)' }}>{doc.pages || '—'}</span>
|
||||
<span>
|
||||
<span className="px-2 py-0.5 rounded-full inline-flex items-center gap-1" style={{ fontSize: 11, fontWeight: 600, background: st.bg, color: st.color }}>
|
||||
{doc.status === 'indexing' && (
|
||||
<span className="inline-block w-1.5 h-1.5 rounded-full animate-pulse" style={{ background: st.color }} />
|
||||
)}
|
||||
{doc.status}
|
||||
</span>
|
||||
</span>
|
||||
<span style={{ color: 'var(--text-4)', fontSize: 12 }}>
|
||||
{new Date(doc.upload_date).toLocaleDateString('zh-CN', { month: 'short', day: 'numeric', year: 'numeric' })}
|
||||
</span>
|
||||
<span className="flex items-center gap-2">
|
||||
{doc.status === 'uploaded' && (
|
||||
<button
|
||||
onClick={() => handleStartIndex(doc.id, doc.filename)}
|
||||
className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
|
||||
style={{ fontSize: 11, background: 'rgba(35,134,54,0.2)', color: 'var(--green)', border: 'none' }}
|
||||
>
|
||||
<Play size={10} /> 索引
|
||||
</button>
|
||||
)}
|
||||
{doc.status === 'indexing' && (
|
||||
<>
|
||||
<div className="flex items-center gap-1.5 flex-1">
|
||||
<div style={{ flex: 1, height: 4, background: 'var(--bg-s2)', borderRadius: 2, overflow: 'hidden', minWidth: 40 }}>
|
||||
<div style={{ width: `${doc.progress ?? 0}%`, height: '100%', background: 'var(--yellow)', borderRadius: 2, transition: 'width 300ms' }} />
|
||||
</div>
|
||||
<span style={{ fontSize: 10, color: 'var(--yellow)', whiteSpace: 'nowrap' }}>{doc.progress ?? 0}%</span>
|
||||
</div>
|
||||
{doc.job_id && (
|
||||
<button
|
||||
onClick={() => handleCancel(doc.id, doc.job_id!)}
|
||||
className="cursor-pointer"
|
||||
style={{ background: 'none', border: 'none', color: 'var(--text-4)', padding: 2 }}
|
||||
>
|
||||
<X size={12} />
|
||||
</button>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{doc.status === 'indexed' && (
|
||||
<button
|
||||
onClick={() => navigate(`/graph?doc_id=${doc.id}`)}
|
||||
className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
|
||||
style={{ fontSize: 11, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
|
||||
>
|
||||
<Eye size={10} /> 查看图谱
|
||||
</button>
|
||||
)}
|
||||
{doc.status === 'failed' && (
|
||||
<button
|
||||
onClick={() => handleStartIndex(doc.id, doc.filename)}
|
||||
className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
|
||||
style={{ fontSize: 11, background: 'rgba(248,81,73,0.1)', color: 'var(--red)', border: 'none' }}
|
||||
>
|
||||
<RotateCcw size={10} /> 重试
|
||||
</button>
|
||||
)}
|
||||
{doc.status !== 'indexing' && (
|
||||
<button
|
||||
onClick={() => setShowDeleteModal(doc.id)}
|
||||
className="cursor-pointer p-1 rounded"
|
||||
style={{ background: 'none', border: 'none', color: 'var(--text-4)' }}
|
||||
>
|
||||
<Trash2 size={12} />
|
||||
</button>
|
||||
)}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Expanded Result Row */}
|
||||
{isExpanded && doc.result && (
|
||||
<div className="px-12 py-3" style={{ background: 'var(--bg-s2)', borderBottom: '1px solid var(--border-muted)' }}>
|
||||
<div className="flex items-center gap-4 mb-2" style={{ fontSize: 13, color: 'var(--text-2)' }}>
|
||||
<span>{doc.result.nodes} 个节点</span>
|
||||
<span style={{ color: 'var(--text-4)' }}>·</span>
|
||||
<span>{doc.result.edges} 条边</span>
|
||||
<span style={{ color: 'var(--text-4)' }}>·</span>
|
||||
<span>{doc.result.pages} 页</span>
|
||||
<span style={{ color: 'var(--text-4)' }}>·</span>
|
||||
<span>{doc.result.extractions} 次提取</span>
|
||||
<span style={{ color: 'var(--text-4)' }}>·</span>
|
||||
<span>{doc.result.duration.toFixed(1)}秒</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={() => navigate(`/graph?doc_id=${doc.id}`)}
|
||||
className="flex items-center gap-1 px-2 py-1 rounded cursor-pointer"
|
||||
style={{ fontSize: 11, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
|
||||
>
|
||||
在图谱中查看
|
||||
</button>
|
||||
{/* 查看提取结果:后端暂未提供独立 API,功能未开发 */}
|
||||
<button
|
||||
disabled
|
||||
title="功能未开发:后端暂无提取记录独立查询接口"
|
||||
className="flex items-center gap-1 px-2 py-1 rounded"
|
||||
style={{ fontSize: 11, background: 'var(--bg-s1)', color: 'var(--text-4)', border: '1px solid var(--border-muted)', cursor: 'not-allowed', opacity: 0.5 }}
|
||||
>
|
||||
查看提取结果 <span style={{ fontSize: 9, background: 'rgba(209,75,75,0.2)', color: '#f85149', padding: '1px 4px', borderRadius: 3, marginLeft: 4 }}>未开发</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Error message */}
|
||||
{doc.status === 'failed' && doc.error && (
|
||||
<div className="px-12 py-2" style={{ background: 'rgba(248,81,73,0.05)', borderBottom: '1px solid var(--border-muted)' }}>
|
||||
<span style={{ fontSize: 12, color: 'var(--red)' }}>{doc.error}</span>
|
||||
</div>
|
||||
)}
|
||||
</React.Fragment>
|
||||
);
|
||||
})
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Delete Modal */}
|
||||
{showDeleteModal && deleteDoc && (
|
||||
<div
|
||||
className="fixed inset-0 flex items-center justify-center"
|
||||
style={{ background: 'rgba(0,0,0,0.6)', zIndex: 1000 }}
|
||||
onClick={() => setShowDeleteModal(null)}
|
||||
>
|
||||
<div
|
||||
className="rounded-xl p-6"
|
||||
style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', width: 360, boxShadow: 'var(--shadow-lg)' }}
|
||||
onClick={e => e.stopPropagation()}
|
||||
>
|
||||
<h3 className="mb-3" style={{ color: 'var(--text-1)', fontSize: 16, fontWeight: 600 }}>
|
||||
确认删除 "{deleteDoc.filename}"?
|
||||
</h3>
|
||||
<p className="mb-4" style={{ color: 'var(--text-2)', fontSize: 13 }}>
|
||||
该文档及其关联的所有知识图谱数据将被永久删除,此操作不可撤销。
|
||||
</p>
|
||||
<div className="flex justify-end gap-2">
|
||||
<button
|
||||
onClick={() => setShowDeleteModal(null)}
|
||||
className="px-4 py-2 rounded-md cursor-pointer"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
|
||||
>
|
||||
取消
|
||||
</button>
|
||||
<button
|
||||
onClick={handleDelete}
|
||||
className="px-4 py-2 rounded-md cursor-pointer"
|
||||
style={{ background: 'rgba(248,81,73,0.15)', border: '1px solid var(--red)', color: 'var(--red)', fontSize: 13, fontWeight: 500 }}
|
||||
>
|
||||
删除
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
439
frontend/src/app/components/pages/KGExplorer.tsx
Normal file
439
frontend/src/app/components/pages/KGExplorer.tsx
Normal file
@@ -0,0 +1,439 @@
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
import { useNavigate, useSearchParams } from 'react-router';
|
||||
import * as d3 from 'd3';
|
||||
import { ZoomIn, ZoomOut, Maximize2, Search, Download, Image, X, MessageSquare, Upload, Share2 } from 'lucide-react';
|
||||
import { useAppState, type KGNode } from '../../store';
|
||||
import { TYPE_COLORS } from '../../mock-data';
|
||||
|
||||
// Entity type labels listed as checkboxes in the filter panel and in the legend.
// The type filter starts with all of them enabled; a node whose `type` is not in
// the enabled set is hidden from the graph.
const ENTITY_TYPES = ['TECHNOLOGY', 'CONCEPT', 'PERSON', 'ORGANIZATION', 'LOCATION'] as const;
|
||||
// Confidence labels listed as checkboxes in the filter panel (shown without the
// "match_" prefix). The confidence filter starts with all of them enabled; a node
// whose `confidence` is not in the enabled set is hidden from the graph.
const CONFIDENCE_LEVELS = ['match_exact', 'match_greater', 'match_lesser', 'match_fuzzy'] as const;
|
||||
|
||||
/**
 * Knowledge-graph explorer page.
 *
 * Layout: an optional filter panel on the left (source document, entity type,
 * confidence), a D3 force-directed graph in the centre (zoom / pan / drag,
 * hover tooltip, click-to-highlight), and a detail panel on the right for the
 * currently selected node.
 *
 * URL parameters:
 *   - `doc_id`: pre-selects the source-document filter.
 *   - `node`:   opens the detail panel for that node id once it is present in
 *               the store.
 */
export function KGExplorer() {
  const { nodes, edges, documents, selectedNode, setSelectedNode, getNeighbors } = useAppState();
  const navigate = useNavigate();
  const [searchParams] = useSearchParams();
  const svgRef = useRef<SVGSVGElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);
  // Explicit initial values: React 19 typings no longer accept useRef() with no argument.
  const simulationRef = useRef<d3.Simulation<any, any> | undefined>(undefined);
  const zoomRef = useRef<d3.ZoomBehavior<SVGSVGElement, unknown> | undefined>(undefined);
  // Remembers which `?node=` value has already been applied, so the URL
  // selection is made once and does not keep overriding the user's clicks.
  const appliedNodeParamRef = useRef<string | null>(null);

  const [filterTypes, setFilterTypes] = useState<Set<string>>(new Set(ENTITY_TYPES));
  const [filterConfidence, setFilterConfidence] = useState<Set<string>>(new Set(CONFIDENCE_LEVELS));
  const [filterDoc, setFilterDoc] = useState<string>('all');
  const [searchQuery, setSearchQuery] = useState('');
  const [showFilter, setShowFilter] = useState(true);
  const [tooltip, setTooltip] = useState<{ x: number; y: number; node: KGNode } | null>(null);

  const indexedDocs = documents.filter(d => d.status === 'indexed');

  // Nodes that pass every active filter (type, confidence, document, name search).
  const visibleNodes = nodes.filter(n => {
    if (!filterTypes.has(n.type)) return false;
    if (!filterConfidence.has(n.confidence)) return false;
    if (filterDoc !== 'all' && n.doc_id !== filterDoc) return false;
    if (searchQuery && !n.name.toLowerCase().includes(searchQuery.toLowerCase())) return false;
    return true;
  });

  // Only keep edges whose both endpoints are visible.
  const visibleNodeIds = new Set(visibleNodes.map(n => n.id));
  const visibleEdges = edges.filter(e => visibleNodeIds.has(e.source as string) && visibleNodeIds.has(e.target as string));

  // Neighbors of the selected node, shown in the detail panel.
  const neighborInfo = selectedNode ? getNeighbors(selectedNode.id) : null;

  // Apply `?doc_id=` only when the URL changes. Doing this inside the D3
  // effect (which depends on `filterDoc`) would reset the dropdown to the URL
  // value every time the user picks another document.
  useEffect(() => {
    const docParam = searchParams.get('doc_id');
    if (docParam) {
      setFilterDoc(docParam);
    }
  }, [searchParams]);

  // Apply `?node=` once, as soon as the node is available in the store.
  useEffect(() => {
    const nodeParam = searchParams.get('node');
    if (!nodeParam || appliedNodeParamRef.current === nodeParam) return;
    const target = nodes.find(n => n.id === nodeParam);
    if (target) {
      appliedNodeParamRef.current = nodeParam;
      setSelectedNode(target);
    }
  }, [searchParams, nodes, setSelectedNode]);

  // D3 rendering: rebuild the whole SVG scene and force simulation whenever the
  // visible node/edge counts, the search query, or the document filter change.
  useEffect(() => {
    if (!svgRef.current || !containerRef.current) return;
    const svg = d3.select(svgRef.current);
    svg.selectAll('*').remove();
    if (visibleNodes.length === 0) return;

    const rect = containerRef.current.getBoundingClientRect();
    const width = rect.width;
    const height = rect.height;

    svg.attr('width', width).attr('height', height);

    // Everything is drawn inside one <g> so zoom/pan is a single transform.
    const g = svg.append('g');

    const zoom = d3.zoom<SVGSVGElement, unknown>()
      .scaleExtent([0.1, 8])
      .on('zoom', (event) => g.attr('transform', event.transform));
    zoomRef.current = zoom;
    svg.call(zoom);

    // Work on copies: the simulation mutates its nodes (x/y/vx/vy) and replaces
    // each edge's source/target id with the corresponding node object.
    const simNodes = visibleNodes.map(n => ({ ...n, x: width / 2 + (Math.random() - 0.5) * 200, y: height / 2 + (Math.random() - 0.5) * 200 }));
    const simEdges = visibleEdges.map(e => ({ ...e, source: e.source, target: e.target }));

    const simulation = d3.forceSimulation(simNodes)
      .force('link', d3.forceLink(simEdges).id((d: any) => d.id).distance(60).strength(0.3))
      .force('charge', d3.forceManyBody().strength(-120))
      .force('center', d3.forceCenter(width / 2, height / 2))
      .force('collide', d3.forceCollide().radius((d: any) => getRadius(d.degree) + 4))
      .alphaDecay(0.02);

    simulationRef.current = simulation;

    // Edges
    const link = g.append('g')
      .selectAll('line')
      .data(simEdges)
      .join('line')
      .attr('stroke', '#30363d')
      .attr('stroke-width', 1)
      .attr('stroke-opacity', 0.25);

    // Nodes
    const node = g.append('g')
      .selectAll('circle')
      .data(simNodes)
      .join('circle')
      .attr('r', (d: any) => getRadius(d.degree))
      .attr('fill', (d: any) => TYPE_COLORS[d.type] || '#8b949e')
      .attr('stroke', '#0f1117')
      .attr('stroke-width', 1.5)
      .attr('opacity', 0.9)
      .attr('cursor', 'pointer')
      .on('mouseover', function(event, d: any) {
        d3.select(this).attr('stroke', '#ffffff').attr('stroke-width', 2.5);
        setTooltip({ x: event.clientX + 8, y: event.clientY + 8, node: d });
      })
      .on('mouseout', function() {
        d3.select(this).attr('stroke', '#0f1117').attr('stroke-width', 1.5);
        setTooltip(null);
      })
      .on('click', (_, d: any) => {
        setSelectedNode(d);

        // Collect the clicked node's direct neighbors in one pass over the edges.
        const neighborIds = new Set<string>();
        simEdges.forEach((e: any) => {
          if (e.source.id === d.id) neighborIds.add(e.target.id);
          else if (e.target.id === d.id) neighborIds.add(e.source.id);
        });

        // Dim everything except the clicked node and its neighbors.
        node.attr('opacity', (n: any) =>
          n.id === d.id || neighborIds.has(n.id) ? 0.9 : 0.1
        );
        // Set every radius, not just the clicked one, so a previously
        // selected node shrinks back to its normal size.
        node.attr('r', (n: any) => getRadius(n.degree) * (n.id === d.id ? 1.5 : 1));
        link.attr('stroke-opacity', (e: any) =>
          e.source.id === d.id || e.target.id === d.id ? 0.8 : 0.05
        );
      })
      .call(d3.drag<SVGCircleElement, any>()
        .on('start', (event, d: any) => {
          if (!event.active) simulation.alphaTarget(0.3).restart();
          d.fx = d.x; d.fy = d.y;
        })
        .on('drag', (event, d: any) => { d.fx = event.x; d.fy = event.y; })
        .on('end', (event, d: any) => {
          // fx/fy are not cleared here, so a dragged node stays pinned where
          // it was dropped. NOTE(review): presumably intentional — confirm.
          if (!event.active) simulation.alphaTarget(0);
        })
      );

    // Labels for high-degree nodes
    const label = g.append('g')
      .selectAll('text')
      .data(simNodes.filter(n => n.degree >= 12))
      .join('text')
      .text((d: any) => d.name)
      .attr('font-size', 10)
      .attr('fill', 'var(--text-3)')
      .attr('text-anchor', 'middle')
      .attr('dy', (d: any) => -(getRadius(d.degree) + 6))
      .attr('pointer-events', 'none');

    // Click blank to reset. Clicks on circles also bubble up to the svg, hence
    // the target check.
    svg.on('click', (event) => {
      if (event.target === svgRef.current) {
        setSelectedNode(null);
        node.attr('opacity', 0.9).attr('r', (d: any) => getRadius(d.degree));
        link.attr('stroke-opacity', 0.25);
      }
    });

    simulation.on('tick', () => {
      link
        .attr('x1', (d: any) => d.source.x)
        .attr('y1', (d: any) => d.source.y)
        .attr('x2', (d: any) => d.target.x)
        .attr('y2', (d: any) => d.target.y);
      node
        .attr('cx', (d: any) => d.x)
        .attr('cy', (d: any) => d.y);
      label
        .attr('x', (d: any) => d.x)
        .attr('y', (d: any) => d.y);
    });

    return () => { simulation.stop(); };
  }, [visibleNodes.length, visibleEdges.length, searchQuery, filterDoc]);

  const handleZoomIn = () => {
    if (svgRef.current && zoomRef.current) {
      d3.select(svgRef.current).transition().duration(300).call(zoomRef.current.scaleBy, 1.3);
    }
  };
  const handleZoomOut = () => {
    if (svgRef.current && zoomRef.current) {
      d3.select(svgRef.current).transition().duration(300).call(zoomRef.current.scaleBy, 0.7);
    }
  };
  // Resets zoom and pan to the identity transform.
  const handleFitAll = () => {
    if (svgRef.current && zoomRef.current) {
      d3.select(svgRef.current).transition().duration(500).call(zoomRef.current.transform, d3.zoomIdentity);
    }
  };

  // Toggle one entity type in the filter (new Set so React sees a state change).
  const toggleType = (t: string) => {
    const next = new Set(filterTypes);
    if (next.has(t)) next.delete(t); else next.add(t);
    setFilterTypes(next);
  };

  // Toggle one confidence level in the filter.
  const toggleConfidence = (c: string) => {
    const next = new Set(filterConfidence);
    if (next.has(c)) next.delete(c); else next.add(c);
    setFilterConfidence(next);
  };

  return (
    <div className="flex h-full" style={{ background: 'var(--bg-base)' }}>
      {/* Filter Panel */}
      {showFilter && (
        <div
          className="flex flex-col p-4 overflow-y-auto"
          style={{
            width: 260,
            background: 'var(--bg-s1)',
            borderRight: '1px solid var(--border-main)',
            flexShrink: 0,
          }}
        >
          <h3 className="mb-3" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>来源文档</h3>
          <select
            value={filterDoc}
            onChange={e => setFilterDoc(e.target.value)}
            className="mb-4 px-2 py-1.5 rounded-md w-full"
            style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 12 }}
          >
            <option value="all">全部文档</option>
            {indexedDocs.map(d => (
              <option key={d.id} value={d.id}>{d.filename}</option>
            ))}
          </select>

          <h3 className="mb-2" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>实体类型</h3>
          <div className="flex flex-col gap-1.5 mb-4">
            {ENTITY_TYPES.map(t => {
              // Count over all nodes in the store, not only the visible ones.
              const count = nodes.filter(n => n.type === t).length;
              return (
                <label key={t} className="flex items-center gap-2 cursor-pointer" style={{ fontSize: 12, color: 'var(--text-2)' }}>
                  <input
                    type="checkbox"
                    checked={filterTypes.has(t)}
                    onChange={() => toggleType(t)}
                    className="cursor-pointer"
                    style={{ accentColor: TYPE_COLORS[t] }}
                  />
                  <span className="inline-block w-2.5 h-2.5 rounded-full" style={{ background: TYPE_COLORS[t] }} />
                  <span className="flex-1">{t}</span>
                  <span style={{ color: 'var(--text-4)', fontSize: 11 }}>{count}</span>
                </label>
              );
            })}
          </div>

          <h3 className="mb-2" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>置信度</h3>
          <div className="flex flex-col gap-1.5 mb-4">
            {CONFIDENCE_LEVELS.map(c => (
              <label key={c} className="flex items-center gap-2 cursor-pointer" style={{ fontSize: 12, color: 'var(--text-2)' }}>
                <input type="checkbox" checked={filterConfidence.has(c)} onChange={() => toggleConfidence(c)} className="cursor-pointer" />
                {c.replace('match_', '')}
              </label>
            ))}
          </div>

          {/* NOTE(review): the two export buttons below have no onClick handler yet. */}
          <div className="mt-auto flex flex-col gap-2">
            <button className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full" style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 12 }}>
              <Image size={12} /> 导出 PNG
            </button>
            <button className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full" style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 12 }}>
              <Download size={12} /> 导出 JSON
            </button>
          </div>
        </div>
      )}

      {/* Graph Area */}
      <div ref={containerRef} className="flex-1 relative" style={{ overflow: 'hidden' }}>
        {/* Toolbar */}
        <div className="absolute top-3 left-3 flex items-center gap-1.5 rounded-md p-1" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', zIndex: 10 }}>
          <button onClick={handleZoomIn} className="p-1.5 rounded cursor-pointer" style={{ background: 'transparent', border: 'none', color: 'var(--text-3)' }}><ZoomIn size={16} /></button>
          <button onClick={handleZoomOut} className="p-1.5 rounded cursor-pointer" style={{ background: 'transparent', border: 'none', color: 'var(--text-3)' }}><ZoomOut size={16} /></button>
          <button onClick={handleFitAll} className="p-1.5 rounded cursor-pointer" style={{ background: 'transparent', border: 'none', color: 'var(--text-3)' }}><Maximize2 size={16} /></button>
          <div style={{ width: 1, height: 20, background: 'var(--border-main)' }} />
          <div className="relative">
            <Search size={12} className="absolute left-2 top-1/2 -translate-y-1/2" style={{ color: 'var(--text-4)' }} />
            <input
              value={searchQuery}
              onChange={e => setSearchQuery(e.target.value)}
              placeholder="搜索..."
              className="pl-7 pr-2 py-1 rounded"
              style={{ width: 120, background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 12, outline: 'none' }}
            />
          </div>
        </div>

        {/* Legend */}
        <div className="absolute bottom-3 left-3 flex flex-wrap gap-3 rounded-md px-3 py-2" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', zIndex: 10 }}>
          {ENTITY_TYPES.map(t => (
            <div key={t} className="flex items-center gap-1.5" style={{ fontSize: 11, color: 'var(--text-3)' }}>
              <span className="inline-block w-2.5 h-2.5 rounded-full" style={{ background: TYPE_COLORS[t] }} />
              {t}
            </div>
          ))}
        </div>

        {/* Stats */}
        <div className="absolute top-3 right-3 rounded-md px-3 py-1.5" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', zIndex: 10, fontSize: 11, color: 'var(--text-3)' }}>
          {visibleNodes.length} 个节点 · {visibleEdges.length} 条边
        </div>

        {visibleNodes.length === 0 ? (
          <div className="flex flex-col items-center justify-center h-full gap-3">
            <Share2 size={48} style={{ color: 'var(--text-4)' }} />
            <span style={{ color: 'var(--text-2)', fontSize: 16 }}>暂无知识图谱</span>
            <button
              onClick={() => navigate('/documents')}
              className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
              style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, border: 'none' }}
            >
              <Upload size={14} /> 上传 & 索引
            </button>
          </div>
        ) : (
          <svg ref={svgRef} className="w-full h-full" />
        )}

        {/* Tooltip */}
        {tooltip && (
          <div
            className="fixed rounded-md px-3 py-2 pointer-events-none"
            style={{
              left: tooltip.x, top: tooltip.y,
              background: 'var(--bg-s3)', border: '1px solid var(--border-main)',
              boxShadow: 'var(--shadow-md)', zIndex: 100, fontSize: 12,
            }}
          >
            <div className="flex items-center gap-2 mb-1">
              <span style={{ color: 'var(--text-1)', fontWeight: 600 }}>{tooltip.node.name}</span>
              <span className="px-1.5 py-0.5 rounded" style={{ fontSize: 10, fontWeight: 600, background: `${TYPE_COLORS[tooltip.node.type]}20`, color: TYPE_COLORS[tooltip.node.type] }}>
                {tooltip.node.type}
              </span>
            </div>
            <div style={{ color: 'var(--text-3)' }}>页码: {tooltip.node.page}</div>
            <div style={{ color: 'var(--text-3)' }}>置信度: {tooltip.node.confidence}</div>
            <div style={{ color: 'var(--text-3)' }}>度数: {tooltip.node.degree}</div>
          </div>
        )}
      </div>

      {/* Detail Panel */}
      {selectedNode && (
        <div
          className="flex flex-col p-4 overflow-y-auto"
          style={{
            width: 300,
            background: 'var(--bg-s1)',
            borderLeft: '1px solid var(--border-main)',
            flexShrink: 0,
          }}
        >
          <div className="flex items-center justify-between mb-3">
            <h2 style={{ color: 'var(--text-1)', fontSize: 18, fontWeight: 600 }}>{selectedNode.name}</h2>
            <button onClick={() => setSelectedNode(null)} className="cursor-pointer" style={{ background: 'none', border: 'none', color: 'var(--text-4)' }}>
              <X size={16} />
            </button>
          </div>

          <span className="inline-block w-fit px-2 py-0.5 rounded mb-4" style={{ fontSize: 11, fontWeight: 600, background: `${TYPE_COLORS[selectedNode.type]}20`, color: TYPE_COLORS[selectedNode.type] }}>
            {selectedNode.type}
          </span>

          {selectedNode.description && (
            <p className="mb-4" style={{ color: 'var(--text-2)', fontSize: 13, lineHeight: 1.6 }}>
              {selectedNode.description}
            </p>
          )}

          <div className="flex flex-col gap-2 mb-4">
            {[
              { label: '页码', value: selectedNode.page },
              { label: '置信度', value: selectedNode.confidence.replace('match_', '') },
              { label: '度数', value: selectedNode.degree },
              { label: '中心性', value: selectedNode.centrality.toFixed(2) },
            ].map(p => (
              <div key={p.label} className="flex justify-between" style={{ fontSize: 13 }}>
                <span style={{ color: 'var(--text-3)' }}>{p.label}</span>
                <span style={{ color: 'var(--text-1)' }}>{p.value}</span>
              </div>
            ))}
          </div>

          <h3 className="mb-2" style={{ fontSize: 11, fontWeight: 600, textTransform: 'uppercase', letterSpacing: '0.5px', color: 'var(--text-3)' }}>
            邻居节点 ({neighborInfo?.nodes.length ?? 0})
          </h3>
          <div className="flex flex-col gap-1 mb-4">
            {/* Only the first five neighbors are listed. */}
            {neighborInfo?.nodes.slice(0, 5).map(n => (
              <button
                key={n.id}
                onClick={() => setSelectedNode(n)}
                className="flex items-center gap-2 px-2 py-1.5 rounded cursor-pointer text-left"
                style={{ background: 'var(--bg-s2)', border: 'none', fontSize: 12, color: 'var(--text-2)' }}
              >
                <span className="inline-block w-2 h-2 rounded-full" style={{ background: TYPE_COLORS[n.type] }} />
                <span className="flex-1 truncate">{n.name}</span>
                <span style={{ color: 'var(--text-4)', fontSize: 10 }}>{n.type}</span>
              </button>
            ))}
            {/* NOTE(review): this "view all" link has no click handler yet. */}
            {(neighborInfo?.nodes.length ?? 0) > 5 && (
              <span style={{ color: 'var(--blue)', fontSize: 12, cursor: 'pointer' }}>
                查看全部 {neighborInfo?.nodes.length} 个邻居 →
              </span>
            )}
          </div>

          <button
            onClick={() => navigate(`/chat?q=${encodeURIComponent(`Tell me about ${selectedNode.name}`)}`)}
            className="flex items-center gap-2 px-3 py-2 rounded-md cursor-pointer w-full justify-center"
            style={{ background: 'rgba(88,166,255,0.1)', border: '1px solid var(--blue)', color: 'var(--blue)', fontSize: 13 }}
          >
            <MessageSquare size={14} /> 询问 AI
          </button>
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Map a node's degree to the radius of its rendered circle.
 *
 * The radius grows logarithmically with the degree and is never smaller
 * than 4.
 */
function getRadius(degree: number): number {
  const MIN_RADIUS = 4;
  const scaledRadius = Math.log(degree + 1) * 4;
  return Math.max(MIN_RADIUS, scaledRadius);
}
|
||||
377
frontend/src/app/components/pages/QAChat.tsx
Normal file
377
frontend/src/app/components/pages/QAChat.tsx
Normal file
@@ -0,0 +1,377 @@
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useNavigate, useSearchParams } from 'react-router';
|
||||
import { Send, Plus, ChevronRight, Clock, ExternalLink, Info } from 'lucide-react';
|
||||
import { toast } from 'sonner';
|
||||
import { useAppState, type ChatMessage, type ToolCall } from '../../store';
|
||||
import { api, ApiError } from '../../api';
|
||||
import { TYPE_COLORS } from '../../mock-data';
|
||||
|
||||
export function QAChat() {
|
||||
const { messages, setMessages, chatHistory, suggestedPrompts, nodes, refreshHistory } = useAppState();
|
||||
const navigate = useNavigate();
|
||||
const [searchParams] = useSearchParams();
|
||||
const [input, setInput] = useState('');
|
||||
const [isThinking, setIsThinking] = useState(false);
|
||||
const [activeHistoryId, setActiveHistoryId] = useState<string | null>(null);
|
||||
const [conversationHistory, setConversationHistory] = useState<{ question: string; answer: string }[]>([]);
|
||||
const messagesEndRef = useRef<HTMLDivElement>(null);
|
||||
const inputRef = useRef<HTMLTextAreaElement>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const q = searchParams.get('q');
|
||||
if (q) {
|
||||
setInput(q);
|
||||
inputRef.current?.focus();
|
||||
}
|
||||
}, [searchParams]);
|
||||
|
||||
useEffect(() => {
|
||||
messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
|
||||
}, [messages, isThinking]);
|
||||
|
||||
/**
 * Build cited-node objects from node IDs using the locally loaded KG.
 *
 * IDs without a matching local node are dropped; the order of `ids` is kept.
 */
function resolveCitedNodes(ids: string[]) {
  const resolved: { id: string; name: string; type: string }[] = [];
  for (const id of ids) {
    const match = nodes.find(candidate => candidate.id === id);
    if (match) {
      resolved.push({ id: match.id, name: match.name, type: match.type });
    }
  }
  return resolved;
}
|
||||
|
||||
/**
 * Send the current input as a question and append the exchange to the chat.
 *
 * Does nothing when the input is blank or a request is already in flight.
 * The user message is appended immediately; the AI answer (or an error
 * bubble plus toast when the request fails) is appended once the request
 * settles. On success the Q&A pair is added to the multi-turn context and
 * the history sidebar is refreshed.
 */
const handleSend = async () => {
  const question = input.trim();
  if (!question || isThinking) return;
  setInput('');
  setIsThinking(true);

  const userMsg: ChatMessage = {
    id: `m${Date.now()}`,
    role: 'human',
    content: question,
    timestamp: new Date().toISOString(),
  };
  setMessages(prev => [...prev, userMsg]);

  try {
    const result = await api.query(question, conversationHistory);
    const aiMsg: ChatMessage = {
      id: result.id ?? `m${Date.now() + 1}`,
      role: 'ai',
      content: result.answer,
      timestamp: result.timestamp ?? new Date().toISOString(),
      // Guarded like `cited_nodes` below: a response without tool calls must
      // still render its answer instead of falling into the error branch.
      toolCalls: (result.tool_calls ?? []).map((tc, i) => ({
        step: tc.step ?? i + 1,
        tool: tc.tool_name,
        input: tc.tool_input,
        output: tc.tool_output,
      })),
      citedNodes: resolveCitedNodes(result.cited_nodes ?? []),
      duration: result.duration_seconds,
    };
    setMessages(prev => [...prev, aiMsg]);
    setConversationHistory(prev => [...prev, { question, answer: result.answer }]);
    // Refresh history sidebar
    refreshHistory();
  } catch (err) {
    const msg = err instanceof ApiError ? err.message : '问答服务异常';
    toast.error(msg);
    // Also surface the failure inline so it stays visible in the transcript.
    setMessages(prev => [...prev, {
      id: `err${Date.now()}`,
      role: 'ai',
      content: `⚠️ 请求失败:${msg}\n\n请确认:\n1. 后端服务已启动(localhost:8000)\n2. 知识图谱已有数据(请先上传并索引文档)\n3. DeepSeek API Key 已配置`,
      timestamp: new Date().toISOString(),
    }]);
  } finally {
    setIsThinking(false);
  }
};
|
||||
|
||||
/**
 * Keyboard handler for the question box: Enter sends, Shift+Enter inserts a
 * newline (default textarea behaviour).
 *
 * Key presses that belong to an active IME composition are ignored, so
 * pressing Enter to confirm a candidate (e.g. Chinese pinyin input) does not
 * submit the half-typed question.
 */
const handleKeyDown = (e: React.KeyboardEvent) => {
  // keyCode 229 covers browsers that report composition keys without
  // setting `isComposing` on the keydown event.
  if (e.nativeEvent.isComposing || e.keyCode === 229) return;
  if (e.key === 'Enter' && !e.shiftKey) {
    e.preventDefault();
    handleSend();
  }
};
|
||||
|
||||
/**
 * Start a fresh conversation: forget the multi-turn context, deselect any
 * history entry, and clear both the input box and the visible transcript.
 */
const handleNewChat = () => {
  setConversationHistory([]);
  setActiveHistoryId(null);
  setInput('');
  setMessages([]);
};
|
||||
|
||||
/**
 * Load a history item as a single Q&A session.
 *
 * The transcript is replaced by the stored question and answer, the entry is
 * marked active in the sidebar, and the pair becomes the only conversation
 * context sent with follow-up questions.
 */
const handleLoadHistory = (h: typeof chatHistory[0]) => {
  const { id, question, answer, timestamp } = h;

  const questionMsg: ChatMessage = {
    id: `${id}-q`,
    role: 'human',
    content: question,
    timestamp,
  };
  const answerMsg: ChatMessage = {
    id: `${id}-a`,
    role: 'ai',
    content: answer,
    timestamp,
    toolCalls: h.toolCalls,
    citedNodes: resolveCitedNodes(h.citedNodeIds ?? []),
    duration: h.duration,
  };

  setActiveHistoryId(id);
  setMessages([questionMsg, answerMsg]);
  setConversationHistory([{ question, answer }]);
};
|
||||
|
||||
// History entries bucketed by their display group. The key order below is
// the order in which the sidebar renders the groups.
const historyInGroup = (label: string) => chatHistory.filter(entry => entry.group === label);
const groupedHistory = {
  '今天': historyInGroup('今天'),
  '昨天': historyInGroup('昨天'),
  '更早': historyInGroup('更早'),
};
|
||||
|
||||
return (
|
||||
<div className="flex h-full" style={{ background: 'var(--bg-base)' }}>
|
||||
{/* History Sidebar */}
|
||||
<div
|
||||
className="flex flex-col"
|
||||
style={{ width: 240, background: 'var(--bg-s1)', borderRight: '1px solid var(--border-main)', flexShrink: 0 }}
|
||||
>
|
||||
<div className="p-3">
|
||||
<button
|
||||
onClick={handleNewChat}
|
||||
className="flex items-center gap-2 w-full px-3 py-2 rounded-md cursor-pointer"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
|
||||
>
|
||||
<Plus size={14} /> 新对话
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* 历史会话管理说明 */}
|
||||
<div className="mx-3 mb-2 px-2 py-1.5 rounded-md flex items-start gap-1.5" style={{ background: 'rgba(88,166,255,0.08)', border: '1px solid rgba(88,166,255,0.2)' }}>
|
||||
<Info size={11} style={{ color: 'var(--blue)', flexShrink: 0, marginTop: 1 }} />
|
||||
<span style={{ fontSize: 10, color: 'var(--text-4)', lineHeight: 1.4 }}>
|
||||
点击历史记录查看单条问答;多轮对话会话管理
|
||||
<span style={{ background: 'rgba(248,81,73,0.15)', color: '#f85149', padding: '0 3px', borderRadius: 2, marginLeft: 2 }}>未开发</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className="flex-1 overflow-y-auto px-2">
|
||||
{Object.entries(groupedHistory).map(([group, items]) => items.length > 0 && (
|
||||
<div key={group} className="mb-3">
|
||||
<div className="px-2 py-1" style={{ fontSize: 11, fontWeight: 600, color: 'var(--text-4)', textTransform: 'uppercase', letterSpacing: '0.5px' }}>
|
||||
{group}
|
||||
</div>
|
||||
{items.map(h => (
|
||||
<button
|
||||
key={h.id}
|
||||
onClick={() => handleLoadHistory(h)}
|
||||
className="w-full text-left px-2 py-1.5 rounded cursor-pointer truncate block"
|
||||
style={{
|
||||
background: activeHistoryId === h.id ? 'var(--bg-s2)' : 'transparent',
|
||||
color: activeHistoryId === h.id ? 'var(--text-1)' : 'var(--text-3)',
|
||||
fontSize: 12, border: 'none',
|
||||
}}
|
||||
>
|
||||
{h.question.length > 28 ? h.question.slice(0, 28) + '...' : h.question}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
))}
|
||||
{chatHistory.length === 0 && (
|
||||
<div className="px-2 py-4 text-center" style={{ color: 'var(--text-4)', fontSize: 12 }}>暂无历史记录</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Chat Area */}
|
||||
<div className="flex-1 flex flex-col">
|
||||
{/* Messages */}
|
||||
<div className="flex-1 overflow-y-auto p-6">
|
||||
{messages.length === 0 ? (
|
||||
<div className="flex flex-col items-center justify-center h-full gap-4">
|
||||
<div style={{ fontSize: 32 }}>
|
||||
<span style={{ color: 'var(--blue)' }}>GraphRAG</span>{' '}
|
||||
<span style={{ color: 'var(--text-3)' }}>Studio</span>
|
||||
</div>
|
||||
<p style={{ color: 'var(--text-3)', fontSize: 14, textAlign: 'center', maxWidth: 500 }}>
|
||||
向知识图谱提问。我将使用多步推理从已索引的文档中为您找到准确答案。
|
||||
</p>
|
||||
<div className="grid grid-cols-2 gap-3 mt-4" style={{ maxWidth: 600, width: '100%' }}>
|
||||
{suggestedPrompts.map((p, i) => (
|
||||
<button
|
||||
key={i}
|
||||
onClick={() => setInput(p)}
|
||||
className="text-left p-3 rounded-lg cursor-pointer"
|
||||
style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
|
||||
>
|
||||
{p}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex flex-col gap-4 max-w-3xl mx-auto">
|
||||
{messages.map(msg => (
|
||||
<div key={msg.id}>
|
||||
{msg.role === 'human' ? (
|
||||
<div className="flex justify-end">
|
||||
<div
|
||||
className="rounded-lg px-4 py-3"
|
||||
style={{ background: 'rgba(88,166,255,0.15)', color: 'var(--text-1)', fontSize: 14, maxWidth: '80%', lineHeight: 1.6 }}
|
||||
>
|
||||
{msg.content}
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex justify-start">
|
||||
<div
|
||||
className="rounded-lg px-4 py-3"
|
||||
style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 14, maxWidth: '90%', lineHeight: 1.6 }}
|
||||
>
|
||||
<div
|
||||
style={{ whiteSpace: 'pre-wrap' }}
|
||||
dangerouslySetInnerHTML={{ __html: renderSimpleMarkdown(msg.content) }}
|
||||
/>
|
||||
|
||||
{msg.toolCalls && msg.toolCalls.length > 0 && (
|
||||
<ToolCallPanel toolCalls={msg.toolCalls} />
|
||||
)}
|
||||
|
||||
{msg.citedNodes && msg.citedNodes.length > 0 && (
|
||||
<div className="flex flex-wrap gap-2 mt-3 pt-3" style={{ borderTop: '1px solid var(--border-muted)' }}>
|
||||
{msg.citedNodes.map(cn => (
|
||||
<button
|
||||
key={cn.id}
|
||||
onClick={() => navigate(`/graph?node=${cn.id}`)}
|
||||
className="flex items-center gap-1.5 px-2 py-1 rounded-full cursor-pointer"
|
||||
style={{
|
||||
background: `${TYPE_COLORS[cn.type] ?? '#8b949e'}15`,
|
||||
border: `1px solid ${TYPE_COLORS[cn.type] ?? '#8b949e'}40`,
|
||||
color: TYPE_COLORS[cn.type] ?? '#8b949e',
|
||||
fontSize: 11, fontWeight: 500,
|
||||
}}
|
||||
>
|
||||
<span className="inline-block w-1.5 h-1.5 rounded-full" style={{ background: TYPE_COLORS[cn.type] ?? '#8b949e' }} />
|
||||
{cn.name}
|
||||
<ExternalLink size={9} />
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{msg.duration !== undefined && (
|
||||
<div className="flex items-center gap-1 mt-2" style={{ color: 'var(--text-4)', fontSize: 11 }}>
|
||||
<Clock size={10} /> {msg.duration.toFixed(1)}s
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
|
||||
{isThinking && (
|
||||
<div className="flex justify-start">
|
||||
<div
|
||||
className="rounded-lg px-4 py-3 flex items-center gap-1.5"
|
||||
style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
|
||||
>
|
||||
<span className="thinking-dot" />
|
||||
<span className="thinking-dot" />
|
||||
<span className="thinking-dot" />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div ref={messagesEndRef} />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Input Area */}
|
||||
<div className="p-4" style={{ borderTop: '1px solid var(--border-main)', background: 'var(--bg-s1)' }}>
|
||||
<div className="max-w-3xl mx-auto flex gap-2">
|
||||
<textarea
|
||||
ref={inputRef}
|
||||
value={input}
|
||||
onChange={e => setInput(e.target.value)}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder="向知识图谱提问..."
|
||||
disabled={isThinking}
|
||||
rows={1}
|
||||
className="flex-1 resize-none rounded-lg px-4 py-2.5 outline-none"
|
||||
style={{
|
||||
background: 'var(--bg-s2)', border: '1px solid var(--border-main)',
|
||||
color: 'var(--text-1)', fontSize: 14, minHeight: 42, maxHeight: 120,
|
||||
opacity: isThinking ? 0.5 : 1,
|
||||
}}
|
||||
/>
|
||||
<button
|
||||
onClick={handleSend}
|
||||
disabled={isThinking || !input.trim()}
|
||||
className="px-4 py-2 rounded-lg cursor-pointer flex items-center gap-2"
|
||||
style={{
|
||||
background: input.trim() ? 'var(--green-btn)' : 'var(--bg-s2)',
|
||||
color: input.trim() ? '#fff' : 'var(--text-4)',
|
||||
border: 'none', fontSize: 13, fontWeight: 500,
|
||||
opacity: isThinking ? 0.5 : 1,
|
||||
}}
|
||||
>
|
||||
<Send size={14} /> 发送
|
||||
</button>
|
||||
</div>
|
||||
<div className="max-w-3xl mx-auto mt-1.5">
|
||||
<span style={{ color: 'var(--text-4)', fontSize: 11 }}>
|
||||
Enter 发送,Shift+Enter 换行 | 批量问答管理
|
||||
<span style={{ background: 'rgba(248,81,73,0.15)', color: '#f85149', padding: '0 3px', borderRadius: 2, marginLeft: 4, fontSize: 10 }}>未开发</span>
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Collapsible panel listing the tool calls attached to one AI message.
 *
 * Starts collapsed; the header button toggles the step list. Each step shows
 * its number, the tool name, and the tool's input and output.
 */
function ToolCallPanel({ toolCalls }: { toolCalls: ToolCall[] }) {
  const [expanded, setExpanded] = useState(false);

  // Styling shared by the "input"/"output" captions and their code boxes.
  const captionStyle = { fontSize: 11, color: 'var(--text-4)' };
  const codeBoxStyle = { background: 'var(--bg-base)', fontSize: 11, color: 'var(--text-3)', fontFamily: 'monospace', lineHeight: 1.5 };
  const chevronTransform = expanded ? 'rotate(90deg)' : 'none';

  const toggle = () => setExpanded(!expanded);

  const renderStep = (tc: ToolCall) => (
    <div key={tc.step} className="p-3" style={{ borderBottom: '1px solid var(--border-muted)' }}>
      <div className="flex items-center gap-2 mb-2">
        <span style={{ color: 'var(--text-4)', fontSize: 11 }}>步骤 {tc.step}</span>
        <span style={{ color: 'var(--yellow)', fontSize: 12, fontFamily: 'monospace', fontWeight: 600 }}>{tc.tool}</span>
      </div>
      <div className="mb-1" style={captionStyle}>输入:</div>
      <pre className="mb-2 p-2 rounded overflow-x-auto" style={codeBoxStyle}>
        {tc.input}
      </pre>
      <div className="mb-1" style={captionStyle}>输出:</div>
      <pre className="p-2 rounded overflow-x-auto" style={codeBoxStyle}>
        {tc.output}
      </pre>
    </div>
  );

  return (
    <div className="mt-3">
      <button
        onClick={toggle}
        className="flex items-center gap-1.5 cursor-pointer"
        style={{ background: 'none', border: 'none', color: 'var(--text-3)', fontSize: 12 }}
      >
        <ChevronRight
          size={12}
          style={{ transform: chevronTransform, transition: 'transform 150ms' }}
        />
        工具调用 ({toolCalls.length} 步)
      </button>
      {expanded && (
        <div className="mt-2 rounded-md overflow-hidden" style={{ background: 'var(--bg-s3)', border: '1px solid var(--border-muted)' }}>
          {toolCalls.map(renderStep)}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Convert a small Markdown subset into an HTML string.
 *
 * Supported syntax: `**bold**`, `## ` / `### ` headings, `> ` block quotes,
 * numbered (`1. `) and dash (`- `) list items, and line breaks.
 *
 * The result is injected with `dangerouslySetInnerHTML`, so the input is
 * HTML-escaped before any markup is generated: text coming from the model or
 * from indexed documents can never introduce its own tags or attributes.
 *
 * @param text Raw message text.
 * @returns HTML containing only the tags produced by this function.
 */
function renderSimpleMarkdown(text: string): string {
  const HTML_ESCAPES: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  const escaped = text.replace(/[&<>"']/g, ch => HTML_ESCAPES[ch]);

  return escaped
    .replace(/\*\*(.*?)\*\*/g, '<strong style="color:var(--text-1)">$1</strong>')
    .replace(/^## (.*$)/gm, '<div style="font-size:16px;font-weight:600;color:var(--text-1);margin:8px 0 4px">$1</div>')
    .replace(/^### (.*$)/gm, '<div style="font-size:14px;font-weight:600;color:var(--text-1);margin:6px 0 4px">$1</div>')
    // The quote marker was escaped above, so match its entity form.
    .replace(/^&gt; (.*$)/gm, '<div style="border-left:3px solid var(--blue);padding-left:12px;color:var(--text-3);margin:8px 0">$1</div>')
    .replace(/^\d+\. (.*$)/gm, '<div style="padding-left:16px;margin:2px 0">$&</div>')
    .replace(/^- (.*$)/gm, '<div style="padding-left:16px;margin:2px 0">• $1</div>')
    .replace(/\n/g, '<br/>');
}
|
||||
469
frontend/src/app/components/pages/SearchPage.tsx
Normal file
469
frontend/src/app/components/pages/SearchPage.tsx
Normal file
@@ -0,0 +1,469 @@
|
||||
import React, { useState, useEffect, useRef } from 'react';
|
||||
import { useNavigate, useSearchParams } from 'react-router';
|
||||
import * as d3 from 'd3';
|
||||
import { Search, ExternalLink, MessageSquare, ArrowRight } from 'lucide-react';
|
||||
import { useAppState, mapApiNode, mapApiEdge, type KGNode } from '../../store';
|
||||
import { api, ApiError } from '../../api';
|
||||
import { TYPE_COLORS } from '../../mock-data';
|
||||
|
||||
const ENTITY_TYPES_OPTIONS = ['全部类型', 'TECHNOLOGY', 'CONCEPT', 'PERSON', 'ORGANIZATION', 'LOCATION'];
|
||||
|
||||
export function SearchPage() {
|
||||
const { nodes, edges, getNeighbors } = useAppState();
|
||||
const navigate = useNavigate();
|
||||
const [searchParams, setSearchParams] = useSearchParams();
|
||||
|
||||
const [query, setQuery] = useState(searchParams.get('q') || '');
|
||||
const [typeFilter, setTypeFilter] = useState(searchParams.get('type') || '全部类型');
|
||||
const [activeTab, setActiveTab] = useState<'entity' | 'path' | 'graph'>(
|
||||
(searchParams.get('tab') as 'entity' | 'path' | 'graph') || 'entity'
|
||||
);
|
||||
const [results, setResults] = useState<KGNode[]>([]);
|
||||
const [selectedResult, setSelectedResult] = useState<KGNode | null>(null);
|
||||
const [hasSearched, setHasSearched] = useState(false);
|
||||
const [searching, setSearching] = useState(false);
|
||||
|
||||
// Path search
|
||||
const [pathFrom, setPathFrom] = useState('');
|
||||
const [pathTo, setPathTo] = useState('');
|
||||
const [maxHops, setMaxHops] = useState(3);
|
||||
const [pathResult, setPathResult] = useState<KGNode[] | null>(null);
|
||||
const [pathSearching, setPathSearching] = useState(false);
|
||||
const [pathError, setPathError] = useState('');
|
||||
|
||||
// Graph search
|
||||
const [graphQuery, setGraphQuery] = useState('');
|
||||
const [includeNeighbors, setIncludeNeighbors] = useState(true);
|
||||
const [graphResults, setGraphResults] = useState<KGNode[]>([]);
|
||||
const [graphSearching, setGraphSearching] = useState(false);
|
||||
|
||||
const previewRef = useRef<SVGSVGElement>(null);
|
||||
|
||||
// Auto-search from URL
|
||||
useEffect(() => {
|
||||
const q = searchParams.get('q');
|
||||
if (q) {
|
||||
setQuery(q);
|
||||
doEntitySearch(q, typeFilter);
|
||||
}
|
||||
}, []); // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
// ── Entity Search ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run an entity search and publish the outcome to the page state.
 *
 * Blank queries are ignored. On success the mapped results are stored, the
 * first hit is selected for the preview, and the query is mirrored into the
 * URL search params. On failure both the result list and the selection are
 * cleared, so no stale preview remains next to the empty-result message.
 *
 * @param q    Raw query text (trimmed before use).
 * @param type Selected entity type; '全部类型' means no type filter.
 */
const doEntitySearch = async (q: string, type: string) => {
  const term = q.trim();
  if (!term) return;
  setSearching(true);
  setHasSearched(true);
  try {
    const res = await api.searchEntities(term, type !== '全部类型' ? type : undefined, 50);
    const mapped = res.items.map(mapApiNode);
    setResults(mapped);
    setSelectedResult(mapped[0] ?? null);
    setSearchParams({ q: term, type, tab: 'entity' });
  } catch (err) {
    // Keep the failure diagnosable instead of silently showing "no results".
    console.error('Entity search failed:', err);
    setResults([]);
    setSelectedResult(null);
  } finally {
    setSearching(false);
  }
};

/** Search with the query and type filter currently entered in the header. */
const handleEntitySearch = () => doEntitySearch(query, typeFilter);
|
||||
|
||||
// ── Preview graph for selected entity ────────────────────────────────────
|
||||
|
||||
// Draw a small force-directed preview of the selected entity and the
// neighbours returned by `getNeighbors`. The SVG is cleared and rebuilt
// whenever the selection changes; the simulation is stopped on cleanup.
useEffect(() => {
  if (!selectedResult || !previewRef.current) return;
  const svg = d3.select(previewRef.current);
  svg.selectAll('*').remove();

  // Use local KG for preview (already loaded)
  const { nodes: neighbors, edges: nEdges } = getNeighbors(selectedResult.id);
  const allNodes = [selectedResult, ...neighbors];
  const width = 380;
  const height = 280;
  svg.attr('width', width).attr('height', height);

  const g = svg.append('g');
  // Copy nodes and edges so the simulation mutates its own objects, and
  // scatter the starting positions around the centre.
  const simNodes = allNodes.map(n => ({ ...n, x: width / 2 + (Math.random() - 0.5) * 100, y: height / 2 + (Math.random() - 0.5) * 100 }));
  const simEdges = nEdges.map(e => ({ ...e }));

  const simulation = d3.forceSimulation(simNodes)
    .force('link', d3.forceLink(simEdges).id((d: any) => d.id).distance(50).strength(0.5))
    .force('charge', d3.forceManyBody().strength(-80))
    .force('center', d3.forceCenter(width / 2, height / 2))
    .alphaDecay(0.05);

  const link = g.selectAll('line').data(simEdges).join('line')
    .attr('stroke', '#30363d').attr('stroke-width', 1).attr('stroke-opacity', 0.4);
  const node = g.selectAll('circle').data(simNodes).join('circle')
    .attr('r', (d: any) => d.id === selectedResult.id ? 8 : 5)
    .attr('fill', (d: any) => TYPE_COLORS[d.type] ?? '#8b949e')
    .attr('stroke', (d: any) => d.id === selectedResult.id ? '#fff' : '#0f1117')
    .attr('stroke-width', (d: any) => d.id === selectedResult.id ? 2 : 1);
  // Only the selected node and nodes with degree >= 10 get a label.
  const label = g.selectAll('text').data(simNodes.filter(n => n.id === selectedResult.id || n.degree >= 10)).join('text')
    .text((d: any) => d.name).attr('font-size', 9).attr('fill', 'var(--text-3)')
    .attr('text-anchor', 'middle').attr('dy', -12).attr('pointer-events', 'none');

  simulation.on('tick', () => {
    link.attr('x1', (d: any) => d.source.x).attr('y1', (d: any) => d.source.y)
      .attr('x2', (d: any) => d.target.x).attr('y2', (d: any) => d.target.y);
    node.attr('cx', (d: any) => d.x).attr('cy', (d: any) => d.y);
    // Labels must follow their nodes; without this they stay at the SVG origin.
    label.attr('x', (d: any) => d.x).attr('y', (d: any) => d.y);
  });

  return () => { simulation.stop(); };
}, [selectedResult, getNeighbors]);
|
||||
|
||||
// ── Path Search ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Find a path between the two entities named in the path-search form.
 *
 * Names are resolved against the locally loaded KG: surrounding whitespace is
 * ignored, an exact case-insensitive name match is preferred, and the first
 * substring match is the fallback. An unresolved name sets `pathError`.
 * Otherwise the first path returned by the API is stored in `pathResult`;
 * an empty array means "no path". An `ApiError` with code 3001 is also
 * treated as "no path"; any other failure sets `pathError`.
 */
const handlePathSearch = async () => {
  const fromName = pathFrom.trim();
  const toName = pathTo.trim();
  if (!fromName || !toName) return;
  setPathError('');
  setPathResult(null);

  // Resolve names to node IDs from local KG
  const findByName = (name: string) => {
    const needle = name.toLowerCase();
    return nodes.find(n => n.name.toLowerCase() === needle)
      ?? nodes.find(n => n.name.toLowerCase().includes(needle));
  };
  const fromNode = findByName(fromName);
  const toNode = findByName(toName);

  if (!fromNode) { setPathError(`未找到起点实体"${pathFrom}",请检查名称是否正确`); return; }
  if (!toNode) { setPathError(`未找到终点实体"${pathTo}",请检查名称是否正确`); return; }

  setPathSearching(true);
  try {
    const res = await api.searchPath(fromNode.id, toNode.id, maxHops);
    if (!res.paths || res.paths.length === 0) {
      setPathResult([]);
    } else {
      // Use the shortest path (first result)
      const firstPath = res.paths[0];
      const pathNodes = firstPath.nodes
        .map(n => {
          // Prefer the richer local node; otherwise build a placeholder
          // from the fields present in the API response.
          const local = nodes.find(ln => ln.id === n.id);
          return local ?? { id: n.id, name: n.name, type: n.type as KGNode['type'], page: 0, confidence: 'match_exact' as const, degree: 0, centrality: 0, doc_id: '' };
        });
      setPathResult(pathNodes);
    }
  } catch (err) {
    if (err instanceof ApiError && err.code === 3001) {
      setPathResult([]);
    } else {
      setPathError(err instanceof ApiError ? err.message : '路径查找失败');
    }
  } finally {
    setPathSearching(false);
  }
};
|
||||
|
||||
// ── Graph Search ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run a keyword search over the graph and store the matched nodes.
 *
 * Blank keywords are ignored. Any failure leaves the result list empty.
 */
const handleGraphSearch = async () => {
  const keyword = graphQuery.trim();
  if (!keyword) return;

  setGraphSearching(true);
  try {
    const { matched_nodes: matched } = await api.searchGraph(keyword, includeNeighbors);
    setGraphResults(matched.map(mapApiNode));
  } catch {
    setGraphResults([]);
  } finally {
    setGraphSearching(false);
  }
};
|
||||
|
||||
return (
|
||||
<div className="p-6" style={{ maxWidth: 1200, margin: '0 auto' }}>
|
||||
<h1 className="mb-6" style={{ color: 'var(--text-1)', fontSize: 20, fontWeight: 600 }}>搜索</h1>
|
||||
|
||||
{/* Search Header */}
|
||||
<div className="flex gap-3 mb-4">
|
||||
<div className="relative flex-1">
|
||||
<Search size={14} className="absolute left-3 top-1/2 -translate-y-1/2" style={{ color: 'var(--text-4)' }} />
|
||||
<input
|
||||
value={query}
|
||||
onChange={e => setQuery(e.target.value)}
|
||||
onKeyDown={e => e.key === 'Enter' && handleEntitySearch()}
|
||||
placeholder="搜索实体..."
|
||||
className="w-full pl-9 pr-4 py-2.5 rounded-lg outline-none"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 14 }}
|
||||
/>
|
||||
</div>
|
||||
<select
|
||||
value={typeFilter}
|
||||
onChange={e => setTypeFilter(e.target.value)}
|
||||
className="px-3 py-2 rounded-lg cursor-pointer"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
|
||||
>
|
||||
{ENTITY_TYPES_OPTIONS.map(t => <option key={t}>{t}</option>)}
|
||||
</select>
|
||||
<button
|
||||
onClick={handleEntitySearch}
|
||||
disabled={searching}
|
||||
className="flex items-center gap-2 px-5 py-2 rounded-lg cursor-pointer"
|
||||
style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, fontWeight: 500, border: 'none', opacity: searching ? 0.7 : 1 }}
|
||||
>
|
||||
<Search size={14} /> {searching ? '搜索中...' : '搜索'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Tabs */}
|
||||
<div className="flex gap-0 mb-6" style={{ borderBottom: '1px solid var(--border-main)' }}>
|
||||
{([
|
||||
{ key: 'entity' as const, label: '实体搜索' },
|
||||
{ key: 'path' as const, label: '路径搜索' },
|
||||
{ key: 'graph' as const, label: '图谱搜索' },
|
||||
]).map(tab => (
|
||||
<button
|
||||
key={tab.key}
|
||||
onClick={() => setActiveTab(tab.key)}
|
||||
className="px-4 py-2.5 cursor-pointer relative"
|
||||
style={{
|
||||
background: 'transparent', border: 'none',
|
||||
color: activeTab === tab.key ? 'var(--blue)' : 'var(--text-3)',
|
||||
fontSize: 13, fontWeight: activeTab === tab.key ? 600 : 400,
|
||||
}}
|
||||
>
|
||||
{tab.label}
|
||||
{activeTab === tab.key && (
|
||||
<div className="absolute bottom-0 left-0 right-0 h-0.5" style={{ background: 'var(--blue)' }} />
|
||||
)}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Entity Search Tab */}
|
||||
{activeTab === 'entity' && (
|
||||
<div className="flex gap-4">
|
||||
<div className="flex-1" style={{ minWidth: 0 }}>
|
||||
{!hasSearched ? (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3">
|
||||
<Search size={36} style={{ color: 'var(--text-4)' }} />
|
||||
<span style={{ color: 'var(--text-3)', fontSize: 14 }}>输入查询以搜索实体</span>
|
||||
</div>
|
||||
) : searching ? (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3">
|
||||
<span style={{ color: 'var(--text-3)', fontSize: 14 }}>搜索中...</span>
|
||||
</div>
|
||||
) : results.length === 0 ? (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3">
|
||||
<span style={{ color: 'var(--text-3)', fontSize: 14 }}>未找到实体 "{query}"</span>
|
||||
<button
|
||||
onClick={() => navigate('/graph')}
|
||||
className="flex items-center gap-1 cursor-pointer"
|
||||
style={{ color: 'var(--blue)', fontSize: 13, background: 'none', border: 'none' }}
|
||||
>
|
||||
探索知识图谱 <ExternalLink size={12} />
|
||||
</button>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex flex-col gap-2">
|
||||
<div style={{ color: 'var(--text-4)', fontSize: 12, marginBottom: 4 }}>
|
||||
找到 {results.length} 个结果
|
||||
</div>
|
||||
{results.map(r => (
|
||||
<button
|
||||
key={r.id}
|
||||
onClick={() => setSelectedResult(r)}
|
||||
className="flex items-center gap-3 p-3 rounded-lg cursor-pointer text-left w-full"
|
||||
style={{
|
||||
background: selectedResult?.id === r.id ? 'var(--bg-s2)' : 'var(--bg-s1)',
|
||||
border: `1px solid ${selectedResult?.id === r.id ? 'var(--blue)' : 'var(--border-main)'}`,
|
||||
}}
|
||||
>
|
||||
<span className="inline-block w-3 h-3 rounded-full flex-shrink-0" style={{ background: TYPE_COLORS[r.type] ?? '#8b949e' }} />
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center gap-2 mb-0.5">
|
||||
<span style={{ color: 'var(--text-1)', fontSize: 14, fontWeight: 500 }}>{r.name}</span>
|
||||
<span className="px-1.5 py-0.5 rounded" style={{ fontSize: 10, fontWeight: 600, background: `${TYPE_COLORS[r.type] ?? '#8b949e'}20`, color: TYPE_COLORS[r.type] ?? '#8b949e' }}>
|
||||
{r.type}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3" style={{ fontSize: 11, color: 'var(--text-4)' }}>
|
||||
<span>页码 {r.page}</span>
|
||||
<span>度数 {r.degree}</span>
|
||||
<span>{r.confidence.replace('match_', '')}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5 flex-shrink-0">
|
||||
<button
|
||||
onClick={e => { e.stopPropagation(); navigate(`/graph?node=${r.id}`); }}
|
||||
className="px-2 py-1 rounded cursor-pointer"
|
||||
style={{ fontSize: 10, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
|
||||
>
|
||||
查看图谱
|
||||
</button>
|
||||
<button
|
||||
onClick={e => { e.stopPropagation(); navigate(`/chat?q=${encodeURIComponent(`What is ${r.name}`)}`); }}
|
||||
className="px-2 py-1 rounded cursor-pointer"
|
||||
style={{ fontSize: 10, background: 'rgba(88,166,255,0.1)', color: 'var(--blue)', border: 'none' }}
|
||||
>
|
||||
<MessageSquare size={10} />
|
||||
</button>
|
||||
</div>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Preview Graph */}
|
||||
{selectedResult && (
|
||||
<div
|
||||
className="rounded-lg p-3 flex-shrink-0"
|
||||
style={{ width: 400, background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}
|
||||
>
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<span style={{ color: 'var(--text-1)', fontSize: 13, fontWeight: 600 }}>
|
||||
预览: {selectedResult.name}
|
||||
</span>
|
||||
<span style={{ fontSize: 11, color: 'var(--text-4)' }}>1 跳邻居</span>
|
||||
</div>
|
||||
<svg ref={previewRef} className="w-full" style={{ height: 280, background: 'var(--bg-base)', borderRadius: 6 }} />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Path Search Tab */}
|
||||
{activeTab === 'path' && (
|
||||
<div>
|
||||
<div className="flex items-end gap-3 mb-6">
|
||||
<div className="flex-1">
|
||||
<label style={{ fontSize: 12, color: 'var(--text-3)', display: 'block', marginBottom: 4 }}>起点实体名称</label>
|
||||
<input
|
||||
value={pathFrom}
|
||||
onChange={e => setPathFrom(e.target.value)}
|
||||
placeholder="如: GraphRAG"
|
||||
className="w-full px-3 py-2 rounded-md outline-none"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13 }}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex-1">
|
||||
<label style={{ fontSize: 12, color: 'var(--text-3)', display: 'block', marginBottom: 4 }}>终点实体名称</label>
|
||||
<input
|
||||
value={pathTo}
|
||||
onChange={e => setPathTo(e.target.value)}
|
||||
placeholder="如: LLM"
|
||||
className="w-full px-3 py-2 rounded-md outline-none"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13 }}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label style={{ fontSize: 12, color: 'var(--text-3)', display: 'block', marginBottom: 4 }}>最大跳数</label>
|
||||
<select
|
||||
value={maxHops}
|
||||
onChange={e => setMaxHops(Number(e.target.value))}
|
||||
className="px-3 py-2 rounded-md cursor-pointer"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-2)', fontSize: 13 }}
|
||||
>
|
||||
{[1, 2, 3, 4, 5].map(n => <option key={n} value={n}>{n}</option>)}
|
||||
</select>
|
||||
</div>
|
||||
<button
|
||||
onClick={handlePathSearch}
|
||||
disabled={pathSearching}
|
||||
className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
|
||||
style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, border: 'none', opacity: pathSearching ? 0.7 : 1 }}
|
||||
>
|
||||
{pathSearching ? '查找中...' : '查找路径'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{pathError && (
|
||||
<div className="mb-4 px-4 py-2 rounded-md" style={{ background: 'rgba(248,81,73,0.1)', border: '1px solid rgba(248,81,73,0.3)', color: 'var(--red)', fontSize: 13 }}>
|
||||
{pathError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{pathResult !== null && (
|
||||
pathResult.length === 0 ? (
|
||||
<div className="text-center py-12" style={{ color: 'var(--text-3)', fontSize: 14 }}>
|
||||
这两个实体之间没有路径(在 {maxHops} 跳内)
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-center gap-2 flex-wrap p-6 rounded-lg" style={{ background: 'var(--bg-s1)', border: '1px solid var(--border-main)' }}>
|
||||
<span style={{ fontSize: 12, color: 'var(--text-4)', marginBottom: 8, display: 'block', width: '100%' }}>
|
||||
路径长度 {pathResult.length - 1} 跳
|
||||
</span>
|
||||
{pathResult.map((n, i) => (
|
||||
<React.Fragment key={n.id}>
|
||||
<button
|
||||
onClick={() => navigate(`/graph?node=${n.id}`)}
|
||||
className="flex items-center gap-2 px-3 py-2 rounded-lg cursor-pointer"
|
||||
style={{ background: 'var(--bg-s2)', border: `1px solid ${TYPE_COLORS[n.type] ?? '#8b949e'}40` }}
|
||||
>
|
||||
<span className="w-2.5 h-2.5 rounded-full" style={{ background: TYPE_COLORS[n.type] ?? '#8b949e' }} />
|
||||
<span style={{ color: 'var(--text-1)', fontSize: 13 }}>{n.name}</span>
|
||||
<span style={{ fontSize: 10, color: TYPE_COLORS[n.type] ?? '#8b949e' }}>{n.type}</span>
|
||||
</button>
|
||||
{i < pathResult.length - 1 && (
|
||||
<ArrowRight size={16} style={{ color: 'var(--text-4)' }} />
|
||||
)}
|
||||
</React.Fragment>
|
||||
))}
|
||||
</div>
|
||||
)
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Graph Search Tab */}
|
||||
{activeTab === 'graph' && (
|
||||
<div>
|
||||
<div className="flex items-end gap-3 mb-6">
|
||||
<div className="flex-1">
|
||||
<input
|
||||
value={graphQuery}
|
||||
onChange={e => setGraphQuery(e.target.value)}
|
||||
onKeyDown={e => e.key === 'Enter' && handleGraphSearch()}
|
||||
placeholder="搜索关键词..."
|
||||
className="w-full px-3 py-2 rounded-md outline-none"
|
||||
style={{ background: 'var(--bg-s2)', border: '1px solid var(--border-main)', color: 'var(--text-1)', fontSize: 13 }}
|
||||
/>
|
||||
</div>
|
||||
<label className="flex items-center gap-2 cursor-pointer px-3 py-2">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={includeNeighbors}
|
||||
onChange={e => setIncludeNeighbors(e.target.checked)}
|
||||
style={{ accentColor: 'var(--blue)' }}
|
||||
/>
|
||||
<span style={{ fontSize: 12, color: 'var(--text-2)' }}>包含邻居</span>
|
||||
</label>
|
||||
<button
|
||||
onClick={handleGraphSearch}
|
||||
disabled={graphSearching}
|
||||
className="flex items-center gap-2 px-4 py-2 rounded-md cursor-pointer"
|
||||
style={{ background: 'var(--green-btn)', color: '#fff', fontSize: 13, border: 'none', opacity: graphSearching ? 0.7 : 1 }}
|
||||
>
|
||||
{graphSearching ? '搜索中...' : '搜索'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{graphResults.length > 0 && (
|
||||
<>
|
||||
<div style={{ color: 'var(--text-4)', fontSize: 12, marginBottom: 8 }}>
|
||||
找到 {graphResults.length} 个节点
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{graphResults.map(n => (
|
||||
<button
|
||||
key={n.id}
|
||||
onClick={() => navigate(`/graph?node=${n.id}`)}
|
||||
className="flex items-center gap-2 px-3 py-1.5 rounded-full cursor-pointer"
|
||||
style={{ background: `${TYPE_COLORS[n.type] ?? '#8b949e'}15`, border: `1px solid ${TYPE_COLORS[n.type] ?? '#8b949e'}40`, color: TYPE_COLORS[n.type] ?? '#8b949e', fontSize: 12 }}
|
||||
>
|
||||
<span className="w-2 h-2 rounded-full" style={{ background: TYPE_COLORS[n.type] ?? '#8b949e' }} />
|
||||
{n.name}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{graphSearching === false && graphQuery && graphResults.length === 0 && (
|
||||
<div className="text-center py-12" style={{ color: 'var(--text-3)', fontSize: 14 }}>
|
||||
未找到包含 "{graphQuery}" 的节点
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
66
frontend/src/app/components/ui/accordion.tsx
Normal file
66
frontend/src/app/components/ui/accordion.tsx
Normal file
@@ -0,0 +1,66 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as AccordionPrimitive from "@radix-ui/react-accordion";
|
||||
import { ChevronDownIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Accordion root.
 *
 * Renders the Radix `Accordion.Root` primitive tagged with
 * `data-slot="accordion"` and forwards every received prop to it.
 */
function Accordion(
  props: React.ComponentProps<typeof AccordionPrimitive.Root>,
) {
  return <AccordionPrimitive.Root data-slot="accordion" {...props} />;
}
|
||||
|
||||
/**
 * A single accordion section.
 *
 * Merges the caller's `className` with the default bottom-border classes and
 * forwards the remaining props to the Radix `Accordion.Item` primitive.
 */
function AccordionItem(
  props: React.ComponentProps<typeof AccordionPrimitive.Item>,
) {
  const { className, ...itemProps } = props;
  const itemClassName = cn("border-b last:border-b-0", className);

  return (
    <AccordionPrimitive.Item
      data-slot="accordion-item"
      className={itemClassName}
      {...itemProps}
    />
  );
}
|
||||
|
||||
/**
 * Clickable header of an accordion item.
 *
 * Wraps the Radix trigger in a flex `Accordion.Header`, renders `children`
 * followed by a chevron icon, and forwards the remaining props to the trigger.
 * The trigger classes rotate the chevron 180° while `data-state=open`.
 */
function AccordionTrigger(
  props: React.ComponentProps<typeof AccordionPrimitive.Trigger>,
) {
  const { className, children, ...triggerProps } = props;
  const triggerClassName = cn(
    "focus-visible:border-ring focus-visible:ring-ring/50 flex flex-1 items-start justify-between gap-4 rounded-md py-4 text-left text-sm font-medium transition-all outline-none hover:underline focus-visible:ring-[3px] disabled:pointer-events-none disabled:opacity-50 [&[data-state=open]>svg]:rotate-180",
    className,
  );

  return (
    <AccordionPrimitive.Header className="flex">
      <AccordionPrimitive.Trigger
        data-slot="accordion-trigger"
        className={triggerClassName}
        {...triggerProps}
      >
        {children}
        <ChevronDownIcon className="text-muted-foreground pointer-events-none size-4 shrink-0 translate-y-0.5 transition-transform duration-200" />
      </AccordionPrimitive.Trigger>
    </AccordionPrimitive.Header>
  );
}
|
||||
|
||||
/**
 * Collapsible body of an accordion item.
 *
 * The Radix content element always receives the fixed animation classes; the
 * caller's `className` is applied to the inner padding wrapper around
 * `children`, not to the animated element itself.
 */
function AccordionContent(
  props: React.ComponentProps<typeof AccordionPrimitive.Content>,
) {
  const { className, children, ...contentProps } = props;
  const innerClassName = cn("pt-0 pb-4", className);

  return (
    <AccordionPrimitive.Content
      data-slot="accordion-content"
      className="data-[state=closed]:animate-accordion-up data-[state=open]:animate-accordion-down overflow-hidden text-sm"
      {...contentProps}
    >
      <div className={innerClassName}>{children}</div>
    </AccordionPrimitive.Content>
  );
}
|
||||
|
||||
export { Accordion, AccordionItem, AccordionTrigger, AccordionContent };
|
||||
157
frontend/src/app/components/ui/alert-dialog.tsx
Normal file
157
frontend/src/app/components/ui/alert-dialog.tsx
Normal file
@@ -0,0 +1,157 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as AlertDialogPrimitive from "@radix-ui/react-alert-dialog";
|
||||
|
||||
import { cn } from "./utils";
|
||||
import { buttonVariants } from "./button";
|
||||
|
||||
/**
 * Alert dialog root.
 *
 * Forwards every prop to the Radix `AlertDialog.Root` primitive and passes
 * `data-slot="alert-dialog"` alongside them.
 */
function AlertDialog(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Root>,
) {
  return <AlertDialogPrimitive.Root data-slot="alert-dialog" {...props} />;
}
|
||||
|
||||
/**
 * Alert dialog trigger.
 *
 * Forwards every prop to the Radix `AlertDialog.Trigger` primitive, tagged
 * with `data-slot="alert-dialog-trigger"`.
 */
function AlertDialogTrigger(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Trigger>,
) {
  return (
    <AlertDialogPrimitive.Trigger data-slot="alert-dialog-trigger" {...props} />
  );
}
|
||||
|
||||
/**
 * Alert dialog portal.
 *
 * Forwards every prop to the Radix `AlertDialog.Portal` primitive and passes
 * `data-slot="alert-dialog-portal"` alongside them.
 */
function AlertDialogPortal(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Portal>,
) {
  return (
    <AlertDialogPrimitive.Portal data-slot="alert-dialog-portal" {...props} />
  );
}
|
||||
|
||||
/**
 * Full-screen backdrop shown behind the alert dialog.
 *
 * Merges the caller's `className` with the default fixed/fade classes and
 * forwards the remaining props to the Radix `AlertDialog.Overlay` primitive.
 */
function AlertDialogOverlay(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Overlay>,
) {
  const { className, ...overlayProps } = props;
  const overlayClassName = cn(
    "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-black/50",
    className,
  );

  return (
    <AlertDialogPrimitive.Overlay
      data-slot="alert-dialog-overlay"
      className={overlayClassName}
      {...overlayProps}
    />
  );
}
|
||||
|
||||
/**
 * Alert dialog panel.
 *
 * Always renders inside `AlertDialogPortal`, preceded by an
 * `AlertDialogOverlay`. The caller's `className` is merged with the default
 * centered-panel classes; remaining props go to the Radix content primitive.
 */
function AlertDialogContent(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Content>,
) {
  const { className, ...contentProps } = props;
  const contentClassName = cn(
    "bg-background data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg",
    className,
  );

  return (
    <AlertDialogPortal>
      <AlertDialogOverlay />
      <AlertDialogPrimitive.Content
        data-slot="alert-dialog-content"
        className={contentClassName}
        {...contentProps}
      />
    </AlertDialogPortal>
  );
}
|
||||
|
||||
/**
 * Layout wrapper for the dialog's title and description.
 *
 * Plain `<div>`: merges `className` with the default column layout classes
 * and forwards the remaining props.
 */
function AlertDialogHeader(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const headerClassName = cn(
    "flex flex-col gap-2 text-center sm:text-left",
    className,
  );

  return (
    <div
      data-slot="alert-dialog-header"
      className={headerClassName}
      {...divProps}
    />
  );
}
|
||||
|
||||
/**
 * Layout wrapper for the dialog's action buttons.
 *
 * Plain `<div>`: merges `className` with the default classes (reversed column
 * by default, right-aligned row from the `sm` breakpoint) and forwards the
 * remaining props.
 */
function AlertDialogFooter(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const footerClassName = cn(
    "flex flex-col-reverse gap-2 sm:flex-row sm:justify-end",
    className,
  );

  return (
    <div
      data-slot="alert-dialog-footer"
      className={footerClassName}
      {...divProps}
    />
  );
}
|
||||
|
||||
/**
 * Alert dialog title.
 *
 * Merges `className` with the default heading classes and forwards the
 * remaining props to the Radix `AlertDialog.Title` primitive.
 */
function AlertDialogTitle(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Title>,
) {
  const { className, ...titleProps } = props;
  const titleClassName = cn("text-lg font-semibold", className);

  return (
    <AlertDialogPrimitive.Title
      data-slot="alert-dialog-title"
      className={titleClassName}
      {...titleProps}
    />
  );
}
|
||||
|
||||
/**
 * Alert dialog description text.
 *
 * Merges `className` with the default muted small-text classes and forwards
 * the remaining props to the Radix `AlertDialog.Description` primitive.
 */
function AlertDialogDescription(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Description>,
) {
  const { className, ...descriptionProps } = props;
  const descriptionClassName = cn("text-muted-foreground text-sm", className);

  return (
    <AlertDialogPrimitive.Description
      data-slot="alert-dialog-description"
      className={descriptionClassName}
      {...descriptionProps}
    />
  );
}
|
||||
|
||||
/**
 * Confirming action button of the alert dialog.
 *
 * Styled with the default `buttonVariants()` classes merged with `className`;
 * remaining props go to the Radix `AlertDialog.Action` primitive.
 */
function AlertDialogAction(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Action>,
) {
  const { className, ...actionProps } = props;
  const actionClassName = cn(buttonVariants(), className);

  return (
    <AlertDialogPrimitive.Action className={actionClassName} {...actionProps} />
  );
}
|
||||
|
||||
/**
 * Cancel button of the alert dialog.
 *
 * Styled with the `outline` button variant merged with `className`; remaining
 * props go to the Radix `AlertDialog.Cancel` primitive.
 */
function AlertDialogCancel(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Cancel>,
) {
  const { className, ...cancelProps } = props;
  const cancelClassName = cn(buttonVariants({ variant: "outline" }), className);

  return (
    <AlertDialogPrimitive.Cancel className={cancelClassName} {...cancelProps} />
  );
}
|
||||
|
||||
export {
|
||||
AlertDialog,
|
||||
AlertDialogPortal,
|
||||
AlertDialogOverlay,
|
||||
AlertDialogTrigger,
|
||||
AlertDialogContent,
|
||||
AlertDialogHeader,
|
||||
AlertDialogFooter,
|
||||
AlertDialogTitle,
|
||||
AlertDialogDescription,
|
||||
AlertDialogAction,
|
||||
AlertDialogCancel,
|
||||
};
|
||||
66
frontend/src/app/components/ui/alert.tsx
Normal file
66
frontend/src/app/components/ui/alert.tsx
Normal file
@@ -0,0 +1,66 @@
|
||||
import * as React from "react";
|
||||
import { cva, type VariantProps } from "class-variance-authority";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
// Classes shared by every alert variant.
const ALERT_BASE_CLASSES =
  "relative w-full rounded-lg border px-4 py-3 text-sm grid has-[>svg]:grid-cols-[calc(var(--spacing)*4)_1fr] grid-cols-[0_1fr] has-[>svg]:gap-x-3 gap-y-0.5 items-start [&>svg]:size-4 [&>svg]:translate-y-0.5 [&>svg]:text-current";

/**
 * Class-name builder for `Alert`.
 *
 * Supports a single `variant` axis (`default` | `destructive`) and falls back
 * to `default` when no variant is given.
 */
const alertVariants = cva(ALERT_BASE_CLASSES, {
  variants: {
    variant: {
      default: "bg-card text-card-foreground",
      destructive:
        "text-destructive bg-card [&>svg]:text-current *:data-[slot=alert-description]:text-destructive/90",
    },
  },
  defaultVariants: {
    variant: "default",
  },
});
|
||||
|
||||
/**
 * Alert container.
 *
 * Renders a `<div role="alert">` whose classes come from `alertVariants` for
 * the requested `variant`, merged with the caller's `className`. All other
 * props are forwarded to the `<div>`.
 */
function Alert(
  props: React.ComponentProps<"div"> & VariantProps<typeof alertVariants>,
) {
  const { className, variant, ...divProps } = props;
  const alertClassName = cn(alertVariants({ variant }), className);

  return (
    <div
      data-slot="alert"
      role="alert"
      className={alertClassName}
      {...divProps}
    />
  );
}
|
||||
|
||||
/**
 * Title row of an `Alert`.
 *
 * Plain `<div>` placed in the second grid column and clamped to one line;
 * `className` is merged with those defaults and other props are forwarded.
 */
function AlertTitle(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const titleClassName = cn(
    "col-start-2 line-clamp-1 min-h-4 font-medium tracking-tight",
    className,
  );

  return (
    <div data-slot="alert-title" className={titleClassName} {...divProps} />
  );
}
|
||||
|
||||
/**
 * Body text of an `Alert`.
 *
 * Plain `<div>` placed in the second grid column with muted small text;
 * `className` is merged with those defaults and other props are forwarded.
 */
function AlertDescription(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const descriptionClassName = cn(
    "text-muted-foreground col-start-2 grid justify-items-start gap-1 text-sm [&_p]:leading-relaxed",
    className,
  );

  return (
    <div
      data-slot="alert-description"
      className={descriptionClassName}
      {...divProps}
    />
  );
}
|
||||
|
||||
export { Alert, AlertTitle, AlertDescription };
|
||||
11
frontend/src/app/components/ui/aspect-ratio.tsx
Normal file
11
frontend/src/app/components/ui/aspect-ratio.tsx
Normal file
@@ -0,0 +1,11 @@
|
||||
"use client";
|
||||
|
||||
import * as AspectRatioPrimitive from "@radix-ui/react-aspect-ratio";
|
||||
|
||||
/**
 * Aspect-ratio box.
 *
 * Forwards every prop to the Radix `AspectRatio.Root` primitive, tagged with
 * `data-slot="aspect-ratio"`.
 */
function AspectRatio(
  props: React.ComponentProps<typeof AspectRatioPrimitive.Root>,
) {
  return <AspectRatioPrimitive.Root data-slot="aspect-ratio" {...props} />;
}
|
||||
|
||||
export { AspectRatio };
|
||||
53
frontend/src/app/components/ui/avatar.tsx
Normal file
53
frontend/src/app/components/ui/avatar.tsx
Normal file
@@ -0,0 +1,53 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as AvatarPrimitive from "@radix-ui/react-avatar";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Avatar frame.
 *
 * Merges `className` with the default round, fixed-size (`size-10`) classes
 * and forwards the remaining props to the Radix `Avatar.Root` primitive.
 */
function Avatar(props: React.ComponentProps<typeof AvatarPrimitive.Root>) {
  const { className, ...rootProps } = props;
  const avatarClassName = cn(
    "relative flex size-10 shrink-0 overflow-hidden rounded-full",
    className,
  );

  return (
    <AvatarPrimitive.Root
      data-slot="avatar"
      className={avatarClassName}
      {...rootProps}
    />
  );
}
|
||||
|
||||
/**
 * Avatar image.
 *
 * Merges `className` with the default square, full-size classes and forwards
 * the remaining props to the Radix `Avatar.Image` primitive.
 */
function AvatarImage(
  props: React.ComponentProps<typeof AvatarPrimitive.Image>,
) {
  const { className, ...imageProps } = props;
  const imageClassName = cn("aspect-square size-full", className);

  return (
    <AvatarPrimitive.Image
      data-slot="avatar-image"
      className={imageClassName}
      {...imageProps}
    />
  );
}
|
||||
|
||||
/**
 * Avatar fallback content.
 *
 * Merges `className` with the default centered, muted-background classes and
 * forwards the remaining props to the Radix `Avatar.Fallback` primitive.
 */
function AvatarFallback(
  props: React.ComponentProps<typeof AvatarPrimitive.Fallback>,
) {
  const { className, ...fallbackProps } = props;
  const fallbackClassName = cn(
    "bg-muted flex size-full items-center justify-center rounded-full",
    className,
  );

  return (
    <AvatarPrimitive.Fallback
      data-slot="avatar-fallback"
      className={fallbackClassName}
      {...fallbackProps}
    />
  );
}
|
||||
|
||||
export { Avatar, AvatarImage, AvatarFallback };
|
||||
46
frontend/src/app/components/ui/badge.tsx
Normal file
46
frontend/src/app/components/ui/badge.tsx
Normal file
@@ -0,0 +1,46 @@
|
||||
import * as React from "react";
|
||||
import { Slot } from "@radix-ui/react-slot";
|
||||
import { cva, type VariantProps } from "class-variance-authority";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
// Classes shared by every badge variant.
const BADGE_BASE_CLASSES =
  "inline-flex items-center justify-center rounded-md border px-2 py-0.5 text-xs font-medium w-fit whitespace-nowrap shrink-0 [&>svg]:size-3 gap-1 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive transition-[color,box-shadow] overflow-hidden";

/**
 * Class-name builder for `Badge`.
 *
 * Supports a single `variant` axis (`default` | `secondary` | `destructive` |
 * `outline`) and falls back to `default` when no variant is given.
 */
const badgeVariants = cva(BADGE_BASE_CLASSES, {
  variants: {
    variant: {
      default:
        "border-transparent bg-primary text-primary-foreground [a&]:hover:bg-primary/90",
      secondary:
        "border-transparent bg-secondary text-secondary-foreground [a&]:hover:bg-secondary/90",
      destructive:
        "border-transparent bg-destructive text-white [a&]:hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
      outline:
        "text-foreground [a&]:hover:bg-accent [a&]:hover:text-accent-foreground",
    },
  },
  defaultVariants: {
    variant: "default",
  },
});
|
||||
|
||||
/**
 * Badge label.
 *
 * Renders a `<span>` by default. When `asChild` is true it renders the Radix
 * `Slot` component instead. Classes come from `badgeVariants` for the given
 * `variant`, merged with `className`; all other props are forwarded.
 */
function Badge(
  props: React.ComponentProps<"span"> &
    VariantProps<typeof badgeVariants> & { asChild?: boolean },
) {
  const { className, variant, asChild = false, ...rest } = props;
  const Root = asChild ? Slot : "span";
  const badgeClassName = cn(badgeVariants({ variant }), className);

  return <Root data-slot="badge" className={badgeClassName} {...rest} />;
}
|
||||
|
||||
export { Badge, badgeVariants };
|
||||
109
frontend/src/app/components/ui/breadcrumb.tsx
Normal file
109
frontend/src/app/components/ui/breadcrumb.tsx
Normal file
@@ -0,0 +1,109 @@
|
||||
import * as React from "react";
|
||||
import { Slot } from "@radix-ui/react-slot";
|
||||
import { ChevronRight, MoreHorizontal } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Breadcrumb landmark.
 *
 * Renders a `<nav aria-label="breadcrumb">` and forwards every prop to it.
 */
function Breadcrumb(props: React.ComponentProps<"nav">) {
  return <nav aria-label="breadcrumb" data-slot="breadcrumb" {...props} />;
}
|
||||
|
||||
/**
 * Ordered list holding the breadcrumb items.
 *
 * Merges `className` with the default wrapping flex-row classes and forwards
 * the remaining props to the `<ol>`.
 */
function BreadcrumbList(props: React.ComponentProps<"ol">) {
  const { className, ...listProps } = props;
  const listClassName = cn(
    "text-muted-foreground flex flex-wrap items-center gap-1.5 text-sm break-words sm:gap-2.5",
    className,
  );

  return (
    <ol data-slot="breadcrumb-list" className={listClassName} {...listProps} />
  );
}
|
||||
|
||||
/**
 * One entry in the breadcrumb list.
 *
 * Merges `className` with the default inline-flex classes and forwards the
 * remaining props to the `<li>`.
 */
function BreadcrumbItem(props: React.ComponentProps<"li">) {
  const { className, ...itemProps } = props;
  const itemClassName = cn("inline-flex items-center gap-1.5", className);

  return (
    <li data-slot="breadcrumb-item" className={itemClassName} {...itemProps} />
  );
}
|
||||
|
||||
/**
 * Navigable breadcrumb entry.
 *
 * Renders an `<a>` by default. When `asChild` is truthy it renders the Radix
 * `Slot` component instead. `className` is merged with the default hover
 * classes and all other props are forwarded.
 */
function BreadcrumbLink(
  props: React.ComponentProps<"a"> & {
    asChild?: boolean;
  },
) {
  const { asChild, className, ...linkProps } = props;
  const Root = asChild ? Slot : "a";
  const linkClassName = cn("hover:text-foreground transition-colors", className);

  return (
    <Root data-slot="breadcrumb-link" className={linkClassName} {...linkProps} />
  );
}
|
||||
|
||||
/**
 * Breadcrumb entry for the current page.
 *
 * Renders a `<span>` with `role="link"`, `aria-disabled="true"` and
 * `aria-current="page"`; `className` is merged with the default foreground
 * classes and other props are forwarded.
 */
function BreadcrumbPage(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const pageClassName = cn("text-foreground font-normal", className);

  return (
    <span
      data-slot="breadcrumb-page"
      role="link"
      aria-disabled="true"
      aria-current="page"
      className={pageClassName}
      {...spanProps}
    />
  );
}
|
||||
|
||||
/**
 * Visual divider between breadcrumb items.
 *
 * Renders a presentational `<li aria-hidden="true">`. Shows `children` when
 * they are not `null`/`undefined`, otherwise a `ChevronRight` icon.
 */
function BreadcrumbSeparator(props: React.ComponentProps<"li">) {
  const { children, className, ...itemProps } = props;
  const separatorClassName = cn("[&>svg]:size-3.5", className);
  // `??` keeps explicit falsy children such as `0` or `""`.
  const content = children ?? <ChevronRight />;

  return (
    <li
      data-slot="breadcrumb-separator"
      role="presentation"
      aria-hidden="true"
      className={separatorClassName}
      {...itemProps}
    >
      {content}
    </li>
  );
}
|
||||
|
||||
/**
 * Placeholder for collapsed breadcrumb items.
 *
 * Renders a presentational `<span aria-hidden="true">` containing a
 * `MoreHorizontal` icon and a visually hidden "More" label.
 */
function BreadcrumbEllipsis(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const ellipsisClassName = cn(
    "flex size-9 items-center justify-center",
    className,
  );

  return (
    <span
      data-slot="breadcrumb-ellipsis"
      role="presentation"
      aria-hidden="true"
      className={ellipsisClassName}
      {...spanProps}
    >
      <MoreHorizontal className="size-4" />
      <span className="sr-only">More</span>
    </span>
  );
}
|
||||
|
||||
export {
|
||||
Breadcrumb,
|
||||
BreadcrumbList,
|
||||
BreadcrumbItem,
|
||||
BreadcrumbLink,
|
||||
BreadcrumbPage,
|
||||
BreadcrumbSeparator,
|
||||
BreadcrumbEllipsis,
|
||||
};
|
||||
58
frontend/src/app/components/ui/button.tsx
Normal file
58
frontend/src/app/components/ui/button.tsx
Normal file
@@ -0,0 +1,58 @@
|
||||
import * as React from "react";
|
||||
import { Slot } from "@radix-ui/react-slot";
|
||||
import { cva, type VariantProps } from "class-variance-authority";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
// Classes shared by every button variant and size.
const BUTTON_BASE_CLASSES =
  "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4 shrink-0 [&_svg]:shrink-0 outline-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive";

/**
 * Class-name builder for `Button` (also reused by other components for
 * button-like styling).
 *
 * Two axes: `variant` (`default` | `destructive` | `outline` | `secondary` |
 * `ghost` | `link`) and `size` (`default` | `sm` | `lg` | `icon`). Both fall
 * back to `default` when omitted.
 */
const buttonVariants = cva(BUTTON_BASE_CLASSES, {
  variants: {
    variant: {
      default: "bg-primary text-primary-foreground hover:bg-primary/90",
      destructive:
        "bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
      outline:
        "border bg-background text-foreground hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50",
      secondary:
        "bg-secondary text-secondary-foreground hover:bg-secondary/80",
      ghost:
        "hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50",
      link: "text-primary underline-offset-4 hover:underline",
    },
    size: {
      default: "h-9 px-4 py-2 has-[>svg]:px-3",
      sm: "h-8 rounded-md gap-1.5 px-3 has-[>svg]:px-2.5",
      lg: "h-10 rounded-md px-6 has-[>svg]:px-4",
      icon: "size-9 rounded-md",
    },
  },
  defaultVariants: {
    variant: "default",
    size: "default",
  },
});
|
||||
|
||||
/**
 * Button.
 *
 * Renders a `<button>` by default. When `asChild` is true it renders the
 * Radix `Slot` component instead. `className` is handed to `buttonVariants`
 * together with `variant` and `size`, and the result is passed through `cn`.
 * All other props are forwarded.
 */
function Button(
  props: React.ComponentProps<"button"> &
    VariantProps<typeof buttonVariants> & {
      asChild?: boolean;
    },
) {
  const { className, variant, size, asChild = false, ...rest } = props;
  const Root = asChild ? Slot : "button";
  const buttonClassName = cn(buttonVariants({ variant, size, className }));

  return <Root data-slot="button" className={buttonClassName} {...rest} />;
}
|
||||
|
||||
export { Button, buttonVariants };
|
||||
75
frontend/src/app/components/ui/calendar.tsx
Normal file
75
frontend/src/app/components/ui/calendar.tsx
Normal file
@@ -0,0 +1,75 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import { ChevronLeft, ChevronRight } from "lucide-react";
|
||||
import { DayPicker } from "react-day-picker";
|
||||
|
||||
import { cn } from "./utils";
|
||||
import { buttonVariants } from "./button";
|
||||
|
||||
/**
 * Styled wrapper around `DayPicker` from react-day-picker.
 *
 * - `showOutsideDays` defaults to `true`.
 * - `className` is merged with the default `p-3` padding.
 * - `classNames` entries are spread after the defaults, so a caller-supplied
 *   key replaces the default for that key.
 * - The `cell` classes depend on whether `mode === "range"`.
 * - `IconLeft` / `IconRight` are replaced with lucide chevrons.
 * - Remaining props are spread last onto `DayPicker`.
 */
function Calendar(props: React.ComponentProps<typeof DayPicker>) {
  const {
    className,
    classNames,
    showOutsideDays = true,
    ...dayPickerProps
  } = props;

  // Rounded-corner rules for a selected cell differ between range selection
  // and every other mode.
  const selectedCellClasses =
    dayPickerProps.mode === "range"
      ? "[&:has(>.day-range-end)]:rounded-r-md [&:has(>.day-range-start)]:rounded-l-md first:[&:has([aria-selected])]:rounded-l-md last:[&:has([aria-selected])]:rounded-r-md"
      : "[&:has([aria-selected])]:rounded-md";

  const defaultClassNames = {
    months: "flex flex-col sm:flex-row gap-2",
    month: "flex flex-col gap-4",
    caption: "flex justify-center pt-1 relative items-center w-full",
    caption_label: "text-sm font-medium",
    nav: "flex items-center gap-1",
    nav_button: cn(
      buttonVariants({ variant: "outline" }),
      "size-7 bg-transparent p-0 opacity-50 hover:opacity-100",
    ),
    nav_button_previous: "absolute left-1",
    nav_button_next: "absolute right-1",
    table: "w-full border-collapse space-x-1",
    head_row: "flex",
    head_cell:
      "text-muted-foreground rounded-md w-8 font-normal text-[0.8rem]",
    row: "flex w-full mt-2",
    cell: cn(
      "relative p-0 text-center text-sm focus-within:relative focus-within:z-20 [&:has([aria-selected])]:bg-accent [&:has([aria-selected].day-range-end)]:rounded-r-md",
      selectedCellClasses,
    ),
    day: cn(
      buttonVariants({ variant: "ghost" }),
      "size-8 p-0 font-normal aria-selected:opacity-100",
    ),
    day_range_start:
      "day-range-start aria-selected:bg-primary aria-selected:text-primary-foreground",
    day_range_end:
      "day-range-end aria-selected:bg-primary aria-selected:text-primary-foreground",
    day_selected:
      "bg-primary text-primary-foreground hover:bg-primary hover:text-primary-foreground focus:bg-primary focus:text-primary-foreground",
    day_today: "bg-accent text-accent-foreground",
    day_outside:
      "day-outside text-muted-foreground aria-selected:text-muted-foreground",
    day_disabled: "text-muted-foreground opacity-50",
    day_range_middle:
      "aria-selected:bg-accent aria-selected:text-accent-foreground",
    day_hidden: "invisible",
  };

  return (
    <DayPicker
      showOutsideDays={showOutsideDays}
      className={cn("p-3", className)}
      classNames={{ ...defaultClassNames, ...classNames }}
      components={{
        IconLeft: ({ className: iconClassName, ...iconProps }) => (
          <ChevronLeft className={cn("size-4", iconClassName)} {...iconProps} />
        ),
        IconRight: ({ className: iconClassName, ...iconProps }) => (
          <ChevronRight className={cn("size-4", iconClassName)} {...iconProps} />
        ),
      }}
      {...dayPickerProps}
    />
  );
}
|
||||
|
||||
export { Calendar };
|
||||
92
frontend/src/app/components/ui/card.tsx
Normal file
92
frontend/src/app/components/ui/card.tsx
Normal file
@@ -0,0 +1,92 @@
|
||||
import * as React from "react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Card container.
 *
 * Plain `<div>`: merges `className` with the default bordered, rounded column
 * layout classes and forwards the remaining props.
 */
function Card(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const cardClassName = cn(
    "bg-card text-card-foreground flex flex-col gap-6 rounded-xl border",
    className,
  );

  return <div data-slot="card" className={cardClassName} {...divProps} />;
}
|
||||
|
||||
/**
 * Header region of a `Card`.
 *
 * Plain `<div>` laid out as a grid; the default classes add a second column
 * when a `data-slot="card-action"` element is present. `className` is merged
 * with the defaults and other props are forwarded.
 */
function CardHeader(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const headerClassName = cn(
    "@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-1.5 px-6 pt-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6",
    className,
  );

  return (
    <div data-slot="card-header" className={headerClassName} {...divProps} />
  );
}
|
||||
|
||||
/**
 * Title of a `Card`, rendered as an `<h4>`.
 *
 * Props are typed against the `<h4>` element that is actually rendered
 * (they were previously declared as `<div>` props), so `ref` and event
 * handler types describe the real DOM node. `className` is merged with the
 * default `leading-none` class and all other props are forwarded.
 */
function CardTitle({ className, ...props }: React.ComponentProps<"h4">) {
  return (
    <h4
      data-slot="card-title"
      className={cn("leading-none", className)}
      {...props}
    />
  );
}
|
||||
|
||||
/**
 * Description text of a `Card`, rendered as a `<p>`.
 *
 * Props are typed against the `<p>` element that is actually rendered
 * (they were previously declared as `<div>` props), so `ref` and event
 * handler types describe the real DOM node. `className` is merged with the
 * default muted-text class and all other props are forwarded.
 */
function CardDescription({ className, ...props }: React.ComponentProps<"p">) {
  return (
    <p
      data-slot="card-description"
      className={cn("text-muted-foreground", className)}
      {...props}
    />
  );
}
|
||||
|
||||
/**
 * Action slot of a `Card` header.
 *
 * Plain `<div>` pinned to the second grid column, spanning two rows;
 * `className` is merged with those defaults and other props are forwarded.
 */
function CardAction(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const actionClassName = cn(
    "col-start-2 row-span-2 row-start-1 self-start justify-self-end",
    className,
  );

  return (
    <div data-slot="card-action" className={actionClassName} {...divProps} />
  );
}
|
||||
|
||||
/**
 * Main content region of a `Card`.
 *
 * Plain `<div>` with horizontal padding, plus bottom padding when it is the
 * last child; `className` is merged and other props are forwarded.
 */
function CardContent(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const contentClassName = cn("px-6 [&:last-child]:pb-6", className);

  return (
    <div data-slot="card-content" className={contentClassName} {...divProps} />
  );
}
|
||||
|
||||
/**
 * Footer region of a `Card`.
 *
 * Plain `<div>` laid out as a centered flex row; `className` is merged with
 * the default padding classes and other props are forwarded.
 */
function CardFooter(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const footerClassName = cn(
    "flex items-center px-6 pb-6 [.border-t]:pt-6",
    className,
  );

  return (
    <div data-slot="card-footer" className={footerClassName} {...divProps} />
  );
}
|
||||
|
||||
export {
|
||||
Card,
|
||||
CardHeader,
|
||||
CardFooter,
|
||||
CardTitle,
|
||||
CardAction,
|
||||
CardDescription,
|
||||
CardContent,
|
||||
};
|
||||
241
frontend/src/app/components/ui/carousel.tsx
Normal file
241
frontend/src/app/components/ui/carousel.tsx
Normal file
@@ -0,0 +1,241 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import useEmblaCarousel, {
|
||||
type UseEmblaCarouselType,
|
||||
} from "embla-carousel-react";
|
||||
import { ArrowLeft, ArrowRight } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
import { Button } from "./button";
|
||||
|
||||
// Element at index 1 of `UseEmblaCarouselType`; used below as the type of the
// Embla API handle.
type CarouselApi = UseEmblaCarouselType[1];

// Positional parameters of `useEmblaCarousel`: [options, plugins].
type UseCarouselParameters = Parameters<typeof useEmblaCarousel>;
type CarouselOptions = UseCarouselParameters[0];
type CarouselPlugin = UseCarouselParameters[1];

/** Props accepted by `Carousel` on top of the regular `<div>` props. */
type CarouselProps = {
  opts?: CarouselOptions;
  plugins?: CarouselPlugin;
  orientation?: "horizontal" | "vertical";
  setApi?: (api: CarouselApi) => void;
};

/** Value shared with carousel sub-components through `CarouselContext`. */
type CarouselContextProps = CarouselProps & {
  carouselRef: ReturnType<typeof useEmblaCarousel>[0];
  api: ReturnType<typeof useEmblaCarousel>[1];
  scrollPrev: () => void;
  scrollNext: () => void;
  canScrollPrev: boolean;
  canScrollNext: boolean;
};

// `null` default lets `useCarousel` detect usage outside a provider.
const CarouselContext = React.createContext<CarouselContextProps | null>(null);
|
||||
|
||||
/**
 * Read the carousel context.
 *
 * @returns The value provided by the nearest `CarouselContext.Provider`.
 * @throws Error when the context value is missing, i.e. the hook is called
 *   outside a `<Carousel />`.
 */
function useCarousel() {
  const carousel = React.useContext(CarouselContext);

  if (carousel) {
    return carousel;
  }

  throw new Error("useCarousel must be used within a <Carousel />");
}
|
||||
|
||||
/**
 * Carousel root.
 *
 * Creates the Embla instance, tracks whether scrolling backwards/forwards is
 * currently possible, and exposes both through `CarouselContext` so that
 * `CarouselContent`, `CarouselItem`, `CarouselPrevious` and `CarouselNext`
 * can consume them.
 *
 * @param orientation Scroll direction; defaults to `"horizontal"` and is
 *   translated to Embla's `axis` option (`"x"` / `"y"`), overriding any
 *   `opts.axis`.
 * @param opts Options forwarded to `useEmblaCarousel`.
 * @param plugins Plugins forwarded to `useEmblaCarousel`.
 * @param setApi Optional callback invoked with the Embla API once it exists.
 * @param className Extra classes merged onto the wrapping `<div>`.
 * @param children Carousel sub-components.
 * @param props Remaining `<div>` props, spread last onto the wrapper.
 */
function Carousel({
  orientation = "horizontal",
  opts,
  setApi,
  plugins,
  className,
  children,
  ...props
}: React.ComponentProps<"div"> & CarouselProps) {
  const [carouselRef, api] = useEmblaCarousel(
    {
      ...opts,
      axis: orientation === "horizontal" ? "x" : "y",
    },
    plugins,
  );
  const [canScrollPrev, setCanScrollPrev] = React.useState(false);
  const [canScrollNext, setCanScrollNext] = React.useState(false);

  // Mirrors Embla's scrollability flags into React state.
  const onSelect = React.useCallback((api: CarouselApi) => {
    if (!api) return;
    setCanScrollPrev(api.canScrollPrev());
    setCanScrollNext(api.canScrollNext());
  }, []);

  const scrollPrev = React.useCallback(() => {
    api?.scrollPrev();
  }, [api]);

  const scrollNext = React.useCallback(() => {
    api?.scrollNext();
  }, [api]);

  // Left/Right arrow keys move between slides; attached in the capture phase
  // on the wrapper below.
  const handleKeyDown = React.useCallback(
    (event: React.KeyboardEvent<HTMLDivElement>) => {
      if (event.key === "ArrowLeft") {
        event.preventDefault();
        scrollPrev();
      } else if (event.key === "ArrowRight") {
        event.preventDefault();
        scrollNext();
      }
    },
    [scrollPrev, scrollNext],
  );

  // Hand the Embla API to the caller once it is available.
  React.useEffect(() => {
    if (!api || !setApi) return;
    setApi(api);
  }, [api, setApi]);

  React.useEffect(() => {
    if (!api) return;
    // Sync once immediately, then on every re-initialisation and selection.
    onSelect(api);
    api.on("reInit", onSelect);
    api.on("select", onSelect);

    return () => {
      // Remove BOTH listeners registered above. Previously only "select" was
      // removed, so the "reInit" handler leaked on every effect re-run.
      api.off("reInit", onSelect);
      api.off("select", onSelect);
    };
  }, [api, onSelect]);

  return (
    <CarouselContext.Provider
      value={{
        carouselRef,
        api: api,
        opts,
        orientation:
          orientation || (opts?.axis === "y" ? "vertical" : "horizontal"),
        scrollPrev,
        scrollNext,
        canScrollPrev,
        canScrollNext,
      }}
    >
      <div
        onKeyDownCapture={handleKeyDown}
        className={cn("relative", className)}
        role="region"
        aria-roledescription="carousel"
        data-slot="carousel"
        {...props}
      >
        {children}
      </div>
    </CarouselContext.Provider>
  );
}
|
||||
|
||||
/**
 * Track that holds the carousel slides.
 *
 * The outer `<div>` receives the Embla `carouselRef` and hides overflow. The
 * inner flex `<div>` receives the caller's props and a negative margin on the
 * axis that matches the carousel orientation.
 */
function CarouselContent(props: React.ComponentProps<"div">) {
  const { className, ...trackProps } = props;
  const { carouselRef, orientation } = useCarousel();
  const axisClasses =
    orientation === "horizontal" ? "-ml-4" : "-mt-4 flex-col";

  return (
    <div
      ref={carouselRef}
      className="overflow-hidden"
      data-slot="carousel-content"
    >
      <div className={cn("flex", axisClasses, className)} {...trackProps} />
    </div>
  );
}
|
||||
|
||||
/**
 * A single carousel slide.
 *
 * Renders a `<div role="group" aria-roledescription="slide">` that takes the
 * full basis of the track, with padding on the axis that matches the
 * carousel orientation.
 */
function CarouselItem(props: React.ComponentProps<"div">) {
  const { className, ...slideProps } = props;
  const { orientation } = useCarousel();
  const axisPadding = orientation === "horizontal" ? "pl-4" : "pt-4";

  return (
    <div
      role="group"
      aria-roledescription="slide"
      data-slot="carousel-item"
      className={cn("min-w-0 shrink-0 grow-0 basis-full", axisPadding, className)}
      {...slideProps}
    />
  );
}
|
||||
|
||||
/**
 * "Previous slide" button.
 *
 * Defaults to the `outline` variant and `icon` size. It is disabled while the
 * carousel cannot scroll backwards and calls `scrollPrev` on click. Caller
 * props are spread last, so they can override `disabled` and `onClick`.
 */
function CarouselPrevious(props: React.ComponentProps<typeof Button>) {
  const {
    className,
    variant = "outline",
    size = "icon",
    ...buttonProps
  } = props;
  const { orientation, scrollPrev, canScrollPrev } = useCarousel();
  const placementClasses =
    orientation === "horizontal"
      ? "top-1/2 -left-12 -translate-y-1/2"
      : "-top-12 left-1/2 -translate-x-1/2 rotate-90";

  return (
    <Button
      data-slot="carousel-previous"
      variant={variant}
      size={size}
      className={cn("absolute size-8 rounded-full", placementClasses, className)}
      disabled={!canScrollPrev}
      onClick={scrollPrev}
      {...buttonProps}
    >
      <ArrowLeft />
      <span className="sr-only">Previous slide</span>
    </Button>
  );
}
|
||||
|
||||
/**
 * "Next slide" button.
 *
 * Defaults to the `outline` variant and `icon` size. It is disabled while the
 * carousel cannot scroll forwards and calls `scrollNext` on click. Caller
 * props are spread last, so they can override `disabled` and `onClick`.
 */
function CarouselNext(props: React.ComponentProps<typeof Button>) {
  const {
    className,
    variant = "outline",
    size = "icon",
    ...buttonProps
  } = props;
  const { orientation, scrollNext, canScrollNext } = useCarousel();
  const placementClasses =
    orientation === "horizontal"
      ? "top-1/2 -right-12 -translate-y-1/2"
      : "-bottom-12 left-1/2 -translate-x-1/2 rotate-90";

  return (
    <Button
      data-slot="carousel-next"
      variant={variant}
      size={size}
      className={cn("absolute size-8 rounded-full", placementClasses, className)}
      disabled={!canScrollNext}
      onClick={scrollNext}
      {...buttonProps}
    >
      <ArrowRight />
      <span className="sr-only">Next slide</span>
    </Button>
  );
}
|
||||
|
||||
export {
|
||||
type CarouselApi,
|
||||
Carousel,
|
||||
CarouselContent,
|
||||
CarouselItem,
|
||||
CarouselPrevious,
|
||||
CarouselNext,
|
||||
};
|
||||
353
frontend/src/app/components/ui/chart.tsx
Normal file
353
frontend/src/app/components/ui/chart.tsx
Normal file
@@ -0,0 +1,353 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as RechartsPrimitive from "recharts";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
// Maps each theme name to the CSS selector that scopes that theme's colors.
// Format: { THEME_NAME: CSS_SELECTOR }
const THEMES = { light: "", dark: ".dark" } as const;

type ThemeName = keyof typeof THEMES;

// An item is colored either by one `color` or by a per-theme `theme` map,
// never both.
type ChartItemColor =
  | { color?: string; theme?: never }
  | { color?: never; theme: Record<ThemeName, string> };

/**
 * Per-series chart configuration, keyed by an arbitrary series name.
 * Each entry may carry a `label`, an `icon` component and a color source.
 */
export type ChartConfig = {
  [k in string]: {
    label?: React.ReactNode;
    icon?: React.ComponentType;
  } & ChartItemColor;
};

type ChartContextProps = {
  config: ChartConfig;
};

// `null` default lets `useChart` detect usage outside a provider.
const ChartContext = React.createContext<ChartContextProps | null>(null);
|
||||
|
||||
/**
 * Read the chart context.
 *
 * @returns The value provided by the nearest `ChartContext.Provider`.
 * @throws Error when the context value is missing, i.e. the hook is called
 *   outside a `<ChartContainer />`.
 */
function useChart() {
  const chart = React.useContext(ChartContext);

  if (chart) {
    return chart;
  }

  throw new Error("useChart must be used within a <ChartContainer />");
}
|
||||
|
||||
// Default classes for the chart wrapper, including overrides for Recharts'
// generated elements.
const CHART_CONTAINER_CLASSES =
  "[&_.recharts-cartesian-axis-tick_text]:fill-muted-foreground [&_.recharts-cartesian-grid_line[stroke='#ccc']]:stroke-border/50 [&_.recharts-curve.recharts-tooltip-cursor]:stroke-border [&_.recharts-polar-grid_[stroke='#ccc']]:stroke-border [&_.recharts-radial-bar-background-sector]:fill-muted [&_.recharts-rectangle.recharts-tooltip-cursor]:fill-muted [&_.recharts-reference-line_[stroke='#ccc']]:stroke-border flex aspect-video justify-center text-xs [&_.recharts-dot[stroke='#fff']]:stroke-transparent [&_.recharts-layer]:outline-hidden [&_.recharts-sector]:outline-hidden [&_.recharts-sector[stroke='#fff']]:stroke-transparent [&_.recharts-surface]:outline-hidden";

/**
 * Chart wrapper.
 *
 * Provides `config` through `ChartContext`, renders a `<div>` tagged with a
 * `data-chart` id, emits a `ChartStyle` for that id and config, and places
 * `children` inside a Recharts `ResponsiveContainer`.
 *
 * The chart id is `chart-<id>` when `id` is truthy. Otherwise it is built
 * from `React.useId()` with every `:` removed.
 */
function ChartContainer(
  props: React.ComponentProps<"div"> & {
    config: ChartConfig;
    children: React.ComponentProps<
      typeof RechartsPrimitive.ResponsiveContainer
    >["children"];
  },
) {
  const { id, className, children, config, ...divProps } = props;
  const generatedId = React.useId();
  const idSuffix = id || generatedId.replace(/:/g, "");
  const chartId = `chart-${idSuffix}`;

  return (
    <ChartContext.Provider value={{ config }}>
      <div
        data-slot="chart"
        data-chart={chartId}
        className={cn(CHART_CONTAINER_CLASSES, className)}
        {...divProps}
      >
        <ChartStyle id={chartId} config={config} />
        <RechartsPrimitive.ResponsiveContainer>
          {children}
        </RechartsPrimitive.ResponsiveContainer>
      </div>
    </ChartContext.Provider>
  );
}
|
||||
|
||||
/**
 * Emits a `<style>` element that defines one `--color-<key>` CSS variable per
 * config entry that has a `theme` or `color`, once for every THEMES entry.
 *
 * Each rule is scoped to `<theme prefix> [data-chart=<id>]`. Returns null when
 * no config entry defines a color.
 */
const ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => {
  const coloredEntries = Object.entries(config).filter(
    ([, entry]) => entry.theme || entry.color,
  );

  if (coloredEntries.length === 0) {
    return null;
  }

  const css = Object.entries(THEMES)
    .map(([themeName, selectorPrefix]) => {
      // Entries without a color for this theme yield null, which join()
      // renders as an empty line.
      const declarations = coloredEntries
        .map(([seriesKey, entry]) => {
          const resolved =
            entry.theme?.[themeName as keyof typeof entry.theme] ||
            entry.color;
          return resolved ? ` --color-${seriesKey}: ${resolved};` : null;
        })
        .join("\n");

      return `\n${selectorPrefix} [data-chart=${id}] {\n${declarations}\n}\n`;
    })
    .join("\n");

  return <style dangerouslySetInnerHTML={{ __html: css }} />;
};
|
||||
|
||||
// Alias of the Recharts Tooltip component; no behavior is added here.
const ChartTooltip = RechartsPrimitive.Tooltip;
|
||||
|
||||
/**
 * Tooltip body for charts rendered inside a ChartContainer.
 *
 * Renders an optional label followed by one row per payload entry. Each row
 * shows the entry's configured icon or a colored indicator, its label, and
 * its value. Returns null when the tooltip is inactive or the payload is
 * empty.
 *
 * Props (beyond the Recharts Tooltip and `div` props):
 * - `hideLabel`: skip the label entirely.
 * - `hideIndicator`: skip the colored indicator (an icon still renders).
 * - `indicator`: indicator shape, one of "dot" (default), "line", "dashed".
 * - `nameKey`: payload field used to look up each row's config entry.
 * - `labelKey`: payload field used to look up the label's config entry.
 *
 * Must be rendered within a ChartContainer (useChart throws otherwise).
 */
function ChartTooltipContent({
  active,
  payload,
  className,
  indicator = "dot",
  hideLabel = false,
  hideIndicator = false,
  label,
  labelFormatter,
  labelClassName,
  formatter,
  color,
  nameKey,
  labelKey,
}: React.ComponentProps<typeof RechartsPrimitive.Tooltip> &
  React.ComponentProps<"div"> & {
    hideLabel?: boolean;
    hideIndicator?: boolean;
    indicator?: "line" | "dot" | "dashed";
    nameKey?: string;
    labelKey?: string;
  }) {
  const { config } = useChart();

  // The label is derived from the first payload entry only.
  const tooltipLabel = React.useMemo(() => {
    if (hideLabel || !payload?.length) {
      return null;
    }

    const [item] = payload;
    const key = `${labelKey || item?.dataKey || item?.name || "value"}`;
    const itemConfig = getPayloadConfigFromPayload(config, item, key);
    const value =
      !labelKey && typeof label === "string"
        ? config[label as keyof typeof config]?.label || label
        : itemConfig?.label;

    if (labelFormatter) {
      return (
        <div className={cn("font-medium", labelClassName)}>
          {labelFormatter(value, payload)}
        </div>
      );
    }

    if (!value) {
      return null;
    }

    return <div className={cn("font-medium", labelClassName)}>{value}</div>;
  }, [
    label,
    labelFormatter,
    payload,
    hideLabel,
    labelClassName,
    config,
    labelKey,
  ]);

  // Hooks above must run unconditionally, so the early return comes after.
  if (!active || !payload?.length) {
    return null;
  }

  // With a single entry and a non-dot indicator, the label is nested inside
  // the row instead of being rendered above the rows.
  const nestLabel = payload.length === 1 && indicator !== "dot";

  return (
    <div
      className={cn(
        "border-border/50 bg-background grid min-w-[8rem] items-start gap-1.5 rounded-lg border px-2.5 py-1.5 text-xs shadow-xl",
        className,
      )}
    >
      {!nestLabel ? tooltipLabel : null}
      <div className="grid gap-1.5">
        {payload.map((item, index) => {
          const key = `${nameKey || item.name || item.dataKey || "value"}`;
          const itemConfig = getPayloadConfigFromPayload(config, item, key);
          // Optional chaining: an entry without a nested `payload` object
          // must not crash the whole tooltip.
          const indicatorColor = color || item.payload?.fill || item.color;

          return (
            <div
              key={item.dataKey}
              className={cn(
                "[&>svg]:text-muted-foreground flex w-full flex-wrap items-stretch gap-2 [&>svg]:h-2.5 [&>svg]:w-2.5",
                indicator === "dot" && "items-center",
              )}
            >
              {formatter && item?.value !== undefined && item.name ? (
                formatter(item.value, item.name, item, index, item.payload)
              ) : (
                <>
                  {itemConfig?.icon ? (
                    <itemConfig.icon />
                  ) : (
                    !hideIndicator && (
                      <div
                        className={cn(
                          "shrink-0 rounded-[2px] border-(--color-border) bg-(--color-bg)",
                          {
                            "h-2.5 w-2.5": indicator === "dot",
                            "w-1": indicator === "line",
                            "w-0 border-[1.5px] border-dashed bg-transparent":
                              indicator === "dashed",
                            "my-0.5": nestLabel && indicator === "dashed",
                          },
                        )}
                        style={
                          {
                            "--color-bg": indicatorColor,
                            "--color-border": indicatorColor,
                          } as React.CSSProperties
                        }
                      />
                    )
                  )}
                  <div
                    className={cn(
                      "flex flex-1 justify-between leading-none",
                      nestLabel ? "items-end" : "items-center",
                    )}
                  >
                    <div className="grid gap-1.5">
                      {nestLabel ? tooltipLabel : null}
                      <span className="text-muted-foreground">
                        {itemConfig?.label || item.name}
                      </span>
                    </div>
                    {/* Explicit presence check: a plain `item.value &&`
                        would render a bare, unstyled "0" for zero values. */}
                    {item.value !== undefined &&
                      item.value !== null &&
                      item.value !== "" && (
                        <span className="text-foreground font-mono font-medium tabular-nums">
                          {item.value.toLocaleString()}
                        </span>
                      )}
                  </div>
                </>
              )}
            </div>
          );
        })}
      </div>
    </div>
  );
}
|
||||
|
||||
// Alias of the Recharts Legend component; no behavior is added here.
const ChartLegend = RechartsPrimitive.Legend;
|
||||
|
||||
/**
 * Legend body for charts rendered inside a ChartContainer.
 *
 * Renders one item per legend payload entry: the configured icon (unless
 * `hideIcon` is set or no icon is configured, in which case a square filled
 * with the entry color is shown) followed by the configured label. Returns
 * null for an empty payload.
 *
 * `nameKey` selects the payload field used to look up each entry's config;
 * `verticalAlign` picks bottom padding ("top") or top padding (otherwise).
 */
function ChartLegendContent(
  props: React.ComponentProps<"div"> &
    Pick<RechartsPrimitive.LegendProps, "payload" | "verticalAlign"> & {
      hideIcon?: boolean;
      nameKey?: string;
    },
) {
  const {
    className,
    hideIcon = false,
    payload,
    verticalAlign = "bottom",
    nameKey,
  } = props;
  const { config } = useChart();

  if (!payload?.length) {
    return null;
  }

  const spacingClass = verticalAlign === "top" ? "pb-3" : "pt-3";

  const legendItems = payload.map((entry) => {
    const configKey = `${nameKey || entry.dataKey || "value"}`;
    const entryConfig = getPayloadConfigFromPayload(config, entry, configKey);

    return (
      <div
        key={entry.value}
        className={cn(
          "[&>svg]:text-muted-foreground flex items-center gap-1.5 [&>svg]:h-3 [&>svg]:w-3",
        )}
      >
        {entryConfig?.icon && !hideIcon ? (
          <entryConfig.icon />
        ) : (
          <div
            className="h-2 w-2 shrink-0 rounded-[2px]"
            style={{
              backgroundColor: entry.color,
            }}
          />
        )}
        {entryConfig?.label}
      </div>
    );
  });

  return (
    <div
      className={cn(
        "flex items-center justify-center gap-4",
        spacingClass,
        className,
      )}
    >
      {legendItems}
    </div>
  );
}
|
||||
|
||||
/**
 * Resolves the ChartConfig entry that belongs to a tooltip/legend payload.
 *
 * Lookup order for the config key:
 *   1. `payload[key]` when it is a string,
 *   2. otherwise `payload.payload[key]` when it is a string,
 *   3. otherwise `key` itself.
 * If the resolved key is not present in `config`, `config[key]` is used as a
 * fallback.
 *
 * Only the config's own properties are considered, so series named like
 * `Object.prototype` members ("toString", "constructor", ...) never resolve
 * to inherited functions.
 *
 * @param config  Chart configuration to search.
 * @param payload Payload entry; non-object or null values yield undefined.
 * @param key     Payload field name (or direct config key) to resolve with.
 * @returns The matching config entry, or undefined when none exists.
 */
function getPayloadConfigFromPayload(
  config: ChartConfig,
  payload: unknown,
  key: string,
) {
  if (typeof payload !== "object" || payload === null) {
    return undefined;
  }

  const payloadPayload =
    "payload" in payload &&
    typeof payload.payload === "object" &&
    payload.payload !== null
      ? payload.payload
      : undefined;

  let configLabelKey: string = key;

  if (
    key in payload &&
    typeof payload[key as keyof typeof payload] === "string"
  ) {
    configLabelKey = payload[key as keyof typeof payload] as string;
  } else if (
    payloadPayload &&
    key in payloadPayload &&
    typeof payloadPayload[key as keyof typeof payloadPayload] === "string"
  ) {
    configLabelKey = payloadPayload[
      key as keyof typeof payloadPayload
    ] as string;
  }

  // Own-property check instead of `in`: `in` also matches inherited members
  // such as "toString", which would return a prototype function.
  const hasOwnEntry = (name: string) =>
    Object.prototype.hasOwnProperty.call(config, name);

  if (hasOwnEntry(configLabelKey)) {
    return config[configLabelKey];
  }

  return hasOwnEntry(key) ? config[key] : undefined;
}
|
||||
|
||||
export {
|
||||
ChartContainer,
|
||||
ChartTooltip,
|
||||
ChartTooltipContent,
|
||||
ChartLegend,
|
||||
ChartLegendContent,
|
||||
ChartStyle,
|
||||
};
|
||||
32
frontend/src/app/components/ui/checkbox.tsx
Normal file
32
frontend/src/app/components/ui/checkbox.tsx
Normal file
@@ -0,0 +1,32 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as CheckboxPrimitive from "@radix-ui/react-checkbox";
|
||||
import { CheckIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Styled checkbox built on the Radix Checkbox root, with a check icon as the
 * indicator. Caller `className` is merged after the base classes; all other
 * props are forwarded to the root.
 */
function Checkbox(props: React.ComponentProps<typeof CheckboxPrimitive.Root>) {
  const { className, ...rootProps } = props;

  const rootClassName = cn(
    "peer border bg-input-background dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  return (
    <CheckboxPrimitive.Root
      data-slot="checkbox"
      className={rootClassName}
      {...rootProps}
    >
      <CheckboxPrimitive.Indicator
        data-slot="checkbox-indicator"
        className="flex items-center justify-center text-current transition-none"
      >
        <CheckIcon className="size-3.5" />
      </CheckboxPrimitive.Indicator>
    </CheckboxPrimitive.Root>
  );
}
|
||||
|
||||
export { Checkbox };
|
||||
33
frontend/src/app/components/ui/collapsible.tsx
Normal file
33
frontend/src/app/components/ui/collapsible.tsx
Normal file
@@ -0,0 +1,33 @@
|
||||
"use client";
|
||||
|
||||
import * as CollapsiblePrimitive from "@radix-ui/react-collapsible";
|
||||
|
||||
/** Radix Collapsible root tagged with `data-slot="collapsible"`; forwards all props. */
function Collapsible(
  props: React.ComponentProps<typeof CollapsiblePrimitive.Root>,
) {
  return <CollapsiblePrimitive.Root data-slot="collapsible" {...props} />;
}
|
||||
|
||||
/** Radix Collapsible trigger tagged with `data-slot="collapsible-trigger"`; forwards all props. */
function CollapsibleTrigger(
  props: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleTrigger>,
) {
  const Trigger = CollapsiblePrimitive.CollapsibleTrigger;

  return <Trigger data-slot="collapsible-trigger" {...props} />;
}
|
||||
|
||||
/** Radix Collapsible content tagged with `data-slot="collapsible-content"`; forwards all props. */
function CollapsibleContent(
  props: React.ComponentProps<typeof CollapsiblePrimitive.CollapsibleContent>,
) {
  const Content = CollapsiblePrimitive.CollapsibleContent;

  return <Content data-slot="collapsible-content" {...props} />;
}
|
||||
|
||||
export { Collapsible, CollapsibleTrigger, CollapsibleContent };
|
||||
177
frontend/src/app/components/ui/command.tsx
Normal file
177
frontend/src/app/components/ui/command.tsx
Normal file
@@ -0,0 +1,177 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import { Command as CommandPrimitive } from "cmdk";
|
||||
import { SearchIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from "./dialog";
|
||||
|
||||
/**
 * Styled cmdk command root. Caller `className` is merged after the base
 * classes; remaining props are forwarded.
 */
function Command(props: React.ComponentProps<typeof CommandPrimitive>) {
  const { className, ...rest } = props;

  const commandClassName = cn(
    "bg-popover text-popover-foreground flex h-full w-full flex-col overflow-hidden rounded-md",
    className,
  );

  return (
    <CommandPrimitive
      data-slot="command"
      className={commandClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Command palette hosted in a Dialog.
 *
 * `title` and `description` are rendered in a screen-reader-only header
 * (defaults: "Command Palette" / "Search for a command to run...").
 * `children` are placed inside a Command; the remaining props go to Dialog.
 */
function CommandDialog(
  props: React.ComponentProps<typeof Dialog> & {
    title?: string;
    description?: string;
  },
) {
  const {
    title = "Command Palette",
    description = "Search for a command to run...",
    children,
    ...dialogProps
  } = props;

  return (
    <Dialog {...dialogProps}>
      <DialogHeader className="sr-only">
        <DialogTitle>{title}</DialogTitle>
        <DialogDescription>{description}</DialogDescription>
      </DialogHeader>
      <DialogContent className="overflow-hidden p-0">
        <Command className="[&_[cmdk-group-heading]]:text-muted-foreground **:data-[slot=command-input-wrapper]:h-12 [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:font-medium [&_[cmdk-group]]:px-2 [&_[cmdk-group]:not([hidden])_~[cmdk-group]]:pt-0 [&_[cmdk-input-wrapper]_svg]:h-5 [&_[cmdk-input-wrapper]_svg]:w-5 [&_[cmdk-input]]:h-12 [&_[cmdk-item]]:px-2 [&_[cmdk-item]]:py-3 [&_[cmdk-item]_svg]:h-5 [&_[cmdk-item]_svg]:w-5">
          {children}
        </Command>
      </DialogContent>
    </Dialog>
  );
}
|
||||
|
||||
/**
 * Command search input: a wrapper `div` holding a search icon and the cmdk
 * input. `className` is merged into the input's classes; remaining props are
 * forwarded to the input (not the wrapper).
 */
function CommandInput(
  props: React.ComponentProps<typeof CommandPrimitive.Input>,
) {
  const { className, ...inputProps } = props;

  const inputClassName = cn(
    "placeholder:text-muted-foreground flex h-10 w-full rounded-md bg-transparent py-3 text-sm outline-hidden disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  return (
    <div
      data-slot="command-input-wrapper"
      className="flex h-9 items-center gap-2 border-b px-3"
    >
      <SearchIcon className="size-4 shrink-0 opacity-50" />
      <CommandPrimitive.Input
        data-slot="command-input"
        className={inputClassName}
        {...inputProps}
      />
    </div>
  );
}
|
||||
|
||||
/** Styled cmdk list; merges `className` after the base classes and forwards the rest. */
function CommandList(
  props: React.ComponentProps<typeof CommandPrimitive.List>,
) {
  const { className, ...rest } = props;

  const listClassName = cn(
    "max-h-[300px] scroll-py-1 overflow-x-hidden overflow-y-auto",
    className,
  );

  return (
    <CommandPrimitive.List
      data-slot="command-list"
      className={listClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * cmdk empty-state element with default centered styling. Props are spread
 * after the defaults, so a caller-supplied `className` replaces the default.
 */
function CommandEmpty(
  props: React.ComponentProps<typeof CommandPrimitive.Empty>,
) {
  return (
    <CommandPrimitive.Empty
      data-slot="command-empty"
      className="py-6 text-center text-sm"
      {...props}
    />
  );
}
|
||||
|
||||
/** Styled cmdk group; merges `className` after the base classes and forwards the rest. */
function CommandGroup(
  props: React.ComponentProps<typeof CommandPrimitive.Group>,
) {
  const { className, ...rest } = props;

  const groupClassName = cn(
    "text-foreground [&_[cmdk-group-heading]]:text-muted-foreground overflow-hidden p-1 [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1.5 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium",
    className,
  );

  return (
    <CommandPrimitive.Group
      data-slot="command-group"
      className={groupClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Styled cmdk separator; merges `className` after the base classes and forwards the rest. */
function CommandSeparator(
  props: React.ComponentProps<typeof CommandPrimitive.Separator>,
) {
  const { className, ...rest } = props;
  const separatorClassName = cn("bg-border -mx-1 h-px", className);

  return (
    <CommandPrimitive.Separator
      data-slot="command-separator"
      className={separatorClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Styled cmdk item; merges `className` after the base classes and forwards the rest. */
function CommandItem(
  props: React.ComponentProps<typeof CommandPrimitive.Item>,
) {
  const { className, ...rest } = props;

  const itemClassName = cn(
    "data-[selected=true]:bg-accent data-[selected=true]:text-accent-foreground [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled=true]:pointer-events-none data-[disabled=true]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <CommandPrimitive.Item
      data-slot="command-item"
      className={itemClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Right-aligned `span` for a keyboard shortcut hint inside a command item. */
function CommandShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;

  const shortcutClassName = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );

  return (
    <span
      data-slot="command-shortcut"
      className={shortcutClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
export {
|
||||
Command,
|
||||
CommandDialog,
|
||||
CommandInput,
|
||||
CommandList,
|
||||
CommandEmpty,
|
||||
CommandGroup,
|
||||
CommandItem,
|
||||
CommandShortcut,
|
||||
CommandSeparator,
|
||||
};
|
||||
252
frontend/src/app/components/ui/context-menu.tsx
Normal file
252
frontend/src/app/components/ui/context-menu.tsx
Normal file
@@ -0,0 +1,252 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as ContextMenuPrimitive from "@radix-ui/react-context-menu";
|
||||
import { CheckIcon, ChevronRightIcon, CircleIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/** Radix ContextMenu root tagged with `data-slot="context-menu"`; forwards all props. */
function ContextMenu(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Root>,
) {
  return <ContextMenuPrimitive.Root data-slot="context-menu" {...props} />;
}
|
||||
|
||||
/** Radix ContextMenu trigger tagged with `data-slot="context-menu-trigger"`; forwards all props. */
function ContextMenuTrigger(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Trigger>,
) {
  const Trigger = ContextMenuPrimitive.Trigger;

  return <Trigger data-slot="context-menu-trigger" {...props} />;
}
|
||||
|
||||
/** Radix ContextMenu group tagged with `data-slot="context-menu-group"`; forwards all props. */
function ContextMenuGroup(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Group>,
) {
  const Group = ContextMenuPrimitive.Group;

  return <Group data-slot="context-menu-group" {...props} />;
}
|
||||
|
||||
/** Radix ContextMenu portal, passed `data-slot="context-menu-portal"`; forwards all props. */
function ContextMenuPortal(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Portal>,
) {
  const Portal = ContextMenuPrimitive.Portal;

  return <Portal data-slot="context-menu-portal" {...props} />;
}
|
||||
|
||||
/** Radix ContextMenu sub-menu root, passed `data-slot="context-menu-sub"`; forwards all props. */
function ContextMenuSub(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Sub>,
) {
  return <ContextMenuPrimitive.Sub data-slot="context-menu-sub" {...props} />;
}
|
||||
|
||||
/** Radix ContextMenu radio group tagged with `data-slot="context-menu-radio-group"`; forwards all props. */
function ContextMenuRadioGroup(
  props: React.ComponentProps<typeof ContextMenuPrimitive.RadioGroup>,
) {
  const RadioGroup = ContextMenuPrimitive.RadioGroup;

  return <RadioGroup data-slot="context-menu-radio-group" {...props} />;
}
|
||||
|
||||
/**
 * Styled sub-menu trigger. Renders `children` followed by a right-aligned
 * chevron icon. `inset` is exposed as `data-inset`, which the base classes
 * use to add left padding.
 */
function ContextMenuSubTrigger(
  props: React.ComponentProps<typeof ContextMenuPrimitive.SubTrigger> & {
    inset?: boolean;
  },
) {
  const { className, inset, children, ...rest } = props;

  const triggerClassName = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex cursor-default items-center rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <ContextMenuPrimitive.SubTrigger
      data-slot="context-menu-sub-trigger"
      data-inset={inset}
      className={triggerClassName}
      {...rest}
    >
      {children}
      <ChevronRightIcon className="ml-auto" />
    </ContextMenuPrimitive.SubTrigger>
  );
}
|
||||
|
||||
/** Styled sub-menu content; merges `className` after the base classes and forwards the rest. */
function ContextMenuSubContent(
  props: React.ComponentProps<typeof ContextMenuPrimitive.SubContent>,
) {
  const { className, ...rest } = props;

  const contentClassName = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[8rem] origin-(--radix-context-menu-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-lg",
    className,
  );

  return (
    <ContextMenuPrimitive.SubContent
      data-slot="context-menu-sub-content"
      className={contentClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Styled context-menu content, rendered inside a Radix Portal. Merges
 * `className` after the base classes and forwards the rest to the content.
 */
function ContextMenuContent(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Content>,
) {
  const { className, ...rest } = props;

  const contentClassName = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 max-h-(--radix-context-menu-content-available-height) min-w-[8rem] origin-(--radix-context-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border p-1 shadow-md",
    className,
  );

  return (
    <ContextMenuPrimitive.Portal>
      <ContextMenuPrimitive.Content
        data-slot="context-menu-content"
        className={contentClassName}
        {...rest}
      />
    </ContextMenuPrimitive.Portal>
  );
}
|
||||
|
||||
/**
 * Styled context-menu item. `inset` and `variant` ("default" by default, or
 * "destructive") are exposed as `data-inset` / `data-variant`, which the base
 * classes key off.
 */
function ContextMenuItem(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
) {
  const { className, inset, variant = "default", ...rest } = props;

  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <ContextMenuPrimitive.Item
      data-slot="context-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={itemClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Styled context-menu checkbox item. A check icon is rendered through the
 * Radix ItemIndicator in a left-aligned slot, followed by `children`.
 */
function ContextMenuCheckboxItem(
  props: React.ComponentProps<typeof ContextMenuPrimitive.CheckboxItem>,
) {
  const { className, children, checked, ...rest } = props;

  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <ContextMenuPrimitive.CheckboxItem
      data-slot="context-menu-checkbox-item"
      className={itemClassName}
      checked={checked}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <ContextMenuPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </ContextMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </ContextMenuPrimitive.CheckboxItem>
  );
}
|
||||
|
||||
/**
 * Styled context-menu radio item. A filled circle icon is rendered through
 * the Radix ItemIndicator in a left-aligned slot, followed by `children`.
 */
function ContextMenuRadioItem(
  props: React.ComponentProps<typeof ContextMenuPrimitive.RadioItem>,
) {
  const { className, children, ...rest } = props;

  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <ContextMenuPrimitive.RadioItem
      data-slot="context-menu-radio-item"
      className={itemClassName}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <ContextMenuPrimitive.ItemIndicator>
          <CircleIcon className="size-2 fill-current" />
        </ContextMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </ContextMenuPrimitive.RadioItem>
  );
}
|
||||
|
||||
/**
 * Styled context-menu label. `inset` is exposed as `data-inset`, which the
 * base classes use to add left padding.
 */
function ContextMenuLabel(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Label> & {
    inset?: boolean;
  },
) {
  const { className, inset, ...rest } = props;

  const labelClassName = cn(
    "text-foreground px-2 py-1.5 text-sm font-medium data-[inset]:pl-8",
    className,
  );

  return (
    <ContextMenuPrimitive.Label
      data-slot="context-menu-label"
      data-inset={inset}
      className={labelClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Styled context-menu separator; merges `className` after the base classes and forwards the rest. */
function ContextMenuSeparator(
  props: React.ComponentProps<typeof ContextMenuPrimitive.Separator>,
) {
  const { className, ...rest } = props;
  const separatorClassName = cn("bg-border -mx-1 my-1 h-px", className);

  return (
    <ContextMenuPrimitive.Separator
      data-slot="context-menu-separator"
      className={separatorClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Right-aligned `span` for a keyboard shortcut hint inside a context-menu item. */
function ContextMenuShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;

  const shortcutClassName = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );

  return (
    <span
      data-slot="context-menu-shortcut"
      className={shortcutClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
export {
|
||||
ContextMenu,
|
||||
ContextMenuTrigger,
|
||||
ContextMenuContent,
|
||||
ContextMenuItem,
|
||||
ContextMenuCheckboxItem,
|
||||
ContextMenuRadioItem,
|
||||
ContextMenuLabel,
|
||||
ContextMenuSeparator,
|
||||
ContextMenuShortcut,
|
||||
ContextMenuGroup,
|
||||
ContextMenuPortal,
|
||||
ContextMenuSub,
|
||||
ContextMenuSubContent,
|
||||
ContextMenuSubTrigger,
|
||||
ContextMenuRadioGroup,
|
||||
};
|
||||
135
frontend/src/app/components/ui/dialog.tsx
Normal file
135
frontend/src/app/components/ui/dialog.tsx
Normal file
@@ -0,0 +1,135 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as DialogPrimitive from "@radix-ui/react-dialog";
|
||||
import { XIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/** Radix Dialog root, passed `data-slot="dialog"`; forwards all props. */
function Dialog(props: React.ComponentProps<typeof DialogPrimitive.Root>) {
  return <DialogPrimitive.Root data-slot="dialog" {...props} />;
}
|
||||
|
||||
/** Radix Dialog trigger tagged with `data-slot="dialog-trigger"`; forwards all props. */
function DialogTrigger(
  props: React.ComponentProps<typeof DialogPrimitive.Trigger>,
) {
  return <DialogPrimitive.Trigger data-slot="dialog-trigger" {...props} />;
}
|
||||
|
||||
/** Radix Dialog portal, passed `data-slot="dialog-portal"`; forwards all props. */
function DialogPortal(
  props: React.ComponentProps<typeof DialogPrimitive.Portal>,
) {
  return <DialogPrimitive.Portal data-slot="dialog-portal" {...props} />;
}
|
||||
|
||||
/** Radix Dialog close control tagged with `data-slot="dialog-close"`; forwards all props. */
function DialogClose(
  props: React.ComponentProps<typeof DialogPrimitive.Close>,
) {
  return <DialogPrimitive.Close data-slot="dialog-close" {...props} />;
}
|
||||
|
||||
/** Styled dialog overlay; merges `className` after the base classes and forwards the rest. */
function DialogOverlay(
  props: React.ComponentProps<typeof DialogPrimitive.Overlay>,
) {
  const { className, ...rest } = props;

  const overlayClassName = cn(
    "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-black/50",
    className,
  );

  return (
    <DialogPrimitive.Overlay
      data-slot="dialog-overlay"
      className={overlayClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Styled dialog content rendered inside DialogPortal together with a
 * DialogOverlay. `children` are followed by a built-in close button (X icon
 * with a screen-reader-only "Close" label).
 */
function DialogContent(
  props: React.ComponentProps<typeof DialogPrimitive.Content>,
) {
  const { className, children, ...rest } = props;

  const contentClassName = cn(
    "bg-background data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg",
    className,
  );

  return (
    <DialogPortal data-slot="dialog-portal">
      <DialogOverlay />
      <DialogPrimitive.Content
        data-slot="dialog-content"
        className={contentClassName}
        {...rest}
      >
        {children}
        <DialogPrimitive.Close className="ring-offset-background focus:ring-ring data-[state=open]:bg-accent data-[state=open]:text-muted-foreground absolute top-4 right-4 rounded-xs opacity-70 transition-opacity hover:opacity-100 focus:ring-2 focus:ring-offset-2 focus:outline-hidden disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4">
          <XIcon />
          <span className="sr-only">Close</span>
        </DialogPrimitive.Close>
      </DialogPrimitive.Content>
    </DialogPortal>
  );
}
|
||||
|
||||
/** Column-layout `div` for a dialog header; merges `className` and forwards the rest. */
function DialogHeader(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;

  const headerClassName = cn(
    "flex flex-col gap-2 text-center sm:text-left",
    className,
  );

  return <div data-slot="dialog-header" className={headerClassName} {...rest} />;
}
|
||||
|
||||
/** Layout `div` for dialog footer actions; merges `className` and forwards the rest. */
function DialogFooter(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;

  const footerClassName = cn(
    "flex flex-col-reverse gap-2 sm:flex-row sm:justify-end",
    className,
  );

  return <div data-slot="dialog-footer" className={footerClassName} {...rest} />;
}
|
||||
|
||||
/** Styled Radix Dialog title; merges `className` after the base classes and forwards the rest. */
function DialogTitle(
  props: React.ComponentProps<typeof DialogPrimitive.Title>,
) {
  const { className, ...rest } = props;
  const titleClassName = cn("text-lg leading-none font-semibold", className);

  return (
    <DialogPrimitive.Title
      data-slot="dialog-title"
      className={titleClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Styled Radix Dialog description; merges `className` after the base classes and forwards the rest. */
function DialogDescription(
  props: React.ComponentProps<typeof DialogPrimitive.Description>,
) {
  const { className, ...rest } = props;
  const descriptionClassName = cn("text-muted-foreground text-sm", className);

  return (
    <DialogPrimitive.Description
      data-slot="dialog-description"
      className={descriptionClassName}
      {...rest}
    />
  );
}
|
||||
|
||||
export {
|
||||
Dialog,
|
||||
DialogClose,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogOverlay,
|
||||
DialogPortal,
|
||||
DialogTitle,
|
||||
DialogTrigger,
|
||||
};
|
||||
132
frontend/src/app/components/ui/drawer.tsx
Normal file
132
frontend/src/app/components/ui/drawer.tsx
Normal file
@@ -0,0 +1,132 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import { Drawer as DrawerPrimitive } from "vaul";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/** vaul Drawer root, passed `data-slot="drawer"`; forwards all props. */
function Drawer(props: React.ComponentProps<typeof DrawerPrimitive.Root>) {
  return <DrawerPrimitive.Root data-slot="drawer" {...props} />;
}
|
||||
|
||||
/** vaul Drawer trigger tagged with `data-slot="drawer-trigger"`; forwards all props. */
function DrawerTrigger(
  props: React.ComponentProps<typeof DrawerPrimitive.Trigger>,
) {
  return <DrawerPrimitive.Trigger data-slot="drawer-trigger" {...props} />;
}
|
||||
|
||||
/** vaul Drawer portal, passed `data-slot="drawer-portal"`; forwards all props. */
function DrawerPortal(
  props: React.ComponentProps<typeof DrawerPrimitive.Portal>,
) {
  return <DrawerPrimitive.Portal data-slot="drawer-portal" {...props} />;
}
|
||||
|
||||
/** vaul Drawer close control tagged with `data-slot="drawer-close"`; forwards all props. */
function DrawerClose(
  props: React.ComponentProps<typeof DrawerPrimitive.Close>,
) {
  return <DrawerPrimitive.Close data-slot="drawer-close" {...props} />;
}
|
||||
|
||||
/**
 * Overlay rendered behind the drawer content.
 *
 * Merges the caller's `className` with the default fixed, full-inset overlay
 * classes and forwards the remaining props to vaul's `Drawer.Overlay`.
 */
function DrawerOverlay(
  props: React.ComponentProps<typeof DrawerPrimitive.Overlay>,
) {
  const { className, ...rest } = props;
  const classes = cn(
    "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-black/50",
    className,
  );

  return (
    <DrawerPrimitive.Overlay
      data-slot="drawer-overlay"
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Drawer panel.
 *
 * Renders, inside a `DrawerPortal`, a `DrawerOverlay` followed by vaul's
 * `Drawer.Content`. The class list carries one group of utilities per
 * `data-vaul-drawer-direction` value (top, bottom, right, left). A handle bar is
 * rendered before `children`; its classes keep it hidden except for the
 * bottom direction.
 */
function DrawerContent(
  props: React.ComponentProps<typeof DrawerPrimitive.Content>,
) {
  const { className, children, ...rest } = props;

  const classes = cn(
    "group/drawer-content bg-background fixed z-50 flex h-auto flex-col",
    "data-[vaul-drawer-direction=top]:inset-x-0 data-[vaul-drawer-direction=top]:top-0 data-[vaul-drawer-direction=top]:mb-24 data-[vaul-drawer-direction=top]:max-h-[80vh] data-[vaul-drawer-direction=top]:rounded-b-lg data-[vaul-drawer-direction=top]:border-b",
    "data-[vaul-drawer-direction=bottom]:inset-x-0 data-[vaul-drawer-direction=bottom]:bottom-0 data-[vaul-drawer-direction=bottom]:mt-24 data-[vaul-drawer-direction=bottom]:max-h-[80vh] data-[vaul-drawer-direction=bottom]:rounded-t-lg data-[vaul-drawer-direction=bottom]:border-t",
    "data-[vaul-drawer-direction=right]:inset-y-0 data-[vaul-drawer-direction=right]:right-0 data-[vaul-drawer-direction=right]:w-3/4 data-[vaul-drawer-direction=right]:border-l data-[vaul-drawer-direction=right]:sm:max-w-sm",
    "data-[vaul-drawer-direction=left]:inset-y-0 data-[vaul-drawer-direction=left]:left-0 data-[vaul-drawer-direction=left]:w-3/4 data-[vaul-drawer-direction=left]:border-r data-[vaul-drawer-direction=left]:sm:max-w-sm",
    className,
  );

  return (
    <DrawerPortal data-slot="drawer-portal">
      <DrawerOverlay />
      <DrawerPrimitive.Content
        data-slot="drawer-content"
        className={classes}
        {...rest}
      >
        <div className="bg-muted mx-auto mt-4 hidden h-2 w-[100px] shrink-0 rounded-full group-data-[vaul-drawer-direction=bottom]/drawer-content:block" />
        {children}
      </DrawerPrimitive.Content>
    </DrawerPortal>
  );
}
|
||||
|
||||
/** Header region of a drawer: a padded, column-flex `div`. */
function DrawerHeader(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("flex flex-col gap-1.5 p-4", className);

  return <div data-slot="drawer-header" className={classes} {...rest} />;
}
|
||||
|
||||
/** Footer region of a drawer: a padded, column-flex `div` with `mt-auto`. */
function DrawerFooter(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("mt-auto flex flex-col gap-2 p-4", className);

  return <div data-slot="drawer-footer" className={classes} {...rest} />;
}
|
||||
|
||||
/**
 * Drawer title; merges `className` with the default title classes and forwards
 * the remaining props to vaul's `Drawer.Title`.
 */
function DrawerTitle(
  props: React.ComponentProps<typeof DrawerPrimitive.Title>,
) {
  const { className, ...rest } = props;
  const classes = cn("text-foreground font-semibold", className);

  return (
    <DrawerPrimitive.Title
      data-slot="drawer-title"
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Drawer description; merges `className` with the default text classes and
 * forwards the remaining props to vaul's `Drawer.Description`.
 */
function DrawerDescription(
  props: React.ComponentProps<typeof DrawerPrimitive.Description>,
) {
  const { className, ...rest } = props;
  const classes = cn("text-muted-foreground text-sm", className);

  return (
    <DrawerPrimitive.Description
      data-slot="drawer-description"
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
export {
|
||||
Drawer,
|
||||
DrawerPortal,
|
||||
DrawerOverlay,
|
||||
DrawerTrigger,
|
||||
DrawerClose,
|
||||
DrawerContent,
|
||||
DrawerHeader,
|
||||
DrawerFooter,
|
||||
DrawerTitle,
|
||||
DrawerDescription,
|
||||
};
|
||||
257
frontend/src/app/components/ui/dropdown-menu.tsx
Normal file
257
frontend/src/app/components/ui/dropdown-menu.tsx
Normal file
@@ -0,0 +1,257 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu";
|
||||
import { CheckIcon, ChevronRightIcon, CircleIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/** Root of the dropdown menu; forwards every prop to Radix `DropdownMenu.Root`. */
function DropdownMenu(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Root>,
) {
  return <DropdownMenuPrimitive.Root data-slot="dropdown-menu" {...props} />;
}
|
||||
|
||||
/** Dropdown portal; forwards every prop to Radix `DropdownMenu.Portal`. */
function DropdownMenuPortal(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Portal>,
) {
  return (
    <DropdownMenuPrimitive.Portal data-slot="dropdown-menu-portal" {...props} />
  );
}
|
||||
|
||||
/** Dropdown trigger; forwards every prop to Radix `DropdownMenu.Trigger`. */
function DropdownMenuTrigger(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Trigger>,
) {
  return (
    <DropdownMenuPrimitive.Trigger
      data-slot="dropdown-menu-trigger"
      {...props}
    />
  );
}
|
||||
|
||||
/**
 * Dropdown menu panel, rendered inside a Radix portal.
 *
 * @param sideOffset Passed through to Radix `DropdownMenu.Content`; defaults to 4.
 * @param className Extra classes merged after the default panel classes.
 */
function DropdownMenuContent(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Content>,
) {
  const { className, sideOffset = 4, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 max-h-(--radix-dropdown-menu-content-available-height) min-w-[8rem] origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border p-1 shadow-md",
    className,
  );

  return (
    <DropdownMenuPrimitive.Portal>
      <DropdownMenuPrimitive.Content
        data-slot="dropdown-menu-content"
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </DropdownMenuPrimitive.Portal>
  );
}
|
||||
|
||||
/** Dropdown item group; forwards every prop to Radix `DropdownMenu.Group`. */
function DropdownMenuGroup(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Group>,
) {
  return (
    <DropdownMenuPrimitive.Group data-slot="dropdown-menu-group" {...props} />
  );
}
|
||||
|
||||
/**
 * Single dropdown menu item.
 *
 * @param inset Emitted as the `data-inset` attribute, which the class list uses
 *   to add left padding.
 * @param variant Emitted as `data-variant`; `"default"` unless specified.
 */
function DropdownMenuItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
) {
  const { className, inset, variant = "default", ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <DropdownMenuPrimitive.Item
      data-slot="dropdown-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Checkbox-style dropdown item.
 *
 * Renders a check icon inside the Radix `ItemIndicator`, absolutely positioned
 * at the left of the row, followed by `children`.
 */
function DropdownMenuCheckboxItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.CheckboxItem>,
) {
  const { className, children, checked, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <DropdownMenuPrimitive.CheckboxItem
      data-slot="dropdown-menu-checkbox-item"
      className={classes}
      checked={checked}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.CheckboxItem>
  );
}
|
||||
|
||||
/** Dropdown radio group; forwards every prop to Radix `DropdownMenu.RadioGroup`. */
function DropdownMenuRadioGroup(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.RadioGroup>,
) {
  return (
    <DropdownMenuPrimitive.RadioGroup
      data-slot="dropdown-menu-radio-group"
      {...props}
    />
  );
}
|
||||
|
||||
/**
 * Radio-style dropdown item.
 *
 * Renders a filled circle icon inside the Radix `ItemIndicator`, absolutely
 * positioned at the left of the row, followed by `children`.
 */
function DropdownMenuRadioItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.RadioItem>,
) {
  const { className, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-sm py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <DropdownMenuPrimitive.RadioItem
      data-slot="dropdown-menu-radio-item"
      className={classes}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <CircleIcon className="size-2 fill-current" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.RadioItem>
  );
}
|
||||
|
||||
/**
 * Label row inside a dropdown menu.
 *
 * @param inset Emitted as the `data-inset` attribute, which the class list uses
 *   to add left padding.
 */
function DropdownMenuLabel(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Label> & {
    inset?: boolean;
  },
) {
  const { className, inset, ...rest } = props;
  const classes = cn(
    "px-2 py-1.5 text-sm font-medium data-[inset]:pl-8",
    className,
  );

  return (
    <DropdownMenuPrimitive.Label
      data-slot="dropdown-menu-label"
      data-inset={inset}
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/** One-pixel-high separator between dropdown menu sections. */
function DropdownMenuSeparator(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Separator>,
) {
  const { className, ...rest } = props;
  const classes = cn("bg-border -mx-1 my-1 h-px", className);

  return (
    <DropdownMenuPrimitive.Separator
      data-slot="dropdown-menu-separator"
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Right-aligned (`ml-auto`) `span` for text shown at the end of a dropdown item. */
function DropdownMenuShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;
  const classes = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );

  return (
    <span data-slot="dropdown-menu-shortcut" className={classes} {...rest} />
  );
}
|
||||
|
||||
/** Sub-menu root; forwards every prop to Radix `DropdownMenu.Sub`. */
function DropdownMenuSub(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Sub>,
) {
  return <DropdownMenuPrimitive.Sub data-slot="dropdown-menu-sub" {...props} />;
}
|
||||
|
||||
/**
 * Item that opens a dropdown sub-menu; renders `children` followed by a
 * right-pointing chevron icon.
 *
 * @param inset Emitted as the `data-inset` attribute, which the class list uses
 *   to add left padding.
 */
function DropdownMenuSubTrigger(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.SubTrigger> & {
    inset?: boolean;
  },
) {
  const { className, inset, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex cursor-default items-center rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[inset]:pl-8",
    className,
  );

  return (
    <DropdownMenuPrimitive.SubTrigger
      data-slot="dropdown-menu-sub-trigger"
      data-inset={inset}
      className={classes}
      {...rest}
    >
      {children}
      <ChevronRightIcon className="ml-auto size-4" />
    </DropdownMenuPrimitive.SubTrigger>
  );
}
|
||||
|
||||
/**
 * Panel of a dropdown sub-menu; merges `className` with the default panel
 * classes and forwards the remaining props to Radix `DropdownMenu.SubContent`.
 */
function DropdownMenuSubContent(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.SubContent>,
) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[8rem] origin-(--radix-dropdown-menu-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-lg",
    className,
  );

  return (
    <DropdownMenuPrimitive.SubContent
      data-slot="dropdown-menu-sub-content"
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
export {
|
||||
DropdownMenu,
|
||||
DropdownMenuPortal,
|
||||
DropdownMenuTrigger,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuGroup,
|
||||
DropdownMenuLabel,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuCheckboxItem,
|
||||
DropdownMenuRadioGroup,
|
||||
DropdownMenuRadioItem,
|
||||
DropdownMenuSeparator,
|
||||
DropdownMenuShortcut,
|
||||
DropdownMenuSub,
|
||||
DropdownMenuSubTrigger,
|
||||
DropdownMenuSubContent,
|
||||
};
|
||||
168
frontend/src/app/components/ui/form.tsx
Normal file
168
frontend/src/app/components/ui/form.tsx
Normal file
@@ -0,0 +1,168 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as LabelPrimitive from "@radix-ui/react-label";
|
||||
import { Slot } from "@radix-ui/react-slot";
|
||||
import {
|
||||
Controller,
|
||||
FormProvider,
|
||||
useFormContext,
|
||||
useFormState,
|
||||
type ControllerProps,
|
||||
type FieldPath,
|
||||
type FieldValues,
|
||||
} from "react-hook-form";
|
||||
|
||||
import { cn } from "./utils";
|
||||
import { Label } from "./label";
|
||||
|
||||
/** `Form` is react-hook-form's `FormProvider` under a shorter name. */
const Form = FormProvider;

/** Value carried by `FormFieldContext`: the name of the enclosing field. */
type FormFieldContextValue<
  TFieldValues extends FieldValues = FieldValues,
  TName extends FieldPath<TFieldValues> = FieldPath<TFieldValues>,
> = { name: TName };

// The default value is an empty object cast to the context type, so consumers
// rendered without a provider receive `{}` (no `name` property), not `null`.
const FormFieldContext = React.createContext<FormFieldContextValue>(
  {} as FormFieldContextValue,
);
|
||||
|
||||
/**
 * Wraps react-hook-form's `Controller` and publishes the field's `name`
 * through `FormFieldContext` for descendants.
 */
const FormField = <
  TFieldValues extends FieldValues = FieldValues,
  TName extends FieldPath<TFieldValues> = FieldPath<TFieldValues>,
>(
  props: ControllerProps<TFieldValues, TName>,
) => {
  const contextValue = { name: props.name };

  return (
    <FormFieldContext.Provider value={contextValue}>
      <Controller {...props} />
    </FormFieldContext.Provider>
  );
};
|
||||
|
||||
/**
 * Resolve ids and validation state for the form field surrounding the caller.
 *
 * Reads the field name from `FormFieldContext` and the item id from
 * `FormItemContext`, then merges react-hook-form's field state into the result.
 *
 * @returns `id`, `name`, the derived `formItemId` / `formDescriptionId` /
 *   `formMessageId` strings, and everything returned by `getFieldState`.
 * @throws Error when no `<FormField>` ancestor has supplied a field name.
 */
const useFormField = () => {
  const fieldContext = React.useContext(FormFieldContext);
  const itemContext = React.useContext(FormItemContext);
  const { getFieldState } = useFormContext();
  // All hooks run before the guard below so the hook call order never varies.
  const formState = useFormState({ name: fieldContext.name });

  // FormFieldContext defaults to an empty object, so the context value itself is
  // always truthy; a missing `name` is what shows that no <FormField> is present.
  // The check must also come before `name` is used to look up field state.
  if (!fieldContext.name) {
    throw new Error("useFormField should be used within <FormField>");
  }

  const fieldState = getFieldState(fieldContext.name, formState);
  const { id } = itemContext;

  return {
    id,
    name: fieldContext.name,
    formItemId: `${id}-form-item`,
    formDescriptionId: `${id}-form-item-description`,
    formMessageId: `${id}-form-item-message`,
    ...fieldState,
  };
};
|
||||
|
||||
/** Value carried by `FormItemContext`: the id generated by the enclosing `FormItem`. */
type FormItemContextValue = { id: string };

// The default value is an empty object cast to the context type, so `id` is
// undefined for consumers rendered without a provider.
const FormItemContext = React.createContext<FormItemContextValue>(
  {} as FormItemContextValue,
);
|
||||
|
||||
/**
 * Container for one form field.
 *
 * Generates an id with `React.useId` and provides it to descendants through
 * `FormItemContext`; renders a grid `div` around them.
 */
function FormItem(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const id = React.useId();
  const classes = cn("grid gap-2", className);

  return (
    <FormItemContext.Provider value={{ id }}>
      <div data-slot="form-item" className={classes} {...rest} />
    </FormItemContext.Provider>
  );
}
|
||||
|
||||
/**
 * Label bound to the current form field.
 *
 * Sets `htmlFor` to the field's `formItemId` and exposes the error state as
 * `data-error`, which the class list uses to switch to the destructive colour.
 */
function FormLabel(
  props: React.ComponentProps<typeof LabelPrimitive.Root>,
) {
  const { className, ...rest } = props;
  const { error, formItemId } = useFormField();
  const classes = cn("data-[error=true]:text-destructive", className);

  return (
    <Label
      data-slot="form-label"
      data-error={!!error}
      className={classes}
      htmlFor={formItemId}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Slot that decorates its child control with the field's id and ARIA state.
 *
 * `aria-describedby` lists the description id, plus the message id when the
 * field has an error; `aria-invalid` mirrors whether an error exists.
 */
function FormControl(props: React.ComponentProps<typeof Slot>) {
  const field = useFormField();
  const { error, formItemId, formDescriptionId, formMessageId } = field;

  const describedBy = error
    ? `${formDescriptionId} ${formMessageId}`
    : `${formDescriptionId}`;

  return (
    <Slot
      data-slot="form-control"
      id={formItemId}
      aria-describedby={describedBy}
      aria-invalid={!!error}
      {...props}
    />
  );
}
|
||||
|
||||
/** Paragraph describing the current field; its `id` is the field's `formDescriptionId`. */
function FormDescription(props: React.ComponentProps<"p">) {
  const { className, ...rest } = props;
  const { formDescriptionId } = useFormField();
  const classes = cn("text-muted-foreground text-sm", className);

  return (
    <p
      data-slot="form-description"
      id={formDescriptionId}
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Message paragraph for the current field.
 *
 * Shows the error's message (stringified, empty string when absent) if the
 * field has an error, otherwise the supplied `children`. Renders nothing when
 * the resulting body is falsy.
 */
function FormMessage(props: React.ComponentProps<"p">) {
  const { className, ...rest } = props;
  const { error, formMessageId } = useFormField();

  const body = error ? String(error?.message ?? "") : rest.children;
  if (!body) {
    return null;
  }

  const classes = cn("text-destructive text-sm", className);

  return (
    <p
      data-slot="form-message"
      id={formMessageId}
      className={classes}
      {...rest}
    >
      {body}
    </p>
  );
}
|
||||
|
||||
export {
|
||||
useFormField,
|
||||
Form,
|
||||
FormItem,
|
||||
FormLabel,
|
||||
FormControl,
|
||||
FormDescription,
|
||||
FormMessage,
|
||||
FormField,
|
||||
};
|
||||
44
frontend/src/app/components/ui/hover-card.tsx
Normal file
44
frontend/src/app/components/ui/hover-card.tsx
Normal file
@@ -0,0 +1,44 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as HoverCardPrimitive from "@radix-ui/react-hover-card";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/** Root of the hover card; forwards every prop to Radix `HoverCard.Root`. */
function HoverCard(
  props: React.ComponentProps<typeof HoverCardPrimitive.Root>,
) {
  return <HoverCardPrimitive.Root data-slot="hover-card" {...props} />;
}
|
||||
|
||||
/** Hover card trigger; forwards every prop to Radix `HoverCard.Trigger`. */
function HoverCardTrigger(
  props: React.ComponentProps<typeof HoverCardPrimitive.Trigger>,
) {
  return (
    <HoverCardPrimitive.Trigger data-slot="hover-card-trigger" {...props} />
  );
}
|
||||
|
||||
/**
 * Hover card panel, rendered inside a Radix portal.
 *
 * @param align Passed to Radix `HoverCard.Content`; defaults to `"center"`.
 * @param sideOffset Passed to Radix `HoverCard.Content`; defaults to 4.
 */
function HoverCardContent(
  props: React.ComponentProps<typeof HoverCardPrimitive.Content>,
) {
  const { className, align = "center", sideOffset = 4, ...rest } = props;
  const classes = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-64 origin-(--radix-hover-card-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden",
    className,
  );

  return (
    <HoverCardPrimitive.Portal data-slot="hover-card-portal">
      <HoverCardPrimitive.Content
        data-slot="hover-card-content"
        align={align}
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </HoverCardPrimitive.Portal>
  );
}
|
||||
|
||||
export { HoverCard, HoverCardTrigger, HoverCardContent };
|
||||
77
frontend/src/app/components/ui/input-otp.tsx
Normal file
77
frontend/src/app/components/ui/input-otp.tsx
Normal file
@@ -0,0 +1,77 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import { OTPInput, OTPInputContext } from "input-otp";
|
||||
import { MinusIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * One-time-password input built on `OTPInput` from `input-otp`.
 *
 * @param containerClassName Extra classes merged into the container's defaults.
 * @param className Extra classes merged into the input's defaults.
 */
function InputOTP(
  props: React.ComponentProps<typeof OTPInput> & {
    containerClassName?: string;
  },
) {
  const { className, containerClassName, ...rest } = props;

  const containerClasses = cn(
    "flex items-center gap-2 has-disabled:opacity-50",
    containerClassName,
  );
  const inputClasses = cn("disabled:cursor-not-allowed", className);

  return (
    <OTPInput
      data-slot="input-otp"
      containerClassName={containerClasses}
      className={inputClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Flex row `div` that groups OTP slots. */
function InputOTPGroup(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props;
  const classes = cn("flex items-center gap-1", className);

  return <div data-slot="input-otp-group" className={classes} {...rest} />;
}
|
||||
|
||||
/**
 * One character cell of the OTP input.
 *
 * Looks up slot `index` in `OTPInputContext` (falling back to an empty object
 * when the context or slot is missing), shows its character, and renders a
 * caret element when the slot reports `hasFakeCaret`.
 *
 * @param index Position of this slot in the context's `slots` array.
 */
function InputOTPSlot(
  props: React.ComponentProps<"div"> & {
    index: number;
  },
) {
  const { index, className, ...rest } = props;
  const inputOTPContext = React.useContext(OTPInputContext);
  const { char, hasFakeCaret, isActive } = inputOTPContext?.slots[index] ?? {};

  const classes = cn(
    "data-[active=true]:border-ring data-[active=true]:ring-ring/50 data-[active=true]:aria-invalid:ring-destructive/20 dark:data-[active=true]:aria-invalid:ring-destructive/40 aria-invalid:border-destructive data-[active=true]:aria-invalid:border-destructive dark:bg-input/30 border-input relative flex h-9 w-9 items-center justify-center border-y border-r text-sm bg-input-background transition-all outline-none first:rounded-l-md first:border-l last:rounded-r-md data-[active=true]:z-10 data-[active=true]:ring-[3px]",
    className,
  );

  return (
    <div
      data-slot="input-otp-slot"
      data-active={isActive}
      className={classes}
      {...rest}
    >
      {char}
      {hasFakeCaret && (
        <div className="pointer-events-none absolute inset-0 flex items-center justify-center">
          <div className="animate-caret-blink bg-foreground h-4 w-px duration-1000" />
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/** Separator between OTP groups: a `div` with `role="separator"` holding a minus icon. */
function InputOTPSeparator(props: React.ComponentProps<"div">) {
  return (
    <div data-slot="input-otp-separator" role="separator" {...props}>
      <MinusIcon />
    </div>
  );
}
|
||||
|
||||
export { InputOTP, InputOTPGroup, InputOTPSlot, InputOTPSeparator };
|
||||
21
frontend/src/app/components/ui/input.tsx
Normal file
21
frontend/src/app/components/ui/input.tsx
Normal file
@@ -0,0 +1,21 @@
|
||||
import * as React from "react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Styled native `<input>`.
 *
 * The class list is built from three groups (base, focus-visible, and
 * aria-invalid utilities) followed by the caller's `className`.
 */
function Input(props: React.ComponentProps<"input">) {
  const { className, type, ...rest } = props;

  const classes = cn(
    "file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input flex h-9 w-full min-w-0 rounded-md border px-3 py-1 text-base bg-input-background transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
    "focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]",
    "aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
    className,
  );

  return <input type={type} data-slot="input" className={classes} {...rest} />;
}
|
||||
|
||||
export { Input };
|
||||
24
frontend/src/app/components/ui/label.tsx
Normal file
24
frontend/src/app/components/ui/label.tsx
Normal file
@@ -0,0 +1,24 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as LabelPrimitive from "@radix-ui/react-label";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Styled label; merges `className` with the default label classes and forwards
 * the remaining props to Radix `Label.Root`.
 */
function Label(props: React.ComponentProps<typeof LabelPrimitive.Root>) {
  const { className, ...rest } = props;
  const classes = cn(
    "flex items-center gap-2 text-sm leading-none font-medium select-none group-data-[disabled=true]:pointer-events-none group-data-[disabled=true]:opacity-50 peer-disabled:cursor-not-allowed peer-disabled:opacity-50",
    className,
  );

  return <LabelPrimitive.Root data-slot="label" className={classes} {...rest} />;
}
|
||||
|
||||
export { Label };
|
||||
276
frontend/src/app/components/ui/menubar.tsx
Normal file
276
frontend/src/app/components/ui/menubar.tsx
Normal file
@@ -0,0 +1,276 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as MenubarPrimitive from "@radix-ui/react-menubar";
|
||||
import { CheckIcon, ChevronRightIcon, CircleIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Menubar root; merges `className` with the default bar classes and forwards
 * the remaining props to Radix `Menubar.Root`.
 */
function Menubar(props: React.ComponentProps<typeof MenubarPrimitive.Root>) {
  const { className, ...rest } = props;
  const classes = cn(
    "bg-background flex h-9 items-center gap-1 rounded-md border p-1 shadow-xs",
    className,
  );

  return (
    <MenubarPrimitive.Root data-slot="menubar" className={classes} {...rest} />
  );
}
|
||||
|
||||
/** A single menu in the menubar; forwards every prop to Radix `Menubar.Menu`. */
function MenubarMenu(
  props: React.ComponentProps<typeof MenubarPrimitive.Menu>,
) {
  return <MenubarPrimitive.Menu data-slot="menubar-menu" {...props} />;
}
|
||||
|
||||
/** Menubar item group; forwards every prop to Radix `Menubar.Group`. */
function MenubarGroup(
  props: React.ComponentProps<typeof MenubarPrimitive.Group>,
) {
  return <MenubarPrimitive.Group data-slot="menubar-group" {...props} />;
}
|
||||
|
||||
/** Menubar portal; forwards every prop to Radix `Menubar.Portal`. */
function MenubarPortal(
  props: React.ComponentProps<typeof MenubarPrimitive.Portal>,
) {
  return <MenubarPrimitive.Portal data-slot="menubar-portal" {...props} />;
}
|
||||
|
||||
/** Menubar radio group; forwards every prop to Radix `Menubar.RadioGroup`. */
function MenubarRadioGroup(
  props: React.ComponentProps<typeof MenubarPrimitive.RadioGroup>,
) {
  return (
    <MenubarPrimitive.RadioGroup data-slot="menubar-radio-group" {...props} />
  );
}
|
||||
|
||||
/**
 * Menubar trigger; merges `className` with the default trigger classes and
 * forwards the remaining props to Radix `Menubar.Trigger`.
 */
function MenubarTrigger(
  props: React.ComponentProps<typeof MenubarPrimitive.Trigger>,
) {
  const { className, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex items-center rounded-sm px-2 py-1 text-sm font-medium outline-hidden select-none",
    className,
  );

  return (
    <MenubarPrimitive.Trigger
      data-slot="menubar-trigger"
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Menubar menu panel, rendered inside `MenubarPortal`.
 *
 * @param align Passed to Radix `Menubar.Content`; defaults to `"start"`.
 * @param alignOffset Passed to Radix `Menubar.Content`; defaults to -4.
 * @param sideOffset Passed to Radix `Menubar.Content`; defaults to 8.
 * @param className Extra classes merged after the default panel classes.
 */
function MenubarContent({
  className,
  align = "start",
  alignOffset = -4,
  sideOffset = 8,
  ...props
}: React.ComponentProps<typeof MenubarPrimitive.Content>) {
  return (
    <MenubarPortal>
      <MenubarPrimitive.Content
        data-slot="menubar-content"
        align={align}
        alignOffset={alignOffset}
        sideOffset={sideOffset}
        className={cn(
          // `data-[state=closed]:animate-out` is required for the closed-state
          // fade-out / zoom-out utilities in this list to have any effect.
          "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[12rem] origin-(--radix-menubar-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-md",
          className,
        )}
        {...props}
      />
    </MenubarPortal>
  );
}
|
||||
|
||||
/**
 * Single menubar item.
 *
 * @param inset Emitted as the `data-inset` attribute, which the class list uses
 *   to add left padding.
 * @param variant Emitted as `data-variant`; `"default"` unless specified.
 */
function MenubarItem(
  props: React.ComponentProps<typeof MenubarPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
) {
  const { className, inset, variant = "default", ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <MenubarPrimitive.Item
      data-slot="menubar-item"
      data-inset={inset}
      data-variant={variant}
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Checkbox-style menubar item.
 *
 * Renders a check icon inside the Radix `ItemIndicator`, absolutely positioned
 * at the left of the row, followed by `children`.
 */
function MenubarCheckboxItem(
  props: React.ComponentProps<typeof MenubarPrimitive.CheckboxItem>,
) {
  const { className, children, checked, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-xs py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <MenubarPrimitive.CheckboxItem
      data-slot="menubar-checkbox-item"
      className={classes}
      checked={checked}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <MenubarPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </MenubarPrimitive.ItemIndicator>
      </span>
      {children}
    </MenubarPrimitive.CheckboxItem>
  );
}
|
||||
|
||||
/**
 * Radio-style menubar item.
 *
 * Renders a filled circle icon inside the Radix `ItemIndicator`, absolutely
 * positioned at the left of the row, followed by `children`.
 */
function MenubarRadioItem(
  props: React.ComponentProps<typeof MenubarPrimitive.RadioItem>,
) {
  const { className, children, ...rest } = props;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground relative flex cursor-default items-center gap-2 rounded-xs py-1.5 pr-2 pl-8 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <MenubarPrimitive.RadioItem
      data-slot="menubar-radio-item"
      className={classes}
      {...rest}
    >
      <span className="pointer-events-none absolute left-2 flex size-3.5 items-center justify-center">
        <MenubarPrimitive.ItemIndicator>
          <CircleIcon className="size-2 fill-current" />
        </MenubarPrimitive.ItemIndicator>
      </span>
      {children}
    </MenubarPrimitive.RadioItem>
  );
}
|
||||
|
||||
/**
 * Label row inside a menubar menu.
 *
 * @param inset Emitted as the `data-inset` attribute, which the class list uses
 *   to add left padding.
 */
function MenubarLabel(
  props: React.ComponentProps<typeof MenubarPrimitive.Label> & {
    inset?: boolean;
  },
) {
  const { className, inset, ...rest } = props;
  const classes = cn(
    "px-2 py-1.5 text-sm font-medium data-[inset]:pl-8",
    className,
  );

  return (
    <MenubarPrimitive.Label
      data-slot="menubar-label"
      data-inset={inset}
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/** One-pixel-high separator between menubar menu sections. */
function MenubarSeparator(
  props: React.ComponentProps<typeof MenubarPrimitive.Separator>,
) {
  const { className, ...rest } = props;
  const classes = cn("bg-border -mx-1 my-1 h-px", className);

  return (
    <MenubarPrimitive.Separator
      data-slot="menubar-separator"
      className={classes}
      {...rest}
    />
  );
}
|
||||
|
||||
/** Right-aligned (`ml-auto`) `span` for text shown at the end of a menubar item. */
function MenubarShortcut(props: React.ComponentProps<"span">) {
  const { className, ...rest } = props;
  const classes = cn(
    "text-muted-foreground ml-auto text-xs tracking-widest",
    className,
  );

  return <span data-slot="menubar-shortcut" className={classes} {...rest} />;
}
|
||||
|
||||
/** Sub-menu root; forwards every prop to Radix `Menubar.Sub`. */
function MenubarSub(props: React.ComponentProps<typeof MenubarPrimitive.Sub>) {
  return <MenubarPrimitive.Sub data-slot="menubar-sub" {...props} />;
}
|
||||
|
||||
/**
 * Item that opens a nested submenu.
 *
 * Renders `MenubarPrimitive.SubTrigger` with the caller's children followed
 * by a right-aligned chevron icon. `inset` is mirrored into `data-inset`,
 * which the `data-[inset]:pl-8` class keys off.
 */
function MenubarSubTrigger(
  allProps: React.ComponentProps<typeof MenubarPrimitive.SubTrigger> & {
    inset?: boolean;
  },
) {
  const { className, inset, children, ...rest } = allProps;
  const triggerClasses = cn(
    "focus:bg-accent focus:text-accent-foreground data-[state=open]:bg-accent data-[state=open]:text-accent-foreground flex cursor-default items-center rounded-sm px-2 py-1.5 text-sm outline-none select-none data-[inset]:pl-8",
    className,
  );

  return (
    <MenubarPrimitive.SubTrigger
      data-slot="menubar-sub-trigger"
      data-inset={inset}
      className={triggerClasses}
      {...rest}
    >
      {children}
      <ChevronRightIcon className="ml-auto h-4 w-4" />
    </MenubarPrimitive.SubTrigger>
  );
}
|
||||
|
||||
/**
 * Panel that holds the items of a nested submenu.
 *
 * Renders `MenubarPrimitive.SubContent` with `data-slot="menubar-sub-content"`
 * and the popover surface / open-close animation classes; caller `className`
 * is merged last.
 */
function MenubarSubContent(
  allProps: React.ComponentProps<typeof MenubarPrimitive.SubContent>,
) {
  const { className, ...rest } = allProps;
  const contentClasses = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 min-w-[8rem] origin-(--radix-menubar-content-transform-origin) overflow-hidden rounded-md border p-1 shadow-lg",
    className,
  );

  return (
    <MenubarPrimitive.SubContent
      data-slot="menubar-sub-content"
      className={contentClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
export {
|
||||
Menubar,
|
||||
MenubarPortal,
|
||||
MenubarMenu,
|
||||
MenubarTrigger,
|
||||
MenubarContent,
|
||||
MenubarGroup,
|
||||
MenubarSeparator,
|
||||
MenubarLabel,
|
||||
MenubarItem,
|
||||
MenubarShortcut,
|
||||
MenubarCheckboxItem,
|
||||
MenubarRadioGroup,
|
||||
MenubarRadioItem,
|
||||
MenubarSub,
|
||||
MenubarSubTrigger,
|
||||
MenubarSubContent,
|
||||
};
|
||||
168
frontend/src/app/components/ui/navigation-menu.tsx
Normal file
168
frontend/src/app/components/ui/navigation-menu.tsx
Normal file
@@ -0,0 +1,168 @@
|
||||
import * as React from "react";
|
||||
import * as NavigationMenuPrimitive from "@radix-ui/react-navigation-menu";
|
||||
import { cva } from "class-variance-authority";
|
||||
import { ChevronDownIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Root of the navigation menu.
 *
 * Renders `NavigationMenuPrimitive.Root` with the caller's children and,
 * unless `viewport` is falsy (it defaults to `true`), a trailing
 * `NavigationMenuViewport`. The flag is also exposed as `data-viewport`, which
 * the `group-data-[viewport=false]/navigation-menu:` classes in this file
 * key off.
 */
function NavigationMenu(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Root> & {
    viewport?: boolean;
  },
) {
  const { className, children, viewport = true, ...rest } = allProps;
  const rootClasses = cn(
    "group/navigation-menu relative flex max-w-max flex-1 items-center justify-center",
    className,
  );
  const viewportNode = viewport && <NavigationMenuViewport />;

  return (
    <NavigationMenuPrimitive.Root
      data-slot="navigation-menu"
      data-viewport={viewport}
      className={rootClasses}
      {...rest}
    >
      {children}
      {viewportNode}
    </NavigationMenuPrimitive.Root>
  );
}
|
||||
|
||||
/**
 * Horizontal list that holds the navigation menu items.
 *
 * Renders `NavigationMenuPrimitive.List` with
 * `data-slot="navigation-menu-list"` and flex layout classes.
 */
function NavigationMenuList(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.List>,
) {
  const { className, ...rest } = allProps;
  const listClasses = cn(
    "group flex flex-1 list-none items-center justify-center gap-1",
    className,
  );

  return (
    <NavigationMenuPrimitive.List
      data-slot="navigation-menu-list"
      className={listClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Single entry of the navigation menu list.
 *
 * Renders `NavigationMenuPrimitive.Item` with
 * `data-slot="navigation-menu-item"` and `relative` positioning.
 */
function NavigationMenuItem(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Item>,
) {
  const { className, ...rest } = allProps;
  const itemClasses = cn("relative", className);

  return (
    <NavigationMenuPrimitive.Item
      data-slot="navigation-menu-item"
      className={itemClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Class generator for navigation menu triggers (no variants configured).
 *
 * The base class string is assembled from groups by concern and joined with
 * single spaces, so `cva` receives exactly one string.
 */
const navigationMenuTriggerStyle = cva(
  [
    // Layout and typography.
    "group inline-flex h-9 w-max items-center justify-center rounded-md bg-background px-4 py-2 text-sm font-medium",
    // Hover / focus colors.
    "hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground",
    // Disabled state.
    "disabled:pointer-events-none disabled:opacity-50",
    // Open state.
    "data-[state=open]:hover:bg-accent data-[state=open]:text-accent-foreground data-[state=open]:focus:bg-accent data-[state=open]:bg-accent/50",
    // Focus ring and transition.
    "focus-visible:ring-ring/50 outline-none transition-[color,box-shadow] focus-visible:ring-[3px] focus-visible:outline-1",
  ].join(" "),
);
|
||||
|
||||
/**
 * Button that toggles a navigation menu item's content.
 *
 * Renders `NavigationMenuPrimitive.Trigger` styled by
 * `navigationMenuTriggerStyle()`, with the caller's children, a single space,
 * and a chevron that rotates 180° via `group-data-[state=open]:rotate-180`.
 */
function NavigationMenuTrigger(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Trigger>,
) {
  const { className, children, ...rest } = allProps;
  const triggerClasses = cn(navigationMenuTriggerStyle(), "group", className);

  return (
    <NavigationMenuPrimitive.Trigger
      data-slot="navigation-menu-trigger"
      className={triggerClasses}
      {...rest}
    >
      {children}{" "}
      <ChevronDownIcon
        className="relative top-[1px] ml-1 size-3 transition duration-300 group-data-[state=open]:rotate-180"
        aria-hidden="true"
      />
    </NavigationMenuPrimitive.Trigger>
  );
}
|
||||
|
||||
/**
 * Content panel shown for an open navigation menu item.
 *
 * Renders `NavigationMenuPrimitive.Content` with two class groups: motion
 * and positioning classes that always apply, and a set scoped by
 * `group-data-[viewport=false]/navigation-menu:` for the case where the root
 * carries `data-viewport="false"`.
 */
function NavigationMenuContent(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Content>,
) {
  const { className, ...rest } = allProps;

  const motionClasses =
    "data-[motion^=from-]:animate-in data-[motion^=to-]:animate-out data-[motion^=from-]:fade-in data-[motion^=to-]:fade-out data-[motion=from-end]:slide-in-from-right-52 data-[motion=from-start]:slide-in-from-left-52 data-[motion=to-end]:slide-out-to-right-52 data-[motion=to-start]:slide-out-to-left-52 top-0 left-0 w-full p-2 pr-2.5 md:absolute md:w-auto";
  const noViewportClasses =
    "group-data-[viewport=false]/navigation-menu:bg-popover group-data-[viewport=false]/navigation-menu:text-popover-foreground group-data-[viewport=false]/navigation-menu:data-[state=open]:animate-in group-data-[viewport=false]/navigation-menu:data-[state=closed]:animate-out group-data-[viewport=false]/navigation-menu:data-[state=closed]:zoom-out-95 group-data-[viewport=false]/navigation-menu:data-[state=open]:zoom-in-95 group-data-[viewport=false]/navigation-menu:data-[state=open]:fade-in-0 group-data-[viewport=false]/navigation-menu:data-[state=closed]:fade-out-0 group-data-[viewport=false]/navigation-menu:top-full group-data-[viewport=false]/navigation-menu:mt-1.5 group-data-[viewport=false]/navigation-menu:overflow-hidden group-data-[viewport=false]/navigation-menu:rounded-md group-data-[viewport=false]/navigation-menu:border group-data-[viewport=false]/navigation-menu:shadow group-data-[viewport=false]/navigation-menu:duration-200 **:data-[slot=navigation-menu-link]:focus:ring-0 **:data-[slot=navigation-menu-link]:focus:outline-none";

  return (
    <NavigationMenuPrimitive.Content
      data-slot="navigation-menu-content"
      className={cn(motionClasses, noViewportClasses, className)}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Viewport for navigation menu content.
 *
 * Wraps `NavigationMenuPrimitive.Viewport` in an absolutely positioned
 * `<div>` placed at `top-full`. `className` and all other props go to the
 * primitive, not to the wrapper.
 */
function NavigationMenuViewport(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Viewport>,
) {
  const { className, ...rest } = allProps;
  const wrapperClasses = cn(
    "absolute top-full left-0 isolate z-50 flex justify-center",
  );
  const viewportClasses = cn(
    "origin-top-center bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-90 relative mt-1.5 h-[var(--radix-navigation-menu-viewport-height)] w-full overflow-hidden rounded-md border shadow md:w-[var(--radix-navigation-menu-viewport-width)]",
    className,
  );

  return (
    <div className={wrapperClasses}>
      <NavigationMenuPrimitive.Viewport
        data-slot="navigation-menu-viewport"
        className={viewportClasses}
        {...rest}
      />
    </div>
  );
}
|
||||
|
||||
/**
 * Link entry inside the navigation menu.
 *
 * Renders `NavigationMenuPrimitive.Link` with
 * `data-slot="navigation-menu-link"`; the classes style hover, focus and the
 * `data-[active=true]` state.
 */
function NavigationMenuLink(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Link>,
) {
  const { className, ...rest } = allProps;
  const linkClasses = cn(
    "data-[active=true]:focus:bg-accent data-[active=true]:hover:bg-accent data-[active=true]:bg-accent/50 data-[active=true]:text-accent-foreground hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground focus-visible:ring-ring/50 [&_svg:not([class*='text-'])]:text-muted-foreground flex flex-col gap-1 rounded-sm p-2 text-sm transition-all outline-none focus-visible:ring-[3px] focus-visible:outline-1 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <NavigationMenuPrimitive.Link
      data-slot="navigation-menu-link"
      className={linkClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Small arrow indicator for the navigation menu.
 *
 * Renders `NavigationMenuPrimitive.Indicator` containing a rotated square
 * `<div>` that draws the arrow tip; fade classes key off `data-state`.
 */
function NavigationMenuIndicator(
  allProps: React.ComponentProps<typeof NavigationMenuPrimitive.Indicator>,
) {
  const { className, ...rest } = allProps;
  const indicatorClasses = cn(
    "data-[state=visible]:animate-in data-[state=hidden]:animate-out data-[state=hidden]:fade-out data-[state=visible]:fade-in top-full z-[1] flex h-1.5 items-end justify-center overflow-hidden",
    className,
  );

  return (
    <NavigationMenuPrimitive.Indicator
      data-slot="navigation-menu-indicator"
      className={indicatorClasses}
      {...rest}
    >
      <div className="bg-border relative top-[60%] h-2 w-2 rotate-45 rounded-tl-sm shadow-md" />
    </NavigationMenuPrimitive.Indicator>
  );
}
|
||||
|
||||
export {
|
||||
NavigationMenu,
|
||||
NavigationMenuList,
|
||||
NavigationMenuItem,
|
||||
NavigationMenuContent,
|
||||
NavigationMenuTrigger,
|
||||
NavigationMenuLink,
|
||||
NavigationMenuIndicator,
|
||||
NavigationMenuViewport,
|
||||
navigationMenuTriggerStyle,
|
||||
};
|
||||
127
frontend/src/app/components/ui/pagination.tsx
Normal file
127
frontend/src/app/components/ui/pagination.tsx
Normal file
@@ -0,0 +1,127 @@
|
||||
import * as React from "react";
|
||||
import {
|
||||
ChevronLeftIcon,
|
||||
ChevronRightIcon,
|
||||
MoreHorizontalIcon,
|
||||
} from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
import { Button, buttonVariants } from "./button";
|
||||
|
||||
/**
 * Outer `<nav>` landmark for a pagination control.
 *
 * Sets `role="navigation"`, `aria-label="pagination"` and
 * `data-slot="pagination"`; caller props are spread last and may override.
 */
function Pagination(allProps: React.ComponentProps<"nav">) {
  const { className, ...rest } = allProps;
  const navClasses = cn("mx-auto flex w-full justify-center", className);

  return (
    <nav
      role="navigation"
      aria-label="pagination"
      data-slot="pagination"
      className={navClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * `<ul>` that lays pagination items out in a horizontal row.
 */
function PaginationContent(allProps: React.ComponentProps<"ul">) {
  const { className, ...rest } = allProps;
  const listClasses = cn("flex flex-row items-center gap-1", className);

  return <ul data-slot="pagination-content" className={listClasses} {...rest} />;
}
|
||||
|
||||
/**
 * `<li>` wrapper for one pagination entry; forwards every prop and tags the
 * element with `data-slot="pagination-item"`.
 */
function PaginationItem(itemProps: React.ComponentProps<"li">) {
  return <li data-slot="pagination-item" {...itemProps} />;
}
|
||||
|
||||
/**
 * Props accepted by `PaginationLink`: every `<a>` prop, the `size` prop of
 * `Button`, and an optional `isActive` flag.
 */
type PaginationLinkProps = React.ComponentProps<"a"> &
  Pick<React.ComponentProps<typeof Button>, "size"> & {
    isActive?: boolean;
  };
|
||||
|
||||
/**
 * Anchor styled as a button for one page of a pagination control.
 *
 * When `isActive` is truthy the link gets `aria-current="page"` and the
 * "outline" button variant; otherwise the "ghost" variant. `size` defaults
 * to "icon". `isActive` is also exposed as `data-active`.
 */
function PaginationLink(allProps: PaginationLinkProps) {
  const { className, isActive, size = "icon", ...rest } = allProps;
  const variant = isActive ? "outline" : "ghost";
  const linkClasses = cn(buttonVariants({ variant, size }), className);

  return (
    <a
      aria-current={isActive ? "page" : undefined}
      data-slot="pagination-link"
      data-active={isActive}
      className={linkClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * "Previous" pagination link: a left chevron plus a "Previous" label that is
 * hidden below the `sm` breakpoint.
 *
 * Defaults (`aria-label`, `size="default"`) are set before the prop spread,
 * so caller-supplied values win.
 */
function PaginationPrevious(
  allProps: React.ComponentProps<typeof PaginationLink>,
) {
  const { className, ...rest } = allProps;
  const linkClasses = cn("gap-1 px-2.5 sm:pl-2.5", className);

  return (
    <PaginationLink
      aria-label="Go to previous page"
      size="default"
      className={linkClasses}
      {...rest}
    >
      <ChevronLeftIcon />
      <span className="hidden sm:block">Previous</span>
    </PaginationLink>
  );
}
|
||||
|
||||
/**
 * "Next" pagination link: a "Next" label (hidden below the `sm` breakpoint)
 * followed by a right chevron.
 *
 * Defaults (`aria-label`, `size="default"`) are set before the prop spread,
 * so caller-supplied values win.
 */
function PaginationNext(allProps: React.ComponentProps<typeof PaginationLink>) {
  const { className, ...rest } = allProps;
  const linkClasses = cn("gap-1 px-2.5 sm:pr-2.5", className);

  return (
    <PaginationLink
      aria-label="Go to next page"
      size="default"
      className={linkClasses}
      {...rest}
    >
      <span className="hidden sm:block">Next</span>
      <ChevronRightIcon />
    </PaginationLink>
  );
}
|
||||
|
||||
/**
 * Ellipsis placeholder marking a gap in the page list.
 *
 * Renders a horizontal-dots icon plus a visually hidden "More pages" label.
 *
 * Only the decorative icon is marked `aria-hidden`. Putting `aria-hidden` on
 * the outer span (as before) also hid the nested `sr-only` label, so the
 * label could never reach assistive technology. Callers that want the whole
 * placeholder hidden can still pass `aria-hidden` through props.
 */
function PaginationEllipsis({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      data-slot="pagination-ellipsis"
      className={cn("flex size-9 items-center justify-center", className)}
      {...props}
    >
      <MoreHorizontalIcon aria-hidden className="size-4" />
      <span className="sr-only">More pages</span>
    </span>
  );
}
|
||||
|
||||
export {
|
||||
Pagination,
|
||||
PaginationContent,
|
||||
PaginationLink,
|
||||
PaginationItem,
|
||||
PaginationPrevious,
|
||||
PaginationNext,
|
||||
PaginationEllipsis,
|
||||
};
|
||||
48
frontend/src/app/components/ui/popover.tsx
Normal file
48
frontend/src/app/components/ui/popover.tsx
Normal file
@@ -0,0 +1,48 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as PopoverPrimitive from "@radix-ui/react-popover";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Popover root: forwards every prop to `PopoverPrimitive.Root` and tags it
 * with `data-slot="popover"`.
 */
function Popover(
  rootProps: React.ComponentProps<typeof PopoverPrimitive.Root>,
) {
  return <PopoverPrimitive.Root data-slot="popover" {...rootProps} />;
}
|
||||
|
||||
/**
 * Popover trigger: forwards every prop to `PopoverPrimitive.Trigger` and tags
 * it with `data-slot="popover-trigger"`.
 */
function PopoverTrigger(
  triggerProps: React.ComponentProps<typeof PopoverPrimitive.Trigger>,
) {
  return (
    <PopoverPrimitive.Trigger data-slot="popover-trigger" {...triggerProps} />
  );
}
|
||||
|
||||
/**
 * Floating popover panel, rendered inside `PopoverPrimitive.Portal`.
 *
 * `align` defaults to "center" and `sideOffset` to 4; both are passed to
 * `PopoverPrimitive.Content` before the remaining props are spread.
 */
function PopoverContent(
  allProps: React.ComponentProps<typeof PopoverPrimitive.Content>,
) {
  const { className, align = "center", sideOffset = 4, ...rest } = allProps;
  const contentClasses = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-72 origin-(--radix-popover-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden",
    className,
  );

  return (
    <PopoverPrimitive.Portal>
      <PopoverPrimitive.Content
        data-slot="popover-content"
        align={align}
        sideOffset={sideOffset}
        className={contentClasses}
        {...rest}
      />
    </PopoverPrimitive.Portal>
  );
}
|
||||
|
||||
/**
 * Popover anchor: forwards every prop to `PopoverPrimitive.Anchor` and tags
 * it with `data-slot="popover-anchor"`.
 */
function PopoverAnchor(
  anchorProps: React.ComponentProps<typeof PopoverPrimitive.Anchor>,
) {
  return <PopoverPrimitive.Anchor data-slot="popover-anchor" {...anchorProps} />;
}
|
||||
|
||||
export { Popover, PopoverTrigger, PopoverContent, PopoverAnchor };
|
||||
31
frontend/src/app/components/ui/progress.tsx
Normal file
31
frontend/src/app/components/ui/progress.tsx
Normal file
@@ -0,0 +1,31 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as ProgressPrimitive from "@radix-ui/react-progress";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Horizontal progress bar.
 *
 * `value` is interpreted as a percentage for the visual fill: the indicator
 * is full width and shifted left by `100 - value` percent. A missing, null
 * or NaN value renders as 0.
 *
 * Two fixes over the previous version:
 * - `value` is now forwarded to `ProgressPrimitive.Root`. It used to be
 *   destructured and dropped, so the root never received the current value
 *   that it uses to expose progress state.
 * - The fill percentage is clamped to 0..100, so an out-of-range value can
 *   no longer produce a malformed transform such as `translateX(--50%)`.
 *
 * NOTE(review): the fill does not take a custom `max` prop into account
 * (unchanged from before) — confirm whether callers rely on `max`.
 */
function Progress({
  className,
  value,
  ...props
}: React.ComponentProps<typeof ProgressPrimitive.Root>) {
  // `value || 0` maps undefined, null and NaN to 0 before clamping.
  const percent = Math.min(100, Math.max(0, value || 0));

  return (
    <ProgressPrimitive.Root
      data-slot="progress"
      value={value}
      className={cn(
        "bg-primary/20 relative h-2 w-full overflow-hidden rounded-full",
        className,
      )}
      {...props}
    >
      <ProgressPrimitive.Indicator
        data-slot="progress-indicator"
        className="bg-primary h-full w-full flex-1 transition-all"
        style={{ transform: `translateX(-${100 - percent}%)` }}
      />
    </ProgressPrimitive.Root>
  );
}
|
||||
|
||||
export { Progress };
|
||||
45
frontend/src/app/components/ui/radio-group.tsx
Normal file
45
frontend/src/app/components/ui/radio-group.tsx
Normal file
@@ -0,0 +1,45 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as RadioGroupPrimitive from "@radix-ui/react-radio-group";
|
||||
import { CircleIcon } from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Radio group container: renders `RadioGroupPrimitive.Root` as a grid with
 * `gap-3` and `data-slot="radio-group"`.
 */
function RadioGroup(
  allProps: React.ComponentProps<typeof RadioGroupPrimitive.Root>,
) {
  const { className, ...rest } = allProps;
  const groupClasses = cn("grid gap-3", className);

  return (
    <RadioGroupPrimitive.Root
      data-slot="radio-group"
      className={groupClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Single radio button.
 *
 * Renders `RadioGroupPrimitive.Item` with a nested
 * `RadioGroupPrimitive.Indicator` holding a filled circle icon centered via
 * absolute positioning.
 */
function RadioGroupItem(
  allProps: React.ComponentProps<typeof RadioGroupPrimitive.Item>,
) {
  const { className, ...rest } = allProps;
  const itemClasses = cn(
    "border-input text-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 aspect-square size-4 shrink-0 rounded-full border shadow-xs transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  return (
    <RadioGroupPrimitive.Item
      data-slot="radio-group-item"
      className={itemClasses}
      {...rest}
    >
      <RadioGroupPrimitive.Indicator
        data-slot="radio-group-indicator"
        className="relative flex items-center justify-center"
      >
        <CircleIcon className="fill-primary absolute top-1/2 left-1/2 size-2 -translate-x-1/2 -translate-y-1/2" />
      </RadioGroupPrimitive.Indicator>
    </RadioGroupPrimitive.Item>
  );
}
|
||||
|
||||
export { RadioGroup, RadioGroupItem };
|
||||
56
frontend/src/app/components/ui/resizable.tsx
Normal file
56
frontend/src/app/components/ui/resizable.tsx
Normal file
@@ -0,0 +1,56 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import { GripVerticalIcon } from "lucide-react";
|
||||
import * as ResizablePrimitive from "react-resizable-panels";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Container for resizable panels.
 *
 * Renders `ResizablePrimitive.PanelGroup` as a full-size flex box that
 * switches to a column when `data-panel-group-direction=vertical` is present.
 */
function ResizablePanelGroup(
  allProps: React.ComponentProps<typeof ResizablePrimitive.PanelGroup>,
) {
  const { className, ...rest } = allProps;
  const groupClasses = cn(
    "flex h-full w-full data-[panel-group-direction=vertical]:flex-col",
    className,
  );

  return (
    <ResizablePrimitive.PanelGroup
      data-slot="resizable-panel-group"
      className={groupClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Single resizable panel: forwards every prop to `ResizablePrimitive.Panel`
 * and tags it with `data-slot="resizable-panel"`.
 */
function ResizablePanel(
  panelProps: React.ComponentProps<typeof ResizablePrimitive.Panel>,
) {
  return <ResizablePrimitive.Panel data-slot="resizable-panel" {...panelProps} />;
}
|
||||
|
||||
/**
 * Drag handle between two resizable panels.
 *
 * Renders `ResizablePrimitive.PanelResizeHandle`; when `withHandle` is
 * truthy a small grip box with a vertical-grip icon is drawn inside it.
 */
function ResizableHandle(
  allProps: React.ComponentProps<
    typeof ResizablePrimitive.PanelResizeHandle
  > & {
    withHandle?: boolean;
  },
) {
  const { withHandle, className, ...rest } = allProps;
  const handleClasses = cn(
    "bg-border focus-visible:ring-ring relative flex w-px items-center justify-center after:absolute after:inset-y-0 after:left-1/2 after:w-1 after:-translate-x-1/2 focus-visible:ring-1 focus-visible:ring-offset-1 focus-visible:outline-hidden data-[panel-group-direction=vertical]:h-px data-[panel-group-direction=vertical]:w-full data-[panel-group-direction=vertical]:after:left-0 data-[panel-group-direction=vertical]:after:h-1 data-[panel-group-direction=vertical]:after:w-full data-[panel-group-direction=vertical]:after:-translate-y-1/2 data-[panel-group-direction=vertical]:after:translate-x-0 [&[data-panel-group-direction=vertical]>div]:rotate-90",
    className,
  );
  const grip = withHandle && (
    <div className="bg-border z-10 flex h-4 w-3 items-center justify-center rounded-xs border">
      <GripVerticalIcon className="size-2.5" />
    </div>
  );

  return (
    <ResizablePrimitive.PanelResizeHandle
      data-slot="resizable-handle"
      className={handleClasses}
      {...rest}
    >
      {grip}
    </ResizablePrimitive.PanelResizeHandle>
  );
}
|
||||
|
||||
export { ResizablePanelGroup, ResizablePanel, ResizableHandle };
|
||||
58
frontend/src/app/components/ui/scroll-area.tsx
Normal file
58
frontend/src/app/components/ui/scroll-area.tsx
Normal file
@@ -0,0 +1,58 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Scrollable region with a custom scrollbar.
 *
 * Renders `ScrollAreaPrimitive.Root` containing a viewport with the caller's
 * children, a default `ScrollBar`, and the primitive's corner element.
 */
function ScrollArea(
  allProps: React.ComponentProps<typeof ScrollAreaPrimitive.Root>,
) {
  const { className, children, ...rest } = allProps;
  const rootClasses = cn("relative", className);

  return (
    <ScrollAreaPrimitive.Root
      data-slot="scroll-area"
      className={rootClasses}
      {...rest}
    >
      <ScrollAreaPrimitive.Viewport
        data-slot="scroll-area-viewport"
        className="focus-visible:ring-ring/50 size-full rounded-[inherit] transition-[color,box-shadow] outline-none focus-visible:ring-[3px] focus-visible:outline-1"
      >
        {children}
      </ScrollAreaPrimitive.Viewport>
      <ScrollBar />
      <ScrollAreaPrimitive.Corner />
    </ScrollAreaPrimitive.Root>
  );
}
|
||||
|
||||
/**
 * Scrollbar track and thumb for `ScrollArea`.
 *
 * `orientation` defaults to "vertical" and selects which sizing/border
 * classes are applied to the track.
 */
function ScrollBar(
  allProps: React.ComponentProps<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>,
) {
  const { className, orientation = "vertical", ...rest } = allProps;
  const isVertical = orientation === "vertical";
  const isHorizontal = orientation === "horizontal";
  const trackClasses = cn(
    "flex touch-none p-px transition-colors select-none",
    isVertical && "h-full w-2.5 border-l border-l-transparent",
    isHorizontal && "h-2.5 flex-col border-t border-t-transparent",
    className,
  );

  return (
    <ScrollAreaPrimitive.ScrollAreaScrollbar
      data-slot="scroll-area-scrollbar"
      orientation={orientation}
      className={trackClasses}
      {...rest}
    >
      <ScrollAreaPrimitive.ScrollAreaThumb
        data-slot="scroll-area-thumb"
        className="bg-border relative flex-1 rounded-full"
      />
    </ScrollAreaPrimitive.ScrollAreaScrollbar>
  );
}
|
||||
|
||||
export { ScrollArea, ScrollBar };
|
||||
189
frontend/src/app/components/ui/select.tsx
Normal file
189
frontend/src/app/components/ui/select.tsx
Normal file
@@ -0,0 +1,189 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import * as SelectPrimitive from "@radix-ui/react-select";
|
||||
import {
|
||||
CheckIcon,
|
||||
ChevronDownIcon,
|
||||
ChevronUpIcon,
|
||||
} from "lucide-react";
|
||||
|
||||
import { cn } from "./utils";
|
||||
|
||||
/**
 * Select root: forwards every prop to `SelectPrimitive.Root` and tags it with
 * `data-slot="select"`.
 */
function Select(rootProps: React.ComponentProps<typeof SelectPrimitive.Root>) {
  return <SelectPrimitive.Root data-slot="select" {...rootProps} />;
}
|
||||
|
||||
/**
 * Select option group: forwards every prop to `SelectPrimitive.Group` and
 * tags it with `data-slot="select-group"`.
 */
function SelectGroup(
  groupProps: React.ComponentProps<typeof SelectPrimitive.Group>,
) {
  return <SelectPrimitive.Group data-slot="select-group" {...groupProps} />;
}
|
||||
|
||||
/**
 * Select value display: forwards every prop to `SelectPrimitive.Value` and
 * tags it with `data-slot="select-value"`.
 */
function SelectValue(
  valueProps: React.ComponentProps<typeof SelectPrimitive.Value>,
) {
  return <SelectPrimitive.Value data-slot="select-value" {...valueProps} />;
}
|
||||
|
||||
/**
 * Button that opens the select.
 *
 * Renders `SelectPrimitive.Trigger` with the caller's children followed by a
 * chevron icon. `size` ("sm" | "default", default "default") is exposed as
 * `data-size`, which the `data-[size=...]` height classes key off.
 */
function SelectTrigger(
  allProps: React.ComponentProps<typeof SelectPrimitive.Trigger> & {
    size?: "sm" | "default";
  },
) {
  const { className, size = "default", children, ...rest } = allProps;
  const triggerClasses = cn(
    "border-input data-[placeholder]:text-muted-foreground [&_svg:not([class*='text-'])]:text-muted-foreground focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 dark:hover:bg-input/50 flex w-full items-center justify-between gap-2 rounded-md border bg-input-background px-3 py-2 text-sm whitespace-nowrap transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50 data-[size=default]:h-9 data-[size=sm]:h-8 *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center *:data-[slot=select-value]:gap-2 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <SelectPrimitive.Trigger
      data-slot="select-trigger"
      data-size={size}
      className={triggerClasses}
      {...rest}
    >
      {children}
      <SelectPrimitive.Icon asChild>
        <ChevronDownIcon className="size-4 opacity-50" />
      </SelectPrimitive.Icon>
    </SelectPrimitive.Trigger>
  );
}
|
||||
|
||||
/**
 * Dropdown panel listing the select options, rendered in a portal.
 *
 * `position` defaults to "popper"; in that mode extra side-offset classes
 * are added to the panel and the viewport is sized from the trigger's CSS
 * variables. The caller's children sit between scroll-up and scroll-down
 * buttons.
 */
function SelectContent(
  allProps: React.ComponentProps<typeof SelectPrimitive.Content>,
) {
  const { className, children, position = "popper", ...rest } = allProps;
  const isPopper = position === "popper";

  const panelClasses = cn(
    "bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 relative z-50 max-h-(--radix-select-content-available-height) min-w-[8rem] origin-(--radix-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border shadow-md",
    isPopper &&
      "data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
    className,
  );
  const viewportClasses = cn(
    "p-1",
    isPopper &&
      "h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)] scroll-my-1",
  );

  return (
    <SelectPrimitive.Portal>
      <SelectPrimitive.Content
        data-slot="select-content"
        className={panelClasses}
        position={position}
        {...rest}
      >
        <SelectScrollUpButton />
        <SelectPrimitive.Viewport className={viewportClasses}>
          {children}
        </SelectPrimitive.Viewport>
        <SelectScrollDownButton />
      </SelectPrimitive.Content>
    </SelectPrimitive.Portal>
  );
}
|
||||
|
||||
/**
 * Muted heading for a group of select options; renders
 * `SelectPrimitive.Label` with `data-slot="select-label"`.
 */
function SelectLabel(
  allProps: React.ComponentProps<typeof SelectPrimitive.Label>,
) {
  const { className, ...rest } = allProps;
  const labelClasses = cn("text-muted-foreground px-2 py-1.5 text-xs", className);

  return (
    <SelectPrimitive.Label
      data-slot="select-label"
      className={labelClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * One selectable option.
 *
 * Renders `SelectPrimitive.Item` with a right-aligned slot holding the
 * item indicator (a check icon) and the caller's children wrapped in
 * `SelectPrimitive.ItemText`.
 */
function SelectItem(
  allProps: React.ComponentProps<typeof SelectPrimitive.Item>,
) {
  const { className, children, ...rest } = allProps;
  const itemClasses = cn(
    "focus:bg-accent focus:text-accent-foreground [&_svg:not([class*='text-'])]:text-muted-foreground relative flex w-full cursor-default items-center gap-2 rounded-sm py-1.5 pr-8 pl-2 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2",
    className,
  );

  return (
    <SelectPrimitive.Item
      data-slot="select-item"
      className={itemClasses}
      {...rest}
    >
      <span className="absolute right-2 flex size-3.5 items-center justify-center">
        <SelectPrimitive.ItemIndicator>
          <CheckIcon className="size-4" />
        </SelectPrimitive.ItemIndicator>
      </span>
      <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
    </SelectPrimitive.Item>
  );
}
|
||||
|
||||
/**
 * Thin divider between select options; renders `SelectPrimitive.Separator`
 * with `data-slot="select-separator"` and pointer events disabled.
 */
function SelectSeparator(
  allProps: React.ComponentProps<typeof SelectPrimitive.Separator>,
) {
  const { className, ...rest } = allProps;
  const separatorClasses = cn(
    "bg-border pointer-events-none -mx-1 my-1 h-px",
    className,
  );

  return (
    <SelectPrimitive.Separator
      data-slot="select-separator"
      className={separatorClasses}
      {...rest}
    />
  );
}
|
||||
|
||||
/**
 * Scroll-up control at the top of the select panel; renders
 * `SelectPrimitive.ScrollUpButton` with an up chevron.
 */
function SelectScrollUpButton(
  allProps: React.ComponentProps<typeof SelectPrimitive.ScrollUpButton>,
) {
  const { className, ...rest } = allProps;
  const buttonClasses = cn(
    "flex cursor-default items-center justify-center py-1",
    className,
  );

  return (
    <SelectPrimitive.ScrollUpButton
      data-slot="select-scroll-up-button"
      className={buttonClasses}
      {...rest}
    >
      <ChevronUpIcon className="size-4" />
    </SelectPrimitive.ScrollUpButton>
  );
}
|
||||
|
||||
/**
 * Scroll-down control at the bottom of the select panel; renders
 * `SelectPrimitive.ScrollDownButton` with a down chevron.
 */
function SelectScrollDownButton(
  allProps: React.ComponentProps<typeof SelectPrimitive.ScrollDownButton>,
) {
  const { className, ...rest } = allProps;
  const buttonClasses = cn(
    "flex cursor-default items-center justify-center py-1",
    className,
  );

  return (
    <SelectPrimitive.ScrollDownButton
      data-slot="select-scroll-down-button"
      className={buttonClasses}
      {...rest}
    >
      <ChevronDownIcon className="size-4" />
    </SelectPrimitive.ScrollDownButton>
  );
}
|
||||
|
||||
export {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectGroup,
|
||||
SelectItem,
|
||||
SelectLabel,
|
||||
SelectScrollDownButton,
|
||||
SelectScrollUpButton,
|
||||
SelectSeparator,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
};
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user