GraphRAG Studio — initial commit: multimodal RAG system with KG visualization
Full-stack application for a document-to-knowledge-graph pipeline:
- Backend: FastAPI + LangGraph ReAct agent + DeepSeek + MinerU parsing
- Frontend: React 19 + Vite + D3.js + shadcn/ui
- Pipeline: MinerU parsing → LangExtract entity extraction → KG building

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
0
backend/models/__init__.py
Normal file
0
backend/models/__init__.py
Normal file
360
backend/models/schemas.py
Normal file
360
backend/models/schemas.py
Normal file
@@ -0,0 +1,360 @@
|
||||
"""
|
||||
Pydantic v2 schemas — all API data objects per backend_service_specification-v1.0.md
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import Any, Generic, Optional, TypeVar
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Universal response envelope
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class APIResponse(BaseModel, Generic[T]):
    """Generic response envelope wrapping an optional payload of type ``T``.

    Each instance carries a numeric ``code``, a text ``msg``, a
    ``request_id`` and the ``data`` payload. The :meth:`ok` and :meth:`err`
    class methods build success and error envelopes respectively.
    """

    # The field defaults match what ``ok()`` produces: code 0, "success".
    code: int = 0
    msg: str = "success"
    # A new random UUID4 string is generated per instance unless one is given.
    request_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    data: Optional[T] = None

    @classmethod
    def ok(cls, data: Any = None) -> "APIResponse":
        """Return an envelope with ``code=0``, ``msg="success"`` and *data*."""
        envelope = cls(code=0, msg="success", data=data)
        return envelope

    @classmethod
    def err(cls, code: int, msg: str) -> "APIResponse":
        """Return an envelope carrying *code* and *msg* with ``data=None``."""
        envelope = cls(code=code, msg=msg, data=None)
        return envelope
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# A. Document schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class DocumentInfo(BaseModel):
    """Record describing a single document.

    ``doc_id``, ``filename``, ``format``, ``size_bytes``, ``uploaded_at``
    and ``status`` are required; the remaining fields have defaults.
    """

    doc_id: str
    filename: str
    format: str
    size_bytes: int
    # Optional page count; defaults to None when not supplied.
    pages: Optional[int] = None
    # Stored as a plain string; the timestamp format is not enforced here.
    uploaded_at: str
    # Expected values: uploaded | indexed | failed (not validated by the model)
    status: str
    language: str = "ch"
    enable_formula: bool = True
    enable_table: bool = True
|
||||
|
||||
|
||||
class DocumentListData(BaseModel):
    """Paginated list payload whose items are :class:`DocumentInfo` records."""

    # Pagination metadata; all three values are required integers.
    total: int
    page: int
    page_size: int
    # The documents on this page.
    items: list[DocumentInfo]
|
||||
|
||||
|
||||
class DeleteDocumentData(BaseModel):
    """Payload reporting the outcome of a document deletion."""

    deleted: bool
    doc_id: str
    # Counts of graph nodes / edges removed; both are required integers.
    removed_nodes: int
    removed_edges: int
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# B. Indexing job schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class IndexingProgress(BaseModel):
    """Progress counters for an indexing job; every counter defaults to 0."""

    parsed_pages: int = 0
    total_pages: int = 0
    extracted_entities: int = 0
|
||||
|
||||
|
||||
class IndexingJobStatus(BaseModel):
    """Status snapshot of one indexing job.

    ``job_id``, ``doc_id``, ``status`` and ``created_at`` are required; the
    other fields fall back to empty / zero / None defaults.
    """

    job_id: str
    doc_id: str
    # Expected values (not validated by the model):
    # submitted|queued|parsing|extracting|indexing|done|failed|cancelled
    status: str
    stage: str = ""
    # A fresh IndexingProgress (all counters 0) is created per instance.
    progress: IndexingProgress = Field(default_factory=IndexingProgress)
    created_at: str
    elapsed_seconds: float = 0.0
    # Error text, or None when no error is recorded.
    error: Optional[str] = None
|
||||
|
||||
|
||||
class StartIndexRequest(BaseModel):
    """Request body carrying the ``doc_id`` of the document to index."""

    doc_id: str
|
||||
|
||||
|
||||
class CancelJobData(BaseModel):
    """Payload reporting the outcome of a job cancellation request."""

    cancelled: bool
    job_id: str
    # The status string the job had before the cancellation was processed.
    previous_status: str
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# C. KG schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class KGNode(BaseModel):
    """A knowledge-graph node.

    ``id``, ``name``, ``type`` and ``source_doc`` are required; the span,
    confidence, page and degree fields are optional or default to 0.
    """

    id: str
    name: str
    type: str
    source_doc: str
    # Character offsets; both default to None when unavailable.
    char_start: Optional[int] = None
    char_end: Optional[int] = None
    # NOTE: confidence is typed as a string, not a number.
    confidence: Optional[str] = None
    page: int = 0
    degree: int = 0
|
||||
|
||||
|
||||
class KGNodeDetail(KGNode):
    """:class:`KGNode` extended with two extra metrics, both defaulting to 0."""

    degree_centrality: float = 0.0
    neighbor_count: int = 0
|
||||
|
||||
|
||||
class KGEdge(BaseModel):
    """A knowledge-graph edge between a ``source`` and a ``target``."""

    # Endpoint identifiers (presumably KGNode ids; verify against callers).
    source: str
    target: str
    # Relation label; "CO_OCCURS_IN" is used when none is supplied.
    relation: str = "CO_OCCURS_IN"
    doc_id: str
    page: int = 0
|
||||
|
||||
|
||||
class KGNodeListData(BaseModel):
    """Paginated list payload whose items are :class:`KGNode` objects."""

    # Pagination metadata; all three values are required integers.
    total: int
    page: int
    page_size: int
    # The nodes on this page.
    items: list[KGNode]
|
||||
|
||||
|
||||
class KGEdgeListData(BaseModel):
    """Paginated list payload whose items are :class:`KGEdge` objects."""

    # Pagination metadata; all three values are required integers.
    total: int
    page: int
    page_size: int
    # The edges on this page.
    items: list[KGEdge]
|
||||
|
||||
|
||||
class KGStatsData(BaseModel):
    """Aggregate statistics payload for the knowledge graph.

    Every field is required.
    """

    total_nodes: int
    total_edges: int
    density: float
    # Both mappings go from a string label to an integer count.
    type_distribution: dict[str, int]
    relation_types: dict[str, int]
    # Untyped dicts; their keys are not constrained by this model.
    top5_central_nodes: list[dict]
    source_documents: list[str]
|
||||
|
||||
|
||||
class KGExportData(BaseModel):
    """Export payload bundling graph nodes and edges with summary metadata."""

    format: str
    # No default is declared, so the field must be supplied explicitly,
    # although None is an accepted value.
    doc_id: Optional[str]
    total_nodes: int
    total_edges: int
    exported_at: str
    # The exported graph content.
    nodes: list[KGNode]
    edges: list[KGEdge]
|
||||
|
||||
|
||||
class NeighborInfo(BaseModel):
    """Compact node summary used in neighbor query results.

    All four fields are required (``page`` has no default here).
    """

    id: str
    name: str
    type: str
    page: int
|
||||
|
||||
|
||||
class NeighborsData(BaseModel):
    """Neighbor query payload built around one ``center`` node."""

    center: NeighborInfo
    hops: int
    # Keys are strings (presumably the hop distance; verify against the
    # producer), each mapping to the neighbors found for that key.
    neighbors_by_hop: dict[str, list[NeighborInfo]]
    total_neighbors: int
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# D. QA schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ChatMessage(BaseModel):
    """One chat message: a ``role`` and its text ``content``."""

    # Expected values: human | ai (not validated by the model)
    role: str
    content: str
|
||||
|
||||
|
||||
class QueryRequest(BaseModel):
    """Request body for a question, with optional prior chat messages."""

    question: str
    # Defaults to a new empty list per instance.
    history: list[ChatMessage] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ToolCallRecord(BaseModel):
    """Record of one tool call: tool name, input dict and output string."""

    tool: str
    # Untyped dict; its keys are not constrained by this model.
    input: dict
    output: str
|
||||
|
||||
|
||||
class QAResult(BaseModel):
    """Result of one question: the answer plus tool calls and cited nodes."""

    query_id: str
    question: str
    answer: str
    # Both lists default to a new empty list per instance.
    tool_calls: list[ToolCallRecord] = Field(default_factory=list)
    cited_nodes: list[str] = Field(default_factory=list)
    # Required here (no default).
    elapsed_seconds: float
    created_at: str
|
||||
|
||||
|
||||
class QAHistoryData(BaseModel):
    """Paginated list payload whose items are :class:`QAResult` records."""

    # Pagination metadata; all three values are required integers.
    total: int
    page: int
    page_size: int
    # The results on this page.
    items: list[QAResult]
|
||||
|
||||
|
||||
class BatchQueryRequest(BaseModel):
    """Request body carrying a batch of question strings."""

    # Required (``...``); the list may hold at most 20 items. No minimum
    # length is declared, so an empty list passes validation.
    questions: list[str] = Field(..., max_length=20)
|
||||
|
||||
|
||||
class BatchQueryData(BaseModel):
    """Payload identifying a batch: id, size, status and creation time."""

    batch_id: str
    total: int
    # Free-form status string; allowed values are not constrained here.
    status: str
    created_at: str
|
||||
|
||||
|
||||
class BatchResultData(BaseModel):
    """Batch results payload: counters, status and the per-question results."""

    batch_id: str
    # Counters; all required integers.
    total: int
    completed: int
    failed: int
    status: str
    results: list[QAResult]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# E. Search schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class EntitySearchData(BaseModel):
    """Entity search payload: the query, a total and the nodes returned."""

    query: str
    total: int
    items: list[KGNode]
|
||||
|
||||
|
||||
class PathNode(BaseModel):
    """Minimal node description (id, name, type) used in path results."""

    id: str
    name: str
    type: str
|
||||
|
||||
|
||||
class PathEdge(BaseModel):
    """Minimal edge description used in path results.

    Unlike :class:`KGEdge`, ``relation`` has no default and must be given.
    """

    source: str
    target: str
    relation: str
|
||||
|
||||
|
||||
class PathInfo(BaseModel):
    """One path: its ``length`` plus the nodes and edges it contains."""

    length: int
    nodes: list[PathNode]
    edges: list[PathEdge]
|
||||
|
||||
|
||||
class PathSearchData(BaseModel):
    """Path search payload between two endpoint nodes.

    The endpoints are exposed under the aliases ``"from"`` and ``"to"``;
    ``from`` is a reserved word in Python, so the attributes are named
    ``from_node`` / ``to_node``.
    """

    # Allow populating the aliased fields by attribute name as well as alias.
    model_config = {"populate_by_name": True}

    from_node: PathNode = Field(alias="from")
    to_node: PathNode = Field(alias="to")
    max_hops: int
    paths: list[PathInfo]
    total_paths: int
|
||||
|
||||
|
||||
class GraphSearchData(BaseModel):
    """Graph search payload: the query, matched nodes and subgraph edges."""

    query: str
    matched_nodes: list[KGNode]
    subgraph_edges: list[KGEdge]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# F. System schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ComponentHealth(BaseModel):
    """Health report for one component.

    Only ``status`` is required. Every other field is optional and defaults
    to None, so a report can carry just the details that were supplied.
    """

    # Expected values: ok | error (not validated by the model)
    status: str
    path: Optional[str] = None
    exists: Optional[bool] = None
    base_url: Optional[str] = None
    key_configured: Optional[bool] = None
    kg_nodes_exists: Optional[bool] = None
    kg_edges_exists: Optional[bool] = None
    uploads_dir_exists: Optional[bool] = None
|
||||
|
||||
|
||||
class HealthData(BaseModel):
    """Overall health payload with a per-component breakdown."""

    status: str
    version: str
    uptime_seconds: float
    # Maps a component name to its ComponentHealth report.
    components: dict[str, ComponentHealth]
|
||||
|
||||
|
||||
class SystemStatsData(BaseModel):
    """System-wide statistics payload; every field is required."""

    # Document counters.
    total_documents: int
    indexed_documents: int
    failed_documents: int
    # Graph counters.
    total_nodes: int
    total_edges: int
    # Maps a string label to an integer count.
    type_distribution: dict[str, int]
    total_queries: int
    active_jobs: int
    storage_used_mb: float
|
||||
|
||||
|
||||
class FormatInfo(BaseModel):
    """Description of one file format and its limits; all fields required."""

    ext: str
    description: str
    max_size_mb: int
    max_pages: int
    requires_ocr: bool
|
||||
|
||||
|
||||
class FormatsData(BaseModel):
    """Payload listing formats, OCR languages and free-text notes."""

    formats: list[FormatInfo]
    # Untyped dicts; their keys are not constrained by this model.
    ocr_languages: list[dict]
    notes: list[str]
|
||||
|
||||
|
||||
class DemoData(BaseModel):
    """Demo payload: nodes, edges and an untyped ``stats`` dict."""

    nodes: list[KGNode]
    edges: list[KGEdge]
    # Untyped dict; its keys are not constrained by this model.
    stats: dict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# B3 index result
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class IndexResultStats(BaseModel):
    """Summary counters for an index result.

    Every field has a default: integers and the float start at zero, and
    each mapping starts as a new empty dict per instance.
    """

    blocks: int = 0
    block_types: dict[str, int] = Field(default_factory=dict)
    pages: int = 0
    raw_extractions: int = 0
    nodes: int = 0
    edges: int = 0
    type_counts: dict[str, int] = Field(default_factory=dict)
    alignment_counts: dict[str, int] = Field(default_factory=dict)
    elapsed_seconds: float = 0.0
|
||||
|
||||
|
||||
class ExtractionRecord(BaseModel):
    """One extraction: its text, type, optional span and source location."""

    text: str
    type: str
    # Character offsets; both default to None when unavailable.
    char_start: Optional[int] = None
    char_end: Optional[int] = None
    alignment: Optional[str] = None
    page: int = 0
    # Required even though it is declared after the defaulted fields.
    doc_id: str
|
||||
|
||||
|
||||
class IndexResultData(BaseModel):
    """Index result payload for a job.

    ``job_id``, ``doc_id`` and ``status`` are required. The stats,
    extractions, nodes and edges sections are each optional and default to
    None.
    """

    job_id: str
    doc_id: str
    status: str
    stats: Optional[IndexResultStats] = None
    extractions: Optional[list[ExtractionRecord]] = None
    nodes: Optional[list[KGNode]] = None
    edges: Optional[list[KGEdge]] = None
|
||||
Reference in New Issue
Block a user