GraphRAG Studio — initial commit: multimodal RAG system with KG visualization

Full-stack application for document-to-knowledge-graph pipeline:
- Backend: FastAPI + LangGraph ReAct agent + DeepSeek + MinerU parsing
- Frontend: React 19 + Vite + D3.js + shadcn/ui
- Pipeline: MinerU parsing → LangExtract entity extraction → KG building

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
plf
2026-06-07 17:30:04 +08:00
commit b02d3378fc
127 changed files with 37218 additions and 0 deletions

256
backend/tests/test_api.py Normal file
View File

@@ -0,0 +1,256 @@
"""
API integration tests — tests all major endpoints against a running server.
Run with: python tests/test_api.py
Server must be running on http://localhost:8000
"""
import json
import sys
import time
import urllib.request
import urllib.error
from pathlib import Path
BASE = "http://localhost:8000/api/v1"
PASS = "\033[92m[PASS]\033[0m"
FAIL = "\033[91m[FAIL]\033[0m"
INFO = "\033[94m[INFO]\033[0m"
results = {"passed": 0, "failed": 0}
def _decode_envelope(raw: bytes, status: int) -> dict:
    """Parse a response body as a JSON object, with a placeholder on failure.

    Returns the parsed dict when the body is a JSON object. Otherwise returns
    ``{"code": None, "http_status": status, "raw": <text>}`` so callers that
    do ``r.get("code") == ...`` record a failed check instead of crashing.
    """
    text = raw.decode(errors="replace")
    try:
        parsed = json.loads(text)
    except ValueError:
        # Covers json.JSONDecodeError (e.g. an HTML/plain-text error page).
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    return {"code": None, "http_status": status, "raw": text}


def req(method: str, path: str, body: dict | None = None, form: dict | None = None) -> dict:
    """Send one HTTP request to ``BASE + path`` and return the decoded JSON body.

    Args:
        method: HTTP verb such as "GET", "POST" or "DELETE".
        path: Path (and optional query string) appended verbatim to BASE.
        body: When not None, sent as a JSON payload with a JSON Content-Type
            header (except for a GET with an empty body and no form, which is
            sent as a plain GET).
        form: Accepted for interface compatibility. It only takes part in the
            plain-GET decision above and is never transmitted.

    Returns:
        The JSON object returned by the server. HTTP error responses are
        decoded the same way as successful ones, so error envelopes can be
        inspected by the caller. Bodies that are not a JSON object yield the
        placeholder dict described in ``_decode_envelope``.

    Raises:
        urllib.error.URLError: if no HTTP response is obtained at all
            (connection refused, DNS failure, ...).
    """
    url = BASE + path
    plain_get = method == "GET" and not body and not form
    if body is not None and not plain_get:
        request = urllib.request.Request(
            url,
            data=json.dumps(body).encode(),
            method=method,
            headers={"Content-Type": "application/json"},
        )
    else:
        request = urllib.request.Request(url, method=method)

    try:
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(request, timeout=30) as resp:
            status, raw = resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # 4xx/5xx still carry a body (normally the API's error envelope).
        try:
            status, raw = e.code, e.read()
        finally:
            e.close()
    return _decode_envelope(raw, status)
def check(name: str, condition: bool, detail: str = "") -> None:
    """Record one assertion outcome and print a PASS/FAIL line for it.

    Args:
        name: Human-readable description of what was checked.
        condition: Truthy when the check succeeded.
        detail: Extra text appended to the line, printed only on failure.
    """
    # Bump the matching counter in the module-level tally.
    bucket = "passed" if condition else "failed"
    results[bucket] += 1

    line = f" {PASS} {name}" if condition else f" {FAIL} {name} {detail}"
    print(line)
def wait_for_server(max_retries: int = 15, delay: float = 1.0) -> bool:
    """Poll the server root URL until it answers or the retry budget runs out.

    Args:
        max_retries: Maximum number of connection attempts.
        delay: Seconds to sleep after each failed attempt.

    Returns:
        True as soon as the server produces any HTTP response, False if every
        attempt failed to connect.
    """
    # Local import so this block does not depend on extra file-level imports.
    import http.client

    root_url = BASE.replace("/api/v1", "/")
    print(f"{INFO} Waiting for server at {BASE}...")
    for _ in range(max_retries):
        try:
            with urllib.request.urlopen(root_url, timeout=3):
                pass
        except urllib.error.HTTPError as e:
            # Any HTTP status (e.g. 404 because "/" has no route) proves the
            # server is listening, so it counts as "up".
            e.close()
        except (OSError, http.client.HTTPException):
            # Connection refused, timeout, or a half-started server: retry.
            time.sleep(delay)
            continue
        print(f"{INFO} Server is up.")
        return True
    return False
# ─────────────────────────────────────────────────────────────────────────────
# Test groups
# ─────────────────────────────────────────────────────────────────────────────
def test_system():
    """Check the system endpoints: /health, /system/stats, /system/formats, /system/demo.

    Every response's ``data`` is normalised with ``or {}`` (and list fields
    with ``or []``) so a null field produces failed checks rather than an
    exception that aborts the run.
    """
    print("\n── F 组: System ──")

    r = req("GET", "/health")
    health = r.get("data") or {}
    check("GET /health returns code=0", r.get("code") == 0)
    check("health data.status exists", "status" in health)
    check("health data.components exists", "components" in health)
    print(f" {INFO} status={health.get('status')} uptime={health.get('uptime_seconds')}s")

    r = req("GET", "/system/stats")
    check("GET /system/stats returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    check("stats has total_documents", "total_documents" in d)
    check("stats has total_nodes", "total_nodes" in d)
    print(f" {INFO} docs={d.get('total_documents')} nodes={d.get('total_nodes')} edges={d.get('total_edges')}")

    r = req("GET", "/system/formats")
    check("GET /system/formats returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    formats = d.get("formats") or []
    check("formats list is non-empty", len(formats) > 0)
    # Skip malformed entries instead of raising on a missing "ext" key.
    exts = [f.get("ext") for f in formats if isinstance(f, dict)]
    check("pdf format present", "pdf" in exts)
    check("docx format present", "docx" in exts)

    r = req("GET", "/system/demo")
    # 3002 is accepted as well; the message below treats it as "no KG yet".
    check("GET /system/demo returns code=0 or 3002", r.get("code") in (0, 3002))
    if r.get("code") == 0:
        d = r.get("data") or {}
        check("demo data has nodes", "nodes" in d)
        nodes = d.get("nodes") or []
        edges = d.get("edges") or []
        print(f" {INFO} demo: {len(nodes)} nodes, {len(edges)} edges")
    else:
        print(f" {INFO} demo data not available (no KG yet) — code={r.get('code')}")
def test_documents():
    """Check document listing, upload validation and lookup of a missing document.

    The upload request is built by hand as multipart/form-data because the
    ``req`` helper only sends JSON bodies.
    """
    print("\n── A 组: Documents ──")
    r = req("GET", "/documents")
    check("GET /documents returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    check("documents list has total field", "total" in d)
    check("documents list has items field", "items" in d)
    print(f" {INFO} total documents={d.get('total', 0)}")

    # Upload a file with an unsupported extension to exercise format validation.
    print(" Testing upload validation...")
    boundary = "boundary123"
    body_parts = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="file"; filename="test.xyz"\r\n'
        f"Content-Type: application/octet-stream\r\n\r\n"
        f"dummy content\r\n"
        f"--{boundary}--\r\n"
    ).encode()
    req_obj = urllib.request.Request(
        BASE + "/documents/upload",
        data=body_parts,
        method="POST",
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
    )
    try:
        # Read the body on success too: the error code lives in the JSON
        # envelope, so it must be inspected whatever the HTTP status is.
        with urllib.request.urlopen(req_obj, timeout=10) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        try:
            raw = e.read()
        finally:
            e.close()
    try:
        r_upload = json.loads(raw.decode())
    except ValueError:
        # Non-JSON or undecodable body: let the check below fail cleanly.
        r_upload = {}
    if not isinstance(r_upload, dict):
        r_upload = {}
    check("upload unsupported format returns code=1002", r_upload.get("code") == 1002)

    r = req("GET", "/documents/nonexistent_id")
    check("GET /documents/nonexistent returns code=2001", r.get("code") == 2001)
def test_indexing():
    """Check that the indexing endpoints reject unknown document and job ids."""
    print("\n── B 组: Indexing ──")

    # (method, path, JSON body, check label, expected envelope code)
    cases = (
        ("POST", "/index/start", {"doc_id": "nonexistent_doc"},
         "start indexing nonexistent doc returns 2001", 2001),
        ("GET", "/index/status/nonexistent_job", None,
         "get status nonexistent job returns 2002", 2002),
        ("GET", "/index/result/nonexistent_job", None,
         "get result nonexistent job returns 2002", 2002),
        ("DELETE", "/index/jobs/nonexistent_job", None,
         "cancel nonexistent job returns 2002", 2002),
    )
    for method, path, payload, label, expected in cases:
        response = req(method, path, body=payload)
        check(label, response.get("code") == expected)
def test_kg():
    """Check the knowledge-graph endpoints: stats, nodes, node detail, edges, export.

    Node detail and neighbor lookups run only when the node listing returns
    at least one item; the first listed node is used as the sample.
    """
    # Local import keeps this block independent of the file-level imports.
    from urllib.parse import quote

    print("\n── C 组: Knowledge Graph ──")
    r = req("GET", "/kg/stats")
    check("GET /kg/stats returns code=0", r.get("code") == 0)
    d = r.get("data") or {}
    check("stats has total_nodes", "total_nodes" in d)
    check("stats has total_edges", "total_edges" in d)
    print(f" {INFO} KG: {d.get('total_nodes')} nodes, {d.get('total_edges')} edges")

    r = req("GET", "/kg/nodes")
    check("GET /kg/nodes returns code 0 or 3002", r.get("code") in (0, 3002))
    if r.get("code") == 0:
        d = r.get("data") or {}
        check("nodes data has items", "items" in d)
        print(f" {INFO} nodes total={d.get('total')}")
        if d.get("items"):
            node_id = d["items"][0]["id"]
            # Percent-encode the id: an id with spaces or non-ASCII
            # characters would otherwise make urlopen raise.
            node_path = quote(str(node_id), safe="")
            r2 = req("GET", f"/kg/nodes/{node_path}")
            check(f"GET /kg/nodes/{node_id} returns code=0", r2.get("code") == 0)
            r3 = req("GET", f"/kg/nodes/{node_path}/neighbors?hops=1")
            check(f"GET /kg/nodes/{node_id}/neighbors returns code=0", r3.get("code") == 0)
    else:
        print(f" {INFO} KG is empty (code=3002) — skipping node detail tests")

    r = req("GET", "/kg/nodes/definitely_not_a_real_node")
    check("GET /kg/nodes/invalid returns code=3001", r.get("code") == 3001)

    r = req("GET", "/kg/edges")
    check("GET /kg/edges returns code=0", r.get("code") == 0)

    r = req("GET", "/kg/export")
    check("GET /kg/export returns code=0", r.get("code") == 0)
def test_search():
    """Check the search endpoints: entity search, typed entity search, path and graph search."""
    print("\n── E 组: Search ──")

    # Plain entity search.
    entity_resp = req("GET", "/search/entities?q=graph")
    check("GET /search/entities returns code=0", entity_resp.get("code") == 0)
    entity_data = entity_resp.get("data") or {}
    for label, key in (
        ("search entities has query field", "query"),
        ("search entities has items field", "items"),
    ):
        check(label, key in entity_data)
    print(f" {INFO} 'graph' search: {entity_data.get('total', 0)} results")

    # Entity search restricted by type.
    typed_resp = req("GET", "/search/entities?q=technology&type=TECHNOLOGY")
    check("GET /search/entities with type filter returns code=0", typed_resp.get("code") == 0)

    # Path search without endpoints is expected to be rejected.
    path_resp = req("GET", "/search/path?max_hops=2")
    check("path search without from/to returns 1001", path_resp.get("code") == 1001)

    # Graph search.
    graph_resp = req("GET", "/search/graph?q=knowledge")
    check("GET /search/graph returns code=0", graph_resp.get("code") == 0)
    graph_data = graph_resp.get("data") or {}
    check("graph search has matched_nodes", "matched_nodes" in graph_data)
def test_query():
    """Check the QA endpoints that are exercised here: history and batch queries."""
    print("\n── D 组: QA Query ──")

    # POST /query is deliberately not called here: it needs the DeepSeek API
    # and KG data.
    history_resp = req("GET", "/query/history")
    check("GET /query/history returns code=0", history_resp.get("code") == 0)
    history = history_resp.get("data") or {}
    for label, key in (
        ("history has total field", "total"),
        ("history has items field", "items"),
    ):
        check(label, key in history)
    print(f" {INFO} query history: {history.get('total', 0)} records")

    missing_batch = req("GET", "/query/batch/nonexistent_batch")
    check("GET /query/batch/nonexistent returns 2002", missing_batch.get("code") == 2002)

    created = req("POST", "/query/batch", body={"questions": ["test question"]})
    check("POST /query/batch returns code=0", created.get("code") == 0)
    batch = created.get("data") or {}
    check("batch has batch_id", "batch_id" in batch)
# ─────────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Abort early when nothing is listening; exit code 1 signals failure.
    if not wait_for_server():
        print(f"\n{FAIL} Server not responding. Start with: python main.py")
        sys.exit(1)

    # Run every test group in a fixed order; each one updates `results`.
    for run_group in (
        test_system,
        test_documents,
        test_indexing,
        test_kg,
        test_search,
        test_query,
    ):
        run_group()

    failed = results["failed"]
    total = results["passed"] + failed
    print(f"\n{'='*50}")
    print(f"Results: {results['passed']}/{total} passed, {results['failed']} failed")
    if failed == 0:
        print(f"{PASS} All tests passed!")
    else:
        print(f"{FAIL} {results['failed']} test(s) failed")
    print(f"{'='*50}")

    # Exit status mirrors the outcome: 0 when every check passed, else 1.
    sys.exit(0 if failed == 0 else 1)