feat: 初始化 OnceLove GraphRAG 项目基础架构
- 添加完整的项目结构,包括前端(Vue3 + Vite)、后端(Fastify)和基础设施配置 - 实现核心 GraphRAG 服务,集成 Neo4j 图数据库和 Qdrant 向量数据库 - 添加用户认证系统和管理员登录界面 - 提供 Docker 容器化部署方案和开发环境配置 - 包含项目文档、API 文档(Swagger)和测试脚本
This commit is contained in:
11
OnceLove/oncelove-graphrag/api/Dockerfile
Normal file
11
OnceLove/oncelove-graphrag/api/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
||||
# Runtime image for the OnceLove GraphRAG API (Node.js 20 on Alpine).
FROM node:20-alpine

WORKDIR /app

# Copy the manifest together with the lockfile first, so the dependency layer
# stays cached until one of the two files changes.
COPY package.json package-lock.json ./

# `npm ci` installs exactly the versions pinned in package-lock.json, which
# keeps image builds reproducible; dev dependencies are left out.
RUN npm config set registry https://registry.npmmirror.com && npm ci --omit=dev --no-audit --no-fund

COPY src ./src

EXPOSE 3000

CMD ["node", "src/index.js"]
|
||||
1777
OnceLove/oncelove-graphrag/api/package-lock.json
generated
Normal file
1777
OnceLove/oncelove-graphrag/api/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
20
OnceLove/oncelove-graphrag/api/package.json
Normal file
20
OnceLove/oncelove-graphrag/api/package.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "oncelove-graphrag-api",
|
||||
"version": "0.1.0",
|
||||
"type": "module",
|
||||
"main": "src/index.js",
|
||||
"scripts": {
|
||||
"dev": "node --watch src/index.js",
|
||||
"start": "node src/index.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"@fastify/cors": "^8.5.0",
|
||||
"@fastify/swagger": "^8.15.0",
|
||||
"@fastify/swagger-ui": "^3.1.0",
|
||||
"@qdrant/js-client-rest": "1.11.0",
|
||||
"dotenv": "^16.4.5",
|
||||
"fastify": "^4.28.1",
|
||||
"neo4j-driver": "^5.24.0",
|
||||
"swagger-jsdoc": "^6.2.8"
|
||||
}
|
||||
}
|
||||
26
OnceLove/oncelove-graphrag/api/src/config/clients.js
Normal file
26
OnceLove/oncelove-graphrag/api/src/config/clients.js
Normal file
@@ -0,0 +1,26 @@
|
||||
import neo4j from "neo4j-driver";
|
||||
import { QdrantClient } from "@qdrant/js-client-rest";
|
||||
|
||||
let neo4jDriver;
|
||||
let qdrantClient;
|
||||
|
||||
/**
 * Lazily creates the Neo4j driver and the Qdrant client and caches both in
 * module-level variables, so repeated calls hand back the same instances.
 *
 * @param {Record<string, any>} env - Reads NEO4J_URI, NEO4J_USER,
 *   NEO4J_PASSWORD, QDRANT_URL and QDRANT_API_KEY.
 * @returns {{ neo4jDriver: any, qdrantClient: any }} The cached clients.
 */
export function createClients(env) {
  // The right-hand side only runs while the cache slot is still empty.
  neo4jDriver ??= neo4j.driver(
    env.NEO4J_URI,
    neo4j.auth.basic(env.NEO4J_USER, env.NEO4J_PASSWORD)
  );

  qdrantClient ??= new QdrantClient({
    url: env.QDRANT_URL,
    apiKey: env.QDRANT_API_KEY
  });

  return { neo4jDriver, qdrantClient };
}
|
||||
30
OnceLove/oncelove-graphrag/api/src/config/env.js
Normal file
30
OnceLove/oncelove-graphrag/api/src/config/env.js
Normal file
@@ -0,0 +1,30 @@
|
||||
import dotenv from "dotenv";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
/**
 * Parses an integer-valued environment variable.
 *
 * @param {string | undefined} raw - Raw value taken from process.env.
 * @param {number} fallback - Returned when raw is missing, blank, not an
 *   integer, or below min.
 * @param {number} [min=1] - Smallest accepted value.
 * @returns {number} The parsed integer or the fallback.
 */
const parseIntegerEnv = (raw, fallback, min = 1) => {
  if (typeof raw !== "string" || raw.trim() === "") {
    return fallback;
  }
  const parsed = Number(raw);
  return Number.isInteger(parsed) && parsed >= min ? parsed : fallback;
};

// Parsed once so PORT and the default APPLICATION_URL can never disagree.
// 0 is accepted because it asks the OS for a free port.
const resolvedPort = parseIntegerEnv(process.env.PORT, 3000, 0);

/**
 * Runtime configuration read from process.env in one place, shared by the
 * configuration and service layers.
 */
export const env = {
  NODE_ENV: process.env.NODE_ENV ?? "production", // defaults to production
  PORT: resolvedPort, // listening port, defaults to 3000
  APPLICATION_URL: process.env.APPLICATION_URL ?? `http://localhost:${resolvedPort}`,
  WEBUI_URL: process.env.WEBUI_URL ?? "http://localhost:8080",
  NEO4J_URI: process.env.NEO4J_URI, // Neo4j connection URI
  NEO4J_USER: process.env.NEO4J_USER, // Neo4j user name
  NEO4J_PASSWORD: process.env.NEO4J_PASSWORD, // Neo4j password
  QDRANT_URL: process.env.QDRANT_URL, // Qdrant base URL
  QDRANT_API_KEY: process.env.QDRANT_API_KEY, // Qdrant API key
  QDRANT_COLLECTION: process.env.QDRANT_COLLECTION ?? "oncelove_chunks", // Qdrant collection name
  EMBEDDING_DIM: parseIntegerEnv(process.env.EMBEDDING_DIM, 1024), // embedding vector size, defaults to 1024
  EMBEDDING_BASE_URL: process.env.EMBEDDING_BASE_URL ?? "",
  EMBEDDING_API_KEY: process.env.EMBEDDING_API_KEY ?? "",
  EMBEDDING_MODEL: process.env.EMBEDDING_MODEL ?? "",
  RERANK_BASE_URL: process.env.RERANK_BASE_URL ?? "",
  RERANK_API_KEY: process.env.RERANK_API_KEY ?? "",
  RERANK_MODEL: process.env.RERANK_MODEL ?? "",
  LLM_BASE_URL: process.env.LLM_BASE_URL ?? "",
  LLM_API_KEY: process.env.LLM_API_KEY ?? "",
  LLM_MODEL_NAME: process.env.LLM_MODEL_NAME ?? "",
  // SECURITY(review): this falls back to a hard-coded password that is visible
  // in the repository. Set ADMIN_PASSWORD explicitly in every deployment.
  ADMIN_PASSWORD: process.env.ADMIN_PASSWORD ?? "oncelove123"
};
|
||||
18
OnceLove/oncelove-graphrag/api/src/config/swagger.js
Normal file
18
OnceLove/oncelove-graphrag/api/src/config/swagger.js
Normal file
@@ -0,0 +1,18 @@
|
||||
import swaggerJsdoc from "swagger-jsdoc";
|
||||
|
||||
/**
 * Builds the OpenAPI document by handing a base definition and the route
 * source glob to swagger-jsdoc.
 *
 * @param {Record<string, any>} env - Reads APPLICATION_URL for the server entry.
 * @returns {object} Whatever swaggerJsdoc returns for this configuration.
 */
export function createSwaggerSpec(env) {
  const info = {
    title: "OnceLove GraphRAG API",
    version: "0.1.0",
    description: "时序 GraphRAG 服务接口文档"
  };

  const definition = {
    openapi: "3.0.3",
    info,
    servers: [{ url: env.APPLICATION_URL }]
  };

  return swaggerJsdoc({ definition, apis: ["src/routes/*.js"] });
}
|
||||
@@ -0,0 +1,32 @@
|
||||
/**
 * Runs a service action and sends its result; on failure sends a uniform
 * `{ ok: false, error }` body instead.
 *
 * @param {{ code: Function, send: Function }} reply - Reply object exposing
 *   chainable `code()` and `send()`.
 * @param {() => any} action - Produces the success payload; may be async.
 * @returns {Promise<any>} Whatever `reply.send()` returns.
 */
const sendServiceResult = async (reply, action) => {
  try {
    const data = await action();
    return reply.send(data);
  } catch (error) {
    // Only honour a statusCode that is a real HTTP error status. Anything else
    // (missing, 2xx/3xx, out of range, non-integer) would produce a
    // success-coded or invalid error reply, so it is reported as 500.
    const candidate = Number(error?.statusCode);
    const isErrorStatus = Number.isInteger(candidate) && candidate >= 400 && candidate <= 599;
    const statusCode = isErrorStatus ? candidate : 500;
    return reply.code(statusCode).send({
      ok: false,
      error: error?.message ?? "internal error"
    });
  }
};
|
||||
|
||||
/**
 * GraphRAG controller factory: each handler forwards the request to the
 * service and lets sendServiceResult shape the response.
 *
 * @param {object} service - Object exposing ready, bootstrap, getGraphStats,
 *   ingest, queryTimeline, queryGraphRag and analyzeAndIngest.
 * @returns {Record<string, (request: any, reply: any) => Promise<any>>}
 *   Route handlers keyed by action name.
 */
export const createGraphRagController = (service) => {
  // Wraps a service call so it is evaluated inside sendServiceResult's
  // try/catch, including any property access on the request.
  const viaService = (invoke) => async (request, reply) =>
    sendServiceResult(reply, () => invoke(request));

  return {
    health: async (_request, reply) => reply.send({ ok: true }),
    ready: viaService(() => service.ready()),
    bootstrap: viaService(() => service.bootstrap()),
    getGraphStats: viaService(() => service.getGraphStats()),
    ingest: viaService((request) => service.ingest(request.body)),
    queryTimeline: viaService((request) => service.queryTimeline(request.body)),
    queryGraphRag: viaService((request) => service.queryGraphRag(request.body)),
    analyzeAndIngest: viaService((request) => service.analyzeAndIngest(request.body.text))
  };
};
|
||||
1
OnceLove/oncelove-graphrag/api/src/controllers/index.js
Normal file
1
OnceLove/oncelove-graphrag/api/src/controllers/index.js
Normal file
@@ -0,0 +1 @@
|
||||
export { createGraphRagController } from "./graphrag.controller.js";
|
||||
16
OnceLove/oncelove-graphrag/api/src/index.js
Normal file
16
OnceLove/oncelove-graphrag/api/src/index.js
Normal file
@@ -0,0 +1,16 @@
|
||||
import { createServer } from "./server.js";
|
||||
|
||||
/**
 * Application entry point: builds the server and starts listening on
 * 0.0.0.0 at the configured port. Any failure is logged and ends the process
 * with exit code 1.
 */
const start = async () => {
  let app;
  try {
    // createServer() is inside the try so a wiring failure is reported here
    // instead of surfacing as an unhandled promise rejection.
    const server = await createServer();
    app = server.app;
    await app.listen({ port: server.env.PORT, host: "0.0.0.0" });
  } catch (error) {
    if (app) {
      app.log.error(error);
    } else {
      // No app logger exists yet when createServer() itself failed.
      console.error(error);
    }
    process.exit(1);
  }
};

start();
|
||||
32
OnceLove/oncelove-graphrag/api/src/routes/auth.route.js
Normal file
32
OnceLove/oncelove-graphrag/api/src/routes/auth.route.js
Normal file
@@ -0,0 +1,32 @@
|
||||
import { createHash, timingSafeEqual } from "node:crypto";

/**
 * Hashes a string to a fixed-length SHA-256 digest. Comparing digests lets
 * timingSafeEqual be used on inputs of different lengths.
 *
 * @param {string} value - Text to hash.
 * @returns {Buffer} 32-byte digest.
 */
const sha256 = (value) => createHash("sha256").update(value, "utf8").digest();

/**
 * Registers the authentication routes.
 *
 * @param {{ post: Function }} app - Fastify-style instance.
 * @param {{ ADMIN_PASSWORD: string }} env - Holds the expected admin password.
 * @returns {Promise<void>}
 */
export const registerAuthRoutes = async (app, env) => {
  /**
   * @openapi
   * /auth/verify:
   *   post:
   *     tags:
   *       - Auth
   *     summary: 验证管理员密码
   *     requestBody:
   *       required: true
   *       content:
   *         application/json:
   *           schema:
   *             type: object
   *             properties:
   *               password:
   *                 type: string
   *     responses:
   *       200:
   *         description: 验证成功
   *       401:
   *         description: 密码错误
   */
  app.post("/auth/verify", async (request, reply) => {
    const { password } = request.body ?? {};
    const expected = env.ADMIN_PASSWORD;

    // Only non-empty strings can match. The comparison itself is constant-time
    // so response timing does not reveal how much of the password was right.
    const isCandidate =
      typeof password === "string" && password.length > 0 && typeof expected === "string";
    const matches = isCandidate && timingSafeEqual(sha256(password), sha256(expected));

    if (!matches) {
      return reply.code(401).send({ ok: false, message: "密码错误或未提供" });
    }
    return reply.send({ ok: true, message: "验证成功" });
  });
};
|
||||
248
OnceLove/oncelove-graphrag/api/src/routes/graphrag.route.js
Normal file
248
OnceLove/oncelove-graphrag/api/src/routes/graphrag.route.js
Normal file
@@ -0,0 +1,248 @@
|
||||
/**
 * Registers the GraphRAG HTTP routes on the app. The `@openapi` comments
 * below are picked up by swagger-jsdoc, so they are part of the generated
 * API documentation.
 *
 * @param {{ get: Function, post: Function }} app - Fastify-style instance.
 * @param {Record<string, Function>} controller - Handlers from the controller factory.
 * @returns {Promise<void>}
 */
export const registerGraphRagRoutes = async (app, controller) => {
  /**
   * @openapi
   * /health:
   *   get:
   *     tags:
   *       - System
   *     summary: 存活检查
   *     responses:
   *       200:
   *         description: 服务存活
   *         content:
   *           application/json:
   *             schema:
   *               type: object
   *               properties:
   *                 ok:
   *                   type: boolean
   *                   example: true
   */
  app.get("/health", controller.health);

  /**
   * @openapi
   * /ready:
   *   get:
   *     tags:
   *       - System
   *     summary: 就绪检查(Neo4j + Qdrant)
   *     responses:
   *       200:
   *         description: 服务已就绪
   *       500:
   *         description: 服务未就绪
   */
  app.get("/ready", controller.ready);

  /**
   * @openapi
   * /bootstrap:
   *   post:
   *     tags:
   *       - System
   *     summary: 初始化约束与向量集合
   *     responses:
   *       200:
   *         description: 初始化成功
   */
  app.post("/bootstrap", controller.bootstrap);

  /**
   * @openapi
   * /graph/stats:
   *   get:
   *     tags:
   *       - GraphRAG
   *     summary: 获取图谱统计数据(nodes + links)供 D3.js 可视化
   *     responses:
   *       200:
   *         description: 图谱统计数据
   *         content:
   *           application/json:
   *             schema:
   *               type: object
   *               properties:
   *                 ok:
   *                   type: boolean
   *                 nodes:
   *                   type: array
   *                   items:
   *                     type: object
   *                     properties:
   *                       id:
   *                         type: string
   *                       name:
   *                         type: string
   *                       type:
   *                         type: string
   *                       occurred_at:
   *                         type: string
   *                 links:
   *                   type: array
   *                   items:
   *                     type: object
   *                     properties:
   *                       source:
   *                         type: string
   *                       target:
   *                         type: string
   *                       type:
   *                         type: string
   *                 total:
   *                   type: integer
   */
  app.get("/graph/stats", controller.getGraphStats);

  /**
   * @openapi
   * /ingest:
   *   post:
   *     tags:
   *       - GraphRAG
   *     summary: 写入人物、事件与向量数据(支持自动 embedding)
   *     requestBody:
   *       required: true
   *       content:
   *         application/json:
   *           schema:
   *             type: object
   *             properties:
   *               persons:
   *                 type: array
   *                 items:
   *                   type: object
   *                   properties:
   *                     id:
   *                       type: string
   *                     name:
   *                       type: string
   *               events:
   *                 type: array
   *                 items:
   *                   type: object
   *                   properties:
   *                     id:
   *                       type: string
   *                     type:
   *                       type: string
   *                     summary:
   *                       type: string
   *                     occurred_at:
   *                       type: string
   *                       format: date-time
   *                     participants:
   *                       type: array
   *                       items:
   *                         type: string
   *                     topics:
   *                       type: array
   *                       items:
   *                         type: string
   *               chunks:
   *                 type: array
   *                 items:
   *                   type: object
   *                   properties:
   *                     id:
   *                       type: string
   *                     text:
   *                       type: string
   *                       description: 当未提供 vector 时,将使用 text 通过第三方 embedding 自动生成
   *                     vector:
   *                       type: array
   *                       items:
   *                         type: number
   *                     payload:
   *                       type: object
   *                       properties:
   *                         event_id:
   *                           type: string
   *                         occurred_at:
   *                           type: string
   *                           format: date-time
   *     responses:
   *       200:
   *         description: 入库成功
   *       400:
   *         description: 参数错误
   */
  app.post("/ingest", controller.ingest);

  /**
   * @openapi
   * /query/timeline:
   *   post:
   *     tags:
   *       - GraphRAG
   *     summary: 按双方人物查询时序事件链
   *     requestBody:
   *       required: true
   *       content:
   *         application/json:
   *           schema:
   *             type: object
   *             required:
   *               - a_id
   *               - b_id
   *             properties:
   *               a_id:
   *                 type: string
   *               b_id:
   *                 type: string
   *               start:
   *                 type: string
   *                 format: date-time
   *               end:
   *                 type: string
   *                 format: date-time
   *               limit:
   *                 type: integer
   *     responses:
   *       200:
   *         description: 查询成功
   *       400:
   *         description: 参数错误
   */
  app.post("/query/timeline", controller.queryTimeline);

  /**
   * @openapi
   * /query/graphrag:
   *   post:
   *     tags:
   *       - GraphRAG
   *     summary: 向量召回 + 图谱时序上下文检索(支持 query_text 自动向量化)
   *     requestBody:
   *       required: true
   *       content:
   *         application/json:
   *           schema:
   *             type: object
   *             properties:
   *               query_vector:
   *                 type: array
   *                 items:
   *                   type: number
   *               query_text:
   *                 type: string
   *               a_id:
   *                 type: string
   *               b_id:
   *                 type: string
   *               start:
   *                 type: string
   *                 format: date-time
   *               end:
   *                 type: string
   *                 format: date-time
   *               top_k:
   *                 type: integer
   *               timeline_limit:
   *                 type: integer
   *     responses:
   *       200:
   *         description: 查询成功
   *       400:
   *         description: 参数错误
   */
  app.post("/query/graphrag", controller.queryGraphRag);

  /**
   * @openapi
   * /analyze:
   *   post:
   *     tags:
   *       - GraphRAG
   *     summary: Analyze free text with the configured LLM and ingest the result
   *     requestBody:
   *       required: true
   *       content:
   *         application/json:
   *           schema:
   *             type: object
   *             properties:
   *               text:
   *                 type: string
   *     responses:
   *       200:
   *         description: Request processed
   *       400:
   *         description: LLM service is not configured
   *       500:
   *         description: LLM returned an unusable result
   */
  app.post("/analyze", controller.analyzeAndIngest);
};
|
||||
10
OnceLove/oncelove-graphrag/api/src/routes/index.js
Normal file
10
OnceLove/oncelove-graphrag/api/src/routes/index.js
Normal file
@@ -0,0 +1,10 @@
|
||||
import { registerGraphRagRoutes } from "./graphrag.route.js";
|
||||
import { registerAuthRoutes } from "./auth.route.js";
|
||||
|
||||
/**
 * Registers every route module on the app: GraphRAG routes first, then the
 * auth routes.
 *
 * @param {object} app - Fastify-style instance.
 * @param {object} controller - GraphRAG controller handlers.
 * @param {Record<string, any>} env - Runtime configuration passed to the auth routes.
 * @returns {Promise<void>}
 */
export async function registerRoutes(app, controller, env) {
  await registerGraphRagRoutes(app, controller);
  await registerAuthRoutes(app, env);
}
|
||||
54
OnceLove/oncelove-graphrag/api/src/server.js
Normal file
54
OnceLove/oncelove-graphrag/api/src/server.js
Normal file
@@ -0,0 +1,54 @@
|
||||
import Fastify from "fastify";
|
||||
import cors from "@fastify/cors";
|
||||
import swagger from "@fastify/swagger";
|
||||
import swaggerUi from "@fastify/swagger-ui";
|
||||
import { env } from "./config/env.js";
|
||||
import { createClients } from "./config/clients.js";
|
||||
import { createSwaggerSpec } from "./config/swagger.js";
|
||||
import { EmbeddingService, RerankService, GraphRagService, LLMService } from "./services/index.js";
|
||||
import { createGraphRagController } from "./controllers/index.js";
|
||||
import { registerRoutes } from "./routes/index.js";
|
||||
|
||||
/**
 * Creates the Fastify app and wires it together: CORS, the static OpenAPI
 * document with Swagger UI under /api-docs, database clients, services, the
 * controller and all routes. The Neo4j driver is closed when the app closes.
 *
 * @returns {Promise<{ app: any, env: Record<string, any> }>} The configured
 *   app together with the environment it was built from.
 */
export const createServer = async () => {
  const app = Fastify({ logger: true });

  const allowedOrigins = [
    env.WEBUI_URL,
    "http://localhost:8080",
    "http://127.0.0.1:8080",
    "http://localhost:5174",
    "http://127.0.0.1:5174",
    "http://localhost:5173",
    "http://127.0.0.1:5173"
  ];
  await app.register(cors, {
    origin: allowedOrigins,
    methods: ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allowedHeaders: ["Content-Type", "Authorization"]
  });

  const openApiDocument = createSwaggerSpec(env);
  await app.register(swagger, {
    mode: "static",
    specification: { document: openApiDocument }
  });
  await app.register(swaggerUi, { routePrefix: "/api-docs" });

  const { neo4jDriver, qdrantClient } = createClients(env);
  const service = new GraphRagService({
    driver: neo4jDriver,
    qdrantClient,
    embeddingService: new EmbeddingService(env),
    rerankService: new RerankService(env),
    llmService: new LLMService(env),
    env
  });

  await registerRoutes(app, createGraphRagController(service), env);

  app.addHook("onClose", async () => {
    await neo4jDriver.close();
  });

  return { app, env };
};
|
||||
@@ -0,0 +1,53 @@
|
||||
/**
 * Creates an Error that carries an HTTP status code in `statusCode`.
 *
 * @param {number} statusCode - Status to attach to the error.
 * @param {string} message - Error message.
 * @returns {Error & { statusCode: number }}
 */
const createHttpError = (statusCode, message) =>
  Object.assign(new Error(message), { statusCode });
|
||||
|
||||
/**
 * Client for an HTTP embedding endpoint reached at `<base>/v1/embeddings`.
 */
export class EmbeddingService {
  /**
   * @param {Record<string, any>} env - Reads EMBEDDING_BASE_URL (trailing
   *   slashes are stripped), EMBEDDING_API_KEY, EMBEDDING_MODEL and EMBEDDING_DIM.
   */
  constructor(env) {
    const rawBaseUrl = env.EMBEDDING_BASE_URL ?? "";
    this.baseUrl = rawBaseUrl.replace(/\/+$/, "");
    this.apiKey = env.EMBEDDING_API_KEY ?? "";
    this.model = env.EMBEDDING_MODEL ?? "";
    this.dimension = env.EMBEDDING_DIM;
  }

  /**
   * @returns {boolean} True only when base URL, API key and model are all set.
   */
  isEnabled() {
    return [this.baseUrl, this.apiKey, this.model].every(Boolean);
  }

  /**
   * Requests an embedding for one piece of text.
   *
   * @param {string} text - Input text; it is trimmed before being sent.
   * @returns {Promise<number[]>} Vector whose length equals the configured dimension.
   * @throws {Error} With statusCode 400 when the service is not configured,
   *   the text is blank, or the returned vector has the wrong shape; with the
   *   upstream status when the HTTP call is not ok.
   */
  async embed(text) {
    if (!this.isEnabled()) {
      throw createHttpError(400, "embedding 服务未配置,请提供 EMBEDDING_BASE_URL/EMBEDDING_API_KEY/EMBEDDING_MODEL");
    }

    const input = typeof text === "string" ? text.trim() : "";
    if (input === "") {
      throw createHttpError(400, "embedding 输入文本不能为空");
    }

    const endpoint = `${this.baseUrl}/v1/embeddings`;
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`
    };
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify({ model: this.model, input })
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw createHttpError(response.status, `embedding 请求失败: ${errorText}`);
    }

    const payload = await response.json();
    const vector = payload?.data?.[0]?.embedding;
    const hasExpectedShape = Array.isArray(vector) && vector.length === this.dimension;
    if (!hasExpectedShape) {
      throw createHttpError(400, `embedding 维度异常,期望 ${this.dimension}`);
    }
    return vector;
  }
}
|
||||
589
OnceLove/oncelove-graphrag/api/src/services/graphrag.service.js
Normal file
589
OnceLove/oncelove-graphrag/api/src/services/graphrag.service.js
Normal file
@@ -0,0 +1,589 @@
|
||||
import neo4j from "neo4j-driver";
|
||||
|
||||
/**
 * Converts a date string into whole seconds since the Unix epoch; the value
 * is stored as `occurred_ts` and used for Qdrant range filters.
 *
 * @param {string | null | undefined} value - Date text understood by Date.parse.
 * @returns {number | null} Seconds (rounded down), or null when the value is
 *   falsy or cannot be parsed.
 */
const toTimestamp = (value) => {
  const millis = value ? Date.parse(value) : Number.NaN;
  return Number.isNaN(millis) ? null : Math.floor(millis / 1000);
};
|
||||
|
||||
/**
 * Normalizes a timestamp to an ISO-8601 UTC string.
 *
 * @param {unknown} value - Candidate date text.
 * @returns {string} ISO string of the parsed date, or of the current time
 *   when the value is not a non-blank string or cannot be parsed.
 */
const normalizeOccurredAt = (value) => {
  const isUsableText = typeof value === "string" && value.trim() !== "";
  const parsed = isUsableText ? Date.parse(value) : Number.NaN;
  const moment = Number.isNaN(parsed) ? new Date() : new Date(parsed);
  return moment.toISOString();
};
|
||||
|
||||
/**
 * Maps one result record to a plain event object, so callers never touch the
 * record API.
 *
 * @param {{ get: (key: string) => any }} record - Row exposing `get(key)`.
 * @returns {{ id: any, type: any, summary: any, occurred_at: any,
 *   importance: any, topics: any[], participants: any[] }} Event DTO; topics
 *   and participants default to empty arrays when null or undefined.
 */
const toEventDto = (record) => {
  const read = (key) => record.get(key);
  return {
    id: read("id"),
    type: read("type"),
    summary: read("summary"),
    occurred_at: read("occurred_at"),
    importance: read("importance"),
    topics: read("topics") ?? [],
    participants: read("participants") ?? []
  };
};
|
||||
|
||||
/**
 * Builds a business error that carries an HTTP status in `statusCode`, which
 * the controller layer uses as the response status.
 *
 * @param {number} statusCode - Status to attach to the error.
 * @param {string} message - Error message.
 * @returns {Error & { statusCode: number }}
 */
const createHttpError = (statusCode, message) => {
  const httpError = new Error(message);
  return Object.assign(httpError, { statusCode });
};
|
||||
|
||||
/**
 * Validates the /ingest payload and the chunk vector dimensions.
 *
 * @param {Record<string, any>} body - Request body; non-array `persons`,
 *   `events` or `chunks` are treated as empty lists.
 * @param {number} embeddingDim - Required length of every provided vector.
 * @param {boolean} canAutoEmbed - Whether text-only chunks are acceptable.
 * @returns {{ persons: any[], events: any[], chunks: any[] }} The three lists.
 * @throws {Error} With statusCode 400 on the first rule that fails.
 */
const validateIngestInput = (body, embeddingDim, canAutoEmbed) => {
  const listOf = (candidate) => (Array.isArray(candidate) ? candidate : []);
  const reject = (message) => {
    throw createHttpError(400, message);
  };

  const persons = listOf(body.persons);
  const events = listOf(body.events);
  const chunks = listOf(body.chunks);

  persons.forEach((person) => {
    if (!person?.id) reject("persons[].id 必填");
  });

  events.forEach((event) => {
    const missingRequired = !event?.id || !event?.occurred_at;
    if (missingRequired) reject("events[].id 与 events[].occurred_at 必填");

    const { participants } = event;
    const hasParticipants = Array.isArray(participants) && participants.length > 0;
    if (!hasParticipants) reject("events[].participants 至少包含 1 个人物 id");
  });

  chunks.forEach((chunk) => {
    if (!chunk?.id) reject("chunks[].id 必填");

    const { vector, text } = chunk;
    if (Array.isArray(vector)) {
      // A supplied vector must match the configured dimension.
      if (vector.length !== embeddingDim) {
        reject(`chunks[].vector 维度必须为 ${embeddingDim}`);
      }
      return;
    }

    // Without a vector, the chunk needs text and an available embedding service.
    const hasText = typeof text === "string" && text.trim().length > 0;
    if (!(canAutoEmbed && hasText)) {
      reject("chunks[] 需提供 vector,或在配置 embedding 后提供 text");
    }
  });

  return { persons, events, chunks };
};
|
||||
|
||||
/**
|
||||
* GraphRAG 核心服务:
|
||||
* 1) 图谱结构写入 Neo4j;
|
||||
* 2) 文本向量写入 Qdrant;
|
||||
* 3) 按时序与向量联合检索上下文。
|
||||
*/
|
||||
export class GraphRagService {
|
||||
/**
|
||||
* @param {{ driver: import("neo4j-driver").Driver, qdrantClient: any, env: Record<string, any> }} deps
|
||||
*/
|
||||
constructor({ driver, qdrantClient, embeddingService, rerankService, llmService, env }) {
|
||||
this.driver = driver;
|
||||
this.qdrantClient = qdrantClient;
|
||||
this.embeddingService = embeddingService;
|
||||
this.rerankService = rerankService;
|
||||
this.llmService = llmService;
|
||||
this.collection = env.QDRANT_COLLECTION;
|
||||
this.embeddingDim = env.EMBEDDING_DIM;
|
||||
}
|
||||
|
||||
async resolveChunkVector(chunk) {
|
||||
if (Array.isArray(chunk?.vector)) {
|
||||
if (chunk.vector.length !== this.embeddingDim) {
|
||||
throw createHttpError(400, `chunks[].vector 维度必须为 ${this.embeddingDim}`);
|
||||
}
|
||||
return chunk.vector;
|
||||
}
|
||||
if (!this.embeddingService?.isEnabled()) {
|
||||
throw createHttpError(400, "未检测到可用 embedding 配置");
|
||||
}
|
||||
return this.embeddingService.embed(chunk.text ?? "");
|
||||
}
|
||||
|
||||
async resolveQueryVector(body) {
|
||||
const queryVector = body?.query_vector;
|
||||
if (Array.isArray(queryVector)) {
|
||||
if (queryVector.length !== this.embeddingDim) {
|
||||
throw createHttpError(400, `query_vector 维度必须为 ${this.embeddingDim}`);
|
||||
}
|
||||
return queryVector;
|
||||
}
|
||||
const queryText = typeof body?.query_text === "string" ? body.query_text.trim() : "";
|
||||
if (!queryText) {
|
||||
throw createHttpError(400, `query_vector 维度必须为 ${this.embeddingDim},或提供 query_text`);
|
||||
}
|
||||
if (!this.embeddingService?.isEnabled()) {
|
||||
throw createHttpError(400, "未检测到可用 embedding 配置,无法使用 query_text 检索");
|
||||
}
|
||||
return this.embeddingService.embed(queryText);
|
||||
}
|
||||
|
||||
/**
|
||||
* 连接就绪检查。
|
||||
*/
|
||||
async ready() {
|
||||
const session = this.driver.session();
|
||||
try {
|
||||
await session.run("RETURN 1 AS ok");
|
||||
await this.qdrantClient.getCollections();
|
||||
return { ok: true };
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取图谱统计数据,用于前端 D3.js 可视化渲染。
|
||||
* 返回 nodes(人物/事件/主题)和 links(关系边)。
|
||||
*/
|
||||
async getGraphStats() {
|
||||
const runQuery = async (query) => {
|
||||
const session = this.driver.session();
|
||||
try { return await session.run(query) }
|
||||
finally { await session.close() }
|
||||
};
|
||||
const [personResult, eventResult, topicResult, personEventResult, eventTopicResult] =
|
||||
await Promise.all([
|
||||
runQuery(`MATCH (p:Person) RETURN p.id AS id, p.name AS name, 'person' AS type LIMIT 200`),
|
||||
runQuery(`MATCH (e:Event) RETURN e.id AS id, e.summary AS name, 'event' AS type, e.occurred_at AS occurred_at LIMIT 200`),
|
||||
runQuery(`MATCH (t:Topic) RETURN t.name AS id, t.name AS name, 'topic' AS type LIMIT 100`),
|
||||
runQuery(`MATCH (p:Person)-[:PARTICIPATES_IN]->(e:Event) RETURN p.id AS source, e.id AS target, 'PARTICIPATES_IN' AS type LIMIT 500`),
|
||||
runQuery(`MATCH (e:Event)-[:ABOUT]->(t:Topic) RETURN e.id AS source, t.name AS target, 'ABOUT' AS type LIMIT 300`)
|
||||
]);
|
||||
|
||||
const nodes = [];
|
||||
const idSet = new Set();
|
||||
|
||||
const addNode = (record) => {
|
||||
const id = record.get("id");
|
||||
if (!id || idSet.has(id)) return;
|
||||
idSet.add(id);
|
||||
const occurredAt = record.keys.includes("occurred_at") ? record.get("occurred_at") : null;
|
||||
nodes.push({
|
||||
id,
|
||||
name: record.get("name") ?? id,
|
||||
type: record.get("type"),
|
||||
occurred_at: occurredAt
|
||||
});
|
||||
};
|
||||
|
||||
personResult.records.forEach(addNode);
|
||||
eventResult.records.forEach(addNode);
|
||||
topicResult.records.forEach(addNode);
|
||||
|
||||
const links = [
|
||||
...personEventResult.records.map((r) => ({
|
||||
source: r.get("source"),
|
||||
target: r.get("target"),
|
||||
type: r.get("type")
|
||||
})),
|
||||
...eventTopicResult.records.map((r) => ({
|
||||
source: r.get("source"),
|
||||
target: r.get("target"),
|
||||
type: r.get("type")
|
||||
}))
|
||||
];
|
||||
|
||||
return { ok: true, nodes, links, total: nodes.length };
|
||||
}
|
||||
|
||||
/**
|
||||
* 初始化图谱约束与向量集合。
|
||||
*/
|
||||
async bootstrap() {
|
||||
const session = this.driver.session();
|
||||
try {
|
||||
await session.run("CREATE CONSTRAINT person_id IF NOT EXISTS FOR (p:Person) REQUIRE p.id IS UNIQUE");
|
||||
await session.run("CREATE CONSTRAINT event_id IF NOT EXISTS FOR (e:Event) REQUIRE e.id IS UNIQUE");
|
||||
await session.run("CREATE CONSTRAINT topic_name IF NOT EXISTS FOR (t:Topic) REQUIRE t.name IS UNIQUE");
|
||||
await session.run("CREATE INDEX event_time IF NOT EXISTS FOR (e:Event) ON (e.occurred_at)");
|
||||
|
||||
const collections = await this.qdrantClient.getCollections();
|
||||
const exists = collections.collections?.some((item) => item.name === this.collection);
|
||||
if (!exists) {
|
||||
await this.qdrantClient.createCollection(this.collection, {
|
||||
vectors: { size: this.embeddingDim, distance: "Cosine" }
|
||||
});
|
||||
}
|
||||
|
||||
return { ok: true, collection: this.collection };
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 写入人物、事件、主题关系,并可同步写入向量分片。
|
||||
*/
|
||||
async ingest(body) {
|
||||
const { persons, events, chunks } = validateIngestInput(
|
||||
body ?? {},
|
||||
this.embeddingDim,
|
||||
this.embeddingService?.isEnabled() ?? false
|
||||
);
|
||||
const session = this.driver.session();
|
||||
|
||||
try {
|
||||
await session.executeWrite(async (tx) => {
|
||||
for (const person of persons) {
|
||||
await tx.run(
|
||||
`
|
||||
MERGE (p:Person {id: $id})
|
||||
SET p.name = coalesce($name, p.name),
|
||||
p.updated_at = datetime()
|
||||
`,
|
||||
{ id: person.id, name: person.name ?? null }
|
||||
);
|
||||
}
|
||||
|
||||
for (const event of events) {
|
||||
await tx.run(
|
||||
`
|
||||
MERGE (e:Event {id: $id})
|
||||
SET e.type = $type,
|
||||
e.summary = $summary,
|
||||
e.occurred_at = datetime($occurred_at),
|
||||
e.importance = $importance,
|
||||
e.updated_at = datetime()
|
||||
`,
|
||||
{
|
||||
id: event.id,
|
||||
type: event.type ?? "event",
|
||||
summary: event.summary ?? "",
|
||||
occurred_at: event.occurred_at,
|
||||
importance: event.importance ?? 0.5
|
||||
}
|
||||
);
|
||||
|
||||
for (const personId of event.participants) {
|
||||
await tx.run(
|
||||
`
|
||||
MERGE (p:Person {id: $person_id})
|
||||
SET p.updated_at = datetime()
|
||||
WITH p
|
||||
MATCH (e:Event {id: $event_id})
|
||||
MERGE (p)-[:PARTICIPATES_IN]->(e)
|
||||
`,
|
||||
{ person_id: personId, event_id: event.id }
|
||||
);
|
||||
}
|
||||
|
||||
const topics = Array.isArray(event.topics) ? event.topics : [];
|
||||
for (const topicName of topics) {
|
||||
await tx.run(
|
||||
`
|
||||
MERGE (t:Topic {name: $name})
|
||||
WITH t
|
||||
MATCH (e:Event {id: $event_id})
|
||||
MERGE (e)-[:ABOUT]->(t)
|
||||
`,
|
||||
{ name: topicName, event_id: event.id }
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (chunks.length > 0) {
|
||||
const points = await Promise.all(chunks.map(async (chunk) => {
|
||||
const vector = await this.resolveChunkVector(chunk);
|
||||
const payload = chunk.payload ?? {};
|
||||
return {
|
||||
id: chunk.id,
|
||||
vector,
|
||||
payload: {
|
||||
text: chunk.text ?? payload.text ?? "",
|
||||
event_id: payload.event_id ?? null,
|
||||
occurred_at: payload.occurred_at ?? null,
|
||||
occurred_ts: toTimestamp(payload.occurred_at),
|
||||
person_ids: Array.isArray(payload.person_ids) ? payload.person_ids : [],
|
||||
source: payload.source ?? "unknown"
|
||||
}
|
||||
};
|
||||
}));
|
||||
|
||||
await this.qdrantClient.upsert(this.collection, { points, wait: true });
|
||||
}
|
||||
|
||||
return {
|
||||
ok: true,
|
||||
ingested: {
|
||||
persons: persons.length,
|
||||
events: events.length,
|
||||
chunks: chunks.length
|
||||
}
|
||||
};
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询两个人在时间窗内的时序事件链。
|
||||
*/
|
||||
async queryTimeline(body) {
|
||||
const { a_id, b_id, start, end, limit = 100 } = body ?? {};
|
||||
if (!a_id || !b_id) {
|
||||
throw createHttpError(400, "a_id 和 b_id 必填");
|
||||
}
|
||||
|
||||
const session = this.driver.session();
|
||||
try {
|
||||
const result = await session.run(
|
||||
`
|
||||
MATCH (a:Person {id: $a_id})-[:PARTICIPATES_IN]->(e:Event)<-[:PARTICIPATES_IN]-(b:Person {id: $b_id})
|
||||
WHERE ($start IS NULL OR e.occurred_at >= datetime($start))
|
||||
AND ($end IS NULL OR e.occurred_at <= datetime($end))
|
||||
OPTIONAL MATCH (e)-[:ABOUT]->(t:Topic)
|
||||
WITH e, collect(DISTINCT t.name) AS topics
|
||||
OPTIONAL MATCH (p:Person)-[:PARTICIPATES_IN]->(e)
|
||||
WITH e, topics, collect(DISTINCT {id: p.id, name: p.name}) AS participants
|
||||
RETURN
|
||||
e.id AS id,
|
||||
e.type AS type,
|
||||
e.summary AS summary,
|
||||
toString(e.occurred_at) AS occurred_at,
|
||||
e.importance AS importance,
|
||||
topics AS topics,
|
||||
participants AS participants
|
||||
ORDER BY e.occurred_at ASC
|
||||
LIMIT $limit
|
||||
`,
|
||||
{
|
||||
a_id,
|
||||
b_id,
|
||||
start: start ?? null,
|
||||
end: end ?? null,
|
||||
limit: neo4j.int(limit)
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
ok: true,
|
||||
total: result.records.length,
|
||||
timeline: result.records.map(toEventDto)
|
||||
};
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 先做向量召回,再回查图谱事件上下文,输出 GraphRAG 检索结果。
|
||||
*/
|
||||
async queryGraphRag(body) {
|
||||
const {
|
||||
a_id = null,
|
||||
b_id = null,
|
||||
start = null,
|
||||
end = null,
|
||||
top_k = 8,
|
||||
timeline_limit = 60
|
||||
} = body ?? {};
|
||||
const queryVector = await this.resolveQueryVector(body ?? {});
|
||||
|
||||
const filterMust = [];
|
||||
const startTs = toTimestamp(start);
|
||||
const endTs = toTimestamp(end);
|
||||
if (startTs !== null || endTs !== null) {
|
||||
filterMust.push({
|
||||
key: "occurred_ts",
|
||||
range: {
|
||||
...(startTs !== null ? { gte: startTs } : {}),
|
||||
...(endTs !== null ? { lte: endTs } : {})
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const searchResult = await this.qdrantClient.search(this.collection, {
|
||||
vector: queryVector,
|
||||
limit: top_k,
|
||||
with_payload: true,
|
||||
...(filterMust.length > 0 ? { filter: { must: filterMust } } : {})
|
||||
});
|
||||
|
||||
let chunks = searchResult.map((item) => ({
|
||||
id: item.id,
|
||||
score: item.score,
|
||||
text: item.payload?.text ?? "",
|
||||
payload: item.payload ?? {}
|
||||
}));
|
||||
|
||||
if (body?.query_text && this.rerankService?.isEnabled()) {
|
||||
chunks = await this.rerankService.rerank(body.query_text, chunks);
|
||||
}
|
||||
|
||||
const eventIds = Array.from(
|
||||
new Set(
|
||||
chunks
|
||||
.map((item) => item.payload?.event_id)
|
||||
.filter((id) => typeof id === "string" && id.length > 0)
|
||||
)
|
||||
);
|
||||
|
||||
const session = this.driver.session();
|
||||
try {
|
||||
const result = await session.run(
|
||||
`
|
||||
MATCH (e:Event)
|
||||
WHERE (size($event_ids) = 0 OR e.id IN $event_ids)
|
||||
AND ($start IS NULL OR e.occurred_at >= datetime($start))
|
||||
AND ($end IS NULL OR e.occurred_at <= datetime($end))
|
||||
AND ($a_id IS NULL OR EXISTS { MATCH (:Person {id: $a_id})-[:PARTICIPATES_IN]->(e) })
|
||||
AND ($b_id IS NULL OR EXISTS { MATCH (:Person {id: $b_id})-[:PARTICIPATES_IN]->(e) })
|
||||
OPTIONAL MATCH (e)-[:ABOUT]->(t:Topic)
|
||||
WITH e, collect(DISTINCT t.name) AS topics
|
||||
OPTIONAL MATCH (p:Person)-[:PARTICIPATES_IN]->(e)
|
||||
WITH e, topics, collect(DISTINCT {id: p.id, name: p.name}) AS participants
|
||||
RETURN
|
||||
e.id AS id,
|
||||
e.type AS type,
|
||||
e.summary AS summary,
|
||||
toString(e.occurred_at) AS occurred_at,
|
||||
e.importance AS importance,
|
||||
topics AS topics,
|
||||
participants AS participants
|
||||
ORDER BY e.occurred_at DESC
|
||||
LIMIT $timeline_limit
|
||||
`,
|
||||
{
|
||||
event_ids: eventIds,
|
||||
start,
|
||||
end,
|
||||
a_id,
|
||||
b_id,
|
||||
timeline_limit: neo4j.int(timeline_limit)
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
ok: true,
|
||||
retrieved_chunks: chunks,
|
||||
timeline_context: result.records.map(toEventDto)
|
||||
};
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
async analyzeAndIngest(text) {
|
||||
if (!this.llmService?.isEnabled()) {
|
||||
throw createHttpError(400, "LLM 服务未配置,请检查 LLM_BASE_URL/LLM_API_KEY/LLM_MODEL_NAME");
|
||||
}
|
||||
|
||||
const analysis = await this.llmService.analyzeText(text);
|
||||
console.log("[DEBUG] LLM analysis result:", JSON.stringify(analysis));
|
||||
if (!analysis || (!analysis.persons && !analysis.events && !analysis.topics)) {
|
||||
throw createHttpError(500, `LLM 返回数据异常: ${JSON.stringify(analysis)}`);
|
||||
}
|
||||
|
||||
const session = this.driver.session();
|
||||
console.log("[DEBUG] Got session, driver:", !!this.driver);
|
||||
try {
|
||||
const deleteResult = await session.run("MATCH (n) DETACH DELETE n", {});
|
||||
console.log("[DEBUG] Delete result:", deleteResult?.summary?.counters);
|
||||
|
||||
const personMap = {};
|
||||
for (const person of (analysis.persons || [])) {
|
||||
console.log("[DEBUG] Creating person:", person);
|
||||
const result = await session.run(
|
||||
`CREATE (p:Person {id: $id, name: $name, description: $description}) RETURN p.id AS id`,
|
||||
{ id: person.id, name: person.name, description: person.description || "" }
|
||||
);
|
||||
console.log("[DEBUG] Person create result:", result?.records?.length);
|
||||
if (result?.records?.length > 0) {
|
||||
personMap[person.id] = person.name;
|
||||
}
|
||||
}
|
||||
|
||||
const topicMap = {};
|
||||
for (const topic of (analysis.topics || [])) {
|
||||
await session.run(
|
||||
`CREATE (t:Topic {name: $name})`,
|
||||
{ name: topic.name }
|
||||
);
|
||||
topicMap[topic.id] = topic.name;
|
||||
}
|
||||
|
||||
for (const event of (analysis.events || [])) {
|
||||
const normalizedOccurredAt = normalizeOccurredAt(event.occurred_at);
|
||||
await session.run(
|
||||
`CREATE (e:Event {
|
||||
id: $id,
|
||||
type: $type,
|
||||
summary: $summary,
|
||||
occurred_at: datetime($occurred_at),
|
||||
importance: $importance
|
||||
})`,
|
||||
{
|
||||
id: event.id,
|
||||
type: event.type || "general",
|
||||
summary: event.summary || "",
|
||||
occurred_at: normalizedOccurredAt,
|
||||
importance: neo4j.int(event.importance || 5)
|
||||
}
|
||||
);
|
||||
|
||||
for (const pid of (event.participants || [])) {
|
||||
await session.run(
|
||||
`MATCH (p:Person {id: $pid}), (e:Event {id: $eid})
|
||||
MERGE (p)-[:PARTICIPATES_IN]->(e)`,
|
||||
{ pid, eid: event.id }
|
||||
);
|
||||
}
|
||||
|
||||
for (const tid of (event.topics || [])) {
|
||||
const topicName = topicMap[tid];
|
||||
if (topicName) {
|
||||
await session.run(
|
||||
`MATCH (e:Event {id: $eid}), (t:Topic {name: $tname})
|
||||
MERGE (e)-[:ABOUT]->(t)`,
|
||||
{ eid: event.id, tname: topicName }
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const rel of (analysis.relations || [])) {
|
||||
const sourceName = personMap[rel.source];
|
||||
const targetName = personMap[rel.target];
|
||||
if (sourceName && targetName) {
|
||||
await session.run(
|
||||
`MATCH (s:Person {name: $sname}), (t:Person {name: $tname})
|
||||
MERGE (s)-[r:${rel.type}]->(t)`,
|
||||
{ sname: sourceName, tname: targetName }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
ok: true,
|
||||
message: "分析并导入成功",
|
||||
analysis,
|
||||
stats: {
|
||||
persons: (analysis.persons || []).length,
|
||||
events: (analysis.events || []).length,
|
||||
topics: (analysis.topics || []).length,
|
||||
relations: (analysis.relations || []).length
|
||||
}
|
||||
};
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
4
OnceLove/oncelove-graphrag/api/src/services/index.js
Normal file
4
OnceLove/oncelove-graphrag/api/src/services/index.js
Normal file
@@ -0,0 +1,4 @@
|
||||
export { EmbeddingService } from "./embedding.service.js";
|
||||
export { RerankService } from "./rerank.service.js";
|
||||
export { GraphRagService } from "./graphrag.service.js";
|
||||
export { LLMService } from "./llm.service.js";
|
||||
127
OnceLove/oncelove-graphrag/api/src/services/llm.service.js
Normal file
127
OnceLove/oncelove-graphrag/api/src/services/llm.service.js
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
 * Creates an Error that carries an HTTP status code.
 *
 * @param {number} statusCode Value stored on the error as `statusCode`.
 * @param {string} message Error message.
 * @returns {Error} The new error with `statusCode` attached.
 */
const createHttpError = (statusCode, message) =>
  Object.assign(new Error(message), { statusCode });
|
||||
|
||||
/**
 * Client for a chat-completions style HTTP endpoint
 * (`POST {LLM_BASE_URL}/chat/completions`) plus the entity/relation
 * extraction prompt built on top of it.
 */
export class LLMService {
  /**
   * @param {object} env Configuration source. Reads `LLM_BASE_URL` (trailing
   *   slashes stripped), `LLM_API_KEY` and `LLM_MODEL_NAME`; missing values
   *   become empty strings.
   */
  constructor(env) {
    this.baseUrl = (env.LLM_BASE_URL ?? "").replace(/\/+$/, "");
    this.apiKey = env.LLM_API_KEY ?? "";
    this.model = env.LLM_MODEL_NAME ?? "";
  }

  /**
   * @returns {boolean} True only when base URL, API key and model are all set.
   */
  isEnabled() {
    return Boolean(this.baseUrl && this.apiKey && this.model);
  }

  /**
   * Sends one chat-completions request and returns the decoded JSON body.
   *
   * @param {Array<{role: string, content: string}>} messages Conversation to send.
   * @param {number} [temperature=0.7] Sampling temperature forwarded as-is.
   * @returns {Promise<object>} Parsed response body.
   * @throws {Error} `statusCode` 400 when the service is not configured;
   *   otherwise the upstream HTTP status when the response is not ok.
   */
  async chat(messages, temperature = 0.7) {
    if (!this.isEnabled()) {
      throw createHttpError(400, "LLM 服务未配置,请提供 LLM_BASE_URL/LLM_API_KEY/LLM_MODEL_NAME");
    }

    const response = await fetch(`${this.baseUrl}/chat/completions`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify({
        model: this.model,
        messages,
        temperature
      })
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw createHttpError(response.status, `LLM 请求失败:${errorText}`);
    }

    return response.json();
  }

  /**
   * Parses a model reply as JSON. When the reply contains a `{ ... }` span
   * (for example wrapped in a Markdown fence or surrounded by prose) only the
   * outermost span is parsed; otherwise the whole reply is parsed.
   *
   * @param {string} content Raw reply text.
   * @returns {*} The parsed JSON value.
   * @throws {Error} When `content` is not a string or is not valid JSON.
   */
  extractJson(content) {
    const jsonMatch = content.match(/\{[\s\S]*\}/);
    return JSON.parse(jsonMatch ? jsonMatch[0] : content);
  }

  /**
   * Asks the model to extract persons, events, topics and relations from
   * `text`. If the first reply parses but is empty (see `isEmptyAnalysis`),
   * one stricter retry is made; the retry result is used only when it parses
   * and is non-empty, otherwise the first result is returned.
   *
   * @param {string} text Text to analyse.
   * @returns {Promise<object>} Parsed analysis object.
   * @throws {Error} `statusCode` 400 for blank text; `statusCode` 500 when the
   *   first reply is empty or cannot be parsed; any error raised by `chat`.
   */
  async analyzeText(text) {
    if (!text?.trim()) {
      throw createHttpError(400, "分析文本不能为空");
    }

    const systemPrompt = `你是一个实体关系分析专家。请分析用户输入的文本,提取人物、事件、主题、关系。

## 输出格式
{
"persons": [{"id": "p1", "name": "人物名称", "description": "人物描述"}],
"events": [{"id": "e1", "type": "事件类型", "summary": "事件摘要", "occurred_at": "ISO 时间", "participants": ["p1"], "topics": ["t1"], "importance": 5}],
"topics": [{"id": "t1", "name": "主题名称"}],
"relations": [{"source": "p1", "target": "p2", "type": "关系类型", "description": "关系描述"}]
}

## 注意
- 时间用 ISO 格式,如文本没明确时间用当前时间
- importance 是重要性评分 1-10
- 关系类型:PARTICIPATES_IN, ABOUT, LOVES, FIGHTS_WITH, GIVES, PROPOSES_TO 等
- 如果文本涉及"我",推断另一个角色(如"她")
- 即使文本很短也要提取信息,不要返回空数组

只返回 JSON,不要有其他文字。`;

    const result = await this.chat(
      [
        { role: "system", content: systemPrompt },
        { role: "user", content: text }
      ],
      0.3
    );

    const content = result?.choices?.[0]?.message?.content;
    if (!content) {
      throw createHttpError(500, "LLM 返回内容为空");
    }

    let parsed;
    try {
      parsed = this.extractJson(content);
    } catch (parseError) {
      // String() keeps the error path safe when `content` is not a string.
      const formatError = createHttpError(500, `LLM 返回格式错误:${String(content).substring(0, 200)}`);
      formatError.cause = parseError;
      throw formatError;
    }

    if (!this.isEmptyAnalysis(parsed)) {
      return parsed;
    }

    // Second attempt with a stricter prompt and a lower temperature.
    const retryResult = await this.chat(
      [
        {
          role: "system",
          content: "你是信息抽取器。必须输出非空 JSON:{persons:[{id,name,description}],events:[{id,type,summary,occurred_at,participants,topics,importance}],topics:[{id,name}],relations:[{source,target,type,description}]}"
        },
        {
          role: "user",
          content: `从下列文本提取实体关系,至少给出 2 个 persons、1 个 event、1 个 relation,且仅返回 JSON:${text}`
        }
      ],
      0.2
    );
    const retryContent = retryResult?.choices?.[0]?.message?.content;
    if (!retryContent) {
      return parsed;
    }

    try {
      const retryParsed = this.extractJson(retryContent);
      return this.isEmptyAnalysis(retryParsed) ? parsed : retryParsed;
    } catch {
      // Best-effort retry: an unparseable second reply keeps the first result.
      return parsed;
    }
  }

  /**
   * @param {*} data Parsed analysis candidate.
   * @returns {boolean} True when `data` is falsy or none of `persons`,
   *   `events`, `topics`, `relations` is a non-empty array.
   */
  isEmptyAnalysis(data) {
    if (!data) {
      return true;
    }
    return ["persons", "events", "topics", "relations"].every(
      (key) => !Array.isArray(data[key]) || data[key].length === 0
    );
  }
}
|
||||
@@ -0,0 +1,84 @@
|
||||
/**
 * Wraps a message in an Error tagged with an HTTP status code.
 *
 * @param {number} statusCode Status stored on the error's `statusCode` property.
 * @param {string} message Error message.
 * @returns {Error} Error instance with `statusCode` set.
 */
const createHttpError = (statusCode, message) => {
  const httpError = new Error(message);
  return Object.assign(httpError, { statusCode });
};
|
||||
|
||||
/**
 * Optional reranking step backed by an HTTP rerank endpoint
 * (`POST {RERANK_BASE_URL}/v1/rerank`). Reranking is best-effort: any failure
 * is logged and the caller gets its input back unchanged.
 */
export class RerankService {
  /**
   * @param {object} env Configuration source. Reads `RERANK_BASE_URL`
   *   (trailing slashes stripped), `RERANK_API_KEY` and `RERANK_MODEL`;
   *   missing values become empty strings.
   */
  constructor(env) {
    this.baseUrl = (env.RERANK_BASE_URL ?? "").replace(/\/+$/, "");
    this.apiKey = env.RERANK_API_KEY ?? "";
    this.model = env.RERANK_MODEL ?? "";
  }

  /**
   * @returns {boolean} True only when base URL, API key and model are all set.
   */
  isEnabled() {
    return Boolean(this.baseUrl && this.apiKey && this.model);
  }

  /**
   * Reorders `chunks` by the relevance scores returned from the rerank API.
   *
   * @param {string} query Query text; blank or non-string queries skip reranking.
   * @param {Array<{id: string, text: string, [key: string]: any}>} chunks
   *   Candidate chunks; each chunk's `text` is sent to the API.
   * @returns {Promise<Array>} Chunks sorted by descending `relevance_score`
   *   (added to each returned chunk), or the original `chunks` when reranking
   *   is disabled, not applicable, or fails.
   */
  async rerank(query, chunks) {
    if (!this.isEnabled()) {
      return chunks; // Not configured: pass the input through.
    }
    if (!chunks || chunks.length === 0) {
      return chunks;
    }

    const cleanedQuery = typeof query === "string" ? query.trim() : "";
    if (!cleanedQuery) {
      return chunks;
    }

    try {
      // NOTE(review): request and response shapes assume a generic rerank API
      // ({model, query, texts} -> {results: [{index, relevance_score}]});
      // adjust the path and fields for the concrete provider.
      const response = await fetch(`${this.baseUrl}/v1/rerank`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.apiKey}`
        },
        body: JSON.stringify({
          model: this.model,
          query: cleanedQuery,
          texts: chunks.map((chunk) => chunk.text)
        })
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw createHttpError(response.status, `rerank 请求失败: ${errorText}`);
      }

      const data = await response.json();
      const results = data?.results;
      if (!Array.isArray(results)) {
        throw createHttpError(500, "rerank 返回格式异常");
      }

      // Drop entries that do not point at a real chunk or lack a numeric
      // score; spreading chunks[badIndex] would yield a chunk without
      // id/text/payload, and a missing score breaks the comparator.
      const usable = results.filter(
        (entry) =>
          Number.isInteger(entry?.index) &&
          entry.index >= 0 &&
          entry.index < chunks.length &&
          Number.isFinite(entry.relevance_score)
      );
      if (usable.length === 0) {
        throw createHttpError(500, "rerank 返回格式异常");
      }

      // `filter` returned a fresh array, so sorting does not mutate `data`.
      return usable
        .sort((a, b) => b.relevance_score - a.relevance_score)
        .map((entry) => ({
          ...chunks[entry.index],
          relevance_score: entry.relevance_score
        }));
    } catch (error) {
      // Best-effort: log and return the original order instead of failing.
      console.error("Rerank error:", error);
      return chunks;
    }
  }
}
|
||||
Reference in New Issue
Block a user