Files
OnceLove-New/OnceLove/oncelove-graphrag/api/src/services/graphrag.service.js
KOSHM-Pig ec21df7aa6 feat: 初始化 OnceLove GraphRAG 项目基础架构
- 添加完整的项目结构,包括前端(Vue3 + Vite)、后端(Fastify)和基础设施配置
- 实现核心 GraphRAG 服务,集成 Neo4j 图数据库和 Qdrant 向量数据库
- 添加用户认证系统和管理员登录界面
- 提供 Docker 容器化部署方案和开发环境配置
- 包含项目文档、API 文档(Swagger)和测试脚本
2026-03-23 00:00:13 +08:00

590 lines
18 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import neo4j from "neo4j-driver";
/**
* 将 ISO 时间转换为秒级时间戳,用于 Qdrant 的范围过滤。
*/
const toTimestamp = (value) => {
if (!value) return null;
const ms = Date.parse(value);
if (Number.isNaN(ms)) return null;
return Math.floor(ms / 1000);
};
const normalizeOccurredAt = (value) => {
if (typeof value !== "string" || !value.trim()) {
return new Date().toISOString();
}
const ms = Date.parse(value);
if (Number.isNaN(ms)) {
return new Date().toISOString();
}
return new Date(ms).toISOString();
};
/**
* 统一 Neo4j 结果结构,避免控制器层处理图数据库 Record 细节。
*/
const toEventDto = (record) => ({
id: record.get("id"),
type: record.get("type"),
summary: record.get("summary"),
occurred_at: record.get("occurred_at"),
importance: record.get("importance"),
topics: record.get("topics") ?? [],
participants: record.get("participants") ?? []
});
/**
* 构造标准业务错误,便于控制器返回统一 HTTP 状态码。
*/
const createHttpError = (statusCode, message) => {
const error = new Error(message);
error.statusCode = statusCode;
return error;
};
/**
* 校验 /ingest 入参与向量维度。
*/
const validateIngestInput = (body, embeddingDim, canAutoEmbed) => {
const persons = Array.isArray(body.persons) ? body.persons : [];
const events = Array.isArray(body.events) ? body.events : [];
const chunks = Array.isArray(body.chunks) ? body.chunks : [];
for (const person of persons) {
if (!person?.id) {
throw createHttpError(400, "persons[].id 必填");
}
}
for (const event of events) {
if (!event?.id || !event?.occurred_at) {
throw createHttpError(400, "events[].id 与 events[].occurred_at 必填");
}
if (!Array.isArray(event.participants) || event.participants.length === 0) {
throw createHttpError(400, "events[].participants 至少包含 1 个人物 id");
}
}
for (const chunk of chunks) {
if (!chunk?.id) {
throw createHttpError(400, "chunks[].id 必填");
}
const hasVector = Array.isArray(chunk?.vector);
const hasText = typeof chunk?.text === "string" && chunk.text.trim().length > 0;
if (!hasVector && !(canAutoEmbed && hasText)) {
throw createHttpError(400, "chunks[] 需提供 vector或在配置 embedding 后提供 text");
}
if (hasVector && chunk.vector.length !== embeddingDim) {
throw createHttpError(400, `chunks[].vector 维度必须为 ${embeddingDim}`);
}
}
return { persons, events, chunks };
};
/**
* GraphRAG 核心服务:
* 1) 图谱结构写入 Neo4j
* 2) 文本向量写入 Qdrant
* 3) 按时序与向量联合检索上下文。
*/
export class GraphRagService {
/**
* @param {{ driver: import("neo4j-driver").Driver, qdrantClient: any, env: Record<string, any> }} deps
*/
constructor({ driver, qdrantClient, embeddingService, rerankService, llmService, env }) {
this.driver = driver;
this.qdrantClient = qdrantClient;
this.embeddingService = embeddingService;
this.rerankService = rerankService;
this.llmService = llmService;
this.collection = env.QDRANT_COLLECTION;
this.embeddingDim = env.EMBEDDING_DIM;
}
async resolveChunkVector(chunk) {
if (Array.isArray(chunk?.vector)) {
if (chunk.vector.length !== this.embeddingDim) {
throw createHttpError(400, `chunks[].vector 维度必须为 ${this.embeddingDim}`);
}
return chunk.vector;
}
if (!this.embeddingService?.isEnabled()) {
throw createHttpError(400, "未检测到可用 embedding 配置");
}
return this.embeddingService.embed(chunk.text ?? "");
}
async resolveQueryVector(body) {
const queryVector = body?.query_vector;
if (Array.isArray(queryVector)) {
if (queryVector.length !== this.embeddingDim) {
throw createHttpError(400, `query_vector 维度必须为 ${this.embeddingDim}`);
}
return queryVector;
}
const queryText = typeof body?.query_text === "string" ? body.query_text.trim() : "";
if (!queryText) {
throw createHttpError(400, `query_vector 维度必须为 ${this.embeddingDim},或提供 query_text`);
}
if (!this.embeddingService?.isEnabled()) {
throw createHttpError(400, "未检测到可用 embedding 配置,无法使用 query_text 检索");
}
return this.embeddingService.embed(queryText);
}
/**
* 连接就绪检查。
*/
async ready() {
const session = this.driver.session();
try {
await session.run("RETURN 1 AS ok");
await this.qdrantClient.getCollections();
return { ok: true };
} finally {
await session.close();
}
}
/**
* 获取图谱统计数据,用于前端 D3.js 可视化渲染。
* 返回 nodes人物/事件/主题)和 links关系边
*/
async getGraphStats() {
const runQuery = async (query) => {
const session = this.driver.session();
try { return await session.run(query) }
finally { await session.close() }
};
const [personResult, eventResult, topicResult, personEventResult, eventTopicResult] =
await Promise.all([
runQuery(`MATCH (p:Person) RETURN p.id AS id, p.name AS name, 'person' AS type LIMIT 200`),
runQuery(`MATCH (e:Event) RETURN e.id AS id, e.summary AS name, 'event' AS type, e.occurred_at AS occurred_at LIMIT 200`),
runQuery(`MATCH (t:Topic) RETURN t.name AS id, t.name AS name, 'topic' AS type LIMIT 100`),
runQuery(`MATCH (p:Person)-[:PARTICIPATES_IN]->(e:Event) RETURN p.id AS source, e.id AS target, 'PARTICIPATES_IN' AS type LIMIT 500`),
runQuery(`MATCH (e:Event)-[:ABOUT]->(t:Topic) RETURN e.id AS source, t.name AS target, 'ABOUT' AS type LIMIT 300`)
]);
const nodes = [];
const idSet = new Set();
const addNode = (record) => {
const id = record.get("id");
if (!id || idSet.has(id)) return;
idSet.add(id);
const occurredAt = record.keys.includes("occurred_at") ? record.get("occurred_at") : null;
nodes.push({
id,
name: record.get("name") ?? id,
type: record.get("type"),
occurred_at: occurredAt
});
};
personResult.records.forEach(addNode);
eventResult.records.forEach(addNode);
topicResult.records.forEach(addNode);
const links = [
...personEventResult.records.map((r) => ({
source: r.get("source"),
target: r.get("target"),
type: r.get("type")
})),
...eventTopicResult.records.map((r) => ({
source: r.get("source"),
target: r.get("target"),
type: r.get("type")
}))
];
return { ok: true, nodes, links, total: nodes.length };
}
/**
* 初始化图谱约束与向量集合。
*/
async bootstrap() {
const session = this.driver.session();
try {
await session.run("CREATE CONSTRAINT person_id IF NOT EXISTS FOR (p:Person) REQUIRE p.id IS UNIQUE");
await session.run("CREATE CONSTRAINT event_id IF NOT EXISTS FOR (e:Event) REQUIRE e.id IS UNIQUE");
await session.run("CREATE CONSTRAINT topic_name IF NOT EXISTS FOR (t:Topic) REQUIRE t.name IS UNIQUE");
await session.run("CREATE INDEX event_time IF NOT EXISTS FOR (e:Event) ON (e.occurred_at)");
const collections = await this.qdrantClient.getCollections();
const exists = collections.collections?.some((item) => item.name === this.collection);
if (!exists) {
await this.qdrantClient.createCollection(this.collection, {
vectors: { size: this.embeddingDim, distance: "Cosine" }
});
}
return { ok: true, collection: this.collection };
} finally {
await session.close();
}
}
/**
* 写入人物、事件、主题关系,并可同步写入向量分片。
*/
async ingest(body) {
const { persons, events, chunks } = validateIngestInput(
body ?? {},
this.embeddingDim,
this.embeddingService?.isEnabled() ?? false
);
const session = this.driver.session();
try {
await session.executeWrite(async (tx) => {
for (const person of persons) {
await tx.run(
`
MERGE (p:Person {id: $id})
SET p.name = coalesce($name, p.name),
p.updated_at = datetime()
`,
{ id: person.id, name: person.name ?? null }
);
}
for (const event of events) {
await tx.run(
`
MERGE (e:Event {id: $id})
SET e.type = $type,
e.summary = $summary,
e.occurred_at = datetime($occurred_at),
e.importance = $importance,
e.updated_at = datetime()
`,
{
id: event.id,
type: event.type ?? "event",
summary: event.summary ?? "",
occurred_at: event.occurred_at,
importance: event.importance ?? 0.5
}
);
for (const personId of event.participants) {
await tx.run(
`
MERGE (p:Person {id: $person_id})
SET p.updated_at = datetime()
WITH p
MATCH (e:Event {id: $event_id})
MERGE (p)-[:PARTICIPATES_IN]->(e)
`,
{ person_id: personId, event_id: event.id }
);
}
const topics = Array.isArray(event.topics) ? event.topics : [];
for (const topicName of topics) {
await tx.run(
`
MERGE (t:Topic {name: $name})
WITH t
MATCH (e:Event {id: $event_id})
MERGE (e)-[:ABOUT]->(t)
`,
{ name: topicName, event_id: event.id }
);
}
}
});
if (chunks.length > 0) {
const points = await Promise.all(chunks.map(async (chunk) => {
const vector = await this.resolveChunkVector(chunk);
const payload = chunk.payload ?? {};
return {
id: chunk.id,
vector,
payload: {
text: chunk.text ?? payload.text ?? "",
event_id: payload.event_id ?? null,
occurred_at: payload.occurred_at ?? null,
occurred_ts: toTimestamp(payload.occurred_at),
person_ids: Array.isArray(payload.person_ids) ? payload.person_ids : [],
source: payload.source ?? "unknown"
}
};
}));
await this.qdrantClient.upsert(this.collection, { points, wait: true });
}
return {
ok: true,
ingested: {
persons: persons.length,
events: events.length,
chunks: chunks.length
}
};
} finally {
await session.close();
}
}
/**
* 查询两个人在时间窗内的时序事件链。
*/
async queryTimeline(body) {
const { a_id, b_id, start, end, limit = 100 } = body ?? {};
if (!a_id || !b_id) {
throw createHttpError(400, "a_id 和 b_id 必填");
}
const session = this.driver.session();
try {
const result = await session.run(
`
MATCH (a:Person {id: $a_id})-[:PARTICIPATES_IN]->(e:Event)<-[:PARTICIPATES_IN]-(b:Person {id: $b_id})
WHERE ($start IS NULL OR e.occurred_at >= datetime($start))
AND ($end IS NULL OR e.occurred_at <= datetime($end))
OPTIONAL MATCH (e)-[:ABOUT]->(t:Topic)
WITH e, collect(DISTINCT t.name) AS topics
OPTIONAL MATCH (p:Person)-[:PARTICIPATES_IN]->(e)
WITH e, topics, collect(DISTINCT {id: p.id, name: p.name}) AS participants
RETURN
e.id AS id,
e.type AS type,
e.summary AS summary,
toString(e.occurred_at) AS occurred_at,
e.importance AS importance,
topics AS topics,
participants AS participants
ORDER BY e.occurred_at ASC
LIMIT $limit
`,
{
a_id,
b_id,
start: start ?? null,
end: end ?? null,
limit: neo4j.int(limit)
}
);
return {
ok: true,
total: result.records.length,
timeline: result.records.map(toEventDto)
};
} finally {
await session.close();
}
}
/**
* 先做向量召回,再回查图谱事件上下文,输出 GraphRAG 检索结果。
*/
async queryGraphRag(body) {
const {
a_id = null,
b_id = null,
start = null,
end = null,
top_k = 8,
timeline_limit = 60
} = body ?? {};
const queryVector = await this.resolveQueryVector(body ?? {});
const filterMust = [];
const startTs = toTimestamp(start);
const endTs = toTimestamp(end);
if (startTs !== null || endTs !== null) {
filterMust.push({
key: "occurred_ts",
range: {
...(startTs !== null ? { gte: startTs } : {}),
...(endTs !== null ? { lte: endTs } : {})
}
});
}
const searchResult = await this.qdrantClient.search(this.collection, {
vector: queryVector,
limit: top_k,
with_payload: true,
...(filterMust.length > 0 ? { filter: { must: filterMust } } : {})
});
let chunks = searchResult.map((item) => ({
id: item.id,
score: item.score,
text: item.payload?.text ?? "",
payload: item.payload ?? {}
}));
if (body?.query_text && this.rerankService?.isEnabled()) {
chunks = await this.rerankService.rerank(body.query_text, chunks);
}
const eventIds = Array.from(
new Set(
chunks
.map((item) => item.payload?.event_id)
.filter((id) => typeof id === "string" && id.length > 0)
)
);
const session = this.driver.session();
try {
const result = await session.run(
`
MATCH (e:Event)
WHERE (size($event_ids) = 0 OR e.id IN $event_ids)
AND ($start IS NULL OR e.occurred_at >= datetime($start))
AND ($end IS NULL OR e.occurred_at <= datetime($end))
AND ($a_id IS NULL OR EXISTS { MATCH (:Person {id: $a_id})-[:PARTICIPATES_IN]->(e) })
AND ($b_id IS NULL OR EXISTS { MATCH (:Person {id: $b_id})-[:PARTICIPATES_IN]->(e) })
OPTIONAL MATCH (e)-[:ABOUT]->(t:Topic)
WITH e, collect(DISTINCT t.name) AS topics
OPTIONAL MATCH (p:Person)-[:PARTICIPATES_IN]->(e)
WITH e, topics, collect(DISTINCT {id: p.id, name: p.name}) AS participants
RETURN
e.id AS id,
e.type AS type,
e.summary AS summary,
toString(e.occurred_at) AS occurred_at,
e.importance AS importance,
topics AS topics,
participants AS participants
ORDER BY e.occurred_at DESC
LIMIT $timeline_limit
`,
{
event_ids: eventIds,
start,
end,
a_id,
b_id,
timeline_limit: neo4j.int(timeline_limit)
}
);
return {
ok: true,
retrieved_chunks: chunks,
timeline_context: result.records.map(toEventDto)
};
} finally {
await session.close();
}
}
async analyzeAndIngest(text) {
if (!this.llmService?.isEnabled()) {
throw createHttpError(400, "LLM 服务未配置,请检查 LLM_BASE_URL/LLM_API_KEY/LLM_MODEL_NAME");
}
const analysis = await this.llmService.analyzeText(text);
console.log("[DEBUG] LLM analysis result:", JSON.stringify(analysis));
if (!analysis || (!analysis.persons && !analysis.events && !analysis.topics)) {
throw createHttpError(500, `LLM 返回数据异常: ${JSON.stringify(analysis)}`);
}
const session = this.driver.session();
console.log("[DEBUG] Got session, driver:", !!this.driver);
try {
const deleteResult = await session.run("MATCH (n) DETACH DELETE n", {});
console.log("[DEBUG] Delete result:", deleteResult?.summary?.counters);
const personMap = {};
for (const person of (analysis.persons || [])) {
console.log("[DEBUG] Creating person:", person);
const result = await session.run(
`CREATE (p:Person {id: $id, name: $name, description: $description}) RETURN p.id AS id`,
{ id: person.id, name: person.name, description: person.description || "" }
);
console.log("[DEBUG] Person create result:", result?.records?.length);
if (result?.records?.length > 0) {
personMap[person.id] = person.name;
}
}
const topicMap = {};
for (const topic of (analysis.topics || [])) {
await session.run(
`CREATE (t:Topic {name: $name})`,
{ name: topic.name }
);
topicMap[topic.id] = topic.name;
}
for (const event of (analysis.events || [])) {
const normalizedOccurredAt = normalizeOccurredAt(event.occurred_at);
await session.run(
`CREATE (e:Event {
id: $id,
type: $type,
summary: $summary,
occurred_at: datetime($occurred_at),
importance: $importance
})`,
{
id: event.id,
type: event.type || "general",
summary: event.summary || "",
occurred_at: normalizedOccurredAt,
importance: neo4j.int(event.importance || 5)
}
);
for (const pid of (event.participants || [])) {
await session.run(
`MATCH (p:Person {id: $pid}), (e:Event {id: $eid})
MERGE (p)-[:PARTICIPATES_IN]->(e)`,
{ pid, eid: event.id }
);
}
for (const tid of (event.topics || [])) {
const topicName = topicMap[tid];
if (topicName) {
await session.run(
`MATCH (e:Event {id: $eid}), (t:Topic {name: $tname})
MERGE (e)-[:ABOUT]->(t)`,
{ eid: event.id, tname: topicName }
);
}
}
}
for (const rel of (analysis.relations || [])) {
const sourceName = personMap[rel.source];
const targetName = personMap[rel.target];
if (sourceName && targetName) {
await session.run(
`MATCH (s:Person {name: $sname}), (t:Person {name: $tname})
MERGE (s)-[r:${rel.type}]->(t)`,
{ sname: sourceName, tname: targetName }
);
}
}
return {
ok: true,
message: "分析并导入成功",
analysis,
stats: {
persons: (analysis.persons || []).length,
events: (analysis.events || []).length,
topics: (analysis.topics || []).length,
relations: (analysis.relations || []).length
}
};
} finally {
await session.close();
}
}
}