From 447062446af6080437e707b955873dd4a2ced544 Mon Sep 17 00:00:00 2001 From: KOSHM-Pig <2578878700@qq.com> Date: Wed, 25 Mar 2026 00:28:22 +0800 Subject: [PATCH] =?UTF-8?q?feat(api):=20=E6=96=B0=E5=A2=9E=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E9=97=AE=E7=AD=94=E3=80=81=E6=84=8F=E5=9B=BE=E5=88=86?= =?UTF-8?q?=E7=B1=BB=E4=B8=8E=E4=BB=BB=E5=8A=A1=E7=AE=A1=E7=90=86=E5=8A=9F?= =?UTF-8?q?=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 SocialImageAgentService 支持朋友圈/聊天截图解析,提供图片线索提取与图谱问答建议 - 扩展 LLMService 支持多模型配置(意图分类、图片模型)与流式响应,增加思考模式控制 - 新增 /intent/classify 端点用于轻量意图分类(问答/导入/混合)以节省 token - 新增 /tasks/{taskId} 与 /tasks/{taskId}/retry 端点用于流式任务状态查询与重试 - 前端 Dashboard 扩展人物详情显示(年龄标签、出生日期、别名、关系置信度等) - 前端导入流程增加任务 ID 追踪与质量回放信息展示 --- .../oncelove-graphrag/api/src/config/env.js | 3 + .../src/controllers/graphrag.controller.js | 374 +++- .../api/src/routes/graphrag.route.js | 70 + OnceLove/oncelove-graphrag/api/src/server.js | 4 +- .../api/src/services/graphrag.service.js | 1771 ++++++++++++++++- .../api/src/services/index.js | 1 + .../api/src/services/llm.service.js | 288 ++- .../services/social-image-agent.service.js | 272 +++ .../frontend/src/components/GraphQaPanel.vue | 1055 +++++++++- .../frontend/src/views/Dashboard.vue | 163 ++ 10 files changed, 3852 insertions(+), 149 deletions(-) create mode 100644 OnceLove/oncelove-graphrag/api/src/services/social-image-agent.service.js diff --git a/OnceLove/oncelove-graphrag/api/src/config/env.js b/OnceLove/oncelove-graphrag/api/src/config/env.js index f37483a..2f9a7d0 100644 --- a/OnceLove/oncelove-graphrag/api/src/config/env.js +++ b/OnceLove/oncelove-graphrag/api/src/config/env.js @@ -26,5 +26,8 @@ export const env = { LLM_BASE_URL: process.env.LLM_BASE_URL ?? "", LLM_API_KEY: process.env.LLM_API_KEY ?? "", LLM_MODEL_NAME: process.env.LLM_MODEL_NAME ?? "", + LLM_INTENT_MODEL_NAME: process.env.LLM_INTENT_MODEL_NAME ?? "", + LLM_IMAGE_MODEL_NAME: process.env.LLM_IMAGE_MODEL_NAME ?? "", + LLM_THINKING_MODE: process.env.LLM_THINKING_MODE ?? "auto", ADMIN_PASSWORD: process.env.ADMIN_PASSWORD ?? "oncelove123" }; diff --git a/OnceLove/oncelove-graphrag/api/src/controllers/graphrag.controller.js b/OnceLove/oncelove-graphrag/api/src/controllers/graphrag.controller.js index ebe7529..14bb859 100644 --- a/OnceLove/oncelove-graphrag/api/src/controllers/graphrag.controller.js +++ b/OnceLove/oncelove-graphrag/api/src/controllers/graphrag.controller.js @@ -14,6 +14,187 @@ const sendServiceResult = async (reply, action) => { } }; +// 内存任务存储:用于追踪流式任务状态、进度和重试信息 +const TASK_STORE_MAX = 300; +const taskStore = new Map(); + +const nowIso = () => new Date().toISOString(); + +const trimTaskStore = () => { + if (taskStore.size <= TASK_STORE_MAX) return; + const entries = [...taskStore.entries()].sort( + (a, b) => new Date(a[1].updated_at).getTime() - new Date(b[1].updated_at).getTime() + ); + const toDelete = entries.slice(0, Math.max(taskStore.size - TASK_STORE_MAX, 0)); + toDelete.forEach(([taskId]) => taskStore.delete(taskId)); +}; + +const createTask = (type, payload) => { + const taskId = `task_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; + const record = { + task_id: taskId, + type, + status: "pending", + retry_count: 0, + payload: payload || {}, + last_progress: null, + result_summary: null, + quality_replay: null, + error: null, + started_at: nowIso(), + updated_at: nowIso(), + finished_at: null + }; + taskStore.set(taskId, record); + trimTaskStore(); + return record; +}; + +const updateTask = (taskId, patch = {}) => { + const record = taskStore.get(taskId); + if (!record) return null; + const next = { + ...record, + ...patch, + updated_at: nowIso() + }; + taskStore.set(taskId, next); + return next; +}; + +const buildTaskResultSummary = (type, result) => { + if (type === "graphrag_multi_stream") { + return { + rounds: Number(result?.meta?.rounds || result?.rounds?.length || 0), + retrieval_mode: result?.meta?.retrieval_mode_requested || null, + confidence: result?.final_review?.confidence ?? null, + answer_preview: String(result?.final_review?.answer || result?.answer || "").slice(0, 240) + }; + } + if (type === "analyze_stream") { + return { + created: result?.stats?.created || {}, + updated: result?.stats?.updated || {}, + corrected_events: Number(result?.stats?.corrected_events || 0), + vector_indexed: Number(result?.vector_sync?.indexed || 0) + }; + } + return { ok: Boolean(result?.ok) }; +}; + +const buildTaskQualityReplay = (type, taskPayload, result, error = null) => { + const replay = { + input: {}, + output: {}, + error: error ? { message: error?.message || "internal error" } : null + }; + if (type === "graphrag_multi_stream") { + replay.input = { + userId: taskPayload?.userId || "default", + query: String(taskPayload?.query_text || taskPayload?.query || "").slice(0, 300), + retrieval_mode: taskPayload?.retrieval_mode || "hybrid", + max_rounds: Number(taskPayload?.max_rounds || 3), + top_k: Number(taskPayload?.top_k || 8) + }; + replay.output = result ? { + confidence: result?.final_review?.confidence ?? null, + relation_quality: result?.meta?.relation_quality || null, + retrieval_mode_adaptive: Boolean(result?.meta?.retrieval_mode_adaptive) + } : {}; + return replay; + } + if (type === "analyze_stream") { + replay.input = { + userId: taskPayload?.userId || "default", + text_length: String(taskPayload?.text || "").length, + parallelism: Number(taskPayload?.parallelism || 0) || null + }; + replay.output = result ? { + created: result?.stats?.created || {}, + corrected_events: Number(result?.stats?.corrected_events || 0), + normalization: result?.normalization || null + } : {}; + return replay; + } + replay.input = { type }; + replay.output = result ? { ok: Boolean(result?.ok) } : {}; + return replay; +}; + +const safeTaskView = (record) => { + if (!record) return null; + return { + task_id: record.task_id, + type: record.type, + status: record.status, + retry_count: record.retry_count, + last_progress: record.last_progress, + result_summary: record.result_summary, + quality_replay: record.quality_replay, + error: record.error, + started_at: record.started_at, + updated_at: record.updated_at, + finished_at: record.finished_at + }; +}; + +const createStreamContext = (request, reply) => { + reply.hijack(); + const raw = reply.raw; + const requestOrigin = request.headers.origin; + raw.setHeader('Access-Control-Allow-Origin', requestOrigin || '*'); + raw.setHeader('Vary', 'Origin'); + raw.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization'); + raw.setHeader('Access-Control-Allow-Methods', 'POST, OPTIONS'); + raw.setHeader('Content-Type', 'text/event-stream; charset=utf-8'); + raw.setHeader('Cache-Control', 'no-cache, no-transform'); + raw.setHeader('Connection', 'keep-alive'); + raw.setHeader('X-Accel-Buffering', 'no'); + raw.flushHeaders?.(); + const runId = `run_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; + const push = (event, data) => { + const payload = data && typeof data === 'object' + ? { run_id: runId, ...data } + : { run_id: runId, value: data }; + raw.write(`event: ${event}\n`); + raw.write(`data: ${JSON.stringify(payload)}\n\n`); + }; + return { raw, runId, push }; +}; + +const streamWithGuard = async (request, reply, action) => { + const { raw, runId, push } = createStreamContext(request, reply); + try { + push('meta', { run_id: runId }); + const result = await action(push, runId); + push('done', result); + } catch (error) { + push('error', { + ok: false, + statusCode: Number(error?.statusCode) || 500, + message: error?.message || 'internal error' + }); + } finally { + raw.end(); + } +}; + +const executeTaskByType = async (service, taskRecord, onProgress = () => {}) => { + if (!taskRecord) throw new Error("任务不存在"); + if (taskRecord.type === "graphrag_multi_stream") { + return service.queryGraphRagMultiRound(taskRecord.payload, { onProgress }); + } + if (taskRecord.type === "analyze_stream") { + const payload = taskRecord.payload || {}; + return service.incrementalUpdate(payload.text, payload.userId || "default", { + parallelism: payload.parallelism, + expertReview: payload.expertReview, + onProgress + }); + } + throw new Error(`不支持的任务类型: ${taskRecord.type}`); +}; + /** * GraphRAG 控制器:负责请求转发与响应封装。 */ @@ -30,37 +211,38 @@ export const createGraphRagController = (service, multiAgentService) => ({ sendServiceResult(reply, () => service.queryGraphRag(request.body)), queryGraphRagMultiRound: async (request, reply) => sendServiceResult(reply, () => service.queryGraphRagMultiRound(request.body)), + classifyIntent: async (request, reply) => + sendServiceResult(reply, () => service.classifyIntent(request.body)), queryGraphRagMultiRoundStream: async (request, reply) => { - reply.hijack(); - const raw = reply.raw; - const requestOrigin = request.headers.origin; - raw.setHeader('Access-Control-Allow-Origin', requestOrigin || '*'); - raw.setHeader('Vary', 'Origin'); - raw.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization'); - raw.setHeader('Access-Control-Allow-Methods', 'POST, OPTIONS'); - raw.setHeader('Content-Type', 'text/event-stream; charset=utf-8'); - raw.setHeader('Cache-Control', 'no-cache, no-transform'); - raw.setHeader('Connection', 'keep-alive'); - raw.setHeader('X-Accel-Buffering', 'no'); - raw.flushHeaders?.(); - const push = (event, data) => { - raw.write(`event: ${event}\n`); - raw.write(`data: ${JSON.stringify(data)}\n\n`); - }; - try { - const result = await service.queryGraphRagMultiRound(request.body, { - onProgress: (event) => push('progress', event) - }); - push('done', result); - } catch (error) { - push('error', { - ok: false, - statusCode: Number(error?.statusCode) || 500, - message: error?.message || 'internal error' - }); - } finally { - raw.end(); - } + const task = createTask("graphrag_multi_stream", request.body || {}); + return streamWithGuard(request, reply, async (push) => { + updateTask(task.task_id, { status: "running" }); + push("meta", { task_id: task.task_id }); + try { + const result = await executeTaskByType(service, task, (event) => { + updateTask(task.task_id, { last_progress: event || null }); + push('progress', event); + }); + updateTask(task.task_id, { + status: "done", + result_summary: buildTaskResultSummary(task.type, result), + quality_replay: buildTaskQualityReplay(task.type, task.payload, result), + finished_at: nowIso(), + error: null + }); + return result; + } catch (error) { + updateTask(task.task_id, { + status: "error", + quality_replay: buildTaskQualityReplay(task.type, task.payload, null, error), + finished_at: nowIso(), + error: { + message: error?.message || "internal error" + } + }); + throw error; + } + }); }, analyzeAndIngest: async (request, reply) => sendServiceResult(reply, () => service.incrementalUpdate(request.body.text, request.body.userId || 'default')), @@ -70,44 +252,104 @@ export const createGraphRagController = (service, multiAgentService) => ({ limit: request.body.limit })), analyzeAndIngestStream: async (request, reply) => { - reply.hijack(); - const raw = reply.raw; - const requestOrigin = request.headers.origin; - raw.setHeader('Access-Control-Allow-Origin', requestOrigin || '*'); - raw.setHeader('Vary', 'Origin'); - raw.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization'); - raw.setHeader('Access-Control-Allow-Methods', 'POST, OPTIONS'); - raw.setHeader('Content-Type', 'text/event-stream; charset=utf-8'); - raw.setHeader('Cache-Control', 'no-cache, no-transform'); - raw.setHeader('Connection', 'keep-alive'); - raw.setHeader('X-Accel-Buffering', 'no'); - raw.flushHeaders?.(); - const push = (event, data) => { - raw.write(`event: ${event}\n`); - raw.write(`data: ${JSON.stringify(data)}\n\n`); - }; - try { + const task = createTask("analyze_stream", request.body || {}); + return streamWithGuard(request, reply, async (push) => { + updateTask(task.task_id, { status: "running" }); + push("meta", { task_id: task.task_id }); push('progress', { stage: 'start' }); - const result = await service.incrementalUpdate( - request.body.text, - request.body.userId || 'default', - { - parallelism: request.body.parallelism, - expertReview: request.body.expertReview, - onProgress: (event) => push('progress', event) - } - ); - push('done', result); - } catch (error) { - push('error', { - ok: false, - statusCode: Number(error?.statusCode) || 500, - message: error?.message || 'internal error' - }); - } finally { - raw.end(); - } + try { + const result = await executeTaskByType(service, task, (event) => { + updateTask(task.task_id, { last_progress: event || null }); + push('progress', event); + }); + updateTask(task.task_id, { + status: "done", + result_summary: buildTaskResultSummary(task.type, result), + quality_replay: buildTaskQualityReplay(task.type, task.payload, result), + finished_at: nowIso(), + error: null + }); + return result; + } catch (error) { + updateTask(task.task_id, { + status: "error", + quality_replay: buildTaskQualityReplay(task.type, task.payload, null, error), + finished_at: nowIso(), + error: { + message: error?.message || "internal error" + } + }); + throw error; + } + }); }, + getTaskStatus: async (request, reply) => + sendServiceResult(reply, async () => { + const taskId = String(request.params.taskId || "").trim(); + if (!taskId) { + const error = new Error("taskId 不能为空"); + error.statusCode = 400; + throw error; + } + const task = taskStore.get(taskId); + if (!task) { + const error = new Error(`任务不存在: ${taskId}`); + error.statusCode = 404; + throw error; + } + return { ok: true, task: safeTaskView(task) }; + }), + retryTask: async (request, reply) => + sendServiceResult(reply, async () => { + const sourceTaskId = String(request.params.taskId || "").trim(); + if (!sourceTaskId) { + const error = new Error("taskId 不能为空"); + error.statusCode = 400; + throw error; + } + const sourceTask = taskStore.get(sourceTaskId); + if (!sourceTask) { + const error = new Error(`任务不存在: ${sourceTaskId}`); + error.statusCode = 404; + throw error; + } + const overridePayload = request.body && typeof request.body === "object" ? request.body : {}; + const nextTask = createTask(sourceTask.type, { + ...(sourceTask.payload || {}), + ...overridePayload + }); + updateTask(nextTask.task_id, { + status: "running", + retry_count: Number(sourceTask.retry_count || 0) + 1, + quality_replay: sourceTask.quality_replay || null + }); + executeTaskByType(service, nextTask, (event) => { + updateTask(nextTask.task_id, { last_progress: event || null }); + }).then((result) => { + updateTask(nextTask.task_id, { + status: "done", + result_summary: buildTaskResultSummary(nextTask.type, result), + quality_replay: buildTaskQualityReplay(nextTask.type, nextTask.payload, result), + finished_at: nowIso(), + error: null + }); + }).catch((err) => { + updateTask(nextTask.task_id, { + status: "error", + quality_replay: buildTaskQualityReplay(nextTask.type, nextTask.payload, null, err), + finished_at: nowIso(), + error: { + message: err?.message || "internal error" + } + }); + }); + return { + ok: true, + source_task_id: sourceTaskId, + task_id: nextTask.task_id, + status: "running" + }; + }), queryHistory: async (request, reply) => sendServiceResult(reply, () => service.queryRelationshipHistory( request.body.userId || 'default', diff --git a/OnceLove/oncelove-graphrag/api/src/routes/graphrag.route.js b/OnceLove/oncelove-graphrag/api/src/routes/graphrag.route.js index 01e017b..56c5268 100644 --- a/OnceLove/oncelove-graphrag/api/src/routes/graphrag.route.js +++ b/OnceLove/oncelove-graphrag/api/src/routes/graphrag.route.js @@ -262,6 +262,29 @@ export const registerGraphRagRoutes = async (app, controller) => { * description: 参数错误 */ app.post("/query/graphrag", controller.queryGraphRag); + /** + * @openapi + * /intent/classify: + * post: + * tags: + * - GraphRAG + * summary: 轻量意图分类(问答/导入/混合)用于节省 token + * requestBody: + * required: true + * content: + * application/json: + * schema: + * type: object + * properties: + * text: + * type: string + * responses: + * 200: + * description: 分类成功 + * 400: + * description: 参数错误 + */ + app.post("/intent/classify", controller.classifyIntent); /** * @openapi * /query/graphrag/multi: @@ -353,6 +376,53 @@ export const registerGraphRagRoutes = async (app, controller) => { * description: 参数错误 */ app.post("/analyze/stream", controller.analyzeAndIngestStream); + /** + * @openapi + * /tasks/{taskId}: + * get: + * tags: + * - GraphRAG + * summary: 查询任务状态(适用于流式问答/流式分析任务) + * parameters: + * - in: path + * name: taskId + * required: true + * schema: + * type: string + * responses: + * 200: + * description: 查询成功 + * 404: + * description: 任务不存在 + */ + app.get("/tasks/:taskId", controller.getTaskStatus); + /** + * @openapi + * /tasks/{taskId}/retry: + * post: + * tags: + * - GraphRAG + * summary: 重试指定任务(异步触发,立即返回新任务ID) + * parameters: + * - in: path + * name: taskId + * required: true + * schema: + * type: string + * requestBody: + * required: false + * content: + * application/json: + * schema: + * type: object + * description: 可选覆盖原任务参数 + * responses: + * 200: + * description: 重试任务已启动 + * 404: + * description: 原任务不存在 + */ + app.post("/tasks/:taskId/retry", controller.retryTask); /** * @openapi diff --git a/OnceLove/oncelove-graphrag/api/src/server.js b/OnceLove/oncelove-graphrag/api/src/server.js index 02dddc0..f3fa7f8 100644 --- a/OnceLove/oncelove-graphrag/api/src/server.js +++ b/OnceLove/oncelove-graphrag/api/src/server.js @@ -5,7 +5,7 @@ import swaggerUi from "@fastify/swagger-ui"; import { env } from "./config/env.js"; import { createClients } from "./config/clients.js"; import { createSwaggerSpec } from "./config/swagger.js"; -import { EmbeddingService, RerankService, GraphRagService, LLMService, MultiAgentService } from "./services/index.js"; +import { EmbeddingService, RerankService, GraphRagService, LLMService, MultiAgentService, SocialImageAgentService } from "./services/index.js"; import { createGraphRagController } from "./controllers/index.js"; import { registerRoutes } from "./routes/index.js"; @@ -34,12 +34,14 @@ export const createServer = async () => { const embeddingService = new EmbeddingService(env); const rerankService = new RerankService(env); const llmService = new LLMService(env); + const socialImageAgentService = new SocialImageAgentService({ llmService }); const service = new GraphRagService({ driver: neo4jDriver, qdrantClient, embeddingService, rerankService, llmService, + socialImageAgentService, env }); const multiAgentService = new MultiAgentService({ diff --git a/OnceLove/oncelove-graphrag/api/src/services/graphrag.service.js b/OnceLove/oncelove-graphrag/api/src/services/graphrag.service.js index 0cee2c5..896caca 100644 --- a/OnceLove/oncelove-graphrag/api/src/services/graphrag.service.js +++ b/OnceLove/oncelove-graphrag/api/src/services/graphrag.service.js @@ -75,6 +75,272 @@ const mergeCompactSummary = (existingSummary, incomingSummary, options = {}) => return compact.length > maxLen ? `${compact.slice(0, maxLen - 1)}…` : compact; }; +const normalizeNameKey = (value) => String(value || "").trim().toLowerCase().replace(/\s+/g, ""); +const normalizeMentionKey = (value) => String(value || "").toLowerCase().replace(/[\s\u200B-\u200D\uFEFF·•\-_'"“”‘’`~!@#$%^&*()+=|\\/:;,.!?,。!?、()【】《》]/g, ""); + +const PERSON_ROLE_SET = new Set(["self", "partner", "ex", "friend", "colleague", "other", "unknown"]); + +const normalizePersonRole = (value) => { + const raw = String(value || "").trim().toLowerCase(); + if (!raw) return "unknown"; + if (["self", "me", "user", "本人", "自己", "我"].includes(raw)) return "self"; + if (["partner", "lover", "spouse", "伴侣", "对象", "恋人", "女友", "男友", "妻子", "丈夫"].includes(raw)) return "partner"; + if (["ex", "former", "前任", "旧爱", "前男友", "前女友"].includes(raw)) return "ex"; + if (["friend", "好友", "朋友", "闺蜜", "兄弟"].includes(raw)) return "friend"; + if (["colleague", "coworker", "同事", "同僚"].includes(raw)) return "colleague"; + if (["other", "others", "其他"].includes(raw)) return "other"; + return PERSON_ROLE_SET.has(raw) ? raw : "unknown"; +}; + +const inferPersonRoleFromId = (id, userId) => { + const pid = String(id || "").trim(); + if (!pid) return "unknown"; + if (pid === "user" || pid === `${userId}__user`) return "self"; + if (pid === "partner" || pid === `${userId}__partner` || /(?:^|__)partner$/i.test(pid)) return "partner"; + if (/(?:^|__)ex(?:_|$)/i.test(pid)) return "ex"; + return "unknown"; +}; + +const mergeAliases = (...sources) => { + const out = []; + const seen = new Set(); + for (const src of sources) { + const list = Array.isArray(src) ? src : [src]; + for (const item of list) { + const text = String(item || "").trim(); + if (!text || text.length > 48) continue; + const key = normalizeMentionKey(text); + if (!key || seen.has(key)) continue; + seen.add(key); + out.push(text); + } + } + return out.slice(0, 12); +}; + +const extractSamePersonAliasPairs = (text) => { + const raw = String(text || ""); + if (!raw) return []; + const pairs = []; + const seen = new Set(); + const addPair = (a, b) => { + const left = String(a || "").trim().replace(/^["“‘'「『]|["”’'」』]$/g, ""); + const right = String(b || "").trim().replace(/^["“‘'「『]|["”’'」』]$/g, ""); + if (!left || !right || left === right) return; + const leftKey = normalizeMentionKey(left); + const rightKey = normalizeMentionKey(right); + if (!leftKey || !rightKey || leftKey === rightKey) return; + const key = leftKey < rightKey ? `${leftKey}__${rightKey}` : `${rightKey}__${leftKey}`; + if (seen.has(key)) return; + seen.add(key); + pairs.push([left, right]); + }; + const patterns = [ + /([A-Za-z0-9\u4e00-\u9fa5·]{2,12})和([A-Za-z0-9\u4e00-\u9fa5·]{2,12})是(?:同一|一个)人/g, + /([A-Za-z0-9\u4e00-\u9fa5·]{2,12})(?:就)?是([A-Za-z0-9\u4e00-\u9fa5·]{2,12})(?:本)?人/g, + /([A-Za-z0-9\u4e00-\u9fa5·]{2,12})(?:也叫|又名|别名(?:是|叫)?)([A-Za-z0-9\u4e00-\u9fa5·]{2,12})/g + ]; + for (const pattern of patterns) { + pattern.lastIndex = 0; + let match; + while ((match = pattern.exec(raw)) !== null) { + addPair(match[1], match[2]); + } + } + return pairs; +}; + +const normalizeOptionalText = (value, maxLen = 120) => { + const text = String(value || "").trim(); + if (!text) return null; + return text.length > maxLen ? text.slice(0, maxLen) : text; +}; + +const pickFirstText = (...values) => { + for (const value of values) { + const normalized = normalizeOptionalText(value); + if (normalized) return normalized; + } + return null; +}; + +const normalizeConsentStatus = (value) => { + const raw = String(value || "").trim().toLowerCase(); + if (!raw) return "unknown"; + if (["granted", "allow", "allowed", "consented", "同意", "已同意", "授权"].includes(raw)) return "granted"; + if (["denied", "deny", "rejected", "拒绝", "不同意"].includes(raw)) return "denied"; + if (["revoked", "withdrawn", "撤回", "已撤回"].includes(raw)) return "revoked"; + if (["unknown", "未确认", "未知", "待确认"].includes(raw)) return "unknown"; + return "unknown"; +}; + +const normalizeDataSource = (value) => { + const raw = String(value || "").trim().toLowerCase(); + if (!raw) return "ingest"; + if (["ingest", "user_input", "manual", "import", "llm_extract", "system"].includes(raw)) return raw; + if (["用户输入", "手动", "导入"].includes(raw)) return "manual"; + if (["模型抽取", "llm"].includes(raw)) return "llm_extract"; + return "ingest"; +}; + +const readPersonProfileFields = (person = {}) => ({ + occupation: pickFirstText(person.occupation, person.job, person.profession, person.work), + education_background: pickFirstText(person.education_background, person.education, person.educationLevel, person.school), + residential_status: pickFirstText(person.residential_status, person.residence_status, person.residence, person.location, person.current_city), + data_source: normalizeDataSource(person.data_source || person.source), + consent_status: normalizeConsentStatus(person.consent_status || person.consent) +}); + +const normalizeBirthDateUtc = (value) => { + if (!value) return null; + if (value instanceof Date && !Number.isNaN(value.getTime())) return value.toISOString(); + if (typeof value === "number" && Number.isFinite(value)) { + const year = Math.round(value); + if (year >= 1900 && year <= 2100) return `${year}-01-01T00:00:00.000Z`; + } + const text = String(value || "").trim(); + if (!text) return null; + const yearOnly = text.match(/^(\d{4})$/); + if (yearOnly) { + const year = Number(yearOnly[1]); + if (year >= 1900 && year <= 2100) return `${year}-01-01T00:00:00.000Z`; + } + const ms = Date.parse(text); + if (Number.isNaN(ms)) return null; + const date = new Date(ms); + const year = date.getUTCFullYear(); + if (year < 1900 || year > 2100) return null; + return date.toISOString(); +}; + +const inferBirthDateUtcFromPerson = (person = {}) => { + const direct = normalizeBirthDateUtc( + person.birth_date_utc + || person.birth_date + || person.birthday + || person.dob + || person.birthdate + ); + if (direct) return direct; + const yearRaw = Number(person.birth_year); + if (Number.isFinite(yearRaw) && yearRaw >= 1900 && yearRaw <= 2100) { + return `${Math.round(yearRaw)}-01-01T00:00:00.000Z`; + } + const ageRaw = Number(person.age); + if (Number.isFinite(ageRaw) && ageRaw >= 1 && ageRaw <= 120) { + const now = new Date(); + return `${now.getUTCFullYear() - Math.round(ageRaw)}-01-01T00:00:00.000Z`; + } + return null; +}; + +const computeAgeByBirthDateUtc = (birthDateUtc, now = new Date()) => { + const iso = normalizeBirthDateUtc(birthDateUtc); + if (!iso) return null; + const birth = new Date(iso); + if (Number.isNaN(birth.getTime())) return null; + let age = now.getUTCFullYear() - birth.getUTCFullYear(); + const hasHadBirthday = + now.getUTCMonth() > birth.getUTCMonth() + || (now.getUTCMonth() === birth.getUTCMonth() && now.getUTCDate() >= birth.getUTCDate()); + if (!hasHadBirthday) age -= 1; + if (age < 0 || age > 120) return null; + return age; +}; + +const buildAgeLabelByBirthDateUtc = (birthDateUtc, now = new Date()) => { + const age = computeAgeByBirthDateUtc(birthDateUtc, now); + if (age == null) return null; + return `${age}岁`; +}; + +const buildMentionKeywords = ({ name, aliases, relationToUser } = {}) => { + const relationMap = { + 恋爱: ["对象", "恋人", "伴侣"], + 前任: ["前任"], + 暧昧: ["暧昧"], + 同学: ["同学"], + 同事: ["同事"], + 本人: ["我", "本人", "自己"] + }; + return mergeAliases(name, aliases, relationMap[String(relationToUser || "").trim()] || []); +}; + +const extractActiveLocations = (summaries = [], limit = 6) => { + const counter = new Map(); + for (const summaryRaw of summaries) { + const summary = String(summaryRaw || ""); + const matches = summary.matchAll(/(?:在|于|到|去|回到)([\u4e00-\u9fa5A-Za-z0-9]{2,24}(?:大学|学院|学校|校区|楼|路|街|巷|站|市|区|县|村|镇|广场|公园|商场|酒店|医院))/g); + for (const match of matches) { + const place = String(match?.[1] || "").trim(); + if (!place) continue; + counter.set(place, (counter.get(place) || 0) + 1); + } + } + return [...counter.entries()] + .sort((a, b) => b[1] - a[1] || b[0].length - a[0].length) + .map(([name]) => name) + .slice(0, Math.max(1, limit)); +}; + +const buildTopicProfile = ({ eventTypes = [], summaries = [] } = {}) => { + const typeCounter = new Map(); + for (const type of eventTypes) { + const clean = String(type || "").trim(); + if (!clean) continue; + typeCounter.set(clean, (typeCounter.get(clean) || 0) + 1); + } + const keywords = []; + for (const summary of summaries) { + const picks = String(summary || "").match(/[\u4e00-\u9fff]{2,6}/g) || []; + for (const word of picks) { + if (["关系", "事件", "情况", "问题", "目前", "现在", "进行", "发生"].includes(word)) continue; + keywords.push(word); + if (keywords.length >= 80) break; + } + if (keywords.length >= 80) break; + } + const kwCounter = new Map(); + for (const kw of keywords) kwCounter.set(kw, (kwCounter.get(kw) || 0) + 1); + const topTypes = [...typeCounter.entries()].sort((a, b) => b[1] - a[1]).map(([name]) => name).slice(0, 4); + const topKeywords = [...kwCounter.entries()].sort((a, b) => b[1] - a[1]).map(([name]) => name).slice(0, 4); + return [...new Set([...topTypes, ...topKeywords])].slice(0, 8); +}; + +const buildDataQuality = ({ sourceCount = 0, coEventCount = 0, aliases = [], relationConfidence = 0, birthDateUtc = null } = {}) => { + let score = 0.2; + score += Math.min(0.22, Number(sourceCount || 0) * 0.03); + score += Math.min(0.18, Number(coEventCount || 0) * 0.03); + score += Math.min(0.14, (Array.isArray(aliases) ? aliases.length : 0) * 0.03); + score += Math.min(0.22, Math.max(0, Number(relationConfidence || 0)) * 0.22); + if (birthDateUtc) score += 0.08; + score = Math.min(1, Math.max(0, score)); + return Number(score.toFixed(3)); +}; + +const buildPrivacyLevel = ({ summaries = [], relationToUser = "未知" } = {}) => { + const text = `${(summaries || []).join(" ")} ${relationToUser || ""}`; + if (/(出轨|分娩|遗弃|婴儿|亲密|创伤|法律|伦理|精神|隐私|性)/i.test(text)) return "high"; + if (/(恋爱|前任|暧昧|家庭|情感)/i.test(text)) return "medium"; + return "low"; +}; + +const RELATION_SYMMETRIC_TYPES = new Set([ + "RELATED_TO", + "KNOWS", + "FRIEND", + "FRIEND_OF", + "INTERACTS_WITH", + "CONTACT", + "CLOSE_TO" +]); + +const NEGATIVE_SIGNAL_RE = /(吵架|冷战|分手|拉黑|争执|冲突|误会|背叛|欺骗|冷淡|失联|矛盾|第三者|出轨|拒绝|否认)/i; +const POSITIVE_SIGNAL_RE = /(和好|复合|约会|关心|支持|信任|陪伴|沟通|道歉|拥抱|礼物|庆祝|表白|承诺|修复)/i; +const INTENT_QUERY_MARKERS_RE = /[??]|吗|么|如何|怎么|怎样|是否|能否|请问|为什么|分析|总结|建议|怎么看|哪些|哪个|哪位|哪几个|多少|几位|几人|谁|是什么|有啥|有什么|有没有/i; +const INTENT_INGEST_VERBS_RE = /(新增|导入|更新|补充|添加|记录|同步|入图|写入|存档|纠错|更正|澄清)/i; +const INTENT_EVENT_FACT_RE = /(今天|昨晚|昨天|刚刚|刚才|这周|上周|周一|周二|周三|周四|周五|周六|周日|发生|吵架|和好|道歉|沟通|见面|电话|消息|旅行|预算|压力|家长|结婚|分手)/i; + const tryParseJson = (rawText) => { const text = String(rawText || "").trim(); if (!text) return null; @@ -101,12 +367,13 @@ const toQdrantPointId = (seed) => { * GraphRAG 服务 - 知识图谱构建 */ export class GraphRagService { - constructor({ driver, qdrantClient, embeddingService, rerankService, llmService, env }) { + constructor({ driver, qdrantClient, embeddingService, rerankService, llmService, socialImageAgentService, env }) { this.driver = driver; this.qdrantClient = qdrantClient; this.embeddingService = embeddingService; this.rerankService = rerankService; this.llmService = llmService; + this.socialImageAgentService = socialImageAgentService; this.env = env; this.collection = env.QDRANT_COLLECTION ?? "oncelove_chunks"; } @@ -204,6 +471,7 @@ export class GraphRagService { const created = await this._createRelation(session, normalizedRel, userId); if (created) stats.created.relations += 1; } + await this._refreshPersonProfiles(session, userId); } finally { await session.close(); } @@ -324,12 +592,22 @@ export class GraphRagService { const params = { userId, aId, bId, limit: neo4j.int(limit), start, end }; const result = await session.run( ` - MATCH (a {id: $aId, user_id: $userId})-[:PARTICIPATES_IN]->(e:Event)<-[:PARTICIPATES_IN]-(b {id: $bId, user_id: $userId}) + MATCH (a {id: $aId})-[:PARTICIPATES_IN]->(e:Event)<-[:PARTICIPATES_IN]-(b {id: $bId}) WHERE coalesce(e.status, 'active') <> 'invalidated' + AND ( + a.user_id = $userId + OR EXISTS { MATCH ()-[ra]->(a) WHERE ra.user_id = $userId } + OR EXISTS { MATCH (a)-[ra]->() WHERE ra.user_id = $userId } + ) + AND ( + b.user_id = $userId + OR EXISTS { MATCH ()-[rb]->(b) WHERE rb.user_id = $userId } + OR EXISTS { MATCH (b)-[rb]->() WHERE rb.user_id = $userId } + ) AND ($start IS NULL OR e.occurred_at >= datetime($start)) AND ($end IS NULL OR e.occurred_at <= datetime($end)) OPTIONAL MATCH (p)-[:PARTICIPATES_IN]->(e) - WITH e, collect(DISTINCT coalesce(p.id, p.name)) AS participants + WITH e, collect(DISTINCT coalesce(p.name, p.id)) AS participants RETURN e.id AS id, e.type AS type, e.summary AS summary, e.occurred_at AS occurred_at, e.importance AS importance, participants ORDER BY e.occurred_at DESC LIMIT $limit @@ -350,6 +628,154 @@ export class GraphRagService { } } + async classifyIntent(payload = {}) { + const text = String(payload.text || payload.query_text || payload.query || "").trim(); + if (!text) { + throw createHttpError(400, "text 不能为空"); + } + const isQuestion = INTENT_QUERY_MARKERS_RE.test(text); + const hasIngestVerb = INTENT_INGEST_VERBS_RE.test(text); + const hasEventFact = INTENT_EVENT_FACT_RE.test(text); + let intent = "qa"; + let reason = "qa_pattern"; + let confidence = 0.66; + if (isQuestion && hasIngestVerb) { + intent = "mixed"; + reason = "query_and_ingest"; + confidence = 0.72; + } else if (!isQuestion && (hasIngestVerb || hasEventFact)) { + intent = "ingest"; + reason = hasIngestVerb ? "ingest_keyword" : "event_statement"; + confidence = hasIngestVerb ? 0.88 : 0.78; + } else if (!isQuestion && text.length < 8) { + intent = "unknown"; + reason = "too_short"; + confidence = 0.51; + } + let modelUsed = null; + const shouldUseLlmRefine = this.llmService?.isIntentEnabled?.() && this.llmService?.classifyIntent; + if (shouldUseLlmRefine) { + try { + const llmDecision = await this.llmService.classifyIntent(text, { maxTokens: 180 }); + if (llmDecision?.intent) { + intent = llmDecision.intent; + reason = llmDecision.reason || reason; + confidence = Number.isFinite(Number(llmDecision.confidence)) + ? Number(llmDecision.confidence) + : confidence; + modelUsed = llmDecision.model || null; + } + } catch {} + } + const shouldIngest = intent === "ingest" || intent === "mixed"; + const shouldQuery = intent !== "ingest"; + return { + ok: true, + intent, + should_ingest: shouldIngest, + should_query: shouldQuery, + confidence, + reason, + model: modelUsed, + token_strategy: shouldIngest && !shouldQuery + ? "ingest_only" + : shouldIngest && shouldQuery + ? "ingest_then_query" + : "query_only" + }; + } + + _buildImageIngestText(imageContext = {}) { + const summary = String(imageContext?.summary || "").trim(); + const extractedText = String(imageContext?.extracted_text || "").trim(); + const keyEvents = Array.isArray(imageContext?.key_events) ? imageContext.key_events : []; + const riskSignals = Array.isArray(imageContext?.risk_signals) ? imageContext.risk_signals : []; + const relationHints = Array.isArray(imageContext?.relation_hints) ? imageContext.relation_hints : []; + const chatTurns = Array.isArray(imageContext?.chat_turns) ? imageContext.chat_turns : []; + const chatLines = chatTurns + .map((item) => { + const speaker = String(item?.speaker || "").trim().toLowerCase(); + const text = String(item?.text || "").trim(); + if (!text) return ""; + const role = speaker === "self" ? "我方" : speaker === "other" ? "对方" : "未知"; + return `${role}:${text}`; + }) + .filter(Boolean) + .slice(0, 12); + return [ + summary ? `图片摘要:${summary}` : "", + keyEvents.length ? `关键事件:${keyEvents.slice(0, 8).join(";")}` : "", + riskSignals.length ? `风险信号:${riskSignals.slice(0, 8).join(";")}` : "", + relationHints.length ? `关系线索:${relationHints.slice(0, 8).join(";")}` : "", + chatLines.length ? `聊天记录:${chatLines.join(" | ")}` : "", + extractedText ? `OCR文本:${extractedText.slice(0, 600)}` : "" + ].filter(Boolean).join("\n"); + } + + async _decideImageAutoIngest(payload = {}, imageContext = null) { + if (!imageContext || typeof imageContext !== "object") { + return { + enabled: false, + should_ingest: false, + reason: "no_image_context", + confidence: 0 + }; + } + const imageAutoIngestRaw = payload?.image_auto_ingest; + const enabled = imageAutoIngestRaw === undefined + ? true + : !["false", "0", "off", "no"].includes(String(imageAutoIngestRaw).trim().toLowerCase()); + if (!enabled) { + return { + enabled: false, + should_ingest: false, + reason: "disabled_by_payload", + confidence: 1 + }; + } + const ingestText = this._buildImageIngestText(imageContext); + const keyEventCount = Array.isArray(imageContext?.key_events) ? imageContext.key_events.length : 0; + const chatTurnCount = Array.isArray(imageContext?.chat_turns) ? imageContext.chat_turns.length : 0; + const riskSignalCount = Array.isArray(imageContext?.risk_signals) ? imageContext.risk_signals.length : 0; + const extractedLen = String(imageContext?.extracted_text || "").trim().length; + const signalScore = (keyEventCount > 0 ? 2 : 0) + + (chatTurnCount >= 2 ? 1 : 0) + + (riskSignalCount > 0 ? 1 : 0) + + (extractedLen >= 20 ? 1 : 0); + if (!ingestText || signalScore < 2) { + return { + enabled: true, + should_ingest: false, + reason: "signal_too_weak", + confidence: 0.6, + signal_score: signalScore, + ingest_text: ingestText + }; + } + let intentDecision = null; + try { + intentDecision = await this.classifyIntent({ text: ingestText }); + } catch {} + const noUserText = !String(payload?.query_text || payload?.query || "").trim(); + const shouldIngestByIntent = Boolean(intentDecision?.should_ingest); + const shouldIngestBySignal = noUserText && signalScore >= 4; + const shouldIngest = shouldIngestByIntent || shouldIngestBySignal; + return { + enabled: true, + should_ingest: shouldIngest, + reason: shouldIngestByIntent + ? `intent_${String(intentDecision?.intent || "unknown")}` + : shouldIngestBySignal + ? "strong_image_signal" + : `intent_${String(intentDecision?.intent || "qa")}`, + confidence: Number(intentDecision?.confidence) || (shouldIngestBySignal ? 0.82 : 0.58), + signal_score: signalScore, + ingest_text: ingestText, + intent: intentDecision?.intent || null, + model: intentDecision?.model || null + }; + } + async queryGraphRag(payload = {}) { const userId = payload.userId || "default"; const topK = Math.min(Math.max(Number(payload.top_k || 8), 1), 50); @@ -495,7 +921,9 @@ export class GraphRagService { const session = this.driver.session(); try { const eventIds = [...new Set(chunks.map((c) => c.event_id).filter(Boolean))]; - const timelineKeyword = queryText + const timelineKeyword = (aId || bId) + ? null + : queryText ? (this._buildKeywordCandidates(queryText, 6).sort((a, b) => a.length - b.length)[0] || null) : null; const timelineParams = { @@ -513,13 +941,27 @@ export class GraphRagService { MATCH (e:Event {user_id: $userId}) WHERE coalesce(e.status, 'active') <> 'invalidated' AND (size($eventIds) = 0 OR e.id IN $eventIds) - AND ($aId IS NULL OR EXISTS { MATCH ({id: $aId, user_id: $userId})-[:PARTICIPATES_IN]->(e) }) - AND ($bId IS NULL OR EXISTS { MATCH ({id: $bId, user_id: $userId})-[:PARTICIPATES_IN]->(e) }) + AND ($aId IS NULL OR EXISTS { + MATCH (a {id: $aId})-[:PARTICIPATES_IN]->(e) + WHERE ( + a.user_id = $userId + OR EXISTS { MATCH ()-[ra]->(a) WHERE ra.user_id = $userId } + OR EXISTS { MATCH (a)-[ra]->() WHERE ra.user_id = $userId } + ) + }) + AND ($bId IS NULL OR EXISTS { + MATCH (b {id: $bId})-[:PARTICIPATES_IN]->(e) + WHERE ( + b.user_id = $userId + OR EXISTS { MATCH ()-[rb]->(b) WHERE rb.user_id = $userId } + OR EXISTS { MATCH (b)-[rb]->() WHERE rb.user_id = $userId } + ) + }) AND ($queryText IS NULL OR toLower(coalesce(e.summary, '')) CONTAINS $queryText OR toLower(coalesce(e.type, '')) CONTAINS $queryText) AND ($start IS NULL OR e.occurred_at >= datetime($start)) AND ($end IS NULL OR e.occurred_at <= datetime($end)) OPTIONAL MATCH (p)-[:PARTICIPATES_IN]->(e) - WITH e, collect(DISTINCT coalesce(p.id, p.name)) AS participants + WITH e, collect(DISTINCT coalesce(p.name, p.id)) AS participants RETURN e.id AS id, e.summary AS summary, e.type AS type, e.occurred_at AS occurred_at, e.importance AS importance, participants ORDER BY e.occurred_at DESC LIMIT $limit @@ -650,36 +1092,37 @@ export class GraphRagService { } async _planMultiRoundQueries(question, maxRounds = 3) { + const cappedRounds = Math.min(Math.max(Number(maxRounds || 3), 1), 50); const fallbackParts = String(question || "") .split(/[??。;;\n]/) .map((s) => s.trim()) .filter(Boolean); const fallback = [String(question || "").trim(), ...fallbackParts] .filter(Boolean) - .slice(0, maxRounds); + .slice(0, cappedRounds); if (!this.llmService?.isEnabled?.()) return fallback; try { const resp = await this.llmService.chat([ { role: "system", - content: "你是知识图谱问答拆解器。请把用户问题拆成 2-4 条可检索子问题,返回 JSON:{\"sub_queries\":[\"...\"],\"focus\":[\"...\"]}。不要输出其它文字。" + content: "你是知识图谱问答拆解器。请根据问题复杂度动态规划 1-N 条可检索子问题(N 不超过给定上限 max_rounds)。优先保留高价值、去重、可检索的问题。返回 JSON:{\"sub_queries\":[\"...\"],\"focus\":[\"...\"]}。不要输出其它文字。" }, - { role: "user", content: `问题:${question}` } + { role: "user", content: `max_rounds=${cappedRounds}\n问题:${question}` } ], 0.2); const content = resp?.choices?.[0]?.message?.content || ""; const parsed = tryParseJson(content); const candidates = Array.isArray(parsed?.sub_queries) ? parsed.sub_queries : []; - const merged = [...candidates, ...fallback] + const merged = [String(question || "").trim(), ...candidates, ...fallback] .map((item) => String(item || "").trim()) .filter(Boolean); const unique = []; const seen = new Set(); for (const query of merged) { - const key = query.toLowerCase(); + const key = this._normalizeSubQueryKey(query); if (seen.has(key)) continue; seen.add(key); unique.push(query); - if (unique.length >= maxRounds) break; + if (unique.length >= cappedRounds) break; } return unique.length ? unique : fallback; } catch { @@ -772,7 +1215,7 @@ export class GraphRagService { const session = this.driver.session(); try { const direct = await session.run( - `MATCH (p:Person {user_id: $userId}) + `MATCH (p {user_id: $userId}) WHERE p.id IN $candidateIds RETURN p.id AS id, p.name AS name LIMIT 1`, @@ -788,7 +1231,7 @@ export class GraphRagService { }; } const fallback = await session.run( - `MATCH (p:Person {user_id: $userId}) + `MATCH (p {user_id: $userId}) WHERE toLower(coalesce(p.name, '')) IN $selfNames RETURN p.id AS id, p.name AS name LIMIT 1`, @@ -913,9 +1356,94 @@ export class GraphRagService { async queryGraphRagMultiRound(payload = {}, options = {}) { const userId = payload.userId || "default"; - const rawQuestion = String(payload.query_text || payload.query || "").trim(); + const onProgress = typeof options.onProgress === "function" ? options.onProgress : () => {}; + let rawQuestion = String(payload.query_text || payload.query || "").trim(); + let imageContext = null; + let imageIngest = null; + const hasImageInput = Boolean( + payload?.image_payload?.data_url + || payload?.image_payload?.url + || payload?.image_data_url + || payload?.image_url + ); + if (hasImageInput) { + onProgress({ + stage: "image_agent", + status: "running", + scene: payload?.image_scene || payload?.image_payload?.scene || "auto" + }); + try { + if (!this.socialImageAgentService?.isEnabled?.()) { + throw createHttpError(400, "图片问答能力未启用,请先配置 LLM"); + } + imageContext = await this.socialImageAgentService.analyzeForGraphQa(payload); + if (!rawQuestion && imageContext?.query_text_for_qa) { + rawQuestion = String(imageContext.query_text_for_qa || "").trim(); + } else if (rawQuestion && imageContext?.image_digest) { + rawQuestion = `${rawQuestion}\n\n图片线索补充:\n${imageContext.image_digest}`; + } + onProgress({ + stage: "image_agent", + status: "done", + scene: imageContext?.scene || "unknown", + confidence: imageContext?.confidence ?? null, + summary: imageContext?.summary || "", + file_name: imageContext?.file_name || null + }); + onProgress({ + stage: "image_ingest_decision", + status: "running" + }); + imageIngest = await this._decideImageAutoIngest(payload, imageContext); + onProgress({ + stage: "image_ingest_decision", + status: "done", + should_ingest: Boolean(imageIngest?.should_ingest), + reason: imageIngest?.reason || "unknown", + confidence: imageIngest?.confidence ?? null + }); + if (imageIngest?.should_ingest && imageIngest?.ingest_text) { + onProgress({ + stage: "image_ingest", + status: "running", + reason: imageIngest?.reason || "auto" + }); + try { + const ingestResult = await this.incrementalUpdate(imageIngest.ingest_text, userId); + imageIngest = { + ...imageIngest, + applied: true, + stats: ingestResult?.stats || null + }; + onProgress({ + stage: "image_ingest", + status: "done", + stats: ingestResult?.stats || null + }); + } catch (ingestError) { + imageIngest = { + ...imageIngest, + applied: false, + error: ingestError?.message || "图片线索入图失败" + }; + onProgress({ + stage: "image_ingest", + status: "error", + message: ingestError?.message || "图片线索入图失败" + }); + } + } + } catch (error) { + onProgress({ + stage: "image_agent", + status: "error", + message: error?.message || "图片解析失败" + }); + if (!rawQuestion) throw error; + } + } if (!rawQuestion) { - throw createHttpError(400, "query_text 不能为空"); + throw createHttpError(400, "query_text 不能为空(可通过 image_payload 提供图片)"); } const hasSelfPronoun = this._containsSelfPronoun(rawQuestion); const selfPerson = hasSelfPronoun ? await this._resolveSelfPerson(userId) : null; @@ -923,9 +1451,8 @@ export class GraphRagService { const inferredAId = payload.a_id || (hasSelfPronoun ? (selfPerson?.id || null) : null); const topK = Math.min(Math.max(Number(payload.top_k || 8), 2), 30); const timelineLimit = Math.min(Math.max(Number(payload.timeline_limit || 20), 5), 80); - const maxRounds = Math.min(Math.max(Number(payload.max_rounds || 3), 2), 5); + const maxRounds = Math.min(Math.max(Number(payload.max_rounds || 3), 1), 50); const retrievalMode = String(payload.retrieval_mode || "hybrid").trim().toLowerCase(); - const onProgress = typeof options.onProgress === "function" ? options.onProgress : () => {}; const rerankRuntime = this.rerankService?.getRuntimeInfo?.() || { configured: false, @@ -966,17 +1493,60 @@ export class GraphRagService { eventCount: item.events?.length || 0 })) }); + let inferredBId = payload.b_id || this._pickExplicitTargetPersonId(question, graphProbe, { + selfId: inferredAId, + selfName: selfPerson?.name + }); + if (!inferredBId) { + inferredBId = await this._pickExplicitTargetPersonIdFromGraph(question, userId, { + selfId: inferredAId, + selfName: selfPerson?.name + }); + } + if (!inferredBId) { + const stats = await this.getGraphStats(userId).catch(() => null); + const personNodes = (Array.isArray(stats?.nodes) ? stats.nodes : []) + .filter((item) => String(item?.type || "").toLowerCase() === "person") + .map((item) => ({ + id: String(item?.id || "").trim(), + name: String(item?.name || "").trim() + })) + .filter((item) => item.id && item.name && item.id !== inferredAId && item.name.length >= 2) + .sort((a, b) => b.name.length - a.name.length); + for (const person of personNodes) { + if (this._containsNameMention(question, person.name)) { + inferredBId = person.id; + break; + } + } + } + if (!inferredBId) { + inferredBId = await this._pickExplicitTargetPersonIdByNameLookup(question, userId, { + selfId: inferredAId, + selfName: selfPerson?.name + }); + } + const focusPairNames = []; + if (inferredAId) { + const selfName = await this._resolvePersonNameById(userId, inferredAId); + if (selfName) focusPairNames.push(selfName); + } + if (inferredBId) { + const targetName = await this._resolvePersonNameById(userId, inferredBId); + if (targetName) focusPairNames.push(targetName); + } const graphSeedQueries = graphProbe.entities .map((item) => item?.name) .filter(Boolean) .slice(0, 2) .map((name) => `${name} 相关关系变化`); - subQueries = [question, ...graphSeedQueries, ...subQueries] + const shouldAppendGraphSeeds = !this.llmService?.isEnabled?.() && subQueries.length < Math.min(maxRounds, 3); + subQueries = [question, ...subQueries, ...(shouldAppendGraphSeeds ? graphSeedQueries : [])] .map((item) => String(item || "").trim()) .filter(Boolean); const querySeen = new Set(); subQueries = subQueries.filter((item) => { - const key = item.toLowerCase(); + const key = this._normalizeSubQueryKey(item); if (querySeen.has(key)) return false; querySeen.add(key); return true; @@ -987,17 +1557,32 @@ export class GraphRagService { const chunkById = new Map(); const timelineById = new Map(); + let prevRoundSnapshot = null; for (let i = 0; i < subQueries.length; i += 1) { const subQuery = subQueries[i]; - onProgress({ stage: "knowledge_retrieval", status: "running", round: i + 1, totalRounds: subQueries.length, subQuery }); + const chunkCountBefore = chunkById.size; + const timelineCountBefore = timelineById.size; + const roundRetrievalMode = this._selectRoundRetrievalMode(retrievalMode, { + round: i + 1, + graphProbe, + prevRound: prevRoundSnapshot + }); + onProgress({ + stage: "knowledge_retrieval", + status: "running", + round: i + 1, + totalRounds: subQueries.length, + subQuery, + round_retrieval_mode: roundRetrievalMode + }); const oneRound = await this.queryGraphRag({ userId, query_text: subQuery, top_k: topK, timeline_limit: timelineLimit, - retrieval_mode: retrievalMode, + retrieval_mode: roundRetrievalMode, a_id: inferredAId, - b_id: payload.b_id, + b_id: inferredBId, start: payload.start, end: payload.end }); @@ -1019,6 +1604,7 @@ export class GraphRagService { round: i + 1, subQuery, answer: oneRound?.answer || "", + requestedRetrievalMode: roundRetrievalMode, retrievalMode: oneRound?.meta?.retrieval_mode || null, rerankModel: oneRound?.meta?.rerank?.model || null, rerankEnabled: Boolean(oneRound?.meta?.rerank?.enabled), @@ -1028,10 +1614,39 @@ export class GraphRagService { id: c.id, text: String(c.text || "").slice(0, 180), score: Number(c.final_score ?? c.rerank_score ?? c.score ?? 0) - })) + })), + newChunkCount: Math.max(0, chunkById.size - chunkCountBefore), + newTimelineCount: Math.max(0, timelineById.size - timelineCountBefore) }; roundResults.push(roundResult); + const stopDecision = this._shouldStopMultiRound({ + question, + roundResult, + maxRounds, + topK, + mergedChunkCount: chunkById.size, + mergedTimelineCount: timelineById.size, + prevRound: prevRoundSnapshot + }); + prevRoundSnapshot = { + retrievalMode: roundResult.retrievalMode, + chunkCount: roundResult.chunkCount, + timelineCount: roundResult.timelineCount, + topScore: roundResult.topChunks?.[0]?.score || 0 + }; onProgress({ stage: "knowledge_retrieval", status: "done", ...roundResult }); + if (stopDecision.stop && i + 1 < subQueries.length) { + onProgress({ + stage: "query_plan", + status: "done", + stop_reason: stopDecision.reason, + executed_rounds: i + 1, + planned_rounds: subQueries.length, + queries: subQueries.slice(0, i + 1) + }); + subQueries = subQueries.slice(0, i + 1); + break; + } } const mergedTimeline = [...timelineById.values()].sort((a, b) => { @@ -1039,13 +1654,38 @@ export class GraphRagService { const tsB = Number(b?.occurred_at_ts || 0); return tsB - tsA; }); + const focusContext = this._buildFocusContext(question, mergedTimeline, graphProbe, { + forceFocusPersons: focusPairNames, + explicitMatch: Boolean(inferredBId) + }); + const filteredTimeline = this._applyFocusTimelineFilter(mergedTimeline, focusContext); + const timelineForReasoning = filteredTimeline.length ? filteredTimeline : mergedTimeline; + const mergedEventById = new Map(timelineForReasoning.map((item) => [item.id, item])); const mergedChunks = [...chunkById.values()] .sort((a, b) => Number(b?.final_score ?? b?.rerank_score ?? b?.score ?? 0) - Number(a?.final_score ?? a?.rerank_score ?? a?.score ?? 0)) .slice(0, Math.max(topK * 2, 12)); + const rankedMergedChunks = mergedChunks + .map((chunk) => { + const baseScore = Number(chunk?.final_score ?? chunk?.rerank_score ?? chunk?.score ?? 0); + const event = chunk?.event_id ? mergedEventById.get(chunk.event_id) : null; + const focusWeight = this._computeFocusWeight(event, focusContext); + const recencyWeight = this._computeRecencyWeight(event); + return { + ...chunk, + focus_weight: focusWeight, + recency_weight: recencyWeight, + adjusted_score: Number((baseScore * focusWeight * recencyWeight).toFixed(6)) + }; + }) + .sort((a, b) => Number(b?.adjusted_score || 0) - Number(a?.adjusted_score || 0)); const relationMap = new Map(); - mergedTimeline.forEach((event) => { + timelineForReasoning.forEach((event) => { const participants = [...new Set((event?.participants || []).filter(Boolean))]; + if (focusPairNames.length >= 2) { + const hasAll = focusPairNames.every((name) => participants.includes(name)); + if (!hasAll) return; + } for (let i = 0; i < participants.length; i += 1) { for (let j = i + 1; j < participants.length; j += 1) { const a = participants[i]; @@ -1055,9 +1695,17 @@ export class GraphRagService { pair: [a, b], coEventCount: 0, recentEvent: null, - samples: [] + samples: [], + positiveSignals: 0, + negativeSignals: 0, + latestTs: null }; current.coEventCount += 1; + const signalPolarity = this._detectEventSignal(event); + if (signalPolarity > 0) current.positiveSignals += 1; + if (signalPolarity < 0) current.negativeSignals += 1; + const eventTs = Number(event?.occurred_at_ts || 0); + if (!current.latestTs || eventTs > current.latestTs) current.latestTs = eventTs; if (!current.recentEvent || Number(event?.occurred_at_ts || 0) > Number(current.recentEvent?.occurred_at_ts || 0)) { current.recentEvent = { id: event.id, summary: event.summary || "", occurred_at: event.occurred_at || null, occurred_at_ts: event.occurred_at_ts || null }; } @@ -1073,14 +1721,26 @@ export class GraphRagService { } }); const relationHints = [...relationMap.values()] - .sort((a, b) => b.coEventCount - a.coEventCount) + .map((item) => { + const quality = this._buildRelationQualitySignal(item); + const focusBoost = this._computeRelationFocusBoost(item, focusContext); + return { + ...item, + quality: { + ...quality, + score: Number(Math.min(1, Number(quality.score || 0) * focusBoost).toFixed(4)) + } + }; + }) + .sort((a, b) => Number(b?.quality?.score || 0) - Number(a?.quality?.score || 0) || b.coEventCount - a.coEventCount) .slice(0, 8) .map((item) => ({ pair: item.pair, coEventCount: item.coEventCount, recentEvent: item.recentEvent?.summary || "", recentAt: item.recentEvent?.occurred_at || null, - samples: item.samples + samples: item.samples, + quality: item.quality })); if (relationHints.length === 0 && graphProbe.entities.length) { const fromNeighbors = []; @@ -1092,6 +1752,12 @@ export class GraphRagService { coEventCount: entity.events?.length || 0, recentEvent: entity.events?.[0]?.summary || "", recentAt: entity.events?.[0]?.occurred_at || null, + quality: { + score: 0.42, + stability: 0.35, + recentness: 0.4, + signal: "graph_fallback" + }, samples: (entity.events || []).slice(0, 2).map((e) => ({ id: e.id, summary: e.summary, @@ -1106,11 +1772,13 @@ export class GraphRagService { } onProgress({ stage: "relation_organize", status: "done", relationHints }); - let evidence = mergedChunks.slice(0, 10).map((item, idx) => ({ + const temporalGuardrails = this._buildTemporalGuardrails(question, timelineForReasoning, focusContext); + + let evidence = rankedMergedChunks.slice(0, 10).map((item, idx) => ({ index: idx + 1, id: item.id, event_id: item.event_id || null, - score: Number(item?.final_score ?? item?.rerank_score ?? item?.score ?? 0), + score: Number(item?.adjusted_score ?? item?.final_score ?? item?.rerank_score ?? item?.score ?? 0), text: String(item.text || "").slice(0, 260), occurred_at: item.occurred_at || null })); @@ -1138,7 +1806,43 @@ export class GraphRagService { }); evidence = probeEvidence.slice(0, 10).map((item, idx) => ({ ...item, index: idx + 1 })); } - onProgress({ stage: "evidence_synthesis", status: "done", evidencePreview: evidence.slice(0, 5), evidenceCount: evidence.length }); + const retrievalDetails = rankedMergedChunks + .slice(0, Math.max(topK, 8)) + .map((item, idx) => ({ + index: idx + 1, + id: item.id, + event_id: item.event_id || null, + score: Number(item?.final_score ?? item?.rerank_score ?? item?.score ?? 0), + occurred_at: item.occurred_at || null, + summary: String(item?.text || "").replace(/\s+/g, " ").trim().slice(0, 120), + detail: String(item?.text || "").replace(/\s+/g, " ").trim().slice(0, 520) + })); + const retrievalSummary = { + chunk_count: mergedChunks.length, + timeline_count: timelineForReasoning.length, + relation_hint_count: relationHints.length, + top_entity_names: graphProbe.entities.slice(0, 6).map((item) => item?.name).filter(Boolean), + top_round_findings: roundResults + .slice(0, 3) + .map((item) => String(item?.answer || "").replace(/\s+/g, " ").trim()) + .filter(Boolean) + .map((text, idx) => `${idx + 1}. ${text.slice(0, 130)}`), + digest: [ + `检索到片段 ${mergedChunks.length} 条,时间线事件 ${mergedTimeline.length} 条,关系线索 ${relationHints.length} 条。`, + focusContext.focusPersons.length ? `主关系人物优先:${focusContext.focusPersons.join("、")}。` : "", + temporalGuardrails.summary || "", + retrievalDetails.length ? `高分证据:${retrievalDetails.slice(0, 3).map((item) => `[#${item.index}]${item.summary}`).join(";")}` : "", + roundResults.length ? `阶段结论:${roundResults.slice(0, 2).map((item) => item?.answer).filter(Boolean).join(";").slice(0, 280)}` : "" + ].filter(Boolean).join("\n") + }; + onProgress({ + stage: "evidence_synthesis", + status: "done", + evidencePreview: evidence.slice(0, 5), + evidenceCount: evidence.length, + retrievalSummary, + retrievalDetails: retrievalDetails.slice(0, 6) + }); let finalReview = { verdict: roundResults.map((r) => r.answer).filter(Boolean).join("\n"), @@ -1152,19 +1856,59 @@ export class GraphRagService { if (this.llmService?.isEnabled?.()) { try { const roundText = roundResults.map((r) => `第${r.round}轮 子问题:${r.subQuery}\n中间结论:${r.answer}`).join("\n\n"); - const relationText = relationHints.map((r, idx) => `${idx + 1}. ${r.pair.join(" <-> ")} 共同事件:${r.coEventCount} 最近:${r.recentEvent}`).join("\n"); + const relationText = relationHints.map((r, idx) => `${idx + 1}. ${r.pair.join(" <-> ")} 共同事件:${r.coEventCount} 最近:${r.recentEvent} 关系分:${Number(r?.quality?.score || 0).toFixed(3)} 信号:${r?.quality?.signal || "-"}`).join("\n"); const evidenceText = evidence.map((e) => `[${e.index}] score=${e.score.toFixed(3)} time=${e.occurred_at || "未知"}\n${e.text}`).join("\n\n"); - const judgeResp = await this.llmService.chat([ + const retrievalSummaryText = retrievalSummary?.digest || "无"; + const retrievalDetailsText = retrievalDetails + .slice(0, 8) + .map((item) => `[${item.index}] score=${Number(item.score || 0).toFixed(3)} time=${item.occurred_at || "未知"}\n摘要:${item.summary}\n明细:${item.detail}`) + .join("\n\n"); + onProgress({ stage: "final_review", status: "running" }); + let streamedJudgeText = ""; + let streamBuffer = ""; + let streamLastEmitAt = 0; + const flushTokenProgress = (force = false) => { + if (!streamBuffer) return; + const now = Date.now(); + if (!force && streamBuffer.length < 8 && now - streamLastEmitAt < 60) return; + streamedJudgeText += streamBuffer; + onProgress({ + stage: "final_review_token", + status: "running", + delta: streamBuffer, + text: streamedJudgeText + }); + streamBuffer = ""; + streamLastEmitAt = now; + }; + const judgeMessages = [ { role: "system", - content: "你是图谱问答终审官。请综合多轮检索中间结论、关系线索和证据片段,输出 JSON:{\"verdict\":\"\",\"confidence\":0-1,\"reasoning\":\"\",\"uncertainty\":\"\",\"next_actions\":[\"\"],\"citations\":[1,2]}。不要输出其它文字。" + content: "你是图谱问答终审官。请先阅读“检索资料摘要”,再结合“检索资料明细”“多轮中间结论”“关系线索”“证据片段”给出最终结论。若用户本轮输入与历史证据冲突,不要直接否定用户,先给出兼容解释与待确认点。必须遵守:1) 时序优先,近180天证据权重高于更早历史;2) 若识别到主关系人物,优先使用其相关证据,其他历史关系仅作辅证;3) 当存在时序冲突时,禁止直接下“当前无关系/不在关系中”的硬结论,改为“待确认+给出下一步核对信息”。输出 JSON:{\"verdict\":\"\",\"confidence\":0-1,\"reasoning\":\"\",\"uncertainty\":\"\",\"next_actions\":[\"\"],\"citations\":[1,2]}。不要输出其它文字。" }, { role: "user", - content: `用户问题:${question}\n\n多轮中间结论:\n${roundText}\n\n关系线索:\n${relationText || "无"}\n\n证据片段:\n${evidenceText || "无"}` + content: `用户问题:${question}\n\n主关系上下文:\n${JSON.stringify({ + focus_persons: focusContext.focusPersons, + from_explicit_name: focusContext.explicitMatch, + from_pronoun_infer: focusContext.pronounInferred + }, null, 2)}\n\n时序守卫信息:\n${JSON.stringify(temporalGuardrails, null, 2)}\n\n检索资料摘要:\n${retrievalSummaryText}\n\n检索资料明细:\n${retrievalDetailsText || "无"}\n\n多轮中间结论:\n${roundText}\n\n关系线索:\n${relationText || "无"}\n\n证据片段:\n${evidenceText || "无"}` } - ], 0.2); - const parsed = tryParseJson(judgeResp?.choices?.[0]?.message?.content || ""); + ]; + let judgeContent = ""; + if (this.llmService?.chatStream) { + const streamResp = await this.llmService.chatStream(judgeMessages, 0.2, { thinking: "off" }, (deltaText) => { + if (!deltaText) return; + streamBuffer += String(deltaText); + flushTokenProgress(false); + }); + flushTokenProgress(true); + judgeContent = String(streamResp?.text || streamedJudgeText || ""); + } else { + const judgeResp = await this.llmService.chat(judgeMessages, 0.2); + judgeContent = String(judgeResp?.choices?.[0]?.message?.content || ""); + } + const parsed = tryParseJson(judgeContent); if (parsed && typeof parsed === "object") { const confidence = Number(parsed.confidence); finalReview = { @@ -1179,7 +1923,15 @@ export class GraphRagService { } catch {} } - const answer = `结论:${finalReview.verdict}\n\n置信度:${(Number(finalReview.confidence || 0) * 100).toFixed(1)}%\n\n依据:${finalReview.reasoning}\n\n不确定性:${finalReview.uncertainty}`; + if (temporalGuardrails.avoidHardDenial && this._containsHardNoRelationship(finalReview.verdict)) { + finalReview.verdict = this._softenNoRelationshipVerdict(finalReview.verdict, temporalGuardrails); + finalReview.uncertainty = String(finalReview.uncertainty || "").trim() || "时序证据存在冲突,需要补充关系时点确认"; + finalReview.confidence = Math.min(Number(finalReview.confidence || 0.62), 0.68); + } + + const answer = String(finalReview.verdict || "").trim() || (roundResults[0]?.answer || "暂未形成结论"); + const relationQualitySummary = this._buildRelationQualitySummary(relationHints); + onProgress({ stage: "quality_assess", status: "done", relationQuality: relationQualitySummary }); onProgress({ stage: "final_review", status: "done", finalReview: { ...finalReview, answer } }); return { @@ -1192,23 +1944,418 @@ export class GraphRagService { graph_probe: graphProbe, relation_hints: relationHints, evidence, + retrieval_summary: retrievalSummary, + retrieval_details: retrievalDetails, final_review: { ...finalReview, answer }, + image_context: imageContext || null, + image_ingest: imageIngest || null, meta: { rounds: subQueries.length, + focus_a_id: inferredAId || null, + focus_b_id: inferredBId || null, retrieval_mode_requested: retrievalMode, + retrieval_mode_adaptive: retrievalMode === "hybrid", retrieved_chunks: mergedChunks.length, - retrieved_timeline_events: mergedTimeline.length, + retrieved_timeline_events: timelineForReasoning.length, graph_probe_entities: graphProbe.counts.entityCount, graph_probe_neighbors: graphProbe.counts.neighborCount, graph_probe_events: graphProbe.counts.eventCount, + image_ingest: imageIngest || null, + relation_quality: relationQualitySummary, rerank: this.rerankService?.getRuntimeInfo?.() || null } }; } + _selectRoundRetrievalMode(requestedMode, context = {}) { + const normalized = ["vector", "graph", "hybrid"].includes(String(requestedMode || "").toLowerCase()) + ? String(requestedMode || "").toLowerCase() + : "hybrid"; + if (normalized === "vector" || normalized === "graph") return normalized; + const round = Number(context.round || 1); + const prev = context.prevRound || null; + if (round <= 1 || !prev) return "hybrid"; + const prevMode = String(prev.retrievalMode || "").toLowerCase(); + if (prevMode.includes("nohit") || Number(prev.chunkCount || 0) <= 1) { + const graphEventCount = Number(context?.graphProbe?.counts?.eventCount || 0); + return graphEventCount > 0 ? "graph" : "vector"; + } + if (prevMode.includes("vector") && Number(prev.timelineCount || 0) > Number(prev.chunkCount || 0)) return "graph"; + if (prevMode.includes("graph") && Number(prev.chunkCount || 0) <= 1) return "vector"; + return "hybrid"; + } + + _normalizeSubQueryKey(text) { + return String(text || "") + .trim() + .toLowerCase() + .replace(/[??!!。;;,,\s]+/g, ""); + } + + _shouldStopMultiRound(context = {}) { + const question = String(context?.question || "").trim(); + const round = Number(context?.roundResult?.round || 1); + const topScore = Number(context?.roundResult?.topChunks?.[0]?.score || 0); + const chunkCount = Number(context?.roundResult?.chunkCount || 0); + const newChunkCount = Number(context?.roundResult?.newChunkCount || 0); + const newTimelineCount = Number(context?.roundResult?.newTimelineCount || 0); + const mergedChunkCount = Number(context?.mergedChunkCount || 0); + const mergedTimelineCount = Number(context?.mergedTimelineCount || 0); + const topK = Number(context?.topK || 8); + const prevTopScore = Number(context?.prevRound?.topScore || 0); + const simpleQuestion = question.length <= 18 + || /谁|是谁|什么|现任|对象|有没有|是否|在哪|哪位|多久|几号/.test(question); + + if (round <= 1) { + const enoughForSimple = simpleQuestion && topScore >= 0.72 && chunkCount >= 2; + return enoughForSimple + ? { stop: true, reason: "simple_question_enough_context" } + : { stop: false, reason: "continue" }; + } + + const diminishingReturn = newChunkCount <= 0 && newTimelineCount <= 0; + if (diminishingReturn) return { stop: true, reason: "no_new_context" }; + + const enoughCoverage = mergedChunkCount >= Math.max(6, topK) || mergedTimelineCount >= Math.max(4, Math.floor(topK / 2)); + const stableConfidence = topScore >= 0.72 && prevTopScore >= 0.72; + if (enoughCoverage && stableConfidence) return { stop: true, reason: "context_sufficient" }; + return { stop: false, reason: "continue" }; + } + + _detectEventSignal(event) { + const text = `${String(event?.type || "")} ${String(event?.summary || "")}`; + if (NEGATIVE_SIGNAL_RE.test(text)) return -1; + if (POSITIVE_SIGNAL_RE.test(text)) return 1; + return 0; + } + + _buildRelationQualitySignal(relation = {}) { + const nowTs = Math.floor(Date.now() / 1000); + const latestTs = Number(relation.latestTs || relation?.recentEvent?.occurred_at_ts || 0); + const ageDays = latestTs > 0 ? Math.max(0, (nowTs - latestTs) / 86400) : 365; + const recentness = Math.max(0, Math.min(1, 1 - ageDays / 180)); + const coEventScore = Math.max(0, Math.min(1, Number(relation.coEventCount || 0) / 6)); + const positive = Number(relation.positiveSignals || 0); + const negative = Number(relation.negativeSignals || 0); + const totalSignals = positive + negative; + const stability = totalSignals > 0 ? Math.max(0, Math.min(1, (positive + 1) / (totalSignals + 2))) : 0.52; + const conflictPenalty = totalSignals > 0 ? Math.max(0, Math.min(1, negative / totalSignals)) : 0; + const score = Math.max( + 0, + Math.min(1, coEventScore * 0.42 + recentness * 0.34 + stability * 0.24 - conflictPenalty * 0.18) + ); + const signal = conflictPenalty >= 0.55 ? "conflict_risk" : stability >= 0.62 ? "stable" : "mixed"; + return { + score: Number(score.toFixed(4)), + stability: Number(stability.toFixed(4)), + recentness: Number(recentness.toFixed(4)), + signal + }; + } + + _buildRelationQualitySummary(relationHints = []) { + const hints = Array.isArray(relationHints) ? relationHints : []; + const scores = hints.map((item) => Number(item?.quality?.score || 0)).filter((x) => Number.isFinite(x)); + const avg = scores.length ? scores.reduce((sum, x) => sum + x, 0) / scores.length : 0; + const stableCount = hints.filter((item) => item?.quality?.signal === "stable").length; + const conflictRiskCount = hints.filter((item) => item?.quality?.signal === "conflict_risk").length; + return { + average_score: Number(avg.toFixed(4)), + stable_count: stableCount, + conflict_risk_count: conflictRiskCount + }; + } + + _containsNameMention(text, name) { + const source = String(text || ""); + const target = String(name || ""); + if (!source || !target) return false; + const normalize = (value) => String(value || "") + .toLowerCase() + .replace(/[\s\u200B-\u200D\uFEFF·•\-_'"“”‘’`~!@#$%^&*()+=|\\/:;,.!?,。!?、()【】《》]/g, ""); + const sourceNorm = normalize(source); + const targetNorm = normalize(target); + if (!sourceNorm || !targetNorm) return false; + return sourceNorm.includes(targetNorm); + } + + _buildFocusContext(question, mergedTimeline = [], graphProbe = {}, options = {}) { + const q = String(question || ""); + const personEntities = (Array.isArray(graphProbe?.entities) ? graphProbe.entities : []) + .filter((item) => String(item?.type || "").toLowerCase() === "person") + .map((item) => String(item?.name || item?.id || "").trim()) + .filter((name) => name.length >= 2); + const timelineNames = [...new Set( + (Array.isArray(mergedTimeline) ? mergedTimeline : []) + .flatMap((event) => (Array.isArray(event?.participants) ? event.participants : [])) + .map((item) => String(item || "").trim()) + .filter((name) => name.length >= 2) + )]; + const personCandidates = [...new Set([...personEntities, ...timelineNames])]; + const explicitPersons = [...new Set(personCandidates.filter((name) => this._containsNameMention(q, name)))]; + const pronounRef = /(她|他|对象|伴侣|女朋友|男朋友|现任|前任)/.test(q); + let inferred = []; + if (explicitPersons.length === 0 && pronounRef) { + for (const event of mergedTimeline) { + const people = (Array.isArray(event?.participants) ? event.participants : []) + .map((item) => String(item || "").trim()) + .filter((name) => name && !["我", "user", "本人", "自己"].includes(name)); + if (people.length > 0) { + inferred = [people[0]]; + break; + } + } + } + const forced = (Array.isArray(options?.forceFocusPersons) ? options.forceFocusPersons : []) + .map((item) => String(item || "").trim()) + .filter((item) => item.length >= 2); + const focusPersons = [...new Set([...forced, ...explicitPersons, ...inferred])]; + return { + focusPersons, + explicitMatch: Boolean(options?.explicitMatch || explicitPersons.length > 0 || forced.length > 0), + pronounInferred: explicitPersons.length === 0 && inferred.length > 0 + }; + } + + _pickExplicitTargetPersonId(question, graphProbe = {}, options = {}) { + const q = String(question || ""); + const selfId = String(options?.selfId || "").trim(); + const selfName = String(options?.selfName || "").trim(); + const persons = (Array.isArray(graphProbe?.entities) ? graphProbe.entities : []) + .filter((item) => String(item?.type || "").toLowerCase() === "person") + .map((item) => ({ + id: String(item?.id || "").trim(), + name: String(item?.name || "").trim() + })) + .filter((item) => item.id && item.name && item.name.length >= 2) + .sort((a, b) => b.name.length - a.name.length); + const blockNames = new Set(["我", "本人", "自己", "user", selfName].filter(Boolean)); + for (const person of persons) { + if (person.id === selfId) continue; + if (blockNames.has(person.name)) continue; + if (this._containsNameMention(q, person.name)) return person.id; + } + return null; + } + + async _pickExplicitTargetPersonIdFromGraph(question, userId, options = {}) { + const q = String(question || ""); + if (!q) return null; + const selfId = String(options?.selfId || "").trim(); + const selfName = String(options?.selfName || "").trim(); + const session = this.driver.session(); + try { + const result = await session.run( + ` + MATCH (p) + WHERE p.name IS NOT NULL + AND ( + p.user_id = $userId + OR EXISTS { MATCH ()-[r]->(p) WHERE r.user_id = $userId } + OR EXISTS { MATCH (p)-[r]->() WHERE r.user_id = $userId } + ) + RETURN p.id AS id, p.name AS name + LIMIT 300 + `, + { userId } + ); + const blockNames = new Set(["我", "本人", "自己", "user", selfName].filter(Boolean)); + const persons = result.records + .map((r) => ({ + id: String(r.get("id") || "").trim(), + name: String(r.get("name") || "").trim() + })) + .filter((item) => item.id && item.name && item.name.length >= 2 && item.id !== selfId && !blockNames.has(item.name)) + .sort((a, b) => b.name.length - a.name.length); + for (const person of persons) { + if (this._containsNameMention(q, person.name)) return person.id; + } + const stats = await this.getGraphStats(userId).catch(() => null); + const statPersons = (Array.isArray(stats?.nodes) ? stats.nodes : []) + .filter((item) => String(item?.type || "").toLowerCase() === "person") + .map((item) => ({ + id: String(item?.id || "").trim(), + name: String(item?.name || "").trim() + })) + .filter((item) => item.id && item.name && item.name.length >= 2 && item.id !== selfId && !blockNames.has(item.name)) + .sort((a, b) => b.name.length - a.name.length); + for (const person of statPersons) { + if (this._containsNameMention(q, person.name)) return person.id; + } + return null; + } catch { + return null; + } finally { + await session.close(); + } + } + + async _pickExplicitTargetPersonIdByNameLookup(question, userId, options = {}) { + const q = String(question || "").trim(); + if (!q) return null; + const selfId = String(options?.selfId || "").trim(); + const selfName = String(options?.selfName || "").trim(); + const blockNames = new Set(["我", "本人", "自己", "user", selfName].filter(Boolean)); + const nameTokens = [...new Set( + (q.match(/[\u4e00-\u9fa5]{2,6}/g) || []) + .map((item) => String(item || "").trim()) + .filter((item) => item.length >= 2) + )].slice(0, 8); + if (!nameTokens.length) return null; + const session = this.driver.session(); + try { + const result = await session.run( + ` + MATCH (p:Person) + WHERE ( + p.user_id = $userId + OR EXISTS { MATCH ()-[r]->(p) WHERE r.user_id = $userId } + OR EXISTS { MATCH (p)-[r]->() WHERE r.user_id = $userId } + ) + AND p.name IS NOT NULL + AND any(token IN $tokens WHERE toLower(p.name) CONTAINS toLower(token) OR toLower(token) CONTAINS toLower(p.name)) + RETURN p.id AS id, p.name AS name + LIMIT 80 + `, + { userId, tokens: nameTokens } + ); + const candidates = result.records + .map((r) => ({ + id: String(r.get("id") || "").trim(), + name: String(r.get("name") || "").trim() + })) + .filter((item) => item.id && item.name && item.id !== selfId && !blockNames.has(item.name)) + .sort((a, b) => b.name.length - a.name.length); + for (const item of candidates) { + if (this._containsNameMention(q, item.name)) return item.id; + } + return candidates[0]?.id || null; + } catch { + return null; + } finally { + await session.close(); + } + } + + async _resolvePersonNameById(userId, personId) { + const id = String(personId || "").trim(); + if (!id) return null; + const session = this.driver.session(); + try { + const result = await session.run( + ` + MATCH (p {id: $id}) + WHERE ( + p.user_id = $userId + OR EXISTS { MATCH ()-[r]->(p) WHERE r.user_id = $userId } + OR EXISTS { MATCH (p)-[r]->() WHERE r.user_id = $userId } + ) + RETURN coalesce(p.name, p.id) AS name + LIMIT 1 + `, + { id, userId } + ); + if (!result.records.length) return null; + return String(result.records[0].get("name") || "").trim() || null; + } catch { + return null; + } finally { + await session.close(); + } + } + + _applyFocusTimelineFilter(timeline = [], focusContext = {}) { + const list = Array.isArray(timeline) ? timeline : []; + const focusPersons = Array.isArray(focusContext?.focusPersons) ? focusContext.focusPersons : []; + if (!focusContext?.explicitMatch || focusPersons.length === 0 || list.length <= 2) return list; + const filtered = list.filter((event) => { + const participants = Array.isArray(event?.participants) ? event.participants : []; + return participants.some((p) => focusPersons.includes(String(p || ""))); + }); + if (filtered.length >= 1) return filtered; + return list; + } + + _computeRecencyWeight(event) { + const ts = Number(event?.occurred_at_ts || 0); + if (!ts) return 1; + const nowTs = Math.floor(Date.now() / 1000); + const ageDays = Math.max(0, (nowTs - ts) / 86400); + if (ageDays <= 30) return 1.14; + if (ageDays <= 180) return 1.06; + if (ageDays > 365) return 0.86; + return 1; + } + + _computeFocusWeight(event, focusContext = {}) { + const focusPersons = Array.isArray(focusContext?.focusPersons) ? focusContext.focusPersons : []; + if (focusPersons.length === 0) return 1; + const participants = (Array.isArray(event?.participants) ? event.participants : []).map((item) => String(item || "")); + if (participants.some((p) => focusPersons.includes(p))) return 1.12; + if (participants.length === 0) return 0.9; + return 0.78; + } + + _computeRelationFocusBoost(relation, focusContext = {}) { + const focusPersons = Array.isArray(focusContext?.focusPersons) ? focusContext.focusPersons : []; + if (focusPersons.length === 0) return 1; + const pair = Array.isArray(relation?.pair) ? relation.pair.map((x) => String(x || "")) : []; + return pair.some((p) => focusPersons.includes(p)) ? 1.15 : 0.84; + } + + _buildTemporalGuardrails(question, mergedTimeline = [], focusContext = {}) { + const q = String(question || ""); + const isCurrentQuestion = /(现在|目前|当前|现任|如今|有没有|是否有|有女朋友|有男朋友|对象)/.test(q); + const focusPersons = Array.isArray(focusContext?.focusPersons) ? focusContext.focusPersons : []; + const focusEvents = mergedTimeline.filter((event) => { + if (focusPersons.length === 0) return true; + const participants = Array.isArray(event?.participants) ? event.participants : []; + return participants.some((p) => focusPersons.includes(String(p || ""))); + }); + const nowTs = Math.floor(Date.now() / 1000); + const windowSec = 180 * 86400; + const recentEvents = focusEvents.filter((event) => { + const ts = Number(event?.occurred_at_ts || 0); + return ts > 0 && nowTs - ts <= windowSec; + }); + const oldEvents = focusEvents.filter((event) => { + const ts = Number(event?.occurred_at_ts || 0); + return ts > 0 && nowTs - ts > windowSec; + }); + const recentPositive = recentEvents.filter((event) => this._detectEventSignal(event) > 0).length; + const recentNegative = recentEvents.filter((event) => this._detectEventSignal(event) < 0).length; + const oldNegative = oldEvents.filter((event) => this._detectEventSignal(event) < 0).length; + const avoidHardDenial = Boolean(isCurrentQuestion && recentEvents.length > 0 && (recentPositive > 0 || oldNegative > 0)); + const summary = `时序守卫:近180天事件${recentEvents.length}条(正向${recentPositive}/负向${recentNegative}),较早负向${oldNegative}条。`; + return { + is_current_question: isCurrentQuestion, + focus_persons: focusPersons, + recent_event_count: recentEvents.length, + recent_positive_count: recentPositive, + recent_negative_count: recentNegative, + old_negative_count: oldNegative, + avoidHardDenial, + summary + }; + } + + _containsHardNoRelationship(text) { + const value = String(text || ""); + return /(并未处于|不在.*关系|无证据支持.*(关系|恋爱)|当前无.*(关系|对象)|没有证据表明你正与)/.test(value); + } + + _softenNoRelationshipVerdict(verdict, guardrails = {}) { + const prefix = `基于现有资料,关系时序存在待核对点(近180天事件 ${Number(guardrails?.recent_event_count || 0)} 条),先不做“当前无关系”的硬判断。`; + const tail = String(verdict || "") + .replace(/当前无证据支持存在一段正在进行的、可讨论[^。!?\n]*[。!?]?/g, "") + .replace(/你当前并未处于一段进行中的恋爱关系[。!?]?/g, "") + .trim(); + return `${prefix}${tail ? `\n\n${tail}` : ""}`.trim(); + } + async listUsers(limit = 200) { const session = this.driver.session(); const cappedLimit = Math.min(Math.max(Number(limit || 200), 1), 1000); @@ -1289,6 +2436,29 @@ export class GraphRagService { coalesce(n.name, n.summary, n.id, elementId(n)) AS name, n.summary AS summary, n.gender AS gender, + n.aliases AS aliases, + n.normalized_name AS normalized_name, + n.person_role AS person_role, + n.first_seen_at AS first_seen_at, + n.last_seen_at AS last_seen_at, + n.source_count AS source_count, + n.co_event_count_with_user AS co_event_count_with_user, + n.relation_to_user AS relation_to_user, + n.relation_confidence AS relation_confidence, + n.mention_keywords AS mention_keywords, + n.name_pinyin AS name_pinyin, + n.active_locations AS active_locations, + n.topic_profile AS topic_profile, + n.related_person_ids_topk AS related_person_ids_topk, + n.data_quality AS data_quality, + n.privacy_level AS privacy_level, + n.data_source AS data_source, + n.consent_status AS consent_status, + n.occupation AS occupation, + n.education_background AS education_background, + n.residential_status AS residential_status, + n.source_event_ids AS source_event_ids, + n.birth_date_utc AS birth_date_utc, CASE WHEN size(labels) = 0 THEN 'entity' WHEN 'person' IN labels THEN 'person' @@ -1322,11 +2492,36 @@ export class GraphRagService { .map((r) => { const id = toId(r.get("id")); if (!id) return null; + const birthDateUtc = r.get("birth_date_utc")?.toString?.() || r.get("birth_date_utc") || null; return { id, name: r.get("name"), summary: r.get("summary"), gender: normalizeGender(r.get("gender")), + aliases: Array.isArray(r.get("aliases")) ? r.get("aliases") : [], + normalized_name: r.get("normalized_name") || null, + person_role: normalizePersonRole(r.get("person_role")), + first_seen_at: r.get("first_seen_at")?.toString?.() || r.get("first_seen_at") || null, + last_seen_at: r.get("last_seen_at")?.toString?.() || r.get("last_seen_at") || null, + source_count: toNumber(r.get("source_count")) ?? 0, + co_event_count_with_user: toNumber(r.get("co_event_count_with_user")) ?? 0, + relation_to_user: r.get("relation_to_user") || "未知", + relation_confidence: toNumber(r.get("relation_confidence")) ?? 0, + mention_keywords: Array.isArray(r.get("mention_keywords")) ? r.get("mention_keywords") : [], + name_pinyin: r.get("name_pinyin") || null, + active_locations: Array.isArray(r.get("active_locations")) ? r.get("active_locations") : [], + topic_profile: Array.isArray(r.get("topic_profile")) ? r.get("topic_profile") : [], + related_person_ids_topk: Array.isArray(r.get("related_person_ids_topk")) ? r.get("related_person_ids_topk") : [], + data_quality: toNumber(r.get("data_quality")) ?? 0, + privacy_level: String(r.get("privacy_level") || "low"), + data_source: normalizeDataSource(r.get("data_source")), + consent_status: normalizeConsentStatus(r.get("consent_status")), + occupation: r.get("occupation") || null, + education_background: r.get("education_background") || null, + residential_status: r.get("residential_status") || null, + source_event_ids: Array.isArray(r.get("source_event_ids")) ? r.get("source_event_ids") : [], + birth_date_utc: birthDateUtc, + age_label: buildAgeLabelByBirthDateUtc(birthDateUtc), type: normalizeType(r.get("type")), occurred_at: r.get("occurred_at"), importance: r.get("importance") @@ -1403,18 +2598,20 @@ export class GraphRagService { persons: existingEntities.persons?.length || 0, organizations: existingEntities.organizations?.length || 0 }); - const analysis = await this.llmService.analyzeText(text, existingEntities, { + const rawAnalysis = await this.llmService.analyzeText(text, existingEntities, { parallelism: options?.parallelism, expertReview: options?.expertReview, onProgress: (event) => onProgress({ stage: 'llm', ...event }) }); + const { analysis, normalization } = this._normalizeAnalysisPayload(rawAnalysis, existingEntities, text); onProgress({ stage: 'analysis_ready', persons: analysis.persons?.length || 0, organizations: analysis.organizations?.length || 0, events: analysis.events?.length || 0, topics: analysis.topics?.length || 0, - relations: analysis.relations?.length || 0 + relations: analysis.relations?.length || 0, + normalization }); console.log("[DEBUG] LLM analysis result:", JSON.stringify(analysis)); console.log("[DEBUG] Existing entities:", JSON.stringify(existingEntities)); @@ -1472,6 +2669,8 @@ export class GraphRagService { if (created) stats.relations++; } onProgress({ stage: 'relations_done', created: stats.relations }); + await this._refreshPersonProfiles(session, userId); + onProgress({ stage: 'person_profile_done' }); const rawInput = String(text || ""); const keywordTriggered = CORRECTION_INTENT_RE.test(rawInput); @@ -1513,7 +2712,8 @@ export class GraphRagService { const result = { ok: true, message: "增量更新成功", - stats + stats, + normalization }; try { const vectorSync = await this.reindexUserVectors({ userId, limit: 300 }); @@ -1537,7 +2737,9 @@ export class GraphRagService { try { const persons = await session.run( `MATCH (p:Person {user_id: $userId}) - RETURN p.id AS id, p.name AS name, p.summary AS summary, p.gender AS gender + RETURN p.id AS id, p.name AS name, p.summary AS summary, p.gender AS gender, p.aliases AS aliases, p.person_role AS person_role, p.birth_date_utc AS birth_date_utc, + p.occupation AS occupation, p.education_background AS education_background, p.residential_status AS residential_status, + p.data_source AS data_source, p.consent_status AS consent_status, p.privacy_level AS privacy_level ORDER BY p.created_at DESC LIMIT 50`, { userId } @@ -1556,7 +2758,17 @@ export class GraphRagService { id: r.get('id'), name: r.get('name'), summary: r.get('summary'), - gender: normalizeGender(r.get('gender')) + gender: normalizeGender(r.get('gender')), + aliases: Array.isArray(r.get('aliases')) ? r.get('aliases') : [], + person_role: normalizePersonRole(r.get('person_role')), + birth_date_utc: r.get('birth_date_utc')?.toString?.() || r.get('birth_date_utc') || null, + age_label: buildAgeLabelByBirthDateUtc(r.get('birth_date_utc')?.toString?.() || r.get('birth_date_utc') || null), + occupation: r.get('occupation') || null, + education_background: r.get('education_background') || null, + residential_status: r.get('residential_status') || null, + data_source: normalizeDataSource(r.get('data_source')), + consent_status: normalizeConsentStatus(r.get('consent_status')), + privacy_level: String(r.get('privacy_level') || 'low') })), organizations: organizations.records.map(r => ({ id: r.get('id'), @@ -1573,6 +2785,11 @@ export class GraphRagService { * 创建或更新人物实体(根据名字/别名去重) */ async _upsertPerson(session, person, userId) { + const profileFields = readPersonProfileFields(person); + const incomingAliases = mergeAliases(person.name, person.aliases); + const aliasKeys = incomingAliases.map((item) => normalizeMentionKey(item)).filter(Boolean); + const normalizedName = normalizeNameKey(person.name); + const namePart = String(person.name || "").split('的')[0]; // 1. 尝试通过 ID 查找(LLM 复用了已有 ID) if (person.id && !person.id.startsWith('p_')) { const byId = await session.run( @@ -1585,6 +2802,7 @@ export class GraphRagService { const existingId = byId.records[0].get('id'); await this._updatePersonSummary(session, existingId, person.summary || person.description, userId); await this._updatePersonGender(session, existingId, person.gender, userId); + await this._updatePersonBaseProfile(session, existingId, person, userId); return { created: false, updated: true, id: existingId }; } } @@ -1592,16 +2810,39 @@ export class GraphRagService { // 2. 尝试通过名字查找(包括别名) const existing = await session.run( `MATCH (p:Person {user_id: $userId}) - WHERE p.name = $name OR p.name CONTAINS $namePart OR $name CONTAINS p.name + WITH p, + toLower(coalesce(p.name, '')) AS p_name_lower, + coalesce(p.normalized_name, '') AS p_normalized_name, + [x IN coalesce(p.aliases, []) | toLower(toString(x))] AS p_aliases_lower + WHERE p.name = $name + OR p.name CONTAINS $namePart + OR $name CONTAINS p.name + OR p_normalized_name = $normalizedName + OR p_name_lower IN $aliasKeys + OR any(alias IN p_aliases_lower WHERE alias = toLower($name) OR alias IN $aliasKeys) + WITH p, + CASE WHEN p.name = $name THEN 100 ELSE 0 END + + CASE WHEN p_normalized_name = $normalizedName THEN 60 ELSE 0 END + + CASE WHEN p_name_lower IN $aliasKeys THEN 35 ELSE 0 END + + CASE WHEN any(alias IN p_aliases_lower WHERE alias = toLower($name) OR alias IN $aliasKeys) THEN 30 ELSE 0 END + + CASE WHEN p.name CONTAINS $namePart OR $name CONTAINS p.name THEN 10 ELSE 0 END AS match_score RETURN p.id AS id, p.summary AS summary, p.gender AS gender + ORDER BY match_score DESC, p.updated_at DESC LIMIT 1`, - { name: person.name, namePart: person.name.split('的')[0], userId } + { + name: person.name, + namePart, + normalizedName, + aliasKeys, + userId + } ); if (existing.records.length > 0) { const existingId = existing.records[0].get('id'); await this._updatePersonSummary(session, existingId, person.summary || person.description, userId); await this._updatePersonGender(session, existingId, person.gender, userId); + await this._updatePersonBaseProfile(session, existingId, person, userId); return { created: false, updated: true, id: existingId }; } @@ -1613,6 +2854,29 @@ export class GraphRagService { name: $name, summary: $summary, gender: $gender, + aliases: $aliases, + normalized_name: $normalizedName, + person_role: $personRole, + first_seen_at: null, + last_seen_at: null, + source_count: 0, + co_event_count_with_user: 0, + relation_to_user: '未知', + relation_confidence: 0.2, + mention_keywords: $mentionKeywords, + name_pinyin: $namePinyin, + active_locations: [], + topic_profile: [], + related_person_ids_topk: [], + data_quality: 0.2, + privacy_level: 'low', + data_source: $dataSource, + consent_status: $consentStatus, + occupation: $occupation, + education_background: $educationBackground, + residential_status: $residentialStatus, + source_event_ids: [], + birth_date_utc: CASE WHEN $birthDateUtc = '' THEN null ELSE datetime($birthDateUtc) END, user_id: $userId, created_at: datetime(), updated_at: datetime() @@ -1622,12 +2886,90 @@ export class GraphRagService { name: person.name, summary: mergeCompactSummary("", person.summary || person.description || "", { maxParts: 4, maxLen: 240 }), gender: normalizeGender(person.gender), + aliases: mergeAliases(person.name, person.aliases), + normalizedName: normalizeNameKey(person.name), + personRole: (() => { + const incoming = normalizePersonRole(person.person_role || person.role); + if (incoming !== "unknown") return incoming; + return inferPersonRoleFromId(newId, userId); + })(), + mentionKeywords: buildMentionKeywords({ + name: person.name, + aliases: mergeAliases(person.name, person.aliases), + relationToUser: "未知" + }), + namePinyin: String(person.name_pinyin || person.pinyin || "").trim() || null, + birthDateUtc: inferBirthDateUtcFromPerson(person) || "", + dataSource: profileFields.data_source, + consentStatus: profileFields.consent_status, + occupation: profileFields.occupation, + educationBackground: profileFields.education_background, + residentialStatus: profileFields.residential_status, userId } ); return { created: true, updated: false, id: newId }; } + async _updatePersonBaseProfile(session, personId, person, userId) { + const current = await session.run( + `MATCH (p:Person {id: $id, user_id: $userId}) + RETURN p.name AS name, p.aliases AS aliases, p.person_role AS person_role, p.name_pinyin AS name_pinyin, p.birth_date_utc AS birth_date_utc, + p.data_source AS data_source, p.consent_status AS consent_status, + p.occupation AS occupation, p.education_background AS education_background, p.residential_status AS residential_status`, + { id: personId, userId } + ); + if (current.records.length === 0) return; + const record = current.records[0]; + const baseName = String(record.get("name") || person?.name || "").trim(); + const aliases = mergeAliases(record.get("aliases"), person?.aliases, person?.name, baseName); + const existingRole = normalizePersonRole(record.get("person_role")); + const incomingRole = normalizePersonRole(person?.person_role || person?.role); + const incomingBirthDateUtc = inferBirthDateUtcFromPerson(person); + const incomingProfile = readPersonProfileFields(person); + const storedBirthDateUtc = record.get("birth_date_utc")?.toString?.() || record.get("birth_date_utc") || ""; + const finalBirthDateUtc = storedBirthDateUtc || incomingBirthDateUtc || ""; + const dataSource = normalizeDataSource(record.get("data_source") || incomingProfile.data_source); + const consentStatus = normalizeConsentStatus(record.get("consent_status") || incomingProfile.consent_status); + const occupation = pickFirstText(record.get("occupation"), incomingProfile.occupation); + const educationBackground = pickFirstText(record.get("education_background"), incomingProfile.education_background); + const residentialStatus = pickFirstText(record.get("residential_status"), incomingProfile.residential_status); + const currentNamePinyin = String(record.get("name_pinyin") || "").trim(); + const incomingNamePinyin = String(person?.name_pinyin || person?.pinyin || "").trim(); + const finalNamePinyin = currentNamePinyin || incomingNamePinyin || null; + const personRole = existingRole !== "unknown" + ? existingRole + : (incomingRole !== "unknown" ? incomingRole : inferPersonRoleFromId(personId, userId)); + await session.run( + `MATCH (p:Person {id: $id, user_id: $userId}) + SET p.aliases = $aliases, + p.normalized_name = $normalizedName, + p.person_role = $personRole, + p.name_pinyin = $namePinyin, + p.data_source = $dataSource, + p.consent_status = $consentStatus, + p.occupation = $occupation, + p.education_background = $educationBackground, + p.residential_status = $residentialStatus, + p.birth_date_utc = CASE WHEN $birthDateUtc = '' THEN p.birth_date_utc ELSE datetime($birthDateUtc) END, + p.updated_at = datetime()`, + { + id: personId, + userId, + aliases, + normalizedName: normalizeNameKey(baseName || person?.name), + personRole, + namePinyin: finalNamePinyin, + birthDateUtc: finalBirthDateUtc, + dataSource, + consentStatus, + occupation, + educationBackground, + residentialStatus + } + ); + } + /** * 更新人物 summary(追加新信息) */ @@ -1728,6 +3070,141 @@ export class GraphRagService { return true; } + _inferRelationToUser({ isSelf = false, personRole = "unknown", relTypes = [], coEventCount = 0, sourceCount = 0 } = {}) { + if (isSelf) return { relation: "本人", confidence: 1 }; + const role = normalizePersonRole(personRole); + if (role === "partner") return { relation: "恋爱", confidence: 0.95 }; + if (role === "ex") return { relation: "前任", confidence: 0.9 }; + const types = (Array.isArray(relTypes) ? relTypes : []) + .map((item) => String(item || "").trim().toUpperCase()) + .filter(Boolean); + const text = types.join("|"); + let relation = "未知"; + if (/(EX|FORMER|BROKE|BREAKUP|前任)/.test(text)) relation = "前任"; + else if (/(PARTNER|SPOUSE|BOYFRIEND|GIRLFRIEND|LOVER|COUPLE|ROMANCE|恋|伴侣|情侣|对象)/.test(text)) relation = "恋爱"; + else if (/(FLIRT|暧昧)/.test(text)) relation = "暧昧"; + else if (/(CLASSMATE|SCHOOLMATE|同学)/.test(text)) relation = "同学"; + else if (/(COLLEAGUE|COWORKER|WORKMATE|同事)/.test(text)) relation = "同事"; + let confidence = ({ + 恋爱: 0.88, + 前任: 0.84, + 暧昧: 0.66, + 同学: 0.72, + 同事: 0.72, + 未知: 0.32 + })[relation] ?? 0.32; + confidence += Math.min(0.12, Number(coEventCount || 0) * 0.02); + confidence += Math.min(0.12, types.length * 0.04); + confidence += Math.min(0.08, Number(sourceCount || 0) * 0.01); + confidence = Math.max(0, Math.min(0.99, confidence)); + return { relation, confidence }; + } + + async _refreshPersonProfiles(session, userId) { + const selfPerson = await this._resolveSelfPerson(userId).catch(() => null); + const selfIds = [...new Set([selfPerson?.id, "user", `${userId}__user`].map((x) => String(x || "").trim()).filter(Boolean))]; + const result = await session.run( + ` + MATCH (p:Person) + WHERE p.user_id = $userId + OR EXISTS { MATCH ()-[rp]->(p) WHERE rp.user_id = $userId } + OR EXISTS { MATCH (p)-[rp]->() WHERE rp.user_id = $userId } + OPTIONAL MATCH (p)-[:PARTICIPATES_IN]->(e:Event {user_id: $userId}) + WHERE coalesce(e.status, 'active') <> 'invalidated' + WITH p, count(DISTINCT e) AS source_count, min(e.occurred_at) AS first_seen_at, max(e.occurred_at) AS last_seen_at + OPTIONAL MATCH (self:Person)-[:PARTICIPATES_IN]->(co:Event {user_id: $userId})<-[:PARTICIPATES_IN]-(p) + WHERE coalesce(co.status, 'active') <> 'invalidated' AND self.id IN $selfIds + WITH p, source_count, first_seen_at, last_seen_at, count(DISTINCT co) AS co_event_count_with_user + OPTIONAL MATCH (self2:Person)-[r]-(p) + WHERE self2.id IN $selfIds AND r.user_id = $userId + RETURN p.id AS id, + p.name AS name, + p.aliases AS aliases, + p.person_role AS person_role, + p.summary AS summary, + p.birth_date_utc AS birth_date_utc, + p.data_source AS data_source, + p.consent_status AS consent_status, + source_count, + first_seen_at, + last_seen_at, + co_event_count_with_user, + collect(DISTINCT type(r)) AS relation_types + `, + { userId, selfIds } + ); + for (const row of result.records) { + const personId = String(row.get("id") || "").trim(); + if (!personId) continue; + const name = String(row.get("name") || "").trim(); + const aliases = mergeAliases(row.get("aliases"), name); + const sourceCount = Math.max(0, Math.round(toNumber(row.get("source_count")) ?? 0)); + const coEventCount = Math.max(0, Math.round(toNumber(row.get("co_event_count_with_user")) ?? 0)); + const relationTypes = Array.isArray(row.get("relation_types")) ? row.get("relation_types") : []; + const existingRole = normalizePersonRole(row.get("person_role")); + const inferredRole = inferPersonRoleFromId(personId, userId); + const personRole = existingRole !== "unknown" ? existingRole : inferredRole; + const relation = this._inferRelationToUser({ + isSelf: selfIds.includes(personId), + personRole, + relTypes: relationTypes, + coEventCount, + sourceCount + }); + const firstSeenAt = row.get("first_seen_at")?.toString?.() || ""; + const lastSeenAt = row.get("last_seen_at")?.toString?.() || ""; + const summary = String(row.get("summary") || "").trim(); + const birthDateUtc = row.get("birth_date_utc")?.toString?.() || row.get("birth_date_utc") || null; + const privacyLevel = buildPrivacyLevel({ summaries: [summary], relationToUser: relation.relation }); + const dataQuality = buildDataQuality({ + sourceCount, + coEventCount, + aliases, + relationConfidence: relation.confidence, + birthDateUtc + }); + await session.run( + ` + MATCH (p:Person {id: $id}) + WHERE p.user_id = $userId + OR EXISTS { MATCH ()-[rp]->(p) WHERE rp.user_id = $userId } + OR EXISTS { MATCH (p)-[rp]->() WHERE rp.user_id = $userId } + SET p.aliases = $aliases, + p.normalized_name = $normalized_name, + p.person_role = $person_role, + p.first_seen_at = CASE WHEN $first_seen_at = '' THEN null ELSE datetime($first_seen_at) END, + p.last_seen_at = CASE WHEN $last_seen_at = '' THEN null ELSE datetime($last_seen_at) END, + p.source_count = $source_count, + p.co_event_count_with_user = $co_event_count_with_user, + p.relation_to_user = $relation_to_user, + p.relation_confidence = $relation_confidence, + p.data_quality = $data_quality, + p.privacy_level = $privacy_level, + p.data_source = CASE WHEN coalesce(p.data_source, '') = '' THEN $data_source ELSE p.data_source END, + p.consent_status = CASE WHEN coalesce(p.consent_status, '') = '' THEN $consent_status ELSE p.consent_status END, + p.updated_at = datetime() + `, + { + id: personId, + userId, + aliases, + normalized_name: normalizeNameKey(name), + person_role: personRole, + first_seen_at: firstSeenAt, + last_seen_at: lastSeenAt, + source_count: neo4j.int(sourceCount), + co_event_count_with_user: neo4j.int(coEventCount), + relation_to_user: relation.relation, + relation_confidence: relation.confidence, + data_quality: dataQuality, + privacy_level: privacyLevel, + data_source: normalizeDataSource(row.get("data_source")), + consent_status: normalizeConsentStatus(row.get("consent_status")) + } + ); + } + } + /** * 创建事件 */ @@ -1889,6 +3366,206 @@ export class GraphRagService { return `${userId}__${cleanId}`; } + _normalizeAnalysisPayload(rawAnalysis = {}, existingEntities = {}, rawText = "") { + const personsRaw = Array.isArray(rawAnalysis?.persons) ? rawAnalysis.persons : []; + const organizationsRaw = Array.isArray(rawAnalysis?.organizations) ? rawAnalysis.organizations : []; + const eventsRaw = Array.isArray(rawAnalysis?.events) ? rawAnalysis.events : []; + const relationsRaw = Array.isArray(rawAnalysis?.relations) ? rawAnalysis.relations : []; + const topicsRaw = Array.isArray(rawAnalysis?.topics) ? rawAnalysis.topics : []; + + const personNameMap = new Map(); + const orgNameMap = new Map(); + const idAliasMap = new Map(); + const personByKey = new Map(); + const orgByKey = new Map(); + + (Array.isArray(existingEntities?.persons) ? existingEntities.persons : []).forEach((item) => { + const key = normalizeNameKey(item?.name); + if (key && item?.id) personNameMap.set(key, item.id); + }); + (Array.isArray(existingEntities?.organizations) ? existingEntities.organizations : []).forEach((item) => { + const key = normalizeNameKey(item?.name); + if (key && item?.id) orgNameMap.set(key, item.id); + }); + + for (const person of personsRaw) { + const name = String(person?.name || "").trim(); + if (!name) continue; + const key = normalizeNameKey(name); + if (!key) continue; + const canonicalExistingId = personNameMap.get(key); + const canonicalLocalId = personByKey.get(key)?.id; + const canonicalId = canonicalExistingId || canonicalLocalId || person?.id || `p_${key.slice(0, 12)}`; + const prev = personByKey.get(key); + const merged = { + ...(prev || {}), + ...person, + id: canonicalId, + name + }; + personByKey.set(key, merged); + if (person?.id) idAliasMap.set(person.id, canonicalId); + } + + const aliasPairs = extractSamePersonAliasPairs(rawText); + let explicitSamePersonMerged = 0; + const findPersonEntryById = (personId) => { + const id = String(personId || "").trim(); + if (!id) return null; + for (const [key, value] of personByKey.entries()) { + if (String(value?.id || "").trim() === id) return { key, value }; + } + return null; + }; + const mergePersonEntries = (leftKey, rightKey, leftNameHint = "", rightNameHint = "") => { + if (!leftKey || !rightKey || leftKey === rightKey) return false; + const leftPerson = personByKey.get(leftKey); + const rightPerson = personByKey.get(rightKey); + if (!leftPerson || !rightPerson) return false; + const leftExisting = personNameMap.has(leftKey); + const rightExisting = personNameMap.has(rightKey); + const keepLeft = leftExisting || (!rightExisting && String(leftPerson.name || "").length >= String(rightPerson.name || "").length); + const primaryPerson = keepLeft ? leftPerson : rightPerson; + const secondaryPerson = keepLeft ? rightPerson : leftPerson; + const primaryKey = keepLeft ? leftKey : rightKey; + const secondaryKey = keepLeft ? rightKey : leftKey; + const mergedPerson = { + ...secondaryPerson, + ...primaryPerson, + id: primaryPerson.id || secondaryPerson.id, + name: primaryPerson.name || secondaryPerson.name, + aliases: mergeAliases( + primaryPerson.name, + primaryPerson.aliases, + secondaryPerson.name, + secondaryPerson.aliases, + leftNameHint, + rightNameHint + ) + }; + personByKey.set(primaryKey, mergedPerson); + personByKey.delete(secondaryKey); + if (secondaryPerson?.id) idAliasMap.set(secondaryPerson.id, mergedPerson.id); + return true; + }; + for (const [leftName, rightName] of aliasPairs) { + const leftKey = normalizeNameKey(leftName); + const rightKey = normalizeNameKey(rightName); + if (!leftKey || !rightKey || leftKey === rightKey) continue; + if (mergePersonEntries(leftKey, rightKey, leftName, rightName)) { + explicitSamePersonMerged += 1; + continue; + } + const leftPerson = personByKey.get(leftKey); + const rightPerson = personByKey.get(rightKey); + const singleSide = leftPerson || rightPerson; + const aliasName = leftPerson ? rightName : leftName; + if (singleSide) { + singleSide.aliases = mergeAliases(singleSide.name, singleSide.aliases, aliasName); + } + } + for (const rel of relationsRaw) { + const relType = String(rel?.type || "").replace(/[^a-zA-Z0-9_]/g, "_").toUpperCase(); + if (!["SAME_AS", "ALIAS_OF", "IS_ALIAS_OF", "IDENTICAL_TO"].includes(relType)) continue; + const sourceId = String(idAliasMap.get(rel?.source) || rel?.source || "").trim(); + const targetId = String(idAliasMap.get(rel?.target) || rel?.target || "").trim(); + if (!sourceId || !targetId || sourceId === targetId) continue; + const leftEntry = findPersonEntryById(sourceId); + const rightEntry = findPersonEntryById(targetId); + if (!leftEntry || !rightEntry) continue; + if (mergePersonEntries(leftEntry.key, rightEntry.key, leftEntry.value?.name, rightEntry.value?.name)) { + explicitSamePersonMerged += 1; + } + } + + for (const org of organizationsRaw) { + const name = String(org?.name || "").trim(); + if (!name) continue; + const key = normalizeNameKey(name); + if (!key) continue; + const canonicalExistingId = orgNameMap.get(key); + const canonicalLocalId = orgByKey.get(key)?.id; + const canonicalId = canonicalExistingId || canonicalLocalId || org?.id || `o_${key.slice(0, 12)}`; + const prev = orgByKey.get(key); + const merged = { + ...(prev || {}), + ...org, + id: canonicalId, + name + }; + orgByKey.set(key, merged); + if (org?.id) idAliasMap.set(org.id, canonicalId); + } + + const persons = [...personByKey.values()]; + const organizations = [...orgByKey.values()]; + + const events = eventsRaw.map((event, idx) => { + const rawParticipants = Array.isArray(event?.participants) ? event.participants : []; + const participants = [...new Set(rawParticipants + .map((pid) => idAliasMap.get(pid) || pid) + .map((pid) => String(pid || "").trim()) + .filter(Boolean))]; + return { + ...event, + id: event?.id || `e_norm_${idx}_${Date.now().toString(36)}`, + summary: String(event?.summary || "").trim(), + participants + }; + }).filter((event) => event.summary || event.participants.length > 0); + + const relationSeen = new Set(); + const relations = []; + for (const rel of relationsRaw) { + const source = String(idAliasMap.get(rel?.source) || rel?.source || "").trim(); + const target = String(idAliasMap.get(rel?.target) || rel?.target || "").trim(); + if (!source || !target || source === target) continue; + const relType = String(rel?.type || "RELATED_TO").replace(/[^a-zA-Z0-9_]/g, "_").toUpperCase() || "RELATED_TO"; + let finalSource = source; + let finalTarget = target; + if (RELATION_SYMMETRIC_TYPES.has(relType) && finalSource > finalTarget) { + finalSource = target; + finalTarget = source; + } + const key = `${finalSource}__${relType}__${finalTarget}`; + if (relationSeen.has(key)) continue; + relationSeen.add(key); + relations.push({ + ...rel, + type: relType, + source: finalSource, + target: finalTarget + }); + } + + const topicSeen = new Set(); + const topics = topicsRaw.filter((topic) => { + const key = normalizeNameKey(topic?.name); + if (!key || topicSeen.has(key)) return false; + topicSeen.add(key); + return true; + }); + + const normalization = { + person_alias_merged: Math.max(0, personsRaw.length - persons.length), + person_same_identity_merged: explicitSamePersonMerged, + organization_alias_merged: Math.max(0, organizationsRaw.length - organizations.length), + relation_deduped: Math.max(0, relationsRaw.length - relations.length) + }; + + return { + analysis: { + ...rawAnalysis, + persons, + organizations, + events, + relations, + topics + }, + normalization + }; + } + /** * 创建关系(自动去重) */ diff --git a/OnceLove/oncelove-graphrag/api/src/services/index.js b/OnceLove/oncelove-graphrag/api/src/services/index.js index 13ae3b6..5be2781 100644 --- a/OnceLove/oncelove-graphrag/api/src/services/index.js +++ b/OnceLove/oncelove-graphrag/api/src/services/index.js @@ -3,3 +3,4 @@ export { RerankService } from "./rerank.service.js"; export { GraphRagService } from "./graphrag.service.js"; export { LLMService } from "./llm.service.js"; export { MultiAgentService } from "./multiagent.service.js"; +export { SocialImageAgentService } from "./social-image-agent.service.js"; diff --git a/OnceLove/oncelove-graphrag/api/src/services/llm.service.js b/OnceLove/oncelove-graphrag/api/src/services/llm.service.js index 92babbd..2994f2b 100644 --- a/OnceLove/oncelove-graphrag/api/src/services/llm.service.js +++ b/OnceLove/oncelove-graphrag/api/src/services/llm.service.js @@ -12,10 +12,76 @@ const estimateTokens = (text) => { const estimateMessageTokens = (messages = []) => { return messages.reduce((sum, message) => { - return sum + estimateTokens(message?.content || "") + 6; + const content = message?.content; + if (Array.isArray(content)) { + const merged = content.map((part) => { + if (!part || typeof part !== "object") return ""; + if (typeof part.text === "string") return part.text; + if (typeof part.image_url === "string") return part.image_url; + if (part.image_url && typeof part.image_url === "object" && typeof part.image_url.url === "string") return part.image_url.url; + if (part.source && typeof part.source === "object" && typeof part.source.data === "string") return part.source.data.slice(0, 64); + return ""; + }).join(" "); + return sum + estimateTokens(merged) + 6; + } + return sum + estimateTokens(content || "") + 6; }, 0); }; +const normalizeContentPart = (part) => { + if (typeof part === "string") { + return { type: "text", text: part }; + } + if (!part || typeof part !== "object") { + return null; + } + if (typeof part.type !== "string" || !part.type.trim()) { + if (typeof part.text === "string") { + return { ...part, type: "text" }; + } + return null; + } + if (part.type === "image_url" && typeof part.image_url === "string") { + return { ...part, image_url: { url: part.image_url } }; + } + return part; +}; + +const normalizeMessagesForChat = (messages = []) => { + const hasArrayContent = messages.some((message) => Array.isArray(message?.content)); + if (!hasArrayContent) return messages; + return messages.map((message) => { + const content = message?.content; + if (!Array.isArray(content)) { + return message; + } + const normalizedContent = content.map(normalizeContentPart).filter(Boolean); + return { + ...message, + content: normalizedContent.length ? normalizedContent : [{ type: "text", text: "" }] + }; + }); +}; + +const extractDeltaText = (chunk = {}) => { + const choices = Array.isArray(chunk?.choices) ? chunk.choices : []; + if (!choices.length) return ""; + const delta = choices[0]?.delta; + if (!delta || typeof delta !== "object") return ""; + if (typeof delta.content === "string") return delta.content; + if (Array.isArray(delta.content)) { + return delta.content + .map((item) => { + if (typeof item === "string") return item; + if (item && typeof item === "object" && typeof item.text === "string") return item.text; + return ""; + }) + .join(""); + } + if (typeof delta.reasoning_content === "string") return delta.reasoning_content; + return ""; +}; + const parseJsonObject = (content) => { if (!content || typeof content !== "string") { throw new Error("empty content"); @@ -257,19 +323,65 @@ export class LLMService { this.baseUrl = (env.LLM_BASE_URL ?? "").replace(/\/+$/, ""); this.apiKey = env.LLM_API_KEY ?? ""; this.model = env.LLM_MODEL_NAME ?? ""; + this.intentModel = env.LLM_INTENT_MODEL_NAME ?? ""; + this.imageModel = env.LLM_IMAGE_MODEL_NAME ?? ""; + this.thinkingMode = String(env.LLM_THINKING_MODE ?? "auto").trim().toLowerCase(); } isEnabled() { return Boolean(this.baseUrl && this.apiKey && this.model); } - async chat(messages, temperature = 0.7) { - if (!this.isEnabled()) { - throw createHttpError(400, "LLM 服务未配置,请提供 LLM_BASE_URL/LLM_API_KEY/LLM_MODEL_NAME"); + isModelEnabled(modelName = "") { + return Boolean(this.baseUrl && this.apiKey && String(modelName || "").trim()); + } + + isIntentEnabled() { + return this.isModelEnabled(this.intentModel || this.model); + } + + isImageEnabled() { + return this.isModelEnabled(this.imageModel || this.model); + } + + async chat(messages, temperature = 0.7, options = {}) { + return this.chatWithModel(messages, temperature, this.model, 4096, options); + } + + async chatImage(messages, temperature = 0.7, options = {}) { + return this.chatWithModel(messages, temperature, this.imageModel || this.model, 4096, options); + } + + async chatStream(messages, temperature = 0.7, options = {}, onToken = () => {}) { + return this.chatWithModelStream(messages, temperature, this.model, 4096, options, onToken); + } + + async chatWithModel(messages, temperature = 0.7, model = this.model, maxTokens = 4096, options = {}) { + if (!this.baseUrl || !this.apiKey) { + throw createHttpError(400, "LLM 服务未配置,请提供 LLM_BASE_URL/LLM_API_KEY"); + } + if (!String(model || "").trim()) { + throw createHttpError(400, "LLM 模型未配置,请提供 LLM_MODEL_NAME 或 LLM_INTENT_MODEL_NAME"); } - const promptTokensEstimated = estimateMessageTokens(messages); - console.log(`[TOKEN] model=${this.model} prompt_tokens_estimated=${promptTokensEstimated} max_tokens=4096`); + const normalizedMessages = normalizeMessagesForChat(messages); + const promptTokensEstimated = estimateMessageTokens(normalizedMessages); + const normalizedModeRaw = String(options?.thinking || this.thinkingMode || "auto").trim().toLowerCase(); + const thinkingMode = ["on", "off", "auto"].includes(normalizedModeRaw) ? normalizedModeRaw : "auto"; + const isQwen3Family = /qwen\s*3|qwen3/i.test(String(model || "")); + const autoEnableThinking = isQwen3Family && promptTokensEstimated >= 1600 && Number(maxTokens || 0) >= 900; + const enableThinking = thinkingMode === "on" ? true : thinkingMode === "off" ? false : autoEnableThinking; + console.log(`[TOKEN] model=${model} prompt_tokens_estimated=${promptTokensEstimated} max_tokens=${maxTokens} thinking_mode=${thinkingMode} thinking_enabled=${enableThinking}`); + + const payload = { + model, + messages: normalizedMessages, + temperature: temperature, + max_tokens: maxTokens + }; + if (isQwen3Family) { + payload.extra_body = { enable_thinking: enableThinking }; + } const response = await fetch(`${this.baseUrl}/chat/completions`, { method: "POST", @@ -277,12 +389,7 @@ export class LLMService { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, - body: JSON.stringify({ - model: this.model, - messages: messages, - temperature: temperature, - max_tokens: 4096 - }) + body: JSON.stringify(payload) }); if (!response.ok) { @@ -296,11 +403,102 @@ export class LLMService { const completionTokens = usage.completion_tokens; const totalTokens = usage.total_tokens; console.log( - `[TOKEN] model=${this.model} prompt_tokens_actual=${promptTokens ?? "n/a"} completion_tokens=${completionTokens ?? "n/a"} total_tokens=${totalTokens ?? "n/a"}` + `[TOKEN] model=${model} prompt_tokens_actual=${promptTokens ?? "n/a"} completion_tokens=${completionTokens ?? "n/a"} total_tokens=${totalTokens ?? "n/a"}` ); return data; } + async chatWithModelStream(messages, temperature = 0.7, model = this.model, maxTokens = 4096, options = {}, onToken = () => {}) { + if (!this.baseUrl || !this.apiKey) { + throw createHttpError(400, "LLM 服务未配置,请提供 LLM_BASE_URL/LLM_API_KEY"); + } + if (!String(model || "").trim()) { + throw createHttpError(400, "LLM 模型未配置,请提供 LLM_MODEL_NAME 或 LLM_INTENT_MODEL_NAME"); + } + + const normalizedMessages = normalizeMessagesForChat(messages); + const promptTokensEstimated = estimateMessageTokens(normalizedMessages); + const normalizedModeRaw = String(options?.thinking || this.thinkingMode || "auto").trim().toLowerCase(); + const thinkingMode = ["on", "off", "auto"].includes(normalizedModeRaw) ? normalizedModeRaw : "auto"; + const isQwen3Family = /qwen\s*3|qwen3/i.test(String(model || "")); + const autoEnableThinking = isQwen3Family && promptTokensEstimated >= 1600 && Number(maxTokens || 0) >= 900; + const enableThinking = thinkingMode === "on" ? true : thinkingMode === "off" ? false : autoEnableThinking; + + const payload = { + model, + messages: normalizedMessages, + temperature: temperature, + max_tokens: maxTokens, + stream: true + }; + if (isQwen3Family) { + payload.extra_body = { enable_thinking: enableThinking }; + } + + const response = await fetch(`${this.baseUrl}/chat/completions`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}` + }, + body: JSON.stringify(payload) + }); + + if (!response.ok) { + const errorText = await response.text(); + throw createHttpError(response.status, `LLM 请求失败:${errorText}`); + } + + const reader = response.body?.getReader?.(); + if (!reader) { + const fallback = await response.json().catch(() => null); + const text = String(fallback?.choices?.[0]?.message?.content || ""); + if (text) onToken(text, text, { done: true }); + return { text, done: true }; + } + + const decoder = new TextDecoder("utf-8"); + let buffer = ""; + let fullText = ""; + let done = false; + while (!done) { + const readResult = await reader.read(); + done = Boolean(readResult?.done); + if (!readResult?.value) continue; + buffer += decoder.decode(readResult.value, { stream: true }); + const lines = buffer.split(/\r?\n/); + buffer = lines.pop() || ""; + for (const line of lines) { + const trimmed = String(line || "").trim(); + if (!trimmed || !trimmed.startsWith("data:")) continue; + const dataPayload = trimmed.slice(5).trim(); + if (!dataPayload || dataPayload === "[DONE]") continue; + try { + const chunk = JSON.parse(dataPayload); + const deltaText = extractDeltaText(chunk); + if (deltaText) { + fullText += deltaText; + onToken(deltaText, fullText, chunk); + } + } catch {} + } + } + if (buffer.trim().startsWith("data:")) { + const dataPayload = buffer.trim().slice(5).trim(); + if (dataPayload && dataPayload !== "[DONE]") { + try { + const chunk = JSON.parse(dataPayload); + const deltaText = extractDeltaText(chunk); + if (deltaText) { + fullText += deltaText; + onToken(deltaText, fullText, chunk); + } + } catch {} + } + } + return { text: fullText, done: true }; + } + async _analyzeTextSinglePass(text, existingContext) { const systemPrompt = `你是一个恋爱关系知识图谱构建专家。从用户输入的文本中提取实体和关系,用于后续的恋爱决策建议。 @@ -319,7 +517,13 @@ export class LLMService { "name": "人物名称", "summary": "人物描述", "role": "用户 | 恋爱对象 | 朋友 | 家人 | 同事 | 其他", - "gender": "male | female | unknown" + "gender": "male | female | unknown", + "birth_date_utc": "YYYY-MM-DDT00:00:00.000Z 或 null", + "occupation": "职业(可选)", + "education_background": "教育背景(可选)", + "residential_status": "常住状态/所在城市(可选)", + "consent_status": "granted | denied | revoked | unknown", + "data_source": "ingest | user_input | manual | import | llm_extract | system" } ], "organizations": [ @@ -372,6 +576,8 @@ export class LLMService { 8. 长度控制:persons.summary 不超过 100 字;events.summary 不超过 120 字;relations.summary 不超过 80 字 9. 人物性别:person.gender 只允许 male/female/unknown,无法确认时填 unknown 10. 若文本出现“某人毕业于/就读于/来自某大学”,需创建该大学 organization,并补充关系(如 STUDIED_AT/ALUMNUS_OF) +11. 出现生日/出生年份时,person.birth_date_utc 必须写成 UTC ISO(如 2001-05-20T00:00:00.000Z);只有年份时用该年 01-01 +12. person.consent_status 未明确时填 unknown;person.data_source 默认填 llm_extract 只返回 JSON,不要有其他文字。`; @@ -535,6 +741,60 @@ export class LLMService { } } + async classifyIntent(text, options = {}) { + const rawText = typeof text === "string" ? text.trim() : ""; + if (!rawText) { + return { + intent: "qa", + should_ingest: false, + should_query: true, + confidence: 0.52, + reason: "empty_text", + model: this.intentModel || this.model + }; + } + const model = this.intentModel || this.model; + const maxTokens = Math.min(Math.max(Number(options?.maxTokens || 220), 80), 400); + const result = await this.chatWithModel([ + { + role: "system", + content: `你是“意图分类器”,只输出 JSON: +{"intent":"qa|ingest|mixed|unknown","confidence":0.0,"reason":"一句话","should_ingest":true|false,"should_query":true|false} +规则: +1) qa:主要是提问/求分析 +2) ingest:主要是陈述新增事实,意图写入图谱 +3) mixed:既有新增事实又要求回答 +4) unknown:无法判定时保守返回 +5) 结果要保守,不要过度推断` + }, + { + role: "user", + content: rawText + } + ], 0, model, maxTokens, { thinking: "off" }); + const content = result?.choices?.[0]?.message?.content || ""; + let parsed; + try { + parsed = parseJsonObject(content); + } catch { + parsed = {}; + } + const intentRaw = String(parsed?.intent || "").trim().toLowerCase(); + const allowed = new Set(["qa", "ingest", "mixed", "unknown"]); + const intent = allowed.has(intentRaw) ? intentRaw : "unknown"; + const confidence = Math.min(Math.max(Number(parsed?.confidence ?? 0), 0), 1); + const shouldIngest = parsed?.should_ingest === true || intent === "ingest" || intent === "mixed"; + const shouldQuery = parsed?.should_query === false ? false : intent !== "ingest"; + return { + intent, + should_ingest: shouldIngest, + should_query: shouldQuery, + confidence, + reason: String(parsed?.reason || "").slice(0, 120), + model + }; + } + async adjudicateEventCorrections(text, recentEvents = [], options = {}) { const rawText = typeof text === "string" ? text.trim() : ""; if (!rawText) return { decisions: [] }; diff --git a/OnceLove/oncelove-graphrag/api/src/services/social-image-agent.service.js b/OnceLove/oncelove-graphrag/api/src/services/social-image-agent.service.js new file mode 100644 index 0000000..f9cb1c7 --- /dev/null +++ b/OnceLove/oncelove-graphrag/api/src/services/social-image-agent.service.js @@ -0,0 +1,272 @@ +const createHttpError = (statusCode, message) => { + const error = new Error(message); + error.statusCode = statusCode; + return error; +}; + +const tryParseJson = (rawText) => { + const text = String(rawText || "").trim(); + if (!text) return null; + const candidates = [text]; + const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i); + if (fenced?.[1]) candidates.push(fenced[1].trim()); + const startIdx = text.indexOf("{"); + const endIdx = text.lastIndexOf("}"); + if (startIdx >= 0 && endIdx > startIdx) candidates.push(text.slice(startIdx, endIdx + 1)); + for (const candidate of candidates) { + try { + return JSON.parse(candidate); + } catch {} + } + return null; +}; + +const normalizeScene = (value) => { + const raw = String(value || "").trim().toLowerCase(); + if (["moments", "wechat_moments", "朋友圈"].includes(raw)) return "moments"; + if (["wechat_chat", "chat", "聊天", "微信聊天记录"].includes(raw)) return "wechat_chat"; + return "auto"; +}; + +const normalizeDataUrl = (value) => { + const text = String(value || "").trim(); + if (!text || !text.startsWith("data:image/")) return ""; + if (!text.includes(";base64,")) return ""; + return text; +}; + +const normalizeImageUrl = (value) => { + const text = String(value || "").trim(); + if (!text) return ""; + if (/^https?:\/\//i.test(text)) return text; + return ""; +}; + +const compactText = (value, maxLen = 600) => { + const text = String(value || "").replace(/\s+/g, " ").trim(); + if (!text) return ""; + return text.length > maxLen ? `${text.slice(0, maxLen - 1)}…` : text; +}; + +const parseDataUrl = (dataUrl = "") => { + const raw = String(dataUrl || ""); + const match = raw.match(/^data:(image\/[a-zA-Z0-9.+-]+);base64,(.+)$/); + if (!match) return { mediaType: "", base64: "" }; + return { + mediaType: match[1] || "", + base64: match[2] || "" + }; +}; + +export class SocialImageAgentService { + constructor({ llmService }) { + this.llmService = llmService; + this.maxImageChars = 2_600_000; + } + + isEnabled() { + if (typeof this.llmService?.isImageEnabled === "function") { + return Boolean(this.llmService.isImageEnabled()); + } + return Boolean(this.llmService?.isEnabled?.()); + } + + _readImageInput(payload = {}) { + const image = payload.image_payload && typeof payload.image_payload === "object" + ? payload.image_payload + : payload.image && typeof payload.image === "object" + ? payload.image + : {}; + const dataUrl = normalizeDataUrl( + image.data_url + || image.dataUrl + || payload.image_data_url + || payload.imageDataUrl + ); + const imageUrl = normalizeImageUrl( + image.url + || image.image_url + || payload.image_url + ); + const fileName = String(image.file_name || image.fileName || payload.image_file_name || "").trim(); + const scene = normalizeScene(payload.image_scene || image.scene || payload.scene); + return { dataUrl, imageUrl, fileName, scene }; + } + + _buildSystemPrompt(scene = "auto") { + const sceneHint = scene === "moments" + ? "当前图片偏向朋友圈动态,请重点提取发帖人、互动对象、时间线线索、情绪倾向、评论互动与配图语义。" + : scene === "wechat_chat" + ? "当前图片偏向微信聊天记录,请重点提取对话双方、关键句、冲突/和好信号、时间点、主动方与边界表达。" + : "请先识别是朋友圈还是微信聊天记录,再按对应维度提取。"; + return `你是图谱问答的图片理解Agent,擅长解析朋友圈截图/微信聊天截图并转成可检索线索。 +${sceneHint} +朋友圈模式下,请优先提取:发帖人身份、被@对象、评论关系、情绪强度、关系状态变化。 +微信聊天模式下,请优先提取:说话人轮次、关键原句、冲突信号、修复信号、时间顺序与关系边界。 +微信聊天角色识别规则: +1) 绿色聊天气泡默认判定为 self(我方); +2) 白色聊天气泡默认判定为 other(对方); +3) 若颜色不清晰,再结合左右位置、头像位置、昵称与时间分布判断; +4) 无法确认时标记 unknown,禁止强行猜测。 +只返回 JSON,不要输出任何额外文字。格式: +{ + "scene":"moments|wechat_chat|unknown", + "confidence":0-1, + "scene_focus":["本图最关键的3-5个观察点"], + "summary":"图片核心摘要(120字内)", + "extracted_text":"OCR关键信息,可为空", + "chat_turns":[{"speaker":"self|other|unknown","text":"一句话","cue":"green|white|left|right|avatar"}], + "key_entities":["人物/群聊/地点"], + "key_events":["关键事件线索"], + "risk_signals":["冲突/误解/边界风险信号"], + "relation_hints":["关系线索"], + "suggested_question":"可用于图谱问答的建议问题(尽量具体)" +}`; + } + + _isMissingImageSignal(raw = "", parsed = {}) { + const text = `${String(raw || "")}\n${String(parsed?.summary || "")}`.toLowerCase(); + return /未提供(截图|图片)|无法进行视觉解析|无可见图像|看不到图片|cannot\s+see\s+the\s+image|no\s+image\s+provided|image\s+not\s+provided/.test(text); + } + + _buildUserContextParts(scene = "auto", queryText = "") { + return [ + { type: "text", text: `场景偏好: ${scene}` }, + { type: "text", text: `用户补充问题: ${queryText || "未提供文本问题,请基于图片生成可检索问题"}` }, + { type: "text", text: "请输出标准JSON。" } + ]; + } + + _buildImageContentVariants({ dataUrl = "", imageUrl = "", contextParts = [] } = {}) { + if (imageUrl) { + return [ + [...contextParts, { type: "image_url", image_url: { url: imageUrl } }], + [...contextParts, { type: "image_url", image_url: imageUrl }] + ]; + } + if (!dataUrl) return [contextParts]; + return [ + [...contextParts, { type: "image_url", image_url: { url: dataUrl } }], + [...contextParts, { type: "image_url", image_url: dataUrl }] + ]; + } + + async analyzeForGraphQa(payload = {}) { + if (!this.isEnabled()) { + throw createHttpError(400, "LLM 服务未配置,无法启用图片问答"); + } + const { dataUrl, imageUrl, fileName, scene } = this._readImageInput(payload); + if (!dataUrl && !imageUrl) { + throw createHttpError(400, "缺少图片输入,请提供 image_payload.data_url 或 image_url"); + } + if (dataUrl && dataUrl.length > this.maxImageChars) { + throw createHttpError(413, "图片体积过大,请压缩后重试"); + } + const queryText = String(payload.query_text || payload.query || "").trim(); + const contextParts = this._buildUserContextParts(scene, queryText); + const variants = this._buildImageContentVariants({ dataUrl, imageUrl, contextParts }); + let raw = ""; + let parsed = {}; + let lastError = null; + for (let i = 0; i < variants.length; i += 1) { + try { + const response = await this.llmService.chatImage([ + { role: "system", content: this._buildSystemPrompt(scene) }, + { role: "user", content: variants[i] } + ], 0.2, { thinking: "off" }); + raw = response?.choices?.[0]?.message?.content || ""; + parsed = tryParseJson(raw) || {}; + const missingSignal = this._isMissingImageSignal(raw, parsed); + if (missingSignal && i < variants.length - 1) { + continue; + } + break; + } catch (error) { + lastError = error; + if (i >= variants.length - 1) { + throw error; + } + } + } + if (!raw && lastError) { + throw lastError; + } + if (this._isMissingImageSignal(raw, parsed)) { + throw createHttpError(422, "图片未被模型正确识别,请尝试 Ctrl+V 粘贴截图或更换支持视觉的模型"); + } + const finalScene = ["moments", "wechat_chat", "unknown"].includes(String(parsed.scene || "").trim().toLowerCase()) + ? String(parsed.scene || "").trim().toLowerCase() + : "unknown"; + const confidenceRaw = Number(parsed.confidence); + const confidence = Number.isFinite(confidenceRaw) ? Math.max(0, Math.min(1, confidenceRaw)) : 0.6; + const summary = compactText(parsed.summary, 180); + const extractedText = compactText(parsed.extracted_text, 1200); + const keyEntities = (Array.isArray(parsed.key_entities) ? parsed.key_entities : []) + .map((item) => compactText(item, 40)) + .filter(Boolean) + .slice(0, 10); + const chatTurns = (Array.isArray(parsed.chat_turns) ? parsed.chat_turns : []) + .map((item) => { + const speakerRaw = String(item?.speaker || "").trim().toLowerCase(); + const speaker = ["self", "other", "unknown"].includes(speakerRaw) ? speakerRaw : "unknown"; + return { + speaker, + text: compactText(item?.text, 120), + cue: compactText(item?.cue, 24) + }; + }) + .filter((item) => item.text) + .slice(0, 20); + const keyEvents = (Array.isArray(parsed.key_events) ? parsed.key_events : []) + .map((item) => compactText(item, 80)) + .filter(Boolean) + .slice(0, 8); + const sceneFocus = (Array.isArray(parsed.scene_focus) ? parsed.scene_focus : []) + .map((item) => compactText(item, 80)) + .filter(Boolean) + .slice(0, 6); + const riskSignals = (Array.isArray(parsed.risk_signals) ? parsed.risk_signals : []) + .map((item) => compactText(item, 80)) + .filter(Boolean) + .slice(0, 6); + const relationHints = (Array.isArray(parsed.relation_hints) ? parsed.relation_hints : []) + .map((item) => compactText(item, 80)) + .filter(Boolean) + .slice(0, 8); + const suggestedQuestion = compactText(parsed.suggested_question, 200); + const imageDigest = [ + summary ? `图片摘要:${summary}` : "", + chatTurns.length + ? `聊天分轨:${chatTurns.slice(0, 8).map((item) => `${item.speaker === "self" ? "我" : item.speaker === "other" ? "对方" : "未知"}:${item.text}`).join(" | ")}` + : "", + sceneFocus.length ? `场景观察:${sceneFocus.join(";")}` : "", + keyEntities.length ? `关键实体:${keyEntities.join("、")}` : "", + keyEvents.length ? `事件线索:${keyEvents.join(";")}` : "", + riskSignals.length ? `风险信号:${riskSignals.join(";")}` : "", + relationHints.length ? `关系线索:${relationHints.join(";")}` : "", + extractedText ? `OCR摘录:${extractedText.slice(0, 260)}` : "" + ].filter(Boolean).join("\n"); + const queryTextForQa = [ + queryText, + suggestedQuestion, + imageDigest ? `以下是图片提取线索,请结合图谱回答:\n${imageDigest}` : "" + ].filter(Boolean).join("\n\n"); + return { + ok: true, + scene: finalScene, + confidence, + file_name: fileName || null, + summary, + extracted_text: extractedText, + chat_turns: chatTurns, + scene_focus: sceneFocus, + key_entities: keyEntities, + key_events: keyEvents, + risk_signals: riskSignals, + relation_hints: relationHints, + suggested_question: suggestedQuestion, + image_digest: imageDigest, + query_text_for_qa: queryTextForQa || queryText || suggestedQuestion || "请基于图片线索给出关系判断" + }; + } +} diff --git a/OnceLove/oncelove-graphrag/frontend/src/components/GraphQaPanel.vue b/OnceLove/oncelove-graphrag/frontend/src/components/GraphQaPanel.vue index b5f7695..cbdf5ac 100644 --- a/OnceLove/oncelove-graphrag/frontend/src/components/GraphQaPanel.vue +++ b/OnceLove/oncelove-graphrag/frontend/src/components/GraphQaPanel.vue @@ -1,7 +1,11 @@