hack-ink · yvette-carlisle · Jun 19, 2026 · Jun 19, 2026
diff --git a/README.md b/README.md
@@ -267,7 +267,9 @@ provider-backed ELF evidence was required.
   comparison blocked; graphify is `wrong_result`; llm-wiki is not_tested; gbrain is
   blocked; private and hosted graph/RAG profiles are non_goal. These reports preserve
   the smoke and typed non-pass boundaries and do not create an ELF win claim against
-  graph/RAG strengths.
+  graph/RAG strengths. Graph/RAG citation/navigation promotion after XY-985 re-runs
+  this lane and leaves the state unchanged at 0 pass, 1 wrong_result, 1 incomplete, and
+  3 blocked; graphify's evidence-linked output still scores wrong_result.
 - mem0/OpenMemory history follow-up after XY-924 and XY-931: the local OSS mem0
   adapter now passes encoded preference correction history, entity-scoped
   personalization, local `get_all` export-style readback, and deletion audit history.
@@ -318,6 +320,7 @@ Detailed evidence and interpretation:
 - [Proactive Brief Scoring Report - June 16, 2026](docs/evidence/benchmarking/2026-06-16-proactive-brief-scoring-report.md)
 - [Scheduled Memory Task Scoring Report - June 16, 2026](docs/evidence/benchmarking/2026-06-16-scheduled-memory-task-scoring-report.md)
 - [Dreaming Competitor-Strength Retest Report - June 17, 2026](docs/evidence/benchmarking/2026-06-17-dreaming-competitor-strength-retest-report.md)
+- [Graph/RAG Citation and Navigation Promotion Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-graph-rag-citation-navigation-promotion-report.md)
 - [qmd Debug-Ergonomics Dreaming Retest Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-qmd-debug-ergonomics-dreaming-retest-report.md)
 - [OpenViking Trajectory Materialization Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-openviking-trajectory-materialization-report.md)
 - [Service-Native Dreaming Readback Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md)
@@ -406,6 +409,7 @@ Detailed comparison, mechanism-level analysis, and source map:
 - [Proactive Brief Scoring Report - June 16, 2026](docs/evidence/benchmarking/2026-06-16-proactive-brief-scoring-report.md)
 - [Scheduled Memory Task Scoring Report - June 16, 2026](docs/evidence/benchmarking/2026-06-16-scheduled-memory-task-scoring-report.md)
 - [Dreaming Competitor-Strength Retest Report - June 17, 2026](docs/evidence/benchmarking/2026-06-17-dreaming-competitor-strength-retest-report.md)
+- [Graph/RAG Citation and Navigation Promotion Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-graph-rag-citation-navigation-promotion-report.md)
 - [qmd Debug-Ergonomics Dreaming Retest Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-qmd-debug-ergonomics-dreaming-retest-report.md)
 - [OpenMemory UI/Export Product Readback Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-openmemory-ui-export-product-readback-report.md)
 - [Live Baseline Benchmark Runbook](docs/runbook/benchmarking/live_baseline_benchmark.md)

diff --git a/.../fixtures/report_snapshots/2026-06-19-graph-rag-citation-navigation-promotion-report.json b/.../fixtures/report_snapshots/2026-06-19-graph-rag-citation-navigation-promotion-report.json
@@ -0,0 +1,168 @@
+{
+  "schema": "elf.graph_rag_citation_navigation_promotion_report/v1",
+  "report_id": "xy-985-graph-rag-citation-navigation-promotion-2026-06-19",
+  "authority": "XY-985",
+  "created_at": "2026-06-19T07:17:34Z",
+  "goal": "Promote graph/RAG citation, navigation, stale-source lint, and knowledge-surface cases only when adapters emit comparable evidence-linked outputs while preserving typed non-pass outcomes.",
+  "command": {
+    "command": "cargo make real-world-memory-graph-rag",
+    "status": "pass",
+    "report_artifact": "tmp/real-world-memory/graph-rag/report.json",
+    "markdown_artifact": "tmp/real-world-memory/graph-rag/report.md",
+    "run_id": "real-world-memory-graph-rag",
+    "adapter_id": "fixture_graph_rag_external_adapters"
+  },
+  "source_baseline": {
+    "previous_report": "docs/evidence/benchmarking/2026-06-17-dreaming-competitor-strength-retest-report.md",
+    "previous_graph_rag_report": "docs/evidence/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md",
+    "previous_status": "typed_non_pass",
+    "previous_counts": {
+      "pass": 0,
+      "wrong_result": 1,
+      "incomplete": 1,
+      "blocked": 3
+    }
+  },
+  "summary": {
+    "overall_judgment": "unchanged_typed_non_pass",
+    "broader_graph_rag_parity": "not_proven",
+    "job_count": 5,
+    "encoded_suite_count": 3,
+    "pass": 0,
+    "wrong_result": 1,
+    "incomplete": 1,
+    "blocked": 3,
+    "not_encoded": 0,
+    "wrong_result_count": 2,
+    "regressed_scenario_count": 0,
+    "evidence_coverage": 0.25,
+    "source_ref_coverage": 0.25,
+    "quote_coverage": 0.25,
+    "knowledge_citation_coverage": 0.667,
+    "stale_claim_detection": 0.0,
+    "unsupported_summary_count": 1,
+    "mean_score": 0.06
+  },
+  "scenario_outcomes": [
+    {
+      "project": "RAGFlow",
+      "scenario_id": "reference_chunk_citation_mapping",
+      "fixture": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/ragflow_reference_chunks_blocked.json",
+      "required_output": "Returned reference chunks must include generated document ids, chunk ids, content, and document metadata mapped to benchmark evidence ids.",
+      "current_status": "blocked",
+      "judgment": "unchanged",
+      "artifact": "tmp/real-world-memory/graph-rag/report.json",
+      "blocker": "resource_api_setup_and_reference_chunks_missing",
+      "claim_boundary": "No RAGFlow citation quality or ELF-over-RAGFlow claim is allowed."
+    },
+    {
+      "project": "LightRAG",
+      "scenario_id": "context_source_reference_mapping",
+      "fixture": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/lightrag_context_sources_incomplete.json",
+      "required_output": "Context/source export must expose generated file paths, snippets, or references mapped to evidence ids.",
+      "current_status": "incomplete",
+      "judgment": "unchanged",
+      "artifact": "tmp/real-world-memory/graph-rag/report.json",
+      "blocker": "default_api_export_not_available",
+      "claim_boundary": "No LightRAG graph/RAG quality claim is allowed until source references map to generated evidence."
+    },
+    {
+      "project": "GraphRAG",
+      "scenario_id": "output_table_citation_mapping",
+      "fixture": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/graphrag_output_tables_blocked.json",
+      "required_output": "Output tables must map documents, text units, communities, reports, entities, and relationships to generated evidence ids.",
+      "current_status": "blocked",
+      "judgment": "unchanged",
+      "artifact": "tmp/real-world-memory/graph-rag/report.json",
+      "blocker": "provider_backed_output_tables_missing",
+      "claim_boundary": "No GraphRAG citation, synthesis, or navigation claim is allowed without mapped output tables."
+    },
+    {
+      "project": "Graphiti/Zep",
+      "scenario_id": "temporal_graph_validity_mapping",
+      "fixture": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/graphiti_temporal_validity_blocked.json",
+      "required_output": "Current and historical graph facts must carry validity windows and evidence ids.",
+      "current_status": "blocked",
+      "judgment": "unchanged",
+      "artifact": "tmp/real-world-memory/graph-rag/report.json",
+      "blocker": "provider_backed_temporal_graph_output_missing",
+      "claim_boundary": "Graphiti/Zep remains the temporal-validity reference; no ELF superiority claim is allowed."
+    },
+    {
+      "project": "graphify",
+      "scenario_id": "graph_report_navigation_lint",
+      "fixture": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/graphify_graph_report_wrong_result.json",
+      "required_output": "graph.json, source-location sections, unsupported-claim lint, and stale-source lint must all be scored.",
+      "current_status": "wrong_result",
+      "judgment": "unchanged",
+      "artifact": "tmp/real-world-memory/graph-rag/report.json",
+      "produced_evidence": [
+        "graphify-derived-report-boundary",
+        "graphify-graph-summary-output",
+        "graphify-source-location-output"
+      ],
+      "blocker": "stale_claim_detection_missing_and_unsupported_summary_present",
+      "claim_boundary": "graphify has evidence-linked output but remains wrong_result; do not convert this into an ELF win."
+    },
+    {
+      "project": "llm-wiki",
+      "scenario_id": "wiki_page_citation_lint",
+      "fixture": null,
+      "required_output": "Contained page generation must emit cited sections plus stale-source and unsupported-claim lint.",
+      "current_status": "not_encoded",
+      "judgment": "unchanged",
+      "artifact": null,
+      "blocker": "no_contained_page_materializer",
+      "claim_boundary": "llm-wiki remains a reference workflow until a contained materializer exists."
+    },
+    {
+      "project": "gbrain",
+      "scenario_id": "compiled_truth_timeline_export",
+      "fixture": null,
+      "required_output": "Docker-local brain repository import must emit compiled-truth or timeline pages with source evidence.",
+      "current_status": "blocked",
+      "judgment": "unchanged",
+      "artifact": null,
+      "blocker": "docker_local_brain_repo_and_database_setup_missing",
+      "claim_boundary": "gbrain remains blocked until setup/export readback is proven."
+    }
+  ],
+  "improvement_regression_readback": {
+    "judgment": "unchanged",
+    "improved": [
+      "The fresh June 19 report records graphify evidence-linked output and typed blockers in a checked-in XY-985 companion."
+    ],
+    "unchanged": [
+      "No graph/RAG scenario moved to pass.",
+      "RAGFlow, GraphRAG, and Graphiti/Zep remain blocked.",
+      "LightRAG remains incomplete.",
+      "graphify remains wrong_result.",
+      "llm-wiki remains not_encoded and gbrain remains blocked."
+    ],
+    "regressed": []
+  },
+  "claim_boundaries": {
+    "allowed": [
+      "The representative graph/RAG command is reproducible and emits typed non-pass outcomes.",
+      "graphify emits evidence-linked graph/report output but remains wrong_result.",
+      "The comparison status is unchanged relative to XY-955."
+    ],
+    "not_allowed": [
+      "Do not claim graph/RAG parity or broad graph-navigation quality.",
+      "Do not convert research gates, tiny smokes, blocked setup, incomplete output, or graphify wrong_result into a win.",
+      "Do not use private providers, hosted services, or unrecorded credentials for this lane."
+    ]
+  },
+  "next_optimization_direction": {
+    "required_fields": [
+      "ragflow_reference_chunk_ids_and_document_metadata",
+      "lightrag_context_source_paths_or_snippets",
+      "graphrag_output_table_rows_with_generated_evidence_ids",
+      "graphiti_zep_valid_at_invalid_at_evidence_mapping",
+      "graphify_stale_source_lint_pass",
+      "llm_wiki_contained_page_materializer",
+      "gbrain_docker_local_brain_repo_export"
+    ],
+    "non_goal": "Do not implement broad ELF graph/RAG product features in this benchmark/report lane."
+  }
+}
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -250,6 +250,10 @@ fn openmemory_ui_export_product_readback_report_json_path() -> Result<PathBuf> {
 	report_snapshot_path("2026-06-19-openmemory-ui-export-product-readback-report.json")
 }
 
+fn graph_rag_citation_navigation_promotion_report_json_path() -> Result<PathBuf> {
+	report_snapshot_path("2026-06-19-graph-rag-citation-navigation-promotion-report.json")
+}
+
 fn openviking_trajectory_materialization_report_markdown_path() -> Result<PathBuf> {
 	Ok(workspace_root()?
 		.join("docs")
@@ -282,6 +286,14 @@ fn openmemory_ui_export_product_readback_report_markdown_path() -> Result<PathBu
 		.join("2026-06-19-openmemory-ui-export-product-readback-report.md"))
 }
 
+fn graph_rag_citation_navigation_promotion_report_markdown_path() -> Result<PathBuf> {
+	Ok(workspace_root()?
+		.join("docs")
+		.join("evidence")
+		.join("benchmarking")
+		.join("2026-06-19-graph-rag-citation-navigation-promotion-report.md"))
+}
+
 fn live_temporal_reconciliation_report_json_path() -> Result<PathBuf> {
 	report_snapshot_path("2026-06-16-live-temporal-reconciliation-report.json")
 }
@@ -3505,6 +3517,80 @@ fn openmemory_ui_export_product_recheck_preserves_blocked_boundary() -> Result<(
 	Ok(())
 }
 
+#[test]
+fn graph_rag_citation_navigation_promotion_preserves_typed_non_passes() -> Result<()> {
+	let report = serde_json::from_str::<Value>(&fs::read_to_string(
+		graph_rag_citation_navigation_promotion_report_json_path()?,
+	)?)?;
+	let markdown =
+		fs::read_to_string(graph_rag_citation_navigation_promotion_report_markdown_path()?)?;
+	let benchmarking_index = fs::read_to_string(benchmarking_index_path()?)?;
+	let readme = fs::read_to_string(readme_path()?)?;
+
+	assert_eq!(
+		report.pointer("/schema").and_then(Value::as_str),
+		Some("elf.graph_rag_citation_navigation_promotion_report/v1")
+	);
+	assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-985"));
+	assert_eq!(
+		report.pointer("/command/command").and_then(Value::as_str),
+		Some("cargo make real-world-memory-graph-rag")
+	);
+	assert_eq!(report.pointer("/command/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		report.pointer("/summary/overall_judgment").and_then(Value::as_str),
+		Some("unchanged_typed_non_pass")
+	);
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(0));
+	assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(3));
+	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(0.25));
+	assert_eq!(
+		report.pointer("/summary/knowledge_citation_coverage").and_then(Value::as_f64),
+		Some(0.667)
+	);
+
+	let scenarios = array_at(&report, "/scenario_outcomes")?;
+	let ragflow = find_by_field(scenarios, "/project", "RAGFlow")?;
+	let lightrag = find_by_field(scenarios, "/project", "LightRAG")?;
+	let graphrag = find_by_field(scenarios, "/project", "GraphRAG")?;
+	let graphiti = find_by_field(scenarios, "/project", "Graphiti/Zep")?;
+	let graphify = find_by_field(scenarios, "/project", "graphify")?;
+	let llm_wiki = find_by_field(scenarios, "/project", "llm-wiki")?;
+	let gbrain = find_by_field(scenarios, "/project", "gbrain")?;
+
+	assert_eq!(ragflow.pointer("/current_status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(lightrag.pointer("/current_status").and_then(Value::as_str), Some("incomplete"));
+	assert_eq!(graphrag.pointer("/current_status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(graphiti.pointer("/current_status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(graphify.pointer("/current_status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(llm_wiki.pointer("/current_status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(gbrain.pointer("/current_status").and_then(Value::as_str), Some("blocked"));
+	assert!(array_contains_str(graphify, "/produced_evidence", "graphify-source-location-output")?);
+	assert!(array_contains_str(
+		&report,
+		"/claim_boundaries/not_allowed",
+		"Do not claim graph/RAG parity or broad graph-navigation quality."
+	)?);
+	assert!(array_contains_str(
+		&report,
+		"/next_optimization_direction/required_fields",
+		"graphrag_output_table_rows_with_generated_evidence_ids"
+	)?);
+	assert!(markdown.contains("typed non-pass, no parity claim"));
+	assert!(
+		markdown.contains("graphify produces evidence-linked output but still scores wrong_result")
+	);
+	assert!(
+		benchmarking_index.contains("2026-06-19-graph-rag-citation-navigation-promotion-report.md")
+	);
+	assert!(readme.contains("Graph/RAG Citation and Navigation Promotion Report - June 19, 2026"));
+	assert!(readme.contains("Graph/RAG citation/navigation promotion after XY-985"));
+
+	Ok(())
+}
+
 fn assert_openviking_trajectory_materialization_summary(report: &Value) -> Result<()> {
 	assert_eq!(
 		report.pointer("/schema").and_then(Value::as_str),