From 84b9ad931377f9c13560a02feff33848dd1a41c2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 18 Jun 2026 18:32:02 +0000
Subject: [PATCH 1/2] Make MCP indexing observable and tools available
 immediately
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses dev-team findings about CodeRAG-as-MCP-tool feeling broken at
broad-root (e.g. /home) scale: "enabled" but not "usable", opaque status,
and tools that aren't reachable in time inside Hermes/Claude Code/Codex.

- Serve the MCP protocol immediately. run_mcp() now runs warm-up (which
  downloads/loads the embedding model on first run) and the initial index
  on a single background bootstrap thread, so mcp.run() — and thus
  tools/list — is reached at once. The toolset is registered synchronously
  in build_mcp(), so clients see all five tools right away instead of
  timing out behind the model download. This removes the CodeRAG-side
  cause of the "tools registered but unusable" race.

- Live, pollable index progress. New thread-safe IndexProgress (types.py)
  is passed from the MCP tools through CodeRAG.index(live=...) down to
  Indexer.index(live=...), and the index_status tool now returns its
  snapshot as a `progress` object: state
  (idle/scanning/indexing/optimizing/ready/failed), files_discovered,
  files_to_index, files_indexed, chunks, current_path, started_at, elapsed,
  last_error. files_discovered ticks up during the long pre-embed scan, so
  a big index reads as "scanning" instead of a stuck 0. reindex drives the
  same object.

- Earlier partial results. During a live (MCP) index, buffered rows are
  committed roughly every 5s (checked after each file is written) so dense
  search returns hits before the 8192-chunk flush boundary. Gated on
  `live is not None`, so the CLI, watcher and tests keep today's
  single-flush-at-end batching (defaults unchanged).

- MCP best-practice polish. ToolAnnotations(readOnlyHint=True) on
  search_code/search_files/get_file/index_status; reindex marked non-read-only.

- Concurrency safety. Serving before warm-up means a query can arrive
  mid-bootstrap; guard CodeRAG's lazy provider/store/searcher/indexer
  construction with a reentrant build lock so two threads can't each
  build their own, conflicting LanceStore.

Tests: live-progress + failure-path tests in test_indexer.py; live
progress, annotations, and reindex-progress assertions in test_mcp.py.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_015SPsWy8a63EpYMFDJjVE1e
---
 coderag/api.py                 |  54 ++++++++++-----
 coderag/indexer.py             |  49 ++++++++++++--
 coderag/surfaces/mcp_server.py | 118 ++++++++++++++++++++++-----------
 coderag/types.py               |  94 ++++++++++++++++++++++++++
 tests/test_indexer.py          |  41 ++++++++++++
 tests/test_mcp.py              |  36 ++++++++++
 6 files changed, 331 insertions(+), 61 deletions(-)

diff --git a/coderag/api.py b/coderag/api.py
index ab19c78..c4705dd 100644
--- a/coderag/api.py
+++ b/coderag/api.py
@@ -15,7 +15,7 @@
 
 from coderag._lines import split_lines
 from coderag.config import Config
-from coderag.types import IndexStats, SearchHit
+from coderag.types import IndexProgress, IndexStats, SearchHit
 
 if TYPE_CHECKING:  # avoid import-time cost / cycles
     from coderag.embeddings import EmbeddingProvider
@@ -39,6 +39,12 @@ def __init__(self, config: Optional[Config] = None) -> None:
         # surface, the MCP server's background index, and the live watcher) can't
         # interleave a file's delete-before-add sequence. Reads (search) are unaffected.
         self._index_lock = threading.Lock()
+        # Guards the lazy construction of the collaborators below. The MCP server now serves
+        # the protocol before warm-up finishes, so a query can land while the background
+        # bootstrap is still building the store/provider — without this lock two threads could
+        # each construct their own (conflicting) LanceStore. Reentrant because the properties
+        # depend on each other (e.g. ``store`` reads ``provider`` while holding the lock).
+        self._build_lock = threading.RLock()
 
     # --- lazily constructed collaborators ---
 
@@ -47,7 +53,9 @@ def provider(self) -> "EmbeddingProvider":
         if self._provider is None:
             from coderag.embeddings import get_provider
 
-            self._provider = get_provider(self.config)
+            with self._build_lock:
+                if self._provider is None:
+                    self._provider = get_provider(self.config)
         return self._provider
 
     @property
@@ -55,11 +63,14 @@ def store(self) -> "LanceStore":
         if self._store is None:
             from coderag.store.lance_store import LanceStore
 
-            self.config.store_dir.mkdir(parents=True, exist_ok=True)
-            self._store = LanceStore(self.config.store_dir, self.provider.dim)
-            # Clears the store when the embedding model/dim changed; a re-index then
-            # repopulates the now-empty tables (there is no separate cache to rebuild).
-            self._store.bootstrap(self.provider.dim, self.provider.model_id)
+            with self._build_lock:
+                if self._store is None:
+                    self.config.store_dir.mkdir(parents=True, exist_ok=True)
+                    store = LanceStore(self.config.store_dir, self.provider.dim)
+                    # Clears the store when the embedding model/dim changed; a re-index then
+                    # repopulates the now-empty tables (no separate cache to rebuild).
+                    store.bootstrap(self.provider.dim, self.provider.model_id)
+                    self._store = store
         return self._store
 
     @property
@@ -67,7 +78,9 @@ def indexer(self) -> "Indexer":
         if self._indexer is None:
             from coderag.indexer import Indexer
 
-            self._indexer = Indexer(self.config, self.provider, self.store)
+            with self._build_lock:
+                if self._indexer is None:
+                    self._indexer = Indexer(self.config, self.provider, self.store)
         return self._indexer
 
     @property
@@ -76,27 +89,34 @@ def searcher(self) -> "HybridSearcher":
             from coderag.retrieval.rerank import get_reranker
             from coderag.retrieval.search import HybridSearcher
 
-            self._searcher = HybridSearcher(
-                self.config,
-                self.provider,
-                self.store,
-                reranker=get_reranker(self.config),
-            )
+            with self._build_lock:
+                if self._searcher is None:
+                    self._searcher = HybridSearcher(
+                        self.config,
+                        self.provider,
+                        self.store,
+                        reranker=get_reranker(self.config),
+                    )
         return self._searcher
 
     # --- public operations ---
 
     def index(
-        self, path: Optional[Union[str, Path]] = None, *, full: bool = False
+        self,
+        path: Optional[Union[str, Path]] = None,
+        *,
+        full: bool = False,
+        live: Optional[IndexProgress] = None,
     ) -> IndexStats:
         """Incrementally index ``path`` (defaults to the configured watched dir).
 
         Only files whose content hash changed are re-embedded. Pass ``full=True`` to
-        force a clean rebuild.
+        force a clean rebuild. Pass ``live`` (an :class:`IndexProgress`) to receive live,
+        pollable progress — the MCP server uses this so ``index_status`` reflects the run.
         """
         target = Path(path).expanduser() if path else self.config.watched_dir
         with self._index_lock:
-            return self.indexer.index(target, full=full)
+            return self.indexer.index(target, full=full, live=live)
 
     def search(self, query: str, top_k: Optional[int] = None) -> List[SearchHit]:
         """Hybrid (dense + lexical) search over the indexed codebase."""
diff --git a/coderag/indexer.py b/coderag/indexer.py
index d4442ed..ee8ac8d 100644
--- a/coderag/indexer.py
+++ b/coderag/indexer.py
@@ -23,13 +23,19 @@
 from coderag.chunking.languages import detect_language
 from coderag.config import Config
 from coderag.embeddings import EmbeddingProvider
-from coderag.types import Chunk, IndexStats
+from coderag.types import Chunk, IndexProgress, IndexStats
 
 if TYPE_CHECKING:
     from coderag.store.lance_store import LanceStore
 
 logger = logging.getLogger(__name__)
 
+# During a long index, commit buffered rows about this often (checked after each file is
+# written) so dense search can return partial results before the steady-state 8192-chunk flush
+# boundary. Only applied when a live progress object is supplied (the MCP initial index and
+# the reindex tool), so the CLI/watcher keep their single-flush-at-end batching.
+_PARTIAL_FLUSH_SECS = 5.0
+
 
 class _ProgressReporter:
     """Live, human-facing indexing progress, written to stderr (stdout stays clean).
@@ -98,11 +104,21 @@ def index(
         *,
         full: bool = False,
         progress: bool = False,
+        live: Optional[IndexProgress] = None,
     ) -> IndexStats:
+        """Index ``target`` incrementally.
+
+        ``progress`` enables the human-facing stderr narration; ``live`` is an optional
+        machine-readable :class:`IndexProgress` the caller can poll concurrently (used by the
+        MCP server's background index so ``index_status`` reflects live state). Both default
+        off, so existing callers (CLI/watcher/tests) are unaffected.
+        """
         root = self.config.watched_dir.resolve()
         target = (target or self.config.watched_dir).resolve()
         prune = target == root  # only a full-root pass removes vanished files
         rep = _ProgressReporter(progress)
+        if live is not None:
+            live.begin("scanning")
 
         stats = IndexStats()
         if full:
@@ -121,6 +137,8 @@ def index(
                 stats.files_skipped += 1
             else:
                 work.append(item)
+            if live is not None:
+                live.saw_file(len(walked), len(work))
             rep.update(
                 f"Scanning {target} — {len(walked)} file(s) seen, "
                 f"{len(work)} to index, {stats.files_skipped} unchanged/skipped…"
@@ -139,10 +157,20 @@ def index(
         # 2. (Re)index changed files. Chunking + embedding (the CPU/network cost) may run
         #    in parallel across files (config.index_workers); the store writes stay on this
         #    single thread to preserve the delete-before-add invariant and single writer.
-        for added, removed in self._embed_and_write(work, reporter=rep):
+        if live is not None and work:
+            live.set_state("indexing")
+        last_flush = time.monotonic()
+        for item, added, removed in self._embed_and_write(work, reporter=rep):
             stats.chunks_added += added
             stats.chunks_removed += removed
             stats.files_indexed += 1
+            if live is not None:
+                live.wrote_file(item.rel, added)
+                # Commit periodically so dense search picks up partials during a long initial
+                # index, instead of waiting for the 8192-chunk boundary or the final persist.
+                if time.monotonic() - last_flush > _PARTIAL_FLUSH_SECS:
+                    self.store.flush()
+                    last_flush = time.monotonic()
 
         # 3. Prune files that disappeared from disk (full-root passes only).
         if prune:
@@ -157,6 +185,8 @@ def index(
         #    never triggers a whole-index rebuild.
         changed = stats.files_indexed > 0 or stats.files_removed > 0
         if prune and changed:
+            if live is not None:
+                live.set_state("optimizing")
             self.store.optimize()
         else:
             self.store.flush()
@@ -168,6 +198,8 @@ def index(
             f"✓ Indexed {stats.files_indexed} file(s) — "
             f"{stats.total_files} total / {stats.total_chunks} chunks."
         )
+        if live is not None:
+            live.finish("ready")
         return stats
 
     # --- internals ---
@@ -220,13 +252,17 @@ def _maybe_work(
 
     def _embed_and_write(
         self, work: List[_Work], *, reporter: _ProgressReporter
-    ) -> Iterator[Tuple[int, int]]:
+    ) -> Iterator[Tuple[_Work, int, int]]:
         """Chunk+embed each file (optionally across worker threads) and apply the writes.
 
         Embedding is the expensive, parallelizable step and touches no shared mutable
         state, so it runs in a thread pool when ``index_workers > 1``. The store writes are
         drained here on the single calling thread, so the no-duplicate (delete-before-add)
         invariant and the single-writer store are preserved.
+
+        Yields ``(item, chunks_added, chunks_removed)`` per file — the ``_Work`` item is
+        surfaced so the caller can report the current path (the worker pool completes out of
+        order, so positional zipping back to ``work`` is not possible).
         """
         if not work:
             return
@@ -239,14 +275,17 @@ def _embed_and_write(
             with ThreadPoolExecutor(max_workers=workers) as pool:
                 futures = {pool.submit(self._prepare, item): item for item in work}
                 for fut in as_completed(futures):
+                    item = futures[fut]
                     chunks, vectors = fut.result()
-                    yield self._write(futures[fut], chunks, vectors)
+                    added, removed = self._write(item, chunks, vectors)
+                    yield item, added, removed
                     done += 1
                     reporter.update(f"Embedding {done}/{total} file(s)…")
         else:
             for item in work:
                 chunks, vectors = self._prepare(item)
-                yield self._write(item, chunks, vectors)
+                added, removed = self._write(item, chunks, vectors)
+                yield item, added, removed
                 done += 1
                 reporter.update(f"Embedding {done}/{total} file(s)…")
 
diff --git a/coderag/surfaces/mcp_server.py b/coderag/surfaces/mcp_server.py
index 44ded35..4300aab 100644
--- a/coderag/surfaces/mcp_server.py
+++ b/coderag/surfaces/mcp_server.py
@@ -23,6 +23,8 @@
 import time
 from typing import TYPE_CHECKING, List, Literal, Optional
 
+from coderag.types import IndexProgress
+
 if TYPE_CHECKING:
     from mcp.server.fastmcp import FastMCP
 
@@ -51,7 +53,9 @@ def _notify(msg: str) -> None:
     "by identifier ('where is retry/backoff handled?'). Returns ranked path:line results.\n"
     "- search_files: exact regex/glob search (ripgrep-backed). Use it to find a LITERAL "
     "string or pattern, or to locate files by name (target='files', e.g. '*_test.py').\n"
-    "Then use get_file to read a precise range. Call index_status to check freshness."
+    "Then use get_file to read a precise range. Call index_status to check freshness — its "
+    "'progress' object reports live indexing state (scanning/indexing/ready); semantic hits "
+    "appear within seconds even while the initial index is still building."
 )
 
 # After this many identical consecutive search calls, the next one is blocked — a guard
@@ -64,6 +68,9 @@ class _State:
 
     def __init__(self) -> None:
         self.indexing = False  # True while the initial/manual index runs
+        # Live, pollable progress for the in-flight index (surfaced by index_status). Kept
+        # alongside the bool so the legacy "indexing" flag on search results is unchanged.
+        self.progress = IndexProgress()
         self.stop = threading.Event()  # set on shutdown to stop the watcher thread
         self._last_key: Optional[str] = None  # last search (tool, args) signature
         self._repeat = 0  # how many times in a row it has been issued
@@ -147,12 +154,17 @@ def build_mcp(cr: "CodeRAG", *, state: Optional[_State] = None) -> "FastMCP":
     Pure construction (no indexing, no transport), so tests can drive the tools in-memory.
     """
     from mcp.server.fastmcp import FastMCP
+    from mcp.types import ToolAnnotations
+
+    # Read-only tools (search/read/status) carry readOnlyHint so MCP clients (Claude Code,
+    # Hermes, Codex) can treat them as safe; reindex mutates the index, so it does not.
+    _READ_ONLY = ToolAnnotations(readOnlyHint=True)
 
     state = state or _State()
     snippet_lines = cr.config.mcp_snippet_lines
     mcp = FastMCP("coderag", instructions=_INSTRUCTIONS)
 
-    @mcp.tool()
+    @mcp.tool(annotations=_READ_ONLY)
     def search_code(
         query: str,
         top_k: int = 8,
@@ -213,7 +225,7 @@ def search_code(
             out["hint"] = f"More results available. Use offset={want} to see more."
         return out
 
-    @mcp.tool()
+    @mcp.tool(annotations=_READ_ONLY)
     def search_files(
         pattern: str,
         target: str = "content",
@@ -268,7 +280,7 @@ def search_files(
         result["indexing"] = _status_word(state)
         return result
 
-    @mcp.tool()
+    @mcp.tool(annotations=_READ_ONLY)
     def get_file(
         path: str,
         start_line: Optional[int] = None,
@@ -310,35 +322,51 @@ def get_file(
             "content": content,
         }
 
-    @mcp.tool()
+    @mcp.tool(annotations=_READ_ONLY)
     def index_status() -> dict:
-        """Report index coverage, freshness, and the active retrieval configuration.
+        """Report index coverage, freshness, live progress, and the retrieval configuration.
+
+        ``total_files`` / ``total_chunks`` count rows already committed to the store. During a
+        large initial index these stay low for a while (rows commit in batches), so read the
+        ``progress`` object for live state instead of assuming the server is stuck:
+
+        - ``progress.state``: ``idle | scanning | indexing | optimizing | ready | failed``
+        - ``progress.files_discovered`` / ``files_to_index`` / ``files_indexed`` / ``chunks``
+        - ``progress.current_path``, ``progress.elapsed``, ``progress.last_error``
 
-        Includes total_files / total_chunks, the embedding model, whether the reranker is
-        enabled, and ``"indexing": "ready" | "in_progress"`` so you can tell whether the
-        initial background index has finished. If results look thin, the index may still be
-        warming up — check here.
+        While ``state`` is ``scanning``/``indexing``, semantic (``search_code``) hits begin to
+        appear within seconds as partial results commit; exact keyword/BM25 hits and the final
+        ``total_*`` counts settle once ``state`` reaches ``ready``. The legacy
+        ``"indexing": "ready" | "in_progress"`` flag is kept for compatibility.
         """
         status = cr.status()
         status["indexing"] = _status_word(state)
+        status["progress"] = state.progress.snapshot()
         return status
 
-    @mcp.tool()
+    @mcp.tool(annotations=ToolAnnotations(readOnlyHint=False, destructiveHint=False))
     def reindex(path: Optional[str] = None, full: bool = False) -> dict:
         """Re-index the workspace now (incremental by default).
 
         Rarely needed — the watcher keeps the index live automatically — but useful right
         after a large checkout or branch switch. Pass ``full=true`` for a clean rebuild.
         Returns the index stats, or ``{"error": ...}`` if an index run is already going.
+        Watch ``index_status`` for live progress while this runs.
         """
         if state.indexing:
             return {"error": "An index operation is already in progress"}
         state.indexing = True
+        state.progress.begin("scanning")
         try:
-            stats = cr.index(path, full=full)
+            stats = cr.index(path, full=full, live=state.progress)
+        except Exception as exc:
+            state.progress.finish("failed", str(exc))
+            raise
+        else:
+            state.progress.finish("ready")
+            return stats.as_dict()
         finally:
             state.indexing = False
-        return stats.as_dict()
 
     return mcp
 
@@ -369,39 +397,51 @@ def run_mcp(
     do_watch = cr.config.mcp_watch if watch is None else watch
 
     state = _State()
-    mcp = build_mcp(cr, state=state)
+    mcp = build_mcp(cr, state=state)  # synchronous + cheap — tools are registered now
 
     _notify(f"starting — workspace: {cr.config.watched_dir}")
-    _notify("loading the embedding model (first run downloads it; may take a minute)…")
-    _warm_up(cr)
 
-    if auto_index:
-        # Index on a background thread so stdio is responsive immediately; search_code
-        # works against whatever is already indexed while this runs.
-        state.indexing = True
+    def _bootstrap() -> None:
+        """Warm the model and build the initial index — entirely off the serving thread.
 
-        def _initial_index() -> None:
+        Warm-up downloads/loads the embedding model on first run (can take a minute), so doing
+        it here (rather than before ``mcp.run()``) is what lets the server answer ``initialize``
+        / ``tools/list`` immediately — the tools are already registered. Clients (Hermes,
+        Claude Code, Codex) therefore see the toolset right away instead of timing out behind
+        the model download.
+        """
+        _notify(
+            "loading the embedding model (first run downloads it; may take a minute)…"
+        )
+        _warm_up(cr)
+        if not auto_index:
+            return
+        # search_code works against whatever is already indexed while this runs.
+        state.indexing = True
+        state.progress.begin("scanning")
+        _notify(
+            "building the initial index in the background — search works now and "
+            "returns more as it finishes (call index_status to check progress)"
+        )
+        started = time.monotonic()
+        try:
+            stats = cr.index(live=state.progress)
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.exception("Initial MCP index failed.")
+            state.progress.finish("failed", str(exc))
+            _notify("initial index FAILED — results may be incomplete (see logs)")
+        else:
+            state.progress.finish("ready")
             _notify(
-                "building the initial index in the background — search works now and "
-                "returns more as it finishes (call index_status to check progress)"
+                f"initial index ready: {stats.total_files} files / "
+                f"{stats.total_chunks} chunks in {time.monotonic() - started:.0f}s"
             )
-            started = time.monotonic()
-            try:
-                stats = cr.index()
-            except Exception:  # pragma: no cover - defensive
-                logger.exception("Initial MCP index failed.")
-                _notify("initial index FAILED — results may be incomplete (see logs)")
-            else:
-                _notify(
-                    f"initial index ready: {stats.total_files} files / "
-                    f"{stats.total_chunks} chunks in {time.monotonic() - started:.0f}s"
-                )
-            finally:
-                state.indexing = False
+        finally:
+            state.indexing = False
 
-        threading.Thread(
-            target=_initial_index, name="coderag-mcp-index", daemon=True
-        ).start()
+    threading.Thread(
+        target=_bootstrap, name="coderag-mcp-bootstrap", daemon=True
+    ).start()
 
     if do_watch:
         threading.Thread(
diff --git a/coderag/types.py b/coderag/types.py
index 89668e0..e4f2627 100644
--- a/coderag/types.py
+++ b/coderag/types.py
@@ -2,9 +2,20 @@
 
 from __future__ import annotations
 
+import threading
+import time
 from dataclasses import dataclass
 from typing import Any, Dict, Optional
 
+try:  # Literal lives in typing on 3.8+, but keep the import defensive.
+    from typing import Literal
+
+    IndexState = Literal[
+        "idle", "scanning", "indexing", "optimizing", "ready", "failed"
+    ]
+except ImportError:  # pragma: no cover - very old runtimes
+    IndexState = str  # type: ignore[misc,assignment]
+
 
 @dataclass(slots=True)
 class Chunk:
@@ -79,3 +90,86 @@ def as_dict(self) -> Dict[str, Any]:
             "total_files": self.total_files,
             "total_chunks": self.total_chunks,
         }
+
+
+class IndexProgress:
+    """Thread-safe live snapshot of an in-flight index run.
+
+    Unlike :class:`IndexStats` (a plain value object returned once a run finishes), this is a
+    *mutable, shared* object: the indexer thread updates it as it works while a different
+    thread — the MCP ``index_status`` tool — polls it. That is what makes a long index
+    legible: ``total_files``/``total_chunks`` read from the store lag behind until rows are
+    committed, but ``files_discovered`` / ``files_indexed`` here tick up live, so an agent can
+    tell the difference between "hung" and "scanning a big tree".
+
+    A single lock guards every field so ``snapshot()`` returns a consistent set rather than a
+    torn mix of old and new values. Writer helpers are only called from the index thread;
+    ``snapshot()`` is safe to call from any thread.
+    """
+
+    def __init__(self) -> None:
+        self._lock = threading.Lock()
+        self.state: IndexState = "idle"
+        self.files_discovered = 0  # candidates walked so far (phase 1)
+        self.files_to_index = 0  # changed files queued for embedding
+        self.files_indexed = 0  # files embedded + written so far (phase 2)
+        self.chunks_added = 0  # chunks written so far
+        self.current_path: Optional[str] = None
+        self.started_at: Optional[float] = None  # time.time() epoch
+        self.finished_at: Optional[float] = None
+        self.last_error: Optional[str] = None
+
+    # --- writer-side helpers (called only from the index thread) ---
+
+    def begin(self, state: IndexState = "scanning") -> None:
+        with self._lock:
+            self.state = state
+            self.started_at = time.time()
+            self.finished_at = None
+            self.last_error = None
+            self.files_discovered = self.files_to_index = 0
+            self.files_indexed = self.chunks_added = 0
+            self.current_path = None
+
+    def set_state(self, state: IndexState) -> None:
+        with self._lock:
+            self.state = state
+
+    def saw_file(self, n_discovered: int, n_to_index: int) -> None:
+        with self._lock:
+            self.files_discovered = n_discovered
+            self.files_to_index = n_to_index
+
+    def wrote_file(self, path: str, chunks_added: int) -> None:
+        with self._lock:
+            self.files_indexed += 1
+            self.chunks_added += chunks_added
+            self.current_path = path
+
+    def finish(self, state: IndexState, error: Optional[str] = None) -> None:
+        with self._lock:
+            self.state = state
+            self.finished_at = time.time()
+            self.last_error = error
+            self.current_path = None
+
+    # --- reader-side ---
+
+    def snapshot(self) -> Dict[str, Any]:
+        """A consistent, JSON-friendly view of the current progress."""
+        with self._lock:
+            elapsed = None
+            if self.started_at is not None:
+                end = self.finished_at or time.time()
+                elapsed = round(end - self.started_at, 2)
+            return {
+                "state": self.state,
+                "files_discovered": self.files_discovered,
+                "files_to_index": self.files_to_index,
+                "files_indexed": self.files_indexed,
+                "chunks": self.chunks_added,
+                "current_path": self.current_path,
+                "started_at": self.started_at,
+                "elapsed": elapsed,
+                "last_error": self.last_error,
+            }
diff --git a/tests/test_indexer.py b/tests/test_indexer.py
index 1d53b3a..a114dad 100644
--- a/tests/test_indexer.py
+++ b/tests/test_indexer.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 
 from coderag.api import CodeRAG
+from coderag.types import IndexProgress
 from tests.conftest import write
 
 
@@ -154,6 +155,46 @@ def test_index_progress_is_silent_when_off(config, capsys):
     assert "Scanning" not in err and "✓ Indexed" not in err
 
 
+def test_index_reports_live_progress(config):
+    # A live IndexProgress is updated as the run proceeds and ends at "ready" — this is what
+    # makes the MCP index_status legible instead of showing 0 while the tree is scanned.
+    cr = _cr(config)
+    write(config.watched_dir / "a.py", "def alpha():\n    return 1\n")
+    write(config.watched_dir / "b.py", "def beta():\n    return 2\n")
+    prog = IndexProgress()
+    assert prog.snapshot()["state"] == "idle"
+
+    cr.indexer.index(live=prog)
+    snap = prog.snapshot()
+    assert snap["state"] == "ready"
+    assert snap["files_indexed"] == 2
+    assert snap["files_discovered"] == 2
+    assert snap["chunks"] >= 2
+    assert snap["started_at"] is not None
+    assert snap["elapsed"] is not None
+    assert snap["last_error"] is None
+
+
+def test_index_progress_records_failure(config, monkeypatch):
+    # If a run raises after begin(), the caller marks the progress "failed" with the error.
+    cr = _cr(config)
+    write(config.watched_dir / "a.py", "def alpha():\n    return 1\n")
+    prog = IndexProgress()
+
+    def boom(*args, **kwargs):
+        raise RuntimeError("embedding exploded")
+
+    monkeypatch.setattr(cr.indexer, "_embed_and_write", boom)
+    prog.begin("scanning")
+    try:
+        cr.indexer.index(live=prog)
+    except RuntimeError as exc:
+        prog.finish("failed", str(exc))
+    snap = prog.snapshot()
+    assert snap["state"] == "failed"
+    assert "embedding exploded" in (snap["last_error"] or "")
+
+
 def test_index_survives_reopen(config, tmp_path):
     cr = _cr(config)
     write(config.watched_dir / "a.py", "def alpha():\n    return 1\n")
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
index 912d9ff..bf2b05a 100644
--- a/tests/test_mcp.py
+++ b/tests/test_mcp.py
@@ -196,18 +196,54 @@ def test_index_status_reports_totals_and_flag(tmp_path):
     assert r["total_files"] == 2
     assert r["total_chunks"] == cr.store.total_chunks()
     assert r["indexing"] == "ready"
+    # The structured progress object is always present.
+    assert "progress" in r and "state" in r["progress"]
 
     state.indexing = True
     assert _call(mcp, "index_status", {})["indexing"] == "in_progress"
     cr.close()
 
 
+def test_index_status_reports_live_progress(tmp_path):
+    # While an index is mid-flight, index_status surfaces live counters so an agent can tell
+    # "scanning a big tree" apart from "stuck" — even before any rows are committed.
+    cr, mcp, state, _ = _make(tmp_path, DEMO)
+    state.indexing = True
+    state.progress.begin("scanning")
+    state.progress.saw_file(1200, 300)
+    state.progress.set_state("indexing")
+    state.progress.wrote_file("pkg/mod.py", 7)
+
+    p = _call(mcp, "index_status", {})["progress"]
+    assert p["state"] == "indexing"
+    assert p["files_discovered"] == 1200
+    assert p["files_to_index"] == 300
+    assert p["files_indexed"] == 1
+    assert p["chunks"] == 7
+    assert p["current_path"] == "pkg/mod.py"
+    assert p["elapsed"] is not None
+    cr.close()
+
+
+def test_tool_annotations_mark_read_only(tmp_path):
+    cr, mcp, _, _ = _make(tmp_path, DEMO)
+    tools = {t.name: t for t in asyncio.run(mcp.list_tools())}
+    for name in ("search_code", "search_files", "get_file", "index_status"):
+        assert tools[name].annotations is not None
+        assert tools[name].annotations.readOnlyHint is True, name
+    # reindex mutates the index — it must NOT be advertised as read-only.
+    assert tools["reindex"].annotations.readOnlyHint is not True
+    cr.close()
+
+
 def test_reindex_picks_up_new_file_and_guards_concurrency(tmp_path):
     cr, mcp, state, repo = _make(tmp_path, DEMO)
     write(repo / "extra.py", "def extra():\n    return 1\n")
     r = _call(mcp, "reindex", {})
     assert r["total_files"] == 3
     assert cr.store.total_chunks() == cr.store.total_chunks()
+    # The run drives the shared progress object and lands on "ready".
+    assert state.progress.snapshot()["state"] == "ready"
 
     state.indexing = True  # a run already in progress -> guarded
     assert "error" in _call(mcp, "reindex", {})

From 4640dc30ce7e4c4d476170a645b75ec518ec9348 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 05:53:46 +0000
Subject: [PATCH 2/2] fix(types): drop unused assignment ignore code in
 IndexState fallback

mypy flagged the [assignment] code in the type: ignore on the
ImportError fallback as unused; keep [misc] for the alias redefinition.
---
 coderag/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/coderag/types.py b/coderag/types.py
index e4f2627..f38cf02 100644
--- a/coderag/types.py
+++ b/coderag/types.py
@@ -14,7 +14,7 @@
         "idle", "scanning", "indexing", "optimizing", "ready", "failed"
     ]
 except ImportError:  # pragma: no cover - very old runtimes
-    IndexState = str  # type: ignore[misc,assignment]
+    IndexState = str  # type: ignore[misc]
 
 
 @dataclass(slots=True)