CodeRAG/example.env at master · Neverdecel/CodeRAG · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# CodeRAG configuration. Copy to .env and adjust. All values are optional —
# CodeRAG runs out of the box with a local model and no API key.

# --- Embedding backend (this IS the search; local by default, no API key) ---
# Provider: fastembed (local, default) | openai | fake
#   openai = the OpenAI API OR any OpenAI-compatible/local server (set OPENAI_BASE_URL below).
CODERAG_PROVIDER=fastembed
# Local embedding model (fastembed). 384-dim, no API key required.
# List recommended local models with: coderag eval --list-models
CODERAG_MODEL=BAAI/bge-small-en-v1.5
# Where downloaded local models are cached.
# CODERAG_CACHE_DIR=~/.cache/coderag

# --- Locations ---
# The codebase to index/search. Replace the placeholder path below, or comment the
# line out to fall back to the default (the current directory).
CODERAG_WATCHED_DIR=/path/to/your/codebase
# Where the LanceDB store is kept (defaults to ./.coderag).
# CODERAG_STORE_DIR=./.coderag

# --- Retrieval ---
# CODERAG_TOP_K=8
# Structure-aware 1-hop call-graph expansion (opt-in): enrich results with the definitions
# of what a top hit calls (its callees), resolved from the hit's text — no API key, no
# reindex. Small, consistent symbol-level lift across flask/requests/click. The default
# weight 0.15 was a strict Pareto improvement; raise it cautiously. See docs/configuration.md.
# CODERAG_GRAPH_EXPANSION=false
# CODERAG_GRAPH_SEEDS=5
# CODERAG_GRAPH_NEIGHBORS=5
# CODERAG_GRAPH_WEIGHT=0.15

# --- Indexing throughput ---
# Number of worker threads for chunking + embedding during indexing. Values >1
# parallelize the embed step, which is a big win for the OpenAI/remote providers.
# With the local fastembed default, ONNX already uses multiple cores per call, so the
# extra lever there is the batch size below. Set to 1 to force fully serial indexing.
# CODERAG_WORKERS=4
# CODERAG_EMBED_BATCH=64

# --- MCP server surface (`coderag mcp`, install: pip install 'coderag[mcp]') ---
# Lets AI coding agents (Claude Code, Codex, Cursor) query this workspace instead of
# grepping. By default it indexes the watched dir on startup (in the background) and
# keeps it live via the watcher.
# CODERAG_MCP_AUTO_INDEX=true
# CODERAG_MCP_WATCH=true
# Lines of a chunk returned in a search_code snippet by default (full text on request).
# CODERAG_MCP_SNIPPET_LINES=12
# Index any UTF-8 text file, not just code (docs/notes/config, extensionless files) so a
# plain file directory becomes searchable. Binary files are always skipped.
# CODERAG_INDEX_ALL_TEXT=false

# --- Optional: LLM answers (`coderag search ... --answer`) ---
# Search is fully local and needs none of this. Configure a backend below ONLY to also get
# a generated, cited prose answer. The answer model can be local too — see docs/configuration.md.

# Option A — LOCAL answer model (Ollama / LM Studio / vLLM / LocalAI). Point CodeRAG at any
# OpenAI-compatible server; the `openai` backend means the OpenAI *protocol*, not the company.
# No API key needed. Set the base URL and your local model name:
# Base URL of the local server (this example is Ollama's OpenAI-compatible endpoint):
# OPENAI_BASE_URL=http://localhost:11434/v1
# The model your local server serves:
# CODERAG_CHAT_MODEL=llama3.1

# Option B — OpenAI cloud (embeddings and/or chat answers):
# OPENAI_API_KEY=sk-...
# CODERAG_CHAT_MODEL=gpt-4o-mini
# Only needed if you also use --provider openai for embeddings:
# CODERAG_OPENAI_MODEL=text-embedding-3-small

# Option C — Anthropic (Claude), answers only:
# Select the answer backend (default is openai):
# CODERAG_LLM_PROVIDER=anthropic
# ANTHROPIC_API_KEY=sk-ant-...
# CODERAG_ANTHROPIC_MODEL=claude-opus-4-8

# Max tokens generated per answer (any backend):
# CODERAG_ANSWER_MAX_TOKENS=1024

# --- Demo mode (public, untrusted UI) ---
# When on, the Streamlit UI shows a notice, hides the Reindex button, caps results,
# and limits LLM answers per browser session (soft, session-state based — pair with an
# Ollama OLLAMA_NUM_PARALLEL cap for a hard GPU backstop, and lower CODERAG_ANSWER_MAX_TOKENS).
# CODERAG_DEMO_MODE=false
# CODERAG_DEMO_MAX_ANSWERS=5
# CODERAG_DEMO_COOLDOWN_SECONDS=20