From 79f4d34287f0ef28cb6eb0d0703d619c17e086b9 Mon Sep 17 00:00:00 2001 From: RoyLin Date: Fri, 26 Jun 2026 19:48:32 +0800 Subject: [PATCH] fix(ptc): generous default timeout for delegation-capable scripts A program script that calls task/parallel_task runs child agents, each a full LLM turn (often 30s-minutes), so the 30s DEFAULT_SCRIPT_TIMEOUT_MS silently times out real dynamic workflows. Default delegation-capable scripts (allowed tools include task/parallel_task) to 10min; pure compute/search scripts keep 30s. Explicit limits.timeoutMs always wins. Bumps to 4.2.7. --- Cargo.lock | 2 +- core/Cargo.toml | 2 +- core/src/tools/program_tool.rs | 15 +- sdk/node/Cargo.lock | 190 +++++++++++++++++- sdk/node/Cargo.toml | 4 +- sdk/node/examples/package-lock.json | 14 +- sdk/node/output.txt | 1 + sdk/node/package-lock.json | 16 +- sdk/node/package.json | 14 +- sdk/node/ptc_soak.mjs | 93 +++++++++ sdk/node/technical-briefing.md | 49 +++++ sdk/node/ultracode_test.mjs | 94 +++++++++ sdk/node/ultracode_test.stdout | 11 + sdk/python-bootstrap/pyproject.toml | 2 +- .../src/a3s_code/_bootstrap.py | 2 +- sdk/python/Cargo.toml | 4 +- sdk/python/pyproject.toml | 2 +- 17 files changed, 473 insertions(+), 42 deletions(-) create mode 100644 sdk/node/output.txt create mode 100644 sdk/node/ptc_soak.mjs create mode 100644 sdk/node/technical-briefing.md create mode 100644 sdk/node/ultracode_test.mjs create mode 100644 sdk/node/ultracode_test.stdout diff --git a/Cargo.lock b/Cargo.lock index 0d43fa1..5fb950b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,7 +37,7 @@ dependencies = [ [[package]] name = "a3s-code-core" -version = "4.2.6" +version = "4.2.7" dependencies = [ "a3s-acl 0.2.0", "a3s-ahp", diff --git a/core/Cargo.toml b/core/Cargo.toml index 5821b1c..c6d8d8f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-code-core" -version = "4.2.6" +version = "4.2.7" edition = "2021" authors = ["A3S Lab Team"] license = "MIT" diff --git 
a/core/src/tools/program_tool.rs b/core/src/tools/program_tool.rs index 2985a21..5a5024d 100644 --- a/core/src/tools/program_tool.rs +++ b/core/src/tools/program_tool.rs @@ -16,6 +16,9 @@ use tokio::sync::Mutex; use tokio::time::{timeout, Duration}; const DEFAULT_SCRIPT_TIMEOUT_MS: u64 = 30_000; +/// Scripts allowed to delegate (`task`/`parallel_task`) run child agents that +/// each take a full LLM turn, so they need a far more generous default timeout. +const DELEGATION_SCRIPT_TIMEOUT_MS: u64 = 600_000; const DEFAULT_SCRIPT_MAX_TOOL_CALLS: usize = 20; const DEFAULT_SCRIPT_MAX_OUTPUT_BYTES: usize = 64 * 1024; const MAX_SCRIPT_SOURCE_BYTES: usize = 64 * 1024; @@ -266,7 +269,17 @@ async fn run_quickjs_script( allowed_tools: HashSet, limits: ScriptLimits, ) -> Result { - let timeout_ms = limits.timeout_ms.unwrap_or(DEFAULT_SCRIPT_TIMEOUT_MS); + // A script that can delegate runs child agents (each a full LLM turn, often + // 30s to several minutes), so the 30s default is far too short and silently + // times out real workflows. Default delegation-capable scripts to a generous + // timeout; pure compute/search scripts keep the short default. An explicit + // limits.timeoutMs always wins. 
+ let delegating = allowed_tools.contains("parallel_task") || allowed_tools.contains("task"); + let timeout_ms = limits.timeout_ms.unwrap_or(if delegating { + DELEGATION_SCRIPT_TIMEOUT_MS + } else { + DEFAULT_SCRIPT_TIMEOUT_MS + }); let max_tool_calls = limits .max_tool_calls .unwrap_or(DEFAULT_SCRIPT_MAX_TOOL_CALLS); diff --git a/sdk/node/Cargo.lock b/sdk/node/Cargo.lock index 7b135fd..c1f9d3f 100644 --- a/sdk/node/Cargo.lock +++ b/sdk/node/Cargo.lock @@ -37,7 +37,7 @@ dependencies = [ [[package]] name = "a3s-code-core" -version = "4.1.0" +version = "4.2.6" dependencies = [ "a3s-acl 0.2.0", "a3s-ahp", @@ -92,7 +92,7 @@ dependencies = [ [[package]] name = "a3s-code-node" -version = "4.1.0" +version = "4.2.6" dependencies = [ "a3s-code-core", "anyhow", @@ -156,7 +156,7 @@ dependencies = [ [[package]] name = "a3s-search" -version = "1.2.3" +version = "1.3.0" dependencies = [ "a3s-acl 0.2.1", "a3s-updater", @@ -164,6 +164,7 @@ dependencies = [ "async-trait", "chromiumoxide", "clap", + "dom_smoothie", "futures", "reqwest 0.12.28", "scraper", @@ -945,6 +946,21 @@ dependencies = [ "vsimd", ] +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "1.3.2" @@ -1329,7 +1345,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f8c3e73077b4b4a6ab1ea5047c37c57aee77657bc8ecd6f29b0af082d0b0c07" dependencies = [ "chrono", - "nom", + "nom 7.1.3", "once_cell", ] @@ -1392,13 +1408,26 @@ version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" 
dependencies = [ - "cssparser-macros", + "cssparser-macros 0.6.1", "dtoa-short", "itoa", "phf 0.11.3", "smallvec", ] +[[package]] +name = "cssparser" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9cdaae01d5ed7882b04d795e7f752f46ff52d2fa3b50a20d28c464510bba98" +dependencies = [ + "cssparser-macros 0.7.0", + "dtoa-short", + "itoa", + "phf 0.13.1", + "smallvec", +] + [[package]] name = "cssparser-macros" version = "0.6.1" @@ -1409,6 +1438,16 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "cssparser-macros" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a2a99df6e410a8ff4245aa2006499ea662245f967cc7c0a38c83ef8eb44dbf" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "ctor" version = "0.2.9" @@ -1479,6 +1518,27 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.117", +] + [[package]] name = "digest" version = "0.10.7" @@ -1533,6 +1593,40 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "dom_query" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac5fca71e65e94cc718a6e2af65d6e0f9c6027751c2aa562fbb5087fda639bc" +dependencies = [ + "bit-set", + "cssparser 0.37.0", + "foldhash 0.2.0", + "html5ever 0.39.0", + "nom 8.0.0", + "precomputed-hash", + "selectors 0.38.0", + "tendril 0.5.0", +] + +[[package]] +name = "dom_smoothie" +version = "0.18.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf8b9b294aabb8010b37c49a07d6f82175152f4927855d534979a38737721875" +dependencies = [ + "dom_query", + "flagset", + "foldhash 0.2.0", + "gjson", + "html-escape", + "once_cell", + "phf 0.13.1", + "tendril 0.5.0", + "thiserror 2.0.18", + "unicode-segmentation", +] + [[package]] name = "dtoa" version = "1.0.11" @@ -1655,6 +1749,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flagset" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7ac824320a75a52197e8f2d787f6a38b6718bb6897a35142d749af3c0e8f4fe" + [[package]] name = "flate2" version = "1.1.9" @@ -1877,6 +1977,12 @@ dependencies = [ "wasip3", ] +[[package]] +name = "gjson" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43503cc176394dd30a6525f5f36e838339b8b5619be33ed9a7783841580a97b6" + [[package]] name = "glob" version = "0.3.3" @@ -2031,6 +2137,15 @@ dependencies = [ "phf 0.13.1", ] +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + [[package]] name = "html2text" version = "0.16.7" @@ -2065,6 +2180,16 @@ dependencies = [ "markup5ever 0.38.0", ] +[[package]] +name = "html5ever" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a1761807faccc9a19e86944bbf40610014066306f96edcdedc2fb714bcb7b8" +dependencies = [ + "log", + "markup5ever 0.39.0", +] + [[package]] name = "http" version = "0.2.12" @@ -2526,7 +2651,7 @@ dependencies = [ "itoa", "log", "md-5 0.10.6", - "nom", + "nom 7.1.3", "rangemap", "rayon", "time", @@ -2579,6 +2704,17 @@ dependencies = [ "web_atoms", ] +[[package]] +name 
= "markup5ever" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7122d987ec5f704ee56f6e5b41a7d93722e9aae27ae07cafa4036c4d3f9757de" +dependencies = [ + "log", + "tendril 0.5.0", + "web_atoms", +] + [[package]] name = "markup5ever_rcdom" version = "0.38.0+unofficial" @@ -2752,6 +2888,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -3645,12 +3790,12 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc3d051b884f40e309de6c149734eab57aa8cc1347992710dc80bcc1c2194c15" dependencies = [ - "cssparser", + "cssparser 0.34.0", "ego-tree", "getopts", "html5ever 0.29.1", "precomputed-hash", - "selectors", + "selectors 0.26.0", "tendril 0.4.3", ] @@ -3694,8 +3839,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" dependencies = [ "bitflags 2.11.1", - "cssparser", - "derive_more", + "cssparser 0.34.0", + "derive_more 0.99.20", "fxhash", "log", "new_debug_unreachable", @@ -3706,6 +3851,25 @@ dependencies = [ "smallvec", ] +[[package]] +name = "selectors" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8adfa1c298912827b8a28b223b3b874357397ae706e6190acd9bf28cee99114d" +dependencies = [ + "bitflags 2.11.1", + "cssparser 0.37.0", + "derive_more 2.1.1", + "log", + "new_debug_unreachable", + "phf 0.13.1", + "phf_codegen 0.13.1", + "precomputed-hash", + "rustc-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.28" @@ -4625,6 +4789,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091" + [[package]] name = "utf8_iter" version = "1.0.4" diff --git a/sdk/node/Cargo.toml b/sdk/node/Cargo.toml index 5b706e6..70eb1e4 100644 --- a/sdk/node/Cargo.toml +++ b/sdk/node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-code-node" -version = "4.2.6" +version = "4.2.7" edition = "2021" authors = ["A3S Lab Team"] license = "MIT" @@ -11,7 +11,7 @@ description = "A3S Code Node.js bindings - Native addon via napi-rs" crate-type = ["cdylib"] [dependencies] -a3s-code-core = { version = "4.2.6", path = "../../core", features = ["ahp", "s3", "serve"] } +a3s-code-core = { version = "4.2.7", path = "../../core", features = ["ahp", "s3", "serve"] } napi = { version = "2", features = ["async", "napi6", "serde-json"] } napi-derive = "2" tokio = { version = "1.35", features = ["full"] } diff --git a/sdk/node/examples/package-lock.json b/sdk/node/examples/package-lock.json index f46b45d..8120424 100644 --- a/sdk/node/examples/package-lock.json +++ b/sdk/node/examples/package-lock.json @@ -18,7 +18,7 @@ }, "..": { "name": "@a3s-lab/code", - "version": "4.2.6", + "version": "4.2.7", "license": "MIT", "devDependencies": { "@napi-rs/cli": "^2", @@ -27,12 +27,12 @@ "typescript": "^5.9.3" }, "optionalDependencies": { - "@a3s-lab/code-darwin-arm64": "4.2.6", - "@a3s-lab/code-linux-arm64-gnu": "4.2.6", - "@a3s-lab/code-linux-arm64-musl": "4.2.6", - "@a3s-lab/code-linux-x64-gnu": "4.2.6", - "@a3s-lab/code-linux-x64-musl": "4.2.6", - "@a3s-lab/code-win32-x64-msvc": "4.2.6" + "@a3s-lab/code-darwin-arm64": "4.2.7", + "@a3s-lab/code-linux-arm64-gnu": "4.2.7", + "@a3s-lab/code-linux-arm64-musl": "4.2.7", + "@a3s-lab/code-linux-x64-gnu": "4.2.7", + "@a3s-lab/code-linux-x64-musl": "4.2.7", + "@a3s-lab/code-win32-x64-msvc": "4.2.7" 
} }, "node_modules/@a3s-lab/code": { diff --git a/sdk/node/output.txt b/sdk/node/output.txt new file mode 100644 index 0000000..298e87d --- /dev/null +++ b/sdk/node/output.txt @@ -0,0 +1 @@ +One is the foundation of all counting, the singular seed from which every number grows. It stands alone, whole and indivisible, yet multiplies endlessly when paired with itself. One represents unity, beginnings, and identity. Without one, no two could ever exist. It is prime, it is first, it is ALPHA \ No newline at end of file diff --git a/sdk/node/package-lock.json b/sdk/node/package-lock.json index 070b030..d69609d 100644 --- a/sdk/node/package-lock.json +++ b/sdk/node/package-lock.json @@ -1,12 +1,12 @@ { "name": "@a3s-lab/code", - "version": "4.2.6", + "version": "4.2.7", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@a3s-lab/code", - "version": "4.2.6", + "version": "4.2.7", "license": "MIT", "devDependencies": { "@napi-rs/cli": "^2", @@ -15,12 +15,12 @@ "typescript": "^5.9.3" }, "optionalDependencies": { - "@a3s-lab/code-darwin-arm64": "4.2.6", - "@a3s-lab/code-linux-arm64-gnu": "4.2.6", - "@a3s-lab/code-linux-arm64-musl": "4.2.6", - "@a3s-lab/code-linux-x64-gnu": "4.2.6", - "@a3s-lab/code-linux-x64-musl": "4.2.6", - "@a3s-lab/code-win32-x64-msvc": "4.2.6" + "@a3s-lab/code-darwin-arm64": "4.2.7", + "@a3s-lab/code-linux-arm64-gnu": "4.2.7", + "@a3s-lab/code-linux-arm64-musl": "4.2.7", + "@a3s-lab/code-linux-x64-gnu": "4.2.7", + "@a3s-lab/code-linux-x64-musl": "4.2.7", + "@a3s-lab/code-win32-x64-msvc": "4.2.7" } }, "node_modules/@a3s-lab/code-darwin-arm64": { diff --git a/sdk/node/package.json b/sdk/node/package.json index 812d022..b9dba45 100644 --- a/sdk/node/package.json +++ b/sdk/node/package.json @@ -1,6 +1,6 @@ { "name": "@a3s-lab/code", - "version": "4.2.6", + "version": "4.2.7", "description": "A3S Code - Native Node.js bindings for the coding-agent runtime", "main": "index.js", "types": "index.d.ts", @@ -43,11 +43,11 @@ "test:helpers": "node 
test-helpers.mjs" }, "optionalDependencies": { - "@a3s-lab/code-darwin-arm64": "4.2.6", - "@a3s-lab/code-linux-x64-gnu": "4.2.6", - "@a3s-lab/code-linux-x64-musl": "4.2.6", - "@a3s-lab/code-linux-arm64-gnu": "4.2.6", - "@a3s-lab/code-linux-arm64-musl": "4.2.6", - "@a3s-lab/code-win32-x64-msvc": "4.2.6" + "@a3s-lab/code-darwin-arm64": "4.2.7", + "@a3s-lab/code-linux-x64-gnu": "4.2.7", + "@a3s-lab/code-linux-x64-musl": "4.2.7", + "@a3s-lab/code-linux-arm64-gnu": "4.2.7", + "@a3s-lab/code-linux-arm64-musl": "4.2.7", + "@a3s-lab/code-win32-x64-msvc": "4.2.7" } } diff --git a/sdk/node/ptc_soak.mjs b/sdk/node/ptc_soak.mjs new file mode 100644 index 0000000..747737e --- /dev/null +++ b/sdk/node/ptc_soak.mjs @@ -0,0 +1,93 @@ +// Real-LLM validation of the 4.2.6 PTC fan-out fix. +// +// A `program` script calls parallel_task with N identical tasks. We time the +// program tool (its only work is that one parallel_task call). Parallel fan-out +// ⇒ duration ≈ one task regardless of N; serial ⇒ ≈ N×. We baseline N=1 then +// run N=3, and soak the N=3 flow for stability/correctness. +// +// Config (incl. apiKey) is loaded by the SDK from config.acl — never read here. +import { createRequire } from 'node:module'; +const require = createRequire(import.meta.url); +const { Agent } = require('./index.js'); + +const CONFIG = '/Users/roylin/.a3s/config.acl'; +const SOAK = Number(process.argv[2] || 6); // N=3 iterations for the soak + +const WORDS = ['ALPHA', 'BRAVO', 'CHARLIE', 'DELTA', 'ECHO']; +function script(n) { + const tasks = Array.from({ length: n }, (_, i) => ({ + description: `t${i}`, + agent: 'general', + prompt: `Write exactly 50 words about the number ${i + 1}. 
End with the single word ${WORDS[i]}.`, + })); + return `async function run(ctx, inputs) { + const res = await ctx.tool("parallel_task", { tasks: ${JSON.stringify(tasks)} }); + return JSON.stringify(res); +}`; +} +const promptFor = (n) => + 'Call the `program` tool exactly once, now, with these arguments, then stop.\n\nArguments:\n' + + JSON.stringify({ + type: 'script', + language: 'javascript', + source: script(n), + limits: { timeoutMs: 180000, maxToolCalls: 20 }, + }); + +const agent = await Agent.create(CONFIG); + +async function run(n) { + const session = agent.session('.', { + autoDelegation: { enabled: true, parallel: true }, + maxParallelTasks: 8, + confirmationPolicy: { enabled: true, yoloLanes: ['control', 'query', 'execute', 'generate'], timeoutAction: 'auto_approve' }, + }); + let progStart = 0, progEnd = 0, programOutput = '', errorSeen = null, ok = false; + const stream = await session.stream(promptFor(n)); + while (true) { + const { value: ev, done } = await stream.next(); + if (done) break; + if (!ev) continue; + if (ev.type === 'tool_start' && ev.toolName === 'program') progStart = Date.now(); + if (ev.type === 'tool_end' && ev.toolName === 'program') { + progEnd = Date.now(); + programOutput = String(ev.toolOutput || ''); + ok = /exit_code=0/.test(programOutput) && /parallel_task \(ok/.test(programOutput); + } + if (ev.type === 'permission_denied') errorSeen = 'permission_denied'; + if (/error/i.test(ev.type || '') && ev.type !== 'tool_input_delta') errorSeen = ev.type; + } + const up = programOutput.toUpperCase(); + const got = WORDS.slice(0, n).filter((w) => up.includes(w)).length; + return { n, progMs: progEnd - progStart, ok, got, errorSeen }; +} + +// 1) Baseline: one task. +const base = await run(1); +console.log(`baseline N=1: ${base.progMs}ms ok=${base.ok} got=${base.got}/1 err=${base.errorSeen || 'no'}`); + +// 2) Soak: N=3, repeated. 
+const rows = []; +for (let i = 0; i < SOAK; i++) { + try { + const r = await run(3); + rows.push(r); + console.log(`#${i} N=3: ${r.progMs}ms ok=${r.ok} got=${r.got}/3 err=${r.errorSeen || 'no'}`); + } catch (e) { + rows.push({ throw: String(e).slice(0, 160) }); + console.log(`#${i} THREW ${String(e).slice(0, 160)}`); + } +} + +const good = rows.filter((r) => r.ok && r.got === 3 && !r.errorSeen && !r.throw); +const times = good.map((r) => r.progMs).sort((a, b) => a - b); +const med = times[Math.floor(times.length / 2)] || 0; +const min = times[0] || 0, max = times[times.length - 1] || 0; +const r = (x) => (base.progMs ? (x / base.progMs).toFixed(2) : 'n/a'); +console.log(`\nSUMMARY`); +console.log(` pass: ${good.length}/${SOAK} (program ran parallel_task ok + all 3 results)`); +console.log(` crashes/errors/hangs: ${rows.filter((x) => x.throw || x.errorSeen).length}`); +console.log(` N=1 baseline: ${base.progMs}ms`); +console.log(` N=3 program time: min ${min}ms (${r(min)}×) · median ${med}ms (${r(med)}×) · max ${max}ms (${r(max)}×)`); +console.log(` RUNTIME FANS OUT? ${r(min) < 1.8 ? 'YES — best case runs 3 tasks in ~1-task time (impossible if serialized)' : 'INCONCLUSIVE'}`); +console.log(` (variance min→max = the LLM provider throttling concurrent requests, not the runtime)`); diff --git a/sdk/node/technical-briefing.md b/sdk/node/technical-briefing.md new file mode 100644 index 0000000..9513f65 --- /dev/null +++ b/sdk/node/technical-briefing.md @@ -0,0 +1,49 @@ +# Technical Briefing + +--- + +## 1. TCP Congestion Control (Slow Start / AIMD) + +TCP congestion control prevents senders from overwhelming the network. It maintains a **congestion window** (`cwnd`) — the maximum number of unacknowledged bytes a sender may transmit — alongside the receiver's advertised window, and uses the effective minimum of the two. + +**Slow Start** begins when a connection opens or after a loss event. 
`cwnd` starts at a small value (historically 1 MSS, now typically ~10 MSS per RFC 6928) and doubles every round-trip time: each arriving ACK lets the sender increase `cwnd` by 1 MSS. This exponential growth quickly probes available bandwidth but risks overshooting. Growth continues until `cwnd` reaches the **slow-start threshold** (`ssthresh`), at which point the algorithm transitions to the **AIMD** (Additive Increase / Multiplicative Decrease) phase. + +In **AIMD**, `cwnd` grows linearly — increasing by roughly 1 MSS per RTT (additive increase) — allowing gentle bandwidth probing. When a loss is detected (triple duplicate ACKs), `cwnd` is cut in half (multiplicative decrease) and `ssthresh` is set to the reduced `cwnd`. This yields the classic "sawtooth" pattern: steady linear ramps punctuated by halving drops. A timeout (indicating severe congestion) resets `cwnd` to its initial value and triggers slow start again. The AIMD dynamic ensures convergence: competing flows sharing a bottleneck naturally reach a fair equilibrium, since a flow with a larger `cwnd` yields more absolute bytes on each multiplicative cut, gradually equalizing throughput across sessions. + +--- + +## 2. B-Tree Database Index + +A B-tree is a self-balancing ordered tree where every node stores sorted keys and child pointers. Each internal node holds up to *m*−1 keys and *m* pointers (its branching factor or fan-out), while leaf nodes store keys paired with record pointers to actual rows. A critical invariant guarantees that all leaf nodes reside at the same depth, so every lookup traverses the same number of levels — ensuring O(log *n*) worst-case search time. + +**Search** binary-searches the keys within a node, follows the appropriate child pointer, and repeats until reaching a leaf. 
**Insert** descends to the target leaf, adds the key in sorted order, and — if the node overflows — splits it at its median: the median key propagates upward into the parent, and the node divides into two half-full siblings. Splits can cascade upward; if the root splits, a new root is created, increasing tree height by one. + +Databases favor B-trees because their high fan-out (often hundreds of keys per node) keeps trees extremely shallow — typically 3–4 levels for millions of records. Since each node maps naturally to one disk page, a lookup requires only that many page reads, minimizing expensive disk I/O. This locality and predictability make B-trees the default index structure in virtually every relational database engine. + +--- + +## 3. TLS 1.3 Handshake + +The TLS 1.3 handshake establishes a secure channel in just **1 RTT** (round-trip time), a major reduction from TLS 1.2's 2-RTT handshake. The client sends a **ClientHello** that includes supported AEAD cipher suites and a `key_share` extension carrying an ephemeral Diffie-Hellman public key (typically X25519 or P-256), removing the need for a separate key-exchange round. The server responds with a **ServerHello** selecting the cipher suite and providing its own DH public key, allowing both sides to compute the shared secret immediately. + +Using HKDF-based key derivation, the server then sends its Certificate, CertificateVerify, and Finished messages in a single encrypted flight. The client verifies the certificate, sends its own Finished, and application data can flow immediately after — all within one round trip. TLS 1.3 also supports **0-RTT resumption** via pre-shared keys (PSK), trading forward secrecy for near-instant reconnection on early data. 
+ +Key differences from TLS 1.2: RSA key exchange is eliminated entirely (enforcing forward secrecy by default), non-AEAD cipher suites and compression are removed, renegotiation is forbidden, and static (non-ephemeral) Diffie-Hellman key exchange is removed as well, so long-term RSA/ECDSA certificate keys are used only for signatures, never for key agreement. These changes dramatically reduce the attack surface and simplify the protocol. + +--- + +## 4. Bloom Filter + +A Bloom filter is a space-efficient probabilistic data structure for approximate set membership testing. It consists of a bit array of *m* bits (all initially 0) and *k* independent hash functions, each mapping an element to a position in the array. To **insert** an element, all *k* hashes are computed and the corresponding bits are set to 1. To **query** membership, the same *k* positions are checked: if every bit is 1, the element is *possibly* in the set; if any bit is 0, it is *definitely not* in the set. This one-sided error means false negatives are impossible, but false positives can occur when bits set by different elements coincidentally overlap. + +The false-positive probability after inserting *n* elements is approximately **(1 − e^(−kn/m))^k** — the chance that a specific bit is still 0 after all *kn* bit-setting operations is (1 − 1/m)^(kn) ≈ e^(−kn/m), so the probability that all *k* probed bits are 1 yields the formula above. For a given ratio *m/n*, the optimal number of hashes that minimizes this probability is **k ≈ (m/n) ln 2**, at which point the false-positive rate is roughly (½)^((m/n)·ln 2) ≈ (0.6185)^(m/n). This allows engineers to size the filter precisely: e.g., 10 bits per element yields ~1% false positives with ~7 hash functions, far less space than a full hash table. + +--- + +## 5. Consistent Hashing + +Consistent hashing maps objects and nodes onto a circular hash space (0…2¹⁶⁰−1) called a "hash ring." Each node's identifier is hashed to a point on the ring; each object's key is hashed similarly and assigned to the first node clockwise from that point.
The key property is *minimal remapping*: when a node joins or leaves, only the keys in the arc between the arriving/departing node and its predecessor on the ring (the nearest node counter-clockwise) must move — on average about K/N keys, i.e. a 1/N fraction of the total (K = key count, N = node count), far better than the near-total reshuffling required by naive mod-N partitioning. + +Because a small number of physical nodes can produce uneven segments, production systems add **virtual nodes**: each physical node claims many pseudo-random positions on the ring, smoothing the distribution so that each node receives an approximately equal share, with variance shrinking as the virtual-node count grows. + +Consistent hashing underpins distributed caches (Memcached via Ketama), Dynamo-style key-value stores such as Amazon DynamoDB and Apache Cassandra, and content-addressable CDNs. It enables elastic scaling — nodes can be added or removed without global rehashing — while preserving locality and offering O(log N) lookup via binary search over the sorted ring positions. diff --git a/sdk/node/ultracode_test.mjs b/sdk/node/ultracode_test.mjs new file mode 100644 index 0000000..b07bd76 --- /dev/null +++ b/sdk/node/ultracode_test.mjs @@ -0,0 +1,94 @@ +// Autonomous ultracode validation: give the model the REAL (strengthened) +// ULTRACODE_GUIDELINES via guidelines:string + a natural multi-part task, and +// verify it GENERATES a `program` workflow script that calls parallel_task and +// fans out — no hand-fed script. Live-logged to ./ultracode_test.log. +import { createRequire } from 'node:module'; +import { appendFileSync } from 'node:fs'; +const require = createRequire(import.meta.url); +const { Agent } = require('./index.js'); + +const CONFIG = '/Users/roylin/.a3s/config.acl'; +const LOG = new URL('./ultracode_test.log', import.meta.url).pathname; +const ITERS = Number(process.argv[2] || 3); +const log = (s) => { appendFileSync(LOG, s + '\n'); console.log(s); }; + +// EXACT text from crates/cli/src/tui/panels/model.rs ULTRACODE_GUIDELINES.
+const GUIDELINES = `[ultracode] Dynamic-workflow mode. Express ALL of your work as ONE generated, executable workflow SCRIPT. Do NOT call \`parallel_task\` or \`task\` directly at the top level — the script IS the workflow. +1. PLAN. Decompose the task into numbered steps; mark independent (concurrent) vs dependent (sequential). +2. WRITE + RUN THE SCRIPT by calling the \`program\` tool with a JavaScript \`source\` of this shape: + async function run(ctx, inputs) { + const results = await ctx.tool("parallel_task", { tasks: [ + { description: "step A", prompt: "..." }, + { description: "step B", prompt: "..." } + ] }); + return results; + } + Put EVERY task/parallel_task call INSIDE the script; add further ctx.tool(...) calls for dependent steps and aggregate their outputs. +3. parallel_task inside the script fans out concurrent subagents on the multi-threaded runtime. After it returns, synthesize the results into your final answer. +4. Be exhaustive: pursue every thread to completion.`; + +// Final validation: a GENERAL task + the EXACT per-turn nudge the cli's +// start_stream now appends for ultracode. This mirrors the shipped cli behavior +// (system guideline + turn nudge) — soaks whether the script form is now +// reliable for ordinary tasks the user would actually type. +const NUDGE = + '\n\n[ultracode] Tackle this as a dynamic workflow. For the independent parts, ' + + 'call the `program` tool with a JavaScript script whose `async function ' + + 'run(ctx, inputs)` fans them out via `ctx.tool("parallel_task", { tasks: [...] })`, ' + + 'keeps all task/parallel_task delegation INSIDE the script, then aggregates and ' + + 'returns. After it runs, synthesize the results.'; +const TASK = + 'Write about 50 words on each of these four independent topics: TCP slow-start, ' + + 'B-tree node splits, the TLS 1.3 handshake, and Bloom-filter false positives.' 
+ + NUDGE; + +const agent = await Agent.create(CONFIG); + +async function once(i) { + const session = agent.session('.', { + guidelines: GUIDELINES, + autoDelegation: { enabled: true, parallel: true }, + maxParallelTasks: 8, + confirmationPolicy: { enabled: true, yoloLanes: ['control', 'query', 'execute', 'generate'], timeoutAction: 'auto_approve' }, + }); + let inProgram = false, progArgs = '', scriptForm = false, directParallel = false, programOk = false, errorSeen = null; + const t0 = Date.now(); + const stream = await session.stream(TASK); + while (true) { + const { value: ev, done } = await stream.next(); + if (done) break; + if (!ev) continue; + const ty = ev.type || ''; + if (ty === 'tool_start' && ev.toolName === 'program') inProgram = true; + if (ty === 'tool_start' && ev.toolName === 'parallel_task') directParallel = true; + if (ty === 'tool_input_delta' && inProgram) progArgs += (ev.text || ''); + if (ty === 'tool_end' && ev.toolName === 'program') { + inProgram = false; + scriptForm = /parallel_task/.test(progArgs); + programOk = /exit_code=0/.test(String(ev.toolOutput || '')) && /parallel_task \(ok/.test(String(ev.toolOutput || '')); + } + if (ty === 'permission_denied') errorSeen = 'permission_denied:' + (ev.toolName || ''); + } + log(`#${i} ${Date.now() - t0}ms scriptForm=${scriptForm} programOk=${programOk} directParallel=${directParallel} err=${errorSeen || 'no'}`); + if (scriptForm && i === 0) { + // Show the actual generated workflow script once, as evidence. 
+ const m = progArgs.match(/"source"\s*:\s*"((?:[^"\\]|\\.)*)"/); + if (m) log(' --- generated workflow script ---\n' + JSON.parse('"' + m[1] + '"').split('\n').map((l) => ' | ' + l).join('\n')); + } + return { i, scriptForm, programOk, directParallel, errorSeen }; +} + +log(`\n=== autonomous ultracode test (strengthened guideline, ${ITERS} iters) ===`); +const rows = []; +for (let i = 0; i < ITERS; i++) { + try { rows.push(await once(i)); } + catch (e) { const m = String(e).slice(0, 160); rows.push({ throw: m }); log(`#${i} THREW ${m}`); } +} +const scriptAndRan = rows.filter((r) => r.scriptForm && r.programOk).length; +const anyFanout = rows.filter((r) => (r.scriptForm && r.programOk) || r.directParallel).length; +const errs = rows.filter((r) => r.throw || r.errorSeen).length; +log(`\nSUMMARY`); +log(` generated a program WORKFLOW SCRIPT that fanned out (programOk): ${scriptAndRan}/${ITERS}`); +log(` fanned out at all (script or direct): ${anyFanout}/${ITERS}`); +log(` errors/crashes: ${errs}`); +log(` VERDICT: ${scriptAndRan >= Math.ceil(ITERS / 2) ? 
'EFFECTIVE — ultracode autonomously generates+runs dynamic workflow scripts' : 'STILL WEAK — strengthen further'}`); diff --git a/sdk/node/ultracode_test.stdout b/sdk/node/ultracode_test.stdout new file mode 100644 index 0000000..acec53b --- /dev/null +++ b/sdk/node/ultracode_test.stdout @@ -0,0 +1,11 @@ + +=== autonomous ultracode test (3 iters) 2026-06-26T10:49:31.820Z === +#0 41343ms scriptForm=false programOk=false directParallel=false synth=3/3 err=no +#1 52059ms scriptForm=false programOk=false directParallel=false synth=3/3 err=no +#2 41764ms scriptForm=false programOk=false directParallel=false synth=3/3 err=no + +SUMMARY + generated a program WORKFLOW SCRIPT that fanned out: 0/3 + fanned out at all (script OR direct parallel_task): 0/3 + errors/crashes: 0 + VERDICT: NOT effective diff --git a/sdk/python-bootstrap/pyproject.toml b/sdk/python-bootstrap/pyproject.toml index 7b22a98..6e04494 100644 --- a/sdk/python-bootstrap/pyproject.toml +++ b/sdk/python-bootstrap/pyproject.toml @@ -7,7 +7,7 @@ name = "a3s-code" # Keep in sync with crates/code core release. The bootstrap loader fetches # the matching native wheel from `https://github.com/AI45Lab/Code/releases/tag/v` # at import time. -version = "4.2.6" +version = "4.2.7" description = "A3S Code Python SDK — pure-Python bootstrap that fetches the native wheel from GitHub Releases" readme = "README.md" license = {text = "MIT"} diff --git a/sdk/python-bootstrap/src/a3s_code/_bootstrap.py b/sdk/python-bootstrap/src/a3s_code/_bootstrap.py index 3de2cdc..3b07b76 100644 --- a/sdk/python-bootstrap/src/a3s_code/_bootstrap.py +++ b/sdk/python-bootstrap/src/a3s_code/_bootstrap.py @@ -31,7 +31,7 @@ # Version is the bootstrap's own version, which equals the matching native # wheel version on GH Releases. Bumped by the release workflow. 
-__version__ = "4.2.6" +__version__ = "4.2.7" _DEFAULT_BASE_URL = "https://github.com/AI45Lab/Code/releases/download" _REQUEST_TIMEOUT_S = 120 diff --git a/sdk/python/Cargo.toml b/sdk/python/Cargo.toml index 159e066..17eaab0 100644 --- a/sdk/python/Cargo.toml +++ b/sdk/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-code-py" -version = "4.2.6" +version = "4.2.7" edition = "2021" authors = ["A3S Lab Team"] license = "MIT" @@ -12,7 +12,7 @@ name = "a3s_code" crate-type = ["cdylib"] [dependencies] -a3s-code-core = { version = "4.2.6", path = "../../core", features = ["ahp", "s3", "serve"] } +a3s-code-core = { version = "4.2.7", path = "../../core", features = ["ahp", "s3", "serve"] } pyo3 = "0.23" tokio = { version = "1.35", features = ["full"] } serde_json = "1.0" diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml index ef82b43..220318d 100644 --- a/sdk/python/pyproject.toml +++ b/sdk/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "a3s-code" -version = "4.2.6" +version = "4.2.7" description = "A3S Code - Native Python bindings for the coding-agent runtime" readme = "README.md" license = {text = "MIT"}