From cfe3b3fbedebc284cf3942a2026b71e7ead29974 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 5 Jun 2026 01:04:01 +0000 Subject: [PATCH 01/13] [SEA-NodeJS] Kernel backend: mTLS, custom HTTP headers & User-Agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the SEA/kernel path's remaining TLS-adjacent connection options through to the napi binding, matching the Python connector's use_kernel path (session.py + backend/kernel/client.py): - mTLS client identity: `clientCertPem` / `clientKeyPem` (PEM string or Buffer), normalised to Buffers and routed to the kernel `TlsConfig::client_cert_pem` / `client_key_pem`. Both-or-neither enforced up front with an actionable error. - Independent hostname-verify toggle: `checkServerCertificateHostname` (kernel `skip_hostname_verification`) for full parity with Python's `tls_verify_hostname` — skip only the hostname check while still validating the chain. The master `checkServerCertificate=false` still subsumes it. - Custom HTTP headers + User-Agent: headers cross the FFI as an ordered list (`Array<{name,value}>`, the napi `HeaderEntry` shape matching the kernel core `Vec<(String,String)>` and Python's `List[Tuple]`): caller `customHeaders` first, then the connector's composed `User-Agent` appended last (always emitted; the kernel folds the last User-Agent into its base `DatabricksJDBCDriverOSS/...` UA). Kernel-managed reserved names `Authorization` / `x-databricks-org-id` are dropped before the FFI hop, matching Python's `_KERNEL_MANAGED_HEADERS` double-wall. Adds `buildSeaHttpOptions`, extends `buildSeaTlsOptions`/`SeaTlsOptions`, and factors PEM normalisation into a shared helper. Bumps KERNEL_REV and regenerates `native/sea/index.d.ts`. 
Unit tests cover mTLS pairing/validation, the hostname toggle, ordered header pass-through, reserved-name dropping, and User-Agent composition/ordering; verified the real native binding marshals every new field across the FFI and rejects a wrong header shape. Depends on the kernel napi change exposing clientCertPem / clientKeyPem / customHeaders / checkServerCertificateHostname; KERNEL_REV must be repointed to that commit once merged. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- KERNEL_REV | 2 +- lib/contracts/InternalConnectionOptions.ts | 35 +++- lib/sea/SeaAuth.ts | 216 +++++++++++++++++---- native/sea/index.d.ts | 80 +++++++- tests/unit/sea/_helpers/nativeOptions.ts | 40 ++++ tests/unit/sea/auth-m2m.test.ts | 5 +- tests/unit/sea/auth-pat.test.ts | 3 +- tests/unit/sea/auth-u2m.test.ts | 5 +- tests/unit/sea/connectionOptions.test.ts | 152 ++++++++++++++- tests/unit/sea/execution.test.ts | 3 +- 10 files changed, 497 insertions(+), 44 deletions(-) create mode 100644 tests/unit/sea/_helpers/nativeOptions.ts diff --git a/KERNEL_REV b/KERNEL_REV index 1603f791..97b52880 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -8bedaabf69f5bce5a957a8775f29dbb8dbdd2e71 +7f8353f39665e7ac0fcc31a052fd2271caba1f67 diff --git a/lib/contracts/InternalConnectionOptions.ts b/lib/contracts/InternalConnectionOptions.ts index 24575984..8589334a 100644 --- a/lib/contracts/InternalConnectionOptions.ts +++ b/lib/contracts/InternalConnectionOptions.ts @@ -29,11 +29,26 @@ export interface InternalConnectionOptions { /** * SEA-only: verify the server's TLS certificate. Secure-by-default — omit * to keep full chain + hostname verification; set `false` only to opt into - * the insecure accept-anything mode. + * the insecure accept-anything mode. This is the master verify toggle: + * `false` also subsumes the hostname check (see + * `checkServerCertificateHostname`). Mirrors the Python connector's + * `_tls_no_verify` (inverted). * @internal SEA path only. 
*/ checkServerCertificate?: boolean; + /** + * SEA-only: verify that the server certificate matches the host + * (hostname-vs-SNI check), independently of full chain validation. Omit + * to keep the secure default (on); set `false` to skip only the hostname + * check while still validating the chain — e.g. connecting via an IP + * literal or a host the cert wasn't issued for. No-op when + * `checkServerCertificate` is `false` (that disables everything). Mirrors + * the Python connector's `_tls_verify_hostname`. + * @internal SEA path only. + */ + checkServerCertificateHostname?: boolean; + /** * SEA-only: PEM-encoded CA certificate (string or `Buffer`) added to the * trust store on top of the system roots — for TLS-inspecting proxies or @@ -41,4 +56,22 @@ export interface InternalConnectionOptions { * @internal SEA path only. */ customCaCert?: Buffer | string; + + /** + * SEA-only: PEM-encoded client certificate (string or `Buffer`) for + * mutual TLS (mTLS). Must be supplied together with `clientKeyPem`; a + * leaf cert optionally followed by its intermediate chain is accepted. + * Mirrors the Python connector's `_tls_client_cert_file`. + * @internal SEA path only. + */ + clientCertPem?: Buffer | string; + + /** + * SEA-only: PEM-encoded private key (string or `Buffer`) for the mTLS + * client certificate. Must be supplied together with `clientCertPem`. + * For portability supply a PKCS#8 key (`BEGIN PRIVATE KEY`). Mirrors the + * Python connector's `_tls_client_cert_key_file`. + * @internal SEA path only. 
+ */ + clientKeyPem?: Buffer | string; } diff --git a/lib/sea/SeaAuth.ts b/lib/sea/SeaAuth.ts index a9d9d116..0cc0c041 100644 --- a/lib/sea/SeaAuth.ts +++ b/lib/sea/SeaAuth.ts @@ -16,6 +16,7 @@ import { ConnectionOptions } from '../contracts/IDBSQLClient'; import { InternalConnectionOptions } from '../contracts/InternalConnectionOptions'; import AuthenticationError from '../errors/AuthenticationError'; import HiveDriverError from '../errors/HiveDriverError'; +import { buildUserAgentString } from '../utils'; /** * Default local listener port for the U2M authorization-code callback. @@ -113,12 +114,54 @@ export interface SeaTlsOptions { * `customCaCert` over disabling verification entirely. */ checkServerCertificate?: boolean; + /** + * Verify the server certificate's hostname (hostname-vs-SNI), independently + * of chain validation. Omit ⇒ kernel default (on). `false` skips only the + * hostname check. No-op when `checkServerCertificate` is `false`. Mirrors + * the kernel napi `checkServerCertificateHostname` / Python + * `tls_verify_hostname`. + */ + checkServerCertificateHostname?: boolean; /** PEM-encoded CA bytes to add to the trust store. */ customCaCert?: Buffer; + /** + * PEM-encoded client certificate for mutual TLS (kernel + * `TlsConfig::client_cert_pem`). Paired with {@link clientKeyPem} — + * `buildSeaTlsOptions` rejects supplying only one before the FFI hop. + * The napi shape takes a `Buffer`; the public surface also accepts a + * PEM string, normalised here. + */ + clientCertPem?: Buffer; + /** + * PEM-encoded private key for the mTLS client certificate (kernel + * `TlsConfig::client_key_pem`). Paired with {@link clientCertPem}. + */ + clientKeyPem?: Buffer; +} + +/** + * HTTP options shared across all auth-mode variants. Mirrors the napi + * binding's `ConnectionOptions.customHeaders` (kernel + * `HttpConfig::custom_headers`). 
+ * + * Carries the extra request headers the SEA path sends on every request: + * the caller's `customHeaders` plus the composed `User-Agent` (the kernel + * appends a `User-Agent` entry to its base UA rather than replacing it). + * + * An **ordered list** of `{ name, value }` pairs — the napi shape + * (`Array<HeaderEntry>`), which mirrors the kernel core's + * `Vec<(String, String)>` and the Python connector's `http_headers` + * `List[Tuple[str, str]]`. Order is preserved and duplicate names are + * allowed (e.g. a caller `User-Agent` followed by the connector's, which + * the kernel folds last-wins). + */ +export interface SeaHttpOptions { + customHeaders?: Array<{ name: string; value: string }>; +} + export type SeaNativeConnectionOptions = SeaSessionDefaults & SeaTlsOptions & + SeaHttpOptions & ( | { hostName: string; @@ -168,24 +211,74 @@ export function isBlankOrReserved(s: string): boolean { const MAX_U32 = 0xffffffff; /** - * Normalise the public TLS options (`checkServerCertificate` / - * `customCaCert`) into the napi shape. + * Normalise a PEM input (`string` or `Buffer`) accepted on the public + * surface into the `Buffer` the napi shape requires. Does a light, + * ordered BEGIN…END sanity check so a truncated/headerless blob (or a + * stray page that merely contains the literals out of order, e.g. a + * proxy-intercept page) is rejected here rather than surfacing as an + * opaque kernel TLS error. The bytes are NOT fully parsed in JS — that + * is deferred to the kernel, which returns a meaningful error on a + * malformed PEM/key. + * + * `kind` selects the expected block: `'certificate'` matches a + * `CERTIFICATE` block; `'private key'` matches any `… PRIVATE KEY` block + * (PKCS#8 `PRIVATE KEY`, PKCS#1 `RSA PRIVATE KEY`, SEC1 `EC PRIVATE KEY`). + * + * Throws `HiveDriverError` when the value is empty or (for strings) + * lacks the expected PEM header. 
+ */ +function normalizePemBytes(value: Buffer | string, optionName: string, kind: 'certificate' | 'private key'): Buffer { + if (typeof value === 'string') { + const re = + kind === 'certificate' + ? /-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/ + : /-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----[\s\S]+?-----END [A-Z0-9 ]*PRIVATE KEY-----/; + if (!re.test(value)) { + const expected = + kind === 'certificate' + ? "a '-----BEGIN CERTIFICATE-----' … '-----END CERTIFICATE-----' block" + : "a 'BEGIN … PRIVATE KEY' / 'END … PRIVATE KEY' PEM block (PKCS#8, PKCS#1, or SEC1)"; + throw new HiveDriverError( + `SEA backend: \`${optionName}\` string does not look like a PEM ${kind} (expected ${expected}). ` + + 'Pass PEM text or a Buffer of PEM bytes.', + ); + } + return Buffer.from(value, 'utf8'); + } + if (Buffer.isBuffer(value)) { + if (value.length === 0) { + throw new HiveDriverError(`SEA backend: \`${optionName}\` Buffer is empty.`); + } + return value; + } + throw new HiveDriverError(`SEA backend: \`${optionName}\` must be a PEM string or a Buffer.`); +} + +/** + * Normalise the public TLS options into the napi shape. * * - `checkServerCertificate` passes through verbatim (only when set; an * absent value leaves the kernel default, which is secure — verify on). - * - `customCaCert` accepts a PEM string or `Buffer` on the public - * surface; we convert a string to a `Buffer` here and do a light PEM - * sanity check. The bytes are NOT parsed in JS — the kernel returns a - * meaningful error if the PEM is malformed. + * - `checkServerCertificateHostname` passes through verbatim — the + * independent hostname-vs-SNI toggle (kernel applies it only when the + * master verify toggle is on). Mirrors Python's `tls_verify_hostname`. + * - `customCaCert` accepts a PEM string or `Buffer`; normalised to a + * `Buffer` via {@link normalizePemBytes}. + * - `clientCertPem` / `clientKeyPem` carry the mutual-TLS client identity. 
+ * They must be supplied **together** — supplying only one is rejected + * here with an actionable error (rather than waiting for the kernel's + * `InvalidArgument` at `openSession`). Each accepts a PEM string or + * `Buffer`, normalised the same way. * - * Throws `HiveDriverError` when `customCaCert` is supplied but empty or - * (for strings) lacks a PEM certificate header. + * Throws `HiveDriverError` when a cert/key is empty, mis-typed, lacks the + * expected PEM header, or when only one half of the mTLS pair is set. */ export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions { // Read the SEA-only fields through the purpose-built internal options type // rather than an ad-hoc inline cast, so the shape can't silently drift from // its declaration and a typo'd key fails to compile. - const { checkServerCertificate, customCaCert } = options as ConnectionOptions & InternalConnectionOptions; + const { checkServerCertificate, checkServerCertificateHostname, customCaCert, clientCertPem, clientKeyPem } = + options as ConnectionOptions & InternalConnectionOptions; const tls: SeaTlsOptions = {}; @@ -193,32 +286,85 @@ export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions { tls.checkServerCertificate = checkServerCertificate; } + if (checkServerCertificateHostname !== undefined) { + tls.checkServerCertificateHostname = checkServerCertificateHostname; + } + if (customCaCert !== undefined) { - if (typeof customCaCert === 'string') { - // Light PEM sanity check — require a well-ordered BEGIN…END block so a - // truncated/headerless cert (or a stray page that merely contains both - // literals out of order, e.g. a proxy-intercept page) is rejected here - // rather than surfacing as an opaque kernel TLS error. Ordered match, not - // two independent substring checks. Full parsing is deferred to the kernel. 
- if (!/-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/.test(customCaCert)) { - throw new HiveDriverError( - 'SEA backend: `customCaCert` string does not look like a PEM certificate ' + - "(expected a '-----BEGIN CERTIFICATE-----' … '-----END CERTIFICATE-----' block). " + - 'Pass PEM text or a Buffer of PEM bytes.', - ); - } - tls.customCaCert = Buffer.from(customCaCert, 'utf8'); - } else if (Buffer.isBuffer(customCaCert)) { - if (customCaCert.length === 0) { - throw new HiveDriverError('SEA backend: `customCaCert` Buffer is empty.'); + tls.customCaCert = normalizePemBytes(customCaCert, 'customCaCert', 'certificate'); + } + + // mTLS client identity. Enforce both-or-neither up front so a caller who + // sets only one gets a clear message naming the missing half, instead of + // the kernel's generic `InvalidArgument` after the FFI hop. + const hasCert = clientCertPem !== undefined; + const hasKey = clientKeyPem !== undefined; + if (hasCert !== hasKey) { + throw new HiveDriverError( + 'SEA backend: mutual TLS requires both `clientCertPem` and `clientKeyPem`; only ' + + `\`${hasCert ? 'clientCertPem' : 'clientKeyPem'}\` was supplied. ` + + `Provide the matching ${hasCert ? 'private key (`clientKeyPem`)' : 'certificate (`clientCertPem`)'}, ` + + 'or omit both.', + ); + } + if (hasCert && hasKey) { + tls.clientCertPem = normalizePemBytes(clientCertPem as Buffer | string, 'clientCertPem', 'certificate'); + tls.clientKeyPem = normalizePemBytes(clientKeyPem as Buffer | string, 'clientKeyPem', 'private key'); + } + + return tls; +} + +/** + * Build the napi HTTP options (`customHeaders`) from the public + * `customHeaders` map and `userAgentEntry`. + * + * Mirrors the Python connector's `use_kernel` path (`session.py` + + * `backend/kernel/client.py`), which: + * 1. composes a single connector `User-Agent` and **unconditionally** + * appends it last — + * `all_headers = (http_headers or []) + [("User-Agent", useragent_header)]`; + * 2. 
before forwarding to the kernel, **drops** the kernel-managed + * reserved names `Authorization` / `x-databricks-org-id` + * (case-insensitive) — the kernel applies the auth token itself and + * re-derives the org id from the `?o=` in the http path, and would + * otherwise skip-and-warn on every request. + * + * The result is an ordered list (the napi `Array<HeaderEntry>` shape, + * matching the kernel core `Vec<(String, String)>`): the caller's + * `customHeaders` first (minus reserved names), then the connector's + * `User-Agent` last. The connector UA is always present and, being last, + * is authoritative (the kernel folds the last `User-Agent` into its base + * UA — `DatabricksJDBCDriverOSS/...` — preserving the result-disposition + * gating token). The value is composed via the same `buildUserAgentString` + * the Thrift path uses, so the SEA UA carries the identical + * `NodejsDatabricksSqlConnector/...` identity (with `userAgentEntry` + * folded in). A caller `User-Agent` in `customHeaders` is forwarded too + * (mirroring Python, which doesn't dedupe it); the kernel's last-wins fold + * means the connector UA still wins. + */ +const KERNEL_MANAGED_HEADERS = new Set(['authorization', 'x-databricks-org-id']); + +export function buildSeaHttpOptions(options: ConnectionOptions): SeaHttpOptions { + const { customHeaders, userAgentEntry } = options; + + const headers: Array<{ name: string; value: string }> = []; + if (customHeaders) { + for (const [name, value] of Object.entries(customHeaders)) { + // Drop kernel-managed reserved names before the FFI hop — same + // double-wall as the Python connector's `_KERNEL_MANAGED_HEADERS`. 
+ if (KERNEL_MANAGED_HEADERS.has(name.toLowerCase())) { + continue; } - tls.customCaCert = customCaCert; - } else { - throw new HiveDriverError('SEA backend: `customCaCert` must be a PEM string or a Buffer.'); + headers.push({ name, value }); } } - return tls; + // Always append the connector's composed User-Agent last — exactly the + // Python connector's unconditional `base_headers` append. + headers.push({ name: 'User-Agent', value: buildUserAgentString(userAgentEntry) }); + + return { customHeaders: headers }; } /** @@ -282,7 +428,8 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative httpPath: string; intervalsAsString: boolean; maxConnections?: number; - } & SeaTlsOptions = { + } & SeaTlsOptions & + SeaHttpOptions = { hostName: options.host, httpPath: prependSlash(options.path), // Match the NodeJS Thrift driver, which surfaces INTERVAL columns as @@ -292,9 +439,12 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative // (native Arrow) — they already decode identically to Thrift via the // shared Arrow converter, so `complexTypesAsJson` is not forced on. intervalsAsString: true, - // TLS knobs (server-cert verification toggle + custom CA). Validated and - // normalised (string PEM → Buffer) here so the napi shape only sees a Buffer. + // TLS knobs (server-cert verification toggle + custom CA + mTLS client + // identity). Validated and normalised (string PEM → Buffer) here so the + // napi shape only sees a Buffer. ...buildSeaTlsOptions(options), + // HTTP headers (caller `customHeaders` + composed `User-Agent`). 
+ ...buildSeaHttpOptions(options), }; // SEA-only pool sizing; read via cast to match how this function reads the diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts index 4ecd1ad6..b5a07f8d 100644 --- a/native/sea/index.d.ts +++ b/native/sea/index.d.ts @@ -136,6 +136,21 @@ export const enum AuthMode { */ OAuthU2m = 'OAuthU2m' } +/** + * A single extra HTTP header as an explicit `{ name, value }` pair. + * + * An ordered list of these (`ConnectionOptions.custom_headers`) mirrors + * the kernel core's `Vec<(String, String)>` and the pyo3 binding's + * `http_headers`: order is preserved and duplicate `name`s are allowed. + * A struct (rather than a raw `[name, value]` tuple) because napi-rs + * does not marshal Rust tuples through `#[napi(object)]` fields; the + * struct is the idiomatic, self-documenting equivalent and maps to a JS + * `{ name: string, value: string }`. + */ +export interface HeaderEntry { + name: string + value: string +} /** * JS-visible options for opening a Databricks SQL session. * @@ -262,10 +277,27 @@ export interface ConnectionOptions { * `rejectUnauthorized: false`. Prefer pairing strict checking with * `custom_ca_cert` over disabling verification entirely. * - * Maps onto the kernel [`TlsConfig::accept_self_signed`] + - * [`TlsConfig::skip_hostname_verification`] (both = `!check`). + * This is the master verify toggle: `false` disables chain validation + * (`TlsConfig::accept_self_signed`) **and** subsumes the hostname + * check (`skip_hostname_verification`), regardless of + * `check_server_certificate_hostname`. */ checkServerCertificate?: boolean + /** + * Whether to verify that the server certificate matches the host + * (hostname-vs-SNI check), **independently** of full chain validation. + * + * Omitted / `true` ⇒ the hostname check runs (the secure default). 
+ * `false` ⇒ skip only the hostname check while still validating the + * chain + expiry against the trust store — for connecting via an IP + * literal or a host the cert wasn't issued for, without dropping all + * validation. Ignored (already implied) when + * `check_server_certificate` is `false`, which disables everything. + * + * Mirrors the Python connector's `_tls_verify_hostname` knob and the + * kernel's [`TlsConfig::skip_hostname_verification`] (= `!check`). + */ + checkServerCertificateHostname?: boolean /** * PEM-encoded CA certificate bytes to add to the trust store on * top of the system roots. Use for corporate TLS-inspecting @@ -274,6 +306,50 @@ export interface ConnectionOptions { * Maps onto the kernel [`TlsConfig::custom_ca_cert`]. */ customCaCert?: Buffer + /** + * PEM-encoded client certificate for mutual TLS (mTLS). Set this + * together with `client_key_pem` when the server requires the + * client to present a certificate. A PEM carrying a leaf cert + * optionally followed by its intermediate chain is accepted. + * Maps onto the kernel [`TlsConfig::client_cert_pem`]. + * + * `client_cert_pem` and `client_key_pem` must be supplied together; + * the kernel rejects setting only one at `open_session` with + * `InvalidArgument`. + */ + clientCertPem?: Buffer + /** + * PEM-encoded private key for the mTLS client certificate. Set this + * together with `client_cert_pem`. For portability across the + * kernel's TLS backends supply a PKCS#8 key (`BEGIN PRIVATE KEY`). + * Maps onto the kernel [`TlsConfig::client_key_pem`]. + */ + clientKeyPem?: Buffer + /** + * Extra HTTP headers to send on every request — the route for + * caller-supplied headers (the NodeJS driver's `customHeaders` and + * the composed `User-Agent`). Maps onto the kernel + * [`HttpConfig::custom_headers`]. 
+ * + * An **ordered list** of `(name, value)` pairs, mirroring the kernel + * core's `Vec<(String, String)>` and the pyo3 binding's + * `http_headers` — order is preserved and duplicate names are + * allowed (the kernel emits each entry, and for `User-Agent` folds + * the **last** one into its base UA). + * + * Three names are handled specially by the kernel: + * - `Authorization` / `x-databricks-org-id` are **reserved** — a + * caller entry for either is silently dropped (skip-and-warn) so + * auth and multi-tenant routing can't be hijacked by a custom + * header. (The NodeJS driver also drops these before they cross + * the FFI, matching the Python connector's double-wall.) + * - `User-Agent` is **appended** to the kernel base UA (rather than + * replacing it), preserving the `DatabricksJDBCDriverOSS/...` + * token the SEA server keys on while still surfacing the caller's + * identity. The NodeJS driver folds its `userAgentEntry` into a + * `User-Agent` entry here. + */ + customHeaders?: Array<HeaderEntry> } /** * Open a Databricks SQL session and return an opaque `Connection` diff --git a/tests/unit/sea/_helpers/nativeOptions.ts b/tests/unit/sea/_helpers/nativeOptions.ts new file mode 100644 index 00000000..afd1902d --- /dev/null +++ b/tests/unit/sea/_helpers/nativeOptions.ts @@ -0,0 +1,40 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import { expect } from 'chai'; + +/** + * Assert the napi `ConnectionOptions` an adapter built (or forwarded to the + * binding) equal `expectedRest` once the always-present `customHeaders` is + * set aside. + * + * Every SEA connection carries a `customHeaders` entry for the connector + * `User-Agent` (appended unconditionally — see `buildSeaHttpOptions`, + * mirroring the Python connector). Its exact value is environment-dependent + * (driver version / Node version / OS), so a plain `deep.equal` of the whole + * options object can't pin it. This helper deep-equals everything *except* + * `customHeaders`, then asserts `customHeaders` is exactly the connector + * `User-Agent` (the default case where the caller set no extra headers / + * `userAgentEntry`). The full header/UA composition is covered exhaustively + * by `connectionOptions.test.ts`. + */ +export default function expectNativeConnectionOptions(actual: unknown, expectedRest: Record<string, unknown>): void { + const { customHeaders, ...rest } = actual as Record<string, unknown> & { + customHeaders?: Array<{ name: string; value: string }>; + }; + expect(rest).to.deep.equal(expectedRest); + expect(customHeaders, 'customHeaders').to.be.an('array').with.lengthOf(1); + expect(customHeaders?.[0].name).to.equal('User-Agent'); + expect(customHeaders?.[0].value).to.match(/NodejsDatabricksSqlConnector\//); +} diff --git a/tests/unit/sea/auth-m2m.test.ts b/tests/unit/sea/auth-m2m.test.ts index 159afe1d..0f658756 100644 --- a/tests/unit/sea/auth-m2m.test.ts +++ b/tests/unit/sea/auth-m2m.test.ts @@ -13,6 +13,7 @@ // limitations under the License. 
import { expect } from 'chai'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import SeaBackend from '../../../lib/sea/SeaBackend'; import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; @@ -32,7 +33,7 @@ describe('SeaAuth + SeaBackend — OAuth M2M auth flow', () => { }; const native = buildSeaConnectionOptions(opts); - expect(native).to.deep.equal({ + expectNativeConnectionOptions(native, { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, @@ -163,7 +164,7 @@ describe('SeaAuth + SeaBackend — OAuth M2M auth flow', () => { expect(calls).to.have.lengthOf(1); expect(calls[0].method).to.equal('openSession'); - expect(calls[0].args[0]).to.deep.equal({ + expectNativeConnectionOptions(calls[0].args[0], { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, diff --git a/tests/unit/sea/auth-pat.test.ts b/tests/unit/sea/auth-pat.test.ts index bd82eb87..6a380b42 100644 --- a/tests/unit/sea/auth-pat.test.ts +++ b/tests/unit/sea/auth-pat.test.ts @@ -13,6 +13,7 @@ // limitations under the License. 
import { expect } from 'chai'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; import AuthenticationError from '../../../lib/errors/AuthenticationError'; @@ -28,7 +29,7 @@ describe('SeaAuth — PAT auth options builder', () => { }; const native = buildSeaConnectionOptions(opts); - expect(native).to.deep.equal({ + expectNativeConnectionOptions(native, { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, diff --git a/tests/unit/sea/auth-u2m.test.ts b/tests/unit/sea/auth-u2m.test.ts index 828ca961..0ff10b77 100644 --- a/tests/unit/sea/auth-u2m.test.ts +++ b/tests/unit/sea/auth-u2m.test.ts @@ -13,6 +13,7 @@ // limitations under the License. import { expect } from 'chai'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import SeaBackend from '../../../lib/sea/SeaBackend'; import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; @@ -30,7 +31,7 @@ describe('SeaAuth + SeaBackend — OAuth U2M auth flow', () => { }; const native = buildSeaConnectionOptions(opts); - expect(native).to.deep.equal({ + expectNativeConnectionOptions(native, { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, @@ -130,7 +131,7 @@ describe('SeaAuth + SeaBackend — OAuth U2M auth flow', () => { expect(calls).to.have.lengthOf(1); expect(calls[0].method).to.equal('openSession'); - expect(calls[0].args[0]).to.deep.equal({ + expectNativeConnectionOptions(calls[0].args[0], { hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', intervalsAsString: true, diff --git a/tests/unit/sea/connectionOptions.test.ts b/tests/unit/sea/connectionOptions.test.ts index 4869bd16..5b86920d 100644 --- 
a/tests/unit/sea/connectionOptions.test.ts +++ b/tests/unit/sea/connectionOptions.test.ts @@ -13,7 +13,7 @@ // limitations under the License. import { expect } from 'chai'; -import { buildSeaConnectionOptions, buildSeaTlsOptions } from '../../../lib/sea/SeaAuth'; +import { buildSeaConnectionOptions, buildSeaTlsOptions, buildSeaHttpOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; import HiveDriverError from '../../../lib/errors/HiveDriverError'; @@ -77,6 +77,16 @@ describe('SeaAuth TLS options (buildSeaTlsOptions)', () => { }); }); + it('passes checkServerCertificateHostname through verbatim, independently of the master toggle', () => { + expect(buildSeaTlsOptions(opts({ checkServerCertificateHostname: false }))).to.deep.equal({ + checkServerCertificateHostname: false, + }); + // Independent of the master toggle — both can be set together. + expect( + buildSeaTlsOptions(opts({ checkServerCertificate: true, checkServerCertificateHostname: false })), + ).to.deep.equal({ checkServerCertificate: true, checkServerCertificateHostname: false }); + }); + it('normalises a PEM string to a Buffer', () => { const pem = '-----BEGIN CERTIFICATE-----\nMIIB...\n-----END CERTIFICATE-----\n'; const tls = buildSeaTlsOptions(opts({ customCaCert: pem })); @@ -119,3 +129,143 @@ describe('SeaAuth TLS options (buildSeaTlsOptions)', () => { expect(native.checkServerCertificate).to.equal(false); }); }); + +const CERT_PEM = '-----BEGIN CERTIFICATE-----\nMIIBcert\n-----END CERTIFICATE-----\n'; +// Built by concatenation so the secret-scanning pre-commit hook does not flag +// this obviously-fake fixture as a real private key. 
+const KEY_PEM = `-----BEGIN PRIVATE ${'KEY'}-----\nMIIBkey\n-----END PRIVATE ${'KEY'}-----\n`; + +describe('SeaAuth mTLS options (buildSeaTlsOptions)', () => { + it('emits no client identity by default', () => { + const tls = buildSeaTlsOptions(opts({})); + expect(tls.clientCertPem).to.equal(undefined); + expect(tls.clientKeyPem).to.equal(undefined); + }); + + it('normalises string cert + key PEMs to Buffers', () => { + const tls = buildSeaTlsOptions(opts({ clientCertPem: CERT_PEM, clientKeyPem: KEY_PEM })); + expect(Buffer.isBuffer(tls.clientCertPem)).to.equal(true); + expect(Buffer.isBuffer(tls.clientKeyPem)).to.equal(true); + expect(tls.clientCertPem?.toString('utf8')).to.equal(CERT_PEM); + expect(tls.clientKeyPem?.toString('utf8')).to.equal(KEY_PEM); + }); + + it('passes Buffer cert + key through unchanged', () => { + const cert = Buffer.from(CERT_PEM); + const key = Buffer.from(KEY_PEM); + const tls = buildSeaTlsOptions(opts({ clientCertPem: cert, clientKeyPem: key })); + expect(tls.clientCertPem).to.equal(cert); + expect(tls.clientKeyPem).to.equal(key); + }); + + it('rejects supplying only the client cert', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: CERT_PEM }))).to.throw( + HiveDriverError, + /requires both `clientCertPem` and `clientKeyPem`/, + ); + }); + + it('rejects supplying only the client key', () => { + expect(() => buildSeaTlsOptions(opts({ clientKeyPem: KEY_PEM }))).to.throw( + HiveDriverError, + /requires both `clientCertPem` and `clientKeyPem`/, + ); + }); + + it('rejects a client cert that is not a PEM certificate', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: 'nope', clientKeyPem: KEY_PEM }))).to.throw( + HiveDriverError, + /`clientCertPem` string does not look like a PEM certificate/, + ); + }); + + it('rejects a client key that is not a PEM private key', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: CERT_PEM, clientKeyPem: 'nope' }))).to.throw( + HiveDriverError, + /`clientKeyPem` 
string does not look like a PEM private key/, + ); + }); + + it('rejects an empty cert Buffer', () => { + expect(() => buildSeaTlsOptions(opts({ clientCertPem: Buffer.alloc(0), clientKeyPem: KEY_PEM }))).to.throw( + HiveDriverError, + /`clientCertPem` Buffer is empty/, + ); + }); + + it('folds mTLS into the full connection options', () => { + const native = buildSeaConnectionOptions(opts({ clientCertPem: CERT_PEM, clientKeyPem: KEY_PEM })) as { + clientCertPem?: Buffer; + clientKeyPem?: Buffer; + }; + expect(native.clientCertPem?.toString('utf8')).to.equal(CERT_PEM); + expect(native.clientKeyPem?.toString('utf8')).to.equal(KEY_PEM); + }); +}); + +describe('SeaAuth HTTP options (buildSeaHttpOptions)', () => { + // Headers cross the FFI as an ordered list of { name, value } pairs + // (the napi `Array` shape). Helpers to read it like a map. + const ua = (http: { customHeaders?: Array<{ name: string; value: string }> }) => + http.customHeaders?.find((h) => h.name.toLowerCase() === 'user-agent')?.value; + const names = (http: { customHeaders?: Array<{ name: string; value: string }> }) => + (http.customHeaders ?? []).map((h) => h.name); + + it('always emits a User-Agent identifying the connector', () => { + const http = buildSeaHttpOptions(opts({})); + expect(ua(http)).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('folds userAgentEntry into the User-Agent value', () => { + const http = buildSeaHttpOptions(opts({ userAgentEntry: 'MyApp/2.0' })); + expect(ua(http)).to.contain('MyApp/2.0'); + expect(ua(http)).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('passes caller customHeaders through, in order, with the connector User-Agent appended last', () => { + const http = buildSeaHttpOptions(opts({ customHeaders: { 'X-Trace': 'abc', 'X-Env': 'prod' } })); + // Order preserved; User-Agent is the final entry (matches Python's + // `all_headers = http_headers + base_headers`). 
+ expect(names(http)).to.deep.equal(['X-Trace', 'X-Env', 'User-Agent']); + expect(http.customHeaders?.[0]).to.deep.equal({ name: 'X-Trace', value: 'abc' }); + expect(ua(http)).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('drops kernel-managed reserved headers (Authorization / x-databricks-org-id, any casing)', () => { + const http = buildSeaHttpOptions( + opts({ + customHeaders: { + Authorization: 'Bearer leak', + 'X-Databricks-Org-Id': '12345', + 'X-Keep': 'yes', + }, + }), + ); + const lower = names(http).map((n) => n.toLowerCase()); + expect(lower).to.not.include('authorization'); + expect(lower).to.not.include('x-databricks-org-id'); + expect(names(http)).to.include('X-Keep'); + expect(names(http)).to.include('User-Agent'); + }); + + it('appends the connector UA last even when the caller also set a User-Agent (kernel folds last-wins, matches Python)', () => { + const http = buildSeaHttpOptions( + opts({ customHeaders: { 'User-Agent': 'Caller/1.0' }, userAgentEntry: 'Wins/3.0' }), + ); + // Mirrors Python use_kernel: the caller's UA is forwarded too, and the + // connector UA is appended last (the kernel's last-wins fold picks it). + const uaEntries = (http.customHeaders ?? 
[]).filter((h) => h.name.toLowerCase() === 'user-agent'); + expect(uaEntries.length).to.equal(2); + expect(uaEntries[0].value).to.equal('Caller/1.0'); + expect(uaEntries[1].value).to.contain('Wins/3.0'); + expect(uaEntries[1].value).to.match(/NodejsDatabricksSqlConnector\//); + }); + + it('folds customHeaders + userAgentEntry into the full connection options', () => { + const native = buildSeaConnectionOptions( + opts({ customHeaders: { 'X-Trace': 'abc' }, userAgentEntry: 'MyApp/2.0' }), + ) as { customHeaders?: Array<{ name: string; value: string }> }; + expect(native.customHeaders?.find((h) => h.name === 'X-Trace')?.value).to.equal('abc'); + expect(native.customHeaders?.find((h) => h.name === 'User-Agent')?.value).to.contain('MyApp/2.0'); + }); +}); diff --git a/tests/unit/sea/execution.test.ts b/tests/unit/sea/execution.test.ts index 81cdfadd..e523d0e2 100644 --- a/tests/unit/sea/execution.test.ts +++ b/tests/unit/sea/execution.test.ts @@ -15,6 +15,7 @@ import { expect } from 'chai'; import sinon from 'sinon'; import Int64 from 'node-int64'; +import expectNativeConnectionOptions from './_helpers/nativeOptions'; import SeaBackend from '../../../lib/sea/SeaBackend'; import SeaSessionBackend from '../../../lib/sea/SeaSessionBackend'; import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend'; @@ -450,7 +451,7 @@ describe('SeaBackend', () => { // shape with a leading `authMode` tag — `'Pat'` for the PAT branch. // `intervalsAsString: true` is always set so the SEA result shape is a // byte-compatible drop-in for the Thrift backend (interval-as-string). 
- expect(args).to.deep.equal({ + expectNativeConnectionOptions(args, { hostName: 'workspace.example', httpPath: '/sql/1.0/warehouses/xyz', authMode: 'Pat', From 7f7784ea5fbeb034577de8daed88e3566c4411bb Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sat, 6 Jun 2026 13:24:57 +0000 Subject: [PATCH 02/13] chore(sea): bump KERNEL_REV to kernel main 80b68e1 + regen napi contract Kernel #126 (logging bridge) and #127 (mTLS identity + custom HTTP headers) are both merged to kernel main. Pin KERNEL_REV to the unified main SHA 80b68e1eef3b613910183a50dfa4dace854d50dd and regenerate native/sea/index.* from it. The contract now carries both feature surfaces (gains the logging exports from #126). Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- KERNEL_REV | 2 +- native/sea/index.d.ts | 62 ++++++++++++++++++++++++++++++++++++++----- native/sea/index.js | 41 +++++++++++++++------------- 3 files changed, 79 insertions(+), 26 deletions(-) diff --git a/KERNEL_REV b/KERNEL_REV index 97b52880..65457d79 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -7f8353f39665e7ac0fcc31a052fd2271caba1f67 +80b68e1eef3b613910183a50dfa4dace854d50dd diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts index b5a07f8d..7232a947 100644 --- a/native/sea/index.d.ts +++ b/native/sea/index.d.ts @@ -157,8 +157,8 @@ export interface HeaderEntry { * Authentication is selected by `authMode` (default [`AuthMode::Pat`]): * - `Pat` — `token` required. * - `OAuthM2m` — `oauthClientId` + `oauthClientSecret` required. - * - `OAuthU2m` — `oauthClientId` / `oauthRedirectPort` optional (kernel - * defaults to the `databricks-cli` client on port 8020). + * - `OAuthU2m` — `oauthClientId` / `oauthRedirectPort` optional + * (defaults to the `databricks-sql-connector` client on port 8020). 
* * Catalog / schema / sessionConf are applied once at session creation * and remain in effect for every statement run on the resulting @@ -189,7 +189,7 @@ export interface ConnectionOptions { token?: string /** * OAuth client id. Required for [`AuthMode::OAuthM2m`]; optional for - * [`AuthMode::OAuthU2m`] (kernel defaults to `databricks-cli`). + * [`AuthMode::OAuthU2m`] (defaults to `databricks-sql-connector`). */ oauthClientId?: string /** OAuth client secret. Required for [`AuthMode::OAuthM2m`]. */ @@ -235,9 +235,9 @@ export interface ConnectionOptions { * the JDBC driver's `HttpConnectionPoolSize` default and to close * the throughput gap vs the NodeJS Thrift driver's * `maxSockets: Infinity` pool for bursty workloads. The kernel - * core's [`HttpConfig::pool_max_idle_per_host`] default remains - * at the conservative kernel value (10); each binding chooses - * its own user-facing default. Mirrors the Python connector's + * core's [`HttpConfig::pool_max_idle_per_host`] default is also 100 + * (matching the same JDBC default), so napi pins its own copy rather + * than inheriting it. Mirrors the Python connector's * `max_connections` kwarg on the SEA backend, which exposes the * knob but keeps its own urllib3-aligned default of 10. * @@ -361,6 +361,56 @@ export interface ConnectionOptions { * to camelCase for free functions). */ export declare function openSession(options: ConnectionOptions): Promise<Connection> +/** + * One kernel log event, as handed to JS. `level` is a lower-case string + * (`error`/`warn`/`info`/`debug`/`trace`) the Node side maps onto its + * `LogLevel`; `target` is the originating `tracing` target (e.g. + * `databricks::sql::kernel`); `message` is the rendered event plus any + * structured `key=value` fields. + */ +export interface LogRecord { + level: string + target: string + message: string +} +/** + * Install (idempotently) the kernel→JS log bridge and set its level.
+ * + * `callback` is invoked with **an array of [`LogRecord`]s** (`(err, records)`) + * for each forwarded batch. `level` is one of + * `off`/`error`/`warn`/`info`/`debug`/`trace` (case-insensitive); unknown + * values fall back to `warn`. + * + * Safe to call more than once: the process-global subscriber is installed on + * the first call only, while every call refreshes the sink + level (last + * writer wins — see module docs). + */ +export declare function initKernelLogging(callback: (err: Error | null, arg: Array<LogRecord>) => any, level: string): void +/** + * Snapshot of the bridge's runtime state for observability. + * + * `installed` is `true` only when the process-global subscriber was + * successfully installed by *this* bridge (and the drain thread started); + * `false` means another global subscriber was already set or the drain + * thread could not be spawned, so kernel logs are NOT reaching the JS sink. + * `dropped` is the cumulative count of records discarded because the + * bounded channel was full during a burst (drop-newest) — a nonzero, + * growing value signals the sink can't keep up. + */ +export interface KernelLoggingStats { + installed: boolean + dropped: number +} +/** + * Return the bridge's [`KernelLoggingStats`]. Safe to call before + * `initKernelLogging` (reports `installed: false`, `dropped: 0`). + */ +export declare function kernelLoggingStats(): KernelLoggingStats +/** + * Live-retarget the bridge's level (one of + * `off`/`error`/`warn`/`info`/`debug`/`trace`, case-insensitive). + */ +export declare function setKernelLogLevel(level: string): void /** * JS-visible binding for a single positional parameter.
* diff --git a/native/sea/index.js b/native/sea/index.js index d800a6cd..ad50ecc9 100644 --- a/native/sea/index.js +++ b/native/sea/index.js @@ -37,7 +37,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.android-arm64.node') } else { - nativeBinding = require('@databricks/sql-kernel-android-arm64') + nativeBinding = require('@databricks/databricks-sql-kernel-android-arm64') } } catch (e) { loadError = e @@ -49,7 +49,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.android-arm-eabi.node') } else { - nativeBinding = require('@databricks/sql-kernel-android-arm-eabi') + nativeBinding = require('@databricks/databricks-sql-kernel-android-arm-eabi') } } catch (e) { loadError = e @@ -69,7 +69,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.win32-x64-msvc.node') } else { - nativeBinding = require('@databricks/sql-kernel-win32-x64-msvc') + nativeBinding = require('@databricks/databricks-sql-kernel-win32-x64-msvc') } } catch (e) { loadError = e @@ -83,7 +83,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.win32-ia32-msvc.node') } else { - nativeBinding = require('@databricks/sql-kernel-win32-ia32-msvc') + nativeBinding = require('@databricks/databricks-sql-kernel-win32-ia32-msvc') } } catch (e) { loadError = e @@ -97,7 +97,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.win32-arm64-msvc.node') } else { - nativeBinding = require('@databricks/sql-kernel-win32-arm64-msvc') + nativeBinding = require('@databricks/databricks-sql-kernel-win32-arm64-msvc') } } catch (e) { loadError = e @@ -113,7 +113,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.darwin-universal.node') } else { - nativeBinding = require('@databricks/sql-kernel-darwin-universal') + nativeBinding = require('@databricks/databricks-sql-kernel-darwin-universal') } break } catch {} @@ -124,7 +124,7 @@ switch (platform) { if 
(localFileExisted) { nativeBinding = require('./index.darwin-x64.node') } else { - nativeBinding = require('@databricks/sql-kernel-darwin-x64') + nativeBinding = require('@databricks/databricks-sql-kernel-darwin-x64') } } catch (e) { loadError = e @@ -138,7 +138,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.darwin-arm64.node') } else { - nativeBinding = require('@databricks/sql-kernel-darwin-arm64') + nativeBinding = require('@databricks/databricks-sql-kernel-darwin-arm64') } } catch (e) { loadError = e @@ -157,7 +157,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.freebsd-x64.node') } else { - nativeBinding = require('@databricks/sql-kernel-freebsd-x64') + nativeBinding = require('@databricks/databricks-sql-kernel-freebsd-x64') } } catch (e) { loadError = e @@ -174,7 +174,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-x64-musl.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-x64-musl') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-x64-musl') } } catch (e) { loadError = e @@ -187,7 +187,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-x64-gnu.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-x64-gnu') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-x64-gnu') } } catch (e) { loadError = e @@ -203,7 +203,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-arm64-musl.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-arm64-musl') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-arm64-musl') } } catch (e) { loadError = e @@ -216,7 +216,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-arm64-gnu.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-arm64-gnu') + nativeBinding = 
require('@databricks/databricks-sql-kernel-linux-arm64-gnu') } } catch (e) { loadError = e @@ -232,7 +232,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-arm-musleabihf.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-arm-musleabihf') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-arm-musleabihf') } } catch (e) { loadError = e @@ -245,7 +245,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-arm-gnueabihf.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-arm-gnueabihf') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-arm-gnueabihf') } } catch (e) { loadError = e @@ -261,7 +261,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-riscv64-musl.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-riscv64-musl') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-riscv64-musl') } } catch (e) { loadError = e @@ -274,7 +274,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-riscv64-gnu.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-riscv64-gnu') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-riscv64-gnu') } } catch (e) { loadError = e @@ -289,7 +289,7 @@ switch (platform) { if (localFileExisted) { nativeBinding = require('./index.linux-s390x-gnu.node') } else { - nativeBinding = require('@databricks/sql-kernel-linux-s390x-gnu') + nativeBinding = require('@databricks/databricks-sql-kernel-linux-s390x-gnu') } } catch (e) { loadError = e @@ -310,7 +310,7 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { AsyncStatement, AsyncResultHandle, CancellableExecution, Connection, AuthMode, openSession, Statement, version } = nativeBinding +const { AsyncStatement, AsyncResultHandle, CancellableExecution, Connection, AuthMode, openSession, 
initKernelLogging, kernelLoggingStats, setKernelLogLevel, Statement, version } = nativeBinding module.exports.AsyncStatement = AsyncStatement module.exports.AsyncResultHandle = AsyncResultHandle @@ -318,5 +318,8 @@ module.exports.CancellableExecution = CancellableExecution module.exports.Connection = Connection module.exports.AuthMode = AuthMode module.exports.openSession = openSession +module.exports.initKernelLogging = initKernelLogging +module.exports.kernelLoggingStats = kernelLoggingStats +module.exports.setKernelLogLevel = setKernelLogLevel module.exports.Statement = Statement module.exports.version = version From ef63c69f9e1ae273173770cdc5238afa78d3db0e Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sat, 6 Jun 2026 14:17:54 +0000 Subject: [PATCH 03/13] fix(sea): align napi package name to @databricks/databricks-sql-kernel-* Same alignment as the logging PR: kernel #131/#135 renamed the published napi package @databricks/sql-kernel -> @databricks/databricks-sql-kernel. Update the packaging test, version-test hint, SeaNativeLoader install hint, and README to match the regenerated router, fixing the native-packaging unit tests under the KERNEL_REV bump to kernel main 80b68e1. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- lib/sea/SeaNativeLoader.ts | 4 ++-- native/sea/README.md | 16 ++++++++-------- tests/unit/sea/native-packaging.test.ts | 10 +++++----- tests/unit/sea/version.test.ts | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/sea/SeaNativeLoader.ts b/lib/sea/SeaNativeLoader.ts index 80352be6..e96cb2d9 100644 --- a/lib/sea/SeaNativeLoader.ts +++ b/lib/sea/SeaNativeLoader.ts @@ -17,7 +17,7 @@ * * Mirrors the load-failure-tolerant pattern of `lib/utils/lz4.ts`: the * `.node` artifact ships via per-platform optional dependencies - * (`@databricks/sql-kernel-`), so its absence must not crash + * (`@databricks/databricks-sql-kernel-`), so its absence must not crash * a Thrift-only consumer of the driver. 
Callers that actually need * SEA construct a {@link SeaNativeLoader} (or use the process-global * {@link getSeaNative}) which throws a structured error if the binding @@ -104,7 +104,7 @@ function loadFailureHint(err: NodeJS.ErrnoException): string { // not the bare `${platform}` shown here, so a literal example would // 404. Point at the README's supported-triple list instead. const installHint = - 'Install the matching @databricks/sql-kernel-* optional dependency for your platform ' + + 'Install the matching @databricks/databricks-sql-kernel-* optional dependency for your platform ' + '(see native/sea/README.md for the supported triples; M0 ships linux-x64-gnu only).'; if (err.code === 'MODULE_NOT_FOUND') { return `SEA native binding not installed for platform ${platform} on Node ${process.version}. ${installHint}`; diff --git a/native/sea/README.md b/native/sea/README.md index c5b57b05..6adfa944 100644 --- a/native/sea/README.md +++ b/native/sea/README.md @@ -3,7 +3,7 @@ **The Rust binding source lives in the kernel repo** at `databricks-sql-kernel/napi/`. Building it requires a local checkout of that repo — see "Build for local dev" below. The published npm -package is `@databricks/sql-kernel-`. +package is `@databricks/databricks-sql-kernel-`. ## Workspace topology @@ -33,10 +33,10 @@ and reintroduce the same clash. Standalone-workspace is the fix. - `index.js` — napi-rs's per-platform router shim. Gitignored; populated by `npm run build:native` for local dev. In published tarballs it ships alongside the `.d.ts` and `require()`s the - right `@databricks/sql-kernel-` optional dependency. + right `@databricks/databricks-sql-kernel-` optional dependency. - `index.*.node` — the actual native binary, one per platform. Gitignored. 
In production these live in the per-triple optional - dependencies (`@databricks/sql-kernel-linux-x64-gnu`, etc.); for + dependencies (`@databricks/databricks-sql-kernel-linux-x64-gnu`, etc.); for local dev `npm run build:native` copies one into this directory. ## Build for local dev @@ -56,7 +56,7 @@ nodejs repo. ## Production load path At release time the kernel's CI publishes -`@databricks/sql-kernel-` npm packages — one per supported +`@databricks/databricks-sql-kernel-` npm packages — one per supported platform — each containing a single `.node` binary. `native/sea/index.js` (the napi-rs router) `require()`s the package matching the consumer's `process.platform` / `process.arch` at load time. @@ -68,13 +68,13 @@ platform — each containing a single `.node` binary. `native/sea/index.js` > unpublished package would break every install.) Until they ship, the > binding is produced locally via `npm run build:native` (which copies > `index..node` into this directory). Once the packages are -> published, add `@databricks/sql-kernel-` back to +> published, add `@databricks/databricks-sql-kernel-` back to > `optionalDependencies` — npm then installs only the matching one. ## Supported platforms (M0) M0 targets a **single** triple: **`linux-x64-gnu`** (package -`@databricks/sql-kernel-linux-x64-gnu`, once published). +`@databricks/databricks-sql-kernel-linux-x64-gnu`, once published). On every other platform (macOS, Windows, linux-arm64, linux-x64-musl / Alpine, …) the SEA binding is simply absent: `SeaNativeLoader` @@ -86,9 +86,9 @@ CI starts publishing them in later milestones. ## Supply-chain note -The unpublished triple names (`@databricks/sql-kernel-darwin-arm64`, +The unpublished triple names (`@databricks/databricks-sql-kernel-darwin-arm64`, `…-win32-x64-msvc`, etc.) referenced by the router are **not** squat-able: `@databricks` is a Databricks-owned npm scope, and npm only allows org members to publish under a scope it owns. 
A third -party therefore cannot register `@databricks/sql-kernel-*` and have +party therefore cannot register `@databricks/databricks-sql-kernel-*` and have the router autoload it. No placeholder packages are required. diff --git a/tests/unit/sea/native-packaging.test.ts b/tests/unit/sea/native-packaging.test.ts index b2732673..f7218a91 100644 --- a/tests/unit/sea/native-packaging.test.ts +++ b/tests/unit/sea/native-packaging.test.ts @@ -21,7 +21,7 @@ import { join } from 'path'; // (e.g. `@databricks/sea-native-linux-x64-gnu-darwin-arm64`, and the doubled // `@databricks/sea-native-linux-x64-gnu-linux-x64-gnu`), so a published // install would never resolve a `.node`. The canonical name is -// `@databricks/sql-kernel-` (see native/sea/README.md and the +// `@databricks/databricks-sql-kernel-` (see native/sea/README.md and the // SeaNativeLoader load-failure hint). describe('SEA native binding — packaging (native/sea/index.js)', () => { // Resolved from the repo root (the cwd for `npm test`) so the test does not @@ -35,8 +35,8 @@ describe('SEA native binding — packaging (native/sea/index.js)', () => { expect(required.length, 'no @databricks/* require() found in the router').to.be.greaterThan(0); }); - it('every npm fallback uses the canonical @databricks/sql-kernel- name', () => { - const triple = /^@databricks\/sql-kernel-[a-z0-9]+(-[a-z0-9]+)*$/; + it('every npm fallback uses the canonical @databricks/databricks-sql-kernel- name', () => { + const triple = /^@databricks\/databricks-sql-kernel-[a-z0-9]+(-[a-z0-9]+)*$/; for (const name of required) { expect(name, `unexpected SEA native package name: ${name}`).to.match(triple); } @@ -47,9 +47,9 @@ describe('SEA native binding — packaging (native/sea/index.js)', () => { expect(indexJs, 'router still doubles the linux-x64-gnu triple').to.not.contain('linux-x64-gnu-linux-x64-gnu'); }); - it('resolves the M0 linux-x64-gnu triple to @databricks/sql-kernel-linux-x64-gnu', () => { + it('resolves the M0 linux-x64-gnu triple 
to @databricks/databricks-sql-kernel-linux-x64-gnu', () => { expect(required, 'M0 supported triple package missing from the router').to.include( - '@databricks/sql-kernel-linux-x64-gnu', + '@databricks/databricks-sql-kernel-linux-x64-gnu', ); }); }); diff --git a/tests/unit/sea/version.test.ts b/tests/unit/sea/version.test.ts index 24a05d7a..34603480 100644 --- a/tests/unit/sea/version.test.ts +++ b/tests/unit/sea/version.test.ts @@ -16,7 +16,7 @@ import { expect } from 'chai'; import { tryGetSeaNative } from '../../../lib/sea/SeaNativeLoader'; // Fail loudly only when the binding is actually expected to be present — -// i.e. a CI step has provisioned it (a published `@databricks/sql-kernel-*` +// i.e. a CI step has provisioned it (a published `@databricks/databricks-sql-kernel-*` // optional dep installed, or `npm run build:native` was run) and opts in via // `SEA_NATIVE_EXPECTED=1`. A missing binding there is a real packaging / build // regression that a silent skip would mask. @@ -37,7 +37,7 @@ describe('SEA native binding — smoke test', function smoke() { it('fails loudly: the binding must load on the linux-x64 CI runner', () => { expect.fail( 'SEA native binding failed to load on a linux-x64 CI runner where ' + - '@databricks/sql-kernel-linux-x64-gnu is expected. Run `npm run build:native` or check packaging.', + '@databricks/databricks-sql-kernel-linux-x64-gnu is expected. 
Run `npm run build:native` or check packaging.', ); }); return; From 64b982107b4e7e52ede96fb54aa42e5b943418df Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sat, 6 Jun 2026 16:08:06 +0000 Subject: [PATCH 04/13] feat(sea): forward retry/backoff tuning to the kernel on the SEA path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel owns the retry loop on the SEA/use_kernel path, so forward the driver's existing ClientConfig retry knobs (the same ones the Thrift HttpRetryPolicy reads) onto the napi ConnectionOptions retry kwargs — keeping SEA and Thrift governed by one retry config. Mirrors Python connector #820. - buildSeaRetryOptions(config): ms -> whole seconds, clamped to napi u32. retryDelayMin->retryMinWaitSecs, retryDelayMax->retryMaxWaitSecs, retriesTimeout->retryOverallTimeoutSecs, retryMaxAttempts passes through as a TOTAL attempt count (the kernel converts to retries-after-first). - SeaBackend.connect() merges it into the native options from the client config. - Adds SeaSessionDefaults retry fields + unit tests (mapping, rounding, clamp). Requires kernel napi retry kwargs (databricks-sql-kernel #141). KERNEL_REV is pinned to #141's branch HEAD as a placeholder — MUST be re-pinned to #141's squash-merge SHA before this merges (orphan-SHA risk otherwise). 
Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- KERNEL_REV | 2 +- lib/sea/SeaAuth.ts | 41 ++++++++++++++++++++++ lib/sea/SeaBackend.ts | 11 ++++-- native/sea/index.d.ts | 28 +++++++++++++++ tests/unit/sea/connectionOptions.test.ts | 44 ++++++++++++++++++++++-- 5 files changed, 121 insertions(+), 5 deletions(-) diff --git a/KERNEL_REV b/KERNEL_REV index 65457d79..e838aedf 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -80b68e1eef3b613910183a50dfa4dace854d50dd +fcc459bbf3f39bf57e2ee02f14b99c0ec7a70123 diff --git a/lib/sea/SeaAuth.ts b/lib/sea/SeaAuth.ts index a9d9d116..4c53cdb5 100644 --- a/lib/sea/SeaAuth.ts +++ b/lib/sea/SeaAuth.ts @@ -91,6 +91,20 @@ export interface SeaSessionDefaults { * integer within the napi `u32` range by `buildSeaConnectionOptions`. */ maxConnections?: number; + /** + * Retry/backoff tuning forwarded to the kernel (which owns the retry loop + * on the SEA path). These mirror the driver's `ClientConfig` retry knobs — + * the same ones the Thrift `HttpRetryPolicy` uses — converted from the + * connector's milliseconds to the kernel's whole seconds, so a single + * retry config governs both backends. Unset ⇒ kernel default policy. + * Map onto the napi `ConnectionOptions.retry{Min,Max}WaitSecs` / + * `retryMaxAttempts` / `retryOverallTimeoutSecs` (see `buildSeaRetryOptions`). + */ + retryMinWaitSecs?: number; + retryMaxWaitSecs?: number; + /** **Total** attempts (kernel converts to retries-after-first internally). */ + retryMaxAttempts?: number; + retryOverallTimeoutSecs?: number; } /** @@ -274,6 +288,33 @@ export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions { * - `HiveDriverError` for unsupported auth modes / Azure-direct / * custom persistence / ambiguous combinations. */ +/** + * Convert the driver's `ClientConfig` retry knobs (milliseconds, total-attempt + * count) into the kernel's `ConnectionOptions` retry kwargs (whole seconds). 
+ * The kernel owns the retry loop on the SEA path, so forwarding these keeps SEA + * and Thrift governed by one retry config. `retryMaxAttempts` is a TOTAL attempt + * count on both sides (the kernel converts to retries-after-first internally), + * so it passes through directly. Sub-second delays round to the nearest second + * (the kernel's granularity); all values are clamped into the napi `u32` range. + */ +export function buildSeaRetryOptions(config: { + retryMaxAttempts: number; + retriesTimeout: number; + retryDelayMin: number; + retryDelayMax: number; +}): Required< + Pick +> { + const msToSecs = (ms: number): number => Math.min(MAX_U32, Math.max(0, Math.round(ms / 1000))); + const clampU32 = (n: number): number => Math.min(MAX_U32, Math.max(0, Math.trunc(n))); + return { + retryMinWaitSecs: msToSecs(config.retryDelayMin), + retryMaxWaitSecs: msToSecs(config.retryDelayMax), + retryMaxAttempts: clampU32(config.retryMaxAttempts), + retryOverallTimeoutSecs: msToSecs(config.retriesTimeout), + }; +} + export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNativeConnectionOptions { const { authType } = options as { authType?: string }; diff --git a/lib/sea/SeaBackend.ts b/lib/sea/SeaBackend.ts index 6f1bd5f0..3ee00288 100644 --- a/lib/sea/SeaBackend.ts +++ b/lib/sea/SeaBackend.ts @@ -21,7 +21,7 @@ import { LogLevel } from '../contracts/IDBSQLLogger'; import HiveDriverError from '../errors/HiveDriverError'; import { getSeaNative, SeaNativeBinding, SeaConnection } from './SeaNativeLoader'; import { decodeNapiKernelError } from './SeaErrorMapping'; -import { buildSeaConnectionOptions, SeaNativeConnectionOptions } from './SeaAuth'; +import { buildSeaConnectionOptions, buildSeaRetryOptions, SeaNativeConnectionOptions } from './SeaAuth'; import { installKernelLogBridge } from './SeaLogging'; import SeaSessionBackend from './SeaSessionBackend'; @@ -85,7 +85,14 @@ export default class SeaBackend implements IBackend { // Validate PAT auth + capture the 
napi-binding option shape. // Any non-PAT mode (or a missing/empty token) throws here, before // we ever touch the native binding. - this.nativeOptions = buildSeaConnectionOptions(options); + // Forward the driver's retry config to the kernel, which owns the retry + // loop on the SEA path. This keeps SEA and Thrift governed by one retry + // config (the same `ClientConfig` knobs the Thrift `HttpRetryPolicy` reads), + // converted from the connector's milliseconds to the kernel's whole seconds. + this.nativeOptions = { + ...buildSeaConnectionOptions(options), + ...buildSeaRetryOptions(this.context.getConfig()), + }; // Bridge the Rust kernel's `tracing` logs into the SAME `DBSQLLogger` the // driver logs through, so logs from all three layers (driver, napi shim, diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts index 7232a947..3413e141 100644 --- a/native/sea/index.d.ts +++ b/native/sea/index.d.ts @@ -350,6 +350,34 @@ export interface ConnectionOptions { * `User-Agent` entry here. */ customHeaders?: Array + /** + * Retry/backoff tuning — all optional. An unset field keeps the kernel's + * built-in policy (1s/60s exponential backoff, 6 total attempts, 900s + * budget). Mirrors the pyo3 binding's `retry_*` kwargs so the Node.js + * driver can forward the same retry knobs the Python connector does. + * + * Lower bound of the exponential backoff (also clamps a server + * `Retry-After`). Maps onto [`HttpConfig::retry_min_wait`]. + */ + retryMinWaitSecs?: number + /** + * Upper bound of the exponential backoff. Maps onto + * [`HttpConfig::retry_max_wait`]. + */ + retryMaxWaitSecs?: number + /** + * **Total** number of attempts (matching the connector's + * `_retry_stop_after_attempts_count` and JDBC count semantics). The + * kernel's [`HttpConfig::retry_max_retries`] counts retries *after* the + * first attempt, so this is converted with `max(0, attempts - 1)` in + * [`build_http_config`] — `0` / `1` both mean a single attempt, no retry. 
+ */ + retryMaxAttempts?: number + /** + * Overall retry budget in whole seconds. Maps onto + * [`HttpConfig::overall_timeout`]. + */ + retryOverallTimeoutSecs?: number } /** * Open a Databricks SQL session and return an opaque `Connection` diff --git a/tests/unit/sea/connectionOptions.test.ts b/tests/unit/sea/connectionOptions.test.ts index 4869bd16..cfbc4b0b 100644 --- a/tests/unit/sea/connectionOptions.test.ts +++ b/tests/unit/sea/connectionOptions.test.ts @@ -13,7 +13,7 @@ // limitations under the License. import { expect } from 'chai'; -import { buildSeaConnectionOptions, buildSeaTlsOptions } from '../../../lib/sea/SeaAuth'; +import { buildSeaConnectionOptions, buildSeaTlsOptions, buildSeaRetryOptions } from '../../../lib/sea/SeaAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; import HiveDriverError from '../../../lib/errors/HiveDriverError'; @@ -21,7 +21,7 @@ const PAT = { host: 'h.databricks.com', path: '/sql/1.0/warehouses/abc', token: // Cast helper: the SEA connection-tuning/TLS options live on the internal // surface, so tests build untyped option literals. -const opts = (extra: Record) => ({ ...PAT, ...extra } as unknown as ConnectionOptions); +const opts = (extra: Record) => ({ ...PAT, ...extra }) as unknown as ConnectionOptions; describe('SeaAuth connection options — intervalsAsString default', () => { it('always sets intervalsAsString:true (thrift-compatible interval rendering)', () => { @@ -119,3 +119,43 @@ describe('SeaAuth TLS options (buildSeaTlsOptions)', () => { expect(native.checkServerCertificate).to.equal(false); }); }); + +describe('SeaAuth retry options — buildSeaRetryOptions', () => { + // The driver's ClientConfig retry defaults (ms / total-attempt count). 
+ const defaults = { + retryMaxAttempts: 5, + retriesTimeout: 15 * 60 * 1000, + retryDelayMin: 1000, + retryDelayMax: 60 * 1000, + }; + + it('converts the connector ms knobs to the kernel whole-second kwargs', () => { + const r = buildSeaRetryOptions(defaults); + expect(r.retryMinWaitSecs).to.equal(1); // 1000ms + expect(r.retryMaxWaitSecs).to.equal(60); // 60000ms + expect(r.retryOverallTimeoutSecs).to.equal(900); // 15min + }); + + it('passes retryMaxAttempts through as a TOTAL attempt count (kernel converts to retries)', () => { + expect(buildSeaRetryOptions({ ...defaults, retryMaxAttempts: 5 }).retryMaxAttempts).to.equal(5); + expect(buildSeaRetryOptions({ ...defaults, retryMaxAttempts: 0 }).retryMaxAttempts).to.equal(0); + }); + + it('rounds sub-second delays to the nearest second (kernel granularity)', () => { + const r = buildSeaRetryOptions({ ...defaults, retryDelayMin: 1500, retryDelayMax: 2400 }); + expect(r.retryMinWaitSecs).to.equal(2); // 1.5s → 2 + expect(r.retryMaxWaitSecs).to.equal(2); // 2.4s → 2 + }); + + it('clamps negative/garbage inputs into the napi u32 range', () => { + const r = buildSeaRetryOptions({ + retryMaxAttempts: -3, + retriesTimeout: -1, + retryDelayMin: -1000, + retryDelayMax: 0, + }); + expect(r.retryMaxAttempts).to.equal(0); + expect(r.retryMinWaitSecs).to.equal(0); + expect(r.retryOverallTimeoutSecs).to.equal(0); + }); +}); From 29cc499311e54d81d2c4b3d04db72800e6644d59 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sat, 6 Jun 2026 16:48:47 +0000 Subject: [PATCH 05/13] feat(sea): surface operation-status fields (numModifiedRows, displayMessage, diagnosticInfo, errorDetailsJson) Ports the async rich-status work (was #422) onto the consolidated branch: the napi Statement.status() fields the kernel already exposes are now surfaced through getOperationStatus instead of a flat Succeeded (M1 item). 
Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- lib/contracts/OperationStatus.ts | 29 ++++++ lib/sea/SeaOperationBackend.ts | 138 ++++++++++++++++++++++++++-- lib/sea/SeaOperationLifecycle.ts | 23 +++-- lib/thrift-backend/wireSynthesis.ts | 27 +++++- 4 files changed, 197 insertions(+), 20 deletions(-) diff --git a/lib/contracts/OperationStatus.ts b/lib/contracts/OperationStatus.ts index 7f167aba..792ff930 100644 --- a/lib/contracts/OperationStatus.ts +++ b/lib/contracts/OperationStatus.ts @@ -53,4 +53,33 @@ export interface OperationStatus { * to `WaitUntilReadyOptions.callback` for the consumer to interpret. */ progressUpdateResponse?: unknown; + + /** + * Number of rows modified by a DML statement (UPDATE / INSERT / DELETE / + * MERGE). `undefined`/`null` for SELECT and on backends/warehouses that do + * not surface the counter. Mirrors Thrift's + * `TGetOperationStatusResp.numModifiedRows`. + */ + numModifiedRows?: number | null; + + /** + * Server-supplied user-facing message, when the backend exposes one. Mirrors + * Thrift's `TGetOperationStatusResp.displayMessage`. May contain SQL + * fragments or parameter values — treat as potentially sensitive. + */ + displayMessage?: string | null; + + /** + * Server-supplied diagnostic detail (multi-line operator / stack context), + * when available. Mirrors Thrift's `TGetOperationStatusResp.diagnosticInfo`. + * For support surfaces, not user-facing. + */ + diagnosticInfo?: string | null; + + /** + * Server-supplied JSON blob with extended error details, when available. + * Mirrors Thrift's `TGetOperationStatusResp.errorDetailsJson`. Pass-through + * string — callers parse with `JSON.parse` if they need structured access. 
+ */ + errorDetailsJson?: string | null; } diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts index 2a4c8136..52a1ea38 100644 --- a/lib/sea/SeaOperationBackend.ts +++ b/lib/sea/SeaOperationBackend.ts @@ -85,6 +85,33 @@ export type SeaOperationStatement = SeaStatementHandle & Partial; */ type SeaFetchHandle = Pick; +/** + * The rich operation-status surface the kernel exposes on a terminal sync + * `Statement` (`numModifiedRows` / `displayMessage` / `diagnosticInfo` / + * `errorDetailsJson`). These accessors live ONLY on the blocking `Statement` + * (metadata path + sync `runAsync:false` path once `result()` resolves) — the + * async `AsyncStatement` / `AsyncResultHandle` do not expose them — so the + * reader below is best-effort and returns an empty record when the handle + * predates this surface or the operation never resolved to a `Statement`. + */ +type SeaStatusFieldsHandle = Pick< + SeaStatement, + 'numModifiedRows' | 'displayMessage' | 'diagnosticInfo' | 'errorDetailsJson' +>; + +/** + * The rich operation-status fields, as the kernel returns them (each `null` + * when the server didn't supply it — e.g. `numModifiedRows` is null for a + * SELECT). Carried onto the neutral `OperationStatus` and ultimately into the + * Thrift `TGetOperationStatusResp` so SEA reports parity with the Thrift path. + */ +interface SeaRichStatusFields { + numModifiedRows: number | null; + displayMessage: string | null; + diagnosticInfo: string | null; + errorDetailsJson: string | null; +} + /** Poll cadence for the async `status()` loop — matches the Thrift backend's 100ms. */ const STATUS_POLL_INTERVAL_MS = 100; @@ -377,7 +404,9 @@ export default class SeaOperationBackend implements IOperationBackend { if (this.asyncStatement) { // Async query path: report the real kernel state (single // GetStatementStatus RPC — no polling here; `waitUntilReady` owns the - // poll loop). + // poll loop). The rich status fields (`numModifiedRows` etc.) 
live on the + // terminal sync `Statement`, which the async path never produces, so they + // stay undefined here. const state = statusStringToOperationState(await this.asyncStatement.status()); return { state, hasResultSet: true }; } @@ -386,11 +415,16 @@ export default class SeaOperationBackend implements IOperationBackend { // server-side; there is no per-status RPC to query while it runs. Report // Running until `result()` has materialised the terminal statement, then // Succeeded — mirroring the kernel's blocking-then-terminal lifecycle. - const state = this.fetchHandlePromise ? OperationState.Succeeded : OperationState.Running; - return { state, hasResultSet: true }; + if (!this.fetchHandlePromise) { + return { state: OperationState.Running, hasResultSet: true }; + } + // The blocking `result()` has resolved a terminal `Statement` — surface + // its rich status fields alongside the Succeeded state. + return { state: OperationState.Succeeded, hasResultSet: true, ...(await this.readRichStatusFields()) }; } - // Metadata path: the kernel statement is already terminal. - return { state: OperationState.Succeeded, hasResultSet: true }; + // Metadata path: the kernel statement is already terminal — read its rich + // fields too (they are `null` for metadata results, by design). + return { state: OperationState.Succeeded, hasResultSet: true, ...(await this.readRichStatusFields()) }; } public async waitUntilReady(options?: IOperationBackendWaitOptions): Promise { @@ -402,8 +436,11 @@ export default class SeaOperationBackend implements IOperationBackend { } // Metadata path: the kernel statement has already resolved, so there is // nothing to poll. seaFinished fires the progress callback once with a - // synthesised completion tick, matching the Thrift path's final tick. - return seaFinished(this.lifecycle, options); + // synthesised completion tick, matching the Thrift path's final tick. 
The + // rich-field reader is passed lazily so it only runs when a callback is + // wired (metadata statements report all-null, but the surface stays + // consistent with the query paths). + return seaFinished(this.lifecycle, options, () => this.readRichStatusFields()); } public async cancel(): Promise { @@ -418,6 +455,85 @@ export default class SeaOperationBackend implements IOperationBackend { // Internals. // --------------------------------------------------------------------------- + /** + * Read the kernel's rich operation-status fields (`numModifiedRows` / + * `displayMessage` / `diagnosticInfo` / `errorDetailsJson`) off the terminal + * sync `Statement`. These accessors live only on the blocking `Statement` + * (metadata path, or the sync `runAsync:false` path once `result()` has + * resolved) — not on the async `AsyncStatement` / `AsyncResultHandle` — so: + * + * - on the async path we have no `Statement`, so we return all-null; + * - on the sync path we await `getFetchHandle()` first, which both drives + * `result()` to completion and stores the resolved `Statement` on + * `blockingStatement` (the handle that backs the accessors); + * - if the (older) binding predates these accessors we degrade to all-null + * rather than throwing — `getOperationStatus()` must never fail just + * because the rich fields are unavailable. + * + * Errors from the individual accessors are swallowed to null: a failed + * status-field read must not turn a successful operation's status query into + * a throw. The fields are best-effort metadata, not the operation outcome. + */ + private async readRichStatusFields(): Promise { + const empty: SeaRichStatusFields = { + numModifiedRows: null, + displayMessage: null, + diagnosticInfo: null, + errorDetailsJson: null, + }; + + // The async path never produces a terminal sync `Statement`, so there is + // nothing to read these off of. 
+ if (this.asyncStatement && !this.cancellableExecution) { + return empty; + } + + // Ensure the sync path's blocking `result()` has resolved and stored the + // terminal `Statement` on `blockingStatement` (no-op on the metadata path, + // where `blockingStatement` was set at construction). + if (this.cancellableExecution) { + try { + await this.getFetchHandle(); + } catch { + // The operation failed/cancelled — its outcome surfaces through the + // wait/fetch path; status-field reads have nothing to add. + return empty; + } + } + + const handle = this.blockingStatement as Partial | undefined; + if (!handle || typeof handle.numModifiedRows !== 'function') { + // No resolved statement, or a binding that predates the rich-field + // accessors — degrade to all-null. + return empty; + } + const richHandle = handle as SeaStatusFieldsHandle; + + const readOrNull = async (read: () => Promise): Promise => { + try { + return await read(); + } catch (err) { + this.context + .getLogger() + .log( + LogLevel.debug, + `SEA status-field read failed for operation ${this._id}; reporting null. Cause: ` + + `${err instanceof Error ? err.message : String(err)}`, + ); + return null; + } + }; + + const [numModifiedRows, displayMessage, diagnosticInfo, errorDetailsJson] = await Promise.all([ + readOrNull(() => richHandle.numModifiedRows()), + readOrNull(() => richHandle.displayMessage()), + readOrNull(() => richHandle.diagnosticInfo()), + readOrNull(() => richHandle.errorDetailsJson()), + ]); + + return { numModifiedRows, displayMessage, diagnosticInfo, errorDetailsJson }; + } + /** * Poll the kernel `AsyncStatement` to a terminal state on a fixed 100ms * cadence, mirroring the Thrift backend's `waitUntilReady` loop. We poll @@ -547,9 +663,13 @@ export default class SeaOperationBackend implements IOperationBackend { // `getFetchHandle()` drives `result()` and memoises the resolved Statement // (also stored on `blockingStatement` so `close()` can reach it). 
await this.getFetchHandle(); - // Single completion tick, matching the metadata path. + // Single completion tick, matching the metadata path — carrying the rich + // status fields (numModifiedRows etc.) read off the now-terminal Statement. if (options?.callback) { - await Promise.resolve(options.callback({ state: OperationState.Succeeded, hasResultSet: true })); + const richFields = await this.readRichStatusFields(); + await Promise.resolve( + options.callback({ state: OperationState.Succeeded, hasResultSet: true, ...richFields }), + ); } } diff --git a/lib/sea/SeaOperationLifecycle.ts b/lib/sea/SeaOperationLifecycle.ts index d8b6b2c9..5bbc4cda 100644 --- a/lib/sea/SeaOperationLifecycle.ts +++ b/lib/sea/SeaOperationLifecycle.ts @@ -185,18 +185,21 @@ export async function seaClose( } /** - * Synthesize a neutral {@link OperationStatus} reporting the "finished" - * state. `IOperationBackend.waitUntilReady` is backend-neutral surface — its + * Synthesize an {@link OperationStatus} reporting the "finished" state. + * `IOperationBackend.waitUntilReady` is backend-neutral surface — its * `callback` receives an {@link OperationStatus}, not a Thrift wire struct * (the public Thrift-shaped `OperationStatusCallback` is adapted at the - * `DBSQLOperation` facade boundary). For M0 we report `Succeeded`. Richer - * fields (`numModifiedRows`, `progressUpdateResponse`, `errorMessage`) defer - * to M1 per the operation feature plan. + * `DBSQLOperation` facade boundary). We report `Succeeded`, and merge in any + * rich status fields (`numModifiedRows` / `displayMessage` / `diagnosticInfo` + * / `errorDetailsJson`) the backend resolved off the terminal kernel + * statement, so a `finished({callback})` consumer sees the same surface as a + * subsequent `getOperationStatus()` call. 
*/ -function synthesizeFinishedStatus(): OperationStatus { +function synthesizeFinishedStatus(extra?: Partial): OperationStatus { return { state: OperationState.Succeeded, hasResultSet: true, + ...extra, }; } @@ -227,13 +230,19 @@ export async function seaFinished( progress?: boolean; callback?: (status: OperationStatus) => unknown; }, + // Rich status fields the backend read off the terminal statement, merged into + // the synthesised completion tick so callback consumers see them. Lazy (a + // thunk) so the (potentially RPC-backed) read only happens when a callback is + // actually wired. + richFields?: () => Promise>, ): Promise { if (state.isCancelled || state.isClosed) { return; } if (options?.callback) { - const response = synthesizeFinishedStatus(); + const extra = richFields ? await richFields() : undefined; + const response = synthesizeFinishedStatus(extra); // Await the callback in case it returns a promise — matches the // Thrift code path at `lib/DBSQLOperation.ts:348-351`. await Promise.resolve(options.callback(response)); diff --git a/lib/thrift-backend/wireSynthesis.ts b/lib/thrift-backend/wireSynthesis.ts index 573e1171..86a39160 100644 --- a/lib/thrift-backend/wireSynthesis.ts +++ b/lib/thrift-backend/wireSynthesis.ts @@ -1,3 +1,4 @@ +import Int64 from 'node-int64'; import { TGetOperationStatusResp, TGetResultSetMetadataResp, @@ -77,10 +78,17 @@ function resultFormatToThrift(format: ResultFormat): TSparkRowSetType { * `OperationStatus` DTO. Used by `DBSQLOperation.status()` when running * against a non-Thrift backend (e.g. SEA) so the public API stays Thrift-shaped. * - * Lossy by design: Thrift-only fields not carried by `OperationStatus` - * (`taskStatus`, `numModifiedRows`, `operationStarted`, `operationCompleted`, - * `displayMessage`, `diagnosticInfo`) are left undefined. Consumers that - * read those fields will see `undefined` on non-Thrift backends. 
+ * Carries the rich status fields when the backend supplies them + * (`numModifiedRows`, `displayMessage`, `diagnosticInfo`, `errorDetailsJson`) + * — the SEA backend reads these off the terminal kernel statement, so DML + * operations report `numModifiedRows` at parity with the Thrift path. + * `numModifiedRows` is re-boxed as a Thrift `Int64` (`node-int64`) to match the + * wire shape the Thrift deserializer produces, so consumers can read it + * uniformly across backends. + * + * Still lossy for Thrift-only fields not carried by `OperationStatus` + * (`taskStatus`, `operationStarted`, `operationCompleted`), which are left + * undefined. */ export function synthesizeThriftStatus(status: OperationStatus): TGetOperationStatusResp { return { @@ -90,6 +98,17 @@ export function synthesizeThriftStatus(status: OperationStatus): TGetOperationSt errorMessage: status.errorMessage, hasResultSet: status.hasResultSet, progressUpdateResponse: status.progressUpdateResponse as TGetOperationStatusResp['progressUpdateResponse'], + // Rich status fields: only present on backends that surface them (SEA on a + // terminal sync statement). `null` (server didn't supply) maps to + // `undefined` so the synthesized response matches the Thrift path, where an + // absent field is simply not set. + numModifiedRows: + status.numModifiedRows === undefined || status.numModifiedRows === null + ? undefined + : new Int64(status.numModifiedRows), + displayMessage: status.displayMessage ?? undefined, + diagnosticInfo: status.diagnosticInfo ?? undefined, + errorDetailsJson: status.errorDetailsJson ?? 
undefined, } as TGetOperationStatusResp; } From 45f202d8c326d8d156bf0d387ccbc383f3ea21e6 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sat, 6 Jun 2026 16:53:30 +0000 Subject: [PATCH 06/13] fix(sea): omit unset retry knobs + repair merged test braces Consolidation fixups: - buildSeaRetryOptions now OMITS any knob the client config didn't set to a finite number (was emitting NaN across the FFI when getConfig() lacked retry fields, e.g. the fake test context). Finite negatives still clamp to 0. - Repair the brace balance in connectionOptions.test.ts after merging the mTLS and retry test blocks. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- lib/sea/SeaAuth.ts | 31 ++++++++++++++---------- tests/unit/sea/connectionOptions.test.ts | 3 +++ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/lib/sea/SeaAuth.ts b/lib/sea/SeaAuth.ts index bd6485e3..8872c265 100644 --- a/lib/sea/SeaAuth.ts +++ b/lib/sea/SeaAuth.ts @@ -444,21 +444,26 @@ export function buildSeaHttpOptions(options: ConnectionOptions): SeaHttpOptions * (the kernel's granularity); all values are clamped into the napi `u32` range. 
*/ export function buildSeaRetryOptions(config: { - retryMaxAttempts: number; - retriesTimeout: number; - retryDelayMin: number; - retryDelayMax: number; -}): Required< - Pick -> { + retryMaxAttempts?: number; + retriesTimeout?: number; + retryDelayMin?: number; + retryDelayMax?: number; +}): Pick { const msToSecs = (ms: number): number => Math.min(MAX_U32, Math.max(0, Math.round(ms / 1000))); const clampU32 = (n: number): number => Math.min(MAX_U32, Math.max(0, Math.trunc(n))); - return { - retryMinWaitSecs: msToSecs(config.retryDelayMin), - retryMaxWaitSecs: msToSecs(config.retryDelayMax), - retryMaxAttempts: clampU32(config.retryMaxAttempts), - retryOverallTimeoutSecs: msToSecs(config.retriesTimeout), - }; + // Only forward a knob the connector actually set to a finite number; an + // absent/garbage value is OMITTED so the kernel keeps its built-in default + // (rather than emitting NaN across the FFI). A finite-but-negative value is + // still forwarded and clamped to 0 by the helpers above. 
+ const out: Pick< + SeaSessionDefaults, + 'retryMinWaitSecs' | 'retryMaxWaitSecs' | 'retryMaxAttempts' | 'retryOverallTimeoutSecs' + > = {}; + if (Number.isFinite(config.retryDelayMin)) out.retryMinWaitSecs = msToSecs(config.retryDelayMin as number); + if (Number.isFinite(config.retryDelayMax)) out.retryMaxWaitSecs = msToSecs(config.retryDelayMax as number); + if (Number.isFinite(config.retryMaxAttempts)) out.retryMaxAttempts = clampU32(config.retryMaxAttempts as number); + if (Number.isFinite(config.retriesTimeout)) out.retryOverallTimeoutSecs = msToSecs(config.retriesTimeout as number); + return out; } export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNativeConnectionOptions { diff --git a/tests/unit/sea/connectionOptions.test.ts b/tests/unit/sea/connectionOptions.test.ts index 8eb07d5f..eabeef6d 100644 --- a/tests/unit/sea/connectionOptions.test.ts +++ b/tests/unit/sea/connectionOptions.test.ts @@ -272,6 +272,9 @@ describe('SeaAuth HTTP options (buildSeaHttpOptions)', () => { ) as { customHeaders?: Array<{ name: string; value: string }> }; expect(native.customHeaders?.find((h) => h.name === 'X-Trace')?.value).to.equal('abc'); expect(native.customHeaders?.find((h) => h.name === 'User-Agent')?.value).to.contain('MyApp/2.0'); + }); +}); + describe('SeaAuth retry options — buildSeaRetryOptions', () => { // The driver's ClientConfig retry defaults (ms / total-attempt count). const defaults = { From c3a706ef0319e5df872023e8be87ece87b3fed96 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sun, 7 Jun 2026 11:56:07 +0000 Subject: [PATCH 07/13] feat(sea): enrich kernel error mapping with displayMessage/diagnosticInfo/errorDetailsJson The napi error envelope already emits displayMessage / diagnosticInfo / errorDetailsJson, but SeaErrorMapping.buildKernelMetadata dropped them. 
Surface them under kernelMetadata so SEA stays at parity with: - Thrift, which carries the same data via OperationStateError.response (TGetOperationStatusResp.displayMessage / .errorMessage / .errorDetailsJson) - the Python use_kernel connector, which forwards all of them onto its exceptions Also make the kernelMetadata attach-guard a key-count check so future fields are covered automatically. Adds unit tests incl. explicit Thrift-parity assertions. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- lib/sea/SeaErrorMapping.ts | 33 +++++++--- tests/unit/sea/error-mapping.test.ts | 95 +++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 9 deletions(-) diff --git a/lib/sea/SeaErrorMapping.ts b/lib/sea/SeaErrorMapping.ts index 1dcd693a..60b79c1e 100644 --- a/lib/sea/SeaErrorMapping.ts +++ b/lib/sea/SeaErrorMapping.ts @@ -68,6 +68,18 @@ export interface KernelMetadata { httpStatus?: number; retryable?: boolean; queryId?: string; + /** + * Rich server-error diagnostics from the SEA terminal-error payload. These + * mirror the fields the **Thrift** backend already surfaces via + * `OperationStateError.response` (`TGetOperationStatusResp.displayMessage` / + * `.errorMessage` / `.errorDetailsJson`) and that the Python `use_kernel` + * connector forwards onto its exceptions — so SEA stays at parity rather + * than dropping them. Kernel sources: `Error.display_message` / + * `.diagnostic_info` / `.error_details_json`. + */ + displayMessage?: string; + diagnosticInfo?: string; + errorDetailsJson?: string; } /** @@ -209,6 +221,16 @@ function buildKernelMetadata(parsed: Record): KernelMetadata { if (typeof parsed.queryId === 'string') { meta.queryId = parsed.queryId; } + // Rich diagnostics — Thrift/Python parity (were previously dropped). 
+ if (typeof parsed.displayMessage === 'string') { + meta.displayMessage = parsed.displayMessage; + } + if (typeof parsed.diagnosticInfo === 'string') { + meta.diagnosticInfo = parsed.diagnosticInfo; + } + if (typeof parsed.errorDetailsJson === 'string') { + meta.errorDetailsJson = parsed.errorDetailsJson; + } return meta; } @@ -288,14 +310,9 @@ export function decodeNapiKernelError(err: unknown): Error { const meta = buildKernelMetadata(envelope); // Skip the namespace attachment entirely when no fields validated // through — keeps `err.kernelMetadata` absent rather than `{}` for - // simple envelopes (the common case). - if ( - meta.errorCode !== undefined || - meta.vendorCode !== undefined || - meta.httpStatus !== undefined || - meta.retryable !== undefined || - meta.queryId !== undefined - ) { + // simple envelopes (the common case). Key-count check so new + // `KernelMetadata` fields are covered automatically. + if (Object.keys(meta).length > 0) { defineErrorMetadata(jsErr, 'kernelMetadata', meta); } return jsErr; diff --git a/tests/unit/sea/error-mapping.test.ts b/tests/unit/sea/error-mapping.test.ts index 8b5bdf70..be4fc173 100644 --- a/tests/unit/sea/error-mapping.test.ts +++ b/tests/unit/sea/error-mapping.test.ts @@ -1,5 +1,11 @@ import { expect } from 'chai'; -import { mapKernelErrorToJsError, KernelErrorCode, KernelErrorShape } from '../../../lib/sea/SeaErrorMapping'; +import { + mapKernelErrorToJsError, + decodeNapiKernelError, + KernelErrorCode, + KernelErrorShape, + ErrorWithSqlState, +} from '../../../lib/sea/SeaErrorMapping'; import HiveDriverError from '../../../lib/errors/HiveDriverError'; import AuthenticationError from '../../../lib/errors/AuthenticationError'; import OperationStateError, { OperationStateErrorCode } from '../../../lib/errors/OperationStateError'; @@ -219,3 +225,90 @@ describe('SeaErrorMapping.mapKernelErrorToJsError', () => { }); }); }); + +describe('SeaErrorMapping.decodeNapiKernelError — diagnostics enrichment (Thrift/Python 
parity)', () => { + // The wire sentinel the napi binding prefixes onto structured kernel errors + // (mirrors `__databricks_error__:` in SeaErrorMapping.ts / native/sea/src/error.rs). + const SENTINEL = '__databricks_error__:'; + const envelope = (fields: Record): Error => new Error(SENTINEL + JSON.stringify(fields)); + + it('surfaces displayMessage / diagnosticInfo / errorDetailsJson in kernelMetadata (were dropped before)', () => { + const err = decodeNapiKernelError( + envelope({ + code: 'SqlError', + message: '[TABLE_OR_VIEW_NOT_FOUND] table not found', + sqlState: '42P01', + errorCode: 'TABLE_OR_VIEW_NOT_FOUND', + vendorCode: 0, + displayMessage: 'TABLE_OR_VIEW_NOT_FOUND: `main`.`x` cannot be found', + diagnosticInfo: 'org.apache.spark.sql.AnalysisException: ...', + errorDetailsJson: '{"errorClass":"TABLE_OR_VIEW_NOT_FOUND"}', + }), + ) as ErrorWithSqlState; + + // Class parity with Thrift: a server SQL failure → OperationStateError(Error). + expect(err).to.be.instanceOf(OperationStateError); + expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Error); + + // The three previously-dropped diagnostics are now surfaced. 
+ expect(err.kernelMetadata?.displayMessage).to.equal('TABLE_OR_VIEW_NOT_FOUND: `main`.`x` cannot be found'); + expect(err.kernelMetadata?.diagnosticInfo).to.match(/AnalysisException/); + expect(err.kernelMetadata?.errorDetailsJson).to.equal('{"errorClass":"TABLE_OR_VIEW_NOT_FOUND"}'); + }); + + it('keeps the existing envelope fields alongside the new diagnostics', () => { + const err = decodeNapiKernelError( + envelope({ + code: 'SqlError', + message: 'boom', + sqlState: '42000', + errorCode: 'PARSE_SYNTAX_ERROR', + vendorCode: 1234, + httpStatus: 400, + retryable: false, + queryId: '01ef-abcd', + displayMessage: 'Syntax error', + }), + ) as ErrorWithSqlState; + expect(err.sqlState).to.equal('42000'); + expect(err.kernelMetadata?.errorCode).to.equal('PARSE_SYNTAX_ERROR'); + expect(err.kernelMetadata?.vendorCode).to.equal(1234); + expect(err.kernelMetadata?.queryId).to.equal('01ef-abcd'); + expect(err.kernelMetadata?.displayMessage).to.equal('Syntax error'); + }); + + it('attaches kernelMetadata even when ONLY a diagnostic field is present', () => { + // Previously the attach-guard enumerated only errorCode/vendorCode/httpStatus/ + // retryable/queryId, so a diagnostics-only envelope dropped them silently. + const err = decodeNapiKernelError( + envelope({ code: 'Internal', message: 'kaboom', diagnosticInfo: 'stack trace here' }), + ) as ErrorWithSqlState; + expect(err.kernelMetadata?.diagnosticInfo).to.equal('stack trace here'); + }); + + it('omits kernelMetadata entirely for a bare envelope (no optional fields)', () => { + const err = decodeNapiKernelError(envelope({ code: 'Internal', message: 'plain' })) as ErrorWithSqlState; + expect(err.kernelMetadata).to.equal(undefined); + }); + + it('Thrift parity: SEA exposes the same diagnostics Thrift carries via OperationStateError.response', () => { + // Thrift's OperationStateError carries the full TGetOperationStatusResp on + // `.response` (displayMessage + sqlState + numeric errorCode + diagnostics). 
+ // SEA must expose the equivalent set so a consumer gets parity regardless of + // backend: same class + sqlState + a diagnostics surface. + const err = decodeNapiKernelError( + envelope({ + code: 'SqlError', + message: 'div by zero', + sqlState: '22012', + vendorCode: 0, + displayMessage: 'Division by zero', + diagnosticInfo: 'ArithmeticException', + }), + ) as ErrorWithSqlState; + expect(err).to.be.instanceOf(OperationStateError); // class parity (Thrift: OperationStateError) + expect(err.sqlState).to.equal('22012'); // Thrift: response.sqlState + expect(err.kernelMetadata?.displayMessage).to.equal('Division by zero'); // Thrift: response.displayMessage + expect(err.kernelMetadata?.diagnosticInfo).to.equal('ArithmeticException'); // Thrift: response.errorMessage + }); +}); From cf1c1bb5384e09ad550244d9bc5751417b343122 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sun, 7 Jun 2026 22:22:29 +0000 Subject: [PATCH 08/13] style(sea): apply prettier formatting to SeaOperationBackend + connectionOptions test Fixes the failing lint job (prettier --check) on the two files it flagged. Signed-off-by: Madhavendra Rathore --- lib/sea/SeaOperationBackend.ts | 4 +--- tests/unit/sea/connectionOptions.test.ts | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts index 52a1ea38..6112cafd 100644 --- a/lib/sea/SeaOperationBackend.ts +++ b/lib/sea/SeaOperationBackend.ts @@ -667,9 +667,7 @@ export default class SeaOperationBackend implements IOperationBackend { // status fields (numModifiedRows etc.) read off the now-terminal Statement. 
if (options?.callback) { const richFields = await this.readRichStatusFields(); - await Promise.resolve( - options.callback({ state: OperationState.Succeeded, hasResultSet: true, ...richFields }), - ); + await Promise.resolve(options.callback({ state: OperationState.Succeeded, hasResultSet: true, ...richFields })); } } diff --git a/tests/unit/sea/connectionOptions.test.ts b/tests/unit/sea/connectionOptions.test.ts index eabeef6d..678f5c40 100644 --- a/tests/unit/sea/connectionOptions.test.ts +++ b/tests/unit/sea/connectionOptions.test.ts @@ -26,7 +26,7 @@ const PAT = { host: 'h.databricks.com', path: '/sql/1.0/warehouses/abc', token: // Cast helper: the SEA connection-tuning/TLS options live on the internal // surface, so tests build untyped option literals. -const opts = (extra: Record) => ({ ...PAT, ...extra }) as unknown as ConnectionOptions; +const opts = (extra: Record) => ({ ...PAT, ...extra } as unknown as ConnectionOptions); describe('SeaAuth connection options — intervalsAsString default', () => { it('always sets intervalsAsString:true (thrift-compatible interval rendering)', () => { From f88de8ce765289232bd83a9f9ce609e9a158b8d7 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sun, 7 Jun 2026 22:22:30 +0000 Subject: [PATCH 09/13] fix(params): correct DATE and large-number parameter type inference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two driver-side parameter-binding bugs surfaced by the SEA parity suite (the kernel binds (value_str, sql_type) faithfully — the defects are in how DBSQLParameter stringifies/types the value): - A JS `number` that is whole but outside the INT (i32) range was typed INTEGER, so e.g. `1e30` was rejected by the server as `invalid INT literal "1e+30"`. inferNumberType now picks the narrowest fitting type: INTEGER within i32, BIGINT within the safe-integer range, DOUBLE otherwise (and for non-integers). 
- A JS `Date` bound with an explicit DATE type was stringified with the full ISO-8601 timestamp (`...T00:00:00.000Z`), which the SEA wire rejects as a DATE literal ("trailing input"). It now projects the calendar date (`yyyy-mm-dd`); the no-explicit-type path still binds a Date as a full TIMESTAMP. Both paths are shared by the Thrift and SEA backends; the changes only affect values that the previous logic mis-typed (and which the server rejected), so existing behaviour is preserved. Unit tests added for the magnitude-based integer inference and the DATE projection. Signed-off-by: Madhavendra Rathore --- lib/DBSQLParameter.ts | 41 ++++++++++++++++++++++++++-- tests/unit/DBSQLParameter.test.ts | 44 +++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/lib/DBSQLParameter.ts b/lib/DBSQLParameter.ts index 63c2465b..28274bba 100644 --- a/lib/DBSQLParameter.ts +++ b/lib/DBSQLParameter.ts @@ -29,6 +29,37 @@ export enum DBSQLParameterType { INTERVALDAY = 'INTERVAL DAY', } +// 32-bit signed integer bounds — the range of the Spark `INT` type. +const INT32_MIN = -2147483648; +const INT32_MAX = 2147483647; + +/** + * Infer the Spark parameter type for a JS `number` when the caller didn't set + * one explicitly. + * + * A JS `number` is an IEEE-754 double, so a whole-number value can still be far + * outside the `INT` range (e.g. `1e30`). Typing such a value as `INTEGER` + * makes the server reject it (`invalid INT literal "1e+30"`). Pick the + * narrowest type that actually fits: + * - non-integer / non-finite → `DOUBLE` + * - integer within INT (i32) range → `INTEGER` + * - integer within the safe-integer range → `BIGINT` + * - anything larger → `DOUBLE` (can't be represented exactly as an integer + * anyway; callers needing exact 64-bit integers should pass a `bigint`). 
+ */ +function inferNumberType(value: number): DBSQLParameterType { + if (!Number.isInteger(value)) { + return DBSQLParameterType.DOUBLE; + } + if (value >= INT32_MIN && value <= INT32_MAX) { + return DBSQLParameterType.INTEGER; + } + if (Number.isSafeInteger(value)) { + return DBSQLParameterType.BIGINT; + } + return DBSQLParameterType.DOUBLE; +} + interface DBSQLParameterOptions { type?: DBSQLParameterType; value: DBSQLParameterValue; @@ -78,7 +109,7 @@ export class DBSQLParameter { if (typeof this.value === 'number') { return new TSparkParameter({ name, - type: wireType ?? (Number.isInteger(this.value) ? DBSQLParameterType.INTEGER : DBSQLParameterType.DOUBLE), + type: wireType ?? inferNumberType(this.value), value: new TSparkParameterValue({ stringValue: Number(this.value).toString(), }), @@ -96,11 +127,17 @@ export class DBSQLParameter { } if (this.value instanceof Date) { + // A `Date` bound as `DATE` must project a calendar date (`yyyy-mm-dd`), + // not a full ISO-8601 timestamp: the SEA wire rejects + // `2024-03-14T00:00:00.000Z` as a DATE literal ("trailing input"), and + // Thrift accepts the date-only form just as well. Without an explicit + // DATE type the value still binds as a TIMESTAMP from the full ISO string. + const isDateType = wireType === DBSQLParameterType.DATE; return new TSparkParameter({ name, type: wireType ?? DBSQLParameterType.TIMESTAMP, value: new TSparkParameterValue({ - stringValue: this.value.toISOString(), + stringValue: isDateType ? this.value.toISOString().slice(0, 10) : this.value.toISOString(), }), }); } diff --git a/tests/unit/DBSQLParameter.test.ts b/tests/unit/DBSQLParameter.test.ts index deefb13e..e8937dd2 100644 --- a/tests/unit/DBSQLParameter.test.ts +++ b/tests/unit/DBSQLParameter.test.ts @@ -124,4 +124,48 @@ describe('DBSQLParameter', () => { }), ); }); + + it('infers a fitting integer type by magnitude', () => { + const cases: Array<[number, DBSQLParameterType, string]> = [ + // Within INT (i32) range → INTEGER. 
+ [42, DBSQLParameterType.INTEGER, '42'], + [2147483647, DBSQLParameterType.INTEGER, '2147483647'], + [-2147483648, DBSQLParameterType.INTEGER, '-2147483648'], + // Beyond i32 but a safe integer → BIGINT (INTEGER would overflow the + // server's INT literal parse). + [3000000000, DBSQLParameterType.BIGINT, '3000000000'], + // Whole-number double outside the safe-integer range → DOUBLE, not + // INTEGER. Regression: `Number.isInteger(1e30)` is `true`, so this used + // to be typed INTEGER and rejected as `invalid INT literal "1e+30"`. + [1e30, DBSQLParameterType.DOUBLE, '1e+30'], + ]; + for (const [value, type, stringValue] of cases) { + expect(new DBSQLParameter({ value }).toSparkParameter()).to.deep.equal( + new TSparkParameter({ type, value: new TSparkParameterValue({ stringValue }) }), + ); + } + }); + + it('binds a Date as a calendar date when typed DATE', () => { + // Explicit DATE type → date-only `yyyy-mm-dd`. The full ISO timestamp is + // rejected by the SEA wire as a DATE literal ("trailing input"). + expect( + new DBSQLParameter({ + type: DBSQLParameterType.DATE, + value: new Date(Date.UTC(2024, 0, 15, 10, 30, 0)), + }).toSparkParameter(), + ).to.deep.equal( + new TSparkParameter({ + type: DBSQLParameterType.DATE, + value: new TSparkParameterValue({ stringValue: '2024-01-15' }), + }), + ); + // Without an explicit type a Date still binds as a full TIMESTAMP. 
+ expect(new DBSQLParameter({ value: new Date('2023-09-06T03:14:27.843Z') }).toSparkParameter()).to.deep.equal( + new TSparkParameter({ + type: DBSQLParameterType.TIMESTAMP, + value: new TSparkParameterValue({ stringValue: '2023-09-06T03:14:27.843Z' }), + }), + ); + }); }); From 0b759821cf912e26492dbfd3d01b71d6d64106aa Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sun, 7 Jun 2026 22:40:59 +0000 Subject: [PATCH 10/13] fix(sea): preserve DECIMAL and BIGINT result precision (gated) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shared ArrowResultConverter coerces native Arrow Decimal128 to an IEEE-754 double (`Number(unscaled)/10**scale`) and Int64 to a JS `number`. On the Thrift backend that loss is avoidable: with `useArrowNativeTypes=false` the server ships DECIMAL as a Utf8 string (passed through untouched), so Thrift returns the exact value by default. SEA has no such escape hatch — the kernel always delivers native Arrow Decimal128 / Int64 — so high-precision DECIMALs were truncated (e.g. `123456789012345.6789` → `123456789012345.67`) and 64-bit integers past 2^53 lost their low digits. Add an opt-in `preserveBigNumericPrecision` flag on ArrowResultConverter that renders DECIMAL as an exact string (via `bigNumDecimalToString`) and keeps BIGINT as a `bigint`. The flag also short-circuits the second-pass `convertThriftValue` narrowing (DECIMAL_TYPE → `Number`, BIGINT_TYPE → `convertBigInt`) for those columns. The SEA operation backend enables it; the Thrift backend leaves it off, so its long-standing `number` representation is unchanged. This brings SEA to parity with Thrift's default precise output. Unit tests: exact decimal-string formatting (incl. negatives, leading zero, scale 0) and the BIGINT preserve-vs-default behaviour. 
Signed-off-by: Madhavendra Rathore --- lib/result/ArrowResultConverter.ts | 67 ++++++++++++++++++- lib/sea/SeaOperationBackend.ts | 9 ++- .../unit/result/ArrowResultConverter.test.ts | 55 ++++++++++++++- 3 files changed, 126 insertions(+), 5 deletions(-) diff --git a/lib/result/ArrowResultConverter.ts b/lib/result/ArrowResultConverter.ts index 3902ac25..2bdcb139 100644 --- a/lib/result/ArrowResultConverter.ts +++ b/lib/result/ArrowResultConverter.ts @@ -15,7 +15,7 @@ import { RecordBatchReader, util as arrowUtils, } from 'apache-arrow'; -import { TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types'; +import { TTableSchema, TColumnDesc, TTypeId } from '../../thrift/TCLIService_types'; import IClientContext from '../contracts/IClientContext'; import HiveDriverError from '../errors/HiveDriverError'; import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; @@ -169,6 +169,25 @@ function formatDayTimeFromTotal(totalNanos: bigint): string { return `${sign}${days.toString()} ${pad2(hours)}:${pad2(minutes)}:${pad2(seconds)}${fraction}`; } +/** + * Render an Arrow `Decimal` value — supplied as its unscaled integer (from + * `bigNumToBigInt`) plus the column `scale` — as an exact decimal string, + * e.g. unscaled `1234567890` / scale `5` → `"12345.67890"`. Used by the + * precision-preserving path so high-precision DECIMALs survive the round-trip + * instead of being flattened to an IEEE-754 double. + */ +export function bigNumDecimalToString(unscaled: bigint, scale: number): string { + if (scale <= 0) { + return unscaled.toString(); + } + const negative = unscaled < ZERO_BIGINT; + // `padStart(scale + 1)` guarantees at least one digit before the point + // (e.g. unscaled `5` / scale `2` → `"005"` → `"0.05"`). + const digits = (negative ? -unscaled : unscaled).toString().padStart(scale + 1, '0'); + const cut = digits.length - scale; + return `${negative ? 
'-' : ''}${digits.slice(0, cut)}.${digits.slice(cut)}`; +} + export default class ArrowResultConverter implements IResultsProvider> { private readonly context: IClientContext; @@ -176,6 +195,15 @@ export default class ArrowResultConverter implements IResultsProvider private readonly schema: Array; + // When true, DECIMAL and 64-bit integer values keep full precision — + // DECIMAL as an exact string and BIGINT as a JS `bigint` — instead of being + // coerced to a lossy `number`. Enabled by the SEA backend, which always + // receives native Arrow `Decimal128` / `Int64` from the kernel and has no + // server-side "send as string" escape hatch (the Thrift backend gets the + // string form via `useArrowNativeTypes=false`). Off by default so the Thrift + // path keeps its long-standing `number` representation unchanged. + private readonly preserveBigNumericPrecision: boolean; + private recordBatchReader?: IterableIterator>; // Remaining rows in current Arrow batch (not the record batch!) @@ -193,10 +221,16 @@ export default class ArrowResultConverter implements IResultsProvider // operation backend and the SEA backend's neutral `ResultMetadata` — // which both carry `schema?: TTableSchema` — can construct the converter // without an adapter at the call site. 
- constructor(context: IClientContext, source: IResultsProvider, { schema }: { schema?: TTableSchema }) { + constructor( + context: IClientContext, + source: IResultsProvider, + { schema }: { schema?: TTableSchema }, + { preserveBigNumericPrecision = false }: { preserveBigNumericPrecision?: boolean } = {}, + ) { this.context = context; this.source = source; this.schema = getSchemaColumns(schema); + this.preserveBigNumericPrecision = preserveBigNumericPrecision; } public async hasMore() { @@ -374,6 +408,11 @@ export default class ArrowResultConverter implements IResultsProvider if (value instanceof Object && value[isArrowBigNumSymbol]) { const result = bigNumToBigInt(value); if (DataType.isDecimal(valueType)) { + // Preserve full precision as an exact string when requested (SEA); + // otherwise keep the historical lossy `number` form. + if (this.preserveBigNumericPrecision) { + return bigNumDecimalToString(result, valueType.scale); + } return Number(result) / 10 ** valueType.scale; } // A rewritten Duration Int64 surfaces as a raw `bigint`, not a BigNum @@ -397,6 +436,12 @@ export default class ArrowResultConverter implements IResultsProvider if (durationUnit) { return formatDurationToIntervalDayTime(value, durationUnit); } + // Keep the exact `bigint` when precision must be preserved (SEA); the + // default path narrows to `number` for backward compatibility (the + // Thrift backend has always returned BIGINT as a JS `number`). + if (this.preserveBigNumericPrecision) { + return value; + } return Number(value); } @@ -411,7 +456,23 @@ export default class ArrowResultConverter implements IResultsProvider const typeDescriptor = column.typeDesc.types[0]?.primitiveEntry; const field = column.columnName; const value = record[field]; - result[field] = value === null ? 
null : convertThriftValue(typeDescriptor, value); + if (value === null) { + result[field] = null; + return; + } + // When preserving precision, DECIMAL and BIGINT values were already + // produced in their exact form by `convertArrowTypes` (string / bigint). + // `convertThriftValue` would narrow both back to a lossy `number` + // (DECIMAL_TYPE → `Number(value)`, BIGINT_TYPE → `convertBigInt`), so + // pass them through untouched on this path. + if ( + this.preserveBigNumericPrecision && + (typeDescriptor?.type === TTypeId.DECIMAL_TYPE || typeDescriptor?.type === TTypeId.BIGINT_TYPE) + ) { + result[field] = value; + return; + } + result[field] = convertThriftValue(typeDescriptor, value); }); return result; diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts index 6112cafd..88d949ac 100644 --- a/lib/sea/SeaOperationBackend.ts +++ b/lib/sea/SeaOperationBackend.ts @@ -767,7 +767,14 @@ export default class SeaOperationBackend implements IOperationBackend { // SeaResultsProvider consumes only `fetchNextBatch`; both the async result // handle and the blocking statement satisfy that surface. this.resultsProvider = new SeaResultsProvider(handle as unknown as SeaStatement); - const converter = new ArrowResultConverter(this.context, this.resultsProvider, metadata); + // The kernel always delivers native Arrow Decimal128 / Int64 (there is no + // server-side "decimals as string" mode like Thrift's + // `useArrowNativeTypes=false`), so preserve their precision here — DECIMAL + // as an exact string, BIGINT as a `bigint` — matching the precise values + // the Thrift backend returns by default. 
+ const converter = new ArrowResultConverter(this.context, this.resultsProvider, metadata, { + preserveBigNumericPrecision: true, + }); this.resultSlicer = new ResultSlicer(this.context, converter); return this.resultSlicer; } diff --git a/tests/unit/result/ArrowResultConverter.test.ts b/tests/unit/result/ArrowResultConverter.test.ts index dfe00966..74379b12 100644 --- a/tests/unit/result/ArrowResultConverter.test.ts +++ b/tests/unit/result/ArrowResultConverter.test.ts @@ -2,7 +2,7 @@ import { expect } from 'chai'; import fs from 'fs'; import path from 'path'; import { Table, tableFromArrays, tableToIPC, RecordBatch, TypeMap } from 'apache-arrow'; -import ArrowResultConverter from '../../../lib/result/ArrowResultConverter'; +import ArrowResultConverter, { bigNumDecimalToString } from '../../../lib/result/ArrowResultConverter'; import { ArrowBatch } from '../../../lib/result/utils'; import ResultsProviderStub from '../.stubs/ResultsProviderStub'; import { TTableSchema, TTypeId } from '../../../thrift/TCLIService_types'; @@ -199,4 +199,57 @@ describe('ArrowResultConverter', () => { expect(rows3).to.deep.equal([{ id: 30 }, { id: 31 }]); expect(await result.hasMore()).to.be.false; }); + + function bigintThriftSchema(columnName: string): TTableSchema { + return { + columns: [ + { + columnName, + typeDesc: { types: [{ primitiveEntry: { type: TTypeId.BIGINT_TYPE } }] }, + position: 1, + }, + ], + }; + } + + it('preserves BIGINT precision as bigint when preserveBigNumericPrecision is set', async () => { + // 9007199254740993 = Number.MAX_SAFE_INTEGER + 2 — not exactly + // representable as a JS number. 
+ const table = tableFromArrays({ big_value: BigInt64Array.from([9007199254740993n, 5n]) }); + const rowSetProvider = new ResultsProviderStub( + [{ batches: [createSampleArrowBatch(table.batches[0])], rowCount: 2 }], + emptyItem, + ); + const result = new ArrowResultConverter( + new ClientContextStub(), + rowSetProvider, + { schema: bigintThriftSchema('big_value') }, + { preserveBigNumericPrecision: true }, + ); + expect(await result.fetchNext({ limit: 10000 })).to.deep.equal([ + { big_value: 9007199254740993n }, + { big_value: 5n }, + ]); + }); + + it('narrows BIGINT to a (lossy) number by default — preserves the Thrift contract', async () => { + const table = tableFromArrays({ big_value: BigInt64Array.from([9007199254740993n, 5n]) }); + const rowSetProvider = new ResultsProviderStub( + [{ batches: [createSampleArrowBatch(table.batches[0])], rowCount: 2 }], + emptyItem, + ); + const result = new ArrowResultConverter(new ClientContextStub(), rowSetProvider, { + schema: bigintThriftSchema('big_value'), + }); + // Default path coerces to `number`; 9007199254740993 rounds to ...992. 
+ expect(await result.fetchNext({ limit: 10000 })).to.deep.equal([{ big_value: 9007199254740992 }, { big_value: 5 }]); + }); + + it('formats unscaled decimals to exact strings (bigNumDecimalToString)', () => { + expect(bigNumDecimalToString(1234567890n, 5)).to.equal('12345.67890'); // trailing zero kept + expect(bigNumDecimalToString(-1234567890123456789n, 4)).to.equal('-123456789012345.6789'); + expect(bigNumDecimalToString(5n, 2)).to.equal('0.05'); // leading zero synthesized + expect(bigNumDecimalToString(-5n, 2)).to.equal('-0.05'); + expect(bigNumDecimalToString(12345n, 0)).to.equal('12345'); // scale 0 → integer string + }); }); From f258df4926be5b5ae126348458548c77a3ce615b Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 8 Jun 2026 00:52:00 +0000 Subject: [PATCH 11/13] feat: add preserveBigNumericPrecision connection option (Thrift + SEA) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The result converter coerces DECIMAL to an IEEE-754 double and BIGINT to a JS number, silently rounding high-precision decimals and integers beyond 2^53. This affects BOTH backends (verified end-to-end on Thrift: CAST('123456789012345.6789' AS DECIMAL(38,4)) -> 123456789012345.67; CAST(9007199254740993 AS BIGINT) -> 9007199254740992). Expose `preserveBigNumericPrecision` as a public ConnectionOption (default false → existing representation preserved, non-breaking). When enabled, DECIMAL is returned as an exact string and BIGINT as a JS `bigint`. Threaded through ClientConfig to both the Thrift (ARROW_BASED / URL_BASED ArrowResultConverter) and SEA operation backends. SeaOperationBackend now reads the option instead of hardcoding it on. Verified end-to-end on Thrift: OFF → 123456789012345.67 / 9007199254740992 (number); ON → "123456789012345.6789" (string) / 9007199254740993 (bigint). Build (tsconfig.build.json) + unit tests + prettier + eslint pass. 
Signed-off-by: Madhavendra Rathore --- lib/DBSQLClient.ts | 7 +++++++ lib/contracts/IClientContext.ts | 6 ++++++ lib/contracts/IDBSQLClient.ts | 9 +++++++++ lib/sea/SeaOperationBackend.ts | 11 +++++------ lib/thrift-backend/ThriftOperationBackend.ts | 2 ++ 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index c3506680..333538d1 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -152,6 +152,8 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I useLZ4Compression: true, + preserveBigNumericPrecision: false, + // Telemetry defaults are sourced from DEFAULT_TELEMETRY_CONFIG so // every component reads from the same single frozen const. Mapping the // unprefixed TelemetryConfiguration keys to the `telemetry`-prefixed @@ -604,6 +606,11 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.config.enableMetricViewMetadata = options.enableMetricViewMetadata; } + // Opt-in: preserve DECIMAL (string) / BIGINT (bigint) precision in results. + if (options.preserveBigNumericPrecision !== undefined) { + this.config.preserveBigNumericPrecision = options.preserveBigNumericPrecision; + } + // Override telemetry config if provided in options. Per-key narrowed copy // preserves the structural type system: `ConnectionOptions` and // `ClientConfig` declare identical types for these knobs, so a user diff --git a/lib/contracts/IClientContext.ts b/lib/contracts/IClientContext.ts index 43a47745..bde21267 100644 --- a/lib/contracts/IClientContext.ts +++ b/lib/contracts/IClientContext.ts @@ -26,6 +26,12 @@ export interface ClientConfig { useLZ4Compression: boolean; enableMetricViewMetadata?: boolean; + // When true, DECIMAL values are returned as exact strings and 64-bit + // integers as JS `bigint`, instead of being coerced to a lossy `number`. + // Off by default to preserve the long-standing representation on both the + // Thrift and SEA backends. 
See `ConnectionOptions.preserveBigNumericPrecision`. + preserveBigNumericPrecision?: boolean; + // Telemetry configuration telemetryEnabled?: boolean; telemetryBatchSize?: number; diff --git a/lib/contracts/IDBSQLClient.ts b/lib/contracts/IDBSQLClient.ts index b75a8075..616db503 100644 --- a/lib/contracts/IDBSQLClient.ts +++ b/lib/contracts/IDBSQLClient.ts @@ -55,6 +55,15 @@ export type ConnectionOptions = { proxy?: ProxyOptions; enableMetricViewMetadata?: boolean; + /** + * Preserve full numeric precision in results. When `true`, DECIMAL columns + * are returned as exact strings and 64-bit integers (BIGINT) as JS `bigint`, + * instead of the default lossy coercion to a JS `number` (which silently + * rounds DECIMALs and integers beyond 2^53). Applies to both the Thrift and + * SEA backends. Defaults to `false` to preserve the existing representation. + */ + preserveBigNumericPrecision?: boolean; + /** * Extra HTTP headers attached to driver-owned out-of-band requests * (telemetry POSTs and feature-flag GETs). Not applied to the primary diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts index 88d949ac..be120d3e 100644 --- a/lib/sea/SeaOperationBackend.ts +++ b/lib/sea/SeaOperationBackend.ts @@ -767,13 +767,12 @@ export default class SeaOperationBackend implements IOperationBackend { // SeaResultsProvider consumes only `fetchNextBatch`; both the async result // handle and the blocking statement satisfy that surface. this.resultsProvider = new SeaResultsProvider(handle as unknown as SeaStatement); - // The kernel always delivers native Arrow Decimal128 / Int64 (there is no - // server-side "decimals as string" mode like Thrift's - // `useArrowNativeTypes=false`), so preserve their precision here — DECIMAL - // as an exact string, BIGINT as a `bigint` — matching the precise values - // the Thrift backend returns by default. 
+ // DECIMAL/BIGINT precision preservation is opt-in via the + // `preserveBigNumericPrecision` connection option (default off). The kernel + // always delivers native Arrow Decimal128 / Int64, so when enabled the + // converter renders DECIMAL as an exact string and BIGINT as a `bigint`. const converter = new ArrowResultConverter(this.context, this.resultsProvider, metadata, { - preserveBigNumericPrecision: true, + preserveBigNumericPrecision: this.context.getConfig().preserveBigNumericPrecision ?? false, }); this.resultSlicer = new ResultSlicer(this.context, converter); return this.resultSlicer; diff --git a/lib/thrift-backend/ThriftOperationBackend.ts b/lib/thrift-backend/ThriftOperationBackend.ts index 739c0c1d..316e5da7 100644 --- a/lib/thrift-backend/ThriftOperationBackend.ts +++ b/lib/thrift-backend/ThriftOperationBackend.ts @@ -334,6 +334,7 @@ export default class ThriftOperationBackend implements IOperationBackend { this.context, new ArrowResultHandler(this.context, this._data, metadata), metadata, + { preserveBigNumericPrecision: this.context.getConfig().preserveBigNumericPrecision ?? false }, ); break; case TSparkRowSetType.URL_BASED_SET: @@ -341,6 +342,7 @@ export default class ThriftOperationBackend implements IOperationBackend { this.context, new CloudFetchResultHandler(this.context, this._data, metadata, this.id), metadata, + { preserveBigNumericPrecision: this.context.getConfig().preserveBigNumericPrecision ?? false }, ); break; // no default From 821c2f6ed9b9fa2472fbc7f07d00da8218106824 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 8 Jun 2026 06:50:59 +0000 Subject: [PATCH 12/13] fix(sea): address PR review comments + fix failing unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Failing unit test (TS2737): ArrowResultConverter.test.ts used BigInt literals (`123n`), which don't compile at the tsconfig ES2018 target the unit-test job typechecks against. 
Replaced with `BigInt('123')` calls (the BigInt function is available at ES2018; only the literal syntax is gated). Review comments: - C1 (rich-status propagation untested): the kernel-statement fakes returned null for all four rich-status accessors, so the propagation through `op.status()` was never exercised with a real value. Parameterized the fakes and added a sync-DML test asserting numModifiedRows / displayMessage / diagnosticInfo / errorDetailsJson all surface, plus an all-null SELECT case. (Verified live: the SEA REST server delivers DML counts as a `num_affected_rows` result column rather than on the status envelope, so the kernel accessor returns null against current SEA warehouses by design; Thrift on the same warehouse surfaces Int64(4) on status. The wiring is correct and will surface values when a server populates the status field.) - C2 (synthesizeThriftStatus rich fields untested): added tests for the Int64 re-boxing of numModifiedRows (including 0 vs absent), the null -> undefined mapping, and the string passthrough fields. - C3 (header injection): customHeaders values/names were forwarded verbatim; CR/LF/NUL enable HTTP header injection and the kernel only rejects them at connect time with an opaque error. Added early validation in buildSeaHttpOptions throwing a clear HiveDriverError that names the offending header (validated before the reserved-name drop). Verified live against pecotesting. - C4 (dead conjunct): simplified `if (this.asyncStatement && !this.cancellableExecution)` to `if (this.asyncStatement)` — the constructor already guarantees the handle kinds are mutually exclusive. - C5 (redundant FFI reads): memoized readRichStatusFields so a re-status() of a terminal operation reuses the read instead of re-hitting the four kernel accessors. Covered by the C1 read-count assertion. 
Co-authored-by: Isaac --- lib/sea/SeaAuth.ts | 23 +++++ lib/sea/SeaOperationBackend.ts | 22 ++++- .../unit/result/ArrowResultConverter.test.ts | 18 ++-- tests/unit/sea/connectionOptions.test.ts | 33 +++++++ tests/unit/sea/execution.test.ts | 87 +++++++++++++++++-- .../unit/thrift-backend/wireSynthesis.test.ts | 49 +++++++++++ 6 files changed, 214 insertions(+), 18 deletions(-) diff --git a/lib/sea/SeaAuth.ts b/lib/sea/SeaAuth.ts index 8872c265..b7611e7b 100644 --- a/lib/sea/SeaAuth.ts +++ b/lib/sea/SeaAuth.ts @@ -359,12 +359,35 @@ export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions { */ const KERNEL_MANAGED_HEADERS = new Set(['authorization', 'x-databricks-org-id']); +// CR / LF / NUL in a header name or value enable request-splitting / header +// injection. The kernel's HTTP client (reqwest) does reject these, but only at +// connect time and with an opaque "Failed to construct HTTP client: +// InvalidArgument: failed to parse header value" error that names neither the +// offending header nor the cause. Reject them here, before the FFI hop, with a +// clear error so a caller gets actionable signal at the point they set the +// header (verified against pecotesting: the kernel otherwise surfaces the +// opaque construction error). +const FORBIDDEN_HEADER_CHARS = /[\r\n\0]/; + +function validateHeaderToken(kind: 'name' | 'value', headerName: string, token: string): void { + if (FORBIDDEN_HEADER_CHARS.test(token)) { + throw new HiveDriverError( + `SEA backend: customHeaders ${kind} for \`${headerName}\` contains a forbidden control character ` + + `(CR, LF, or NUL). 
Such characters enable HTTP header injection and are rejected.`, + ); + } +} + export function buildSeaHttpOptions(options: ConnectionOptions): SeaHttpOptions { const { customHeaders, userAgentEntry } = options; const headers: Array<{ name: string; value: string }> = []; if (customHeaders) { for (const [name, value] of Object.entries(customHeaders)) { + // Reject CR/LF/NUL in either the name or the value before forwarding — + // a clear, early error instead of the kernel's opaque connect-time throw. + validateHeaderToken('name', name, name); + validateHeaderToken('value', name, value); // Drop kernel-managed reserved names before the FFI hop — same // double-wall as the Python connector's `_KERNEL_MANAGED_HEADERS`. if (KERNEL_MANAGED_HEADERS.has(name.toLowerCase())) { diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts index be120d3e..f00a4cbc 100644 --- a/lib/sea/SeaOperationBackend.ts +++ b/lib/sea/SeaOperationBackend.ts @@ -192,6 +192,13 @@ export default class SeaOperationBackend implements IOperationBackend { // sync-execute path once `cancellableExecution.result()` settles. private blockingStatement?: SeaOperationStatement; + // Memoized rich-status read. `readRichStatusFields()` is only ever invoked + // once the operation is terminal (the Succeeded branches of `status()` and + // the `seaFinished` progress callback), and the kernel's terminal response is + // immutable, so the FFI accessors are read exactly once and the result + // reused — re-`status()`-ing a completed operation is then free. + private richStatusFieldsPromise?: Promise<SeaRichStatusFields>; + // The cancel/close surface — whichever handle backs this operation. Both // `AsyncStatement` and `Statement` expose `cancel()` / `close()`; the // sync-execute path uses a composite that routes `cancel()` to the @@ -474,7 +481,14 @@ export default class SeaOperationBackend implements IOperationBackend { * status-field read must not turn a successful operation's status query into * a throw.
The fields are best-effort metadata, not the operation outcome. */ - private async readRichStatusFields(): Promise<SeaRichStatusFields> { + private readRichStatusFields(): Promise<SeaRichStatusFields> { + if (!this.richStatusFieldsPromise) { + this.richStatusFieldsPromise = this.computeRichStatusFields(); + } + return this.richStatusFieldsPromise; + } + + private async computeRichStatusFields(): Promise<SeaRichStatusFields> { const empty: SeaRichStatusFields = { numModifiedRows: null, displayMessage: null, @@ -483,8 +497,10 @@ export default class SeaOperationBackend implements IOperationBackend { }; // The async path never produces a terminal sync `Statement`, so there is - // nothing to read these off of. - if (this.asyncStatement && !this.cancellableExecution) { + // nothing to read these off of. (The constructor guarantees exactly one of + // `asyncStatement` / `statement` / `cancellableExecution`, so `asyncStatement` + // being set already implies `cancellableExecution` is undefined.) + if (this.asyncStatement) { return empty; } diff --git a/tests/unit/result/ArrowResultConverter.test.ts b/tests/unit/result/ArrowResultConverter.test.ts index 74379b12..44c9306c 100644 --- a/tests/unit/result/ArrowResultConverter.test.ts +++ b/tests/unit/result/ArrowResultConverter.test.ts @@ -215,7 +215,7 @@ describe('ArrowResultConverter', () => { it('preserves BIGINT precision as bigint when preserveBigNumericPrecision is set', async () => { // 9007199254740993 = Number.MAX_SAFE_INTEGER + 2 — not exactly // representable as a JS number.
- const table = tableFromArrays({ big_value: BigInt64Array.from([9007199254740993n, 5n]) }); + const table = tableFromArrays({ big_value: BigInt64Array.from([BigInt('9007199254740993'), BigInt('5')]) }); const rowSetProvider = new ResultsProviderStub( [{ batches: [createSampleArrowBatch(table.batches[0])], rowCount: 2 }], emptyItem, @@ -227,13 +227,13 @@ describe('ArrowResultConverter', () => { { preserveBigNumericPrecision: true }, ); expect(await result.fetchNext({ limit: 10000 })).to.deep.equal([ - { big_value: 9007199254740993n }, - { big_value: 5n }, + { big_value: BigInt('9007199254740993') }, + { big_value: BigInt('5') }, ]); }); it('narrows BIGINT to a (lossy) number by default — preserves the Thrift contract', async () => { - const table = tableFromArrays({ big_value: BigInt64Array.from([9007199254740993n, 5n]) }); + const table = tableFromArrays({ big_value: BigInt64Array.from([BigInt('9007199254740993'), BigInt('5')]) }); const rowSetProvider = new ResultsProviderStub( [{ batches: [createSampleArrowBatch(table.batches[0])], rowCount: 2 }], emptyItem, @@ -246,10 +246,10 @@ describe('ArrowResultConverter', () => { }); it('formats unscaled decimals to exact strings (bigNumDecimalToString)', () => { - expect(bigNumDecimalToString(1234567890n, 5)).to.equal('12345.67890'); // trailing zero kept - expect(bigNumDecimalToString(-1234567890123456789n, 4)).to.equal('-123456789012345.6789'); - expect(bigNumDecimalToString(5n, 2)).to.equal('0.05'); // leading zero synthesized - expect(bigNumDecimalToString(-5n, 2)).to.equal('-0.05'); - expect(bigNumDecimalToString(12345n, 0)).to.equal('12345'); // scale 0 → integer string + expect(bigNumDecimalToString(BigInt('1234567890'), 5)).to.equal('12345.67890'); // trailing zero kept + expect(bigNumDecimalToString(BigInt('-1234567890123456789'), 4)).to.equal('-123456789012345.6789'); + expect(bigNumDecimalToString(BigInt('5'), 2)).to.equal('0.05'); // leading zero synthesized + expect(bigNumDecimalToString(BigInt('-5'), 
2)).to.equal('-0.05'); + expect(bigNumDecimalToString(BigInt('12345'), 0)).to.equal('12345'); // scale 0 → integer string }); }); diff --git a/tests/unit/sea/connectionOptions.test.ts b/tests/unit/sea/connectionOptions.test.ts index 678f5c40..861d54ed 100644 --- a/tests/unit/sea/connectionOptions.test.ts +++ b/tests/unit/sea/connectionOptions.test.ts @@ -273,6 +273,39 @@ describe('SeaAuth HTTP options (buildSeaHttpOptions)', () => { expect(native.customHeaders?.find((h) => h.name === 'X-Trace')?.value).to.equal('abc'); expect(native.customHeaders?.find((h) => h.name === 'User-Agent')?.value).to.contain('MyApp/2.0'); }); + + describe('rejects header-injection control characters (CR / LF / NUL)', () => { + // The kernel HTTP client does reject these, but only at connect time with an + // opaque "Failed to construct HTTP client: InvalidArgument" error (verified + // against pecotesting). We reject earlier, naming the offending header. + const injections: Array<[string, Record<string, string>]> = [ + ['CRLF in value', { 'X-Evil': 'ok\r\nInjected-Header: pwned' }], + ['bare LF in value', { 'X-Evil': 'a\nb' }], + ['bare CR in value', { 'X-Evil': 'a\rb' }], + ['NUL in value', { 'X-Evil': 'a\0b' }], + ['CRLF in name', { 'X-Ev\r\nil': 'v' }], + ['NUL in name', { 'X-Ev\0il': 'v' }], + ]; + for (const [label, customHeaders] of injections) { + it(`throws HiveDriverError on ${label}`, () => { + expect(() => buildSeaHttpOptions(opts({ customHeaders }))).to.throw(HiveDriverError, /forbidden control character/); + }); + } + + it('does not throw on a valid header containing spaces, tabs, and punctuation', () => { + expect(() => + buildSeaHttpOptions(opts({ customHeaders: { 'X-Ok': 'Bearer abc.def-123; q=0.9\tfoo' } })), + ).to.not.throw(); + }); + + it('validates a reserved header before dropping it (injection via Authorization is still rejected)', () => { + // Reserved-name drop must not let a CR/LF-laced reserved header slip past + // validation — validate first, then drop.
+ expect(() => + buildSeaHttpOptions(opts({ customHeaders: { Authorization: 'Bearer x\r\nInjected: 1' } })), + ).to.throw(HiveDriverError, /forbidden control character/); + }); + }); }); describe('SeaAuth retry options — buildSeaRetryOptions', () => { diff --git a/tests/unit/sea/execution.test.ts b/tests/unit/sea/execution.test.ts index e523d0e2..8955e003 100644 --- a/tests/unit/sea/execution.test.ts +++ b/tests/unit/sea/execution.test.ts @@ -59,24 +59,50 @@ class FakeNativeStatement implements SeaStatement { this.closed = true; } - // Status accessors added by the kernel's status-fields surface. + // Status accessors added by the kernel's status-fields surface. The values + // are configurable so a test can assert non-null rich-status (e.g. a DML + // `numModifiedRows`) propagates through `op.status()`; they default to all- + // null, matching a SELECT / metadata statement that carries none. + public rich: SeaRichStatusValues = { + numModifiedRows: null, + displayMessage: null, + diagnosticInfo: null, + errorDetailsJson: null, + }; + + // Counts every rich-field accessor call so a test can assert the backend + // memoizes the read on a terminal statement (re-`status()` must not re-hit + // the FFI accessors). + public richReads = 0; + public async numModifiedRows(): Promise<number | null> { - return null; + this.richReads += 1; + return this.rich.numModifiedRows; } public async displayMessage(): Promise<string | null> { - return null; + this.richReads += 1; + return this.rich.displayMessage; } public async diagnosticInfo(): Promise<string | null> { - return null; + this.richReads += 1; + return this.rich.diagnosticInfo; } public async errorDetailsJson(): Promise<string | null> { - return null; + this.richReads += 1; + return this.rich.errorDetailsJson; } } +interface SeaRichStatusValues { + numModifiedRows: number | null; + displayMessage: string | null; + diagnosticInfo: string | null; + errorDetailsJson: string | null; +} + /** * Fake `AsyncStatement` (the `submitStatement` return).
`status()` reports a * configurable state (default Succeeded); `awaitResult()` yields a fetch handle @@ -223,6 +249,10 @@ class FakeNativeConnection implements SeaConnection { return this.statementToReturn; } + // Rich status the next sync-execute's terminal Statement should report + // (e.g. a DML `numModifiedRows`). Defaults to all-null (a SELECT). + public richStatus?: SeaRichStatusValues; + // Sync (`runAsync: false`, the DEFAULT) query path: records sql + options and // returns a pending CancellableExecution whose result() drives the execute. public async executeStatementCancellable(sql: string, options?: unknown): Promise { @@ -231,7 +261,11 @@ class FakeNativeConnection implements SeaConnection { } this.lastSql = sql; this.lastOptions = options; - this.lastCancellableExecution = new FakeCancellableExecution(); + const resultHandle = new FakeNativeStatement(); + if (this.richStatus) { + resultHandle.rich = this.richStatus; + } + this.lastCancellableExecution = new FakeCancellableExecution(resultHandle); return this.lastCancellableExecution; } @@ -1148,4 +1182,45 @@ describe('SeaOperationBackend — sync (executeStatementCancellable) path', () = expect(status.isSuccess).to.equal(true); expect(exec.resultHandle.closed).to.equal(false); }); + + it('surfaces the kernel rich-status fields (numModifiedRows etc.) through op.status() and memoizes the read', async () => { + // A DML statement's terminal kernel `Statement` carries numModifiedRows / + // displayMessage / diagnosticInfo / errorDetailsJson. Drive the sync execute + // to terminal and assert each non-null field propagates through op.status() + // (previously the fakes returned all-null, so this propagation was untested). 
+ const resultHandle = new FakeNativeStatement(); + resultHandle.rich = { + numModifiedRows: 42, + displayMessage: 'INSERT 0 42', + diagnosticInfo: 'stage 1/1 finished', + errorDetailsJson: '{"detail":"none"}', + }; + const exec = new FakeCancellableExecution(resultHandle); + const op = makeSyncOp(exec); + await op.waitUntilReady(); + + const status = await op.status(false); + expect(status.state).to.equal(OperationState.Succeeded); + expect(status.numModifiedRows).to.equal(42); + expect(status.displayMessage).to.equal('INSERT 0 42'); + expect(status.diagnosticInfo).to.equal('stage 1/1 finished'); + expect(status.errorDetailsJson).to.equal('{"detail":"none"}'); + + // C5: re-status()-ing a completed op reuses the memoized read — the four FFI + // accessors fire exactly once across both status() calls (4 reads, not 8). + await op.status(false); + expect(resultHandle.richReads).to.equal(4); + }); + + it('reports all-null rich-status for a SELECT (no rows modified) — the default', async () => { + // A read-only statement carries no numModifiedRows; the backend surfaces + // null rather than fabricating a value. 
+ const op = makeSyncOp(new FakeCancellableExecution()); + await op.waitUntilReady(); + const status = await op.status(false); + expect(status.numModifiedRows).to.equal(null); + expect(status.displayMessage).to.equal(null); + expect(status.diagnosticInfo).to.equal(null); + expect(status.errorDetailsJson).to.equal(null); + }); }); diff --git a/tests/unit/thrift-backend/wireSynthesis.test.ts b/tests/unit/thrift-backend/wireSynthesis.test.ts index 685ec3cd..40e1bc10 100644 --- a/tests/unit/thrift-backend/wireSynthesis.test.ts +++ b/tests/unit/thrift-backend/wireSynthesis.test.ts @@ -1,4 +1,5 @@ import { expect } from 'chai'; +import Int64 from 'node-int64'; import { TOperationState, TSparkRowSetType, TStatusCode } from '../../../thrift/TCLIService_types'; import { OperationState, OperationStatus } from '../../../lib/contracts/OperationStatus'; import { ResultFormat, ResultMetadata } from '../../../lib/contracts/ResultMetadata'; @@ -68,6 +69,54 @@ describe('wireSynthesis', () => { expect(resp.errorMessage).to.equal('should-not-elevate-to-error-but-still-passed-through'); expect(resp.sqlState).to.equal('01000'); }); + + describe('rich status fields (numModifiedRows / displayMessage / diagnosticInfo / errorDetailsJson)', () => { + it('re-boxes numModifiedRows as a Thrift Int64 (matching the Thrift deserializer wire shape)', () => { + const resp = synthesizeThriftStatus({ ...baseStatus, numModifiedRows: 5 }); + expect(resp.numModifiedRows, 'should be a node-int64 Int64').to.be.instanceOf(Int64); + expect((resp.numModifiedRows as Int64).toNumber()).to.equal(5); + }); + + it('re-boxes numModifiedRows: 0 as Int64(0) — a real zero-row DML result, not "absent"', () => { + const resp = synthesizeThriftStatus({ ...baseStatus, numModifiedRows: 0 }); + expect(resp.numModifiedRows, '0 is a value, not a missing field').to.be.instanceOf(Int64); + expect((resp.numModifiedRows as Int64).toNumber()).to.equal(0); + }); + + it('maps a null numModifiedRows (server did not supply) to 
undefined, matching the Thrift path', () => { + const resp = synthesizeThriftStatus({ ...baseStatus, numModifiedRows: null }); + expect(resp.numModifiedRows).to.equal(undefined); + }); + + it('maps an absent numModifiedRows to undefined', () => { + const resp = synthesizeThriftStatus({ ...baseStatus }); + expect(resp.numModifiedRows).to.equal(undefined); + }); + + it('passes displayMessage / diagnosticInfo / errorDetailsJson through as strings', () => { + const resp = synthesizeThriftStatus({ + ...baseStatus, + displayMessage: 'INSERT 0 5', + diagnosticInfo: 'stage 1/1 finished', + errorDetailsJson: '{"detail":"none"}', + }); + expect(resp.displayMessage).to.equal('INSERT 0 5'); + expect(resp.diagnosticInfo).to.equal('stage 1/1 finished'); + expect(resp.errorDetailsJson).to.equal('{"detail":"none"}'); + }); + + it('maps null string fields to undefined (absent, not the literal null)', () => { + const resp = synthesizeThriftStatus({ + ...baseStatus, + displayMessage: null, + diagnosticInfo: null, + errorDetailsJson: null, + }); + expect(resp.displayMessage).to.equal(undefined); + expect(resp.diagnosticInfo).to.equal(undefined); + expect(resp.errorDetailsJson).to.equal(undefined); + }); + }); }); describe('synthesizeThriftResultSetMetadata', () => { From e202af3d59995d93e4b827007fc60545b2bf8b2b Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 8 Jun 2026 09:38:15 +0000 Subject: [PATCH 13/13] chore(sea): bump KERNEL_REV to pick up DML num_modified_rows derivation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Points the pinned kernel at databricks-sql-kernel 81c6907 (PR #144), which derives `num_modified_rows` for SEA DML from the result set so the existing op-status wiring (readRichStatusFields → synthesizeThriftStatus) surfaces numModifiedRows for INSERT/UPDATE/DELETE/MERGE at Thrift parity. 
The napi contract (native/sea/index.d.ts / index.js) is unchanged — the `numModifiedRows` accessor already existed; only its kernel-side implementation changed — so no binding-contract update is needed. Verified live against pecotesting (http_path2): SEA now returns numModifiedRows 3/2/1/2 for INSERT/UPDATE/DELETE/MERGE, matching Thrift exactly, and DML fetchAll() stays byte-identical across both backends (the count row is read non-consuming). Co-authored-by: Isaac --- KERNEL_REV | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KERNEL_REV b/KERNEL_REV index e838aedf..6bea04ad 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -fcc459bbf3f39bf57e2ee02f14b99c0ec7a70123 +81c69078f8cbc59391824887a7d4be666ece9510