diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 4bc6803eb..decbb385c 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -71,8 +71,10 @@ override template values so sandbox images cannot spoof identity, callback, or relay settings. Credential placeholders in proxied HTTP requests can be resolved by the proxy -when policy allows the target endpoint. Secrets must not be logged in OCSF or -plain tracing output. +when policy allows the target endpoint. For GCP providers, a loopback metadata +server inside the network namespace serves placeholders to SDKs that bypass the +proxy (e.g. Go's `cloud.google.com/go/compute/metadata`). Secrets must not be +logged in OCSF or plain tracing output. ## Connect and Logs diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 2254f0c89..0f9c783d5 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -740,7 +740,7 @@ enum ProviderCommands { /// Configure credentials from gcloud Application Default Credentials /// (`~/.config/gcloud/application_default_credentials.json`). - /// Only valid for google-vertex-ai providers. + /// Valid for providers whose profile declares an ADC-compatible credential. 
#[arg(long, group = "cred_source", conflicts_with_all = ["from_existing", "credentials"])] from_gcloud_adc: bool, diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 9988d46db..354b143d9 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -4230,7 +4230,7 @@ fn read_gcloud_adc() -> Result<(String, String, String)> { Ok((client_id, client_secret, refresh_token)) } -async fn rollback_provider_create_after_vertex_adc_failure( +async fn rollback_provider_create_after_gcloud_adc_failure( client: &mut crate::tls::GrpcClient, provider_name: &str, stage: &str, @@ -4243,7 +4243,7 @@ async fn rollback_provider_create_after_vertex_adc_failure( .await { Ok(_) => Err(miette!( - "failed to {stage} Vertex AI credentials from gcloud ADC for provider '{provider_name}': {source}. \ + "failed to {stage} credentials from gcloud ADC for provider '{provider_name}': {source}. \ The provider was rolled back successfully." )), Err(cleanup_err) => { @@ -4257,7 +4257,7 @@ async fn rollback_provider_create_after_vertex_adc_failure( provider_name ); Err(miette!( - "failed to {stage} Vertex AI credentials from gcloud ADC for provider '{provider_name}': {source}. \ + "failed to {stage} credentials from gcloud ADC for provider '{provider_name}': {source}. \ Cleanup also failed, so the provider may still exist. \ Run 'openshell provider delete {provider_name}' to remove it manually." )) @@ -4366,6 +4366,9 @@ async fn discover_existing_provider_data( /// Canonical provider type string for Google Vertex AI. const VERTEX_AI_PROVIDER_TYPE: &str = "google-vertex-ai"; +/// Canonical provider type string for Google Cloud (GCP APIs). 
+const GOOGLE_CLOUD_PROVIDER_TYPE: &str = "google-cloud"; + fn missing_credentials_error(provider_type: &str) -> miette::Report { if provider_type == VERTEX_AI_PROVIDER_TYPE { return miette::miette!( @@ -4376,6 +4379,14 @@ fn missing_credentials_error(provider_type: &str) -> miette::Report { ); } + if provider_type == GOOGLE_CLOUD_PROVIDER_TYPE { + return miette::miette!( + "no credentials resolved for provider type '{provider_type}'. \ + Set GCP_ADC_ACCESS_TOKEN or GCP_SA_ACCESS_TOKEN; \ + or use --from-gcloud-adc / --from-existing with those env vars set." + ); + } + miette::miette!( "no credentials resolved for provider type '{provider_type}'. \ Use --credential KEY[=VALUE] or --from-existing with the appropriate env vars set." @@ -4434,11 +4445,34 @@ pub async fn provider_create( } }; - if from_gcloud_adc && provider_type != VERTEX_AI_PROVIDER_TYPE { - return Err(miette::miette!( - "--from-gcloud-adc is only valid for google-vertex-ai providers" - )); - } + let adc_credential_key = if from_gcloud_adc { + let profile = + openshell_providers::get_default_profile(&provider_type).ok_or_else(|| { + miette::miette!( + "--from-gcloud-adc requires a built-in provider profile, \ + but '{provider_type}' has none" + ) + })?; + let adc_cred = profile.adc_credential().ok_or_else(|| { + miette::miette!( + "--from-gcloud-adc is not supported for '{provider_type}' providers \ + (no ADC-compatible credential in the provider profile)" + ) + })?; + Some( + adc_cred + .env_vars + .first() + .ok_or_else(|| { + miette::miette!( + "ADC credential in '{provider_type}' profile has no env_vars declared" + ) + })? + .clone(), + ) + } else { + None + }; let mut credential_map = parse_credential_pairs(credentials)?; let mut config_map = parse_key_value_pairs(config, "--config")?; @@ -4473,10 +4507,12 @@ pub async fn provider_create( } // Validate and read the ADC file BEFORE creating the provider so that - // a bad/missing ADC does not leave an orphan provider behind. 
- let gcloud_adc_material = if from_gcloud_adc { + // a bad/missing ADC does not leave an orphan provider behind. Bundle the + // credential key with the material so they stay coupled. + let gcloud_adc_bootstrap = if from_gcloud_adc { let (client_id, client_secret, refresh_token) = read_gcloud_adc()?; - Some((client_id, client_secret, refresh_token)) + let key = adc_credential_key.expect("set when from_gcloud_adc is true"); + Some((key, client_id, client_secret, refresh_token)) } else { None }; @@ -4506,7 +4542,9 @@ pub async fn provider_create( .ok_or_else(|| miette::miette!("provider missing from response"))?; let provider_name = provider.object_name().to_string(); - if let Some((client_id, client_secret, refresh_token)) = gcloud_adc_material { + if let Some((adc_credential_key, client_id, client_secret, refresh_token)) = + gcloud_adc_bootstrap + { let mut material = HashMap::new(); material.insert("client_id".to_string(), client_id); material.insert("client_secret".to_string(), client_secret); @@ -4515,7 +4553,7 @@ pub async fn provider_create( if let Err(configure_err) = client .configure_provider_refresh(ConfigureProviderRefreshRequest { provider: provider_name.clone(), - credential_key: openshell_core::inference::VERTEX_AI_ADC_TOKEN_KEY.to_string(), + credential_key: adc_credential_key.clone(), strategy: ProviderCredentialRefreshStrategy::Oauth2RefreshToken as i32, material, secret_material_keys: vec![ @@ -4526,7 +4564,7 @@ pub async fn provider_create( }) .await { - return rollback_provider_create_after_vertex_adc_failure( + return rollback_provider_create_after_gcloud_adc_failure( &mut client, &provider_name, "configure", @@ -4538,11 +4576,11 @@ pub async fn provider_create( if let Err(rotate_err) = client .rotate_provider_credential(RotateProviderCredentialRequest { provider: provider_name.clone(), - credential_key: openshell_core::inference::VERTEX_AI_ADC_TOKEN_KEY.to_string(), + credential_key: adc_credential_key, }) .await { - return 
rollback_provider_create_after_vertex_adc_failure( + return rollback_provider_create_after_gcloud_adc_failure( &mut client, &provider_name, "mint the initial access token for", @@ -4552,9 +4590,7 @@ pub async fn provider_create( } println!("{} Created provider {}", "✓".green().bold(), provider_name); - println!( - "Configured Vertex AI credentials from gcloud ADC and minted the initial access token" - ); + println!("Configured GCP credentials from gcloud ADC and minted the initial access token"); return Ok(()); } diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index ed78c6659..84aa6f3cb 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -2123,8 +2123,7 @@ async fn provider_create_from_gcloud_adc_rejects_wrong_provider_type_before_cred .expect_err("wrong provider type should fail before generic credential validation"); assert!( - err.to_string() - .contains("--from-gcloud-adc is only valid for google-vertex-ai providers"), + err.to_string().contains("--from-gcloud-adc"), "unexpected error: {err}" ); assert!(ts.state.providers.lock().await.is_empty()); diff --git a/crates/openshell-core/src/google_cloud.rs b/crates/openshell-core/src/google_cloud.rs new file mode 100644 index 000000000..fcab45ae0 --- /dev/null +++ b/crates/openshell-core/src/google_cloud.rs @@ -0,0 +1,111 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Shared GCP constants for the metadata emulator, provider env injection, +//! and credential resolution. +//! +//! This module is the single source of truth for GCP naming: env var aliases, +//! provider config keys, token search order, and Vertex-specific env vars. +//! `openshell-server`, `openshell-providers`, and `openshell-sandbox` +//! import from here. 
+ +// ── Metadata emulator ─────────────────────────────────────────────────────── + +/// Hostname served by the GCE metadata emulator via proxy interception. +pub const METADATA_HOST: &str = "gcp.metadata.openshell.internal"; + +/// Loopback address for the GCE metadata server inside sandbox namespaces. +/// Go's metadata client dials this directly (bypasses `HTTP_PROXY`). +pub const METADATA_LOOPBACK_ADDR: &str = "127.0.0.1:8174"; + +// ── Env var alias arrays ──────────────────────────────────────────────────── + +/// Env vars that carry the GCP project ID inside sandboxes. +pub const PROJECT_ID_ENV_VARS: &[&str] = &["GCP_PROJECT_ID", "GOOGLE_CLOUD_PROJECT"]; + +/// Env vars that carry the GCP region/location inside sandboxes. +pub const REGION_ENV_VARS: &[&str] = &["CLOUD_ML_REGION", "GCP_LOCATION"]; + +/// Env vars that carry the GCP service account email inside sandboxes. +pub const SERVICE_ACCOUNT_EMAIL_ENV_VARS: &[&str] = &["GCP_SERVICE_ACCOUNT_EMAIL"]; + +// ── Provider config keys ──────────────────────────────────────────────────── + +/// Config key for project ID in `gcp` providers. +pub const GCP_PROJECT_ID_CONFIG_KEY: &str = "project_id"; + +/// Config key for region in `gcp` providers. +pub const GCP_REGION_CONFIG_KEY: &str = "region"; + +/// Config key for service account email in `gcp` providers. +pub const GCP_SERVICE_ACCOUNT_EMAIL_CONFIG_KEY: &str = "service_account_email"; + +// ── Token search order ────────────────────────────────────────────────────── + +/// GCP token env vars searched in priority order by the metadata emulator. +/// SA token wins over ADC if both are configured, matching GCP's own +/// credential precedence. +pub const TOKEN_ENV_KEYS: &[&str] = &["GCP_SA_ACCESS_TOKEN", "GCP_ADC_ACCESS_TOKEN"]; + +// ── Vertex-specific env vars ──────────────────────────────────────────────── + +/// Env var injected to signal Vertex AI usage to Goose. 
+pub const GOOSE_PROVIDER_ENV_VAR: &str = "GOOSE_PROVIDER"; + +/// Env var for Anthropic Vertex project ID (consumed by Claude Code SDK). +pub const ANTHROPIC_VERTEX_PROJECT_ID_ENV_VAR: &str = "ANTHROPIC_VERTEX_PROJECT_ID"; + +/// Env var for Vertex location (consumed by Claude Code SDK). +pub const VERTEX_LOCATION_ENV_VAR: &str = "VERTEX_LOCATION"; + +/// Non-secret GCP/Vertex config vars that must be resolved to real values +/// in the child environment. Everything else stays as placeholders for +/// proxy-time resolution. +/// +/// This list MUST be the union of all alias arrays above plus all +/// Vertex-specific env vars. If you add an alias to `PROJECT_ID_ENV_VARS`, +/// `REGION_ENV_VARS`, or a Vertex constant, add it here too. +pub const STATIC_CONFIG_KEYS: &[&str] = &[ + // project_id aliases + "GCP_PROJECT_ID", + "GOOGLE_CLOUD_PROJECT", + // region aliases + "CLOUD_ML_REGION", + "GCP_LOCATION", + // service account email + "GCP_SERVICE_ACCOUNT_EMAIL", + // Vertex-specific non-secret config + GOOSE_PROVIDER_ENV_VAR, + ANTHROPIC_VERTEX_PROJECT_ID_ENV_VAR, + VERTEX_LOCATION_ENV_VAR, +]; + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + + #[test] + fn static_config_keys_matches_alias_arrays_and_vertex_vars() { + let expected: HashSet<&str> = PROJECT_ID_ENV_VARS + .iter() + .chain(REGION_ENV_VARS) + .chain(SERVICE_ACCOUNT_EMAIL_ENV_VARS) + .copied() + .chain([ + GOOSE_PROVIDER_ENV_VAR, + ANTHROPIC_VERTEX_PROJECT_ID_ENV_VAR, + VERTEX_LOCATION_ENV_VAR, + ]) + .collect(); + let actual: HashSet<&str> = STATIC_CONFIG_KEYS.iter().copied().collect(); + assert_eq!( + expected, + actual, + "STATIC_CONFIG_KEYS must be the union of all alias arrays + Vertex vars. 
\ + Missing: {:?}, Extra: {:?}", + expected.difference(&actual).collect::<Vec<_>>(), + actual.difference(&expected).collect::<Vec<_>>(), + ); + } +} diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index c04feb6b4..f261af064 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -101,12 +101,6 @@ pub const VERTEX_AI_CREDENTIAL_KEY_NAMES: &[&str] = &[ "VERTEX_AI_TOKEN", ]; -/// The credential key used for tokens minted from gcloud Application Default Credentials. -/// -/// This is the key written by the gateway's `OAuth2` refresh worker when using the -/// `--from-gcloud-adc` CLI flow. It must match `VERTEX_AI_CREDENTIAL_KEY_NAMES[2]`. -pub const VERTEX_AI_ADC_TOKEN_KEY: &str = "GOOGLE_VERTEX_AI_TOKEN"; - /// GCP project ID config key for Vertex AI providers. pub const VERTEX_AI_PROJECT_ID_KEY: &str = "VERTEX_AI_PROJECT_ID"; diff --git a/crates/openshell-core/src/lib.rs b/crates/openshell-core/src/lib.rs index c3241cdd8..28d47073f 100644 --- a/crates/openshell-core/src/lib.rs +++ b/crates/openshell-core/src/lib.rs @@ -14,6 +14,7 @@ pub mod config; pub mod driver_utils; pub mod error; pub mod forward; +pub mod google_cloud; pub mod gpu; pub mod image; pub mod inference; diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index 1d0d5a192..fb909e82d 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -71,13 +71,12 @@ pub trait ProviderPlugin: Send + Sync { &[] } - /// Apply provider data to sandbox runtime context. + /// Inject provider-specific environment variables into the sandbox env. /// - /// Default implementation is a no-op; provider-specific runtime projection - /// can be layered in incrementally. 
- fn apply_to_sandbox(&self, _provider: &Provider) -> Result<(), ProviderError> { - Ok(()) - } + /// Called during sandbox creation to project provider config (project IDs, + /// regions, SDK flags) into env vars the sandbox process will inherit. + /// Default is a no-op; GCP and Vertex providers override this. + fn inject_env(&self, _provider: &Provider, _env: &mut HashMap<String, String>) {} } /// Blanket implementation of [`ProviderPlugin`] for [`ProviderDiscoverySpec`]. @@ -116,9 +115,11 @@ impl ProviderRegistry { registry.register(providers::openai::SPEC); registry.register(providers::anthropic::SPEC); registry.register(providers::nvidia::SPEC); - registry.register(providers::gitlab::SPEC); registry.register(providers::github::SPEC); + registry.register(providers::gitlab::SPEC); + registry.register(providers::google_cloud::GoogleCloudProvider); registry.register(providers::outlook::OutlookProvider); + registry.register(providers::vertex::VertexProvider); registry } @@ -158,6 +159,20 @@ impl ProviderRegistry { default_profiles().iter().collect() } + /// Inject provider-specific env vars via the registered plugin. + /// + /// Normalizes the provider type and delegates to the plugin's `inject_env`. + /// No-op if the provider type has no registered plugin or the plugin's + /// default implementation is a no-op. 
+ pub fn inject_env(&self, provider: &Provider, env: &mut HashMap<String, String>) { + let normalized = normalize_provider_type(&provider.r#type); + if let Some(id) = normalized + && let Some(plugin) = self.get(id) + { + plugin.inject_env(provider, env); + } + } + #[must_use] pub fn known_types(&self) -> Vec<&'static str> { let mut types = self.plugins.keys().copied().collect::<Vec<_>>(); @@ -179,6 +194,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "codex" => Some("codex"), "copilot" => Some("copilot"), "opencode" => Some("opencode"), + "gcp" | "google-cloud" => Some("google-cloud"), "generic" => Some("generic"), "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 63a6b2eb3..528478ffc 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -22,6 +22,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[ include_str!("../../../providers/copilot.yaml"), include_str!("../../../providers/cursor.yaml"), include_str!("../../../providers/github.yaml"), + include_str!("../../../providers/google-cloud.yaml"), include_str!("../../../providers/google-vertex-ai.yaml"), include_str!("../../../providers/nvidia.yaml"), include_str!("../../../providers/pypi.yaml"), @@ -330,6 +331,25 @@ impl ProviderTypeProfile { has_gateway_mintable_credential } + /// Returns the credential suitable for `--from-gcloud-adc` bootstrap, if any. + /// + /// A credential qualifies when its refresh strategy is `Oauth2RefreshToken` + /// and its material declares the three gcloud ADC keys (`client_id`, + /// `client_secret`, `refresh_token`). 
+ #[must_use] + pub fn adc_credential(&self) -> Option<&CredentialProfile> { + const ADC_MATERIAL_KEYS: &[&str] = &["client_id", "client_secret", "refresh_token"]; + + self.credentials.iter().find(|cred| { + cred.refresh.as_ref().is_some_and(|refresh| { + refresh.strategy == ProviderCredentialRefreshStrategy::Oauth2RefreshToken + && ADC_MATERIAL_KEYS + .iter() + .all(|key| refresh.material.iter().any(|m| m.name == *key)) + }) + }) + } + #[must_use] pub fn to_proto(&self) -> ProviderProfile { ProviderProfile { @@ -1268,6 +1288,73 @@ credentials: assert!(!static_only_profile.allows_gateway_refresh_bootstrap()); } + #[test] + fn adc_credential_returns_oauth2_refresh_token_credential_with_adc_material() { + let profile = get_default_profile("google-cloud").expect("google-cloud profile"); + let adc = profile + .adc_credential() + .expect("google-cloud should have an ADC credential"); + assert_eq!(adc.env_vars[0], "GCP_ADC_ACCESS_TOKEN"); + + let profile = get_default_profile("google-vertex-ai").expect("vertex profile"); + let adc = profile + .adc_credential() + .expect("vertex should have an ADC credential"); + assert_eq!(adc.env_vars[0], "GOOGLE_VERTEX_AI_TOKEN"); + } + + #[test] + fn adc_credential_returns_none_for_profiles_without_adc() { + let profile = get_default_profile("github").expect("github profile"); + assert!(profile.adc_credential().is_none()); + + let profile = get_default_profile("claude-code").expect("claude-code profile"); + assert!(profile.adc_credential().is_none()); + } + + #[test] + fn adc_credential_rejects_service_account_jwt_strategy() { + let profile = parse_profile_yaml( + r" +id: sa-only +display_name: SA Only +credentials: + - name: sa_token + env_vars: [SA_TOKEN] + refresh: + strategy: google_service_account_jwt + material: + - name: client_email + - name: private_key +", + ) + .expect("profile"); + assert!(profile.adc_credential().is_none()); + } + + #[test] + fn adc_credential_requires_all_three_material_keys() { + let profile = 
parse_profile_yaml( + r" +id: partial-material +display_name: Partial Material +credentials: + - name: token + env_vars: [TOKEN] + refresh: + strategy: oauth2_refresh_token + material: + - name: client_id + - name: client_secret +", + ) + .expect("profile"); + assert!( + profile.adc_credential().is_none(), + "missing refresh_token material should not qualify" + ); + } + #[test] fn parse_profile_yaml_reads_single_provider_document() { let profile = parse_profile_yaml( diff --git a/crates/openshell-providers/src/providers/google_cloud.rs b/crates/openshell-providers/src/providers/google_cloud.rs new file mode 100644 index 000000000..470d1d206 --- /dev/null +++ b/crates/openshell-providers/src/providers/google_cloud.rs @@ -0,0 +1,177 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; + +use openshell_core::google_cloud; + +use crate::{ + DiscoveredProvider, Provider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, + RealDiscoveryContext, discover_with_spec, +}; + +pub struct GoogleCloudProvider; + +const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "google-cloud", + credential_env_vars: google_cloud::TOKEN_ENV_KEYS, +}; + +impl ProviderPlugin for GoogleCloudProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> { + discover_with_spec(&SPEC, &RealDiscoveryContext) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } + + fn inject_env(&self, provider: &Provider, env: &mut HashMap<String, String>) { + if let Some(project) = provider + .config + .get(google_cloud::GCP_PROJECT_ID_CONFIG_KEY) + .filter(|v| !v.trim().is_empty()) + { + for var in google_cloud::PROJECT_ID_ENV_VARS { + env.entry((*var).to_string()) + .or_insert_with(|| project.trim().to_string()); + } + } + + if let Some(region) = provider + .config + 
.get(google_cloud::GCP_REGION_CONFIG_KEY) + .filter(|v| !v.trim().is_empty()) + { + for var in google_cloud::REGION_ENV_VARS { + env.entry((*var).to_string()) + .or_insert_with(|| region.trim().to_string()); + } + } + + env.entry("GCE_METADATA_HOST".to_string()) + .or_insert_with(|| google_cloud::METADATA_HOST.to_string()); + + if let Some(email) = provider + .config + .get(google_cloud::GCP_SERVICE_ACCOUNT_EMAIL_CONFIG_KEY) + .filter(|v| !v.trim().is_empty()) + { + for var in google_cloud::SERVICE_ACCOUNT_EMAIL_ENV_VARS { + env.entry((*var).to_string()) + .or_insert_with(|| email.trim().to_string()); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_provider(config: HashMap<String, String>) -> Provider { + Provider { + config, + r#type: "google-cloud".to_string(), + ..Default::default() + } + } + + #[test] + fn injects_project_id_aliases() { + let provider = make_provider(HashMap::from([( + "project_id".to_string(), + "my-project".to_string(), + )])); + let mut env = HashMap::new(); + GoogleCloudProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("GCP_PROJECT_ID").map(String::as_str), + Some("my-project") + ); + assert_eq!( + env.get("GOOGLE_CLOUD_PROJECT").map(String::as_str), + Some("my-project") + ); + } + + #[test] + fn injects_region_aliases() { + let provider = make_provider(HashMap::from([( + "region".to_string(), + "us-central1".to_string(), + )])); + let mut env = HashMap::new(); + GoogleCloudProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("CLOUD_ML_REGION").map(String::as_str), + Some("us-central1") + ); + assert_eq!( + env.get("GCP_LOCATION").map(String::as_str), + Some("us-central1") + ); + } + + #[test] + fn injects_metadata_host() { + let provider = make_provider(HashMap::new()); + let mut env = HashMap::new(); + GoogleCloudProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("GCE_METADATA_HOST").map(String::as_str), + Some(google_cloud::METADATA_HOST) + ); + } + + #[test] + fn 
injects_service_account_email() { + let provider = make_provider(HashMap::from([( + "service_account_email".to_string(), + "sa@project.iam.gserviceaccount.com".to_string(), + )])); + let mut env = HashMap::new(); + GoogleCloudProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("GCP_SERVICE_ACCOUNT_EMAIL").map(String::as_str), + Some("sa@project.iam.gserviceaccount.com") + ); + } + + #[test] + fn does_not_overwrite_existing_env() { + let provider = make_provider(HashMap::from([( + "project_id".to_string(), + "new-project".to_string(), + )])); + let mut env = + HashMap::from([("GCP_PROJECT_ID".to_string(), "existing-project".to_string())]); + GoogleCloudProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("GCP_PROJECT_ID").map(String::as_str), + Some("existing-project"), + "should not overwrite existing env" + ); + } + + #[test] + fn skips_empty_config_values() { + let provider = make_provider(HashMap::from([( + "project_id".to_string(), + " ".to_string(), + )])); + let mut env = HashMap::new(); + GoogleCloudProvider.inject_env(&provider, &mut env); + + assert!(!env.contains_key("GCP_PROJECT_ID")); + } +} diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index dfe5935a1..43727ec2b 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -37,7 +37,9 @@ pub mod copilot; pub mod generic; pub mod github; pub mod gitlab; +pub mod google_cloud; pub mod nvidia; pub mod openai; pub mod opencode; pub mod outlook; +pub mod vertex; diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs new file mode 100644 index 000000000..ad52bc51c --- /dev/null +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; + +use openshell_core::google_cloud; +use openshell_core::inference; + +use crate::{ + DiscoveredProvider, Provider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, + RealDiscoveryContext, discover_with_spec, +}; + +pub struct VertexProvider; + +const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "google-vertex-ai", + credential_env_vars: inference::VERTEX_AI_CREDENTIAL_KEY_NAMES, +}; + +impl ProviderPlugin for VertexProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> { + let mut discovered = discover_with_spec(&SPEC, &RealDiscoveryContext)?.unwrap_or_default(); + + for key in inference::VERTEX_AI_CONFIG_KEY_NAMES { + if let Ok(val) = std::env::var(key) + && !val.trim().is_empty() + { + discovered.config.entry(key.to_string()).or_insert(val); + } + } + + if discovered.is_empty() { + Ok(None) + } else { + Ok(Some(discovered)) + } + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } + + fn inject_env(&self, provider: &Provider, env: &mut HashMap<String, String>) { + if let Some(project) = provider + .config + .get(inference::VERTEX_AI_PROJECT_ID_KEY) + .filter(|v| !v.trim().is_empty()) + { + let trimmed = project.trim().to_string(); + for var in google_cloud::PROJECT_ID_ENV_VARS { + env.entry((*var).to_string()) + .or_insert_with(|| trimmed.clone()); + } + env.entry(google_cloud::ANTHROPIC_VERTEX_PROJECT_ID_ENV_VAR.to_string()) + .or_insert_with(|| trimmed.clone()); + } + + if let Some(region) = provider + .config + .get(inference::VERTEX_AI_REGION_KEY) + .filter(|v| !v.trim().is_empty()) + { + let trimmed = region.trim().to_string(); + for var in google_cloud::REGION_ENV_VARS { + env.entry((*var).to_string()) + .or_insert_with(|| trimmed.clone()); + } + env.entry(google_cloud::VERTEX_LOCATION_ENV_VAR.to_string()) + .or_insert_with(|| trimmed.clone()); + } + + 
env.entry(google_cloud::GOOSE_PROVIDER_ENV_VAR.to_string()) + .or_insert_with(|| "gcp_vertex_ai".to_string()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_provider(config: HashMap<String, String>) -> Provider { + Provider { + config, + r#type: "google-vertex-ai".to_string(), + ..Default::default() + } + } + + #[test] + fn injects_project_id_and_anthropic_alias() { + let provider = make_provider(HashMap::from([( + "VERTEX_AI_PROJECT_ID".to_string(), + "my-vertex-project".to_string(), + )])); + let mut env = HashMap::new(); + VertexProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("GCP_PROJECT_ID").map(String::as_str), + Some("my-vertex-project") + ); + assert_eq!( + env.get("GOOGLE_CLOUD_PROJECT").map(String::as_str), + Some("my-vertex-project") + ); + assert_eq!( + env.get("ANTHROPIC_VERTEX_PROJECT_ID").map(String::as_str), + Some("my-vertex-project") + ); + } + + #[test] + fn injects_region_and_vertex_location() { + let provider = make_provider(HashMap::from([( + "VERTEX_AI_REGION".to_string(), + "us-east4".to_string(), + )])); + let mut env = HashMap::new(); + VertexProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("CLOUD_ML_REGION").map(String::as_str), + Some("us-east4") + ); + assert_eq!( + env.get("GCP_LOCATION").map(String::as_str), + Some("us-east4") + ); + assert_eq!( + env.get("VERTEX_LOCATION").map(String::as_str), + Some("us-east4") + ); + } + + #[test] + fn injects_inference_flags() { + let provider = make_provider(HashMap::new()); + let mut env = HashMap::new(); + VertexProvider.inject_env(&provider, &mut env); + + assert!(!env.contains_key("CLAUDE_CODE_USE_VERTEX")); + assert_eq!( + env.get("GOOSE_PROVIDER").map(String::as_str), + Some("gcp_vertex_ai") + ); + } + + #[test] + fn does_not_overwrite_existing_env() { + let provider = make_provider(HashMap::from([( + "VERTEX_AI_PROJECT_ID".to_string(), + "new".to_string(), + )])); + let mut env = HashMap::from([("GCP_PROJECT_ID".to_string(), 
"existing".to_string())]); + VertexProvider.inject_env(&provider, &mut env); + + assert_eq!( + env.get("GCP_PROJECT_ID").map(String::as_str), + Some("existing") + ); + } + + #[test] + fn skips_empty_config_values() { + let provider = make_provider(HashMap::from([( + "VERTEX_AI_PROJECT_ID".to_string(), + " ".to_string(), + )])); + let mut env = HashMap::new(); + VertexProvider.inject_env(&provider, &mut env); + + assert!(!env.contains_key("GCP_PROJECT_ID")); + assert!(!env.contains_key("ANTHROPIC_VERTEX_PROJECT_ID")); + } +} diff --git a/crates/openshell-sandbox/src/google_cloud_metadata.rs b/crates/openshell-sandbox/src/google_cloud_metadata.rs new file mode 100644 index 000000000..ee22c97ca --- /dev/null +++ b/crates/openshell-sandbox/src/google_cloud_metadata.rs @@ -0,0 +1,535 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! GCE metadata server emulator for sandbox credential injection. +//! +//! Implements a subset of the GCE instance metadata API so that GCP client +//! libraries (Go, Python, Node.js) can obtain `OAuth2` tokens natively inside +//! sandboxes. Tokens are served from the existing `ProviderCredentialState` +//! store — no separate refresh mechanism is needed. +//! +//! The emulator runs as a loopback HTTP server inside the sandbox network +//! namespace (see [`metadata_server`](crate::metadata_server)). GCP SDKs +//! discover it via the `GCE_METADATA_HOST` environment variable, which is +//! set to the loopback address by `child_env_resolved()`. 
+ +use crate::provider_credentials::ProviderCredentialState; +use crate::secrets; +use miette::{IntoDiagnostic, Result}; +use openshell_ocsf::{ActivityId, HttpActivityBuilder, SeverityId, StatusId, ocsf_emit}; +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; + +type MetadataResponse = (u16, &'static str, String); + +const PATH_SERVICE_ACCOUNTS: &str = "/computeMetadata/v1/instance/service-accounts"; +const PATH_SERVICE_ACCOUNT_DEFAULT: &str = "/computeMetadata/v1/instance/service-accounts/default"; +const PATH_TOKEN: &str = "/computeMetadata/v1/instance/service-accounts/default/token"; +const PATH_EMAIL: &str = "/computeMetadata/v1/instance/service-accounts/default/email"; +const PATH_SCOPES: &str = "/computeMetadata/v1/instance/service-accounts/default/scopes"; +const PATH_ALIASES: &str = "/computeMetadata/v1/instance/service-accounts/default/aliases"; +const PATH_PROJECT_ID: &str = "/computeMetadata/v1/project/project-id"; + +const ENV_GCP_PROJECT_ID: &str = openshell_core::google_cloud::PROJECT_ID_ENV_VARS[0]; +const ENV_GCP_SERVICE_ACCOUNT_EMAIL: &str = + openshell_core::google_cloud::SERVICE_ACCOUNT_EMAIL_ENV_VARS[0]; + +const METADATA_FLAVOR_HEADER: &str = "metadata-flavor"; +const METADATA_FLAVOR_VALUE: &str = "Google"; +const X_FORWARDED_FOR_HEADER: &str = "x-forwarded-for"; + +#[derive(Debug, Clone)] +pub struct MetadataContext { + credentials: ProviderCredentialState, +} + +impl MetadataContext { + pub fn new(credentials: ProviderCredentialState) -> Self { + Self { credentials } + } +} + +impl crate::metadata_server::MetadataHandler for MetadataContext { + async fn handle( + &self, + method: &str, + path: &str, + request: &[u8], + stream: &mut S, + ) -> Result<()> { + handle_forward_request(self, method, path, request, stream).await + } +} + +async fn handle_forward_request( + ctx: &MetadataContext, + method: &str, + path: &str, + initial_request: &[u8], + client: &mut S, +) -> Result<()> +where + S: AsyncRead + AsyncWrite + Unpin, +{ + let headers 
= parse_request_headers(initial_request); + let (status, content_type, body) = route_request(ctx, method, path, &headers); + write_metadata_response(client, status, content_type, &body).await +} + +fn route_request( + ctx: &MetadataContext, + method: &str, + path: &str, + headers: &[(String, String)], +) -> MetadataResponse { + if method != "GET" { + emit_metadata_event( + ActivityId::Refuse, + SeverityId::Low, + StatusId::Failure, + &format!("metadata: unsupported method {method}"), + ); + return (405, "text/html", "Method Not Allowed".to_string()); + } + + if let Err(resp) = validate_metadata_headers(headers) { + emit_metadata_event( + ActivityId::Refuse, + SeverityId::Medium, + StatusId::Failure, + &format!("metadata: header validation failed for {path}"), + ); + return resp; + } + + let (route, query) = path.split_once('?').map_or((path, ""), |(r, q)| (r, q)); + let route = route.strip_suffix('/').unwrap_or(route); + let recursive = query.split('&').any(|p| p == "recursive=true"); + + match route { + PATH_TOKEN => handle_token(ctx), + PATH_EMAIL => handle_env(ctx, ENV_GCP_SERVICE_ACCOUNT_EMAIL), + PATH_PROJECT_ID => handle_env(ctx, ENV_GCP_PROJECT_ID), + PATH_ALIASES => (200, "text/plain", "default\n".to_string()), + PATH_SCOPES => ( + 200, + "text/plain", + "https://www.googleapis.com/auth/cloud-platform".to_string(), + ), + PATH_SERVICE_ACCOUNT_DEFAULT => { + if recursive { + handle_service_account_recursive(ctx) + } else { + ( + 200, + "text/plain", + "aliases\nemail\nscopes\ntoken\n".to_string(), + ) + } + } + PATH_SERVICE_ACCOUNTS => (200, "text/plain", "default/\n".to_string()), + "" | "/" | "/computeMetadata" | "/computeMetadata/v1" => { + (200, "text/plain", "computeMetadata/\n".to_string()) + } + "/computeMetadata/v1/instance" => (200, "text/plain", "service-accounts/\n".to_string()), + _ => { + emit_metadata_event( + ActivityId::Refuse, + SeverityId::Low, + StatusId::Failure, + &format!("metadata: unknown path {route}"), + ); + ( + 404, + 
"application/json", + serde_json::json!({"error": "not_found"}).to_string(), + ) + } + } +} + +fn handle_token(ctx: &MetadataContext) -> MetadataResponse { + let Some((placeholder, expires_in)) = ctx.credentials.gcp_token_response() else { + let has_resolver = ctx.credentials.resolver().is_some(); + let (msg, error_key) = if has_resolver { + ( + "metadata: no GCP access token available or expired", + "token_unavailable", + ) + } else { + ( + "metadata: token request but no credentials configured", + "credentials_unavailable", + ) + }; + emit_metadata_event(ActivityId::Fail, SeverityId::Medium, StatusId::Failure, msg); + return ( + 503, + "application/json", + serde_json::json!({"error": error_key}).to_string(), + ); + }; + + emit_metadata_event( + ActivityId::Open, + SeverityId::Informational, + StatusId::Success, + "metadata: token placeholder served", + ); + + let body = serde_json::json!({ + "access_token": placeholder, + "expires_in": expires_in, + "token_type": "Bearer" + }); + (200, "application/json", body.to_string()) +} + +fn handle_service_account_recursive(ctx: &MetadataContext) -> MetadataResponse { + let resolver = ctx.credentials.resolver(); + let email = resolver + .as_ref() + .and_then(|r| { + let p = secrets::placeholder_for_env_key(ENV_GCP_SERVICE_ACCOUNT_EMAIL); + r.resolve_placeholder(&p).map(str::to_string) + }) + .unwrap_or_default(); + + let scopes = "https://www.googleapis.com/auth/cloud-platform"; + + let body = serde_json::json!({ + "aliases": ["default"], + "email": email, + "scopes": [scopes], + }); + (200, "application/json", body.to_string()) +} + +/// Serve a non-secret config value (project ID, SA email) as plain text. +/// +/// Unlike `handle_token` which serves placeholders, this resolves to the real +/// value. This matches real GCE metadata server behavior and is safe because +/// these values are non-secret configuration (project IDs, email addresses). 
+fn handle_env(ctx: &MetadataContext, env_key: &str) -> MetadataResponse { + let Some(resolver) = ctx.credentials.resolver() else { + emit_metadata_event( + ActivityId::Fail, + SeverityId::Medium, + StatusId::Failure, + &format!("metadata: {env_key} request but no credentials configured"), + ); + return (503, "text/plain", String::new()); + }; + + let placeholder = secrets::placeholder_for_env_key(env_key); + resolver.resolve_placeholder(&placeholder).map_or_else( + || { + emit_metadata_event( + ActivityId::Fail, + SeverityId::Low, + StatusId::Failure, + &format!("metadata: {env_key} not configured"), + ); + ( + 404, + "application/json", + serde_json::json!({"error": "not_found"}).to_string(), + ) + }, + |value| (200, "text/plain", value.to_string()), + ) +} + +fn validate_metadata_headers(headers: &[(String, String)]) -> Result<(), MetadataResponse> { + if headers + .iter() + .any(|(name, _)| name.eq_ignore_ascii_case(X_FORWARDED_FOR_HEADER)) + { + return Err((403, "text/html", "Forbidden".to_string())); + } + + let has_flavor = headers.iter().any(|(name, value)| { + name.eq_ignore_ascii_case(METADATA_FLAVOR_HEADER) + && value.trim().eq_ignore_ascii_case(METADATA_FLAVOR_VALUE) + }); + if !has_flavor { + return Err((403, "text/html", "Forbidden".to_string())); + } + + Ok(()) +} + +fn parse_request_headers(raw: &[u8]) -> Vec<(String, String)> { + let request = String::from_utf8_lossy(raw); + let mut headers = Vec::new(); + for line in request.split("\r\n").skip(1) { + if line.is_empty() { + break; + } + if let Some((name, value)) = line.split_once(':') { + headers.push((name.trim().to_string(), value.trim().to_string())); + } + } + headers +} + +fn status_text(status: u16) -> &'static str { + match status { + 403 => "Forbidden", + 404 => "Not Found", + 405 => "Method Not Allowed", + 503 => "Service Unavailable", + _ => "OK", + } +} + +async fn write_metadata_response( + client: &mut S, + status: u16, + content_type: &str, + body: &str, +) -> Result<()> +where + S: 
AsyncWrite + Unpin, +{ + let response = format!( + "HTTP/1.1 {status} {}\r\nContent-Type: {content_type}\r\nMetadata-Flavor: Google\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + status_text(status), + body.len(), + ); + client + .write_all(response.as_bytes()) + .await + .into_diagnostic()?; + client.flush().await.into_diagnostic()?; + Ok(()) +} + +fn emit_metadata_event( + activity: ActivityId, + severity: SeverityId, + status: StatusId, + message: &str, +) { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(activity) + .severity(severity) + .status(status) + .message(message.to_string()) + .build(); + ocsf_emit!(event); +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + fn make_context(env: HashMap) -> MetadataContext { + let state = ProviderCredentialState::from_environment(0, env, HashMap::new()); + MetadataContext::new(state) + } + + fn make_context_with_expiry( + env: HashMap, + expires: HashMap, + ) -> MetadataContext { + let state = ProviderCredentialState::from_environment(0, env, expires); + MetadataContext::new(state) + } + + fn flavor_headers() -> Vec<(String, String)> { + vec![("Metadata-Flavor".to_string(), "Google".to_string())] + } + + #[test] + fn token_returns_placeholder_not_real_value() { + let ctx = make_context(HashMap::from([( + "GCP_ADC_ACCESS_TOKEN".to_string(), + "ya29.test-token".to_string(), + )])); + let (status, ct, body) = route_request(&ctx, "GET", PATH_TOKEN, &flavor_headers()); + assert_eq!(status, 200); + assert_eq!(ct, "application/json"); + let json: serde_json::Value = serde_json::from_str(&body).unwrap(); + let token = json["access_token"].as_str().unwrap(); + assert!( + token.starts_with("openshell:resolve:env:"), + "token should be a placeholder, got: {token}" + ); + assert!(!token.contains("ya29"), "real token must not be served"); + assert_eq!(json["token_type"], "Bearer"); + assert!(json["expires_in"].is_number()); + } + + #[test] + fn 
token_expires_in_computed_from_credential_expiry() { + let now_ms = openshell_core::time::now_ms(); + let expires_at = now_ms + 1_800_000; // 30 minutes from now + let ctx = make_context_with_expiry( + HashMap::from([("GCP_ADC_ACCESS_TOKEN".to_string(), "ya29.tok".to_string())]), + HashMap::from([("GCP_ADC_ACCESS_TOKEN".to_string(), expires_at)]), + ); + let (status, _, body) = route_request(&ctx, "GET", PATH_TOKEN, &flavor_headers()); + assert_eq!(status, 200); + let json: serde_json::Value = serde_json::from_str(&body).unwrap(); + let expires_in = json["expires_in"].as_i64().unwrap(); + assert!( + expires_in > 1700 && expires_in <= 1800, + "expires_in={expires_in}" + ); + } + + #[test] + fn token_no_expiry_defaults_to_3600() { + let ctx = make_context(HashMap::from([( + "GCP_ADC_ACCESS_TOKEN".to_string(), + "ya29.tok".to_string(), + )])); + let (_, _, body) = route_request(&ctx, "GET", PATH_TOKEN, &flavor_headers()); + let json: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert_eq!(json["expires_in"], 3600); + } + + #[test] + fn missing_metadata_flavor_header_403() { + let ctx = make_context(HashMap::new()); + let (status, _, _) = route_request(&ctx, "GET", PATH_TOKEN, &[]); + assert_eq!(status, 403); + } + + #[test] + fn x_forwarded_for_header_403() { + let ctx = make_context(HashMap::new()); + let headers = vec![ + ("Metadata-Flavor".to_string(), "Google".to_string()), + ("X-Forwarded-For".to_string(), "10.0.0.1".to_string()), + ]; + let (status, _, _) = route_request(&ctx, "GET", PATH_TOKEN, &headers); + assert_eq!(status, 403); + } + + #[test] + fn unknown_path_404() { + let ctx = make_context(HashMap::new()); + let (status, _, _) = route_request( + &ctx, + "GET", + "/computeMetadata/v1/unknown", + &flavor_headers(), + ); + assert_eq!(status, 404); + } + + #[test] + fn no_credentials_503() { + let ctx = make_context(HashMap::new()); + let (status, _, _) = route_request(&ctx, "GET", PATH_TOKEN, &flavor_headers()); + assert_eq!(status, 503); + 
} + + #[test] + fn post_method_405() { + let ctx = make_context(HashMap::new()); + let (status, _, _) = route_request(&ctx, "POST", PATH_TOKEN, &flavor_headers()); + assert_eq!(status, 405); + } + + #[test] + fn project_id_served_as_plain_text() { + let ctx = make_context(HashMap::from([( + "GCP_PROJECT_ID".to_string(), + "my-project-123".to_string(), + )])); + let (status, ct, body) = route_request(&ctx, "GET", PATH_PROJECT_ID, &flavor_headers()); + assert_eq!(status, 200); + assert_eq!(ct, "text/plain"); + assert_eq!(body, "my-project-123"); + } + + #[test] + fn email_served_as_plain_text() { + let ctx = make_context(HashMap::from([( + "GCP_SERVICE_ACCOUNT_EMAIL".to_string(), + "sa@project.iam.gserviceaccount.com".to_string(), + )])); + let (status, ct, body) = route_request(&ctx, "GET", PATH_EMAIL, &flavor_headers()); + assert_eq!(status, 200); + assert_eq!(ct, "text/plain"); + assert_eq!(body, "sa@project.iam.gserviceaccount.com"); + } + + #[test] + fn scopes_returns_cloud_platform() { + let ctx = make_context(HashMap::new()); + let (status, _, body) = route_request(&ctx, "GET", PATH_SCOPES, &flavor_headers()); + assert_eq!(status, 200); + assert_eq!(body, "https://www.googleapis.com/auth/cloud-platform"); + } + + #[test] + fn query_parameters_ignored_for_routing() { + let ctx = make_context(HashMap::from([( + "GCP_ADC_ACCESS_TOKEN".to_string(), + "ya29.tok".to_string(), + )])); + let path = format!("{PATH_TOKEN}?scopes=cloud-platform"); + let (status, _, _) = route_request(&ctx, "GET", &path, &flavor_headers()); + assert_eq!(status, 200); + } + + #[test] + fn metadata_flavor_case_insensitive() { + let ctx = make_context(HashMap::from([( + "GCP_ADC_ACCESS_TOKEN".to_string(), + "ya29.tok".to_string(), + )])); + let headers = vec![("metadata-FLAVOR".to_string(), "google".to_string())]; + let (status, _, _) = route_request(&ctx, "GET", PATH_TOKEN, &headers); + assert_eq!(status, 200); + } + + #[test] + fn missing_env_var_returns_404() { + let ctx = 
make_context(HashMap::from([( + "GCP_ADC_ACCESS_TOKEN".to_string(), + "ya29.tok".to_string(), + )])); + // project-id not set + let (status, _, _) = route_request(&ctx, "GET", PATH_PROJECT_ID, &flavor_headers()); + assert_eq!(status, 404); + } + + #[test] + fn trailing_slash_handled_for_service_account_default() { + let ctx = make_context(HashMap::from([( + "GCP_ADC_ACCESS_TOKEN".to_string(), + "ya29.tok".to_string(), + )])); + let with_slash = route_request( + &ctx, + "GET", + "/computeMetadata/v1/instance/service-accounts/default/", + &flavor_headers(), + ); + let without_slash = route_request( + &ctx, + "GET", + "/computeMetadata/v1/instance/service-accounts/default", + &flavor_headers(), + ); + assert_eq!(with_slash.0, 200); + assert_eq!(without_slash.0, 200); + assert_eq!(with_slash.2, without_slash.2); + } + + #[test] + fn parse_request_headers_extracts_correctly() { + let raw = b"GET /path HTTP/1.1\r\nHost: example.com\r\nMetadata-Flavor: Google\r\n\r\n"; + let headers = parse_request_headers(raw); + assert_eq!(headers.len(), 2); + assert_eq!(headers[0].0, "Host"); + assert_eq!(headers[0].1, "example.com"); + assert_eq!(headers[1].0, "Metadata-Flavor"); + assert_eq!(headers[1].1, "Google"); + } +} diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index e9d8921b6..849d6aebb 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -10,11 +10,13 @@ pub mod bypass_monitor; mod child_env; pub mod debug_rpc; pub mod denial_aggregator; +mod google_cloud_metadata; mod grpc_client; mod identity; pub mod l7; pub mod log_push; pub mod mechanistic_mapper; +mod metadata_server; pub mod opa; mod policy; mod policy_local; @@ -422,7 +424,8 @@ pub async fn run_sandbox( provider_env, provider_credential_expires_at_ms, ); - let provider_env = provider_credentials.snapshot().child_env.clone(); + #[cfg_attr(not(target_os = "linux"), allow(unused_mut))] + let mut provider_env = 
provider_credentials.child_env_resolved(); // Create identity cache for SHA256 TOFU when OPA is active let identity_cache = opa_engine @@ -798,6 +801,45 @@ pub async fn run_sandbox( } }); + // Start GCE metadata loopback server inside the network namespace so + // Go's cloud.google.com/go/compute/metadata (which bypasses HTTP_PROXY) + // can reach it via direct TCP. Must start before SSH handler so SSH + // sessions also see corrected env vars on bind failure. + #[cfg(target_os = "linux")] + if let Some(ns_fd) = ssh_netns_fd + && provider_credentials + .snapshot() + .child_env + .contains_key("GCE_METADATA_HOST") + { + let ctx = google_cloud_metadata::MetadataContext::new(provider_credentials.clone()); + let (ready_tx, ready_rx) = tokio::sync::oneshot::channel(); + match metadata_server::bind_in_netns( + openshell_core::google_cloud::METADATA_LOOPBACK_ADDR, + ns_fd, + ) + .await + { + Ok(listener) => { + tokio::spawn(metadata_server::run(listener, ctx, ready_tx)); + match timeout(Duration::from_secs(5), ready_rx).await { + Ok(Ok(addr)) => { + info!(addr = %addr, "GCE metadata loopback server ready"); + } + Ok(Err(_)) => warn!("GCE metadata server readiness channel dropped"), + Err(_) => warn!("GCE metadata server readiness timeout"), + } + } + Err(e) => { + warn!(error = %e, "GCE metadata server bind failed, Go SDK may not discover credentials"); + provider_env.remove("GCE_METADATA_HOST"); + provider_env.remove("GCE_METADATA_IP"); + provider_env.remove("METADATA_SERVER_DETECTION"); + provider_credentials.remove_env_key("GCE_METADATA_HOST"); + } + } + } + let ssh_socket_path: Option = ssh_socket_path.map(std::path::PathBuf::from); if let Some(listen_path) = ssh_socket_path.clone() { let policy_clone = policy.clone(); diff --git a/crates/openshell-sandbox/src/metadata_server.rs b/crates/openshell-sandbox/src/metadata_server.rs new file mode 100644 index 000000000..2379e0513 --- /dev/null +++ b/crates/openshell-sandbox/src/metadata_server.rs @@ -0,0 +1,175 @@ +// 
SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Loopback HTTP server for cloud metadata emulators. +//! +//! Binds a TCP listener inside the sandbox network namespace so that +//! cloud SDKs that bypass `HTTP_PROXY` (e.g. Go's +//! `cloud.google.com/go/compute/metadata`) can reach the emulator via +//! direct TCP. +//! +//! The server is generic over [`MetadataHandler`] — any cloud provider +//! that needs an instance metadata emulator can implement the trait. + +use miette::Result; +use std::future::Future; +use std::net::SocketAddr; +use std::sync::Arc; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use tokio::net::TcpListener; +use tokio::sync::{Semaphore, oneshot}; +use tracing::{debug, warn}; + +const MAX_REQUEST_BYTES: usize = 4096; +const MAX_CONCURRENT_CONNECTIONS: usize = 32; + +/// Handler for cloud metadata HTTP requests. +/// +/// Implementors receive the parsed HTTP method, path, raw request bytes, +/// and a bidirectional stream to write the response. The handler owns the +/// response format (status, headers, body) — the server only does TCP +/// accept and HTTP request-line parsing. +pub trait MetadataHandler: Send + Sync + 'static { + fn handle( + &self, + method: &str, + path: &str, + request: &[u8], + stream: &mut S, + ) -> impl Future> + Send; +} + +/// Bind a TCP listener inside the sandbox network namespace. +/// +/// Uses a dedicated OS thread (not `spawn_blocking`) to avoid namespace +/// contamination of the tokio blocking pool. See `ssh.rs::connect_in_netns` +/// for the same pattern. +/// +/// # Safety contract +/// +/// The caller must ensure `netns_fd` remains valid (not closed) until this +/// function returns. In practice, `NetworkNamespace` owns the fd and outlives +/// the sandbox — but new call sites must verify this invariant. 
+#[cfg(target_os = "linux")] +pub async fn bind_in_netns( + addr: &str, + netns_fd: std::os::unix::io::RawFd, +) -> std::io::Result { + let addr = addr.to_string(); + let (tx, rx) = oneshot::channel(); + std::thread::spawn(move || { + let result = (|| -> std::io::Result { + // SAFETY: setns is safe to call; this is a dedicated thread that + // exits after binding. The thread's namespace state does not + // contaminate any thread pool. + #[allow(unsafe_code)] + let rc = unsafe { libc::setns(netns_fd, libc::CLONE_NEWNET) }; + if rc != 0 { + return Err(std::io::Error::last_os_error()); + } + std::net::TcpListener::bind(&addr) + })(); + let _ = tx.send(result); + }); + + let std_listener = rx + .await + .map_err(|_| std::io::Error::other("metadata server bind thread panicked"))??; + std_listener.set_nonblocking(true)?; + TcpListener::from_std(std_listener) +} + +/// Run the metadata server accept loop. +/// +/// Signals `ready_tx` with the bound address before entering the loop. +/// Returns when the listener encounters a fatal error or the runtime shuts down. 
+pub async fn run( + listener: TcpListener, + handler: H, + ready_tx: oneshot::Sender, +) { + let local_addr = match listener.local_addr() { + Ok(addr) => addr, + Err(e) => { + warn!("metadata server failed to get local address: {e}"); + return; + } + }; + + let _ = ready_tx.send(local_addr); + + let handler = Arc::new(handler); + let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_CONNECTIONS)); + + loop { + let Ok(permit) = semaphore.clone().acquire_owned().await else { + break; + }; + + match listener.accept().await { + Ok((stream, _addr)) => { + let handler = handler.clone(); + tokio::spawn(async move { + if let Err(e) = handle_connection(handler.as_ref(), stream).await { + debug!("metadata server connection error: {e}"); + } + drop(permit); + }); + } + Err(e) => { + warn!("metadata server accept error: {e}"); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + } + } + } +} + +const READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5); + +async fn handle_connection( + handler: &H, + mut stream: tokio::net::TcpStream, +) -> Result<()> { + let mut buf = vec![0u8; MAX_REQUEST_BYTES]; + let mut used = 0; + let deadline = tokio::time::sleep(READ_TIMEOUT); + tokio::pin!(deadline); + loop { + tokio::select! 
{ + result = stream.read(&mut buf[used..]) => { + let n = result.map_err(|e| miette::miette!("{e}"))?; + if n == 0 { + return Ok(()); + } + used += n; + if buf[..used].windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + if used >= buf.len() { + let _ = stream + .write_all(b"HTTP/1.1 413 Request Entity Too Large\r\nContent-Length: 0\r\n\r\n") + .await; + return Ok(()); + } + } + () = &mut deadline => { + return Ok(()); + } + } + } + let request = String::from_utf8_lossy(&buf[..used]); + let request_line = request.split("\r\n").next().unwrap_or(""); + let mut parts = request_line.split_whitespace(); + let method = parts.next().unwrap_or(""); + let path = parts.next().unwrap_or("/"); + + tokio::time::timeout( + READ_TIMEOUT, + handler.handle(method, path, &buf[..used], &mut stream), + ) + .await + .unwrap_or_else(|_| { + debug!(method, path, "metadata handler timed out"); + Ok(()) + }) +} diff --git a/crates/openshell-sandbox/src/provider_credentials.rs b/crates/openshell-sandbox/src/provider_credentials.rs index ae91e8d6e..768d48e29 100644 --- a/crates/openshell-sandbox/src/provider_credentials.rs +++ b/crates/openshell-sandbox/src/provider_credentials.rs @@ -4,7 +4,7 @@ //! Runtime provider credential snapshots. use crate::secrets::SecretResolver; -use std::collections::{HashMap, VecDeque}; +use std::collections::{HashMap, HashSet, VecDeque}; use std::sync::{Arc, RwLock}; const MAX_RETAINED_CREDENTIAL_GENERATIONS: usize = 8; @@ -21,6 +21,7 @@ struct ProviderCredentialStateInner { generations: VecDeque>, current_resolver: Option>, combined_resolver: Option>, + suppressed_keys: HashSet, } #[derive(Debug, Clone)] @@ -54,6 +55,7 @@ impl ProviderCredentialState { generations, current_resolver, combined_resolver, + suppressed_keys: HashSet::new(), })), } } @@ -74,13 +76,132 @@ impl ProviderCredentialState { .clone() } + /// Remove a key from the credential snapshot's child env. 
+ /// + /// Used when a sandbox-side service (e.g., metadata server) fails to start + /// and the corresponding env var should not be inherited by child processes + /// or SSH sessions. + pub fn remove_env_key(&self, key: &str) { + let mut inner = self + .inner + .write() + .expect("provider credential state poisoned"); + inner.suppressed_keys.insert(key.to_string()); + let mut env = (*inner.current).clone(); + env.child_env.remove(key); + inner.current = Arc::new(env); + } + + /// Return `child_env` with GCP static config vars resolved to real values. + /// + /// The credential pipeline placeholderizes ALL env values, but GCP SDKs + /// and coding agents read certain vars (project ID, region, metadata host) + /// at process startup before any HTTP request flows through the proxy. + /// This method overrides those vars with resolved real values while + /// keeping secret credentials (like `GCP_ACCESS_TOKEN`) as placeholders. + /// + /// Three layers of env var injection: + /// 1. **Synthetic vars** (`GCE_METADATA_IP`, `METADATA_SERVER_DETECTION`) + /// — sandbox-internal config not from user + /// input, inserted directly here with real values. + /// 2. **`google_cloud::STATIC_CONFIG_KEYS`** — user-provided non-secret config + /// (project ID, region, SA email) that was placeholderized by + /// `ProviderPlugin::inject_env` → `SecretResolver`; un-placeholderized + /// here so SDKs can read them at startup. + /// 3. Everything else stays as placeholders for proxy-time resolution. 
+ pub fn child_env_resolved(&self) -> HashMap { + use openshell_core::google_cloud; + + let inner = self + .inner + .read() + .expect("provider credential state poisoned"); + let mut env = inner.current.child_env.clone(); + + let has_gcp_metadata = env.contains_key("GCE_METADATA_HOST"); + let has_gcp_config = google_cloud::STATIC_CONFIG_KEYS + .iter() + .any(|k| env.contains_key(*k)); + + if !has_gcp_metadata && !has_gcp_config { + return env; + } + + if has_gcp_metadata { + // Synthetic vars: sandbox-internal config that doesn't originate + // from user input and was never placeholderized. + env.insert( + "GCE_METADATA_HOST".to_string(), + google_cloud::METADATA_LOOPBACK_ADDR.to_string(), + ); + // Python's google-auth builds its ping URL as http://{GCE_METADATA_IP} + // so the value must include the port. + env.insert( + "GCE_METADATA_IP".to_string(), + google_cloud::METADATA_LOOPBACK_ADDR.to_string(), + ); + // Node.js gcp-metadata uses METADATA_SERVER_DETECTION to skip the + // runtime ping that otherwise fails in sandboxed environments. + env.insert( + "METADATA_SERVER_DETECTION".to_string(), + "assume-present".to_string(), + ); + } + + // Un-placeholderize non-secret config vars so SDKs can read them + // at process startup before any HTTP flows through the proxy. + if let Some(ref resolver) = inner.combined_resolver { + for key in google_cloud::STATIC_CONFIG_KEYS { + let placeholder = crate::secrets::placeholder_for_env_key(key); + if let Some(value) = resolver.resolve_placeholder(&placeholder) { + env.insert(key.to_string(), value.to_string()); + } + } + } + + env + } + + /// Return the GCP token placeholder and its remaining lifetime in seconds. + /// + /// Searches `google_cloud::TOKEN_ENV_KEYS` in priority order (SA before + /// ADC) atomically to avoid inconsistency during credential + /// refresh. Returns `None` if no GCP token is configured or all are + /// expired. The `expires_in` defaults to 3600 when expiry is unknown. 
+ pub fn gcp_token_response(&self) -> Option<(String, i64)> { + const DEFAULT_EXPIRES_IN: i64 = 3600; + let resolver = self.resolver()?; + for key in openshell_core::google_cloud::TOKEN_ENV_KEYS { + let placeholder = crate::secrets::placeholder_for_env_key(key); + if resolver.resolve_placeholder(&placeholder).is_none() { + continue; + } + let expires_in = resolver.expires_at_ms_for_placeholder(&placeholder).map_or( + DEFAULT_EXPIRES_IN, + |expires_at_ms| { + if expires_at_ms <= 0 { + DEFAULT_EXPIRES_IN + } else { + let now = openshell_core::time::now_ms(); + (expires_at_ms - now) / 1000 + } + }, + ); + if expires_in <= 0 { + continue; + } + return Some((placeholder, expires_in)); + } + None + } + pub fn install_environment( &self, revision: u64, env: HashMap, credential_expires_at_ms: HashMap, ) -> usize { - let (child_env, generation_resolver, current_resolver) = + let (mut child_env, generation_resolver, current_resolver) = SecretResolver::from_provider_env_for_current_revision( env, credential_expires_at_ms, @@ -91,6 +212,10 @@ impl ProviderCredentialState { .write() .expect("provider credential state poisoned"); + for key in &inner.suppressed_keys { + child_env.remove(key); + } + inner.current = Arc::new(ProviderCredentialSnapshot { revision, child_env, @@ -125,6 +250,7 @@ fn merge_resolvers( #[cfg(test)] mod tests { use super::*; + use openshell_core::google_cloud; #[test] fn snapshots_use_revision_scoped_placeholders() { @@ -226,4 +352,224 @@ mod tests { Some("new") ); } + + #[test] + fn child_env_resolved_without_gcp_returns_unchanged() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([("GITHUB_TOKEN".to_string(), "ghp_abc".to_string())]), + HashMap::new(), + ); + let env = state.child_env_resolved(); + assert_eq!( + env.get("GITHUB_TOKEN").map(String::as_str), + Some("openshell:resolve:env:v1_GITHUB_TOKEN"), + "non-GCP env should remain as placeholder" + ); + assert!(!env.contains_key("GCE_METADATA_HOST")); + 
assert!(!env.contains_key("CLAUDE_CODE_USE_VERTEX")); + } + + #[test] + fn child_env_resolved_overrides_gcp_static_vars() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([ + ("GCE_METADATA_HOST".to_string(), "marker".to_string()), + ( + "GCP_ADC_ACCESS_TOKEN".to_string(), + "ya29.secret".to_string(), + ), + ("GCP_PROJECT_ID".to_string(), "my-project".to_string()), + ("CLOUD_ML_REGION".to_string(), "us-central1".to_string()), + ]), + HashMap::new(), + ); + let env = state.child_env_resolved(); + + assert_eq!( + env.get("GCE_METADATA_HOST").map(String::as_str), + Some(google_cloud::METADATA_LOOPBACK_ADDR), + "GCE_METADATA_HOST should be the loopback address" + ); + assert!( + !env.contains_key("CLAUDE_CODE_USE_VERTEX"), + "inference-specific vars should not be injected" + ); + assert_eq!( + env.get("GCP_PROJECT_ID").map(String::as_str), + Some("my-project"), + "static config should be resolved to real value" + ); + assert_eq!( + env.get("CLOUD_ML_REGION").map(String::as_str), + Some("us-central1"), + ); + + let token = env.get("GCP_ADC_ACCESS_TOKEN").map(String::as_str).unwrap(); + assert!( + token.starts_with("openshell:resolve:env:"), + "GCP_ACCESS_TOKEN must stay as placeholder, got: {token}" + ); + } + + #[test] + fn child_env_resolved_handles_missing_config_keys() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([ + ("GCE_METADATA_HOST".to_string(), "marker".to_string()), + ("GCP_ADC_ACCESS_TOKEN".to_string(), "ya29.tok".to_string()), + ]), + HashMap::new(), + ); + let env = state.child_env_resolved(); + + assert_eq!( + env.get("GCE_METADATA_HOST").map(String::as_str), + Some(google_cloud::METADATA_LOOPBACK_ADDR), + ); + assert!( + !env.contains_key("GCP_PROJECT_ID") + || env + .get("GCP_PROJECT_ID") + .unwrap() + .starts_with("openshell:resolve:env:"), + "missing config key should not be injected with a real value" + ); + } + + #[test] + fn gcp_token_response_returns_sa_over_adc() { + let state = 
ProviderCredentialState::from_environment( + 1, + HashMap::from([ + ("GCP_SA_ACCESS_TOKEN".to_string(), "sa-tok".to_string()), + ("GCP_ADC_ACCESS_TOKEN".to_string(), "adc-tok".to_string()), + ]), + HashMap::new(), + ); + let (placeholder, _) = state.gcp_token_response().expect("should find token"); + assert!( + placeholder.contains("GCP_SA_ACCESS_TOKEN"), + "SA token should win over ADC, got: {placeholder}" + ); + } + + #[test] + fn gcp_token_response_falls_back_to_adc() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([("GCP_ADC_ACCESS_TOKEN".to_string(), "adc-tok".to_string())]), + HashMap::new(), + ); + let (placeholder, _) = state.gcp_token_response().expect("should find ADC token"); + assert!(placeholder.contains("GCP_ADC_ACCESS_TOKEN")); + } + + #[test] + fn gcp_token_response_returns_none_without_gcp() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([("GITHUB_TOKEN".to_string(), "ghp_abc".to_string())]), + HashMap::new(), + ); + assert!(state.gcp_token_response().is_none()); + } + + #[test] + fn gcp_token_response_defaults_expires_in_to_3600() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([("GCP_ADC_ACCESS_TOKEN".to_string(), "adc-tok".to_string())]), + HashMap::new(), + ); + let (_, expires_in) = state.gcp_token_response().unwrap(); + assert_eq!( + expires_in, 3600, + "should default to 3600 when no expiry set" + ); + } + + #[test] + fn gcp_token_response_calculates_remaining() { + let now_ms = i64::try_from( + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis(), + ) + .unwrap(); + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([("GCP_ADC_ACCESS_TOKEN".to_string(), "adc-tok".to_string())]), + HashMap::from([("GCP_ADC_ACCESS_TOKEN".to_string(), now_ms + 120_000)]), + ); + let (_, expires_in) = state.gcp_token_response().unwrap(); + assert!( + (110..=120).contains(&expires_in), + "expected 
~120s remaining, got {expires_in}" + ); + } + + #[test] + fn child_env_resolved_resolves_vertex_vars_without_metadata_host() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([ + ("GOOSE_PROVIDER".to_string(), "gcp_vertex_ai".to_string()), + ( + "ANTHROPIC_VERTEX_PROJECT_ID".to_string(), + "my-vertex-proj".to_string(), + ), + ("VERTEX_LOCATION".to_string(), "us-east4".to_string()), + ]), + HashMap::new(), + ); + let env = state.child_env_resolved(); + assert_eq!( + env.get("GOOSE_PROVIDER").map(String::as_str), + Some("gcp_vertex_ai"), + "GOOSE_PROVIDER should be resolved to real value" + ); + assert_eq!( + env.get("ANTHROPIC_VERTEX_PROJECT_ID").map(String::as_str), + Some("my-vertex-proj"), + ); + assert_eq!( + env.get("VERTEX_LOCATION").map(String::as_str), + Some("us-east4"), + ); + assert!( + !env.contains_key("GCE_METADATA_IP"), + "metadata synthetic vars should not be injected without GCE_METADATA_HOST" + ); + } + + #[test] + fn suppressed_keys_survive_install_environment() { + let state = ProviderCredentialState::from_environment( + 1, + HashMap::from([ + ("GCE_METADATA_HOST".to_string(), "marker".to_string()), + ("GCP_ADC_ACCESS_TOKEN".to_string(), "tok".to_string()), + ]), + HashMap::new(), + ); + state.remove_env_key("GCE_METADATA_HOST"); + assert!(!state.snapshot().child_env.contains_key("GCE_METADATA_HOST")); + + state.install_environment( + 2, + HashMap::from([ + ("GCE_METADATA_HOST".to_string(), "marker".to_string()), + ("GCP_ADC_ACCESS_TOKEN".to_string(), "tok2".to_string()), + ]), + HashMap::new(), + ); + assert!( + !state.snapshot().child_env.contains_key("GCE_METADATA_HOST"), + "suppressed key must not reappear after install_environment" + ); + } } diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index ec73b4dd7..56d6ab8e2 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -239,6 +239,12 @@ impl SecretResolver { } } + 
pub(crate) fn expires_at_ms_for_placeholder(&self, placeholder: &str) -> Option { + self.by_placeholder + .get(placeholder) + .map(|s| s.expires_at_ms) + } + pub(crate) fn rewrite_header_value( &self, value: &str, diff --git a/crates/openshell-sandbox/src/ssh.rs b/crates/openshell-sandbox/src/ssh.rs index 67fbc7e57..310b09b6e 100644 --- a/crates/openshell-sandbox/src/ssh.rs +++ b/crates/openshell-sandbox/src/ssh.rs @@ -457,7 +457,7 @@ impl russh::server::Handler for SshHandler { self.netns_fd, self.proxy_url.clone(), self.ca_file_paths.clone(), - &self.provider_credentials.snapshot().child_env, + &self.provider_credentials.child_env_resolved(), )?; let state = self.channels.get_mut(&channel).ok_or_else(|| { anyhow::anyhow!("subsystem_request on unknown channel {channel:?}") @@ -534,7 +534,7 @@ impl SshHandler { handle: Handle, command: Option, ) -> anyhow::Result<()> { - let provider_snapshot = self.provider_credentials.snapshot(); + let provider_env = self.provider_credentials.child_env_resolved(); let state = self .channels .get_mut(&channel) @@ -552,7 +552,7 @@ impl SshHandler { self.netns_fd, self.proxy_url.clone(), self.ca_file_paths.clone(), - &provider_snapshot.child_env, + &provider_env, )?; state.pty_master = Some(pty_master); state.input_sender = Some(input_sender); @@ -569,7 +569,7 @@ impl SshHandler { self.netns_fd, self.proxy_url.clone(), self.ca_file_paths.clone(), - &provider_snapshot.child_env, + &provider_env, )?; state.input_sender = Some(input_sender); } diff --git a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs index 7591bdd6b..ff1dc835c 100644 --- a/crates/openshell-server/src/grpc/provider.rs +++ b/crates/openshell-server/src/grpc/provider.rs @@ -434,6 +434,7 @@ pub(super) async fn resolve_provider_environment( let mut expires = std::collections::HashMap::new(); let now_ms = crate::persistence::current_time_ms(); validate_provider_environment_keys_unique_at(store, provider_names, None, 
now_ms).await?; + let registry = openshell_providers::ProviderRegistry::new(); for name in provider_names { let provider = store @@ -479,52 +480,7 @@ pub(super) async fn resolve_provider_environment( } } - // For Vertex AI providers, inject agent-specific config env vars so that - // Claude Code, Goose, and OpenCode inside the sandbox can reach Vertex AI - // without additional configuration. Credentials from the loop above take - // precedence via entry().or_insert(), and sandbox --env overrides are - // applied at the process level after this environment is installed, so - // they naturally shadow these values. - if openshell_core::inference::normalize_inference_provider_type(&provider.r#type) - == Some("google-vertex-ai") - { - let project_id = provider - .config - .get(openshell_core::inference::VERTEX_AI_PROJECT_ID_KEY) - .map(String::as_str) - .unwrap_or_default() - .trim(); - let region = provider - .config - .get(openshell_core::inference::VERTEX_AI_REGION_KEY) - .map(String::as_str) - .unwrap_or_default() - .trim(); - - // Static flags -- always present for Vertex AI providers. - env.entry("GOOSE_PROVIDER".to_string()) - .or_insert_with(|| "gcp_vertex_ai".to_string()); - - // Project ID derived vars. - if !project_id.is_empty() { - env.entry("ANTHROPIC_VERTEX_PROJECT_ID".to_string()) - .or_insert_with(|| project_id.to_string()); - env.entry("GCP_PROJECT_ID".to_string()) - .or_insert_with(|| project_id.to_string()); - env.entry("GOOGLE_CLOUD_PROJECT".to_string()) - .or_insert_with(|| project_id.to_string()); - } - - // Region derived vars. 
- if !region.is_empty() { - env.entry("CLOUD_ML_REGION".to_string()) - .or_insert_with(|| region.to_string()); - env.entry("GCP_LOCATION".to_string()) - .or_insert_with(|| region.to_string()); - env.entry("VERTEX_LOCATION".to_string()) - .or_insert_with(|| region.to_string()); - } - } + registry.inject_env(&provider, &mut env); } Ok(ProviderEnvironment { @@ -1819,6 +1775,7 @@ mod tests { "copilot", "cursor", "github", + "google-cloud", "google-vertex-ai", "nvidia", "pypi" @@ -4592,4 +4549,128 @@ mod tests { .count(); assert_eq!(new_keys_count, 1); } + + fn google_cloud_provider(config: HashMap) -> Provider { + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "my-google-cloud".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "google-cloud".to_string(), + credentials: HashMap::new(), + config, + credential_expires_at_ms: HashMap::new(), + } + } + + #[test] + fn inject_gcp_env_sets_metadata_host() { + use openshell_core::google_cloud; + let provider = google_cloud_provider(HashMap::new()); + let mut env = HashMap::new(); + openshell_providers::ProviderRegistry::new().inject_env(&provider, &mut env); + assert_eq!( + env.get("GCE_METADATA_HOST").map(String::as_str), + Some(google_cloud::METADATA_HOST), + ); + assert!( + !env.contains_key("CLAUDE_CODE_USE_VERTEX"), + "CLAUDE_CODE_USE_VERTEX is synthetic, should not be injected here" + ); + } + + #[test] + fn inject_gcp_env_propagates_project_id() { + use openshell_core::google_cloud; + let provider = google_cloud_provider(HashMap::from([( + "project_id".to_string(), + "my-project".to_string(), + )])); + let mut env = HashMap::new(); + openshell_providers::ProviderRegistry::new().inject_env(&provider, &mut env); + for var in google_cloud::PROJECT_ID_ENV_VARS { + assert_eq!( + env.get(*var).map(String::as_str), + Some("my-project"), + "{var} should be set to project_id config value" + ); + } + } + + #[test] + fn 
inject_gcp_env_propagates_region() { + use openshell_core::google_cloud; + let provider = google_cloud_provider(HashMap::from([( + "region".to_string(), + "us-central1".to_string(), + )])); + let mut env = HashMap::new(); + openshell_providers::ProviderRegistry::new().inject_env(&provider, &mut env); + for var in google_cloud::REGION_ENV_VARS { + assert_eq!( + env.get(*var).map(String::as_str), + Some("us-central1"), + "{var} should be set to region config value" + ); + } + } + + #[test] + fn inject_gcp_env_propagates_service_account_email() { + use openshell_core::google_cloud; + let provider = google_cloud_provider(HashMap::from([( + "service_account_email".to_string(), + "sa@proj.iam.gserviceaccount.com".to_string(), + )])); + let mut env = HashMap::new(); + openshell_providers::ProviderRegistry::new().inject_env(&provider, &mut env); + for var in google_cloud::SERVICE_ACCOUNT_EMAIL_ENV_VARS { + assert_eq!( + env.get(*var).map(String::as_str), + Some("sa@proj.iam.gserviceaccount.com"), + "{var} should be set to service_account_email config value" + ); + } + } + + #[test] + fn inject_gcp_env_does_not_overwrite_existing_values() { + let provider = google_cloud_provider(HashMap::from([( + "project_id".to_string(), + "from-config".to_string(), + )])); + let mut env = HashMap::from([("GCP_PROJECT_ID".to_string(), "user-override".to_string())]); + openshell_providers::ProviderRegistry::new().inject_env(&provider, &mut env); + assert_eq!( + env.get("GCP_PROJECT_ID").map(String::as_str), + Some("user-override"), + "user-provided value should not be overwritten" + ); + } + + #[test] + fn inject_non_gcp_provider_does_nothing() { + let provider = Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "github".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "github".to_string(), + credentials: HashMap::new(), + config: HashMap::from([("project_id".to_string(), 
"should-be-ignored".to_string())]), + credential_expires_at_ms: HashMap::new(), + }; + let mut env = HashMap::new(); + openshell_providers::ProviderRegistry::new().inject_env(&provider, &mut env); + assert!( + env.is_empty(), + "non-GCP provider should not inject any env vars" + ); + } } diff --git a/docs/providers/google-cloud.mdx b/docs/providers/google-cloud.mdx new file mode 100644 index 000000000..127dae1d6 --- /dev/null +++ b/docs/providers/google-cloud.mdx @@ -0,0 +1,194 @@ +--- +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +title: "Google Cloud" +sidebar-title: "Google Cloud" +description: "Authenticate with GCP APIs inside OpenShell sandboxes." +keywords: "Generative AI, Google Cloud, Vertex AI, GCP, OAuth2, Credentials, Sandbox" +--- + +The `google-cloud` provider gives sandboxes native GCP credentials so +any Google Cloud SDK works out of the box — Cloud Storage, BigQuery, Drive, +Maps, Discovery Engine, or any other GCP API. A GCE metadata server emulator +on loopback provides credential placeholders that the +sandbox proxy resolves to real tokens at request time. The sandbox process +never holds a real GCP credential. + +## Quick Start + +If you already have `gcloud` configured with Application Default Credentials, +create a provider with automatic credential refresh in one command: + +```shell +openshell provider create \ + --name my-gcp \ + --type google-cloud \ + --from-gcloud-adc \ + --config project_id="$(gcloud config get-value project)" \ + --config region=global +``` + +`--from-gcloud-adc` reads your ADC file, configures OAuth2 refresh on the +gateway, and mints the first access token before the command returns. The +gateway rotates the token automatically — no manual refresh needed. + +## Authentication Flows + +Two credential flows are supported. Choose based on your environment. 
+ +### Application Default Credentials (gcloud ADC) + +Use credentials from `gcloud auth application-default login`. The gateway +exchanges the refresh token for short-lived access tokens automatically. + +```shell +openshell provider create \ + --name my-gcp \ + --type google-cloud \ + --config project_id=my-project \ + --config region=us-central1 \ + --credential GCP_ADC_ACCESS_TOKEN=placeholder +``` + +Configure credential refresh with the ADC JSON fields: + +```shell +openshell provider refresh configure my-gcp \ + --credential-key GCP_ADC_ACCESS_TOKEN \ + --strategy oauth2-refresh-token \ + --material client_id=YOUR_CLIENT_ID \ + --material client_secret=YOUR_CLIENT_SECRET \ + --material refresh_token=YOUR_REFRESH_TOKEN \ + --secret-material-key client_secret \ + --secret-material-key refresh_token +``` + +Find these values in your ADC file at +`~/.config/gcloud/application_default_credentials.json`. + +Trigger the first token mint: + +```shell +openshell provider refresh rotate my-gcp \ + --credential-key GCP_ADC_ACCESS_TOKEN +``` + +### Service Account Key + +Use a GCP service account JSON key file. The gateway signs JWTs and +exchanges them for access tokens using the `google-service-account-jwt` +strategy. 
+ +```shell +openshell provider create \ + --name my-gcp \ + --type google-cloud \ + --config project_id=my-project \ + --config region=us-central1 \ + --credential GCP_SA_ACCESS_TOKEN=placeholder +``` + +```shell +openshell provider refresh configure my-gcp \ + --credential-key GCP_SA_ACCESS_TOKEN \ + --strategy google-service-account-jwt \ + --material client_email=sa@my-project.iam.gserviceaccount.com \ + --material private_key="$(jq -r .private_key /path/to/sa-key.json)" \ + --secret-material-key private_key +``` + +```shell +openshell provider refresh rotate my-gcp \ + --credential-key GCP_SA_ACCESS_TOKEN +``` + +## Configuration Keys + +Set these with `--config key=value` during provider creation: + +| Key | Description | Example | +|-----|-------------|---------| +| `project_id` | GCP project ID | `my-project-123` | +| `region` | GCP region | `us-central1` | +| `service_account_email` | SA email for metadata endpoint | `sa@proj.iam.gserviceaccount.com` | + +## How It Works + +When a sandbox starts with the `google-cloud` provider attached: + +1. The gateway mints a fresh GCP access token and stores it in the + sandbox proxy's credential resolver. +2. A loopback HTTP server on `127.0.0.1:8174` emulates the GCE instance + metadata API, serving **credential placeholders** (not real tokens) to + GCP SDKs. The sandbox process never holds a real GCP credential. +3. When the SDK makes an API call, it sends the placeholder in the + `Authorization` header. The sandbox proxy TLS-terminates the + outbound connection, resolves the placeholder to the real token, + and forwards the request to GCP. +4. When the token approaches expiry, the gateway refreshes it. The + proxy's resolver is updated atomically — subsequent API calls + use the new token automatically. + +Configuration values (`project_id`, `region`, `service_account_email`) +are **visible in plain text** inside the sandbox — they appear as +environment variables and are served by the metadata endpoint. 
These are +non-secret identifiers, not credentials. Access tokens are never exposed; +only placeholders reach the sandbox process. + +### Injected Environment Variables + +The provider automatically injects these into the sandbox. Non-secret +vars are resolved to real values at process spawn time; token vars stay +as placeholders for proxy-time resolution. + +| Variable | Value | Purpose | +|----------|-------|---------| +| `GCE_METADATA_HOST` | `127.0.0.1:8174` | GCP SDK metadata discovery (loopback server) | +| `GCE_METADATA_IP` | `127.0.0.1:8174` | Python google-auth ping detection | +| `METADATA_SERVER_DETECTION` | `assume-present` | Node.js gcp-metadata skip detection | +| `GCP_PROJECT_ID` | from `project_id` config | GCP SDK project | +| `GOOGLE_CLOUD_PROJECT` | from `project_id` config | Alternative project var | +| `CLOUD_ML_REGION` | from `region` config | GCP region | +| `GCP_LOCATION` | from `region` config | Alternative region var | + +## Using with GCP APIs + +The placeholders served by the metadata emulator resolve to access tokens with the `cloud-platform` OAuth2 scope, +which grants access to any GCP API the underlying service account or ADC user has IAM +permissions for. Add the target API hosts to your sandbox network policy: + +```yaml +network_policies: + gcp_apis: + name: gcp-apis + endpoints: + - host: "*.googleapis.com" + port: 443 + protocol: rest + access: read-write + enforcement: enforce + binaries: + - { path: /usr/bin/curl } + - { path: /usr/bin/node } + - { path: "/sandbox/.uv/python/**" } + - { path: "/sandbox/.venv/**" } +``` + +Or update a live sandbox directly: + +```shell +openshell policy update my-sandbox \ + --add-endpoint "*.googleapis.com:443:read-write:rest:enforce" \ + --binary "/usr/bin/curl" \ + --binary "/usr/bin/node" \ + --binary "/sandbox/.uv/python/**" \ + --binary "/sandbox/.venv/**" \ + --wait +``` + +## Network Policy + +The `google-cloud` provider type does not include any network policy +endpoints by default.
You must add endpoint rules to your sandbox policy +for each GCP API the sandbox needs to reach. See "Using with GCP APIs" +above for an example. diff --git a/docs/providers/google-vertex-ai.mdx b/docs/providers/google-vertex-ai.mdx index 631d6c395..21b9d4d37 100644 --- a/docs/providers/google-vertex-ai.mdx +++ b/docs/providers/google-vertex-ai.mdx @@ -68,7 +68,7 @@ openshell provider create \ ADC-backed providers mint and rotate access tokens into `GOOGLE_VERTEX_AI_TOKEN`. -`--from-gcloud-adc` is only valid for `google-vertex-ai` providers. +`--from-gcloud-adc` is valid for `google-vertex-ai` and `google-cloud` providers. ## Configuration Keys diff --git a/providers/google-cloud.yaml b/providers/google-cloud.yaml new file mode 100644 index 000000000..77ed72e1c --- /dev/null +++ b/providers/google-cloud.yaml @@ -0,0 +1,64 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: google-cloud +display_name: Google Cloud (GCP APIs) +description: Native GCP SDK credentials for sandboxes via metadata emulator +category: other +inference_capable: false + +credentials: + # Service account JWT flow: gateway signs a JWT and exchanges it for an + # access token. Configure with `openshell provider refresh configure`. 
+ - name: service_account_token + description: GCP access token minted from service account JWT + env_vars: [GCP_SA_ACCESS_TOKEN] + required: false + auth_style: bearer + header_name: authorization + refresh: + strategy: google_service_account_jwt + token_url: https://oauth2.googleapis.com/token + scopes: [https://www.googleapis.com/auth/cloud-platform] + refresh_before_seconds: 300 + max_lifetime_seconds: 3600 + material: + - name: client_email + description: Service account email address + required: true + - name: private_key + description: Service account RSA private key (PEM) + required: true + secret: true + - name: subject + description: Email for domain-wide delegation (optional) + + # ADC OAuth2 flow: gateway exchanges a gcloud ADC refresh token for an + # access token. Configure with `openshell provider refresh configure`. + - name: adc_token + description: GCP access token from gcloud application default credentials + env_vars: [GCP_ADC_ACCESS_TOKEN] + required: false + auth_style: bearer + header_name: authorization + refresh: + strategy: oauth2_refresh_token + token_url: https://oauth2.googleapis.com/token + scopes: [https://www.googleapis.com/auth/cloud-platform] + refresh_before_seconds: 300 + max_lifetime_seconds: 3600 + material: + - name: client_id + description: OAuth2 client ID from ADC JSON + required: true + - name: client_secret + description: OAuth2 client secret from ADC JSON + required: true + secret: true + - name: refresh_token + description: OAuth2 refresh token from ADC JSON + required: true + secret: true + +discovery: + credentials: [service_account_token, adc_token]