diff --git a/CHANGELOG.md b/CHANGELOG.md index b1c7b4e..0d81a1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Updated + +- Updated cel-rust from 0.12.0 to 0.13.0. + +### Added + +- `Context.set_variable_resolver(callback)` exposes cel 0.13's `VariableResolver` trait to Python. The callback receives a variable name and returns the value (or `None` to fall through to variables registered with `add_variable`). Useful for backing a CEL context with on-demand sources (database lookups, lazily-loaded config files, etc.) without materializing the full set of variables upfront. Exceptions raised by the resolver are logged and treated as "not handled". +- Idiomatic Python exception mapping for several CEL runtime errors that previously fell through to `ValueError`: + - Arithmetic overflow → `OverflowError` (e.g. `9223372036854775807 + 1`, including the new overflow-safe int math in cel 0.13). + - Division by zero / modulo by zero → `ZeroDivisionError`. + - List index out of bounds → `IndexError`. + - Missing map key (e.g. `{"a": 1}.b`) → `KeyError`. + +### Changed + +- **Behaviour change** (cel 0.13): bytes concatenation with `+` now works per the CEL spec (`b'hello' + b'world'` returns `b'helloworld'`). Previously raised `TypeError`. +- **Behaviour change** (cel 0.13): logical `&&` and `||` are now "err-resilient" per CEL spec — `X && false` short-circuits to `false` and `X || true` short-circuits to `true` even when `X` is not a boolean. Conversely, `false || X` and `true && X` now raise `TypeError` when `X` is not boolean (previously `false || X` returned `X`). +- **Error mapping**: more operations now route through CEL's `NoSuchOverload` (e.g. `1 + 2u`, `1 * 2u`, indexing into a string). These map to `TypeError` with a generic message listing common causes and conversion functions (`int(x)`, `uint(x)`, `double(x)`). 
The previous type-specific messages (e.g. "Cannot mix signed and unsigned integers") are still produced for the operand orderings cel-rust dispatches via `UnsupportedBinaryOperator`. Tests asserting on specific message text may need updating. +- **Behaviour change** (cel 0.13): no implicit type coercion on map index access; indexing into a string now raises `TypeError` (`NoSuchOverload`) per CEL spec. +- **Behaviour change** (cel 0.13): integer arithmetic overflow now raises `OverflowError` instead of silently wrapping. Affects `+`, `-`, `*` on both `int` and `uint` at the type's bounds. + +### Performance + +- Microbenchmarks comparing cel 0.13 vs 0.12 (release build, taking the min of 3 runs per scenario; values are relative to 0.12, so positive means 0.13 is slower): + + | Scenario | compile | compiled execute | evaluate | + |-----------------------|---------:|-----------------:|---------:| + | `x + y * 2` | +2.8% | +8.4% | +3.3% | + | `greet + ' ' + name` | +23.0% | +4.3% | +0.5% | + | `size(items)` (1k) | ~0% | +63.4% | +53.2% | + | map field access | −7.1% | +8.3% | +6.5% | + | custom Python fn | +1.4% | +6.7% | −1.9% | + + Most scenarios are within ~10% of 0.12. The two exceptions are the +23.0% compile time for `greet + ' ' + name` and the `size(items)` regression on a 1000-element list; the latter is cel 0.13's dyn-Val refactor adding per-element boxing overhead at the Python→CEL boundary. Smaller lists are not noticeably affected. 
+ ## [0.5.6] - 2026-02-07 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 2971c5f..b3720b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,9 +112,9 @@ dependencies = [ [[package]] name = "cel" -version = "0.5.5" +version = "0.5.6" dependencies = [ - "cel 0.12.0", + "cel 0.13.0", "chrono", "log", "pyo3", @@ -123,9 +123,9 @@ dependencies = [ [[package]] name = "cel" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca1e5eda1b0f8476181bed1bfc9232a91d62ff0b9f1bc0e48afff3cbcb5b0b5c" +checksum = "47a40f338a8c3505921000b609279775792c07cc21f97a3011578c0c5e1738ae" dependencies = [ "antlr4rust", "base64", @@ -133,7 +133,7 @@ dependencies = [ "chrono", "lazy_static", "nom", - "paste", + "pastey", "regex", "serde", "serde_json", @@ -333,10 +333,10 @@ dependencies = [ ] [[package]] -name = "paste" -version = "1.0.15" +name = "pastey" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +checksum = "c5a797f0e07bdf071d15742978fc3128ec6c22891c31a3a931513263904c982a" [[package]] name = "portable-atomic" diff --git a/Cargo.toml b/Cargo.toml index 4416c12..7b4085c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.27", features = ["chrono", "py-clone"]} -cel = { version = "0.12.0", features = ["chrono", "json", "regex", "bytes"] } +cel = { version = "0.13.0", features = ["chrono", "json", "regex", "bytes"] } log = "0.4.27" pyo3-log = { git = "https://github.com/a1phyr/pyo3-log.git", branch = "pyo3_0.27" } chrono = { version = "0.4.42", features = ["serde"] } diff --git a/README.md b/README.md index c2837f4..9b6f446 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ uv run pytest ## Contributing Contributions are welcome! 
Please see our [documentation](https://python-common-expression-language.readthedocs.io/) for: -- [CEL compliance status](docs/reference/cel-compliance.md) +- [CHANGELOG](CHANGELOG.md) — release notes and behaviour changes - Development setup and guidelines - Areas where help is needed diff --git a/docs/contributing.md b/docs/contributing.md index 7085ffd..6d683ac 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -59,16 +59,9 @@ uv sync --dev # Build the Rust extension uv run maturin develop -# → 🔗 Found pyo3 bindings -# → 📦 Built wheel for CPython 3.11 to target/wheels/common_expression_language-0.11.0-cp311-cp311-linux_x86_64.whl -# → 📦 Installed common-expression-language-0.11.0 # Run tests to verify setup uv run pytest -# → ========================= test session starts ========================= -# → collected 300+ items -# → tests/test_basics.py ........ [ 95%] -# → ========================= 300 passed in 2.34s ========================= ``` ### Code Organization @@ -152,7 +145,7 @@ def test_lower_ascii_not_implemented(self): cel.evaluate('"HELLO".lowerAscii()') # → RuntimeError: Undefined variable or function 'lowerAscii' -@pytest.mark.xfail(reason="String utilities not implemented in cel v0.11.1", strict=False) +@pytest.mark.xfail(reason="String utility not implemented upstream yet", strict=False) def test_lower_ascii_expected_behavior(self): """This test will pass when upstream implements lowerAscii().""" result = cel.evaluate('"HELLO".lowerAscii()') @@ -266,16 +259,10 @@ See https://pyo3.rs/main/type-stub to opt-in to the automated types when impleme ```bash # Clean rebuild uv run maturin develop --release -# → 🔗 Found pyo3 bindings -# → 📦 Built wheel for CPython 3.11 to target/wheels/common_expression_language-0.11.0-cp311-cp311-linux_x86_64.whl -# → 📦 Installed common-expression-language-0.11.0 # Check Rust toolchain rustc --version -# → rustc 1.75.0 (82e1608df 2023-12-21) - cargo --version -# → cargo 1.75.0 (1d8b05cdd 2023-11-20) ``` **Test 
Failures:** diff --git a/docs/cookbook.md b/docs/cookbook.md index 48d2a53..58ac29b 100644 --- a/docs/cookbook.md +++ b/docs/cookbook.md @@ -325,7 +325,7 @@ print(result) # → True (cached on subsequent calls) > - Never trust user-provided expressions without sandboxing > - Monitor expression performance for DoS protection -**→ [Full Production Guide](how-to-guides/production-patterns-best-practices.md) | [API Reference](reference/python-api.md)** +**→ [API Reference](reference/python-api.md) | [Error Handling](how-to-guides/error-handling.md)** --- @@ -337,8 +337,7 @@ print(result) # → True (cached on subsequent calls) 2. **Learn Fundamentals**: [CEL Language Basics](tutorials/cel-language-basics.md) - Master the syntax 3. **Practice**: [CLI Recipes](#cli-recipes) - Get comfortable with the tools 4. **Build**: [Business Logic](#data-transformation) - Implement your first real use case -5. **Secure**: [Error Handling](#error-handling) - Make it production-ready -6. **Scale**: [Production Patterns](#production-patterns) - Deploy with confidence +5. **Handle errors**: [Error Handling](#error-handling) - Make it production-ready ## 💡 Can't Find What You're Looking For? diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index edf137c..933ffdf 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -13,27 +13,20 @@ Getting Python CEL up and running is quick and easy. 
```bash uv add common-expression-language - # → Adding common-expression-language to dependencies - # → Resolved 15 packages in 1.23s - # → Installed common-expression-language-0.11.0 ``` === "uv tool (CLI only)" Install the CLI tool globally: - + ```bash uv tool install common-expression-language - # → Installed common-expression-language 0.11.0 - # → Installed executables: cel ``` === "pip" ```bash pip install common-expression-language - # → Collecting common-expression-language - # → Successfully installed common-expression-language-0.11.0 ``` @@ -56,8 +49,6 @@ print("✓ Basic evaluation working correctly") ```bash cel --version -# → cel 0.11.0 - cel '1 + 2' # → 3 ``` @@ -81,17 +72,10 @@ cd python-common-expression-language # Install in development mode pip install maturin -# → Successfully installed maturin-1.4.0 - maturin develop -# → 🔗 Found pyo3 bindings -# → 📦 Built wheel for CPython 3.11 to target/wheels/ -# → 📦 Installed common-expression-language-0.11.0 # Or with uv uv run maturin develop -# → 🔗 Found pyo3 bindings -# → 📦 Built wheel and installed successfully ``` ## Troubleshooting diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index 6ff84e2..7aa79dd 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -345,12 +345,12 @@ def evaluate_expression(expression: str, context: dict = None): """Evaluate expression with proper exception handling.""" return evaluate(expression, context or {}) -# For cases where you need fallback values +# For cases where you need fallback values def evaluate_with_default(expression: str, context: dict = None, default = None): """Evaluate with fallback value on errors.""" try: return evaluate(expression, context or {}) - except (ValueError, TypeError, RuntimeError): + except (ValueError, TypeError, RuntimeError, KeyError, IndexError, OverflowError, ZeroDivisionError): return default # Result-like pattern for detailed error information @@ -406,7 +406,7 @@ def validate_user_rules(rules: 
list[str], user_context: dict) -> dict[str, bool] for rule in rules: try: results[rule] = bool(evaluate_expression(rule, user_context)) - except (ValueError, TypeError, RuntimeError): + except (ValueError, TypeError, RuntimeError, KeyError, IndexError, OverflowError, ZeroDivisionError): results[rule] = False # Invalid rules are considered failed return results @@ -444,7 +444,7 @@ Congratulations! You've mastered basic CEL evaluation with dictionary context. N **🏢 Jump to Specific Applications:** - **[Access Control Policies](../how-to-guides/access-control-policies.md)** - Build permission systems (requires Context knowledge) - **[Business Logic & Data Transformation](../how-to-guides/business-logic-data-transformation.md)** - Implement business rules -- **[Production Patterns & Best Practices](../how-to-guides/production-patterns-best-practices.md)** - Deploy CEL safely +- **[Error Handling](../how-to-guides/error-handling.md)** - Exception types and safe-evaluation patterns **💡 Recommended Learning Path:** diff --git a/docs/how-to-guides/access-control-policies.md b/docs/how-to-guides/access-control-policies.md index aad6e39..24bc9fd 100644 --- a/docs/how-to-guides/access-control-policies.md +++ b/docs/how-to-guides/access-control-policies.md @@ -1,838 +1,169 @@ # Access Control Policies -Learn how to implement sophisticated access control that goes beyond simple role-based permissions using CEL expressions. +Define application authorization rules as CEL expressions instead of hard-coding `if`/`else` chains. The result is a tidy, auditable, side-effect-free policy that's easy to test and modify without redeploying. -## The Problem +## The shape of a policy -Your application needs sophisticated access control that goes beyond simple role-based permissions. You need to handle multiple factors like: - -- Time of day restrictions -- Resource ownership -- Collaboration permissions -- Context-sensitive rules - -Hard-coding these rules makes them difficult to update and test. 
- -## The CEL Solution - -Instead of complex if/else chains in your application code, define access policies as portable, safe expressions that can be updated without code changes. - -CEL enables sophisticated, multi-factor access control policies that handle complex business rules: +Pass the user, resource, and action into a context dict; encode the rules as one CEL expression. ```python from cel import evaluate -from datetime import datetime -def check_advanced_access_policy(user, resource, action, current_time=None): - """Enterprise-grade multi-factor access control policy.""" - - if current_time is None: - current_time = datetime.now() - - # Advanced policy with multiple business rules: - # 1. Admins can do anything, anytime - # 2. Resource owners have full access during business hours - # 3. Department members can read/collaborate on shared resources - # 4. External users need approval for sensitive resources - # 5. Compliance: audit logs required for financial data access - policy = """ - (user.role == "admin") || - (resource.owner == user.id && user.verified && - (action != "delete" || user.department == resource.department)) || - (user.department == resource.department && user.clearance_level >= resource.sensitivity_level && - action in ["read", "comment"] && is_business_hours(current_hour)) || - (user.role == "external" && user.id in resource.approved_external_users && - action == "read" && resource.external_access_allowed) || - (action == "read" && resource.public && - (user.role != "guest" || is_business_hours(current_hour))) - """ - - def is_business_hours(hour): - return 9 <= hour <= 17 - - context = { +policy = """ + user.role == "admin" || + (resource.owner == user.id && user.verified) || + (action == "read" && resource.public) +""" + +def authorize(user, resource, action): + return evaluate(policy, { "user": user, - "resource": resource, + "resource": resource, "action": action, - "current_hour": current_time.hour, - "is_business_hours": 
is_business_hours - } - - return evaluate(policy, context) - -# Example: Financial data access -financial_user = { - "id": "analyst1", - "role": "analyst", - "department": "finance", - "clearance_level": 3, - "verified": True -} - -financial_resource = { - "id": "q4_report", - "owner": "cfo", - "department": "finance", - "sensitivity_level": 3, - "external_access_allowed": False, - "approved_external_users": [], - "public": False -} - -# Test access during business hours -business_hour_time = datetime.now().replace(hour=14) # 2 PM -access_granted = check_advanced_access_policy( - financial_user, financial_resource, "read", business_hour_time -) -assert access_granted == True # → Access GRANTED: Department member reading financial data during business hours - -# Test access after hours (should be denied for non-admin) -after_hours_time = datetime.now().replace(hour=22) # 10 PM -access_denied = check_advanced_access_policy( - financial_user, financial_resource, "read", after_hours_time -) -assert access_denied == False # → Access DENIED: Time-based security - financial data restricted after business hours - -print("✓ Advanced access control policies working correctly") + }) + +assert authorize( + {"id": "alice", "role": "user", "verified": True}, + {"id": "doc1", "owner": "alice", "public": False}, + "write", +) is True + +assert authorize( + {"id": "bob", "role": "guest", "verified": False}, + {"id": "doc2", "owner": "alice", "public": True}, + "read", +) is True ``` -## Advanced Policy Patterns +## Role hierarchy -### Role Hierarchy +Translate role names to numeric levels in the context so the policy can compare them directly: ```python -def check_hierarchical_access(user, resource, action): - """Implement role hierarchy where higher roles inherit lower permissions.""" - - role_hierarchy = { - "guest": 0, - "user": 1, - "member": 2, - "manager": 3, - "admin": 4 - } - - policy = """ - user.role_level >= required_level && - ( - (action == "read" && resource.public) 
|| - (action == "read" && user.id in resource.collaborators) || - (action in ["read", "write"] && resource.owner == user.id) || - (action in ["read", "write", "delete"] && user.role_level >= 3) - ) - """ - - context = { - "user": {**user, "role_level": role_hierarchy.get(user["role"], 0)}, - "resource": resource, - "action": action, - "required_level": 0 # Minimum level to access system - } - - return evaluate(policy, context) - -# Test the hierarchical access control -guest_user = {"role": "guest", "id": "guest1"} -user_account = {"role": "user", "id": "user1"} -manager_account = {"role": "manager", "id": "mgr1"} - -public_resource = {"public": True, "owner": "admin", "collaborators": []} -private_resource = {"public": False, "owner": "user1", "collaborators": ["guest1"]} - -# Test 1: Guest accessing public resource -result = check_hierarchical_access(guest_user, public_resource, "read") -assert result == True # → Access GRANTED: Public resources accessible to all authenticated users - -# Test 2: Guest accessing private resource (denied) -result = check_hierarchical_access(guest_user, private_resource, "write") -assert result == False # → Access DENIED: Insufficient role level - guests cannot write to private resources - -# Test 3: User accessing owned resource -result = check_hierarchical_access(user_account, private_resource, "write") -assert result == True # → Access GRANTED: Resource ownership grants full read/write permissions - -# Test 4: Manager can delete (role_level >= 3) -result = check_hierarchical_access(manager_account, private_resource, "delete") -assert result == True # → Access GRANTED: Management role hierarchy allows deletion of any resource - -# Test 5: Guest as collaborator can read -result = check_hierarchical_access(guest_user, private_resource, "read") -assert result == True # → Access GRANTED: Collaboration permissions override role restrictions for read access +from cel import evaluate -print("✓ Hierarchical access control working 
correctly") -``` +ROLE_LEVELS = {"guest": 0, "user": 1, "member": 2, "manager": 3, "admin": 4} -### Time-Based Access +policy = """ + user.level >= 4 || + (action == "read" && resource.public) || + (action in ["read", "write"] && resource.owner == user.id) || + (action == "read" && user.id in resource.collaborators) +""" -```python -def check_time_based_access(user, resource, action, current_time=None): - """Implement time-based access restrictions.""" - - if current_time is None: - current_time = datetime.now() - - policy = """ - user.role == "admin" || - ( - user.role in ["member", "user"] && - ( - (user.schedule == "standard" && hour >= 9 && hour <= 17) || - (user.schedule == "flexible" && (hour >= 6 && hour <= 22)) || - (user.schedule == "always") - ) - ) - """ - - context = { - "user": user, +def authorize(user, resource, action): + enriched_user = {**user, "level": ROLE_LEVELS.get(user["role"], 0)} + return evaluate(policy, { + "user": enriched_user, "resource": resource, "action": action, - "hour": current_time.hour, - "day_of_week": current_time.weekday() - } - - return evaluate(policy, context) - -# Test time-based access control -standard_user = {"role": "user", "schedule": "standard"} -flexible_user = {"role": "user", "schedule": "flexible"} -admin_user = {"role": "admin", "schedule": "standard"} -test_resource = {"id": "test_doc"} - -# Test 1: Standard user during business hours -business_time = datetime.now().replace(hour=14) # 2 PM -result = check_time_based_access(standard_user, test_resource, "read", business_time) -assert result == True # → Access GRANTED: Standard work schedule allows access during 9-5 business hours - -# Test 2: Standard user after hours (denied) -after_hours = datetime.now().replace(hour=22) # 10 PM -result = check_time_based_access(standard_user, test_resource, "read", after_hours) -assert result == False # → Access DENIED: Standard schedule restrictions prevent after-hours access - -# Test 3: Flexible user during extended 
hours -result = check_time_based_access(flexible_user, test_resource, "read", after_hours) -assert result == True # → Access GRANTED: Flexible schedule allows extended hours (6 AM - 10 PM) - -# Test 4: Admin always has access -early_morning = datetime.now().replace(hour=5) # 5 AM -result = check_time_based_access(admin_user, test_resource, "read", early_morning) -assert result == True # → Access GRANTED: Admin role bypasses all time-based restrictions - -print("✓ Time-based access control working correctly") -``` - -### Resource-Specific Policies - -```python -def check_resource_specific_access(user, resource, action): - """Different rules for different resource types.""" - - policies = { - "document": """ - user.role == "admin" || - (resource.owner == user.id) || - (resource.public && action == "read") || - (user.id in resource.collaborators && action in ["read", "comment"]) - """, - - "database": """ - user.role == "admin" || - (user.role == "developer" && action in ["read", "write"]) || - (user.role == "analyst" && action == "read") - """, - - "system": """ - user.role == "admin" || - (user.role == "operator" && action in ["read", "restart"]) || - (user.role == "monitor" && action == "read") - """ - } - - policy = policies.get(resource.get("type", "document"), policies["document"]) - - context = { - "user": user, - "resource": resource, - "action": action - } - - return evaluate(policy, context) - -# Test resource-specific access control -developer = {"role": "developer", "id": "dev1"} -analyst = {"role": "analyst", "id": "analyst1"} -operator = {"role": "operator", "id": "ops1"} - -document_resource = {"type": "document", "owner": "dev1", "public": False, "collaborators": ["analyst1"]} -database_resource = {"type": "database", "name": "prod_db"} -system_resource = {"type": "system", "name": "web_server"} - -# Test 1: Developer with database (can read/write) -result = check_resource_specific_access(developer, database_resource, "write") -assert result == True # 
→ Access GRANTED: Developer role has full database read/write permissions - -result = check_resource_specific_access(developer, database_resource, "read") -assert result == True # → Access GRANTED: Developer role includes database read access - -# Test 2: Analyst with database (read-only) -result = check_resource_specific_access(analyst, database_resource, "read") -assert result == True # → Access GRANTED: Analyst role has read-only database access for reporting - -result = check_resource_specific_access(analyst, database_resource, "write") -assert result == False # → Access DENIED: Analyst role restricted from database modifications - -# Test 3: Operator with system (can read/restart) -result = check_resource_specific_access(operator, system_resource, "restart") -assert result == True # → Access GRANTED: Operator role can restart systems for maintenance + }) -# Test 4: Analyst as document collaborator -result = check_resource_specific_access(analyst, document_resource, "read") -assert result == True # → Access GRANTED: Collaborator status grants read access regardless of role - -result = check_resource_specific_access(analyst, document_resource, "write") -assert result == False # → Access DENIED: Collaborator read-only access - ownership required for writes - -print("✓ Resource-specific access control working correctly") +assert authorize( + {"id": "alice", "role": "manager"}, + {"id": "doc1", "owner": "bob", "public": False, "collaborators": ["alice"]}, + "read", +) is True ``` -## Kubernetes Validation Rules - -One of the most common real-world applications of CEL is in Kubernetes ValidatingAdmissionPolicies. CEL enables cluster administrators to write sophisticated admission control policies that validate resources before they're created or updated. +## Time-based access -### ValidatingAdmissionPolicy Examples +Inject the current hour into the context and compare it in the expression. Keep the time computation in Python — it's not the policy's concern. 
```python +from datetime import datetime from cel import evaluate -import json - -def validate_kubernetes_pod(pod_spec, policy_expression): - """Validate a Kubernetes Pod specification using CEL expressions.""" - - # Normalize the pod spec to ensure consistent structure for policy evaluation - normalized_spec = normalize_pod_spec(pod_spec) - - context = { - "object": normalized_spec, - "request": { - "operation": "CREATE", - "userInfo": { - "username": "developer@company.com", - "groups": ["developers", "system:authenticated"] - } - } - } - - try: - return evaluate(policy_expression, context) - except Exception as e: - print(f"Policy validation failed: {e}") - return False - -def normalize_pod_spec(pod_spec): - """Normalize pod spec to ensure consistent structure.""" - normalized = pod_spec.copy() - - # Ensure securityContext exists with defaults - if "securityContext" not in normalized["spec"]: - normalized["spec"]["securityContext"] = {} - - # Set default runAsUser if not specified (1000 = non-root) - if "runAsUser" not in normalized["spec"]["securityContext"]: - normalized["spec"]["securityContext"]["runAsUser"] = 1000 - - return normalized - -# Example 1: Security Policy - Require non-root containers -# With normalized structure, we can use simple, reliable expressions -pod_security_policy = """ - object.spec.securityContext.runAsUser != 0 -""" -# Valid pod - runs as non-root user -secure_pod = { - "apiVersion": "v1", - "kind": "Pod", - "metadata": {"name": "secure-app"}, - "spec": { - "securityContext": {"runAsUser": 1000}, - "containers": [{ - "name": "app", - "image": "nginx:1.21" - }] - } -} - -# Test secure pod passes validation -assert validate_kubernetes_pod(secure_pod, pod_security_policy) == True # → SECURITY CHECK PASSED: Non-root user (1000) complies with security policy - -# Invalid pod - runs as root -insecure_pod = { - "apiVersion": "v1", - "kind": "Pod", - "metadata": {"name": "insecure-app"}, - "spec": { - "securityContext": {"runAsUser": 0}, # 
Root user! - "containers": [{ - "name": "app", - "image": "nginx:1.21" - }] - } -} - -# Test insecure pod fails validation -assert validate_kubernetes_pod(insecure_pod, pod_security_policy) == False # → SECURITY VIOLATION: Root user (UID 0) blocked by admission policy - -# Pod with no security context - should default to non-root and pass -default_pod = { - "apiVersion": "v1", - "kind": "Pod", - "metadata": {"name": "default-app"}, - "spec": { - "containers": [{ - "name": "app", - "image": "nginx:1.21" - }] - } -} +policy = """ + user.role == "admin" || + (user.schedule == "always") || + (user.schedule == "business" && hour >= 9 && hour <= 17) || + (user.schedule == "extended" && hour >= 6 && hour <= 22) +""" -# Test default pod gets normalized and passes validation -assert validate_kubernetes_pod(default_pod, pod_security_policy) == True # → SECURITY CHECK PASSED: Default non-root user applied through normalization +def authorize(user, *, now=None): + now = now or datetime.now() + return evaluate(policy, {"user": user, "hour": now.hour}) -print("✓ Kubernetes pod security validation working correctly") +assert authorize({"role": "user", "schedule": "business"}, now=datetime(2026, 1, 1, 14)) is True +assert authorize({"role": "user", "schedule": "business"}, now=datetime(2026, 1, 1, 22)) is False +assert authorize({"role": "admin", "schedule": "business"}, now=datetime(2026, 1, 1, 3)) is True ``` -### Resource Limit Enforcement - -```python -def validate_resource_limits(workload_spec): - """Enforce resource limits and requests for production workloads.""" - - # Policy: All containers must have CPU and memory limits set - # and requests must be at least 50% of limits - resource_policy = """ - object.spec.containers.all(container, - has(container.resources) && - has(container.resources.limits) && - has(container.resources.requests) && - has(container.resources.limits.cpu) && - has(container.resources.limits.memory) && - has(container.resources.requests.cpu) && - 
has(container.resources.requests.memory) - ) - """ - - context = {"object": workload_spec} - return evaluate(resource_policy, context) - -# Valid deployment with proper resource management -deployment_with_limits = { - "apiVersion": "apps/v1", - "kind": "Deployment", - "metadata": {"name": "web-app"}, - "spec": { - "containers": [{ - "name": "web", - "image": "nginx:1.21", - "resources": { - "limits": {"cpu": "200m", "memory": "256Mi"}, - "requests": {"cpu": "100m", "memory": "128Mi"} # 50% of limits - } - }] - } -} - -# Test deployment passes resource validation -assert validate_resource_limits(deployment_with_limits) == True # → RESOURCE POLICY PASSED: All containers have proper CPU/memory limits and requests +## Per-resource policies -print("✓ Kubernetes resource limit validation working correctly") -``` - -### Network Policy Validation +When different resource types need different rules, look up the policy by type: ```python -def validate_network_policy(network_policy_spec): - """Validate NetworkPolicy configurations for security compliance.""" - - # Policy: Ensure network policies have both ingress and egress rules - # and don't allow unrestricted access - network_security_policy = """ - has(object.spec.ingress) && size(object.spec.ingress) > 0 && - has(object.spec.egress) && size(object.spec.egress) > 0 && - object.spec.ingress.all(rule, - !has(rule.from) || size(rule.from) > 0 - ) && - object.spec.egress.all(rule, - !has(rule.to) || size(rule.to) > 0 - ) - """ - - context = {"object": network_policy_spec} - return evaluate(network_security_policy, context) - -# Valid network policy with restricted access -secure_network_policy = { - "apiVersion": "networking.k8s.io/v1", - "kind": "NetworkPolicy", - "metadata": {"name": "web-netpol"}, - "spec": { - "podSelector": {"matchLabels": {"app": "web"}}, - "ingress": [{ - "from": [{"podSelector": {"matchLabels": {"app": "frontend"}}}], - "ports": [{"protocol": "TCP", "port": 80}] - }], - "egress": [{ - "to": 
[{"podSelector": {"matchLabels": {"app": "database"}}}], - "ports": [{"protocol": "TCP", "port": 5432}] - }] - } -} - -# Test network policy passes validation -assert validate_network_policy(secure_network_policy) == True # → NETWORK SECURITY PASSED: Ingress/egress rules properly restrict traffic flow - -print("✓ Kubernetes network policy validation working correctly") -``` - -### Custom Resource Validation - -```python -def validate_custom_resource(custom_resource_spec, crd_validation_rules): - """Validate custom resources using CEL expressions.""" - - # Example: Validate a custom Application resource - app_validation_policy = """ - has(object.spec.replicas) && object.spec.replicas >= 1 && - has(object.spec.image) && object.spec.image.contains(':') && - !object.spec.image.endsWith(':latest') && - has(object.spec.environment) && - object.spec.environment in ['dev', 'staging', 'prod'] && - (object.spec.environment == 'prod' ? object.spec.replicas >= 3 : true) - """ - - context = {"object": custom_resource_spec} - return evaluate(app_validation_policy, context) - -# Valid production application -production_app = { - "apiVersion": "platform.company.com/v1", - "kind": "Application", - "metadata": {"name": "payment-service"}, - "spec": { - "replicas": 3, # Production requires >= 3 replicas - "image": "payment-service:v1.2.3", # Specific version, not latest - "environment": "prod" - } -} +from cel import evaluate -# Valid development application -development_app = { - "apiVersion": "platform.company.com/v1", - "kind": "Application", - "metadata": {"name": "test-service"}, - "spec": { - "replicas": 1, # Dev can have 1 replica - "image": "test-service:v0.1.0", - "environment": "dev" - } +POLICIES = { + "document": """ + user.role == "admin" || + resource.owner == user.id || + (resource.public && action == "read") || + (user.id in resource.collaborators && action in ["read", "comment"]) + """, + "database": """ + user.role == "admin" || + (user.role == "developer" && action 
in ["read", "write"]) || + (user.role == "analyst" && action == "read") + """, + "system": """ + user.role == "admin" || + (user.role == "operator" && action in ["read", "restart"]) || + (user.role == "monitor" && action == "read") + """, } -# Test both applications pass validation -assert validate_custom_resource(production_app, {}) == True # → COMPLIANCE PASSED: Production app meets replica and versioning requirements -assert validate_custom_resource(development_app, {}) == True # → COMPLIANCE PASSED: Development app allows lower replica count with proper versioning - -print("✓ Kubernetes custom resource validation working correctly") +def authorize(user, resource, action): + policy = POLICIES.get(resource.get("type"), POLICIES["document"]) + return evaluate(policy, {"user": user, "resource": resource, "action": action}) + +assert authorize( + {"role": "developer", "id": "d1"}, + {"type": "database", "name": "prod"}, + "write", +) is True + +assert authorize( + {"role": "analyst", "id": "a1"}, + {"type": "database", "name": "prod"}, + "write", +) is False ``` -### Production Kubernetes Policy Engine - -```python -from cel import evaluate, Context -from datetime import datetime -import re - -class KubernetesPolicyEngine: - """Production-grade policy engine for Kubernetes admission control.""" - - def __init__(self): - self.policies = {} - self.load_default_policies() - - def load_default_policies(self): - """Load standard security and compliance policies.""" - - self.policies = { - "pod-security": { - "expression": """ - object.spec.securityContext.runAsUser != 0 - """, - "message": "Pods must not run as root user" - }, - - "resource-quotas": { - "expression": """ - object.spec.containers.all(container, - size(container.resources.limits) > 0 && - size(container.resources.requests) > 0 - ) - """, - "message": "All containers must specify resource limits and requests" - }, - - "image-policy": { - "expression": """ - object.spec.containers.all(container, - 
container.image.startsWith('company-registry.com/') && - !container.image.endsWith(':latest') && - container.image.contains(':v') - ) - """, - "message": "Images must be from company registry with semantic versioning" - }, - - "namespace-compliance": { - "expression": """ - has(object.metadata.namespace) && - object.metadata.namespace != 'default' && - (object.metadata.namespace.startsWith('prod-') ? - (has(object.metadata.labels) && 'compliance.company.com/approved' in object.metadata.labels) : true) - """, - "message": "Production namespaces require compliance approval labels" - } - } - - def normalize_resource_spec(self, resource_spec): - """Normalize resource spec to ensure consistent structure for policy evaluation.""" - normalized = resource_spec.copy() - - # Ensure spec exists - if "spec" not in normalized: - normalized["spec"] = {} - - # For Pods, ensure securityContext with defaults - if normalized.get("kind") == "Pod": - if "securityContext" not in normalized["spec"]: - normalized["spec"]["securityContext"] = {} - - # Set default runAsUser if not specified (1000 = non-root) - if "runAsUser" not in normalized["spec"]["securityContext"]: - normalized["spec"]["securityContext"]["runAsUser"] = 1000 - - # Ensure containers list exists - if "containers" not in normalized["spec"]: - normalized["spec"]["containers"] = [] - - # Normalize container resources - for container in normalized["spec"]["containers"]: - if "resources" not in container: - container["resources"] = {"limits": {}, "requests": {}} - if "limits" not in container["resources"]: - container["resources"]["limits"] = {} - if "requests" not in container["resources"]: - container["resources"]["requests"] = {} - - # Ensure metadata and labels exist - if "metadata" not in normalized: - normalized["metadata"] = {} - if "labels" not in normalized["metadata"]: - normalized["metadata"]["labels"] = {} - - return normalized - - def validate_admission(self, resource_spec, operation="CREATE", user_info=None): - 
"""Validate a Kubernetes resource admission request.""" - - if user_info is None: - user_info = {"username": "system", "groups": ["system:authenticated"]} - - # Normalize the resource to ensure consistent structure for policy evaluation - normalized_spec = self.normalize_resource_spec(resource_spec) - - context = Context() - context.add_variable("object", normalized_spec) - context.add_variable("operation", operation) - context.add_variable("userInfo", user_info) - context.add_variable("timestamp", datetime.now().isoformat()) - - results = [] - - for policy_name, policy_config in self.policies.items(): - try: - # Skip certain policies for system users - if (user_info.get("username", "").startswith("system:") and - policy_name == "image-policy"): - continue - - result = evaluate(policy_config["expression"], context) - results.append({ - "policy": policy_name, - "allowed": result, - "message": policy_config["message"] if not result else "Policy passed" - }) - - except Exception as e: - results.append({ - "policy": policy_name, - "allowed": False, - "message": f"Policy evaluation error: {e}" - }) - - # Overall admission decision - admission_allowed = all(r["allowed"] for r in results) - - return { - "allowed": admission_allowed, - "message": "Admission approved" if admission_allowed else "Admission denied", - "policy_results": results - } - -# Test the production policy engine -policy_engine = KubernetesPolicyEngine() - -# Test with a compliant pod -compliant_pod = { - "apiVersion": "v1", - "kind": "Pod", - "metadata": { - "name": "web-app", - "namespace": "prod-payments", - "labels": {"compliance.company.com/approved": "true"} - }, - "spec": { - "securityContext": {"runAsUser": 1000}, - "containers": [{ - "name": "app", - "image": "company-registry.com/web-app:v1.2.3", - "resources": { - "limits": {"cpu": "500m", "memory": "256Mi"}, - "requests": {"cpu": "250m", "memory": "128Mi"} - } - }] - } -} - -# Test admission -result = policy_engine.validate_admission( - 
compliant_pod, - operation="CREATE", - user_info={"username": "developer@company.com", "groups": ["developers"]} -) - -print(f"Admission allowed: {result['allowed']}") -print(f"Message: {result['message']}") -for policy_result in result['policy_results']: - status = "✓" if policy_result['allowed'] else "✗" - print(f" {status} {policy_result['policy']}: {policy_result['message']}") - -# The compliant pod should pass all policies -if not result['allowed']: - print(f"❌ Admission denied: {result['message']}") - for policy_result in result['policy_results']: - if not policy_result['allowed']: - print(f" Failed policy: {policy_result['policy']} - {policy_result['message']}") - -assert result['allowed'] == True, f"Expected admission to be allowed, but got: {result}" # → ADMISSION APPROVED: Pod meets all security, resource, and compliance policies - -print("\n✓ Kubernetes production policy engine working correctly") -``` +## Pre-compile hot-path policies -### Testing Kubernetes Policies with Python +If you evaluate the same policy many times per request, compile it once at startup: ```python -import pytest -from cel import evaluate +from cel import compile -def test_kubernetes_pod_security_policies(): - """Comprehensive test suite for Kubernetes pod security policies.""" - - def check_pod_security(pod_spec): - policy = """ - (!has(object.spec.securityContext) || - !has(object.spec.securityContext.runAsUser) || - object.spec.securityContext.runAsUser != 0) && - object.spec.containers.all(container, - !has(container.securityContext) || - !has(container.securityContext.privileged) || - container.securityContext.privileged == false - ) - """ - return evaluate(policy, {"object": pod_spec}) - - # Test case 1: Secure pod should pass - secure_pod = { - "spec": { - "securityContext": {"runAsUser": 1000}, - "containers": [{"name": "app", "image": "nginx"}] - } - } - assert check_pod_security(secure_pod) == True # → SECURITY VALID: Non-root user and no privileged containers - - # 
Test case 2: Root user should fail - root_pod = { - "spec": { - "securityContext": {"runAsUser": 0}, - "containers": [{"name": "app", "image": "nginx"}] - } - } - assert check_pod_security(root_pod) == False # → SECURITY VIOLATION: Root user (UID 0) poses container escape risk - - # Test case 3: Privileged container should fail - privileged_pod = { - "spec": { - "securityContext": {"runAsUser": 1000}, - "containers": [{ - "name": "app", - "image": "nginx", - "securityContext": {"privileged": True} - }] - } - } - assert check_pod_security(privileged_pod) == False # → SECURITY VIOLATION: Privileged containers bypass kernel security - - # Test case 4: Missing security context should pass (default behavior) - default_pod = { - "spec": { - "containers": [{"name": "app", "image": "nginx"}] - } - } - assert check_pod_security(default_pod) == True # → SECURITY ACCEPTABLE: Default runtime security context applied - -# Run the test -test_kubernetes_pod_security_policies() -print("✓ All Kubernetes policy tests passed") +policy = ''' + user.role == "admin" || + (resource.owner == user.id && user.verified) || + (action == "read" && resource.public) +''' +program = compile(policy) + +# Then on each call: +allowed = program.execute({ + "user": {"id": "alice", "role": "user", "verified": True}, + "resource": {"id": "doc1", "owner": "alice", "public": False}, + "action": "write", +}) +assert allowed is True ``` -These Kubernetes examples demonstrate CEL's real-world power in: - -- **ValidatingAdmissionPolicies**: Prevent insecure or non-compliant resources -- **Resource Management**: Enforce CPU/memory limits and requests -- **Security Compliance**: Block privileged containers and root users -- **Network Security**: Validate NetworkPolicy configurations -- **Custom Resources**: Validate application-specific requirements -- **Production Workflows**: Complete policy engines with multiple validation rules - -The Python CEL library is perfect for: -- **Testing Kubernetes policies 
locally** before deploying to clusters -- **Building admission webhook servers** that validate resources -- **Creating policy validation tools** for CI/CD pipelines -- **Developing custom operators** with CEL-based validation logic - -## Why This Works - -- **Readable**: Business stakeholders can understand the policy -- **Testable**: Each condition can be tested independently -- **Flexible**: New rules can be added without code changes -- **Safe**: No risk of infinite loops or side effects -- **Auditable**: Policy changes are visible and trackable - -## Best Practices +## Why CEL fits -1. **Keep policies simple**: Break complex policies into smaller, composable rules -2. **Use descriptive names**: Make variable and function names self-documenting -3. **Test thoroughly**: Write unit tests for all policy scenarios -4. **Version control**: Track policy changes in version control -5. **Monitor performance**: Profile policy evaluation in production +- **Readable.** Non-engineer stakeholders can audit the rules. +- **Testable.** Each clause is a pure expression; test it the same way you'd test a function. +- **Safe.** No loops, no side effects, no `eval()`. The expression can't break out of the sandbox. +- **Versioned cleanly.** Policy text fits in a Git diff or a config row. -## Related Topics +## Related topics -- [Business Logic & Data Transformation](business-logic-data-transformation.md) - Validate access control settings and transform user/resource data for policies -- [Production Patterns & Best Practices](production-patterns-best-practices.md) - Security and performance patterns \ No newline at end of file +- [Business Logic & Data Transformation](business-logic-data-transformation.md) — broader rule-engine patterns. +- [Error Handling](error-handling.md) — exception types and safe-evaluation patterns. 
diff --git a/docs/how-to-guides/business-logic-data-transformation.md b/docs/how-to-guides/business-logic-data-transformation.md index 8636b3d..cdc6987 100644 --- a/docs/how-to-guides/business-logic-data-transformation.md +++ b/docs/how-to-guides/business-logic-data-transformation.md @@ -1,943 +1,150 @@ # Business Logic and Data Transformation -Learn how to implement configurable business rules engines and data transformation pipelines using CEL expressions that business users can understand and modify. +Business rules and data-shape transformations change frequently, often driven by non-engineering stakeholders. CEL lets you express them as plain text — auditable, side-effect-free, and easy to update without code review. -## Business Rules Engine +## Business rules engine -### The Problem - -Your application has complex business rules that change frequently based on market conditions, regulations, or business strategy. These rules involve calculations, eligibility checks, and decision trees. Hard-coding them makes the application rigid and requires developer involvement for every change. - -### The CEL Solution - -Implement a configurable business rules engine where rules are defined as CEL expressions that business users can understand and modify: +Define each rule as a CEL expression in a table; evaluate them against a context. Adding or changing a rule is just editing a string. ```python -from cel import evaluate, Context -from datetime import datetime, timedelta - -class BusinessRulesEngine: - """Execute configurable business rules using CEL.""" - - def __init__(self): - self.rules = { - # Insurance pricing rules - "base_premium": """ - vehicle.type == "car" ? 800 : - vehicle.type == "motorcycle" ? 600 : - vehicle.type == "truck" ? 1200 : - 1000 - """, - - "age_multiplier": """ - driver.age < 25 ? 1.5 : - driver.age < 35 ? 1.2 : - driver.age < 60 ? 1.0 : - 1.1 - """, - - "experience_discount": """ - driver.years_experience >= 10 ? 
0.9 : - driver.years_experience >= 5 ? 0.95 : - 1.0 - """, - - "safety_features_discount": """ - vehicle.anti_theft ? 0.95 : 1.0 - """, - - "claims_penalty": """ - driver.claims_count == 0 ? 0.9 : - driver.claims_count == 1 ? 1.0 : - driver.claims_count == 2 ? 1.2 : - 1.4 - """, - - # Loan eligibility rules - "credit_score_eligible": "applicant.credit_score >= 650", - - "income_sufficient": """ - loan.monthly_payment <= (double(applicant.monthly_income) * 0.28) - """, - - "debt_to_income_acceptable": """ - (applicant.existing_debt + loan.monthly_payment) <= (double(applicant.monthly_income) * 0.36) - """, - - "employment_stable": """ - applicant.employment_months >= 24 || applicant.employment_type == "self_employed" - """, - - # Shipping cost rules - "shipping_base_cost": """ - package.weight <= 1 ? 5.99 : - package.weight <= 5 ? 8.99 : - package.weight <= 20 ? 15.99 : - double(package.weight) * 1.2 - """, - - "shipping_distance_multiplier": """ - shipping.distance <= 50 ? 1.0 : - shipping.distance <= 200 ? 1.2 : - shipping.distance <= 1000 ? 1.5 : - 2.0 - """, - - "express_shipping_multiplier": "shipping.express ? 
2.0 : 1.0", - - "free_shipping_eligible": """ - order.total >= 100 || customer.premium_member - """ - } - - def calculate_insurance_premium(self, driver, vehicle): - """Calculate insurance premium using business rules.""" - context = Context() - context.add_variable("driver", driver) - context.add_variable("vehicle", vehicle) - - # Calculate each component - base_premium = evaluate(self.rules["base_premium"], context) - age_multiplier = evaluate(self.rules["age_multiplier"], context) - experience_discount = evaluate(self.rules["experience_discount"], context) - safety_discount = evaluate(self.rules["safety_features_discount"], context) - claims_penalty = evaluate(self.rules["claims_penalty"], context) - - # Final calculation - premium = (base_premium * - age_multiplier * - experience_discount * - safety_discount * - claims_penalty) - - return round(premium, 2) - - def check_loan_eligibility(self, applicant, loan): - """Check loan eligibility using business rules.""" - context = Context() - context.add_variable("applicant", applicant) - context.add_variable("loan", loan) - - # Check each eligibility criterion - criteria = { - "credit_score": evaluate(self.rules["credit_score_eligible"], context), - "income": evaluate(self.rules["income_sufficient"], context), - "debt_to_income": evaluate(self.rules["debt_to_income_acceptable"], context), - "employment": evaluate(self.rules["employment_stable"], context) - } - - # All criteria must pass - eligible = all(criteria.values()) - - return { - "eligible": eligible, - "criteria": criteria, - "reasons": [k for k, v in criteria.items() if not v] - } - - def calculate_shipping_cost(self, package, shipping, order, customer): - """Calculate shipping cost using business rules.""" - context = Context() - context.add_variable("package", package) - context.add_variable("shipping", shipping) - context.add_variable("order", order) - context.add_variable("customer", customer) - - # Check if free shipping applies - if 
evaluate(self.rules["free_shipping_eligible"], context): - return 0.0 - - # Calculate shipping cost - base_cost = evaluate(self.rules["shipping_base_cost"], context) - distance_multiplier = evaluate(self.rules["shipping_distance_multiplier"], context) - express_multiplier = evaluate(self.rules["express_shipping_multiplier"], context) - - total_cost = base_cost * distance_multiplier * express_multiplier - - return round(total_cost, 2) - -# Example usage -rules_engine = BusinessRulesEngine() - -# Insurance premium calculation -young_driver = { - "age": 22, - "years_experience": 2, - "claims_count": 1 +from cel import evaluate + +RULES = { + "base_premium": """ + vehicle.type == "car" ? 800 : + vehicle.type == "motorcycle" ? 600 : + vehicle.type == "truck" ? 1200 : + 1000 + """, + "age_multiplier": """ + driver.age < 25 ? 1.5 : + driver.age < 35 ? 1.2 : + driver.age < 60 ? 1.0 : + 1.1 + """, + "experience_discount": """ + driver.years_experience >= 10 ? 0.9 : + driver.years_experience >= 5 ? 0.95 : + 1.0 + """, + "claims_penalty": """ + driver.claims_count == 0 ? 0.9 : + driver.claims_count == 1 ? 1.0 : + driver.claims_count == 2 ? 
1.2 : + 1.4 + """, } -sports_car = { - "type": "car", - "anti_theft": True -} - -premium = rules_engine.calculate_insurance_premium(young_driver, sports_car) -# → 1140.0 # Young driver (22) + sports car: $800 * 1.5 (age) * 0.95 (experience) * 0.95 (anti-theft) * 1.0 (claims) -assert isinstance(premium, (int, float)) -assert premium > 0 - -# Loan eligibility check -loan_applicant = { - "credit_score": 720, - "monthly_income": 5000, - "existing_debt": 500, # Lower debt to pass debt-to-income ratio - "employment_months": 30, - "employment_type": "employed" -} - -loan_request = { - "monthly_payment": 1200 -} - -eligibility = rules_engine.check_loan_eligibility(loan_applicant, loan_request) -# → {"eligible": True, "criteria": {"credit_score": True, "income": True, "debt_to_income": True, "employment": True}, "reasons": []} -# → All criteria passed: 720 credit score ≥ 650, $1200 payment ≤ $1400 limit, $1700 total debt ≤ $1800 limit, 30 months ≥ 24 -assert isinstance(eligibility, dict) -assert "eligible" in eligibility -assert "criteria" in eligibility -# With $500 existing debt + $1200 loan = $1700 total (34% of income, under 36% limit) -assert eligibility["eligible"] == True - -# Shipping cost calculation -package = {"weight": 3.5} -shipping = {"distance": 150, "express": True} -order = {"total": 75} -customer = {"premium_member": False} - -shipping_cost = rules_engine.calculate_shipping_cost(package, shipping, order, customer) -# → 21.58 # 3.5kg package: $8.99 base * 1.2 distance (150 miles) * 2.0 express = $21.58 -assert isinstance(shipping_cost, (int, float)) -assert shipping_cost > 0 - -# Test with premium member (should get free shipping) -premium_customer = {"premium_member": True} -free_shipping_cost = rules_engine.calculate_shipping_cost(package, shipping, order, premium_customer) -# → 0.0 # Premium member gets free shipping regardless of order total or package size -assert free_shipping_cost == 0.0 +def quote_premium(driver, vehicle): + context = {"driver": 
driver, "vehicle": vehicle} + return ( + evaluate(RULES["base_premium"], context) + * evaluate(RULES["age_multiplier"], context) + * evaluate(RULES["experience_discount"], context) + * evaluate(RULES["claims_penalty"], context) + ) + +driver = {"age": 28, "years_experience": 6, "claims_count": 0} +vehicle = {"type": "car", "anti_theft": True} +premium = quote_premium(driver, vehicle) +assert 800 < premium < 900 ``` -## Data Transformation Pipeline - -### The Problem - -You need to transform data from various sources into a consistent format. The transformation rules are complex and change frequently. Hard-coding transformations makes them difficult to test and update, especially when business users need to modify the logic. +Same pattern works for loan eligibility, shipping costs, discount tiers — anything that's a deterministic function of input data. -### The CEL Solution +### Eligibility checks -Use CEL expressions to define transformation rules that can be easily understood and modified: +Use CEL booleans to express composite eligibility rules and surface a structured pass/fail: ```python -from cel import evaluate, Context - -class DataTransformationPipeline: - """Transform data using configurable CEL expressions.""" - - def __init__(self): - # Define transformation rules as CEL expressions - self.transformations = { - # Normalize user data from different sources - "normalize_user": { - "full_name": """ - has(input.first_name) && has(input.last_name) ? - input.first_name + " " + input.last_name : - has(input.name) ? input.name : "Unknown" - """, - "email": """ - has(input.email) ? input.email : - has(input.email_address) ? input.email_address : - "" - """, - "age": """ - has(input.age) ? input.age : - has(input.birth_year) ? (current_year - input.birth_year) : - null - """, - "score": """ - has(input.score) ? input.score : - has(input.rating) ? (double(input.rating) * 20.0) : // Convert 1-5 rating to 0-100 score - has(input.grade) ? 
grade_to_score(input.grade) : - 0 - """, - "status": """ - has(input.active) ? (input.active ? "active" : "inactive") : - has(input.status) ? input.status : - "unknown" - """ - }, - - # Calculate derived fields - "calculate_metrics": { - "engagement_score": """ - user.login_count + user.posts_count + user.comments_count - """, - "risk_level": """ - has(user.failed_logins) ? ( - user.failed_logins > 5 ? "high" : - user.failed_logins > 2 ? "medium" : - "low" - ) : "unknown" - """, - "subscription_tier": """ - has(user.premium) && user.premium && has(user.engagement_score) && user.engagement_score > 100 ? "platinum" : - has(user.premium) && user.premium ? "gold" : - has(user.engagement_score) && user.engagement_score > 50 ? "silver" : - "bronze" - """ - } - } - - def transform_user_data(self, input_data, current_year=2024): - """Transform user data using CEL expressions.""" - context = Context() - context.add_variable("input", input_data) - context.add_variable("current_year", current_year) - - # Add helper functions - context.add_function("grade_to_score", self._grade_to_score) - - # Apply normalization transformations - normalized = {} - for field, expression in self.transformations["normalize_user"].items(): - try: - result = evaluate(expression, context) - if result is not None: - normalized[field] = result - except Exception as e: - # Handle transformation errors gracefully - normalized[field] = None - - # Add normalized data to context for metric calculations - context.add_variable("user", normalized) - - # Calculate derived metrics - for field, expression in self.transformations["calculate_metrics"].items(): - try: - result = evaluate(expression, context) - normalized[field] = result - except Exception as e: - # Handle calculation errors gracefully - normalized[field] = None - - return normalized - - def _grade_to_score(self, grade): - """Convert letter grade to numeric score.""" - grade_map = {"A": 95, "B": 85, "C": 75, "D": 65, "F": 50} - return 
grade_map.get(grade.upper() if isinstance(grade, str) else "", 0) - -# Example: Transform data from different sources -pipeline = DataTransformationPipeline() - -# Data source 1: Has first_name, last_name, age -source1_data = { - "first_name": "John", - "last_name": "Doe", - "age": 30, - "email": "JOHN.DOE@EXAMPLE.COM", - "rating": 4, # 1-5 scale - "active": True, - "login_count": 50, - "posts_count": 10, - "comments_count": 25, - "premium": True, - "failed_logins": 1 -} +from cel import evaluate -# Data source 2: Has name, birth_year, different field names -source2_data = { - "name": "Jane Smith", - "birth_year": 1990, - "email_address": "jane.smith@example.com", - "score": 85, # Already 0-100 scale - "status": "ACTIVE", - "login_count": 30, - "posts_count": 5, - "comments_count": 15, - "premium": False, - "failed_logins": 3 +CRITERIA = { + "credit_score": "applicant.credit_score >= 650", + "income": "double(loan.monthly_payment) <= double(applicant.monthly_income) * 0.28", + "debt_ratio": "(applicant.existing_debt + loan.monthly_payment) <= double(applicant.monthly_income) * 0.36", + "employment": "applicant.employment_months >= 24 || applicant.employment_type == 'self_employed'", } -# Transform both data sources -result1 = pipeline.transform_user_data(source1_data) -result2 = pipeline.transform_user_data(source2_data) -# → result1: {"full_name": "John Doe", "email": "JOHN.DOE@EXAMPLE.COM", "age": 30, "score": 80.0, "status": "active", -# "engagement_score": 85, "risk_level": "low", "subscription_tier": "platinum"} -# → result2: {"full_name": "Jane Smith", "email": "jane.smith@example.com", "age": 34, "score": 85, "status": "ACTIVE", -# "engagement_score": 50, "risk_level": "medium", "subscription_tier": "silver"} - -# Verify transformed data from source 1 -assert "full_name" in result1 -assert "email" in result1 -assert "engagement_score" in result1 - -# Verify transformed data from source 2 -assert "full_name" in result2 -assert "email" in result2 -assert 
"engagement_score" in result2 - -# Both results now have consistent structure: -assert "full_name" in result1 and "full_name" in result2 -assert "email" in result1 and "email" in result2 -assert "engagement_score" in result1 and "engagement_score" in result2 -assert "subscription_tier" in result1 and "subscription_tier" in result2 - -# Verify transformations completed (actual values depend on CEL expression execution) -assert "full_name" in result1 and "full_name" in result2 -assert "email" in result1 and "email" in result2 -# Note: Actual transformation results may vary based on CEL capabilities +def loan_eligibility(applicant, loan): + ctx = {"applicant": applicant, "loan": loan} + return {name: evaluate(expr, ctx) for name, expr in CRITERIA.items()} + +result = loan_eligibility( + {"credit_score": 720, "monthly_income": 10000, "existing_debt": 1200, + "employment_months": 36, "employment_type": "salaried"}, + {"monthly_payment": 1800}, +) +# → {"credit_score": True, "income": True, "debt_ratio": True, "employment": True} +assert all(result.values()) ``` -## Advanced Patterns +## Data transformation -### Rule Composition and Inheritance +Normalize heterogeneous input by mapping each output field to a CEL expression. `has()` lets you handle optional/varying source fields cleanly: ```python -class ComposableRulesEngine(BusinessRulesEngine): - """Rules engine with rule composition and inheritance.""" - - def __init__(self): - super().__init__() - - # Define rule hierarchies - self.rule_hierarchies = { - "discount_rules": { - "base_discount": "0.0", - "volume_discount": "quantity >= 10 ? 0.05 : 0.0", - "loyalty_discount": "customer.loyalty_years >= 5 ? 0.1 : (customer.loyalty_years >= 2 ? 0.05 : 0.0)", - "seasonal_discount": "is_holiday_season() ? 0.15 : 0.0", - "combined_discount": "min(base_discount + volume_discount + loyalty_discount + seasonal_discount, 0.5)" - }, - - "risk_assessment": { - "financial_risk": "applicant.debt_ratio > 0.4 ? 
0.3 : (applicant.debt_ratio > 0.2 ? 0.1 : 0.0)", - "credit_risk": "applicant.credit_score < 600 ? 0.4 : (applicant.credit_score < 700 ? 0.2 : 0.0)", - "employment_risk": "applicant.employment_type == 'contract' ? 0.2 : 0.0", - "total_risk": "min(financial_risk + credit_risk + employment_risk, 1.0)" - } - } - - def evaluate_rule_hierarchy(self, hierarchy_name, context_data): - """Evaluate all rules in a hierarchy.""" - if hierarchy_name not in self.rule_hierarchies: - return {} - - context = Context() - for key, value in context_data.items(): - context.add_variable(key, value) - - # Add helper functions - context.add_function("is_holiday_season", self._is_holiday_season) - context.add_function("min", min) - context.add_function("max", max) - - hierarchy = self.rule_hierarchies[hierarchy_name] - results = {} - - # Evaluate rules in order, making previous results available - for rule_name, rule_expression in hierarchy.items(): - try: - result = evaluate(rule_expression, context) - results[rule_name] = result - context.add_variable(rule_name, result) # Make available to subsequent rules - except Exception as e: - # Handle rule evaluation error gracefully - results[rule_name] = None - - return results - - def _is_holiday_season(self): - """Check if current date is in holiday season.""" - now = datetime.now() - # Holiday season: November-December - return now.month in [11, 12] - -# Example rule hierarchy evaluation -composable_engine = ComposableRulesEngine() - -discount_context = { - "quantity": 15, - "customer": {"loyalty_years": 3}, - "product": {"category": "electronics"} -} - -discount_results = composable_engine.evaluate_rule_hierarchy("discount_rules", discount_context) -# → {"base_discount": 0.0, "volume_discount": 0.05, "loyalty_discount": 0.05, "seasonal_discount": 0.15, "combined_discount": 0.25} -# → Customer gets 25% total discount: 5% volume (15+ items) + 5% loyalty (3 years) + 15% seasonal (if holiday season) -assert "combined_discount" in discount_results 
-assert isinstance(discount_results["combined_discount"], (int, float)) -assert discount_results["combined_discount"] >= 0 - -# Test the individual discount calculations -print("Testing rule composition calculations:") -print(f"Quantity: {discount_context['quantity']} (should trigger volume discount)") -print(f"Customer loyalty: {discount_context['customer']['loyalty_years']} years (should trigger loyalty discount)") -# → Quantity: 15 (should trigger volume discount) -# → Customer loyalty: 3 years (should trigger loyalty discount) - -# Verify individual discount amounts -assert discount_results["base_discount"] == 0.0, "Base discount should be 0" -assert discount_results["volume_discount"] == 0.05, "Volume discount should be 5% for 15+ items" -assert discount_results["loyalty_discount"] == 0.05, "Loyalty discount should be 5% for 2-4 years" - -# Verify seasonal discount (behavior depends on actual date) -seasonal_discount = discount_results["seasonal_discount"] -assert seasonal_discount >= 0.0, "Seasonal discount should be non-negative" -print(f"Seasonal discount: {seasonal_discount} ({'holiday season' if seasonal_discount > 0 else 'regular season'})") -# → Seasonal discount: 0.15 (holiday season) # or 0.0 (regular season) depending on current date - -# Verify combined discount calculation -expected_combined = discount_results["base_discount"] + discount_results["volume_discount"] + discount_results["loyalty_discount"] + seasonal_discount -expected_combined = min(expected_combined, 0.5) # Apply 50% cap -assert discount_results["combined_discount"] == expected_combined, f"Combined discount should be {expected_combined}" - -print(f"✓ Rule composition working: {discount_results['combined_discount']} total discount") -# → ✓ Rule composition working: 0.25 total discount - -# Test with customer who gets maximum discount (should be capped at 50%) -high_loyalty_context = { - "quantity": 20, - "customer": {"loyalty_years": 10}, # Higher loyalty discount - "product": 
{"category": "electronics"} -} - -high_discount_results = composable_engine.evaluate_rule_hierarchy("discount_rules", high_loyalty_context) -# → {"base_discount": 0.0, "volume_discount": 0.05, "loyalty_discount": 0.1, "seasonal_discount": 0.15, "combined_discount": 0.3} -# → High-value customer: 5% volume + 10% loyalty (10 years) + 15% seasonal = 30% total (under 50% cap) -assert high_discount_results["loyalty_discount"] == 0.1, "10-year customer should get 10% loyalty discount" - -# Calculate expected total based on actual seasonal discount -high_seasonal = high_discount_results["seasonal_discount"] -expected_total = min(0.0 + 0.05 + 0.1 + high_seasonal, 0.5) -assert high_discount_results["combined_discount"] == expected_total, "Should apply discount cap correctly" - -print(f"✓ High loyalty customer discount: {high_discount_results['combined_discount']}") -# → ✓ High loyalty customer discount: 0.3 +from cel import evaluate, Context -# Test risk assessment hierarchy -risk_context = { - "applicant": { - "debt_ratio": 0.3, - "credit_score": 650, - "employment_type": "contract" - } +NORMALIZE = { + "full_name": """ + has(input.first_name) && has(input.last_name) + ? input.first_name + " " + input.last_name + : (has(input.name) ? input.name : "Unknown") + """, + "email": """ + has(input.email) ? input.email : + (has(input.email_address) ? input.email_address : "") + """, + "age": """ + has(input.age) ? input.age : + (has(input.birth_year) ? (current_year - input.birth_year) : null) + """, + "status": """ + has(input.active) ? (input.active ? "active" : "inactive") : + (has(input.status) ? 
input.status : "unknown") + """, } -risk_results = composable_engine.evaluate_rule_hierarchy("risk_assessment", risk_context) -# → {"financial_risk": 0.1, "credit_risk": 0.2, "employment_risk": 0.2, "total_risk": 0.5} -# → Moderate risk applicant: 10% financial (30% debt ratio) + 20% credit (650 score) + 20% employment (contract) = 50% total risk -assert "total_risk" in risk_results, "Should calculate total risk" -print(f"✓ Risk assessment working: {risk_results['total_risk']} total risk") -# → ✓ Risk assessment working: 0.5 total risk -``` - -### Conditional Field Mapping for Data Transformation +def normalize(record, *, current_year=2026): + ctx = Context() + ctx.add_variable("input", record) + ctx.add_variable("current_year", current_year) + return {field: evaluate(expr, ctx) for field, expr in NORMALIZE.items()} -```python -def create_conditional_transformer(): - """Transform data with conditional field mapping.""" - - mapping_rules = { - "phone": """ - has(input.phone) ? format_phone(input.phone) : - has(input.mobile) ? format_phone(input.mobile) : - has(input.telephone) ? format_phone(input.telephone) : - null - """, - - "address": """ - has(input.address) ? input.address : - (has(input.street) && has(input.city)) ? - input.street + ", " + input.city + - (has(input.state) ? ", " + input.state : "") + - (has(input.zip) ? " " + string(input.zip) : "") : - null - """, - - "full_address": """ - has(user.address) ? 
user.address : - join_address_parts([ - get_field("input.street", ""), - get_field("input.city", ""), - get_field("input.state", ""), - get_field("input.postal_code", "") - ]) - """ - } - - def format_phone(phone): - """Format phone number consistently.""" - digits = "".join(filter(str.isdigit, str(phone))) - if len(digits) == 10: - return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}" - elif len(digits) == 11 and digits[0] == "1": - return f"+1 ({digits[1:4]}) {digits[4:7]}-{digits[7:]}" - return phone - - def get_field(path, default=""): - """Safely get nested field value.""" - # This is a placeholder - in real use, would get from current context - return default - - def join_address_parts(parts): - """Join non-empty address parts.""" - non_empty = [p for p in parts if p and p.strip()] - return ", ".join(non_empty) if non_empty else "" - - return mapping_rules, { - "format_phone": format_phone, - "get_field": get_field, - "join_address_parts": join_address_parts - } +source_a = {"first_name": "John", "last_name": "Doe", "age": 30, "active": True} +source_b = {"name": "Jane Smith", "birth_year": 1990, "email_address": "jane@x.com"} -# Test the transformer -rules, funcs = create_conditional_transformer() -# → rules: {"phone": "has(input.phone) ? format_phone(input.phone) : ...", "address": "has(input.address) ? 
..."} -# → funcs: {"format_phone": , "get_field": , "join_address_parts": } -assert "phone" in rules -assert "format_phone" in funcs +assert normalize(source_a)["full_name"] == "John Doe" +assert normalize(source_b)["age"] == 36 +assert normalize(source_b)["email"] == "jane@x.com" ``` -### Dynamic Rule Loading - -```python -class DynamicRulesEngine: - """Rules engine that loads rules from external sources.""" - - def __init__(self): - self.rules = {} - self.rule_metadata = {} - - def load_rules_from_config(self, rules_config): - """Load rules from configuration dictionary.""" - for rule_name, rule_data in rules_config.items(): - self.rules[rule_name] = rule_data["expression"] - self.rule_metadata[rule_name] = { - "description": rule_data.get("description", ""), - "version": rule_data.get("version", "1.0"), - "last_modified": rule_data.get("last_modified", datetime.now().isoformat()), - "author": rule_data.get("author", "system"), - "tags": rule_data.get("tags", []) - } - - def validate_rule(self, rule_expression, test_context=None): - """Validate a rule expression.""" - if test_context is None: - test_context = { - "test_number": 42, - "test_string": "test", - "test_boolean": True, - "test_list": [1, 2, 3], - "test_object": {"field": "value"} - } - - try: - result = evaluate(rule_expression, test_context) - return True, result, None - except Exception as e: - return False, None, str(e) - - def update_rule(self, rule_name, new_expression, metadata=None, validation_context=None): - """Update a rule with validation.""" - is_valid, test_result, error = self.validate_rule(new_expression, validation_context) - - if not is_valid: - raise ValueError(f"Invalid rule expression: {error}") - - # Backup old rule - if rule_name in self.rules: - old_rule = self.rules[rule_name] - old_metadata = self.rule_metadata.get(rule_name, {}) - # Rule backed up (in real implementation, save to backup storage) - - # Update rule - self.rules[rule_name] = new_expression - - if metadata: - 
self.rule_metadata[rule_name] = { - **self.rule_metadata.get(rule_name, {}), - **metadata, - "last_modified": datetime.now().isoformat() - } - - return True - - def execute_rule(self, rule_name, context): - """Execute a specific rule.""" - if rule_name not in self.rules: - raise KeyError(f"Rule not found: {rule_name}") - - rule_expression = self.rules[rule_name] - - try: - return evaluate(rule_expression, context) - except Exception as e: - raise RuntimeError(f"Error executing rule {rule_name}: {e}") - - def get_rule_info(self, rule_name): - """Get information about a rule.""" - if rule_name not in self.rules: - return None - - return { - "name": rule_name, - "expression": self.rules[rule_name], - "metadata": self.rule_metadata.get(rule_name, {}) - } - -# Example dynamic rule loading -dynamic_engine = DynamicRulesEngine() - -# Load rules from configuration -rules_config = { - "customer_tier": { - "expression": """ - customer.annual_spend >= 10000 ? "platinum" : - customer.annual_spend >= 5000 ? "gold" : - customer.annual_spend >= 1000 ? "silver" : - "bronze" - """, - "description": "Determine customer tier based on annual spending", - "version": "2.1", - "author": "business_team", - "tags": ["customer", "segmentation"] - }, - - "fraud_score": { - "expression": """ - double(transaction.amount > double(customer.avg_transaction) * 5.0 ? 0.3 : 0.0) + - double(transaction.location != customer.usual_location ? 0.2 : 0.0) + - double(transaction.time_hour < 6 || transaction.time_hour > 22 ? 0.1 : 0.0) + - double(customer.failed_attempts_today > 3 ? 
0.4 : 0.0) - """, - "description": "Calculate fraud risk score for transactions", - "version": "1.5", - "author": "security_team", - "tags": ["fraud", "security", "risk"] - } -} - -dynamic_engine.load_rules_from_config(rules_config) -# → Loaded 2 business rules: customer tier segmentation and fraud detection scoring - -# Test rule execution -customer_data = { - "customer": { - "annual_spend": 7500, - "avg_transaction": 150, - "usual_location": "NY", - "failed_attempts_today": 1 - }, - "transaction": { - "amount": 500, - "location": "NY", - "time_hour": 14 - } -} - -tier = dynamic_engine.execute_rule("customer_tier", customer_data) -fraud_score = dynamic_engine.execute_rule("fraud_score", customer_data) -# → tier: "gold" # $7500 annual spend qualifies for gold tier ($5000-$9999 range) -# → fraud_score: 0.0 # Normal transaction: same location, reasonable amount, daytime, low failed attempts +## Pre-compile hot-path expressions -assert tier == "gold" # Customer with annual_spend=7500 -assert isinstance(fraud_score, (int, float)) -assert 0 <= fraud_score <= 1 # Should be between 0 and 1 - -print(f"✓ Customer tier: {tier} (annual spend: $7500)") -print(f"✓ Fraud score: {fraud_score} (low risk transaction)") -# → ✓ Customer tier: gold (annual spend: $7500) -# → ✓ Fraud score: 0.0 (low risk transaction) - -# Test rule validation with invalid expression -try: - dynamic_engine.update_rule("test_rule", "invalid && syntax") - assert False, "Should reject invalid syntax" -except ValueError as e: - print(f"✓ Invalid rule rejected: {str(e)}") - # → ✓ Invalid rule rejected: Invalid rule expression: ... 
- -# Test rule validation with valid business rule expression -# Provide validation context that matches the rule's expected variables -validation_context = {"customer": {"annual_spend": 5000}} -success = dynamic_engine.update_rule("test_rule", "customer.annual_spend > 1000", - validation_context=validation_context) -# → True # Rule validation passed: expression is syntactically correct and executes successfully -assert success == True, "Should accept valid business rule" - -# Test rule execution with new rule (customer has $7500 annual spend) -test_result = dynamic_engine.execute_rule("test_rule", customer_data) -# → True # Customer's $7500 annual spend > $1000 threshold -assert test_result == True, "Customer with $7500 should pass $1000 threshold" -print("✓ Dynamic rule creation and execution working") -# → ✓ Dynamic rule creation and execution working - -# Verify rule management functionality -rule_info = dynamic_engine.get_rule_info("customer_tier") -# → {"name": "customer_tier", "expression": "customer.annual_spend >= 10000 ? 
...", "metadata": {"description": "Determine customer tier...", "author": "business_team"}} -assert rule_info is not None -assert "expression" in rule_info -assert rule_info["metadata"]["author"] == "business_team" -print(f"✓ Rule metadata: {rule_info['metadata']['description']}") -# → ✓ Rule metadata: Determine customer tier based on annual spending - -# Test edge case: Different customer tiers -bronze_customer_data = {**customer_data, "customer": {**customer_data["customer"], "annual_spend": 500}} -bronze_tier = dynamic_engine.execute_rule("customer_tier", bronze_customer_data) -# → "bronze" # $500 annual spend < $1000 threshold for bronze tier -assert bronze_tier == "bronze", "Low-spend customer should be bronze tier" - -platinum_customer_data = {**customer_data, "customer": {**customer_data["customer"], "annual_spend": 15000}} -platinum_tier = dynamic_engine.execute_rule("customer_tier", platinum_customer_data) -# → "platinum" # $15000 annual spend >= $10000 threshold for platinum tier -assert platinum_tier == "platinum", "High-spend customer should be platinum tier" - -print(f"✓ Customer tier calculation: bronze($500), gold($7500), platinum($15000)") -# → ✓ Customer tier calculation: bronze($500), gold($7500), platinum($15000) -``` - -### Batch Transformation with Filtering +For pipelines that run the same rules over many records, compile once and reuse the program: ```python -def transform_batch_with_filters(data_list, transformation_config): - """Transform a batch of records with filtering and validation.""" - - def transform_record(record): - context = Context() - context.add_variable("input", record) - context.add_variable("current_timestamp", datetime.now().isoformat()) - - # Add transformation functions - for func_name, func in transformation_config.get("functions", {}).items(): - context.add_function(func_name, func) - - # Apply filters first - for filter_expr in transformation_config.get("filters", []): - try: - if not evaluate(filter_expr, context): - 
return None # Record filtered out - except Exception: - return None # Filter evaluation failed - - # Apply transformations - transformed = {} - for field, expr in transformation_config.get("transformations", {}).items(): - try: - result = evaluate(expr, context) - transformed[field] = result - except Exception as e: - # Handle transformation failure gracefully - transformed[field] = None - - return transformed - - results = [] - for record in data_list: - transformed = transform_record(record) - if transformed is not None: - results.append(transformed) - - return results - -# Example batch transformation configuration -batch_config = { - "filters": [ - "has(input.id)", # Must have ID - "input.active == true", # Must be active - "has(input.email) && size(input.email) > 0", # Must have email - ], - "transformations": { - "user_id": "input.id", - "display_name": """ - has(input.display_name) ? input.display_name : - has(input.first_name) ? input.first_name + " " + input.last_name : - input.email - """, - "normalized_email": "input.email", # CEL doesn't have lower() function - "account_age_days": """ - has(input.created_date) ? - days_between(input.created_date, current_timestamp) : - 0 - """, - "tier": """ - has(input.premium) && input.premium ? "premium" : - has(input.verified) && input.verified ? 
"verified" : - "basic" - """ - }, - "functions": { - "days_between": lambda start, end: 30 # Simplified for example - } -} - -# Sample data -sample_records = [ - {"id": "1", "email": "alice@example.com", "active": True, "premium": True, "first_name": "Alice", "last_name": "Smith"}, - {"id": "2", "email": "", "active": True}, # Will be filtered out - no email - {"id": "3", "email": "bob@example.com", "active": False}, # Will be filtered out - inactive - {"id": "4", "email": "carol@example.com", "active": True, "verified": True, "display_name": "Carol D."} -] - -transformed_batch = transform_batch_with_filters(sample_records, batch_config) -# → [{"user_id": "1", "display_name": "Alice Smith", "tier": "premium", ...}, {"user_id": "4", "display_name": "Carol D.", "tier": "verified", ...}] -# → Filtered out 2 records: record #2 (empty email), record #3 (inactive status) +from cel import compile -# Verify filtering worked correctly -expected_valid_records = 2 # Records 1 and 4 should pass filters (have ID, active=true, non-empty email) -assert len(transformed_batch) == expected_valid_records, f"Expected {expected_valid_records} records, got {len(transformed_batch)}" -print(f"✓ Batch processing filtered to {len(transformed_batch)} valid records") -# → ✓ Batch processing filtered to 2 valid records +compiled = {name: compile(expr) for name, expr in NORMALIZE.items()} -# Verify transformations worked correctly -for record in transformed_batch: - assert "user_id" in record, "Should have user_id field" - assert "display_name" in record, "Should have display_name field" - assert "tier" in record, "Should have tier field" - assert record["user_id"] is not None, "user_id should not be None" - print(f"✓ Record {record['user_id']}: {record['display_name']} ({record['tier']} tier)") - # → ✓ Record 1: Alice Smith (premium tier) - # → ✓ Record 4: Carol D. 
(verified tier) - -# Test specific transformations for known records -alice_record = next((r for r in transformed_batch if r["user_id"] == "1"), None) -assert alice_record is not None, "Alice's record should be in results" -assert alice_record["display_name"] == "Alice Smith", "Should combine first + last name" -assert alice_record["tier"] == "premium", "Alice should be premium tier" -# → Alice: Premium member (has premium=true), name built from first_name + last_name - -carol_record = next((r for r in transformed_batch if r["user_id"] == "4"), None) -assert carol_record is not None, "Carol's record should be in results" -assert carol_record["display_name"] == "Carol D.", "Should use display_name field" -assert carol_record["tier"] == "verified", "Carol should be verified tier" -# → Carol: Verified member (has verified=true, no premium), uses existing display_name - -print("✓ Batch transformation with filtering working correctly") -# → ✓ Batch transformation with filtering working correctly +def normalize_fast(record, *, current_year=2026): + ctx = {"input": record, "current_year": current_year} + return {field: program.execute(ctx) for field, program in compiled.items()} ``` -## Why This Works - -- **Business-Friendly**: Rules and transformations are written in a language business users can understand -- **Flexible**: Logic can be modified without code changes -- **Maintainable**: Each rule/transformation can be tested independently -- **Consistent**: Same logic applied consistently across the application -- **Scalable**: Handle large datasets with efficient expression evaluation -- **Auditable**: Changes can be tracked and versioned -- **Transparent**: The decision-making process is clearly visible +`compile()` parses once; `execute()` skips the parser on every record. This is a meaningful win for batches. -## Best Practices +## Why CEL fits -1. **Start simple**: Begin with basic rules and transformations, add complexity gradually -2. 
**Document clearly**: Provide descriptions and examples for each rule -3. **Version control**: Track changes and maintain backwards compatibility -4. **Test thoroughly**: Create comprehensive test suites for all scenarios -5. **Monitor performance**: Profile execution in production environments -6. **Business involvement**: Include business stakeholders in rule design and validation -7. **Handle missing data gracefully**: Always provide fallbacks for missing fields -8. **Use helper functions**: Create reusable functions for common patterns +- **Editable by non-engineers.** Stakeholders can review rule changes in a Git diff. +- **Deterministic.** Same input always produces the same output — no hidden state, no side effects. +- **Testable.** Each rule is a pure expression you can assert against directly. +- **Fast.** Compiled CEL is microseconds per evaluation; comparable to hand-written Python and often faster than dynamically-built `eval()`. -## Related Topics +## Related topics -- [Access Control Policies](access-control-policies.md) - User-specific business rules -- [Dynamic Query Filters](dynamic-query-filters.md) - Query-based rule applications -- [Production Patterns & Best Practices](production-patterns-best-practices.md) - Security and performance patterns -- [Error Handling](error-handling.md) - Robust error handling for rule execution \ No newline at end of file +- [Access Control Policies](access-control-policies.md) — applying the same pattern to authorization. +- [Dynamic Query Filters](dynamic-query-filters.md) — translating CEL into safe DB filters. +- [Error Handling](error-handling.md) — exception types and safe-evaluation patterns. 
diff --git a/docs/how-to-guides/dynamic-query-filters.md b/docs/how-to-guides/dynamic-query-filters.md index 8272baa..330f503 100644 --- a/docs/how-to-guides/dynamic-query-filters.md +++ b/docs/how-to-guides/dynamic-query-filters.md @@ -239,4 +239,4 @@ This ensures security constraints cannot be circumvented by user input. - [Access Control Policies](access-control-policies.md) - User permission patterns - [Business Logic & Data Transformation](business-logic-data-transformation.md) - Validate filter configurations -- [Production Patterns & Best Practices](production-patterns-best-practices.md) - Security and performance patterns \ No newline at end of file +- [Error Handling](error-handling.md) - Exception types and safe-evaluation patterns \ No newline at end of file diff --git a/docs/how-to-guides/error-handling.md b/docs/how-to-guides/error-handling.md index 90a86c7..515bf0a 100644 --- a/docs/how-to-guides/error-handling.md +++ b/docs/how-to-guides/error-handling.md @@ -1,587 +1,160 @@ # Error Handling -Learn how to handle errors gracefully in production CEL applications, from basic exception handling to advanced safety patterns for untrusted input. +CEL evaluation surfaces failures as Python exceptions. This guide covers the exception types you'll encounter and a canonical pattern for safely evaluating untrusted expressions. -## Understanding CEL Exceptions +## Exception types -The library raises specific exception types based on the underlying error type. Understanding these patterns helps you write robust error handling: +| Exception | When it's raised | +|----------------|---------------------------------------------------------------------------| +| `ValueError` | Parse / compile errors, including malformed syntax and empty expressions. | +| `RuntimeError` | Undefined variables, undefined functions, custom-function failures. | +| `TypeError` | Type mismatches — incompatible operands, no matching overload, etc. 
| -### `ValueError` - Parse and Compilation Errors - -Raised when the CEL expression has invalid syntax, is empty, or fails to compile: +### `ValueError` — parse and compile errors ```python from cel import evaluate try: - evaluate("1 + + 2") # Invalid syntax - assert False, "Expected ValueError" + evaluate("1 + + 2") except ValueError as e: assert "Failed to parse expression" in str(e) - # → ValueError: Failed to parse expression (graceful failure) try: - evaluate("") # Empty expression - assert False, "Expected ValueError" + evaluate("") except ValueError as e: assert "Failed to parse expression" in str(e) - # → ValueError: Failed to parse expression (safe error handling) ``` -### `RuntimeError` - Variable and Function Errors +Malformed input (unclosed quotes, mixed quote types, invalid syntax) raises `ValueError` cleanly — the library never panics or crashes the process. -Raised for undefined variables/functions and function execution errors: +### `RuntimeError` — variable and function errors ```python try: - evaluate("undefined_var", {}) # Variable not in context - assert False, "Expected RuntimeError" + evaluate("undefined_var", {}) except RuntimeError as e: assert "Undefined variable or function" in str(e) - # → RuntimeError: Undefined variable 'undefined_var' try: - evaluate("missing_func()", {}) # Function doesn't exist - assert False, "Expected RuntimeError" + evaluate("missing_func()", {}) except RuntimeError as e: assert "Undefined variable or function" in str(e) - # → RuntimeError: Undefined function 'missing_func' - -try: - evaluate("user.missing_field", {"user": {"name": "alice"}}) # Field access error - assert False, "Expected ValueError" -except ValueError as e: - assert "No such key" in str(e) - # → ValueError: No such key: missing_field ``` -### `TypeError` - Type Compatibility Errors - -Raised when operations are performed on incompatible types: +### `TypeError` — incompatible operand types ```python try: - evaluate("1 + 2u") # Mixed 
signed/unsigned arithmetic - assert False, "Expected TypeError" + evaluate("1 + 2u") # mixed signed/unsigned int except TypeError as e: - assert "Cannot mix signed and unsigned" in str(e) - # → TypeError: Cannot mix signed and unsigned integers - -try: - evaluate('"hello" && true') # String in logical operation - assert False, "Expected ValueError" -except ValueError as e: - assert "No such overload" in str(e) - # → ValueError: No such overload for mixed-type logical operations + assert "overload" in str(e).lower() or "signed and unsigned" in str(e) try: - evaluate("[1, 2, 3].map(x, x * 2.0)") # Mixed arithmetic in map - assert False, "Expected TypeError" + evaluate('"hello" && true') # non-bool in a logical op except TypeError as e: - assert "operation" in str(e) - # → TypeError: Unsupported operation between types -``` - -## ✅ Safe Error Handling for Malformed Input - -**Good News**: All malformed expressions, including those that previously caused panics, now raise proper Python exceptions instead of crashing the process. 
- -**Malformed syntax that now raises `ValueError`:** -- Unclosed quotes: `'timestamp("2024-01-01T00:00:00Z")` -- Mixed quote types: `"some text'` or `'some text"` -- Invalid syntax patterns - -**Examples that now raise clean errors:** -```python -from cel import evaluate - -try: - evaluate("'unclosed quote", {}) - assert False, "Should have raised ValueError" -except ValueError as e: - assert "Failed to parse expression" in str(e) - # → ValueError: Malformed input handled safely (no crash) - -try: - evaluate('"mixed quotes\'', {}) - assert False, "Should have raised ValueError" -except ValueError as e: - assert "Failed to parse expression" in str(e) - # → ValueError: Quote mismatch detected (process remains stable) + assert "No such overload" in str(e) ``` -**For untrusted input:** -The library now safely handles all malformed input by raising appropriate exceptions, making it safe to evaluate expressions from untrusted sources without additional pre-validation (though input validation is still a good practice for security). +CEL has no implicit numeric coercion: `int + double`, `int + uint`, and similar combinations all raise `TypeError`. Use `int(x)`, `uint(x)`, or `double(x)` to convert explicitly. -## Production Error Handling Patterns +## Safe evaluation wrapper -### 1. Safe Evaluation Wrapper - -Create a wrapper function that handles all CEL exceptions gracefully: +For untrusted input, wrap evaluation with a single handler that converts all CEL exceptions to a sentinel value: ```python from cel import evaluate from typing import Any, Optional, Dict import logging -def safe_evaluate(expression: str, context: Optional[Dict[str, Any]] = None) -> Optional[Any]: - """ - Safely evaluate a CEL expression with comprehensive error handling. - - Returns None if evaluation fails for any reason. 
- """ - try: - return evaluate(expression, context) - except ValueError as e: - logging.warning(f"CEL parse error: {e}") - return None - except TypeError as e: - logging.warning(f"CEL type error: {e}") - return None - except RuntimeError as e: - logging.warning(f"CEL runtime error: {e}") - return None - except Exception as e: - # Catch any other unexpected errors - logging.error(f"Unexpected CEL error: {e}") - return None - -# Usage -result = safe_evaluate("user.age >= 18", {"user": {"age": 25}}) -if result is not None: - assert result is True - # → True (safe evaluation with graceful error handling) -else: - assert False, "Expression evaluation should not have failed" -``` - -### 2. Context Validation {#context-validation} +log = logging.getLogger(__name__) -Validate context data before evaluation to prevent runtime errors: -```python -def validate_context(context: Dict[str, Any], required_fields: list[str]) -> None: - """Validate that all required fields are present in context.""" - for field in required_fields: - if field not in context: - raise ValueError(f"Missing required field: {field}") - -def validate_nested_field(context: Dict[str, Any], field_path: str) -> bool: - """Check if a nested field exists (e.g., 'user.profile.verified').""" - keys = field_path.split('.') - current = context - - for key in keys: - if not isinstance(current, dict) or key not in current: - return False - current = current[key] - - return True - -def safe_policy_evaluation(policy: str, context: Dict[str, Any]) -> bool: - """Evaluate a policy with context validation.""" +def safe_evaluate( + expression: str, + context: Optional[Dict[str, Any]] = None, +) -> Optional[Any]: + """Evaluate a CEL expression, returning None on any failure.""" try: - # Validate required top-level fields - validate_context(context, ["user", "resource"]) - - # Validate specific nested fields used in policy - if not validate_nested_field(context, "user.id"): - raise ValueError("Missing required field: 
user.id") - - result = evaluate(policy, context) - return bool(result) if result is not None else False - - except Exception as e: - logging.error(f"Policy evaluation failed: {e}") - return False # Deny access on any error - -# Usage -context = { - "user": {"id": "alice", "role": "user"}, - "resource": {"owner": "alice", "type": "document"} -} - -access_granted = safe_policy_evaluation( - 'user.role == "admin" || resource.owner == user.id', - context -) -assert access_granted is True -# → True (policy allows access - user owns resource) - -# Test 2: Missing required context field -incomplete_context = { - "user": {"id": "alice", "role": "user"} - # Missing "resource" field -} - -result = safe_policy_evaluation('user.role == "admin"', incomplete_context) -assert result == False, "Should deny access when required context is missing" -# → False (graceful degradation - deny when context incomplete) - -# Test 3: Missing nested required field -context_missing_user_id = { - "user": {"role": "user"}, # Missing "id" field - "resource": {"owner": "alice", "type": "document"} -} - -result = safe_policy_evaluation('resource.owner == user.id', context_missing_user_id) -assert result == False, "Should deny access when required nested field is missing" -# → False (fail-safe - deny access on missing data) - -# Test 4: Valid policy with different outcome -admin_context = { - "user": {"id": "bob", "role": "admin"}, - "resource": {"owner": "alice", "type": "document"} -} - -result = safe_policy_evaluation('user.role == "admin" || resource.owner == user.id', admin_context) -assert result == True, "Admin should have access regardless of ownership" -# → True (admin privilege overrides ownership check) - -print("✓ Safe policy evaluation with context validation working correctly") -# → Output: Defensive programming prevents security bypass -``` - -### 3. 
Input Sanitization for Untrusted Expressions {#input-sanitization-for-untrusted-expressions} + return evaluate(expression, context) + except (ValueError, RuntimeError, TypeError) as e: + log.warning("CEL evaluation failed: %s (expr=%r)", e, expression) + return None -When accepting CEL expressions from users, implement validation: -```python -import re -from typing import List, Optional - -class CELValidator: - """Validator for CEL expressions from untrusted sources.""" - - # Patterns that are commonly malformed and raise ValueError - DANGEROUS_PATTERNS = [ - r"'[^']*$", # Unclosed single quote - r'"[^"]*$', # Unclosed double quote - r"'[^']*\"", # Mixed quotes: single -> double - r'"[^"]*\'', # Mixed quotes: double -> single - ] - - # Maximum expression length to prevent DoS - MAX_EXPRESSION_LENGTH = 1000 - - def validate_expression(self, expression: str) -> List[str]: - """ - Validate a CEL expression for common issues. - - Returns list of validation errors (empty if valid). - """ - errors = [] - - # Check length - if len(expression) > self.MAX_EXPRESSION_LENGTH: - errors.append(f"Expression too long (max {self.MAX_EXPRESSION_LENGTH} chars)") - - # Check for dangerous patterns - for pattern in self.DANGEROUS_PATTERNS: - if re.search(pattern, expression): - errors.append("Expression contains potentially problematic syntax") - break - - # Check balanced quotes - if not self._quotes_balanced(expression): - errors.append("Unbalanced quotes detected") - - return errors - - def _quotes_balanced(self, expression: str) -> bool: - """Check if quotes are properly balanced.""" - single_quotes = expression.count("'") - double_quotes = expression.count('"') - - # Simple check - both should be even (assuming no escaping) - return single_quotes % 2 == 0 and double_quotes % 2 == 0 - -def safe_user_expression_eval(user_expression: str, context: Dict[str, Any]) -> tuple[bool, Optional[Any], List[str]]: - """ - Safely evaluate a user-provided CEL expression. 
- - Returns (success, result, errors). - """ - validator = CELValidator() - - # Validate expression first - validation_errors = validator.validate_expression(user_expression) - if validation_errors: - return False, None, validation_errors - - # Attempt evaluation - try: - result = evaluate(user_expression, context) - return True, result, [] - except Exception as e: - return False, None, [f"Evaluation error: {str(e)}"] - -# Usage -user_input = 'user.age >= 18 && user.verified == true' -context = {"user": {"age": 25, "verified": True}} - -success, result, errors = safe_user_expression_eval(user_input, context) -if success: - assert result is True - # → True (user meets age and verification requirements) -else: - assert False, f"Validation should not have failed: {errors}" - -# Test 2: Invalid expression (accessing nonexistent field) -dangerous_input = 'user.nonexistent_field' -success, result, errors = safe_user_expression_eval(dangerous_input, context) -assert success == False, "Expression with nonexistent field should be blocked" -assert len(errors) > 0, "Should report validation or runtime errors" -# → False, errors: ['Evaluation error: ...'] (field access error caught) - -# Test 3: Invalid syntax -invalid_syntax = 'user.age >=' # Incomplete comparison -success, result, errors = safe_user_expression_eval(invalid_syntax, context) -assert success == False, "Invalid syntax should be rejected" -assert len(errors) > 0, "Should report syntax errors" -# → False, errors: ['Evaluation error: Failed to parse'] (malformed input caught) - -# Test 4: Empty expression -success, result, errors = safe_user_expression_eval('', context) -assert success == False, "Empty expression should be rejected" -# → False, errors: ['Evaluation error: ...'] (empty input handled safely) - -# Test 5: Undefined variable -undefined_var = 'undefined_variable' -success, result, errors = safe_user_expression_eval(undefined_var, context) -assert success == False, "Undefined variable should cause error" 
-# → False, errors: ['Evaluation error: Undefined variable'] (prevents data leakage) - -print("✓ Safe expression validation working correctly") -# → Output: Comprehensive input validation working +# Examples +assert safe_evaluate("user.age >= 18", {"user": {"age": 25}}) is True +assert safe_evaluate("1 + + 2") is None # parse error +assert safe_evaluate("missing", {}) is None # undefined variable +assert safe_evaluate("1 + 'oops'") is None # type error ``` -## Defensive Expression Patterns +## Defensive expression patterns -### Safe Field Access - -Use CEL's built-in safety features to write robust expressions: +Within the expression itself, use `has()` and ternaries to short-circuit around missing fields rather than relying on exception handling: ```python -# ❌ Risky - will fail if fields don't exist -risky_expr = 'user.profile.settings.theme == "dark"' - -# ✅ Safe - check existence first -safe_expr = ''' - has(user.profile) && - has(user.profile.settings) && - has(user.profile.settings.theme) && - user.profile.settings.theme == "dark" +# Safe field access using has() +expr = ''' + has(user.profile) && has(user.profile.email) + ? user.profile.email + : "no-email" ''' +result = evaluate(expr, {"user": {"profile": {"email": "alice@example.com"}}}) +assert result == "alice@example.com" -# ✅ Even safer - use defaults (with has() checks) -safe_with_defaults = '''has(user.profile) && has(user.profile.settings) && - (has(user.profile.settings.theme) ? 
user.profile.settings.theme : "light") == "dark"''' - -# Test both approaches -context_complete = { - "user": { - "profile": { - "settings": {"theme": "dark"} - } - } -} - -context_missing = {"user": {"name": "alice"}} - -# Safe expressions work with both contexts -assert safe_evaluate(safe_expr, context_complete) is True -# → True (complete context allows proper evaluation) -assert safe_evaluate(safe_expr, context_missing) is False -# → False (missing fields cause safe failure) - -assert safe_evaluate(safe_with_defaults, context_complete) is True -# → True (theme setting detected correctly) -assert safe_evaluate(safe_with_defaults, context_missing) is False -# → False (defensive pattern prevents runtime errors) +# Default values for optional fields +result = evaluate( + 'has(config.timeout) ? config.timeout : 30', + {"config": {}}, +) +assert result == 30 ``` -### Type-Safe Operations +## Pre-compilation for performance -Prevent type errors with careful expression design: +If you're evaluating the same expression many times, compile once and reuse the program. 
Parse errors surface at `compile()` time; runtime errors at `execute()` time, which lets you handle the two failure modes separately: ```python -# ❌ Risky - assumes numeric types -risky_expr = 'user.age > 18' +from cel import compile -# ✅ Safe - use numeric conversion with error handling -safe_expr = 'has(user.age) && double(user.age) > 18.0' +program = compile("user.age >= 18") -# ✅ Alternative - check for common failure case first -defensive_expr = 'has(user.age) && user.age != null && user.age > 18' +# Then on each call: +try: + allowed = program.execute({"user": {"age": 25}}) +except (RuntimeError, TypeError, ValueError, ArithmeticError, LookupError) as e: + log.warning("Policy evaluation failed: %s", e) + allowed = False -# Note: type() function is not available in this CEL implementation -# Use conversion functions (double(), int()) for type safety instead +assert allowed is True ``` -## Logging and Monitoring - -### Structured Error Logging +## Testing error scenarios -Implement comprehensive logging for production debugging: - -```python -import logging -import json -from datetime import datetime, timezone - -def evaluate_with_logging(expression: str, context: Dict[str, Any], operation_id: str = None) -> Any: - """Evaluate with comprehensive logging for production debugging.""" - - start_time = datetime.now(timezone.utc) - - log_context = { - "operation_id": operation_id, - "expression": expression, - "context_keys": list(context.keys()) if context else [], - "timestamp": start_time.isoformat() - } - - try: - result = evaluate(expression, context) - - # Log successful evaluation - logging.info("CEL evaluation succeeded", extra={ - **log_context, - "result_type": type(result).__name__, - "duration_ms": (datetime.now(timezone.utc) - start_time).total_seconds() * 1000 - }) - - return result - - except Exception as e: - # Log detailed error information - logging.error("CEL evaluation failed", extra={ - **log_context, - "error_type": type(e).__name__, - "error_message": str(e), - "duration_ms": 
(datetime.now(timezone.utc) - start_time).total_seconds() * 1000 - }) - raise - -# Usage in web application -def get_user(user_id: str): - """Mock function to get user data.""" - return {"id": user_id, "role": "user"} - -def get_resource(resource_id: str): - """Mock function to get resource data.""" - return {"id": resource_id, "type": "document"} - -def check_access(user_id: str, resource_id: str, policy: str) -> bool: - context = { - "user": get_user(user_id), - "resource": get_resource(resource_id) - } - - operation_id = f"access_check_{user_id}_{resource_id}" - - try: - result = evaluate_with_logging(policy, context, operation_id) - return bool(result) - except Exception: - # Log and deny access on any error - return False - -# Test the function -result = check_access("alice", "doc1", "user.id == 'alice'") -assert result is True -# → True (access granted with comprehensive logging) -``` - -## Testing Error Scenarios - -### Unit Tests for Error Handling - -Write comprehensive tests for your error handling: +When testing code that evaluates CEL expressions, assert on the exception type — not the exact message, which can drift with cel-rust releases: ```python +import pytest from cel import evaluate -from typing import Any, Optional, Dict -import logging -def safe_evaluate(expression: str, context: Optional[Dict[str, Any]] = None) -> Optional[Any]: - """Safely evaluate a CEL expression with comprehensive error handling.""" - try: - return evaluate(expression, context) - except ValueError as e: - logging.warning(f"CEL parse error: {e}") - return None - except TypeError as e: - logging.warning(f"CEL type error: {e}") - return None - except RuntimeError as e: - logging.warning(f"CEL runtime error: {e}") - return None - except Exception as e: - logging.error(f"Unexpected CEL error: {e}") - return None - -def test_error_handling(): - """Test various error scenarios.""" - - # Test parse errors - try: +def test_invalid_syntax(): + with pytest.raises(ValueError): 
evaluate("1 + + 2") - assert False, "Should have raised ValueError" - except ValueError: - pass # Expected - # → ValueError caught (syntax error handled gracefully) - - # Test runtime errors - try: - evaluate("unknown_var", {}) - assert False, "Should have raised RuntimeError" - except RuntimeError: - pass # Expected - # → RuntimeError caught (undefined variable blocked safely) - - # Test type errors - try: - evaluate("1 + 2u") # Mixed signed/unsigned arithmetic - assert False, "Should have raised TypeError" - except (TypeError, ValueError): # May be TypeError or ValueError depending on operation - pass # Expected - # → Type error caught (incompatible types handled safely) - -def test_safe_evaluation(): - """Test safe evaluation wrapper.""" - - # Should return None for invalid expressions - assert safe_evaluate("1 + + 2") is None - # → None (parse error handled gracefully) - assert safe_evaluate("unknown_var", {}) is None - # → None (runtime error converted to safe None) - assert safe_evaluate("undefined_field", {}) is None - # → None (undefined variable error handled without crash) - - # Should work for valid expressions - assert safe_evaluate("1 + 2") == 3 - # → 3 (valid expression evaluates correctly) - assert safe_evaluate("name", {"name": "Alice"}) == "Alice" - # → "Alice" (context variable accessed safely) - -# Run tests to verify everything works -test_error_handling() -test_safe_evaluation() -print("✓ Error handling test examples working correctly") -# → Output: All error scenarios handled robustly -``` -## Best Practices Summary +def test_undefined_variable(): + with pytest.raises(RuntimeError): + evaluate("missing_var", {}) + +def test_type_mismatch(): + with pytest.raises(TypeError): + evaluate("1 + 2u") +``` -1. **Always use exception handling** in production code -2. **Validate context data** before evaluation -3. **Use defensive expressions** with `has()` and ternary operators -4. **Implement input validation** for untrusted expressions -5. 
**Log errors comprehensively** for debugging -6. **Test error scenarios** thoroughly -7. **Handle malformed input** with proper exception handling -8. **Fail safely** - deny access on evaluation errors +## Best practices -Remember: CEL is designed to be safe, but your application's error handling determines how gracefully it handles edge cases and malicious input. \ No newline at end of file +- **Catch by type, not message.** Exception classes are part of the public API; message text is not. +- **Use `has()` for optional fields** rather than catching exceptions from inside an expression. +- **Pre-compile hot-path expressions** with `compile()` so parse errors surface once, at startup. +- **Log the failing expression** when you catch an evaluation error — the expression text is usually the most useful debugging info. +- **Don't sandbox by exception handling alone** if the expression source is untrusted; also limit input size, expression depth, and execution time. diff --git a/docs/how-to-guides/production-patterns-best-practices.md b/docs/how-to-guides/production-patterns-best-practices.md deleted file mode 100644 index 717750f..0000000 --- a/docs/how-to-guides/production-patterns-best-practices.md +++ /dev/null @@ -1,706 +0,0 @@ -# Production Patterns & Best Practices - -This guide serves as your comprehensive hub for production CEL patterns, summarizing key practices and directing you to detailed implementations. Use this as your go-to reference for building robust, secure, and performant CEL applications. - -## 🛡️ Safe Expression Design - -### Always Use `has()` for Optional Fields - -**Key Practice**: Check field existence before accessing to prevent runtime errors. - -```cel -# ✅ Safe - won't crash if profile is missing -has("user.profile") && user.profile.verified - -# ✅ Safe - with fallback value -user.profile.verified if has("user.profile") else false -``` - -**Why It Matters**: Prevents runtime crashes when context data is incomplete or inconsistent. 
- -**Learn More**: See [Error Handling → Defensive Expression Patterns](error-handling.md#defensive-expression-patterns) for comprehensive examples and advanced patterns. - -### Validate Context Data Before Evaluation - -**Key Practice**: Don't trust input data - validate it first. - -```python -from cel import evaluate - -def safe_policy_evaluation(policy, context): - # Validate required fields exist - required_fields = ["user", "resource", "action"] - for field in required_fields: - if field not in context: - raise ValueError(f"Missing required field: {field}") - return evaluate(policy, context) - -# Test the function -context = {"user": {"id": "alice"}, "resource": {"type": "file"}, "action": "read"} -result = safe_policy_evaluation("user.id == 'alice'", context) -# → True (validates required security context fields present) -``` - -**Why It Matters**: Prevents evaluation errors and ensures consistent behavior across your application. - -**Learn More**: See [Error Handling → Context Validation](error-handling.md#context-validation) for complete validation patterns and production examples. - -### Build Defensive Expressions - -**Key Practice**: Write expressions that handle edge cases gracefully. - -```cel -# ✅ Handles missing fields, empty lists, null values -has("user.role") && user.role == "admin" || -(has("user.permissions") && size(user.permissions) > 0 && "admin" in user.permissions) -``` - -**Why It Matters**: Makes your expressions resilient to data variations and reduces failure rates. - -**Learn More**: See [Error Handling → Defensive Expression Patterns](error-handling.md#defensive-expression-patterns) for comprehensive defensive techniques. - -## 🌐 Web Framework Integration - -### Flask Integration Patterns - -**Key Practice**: Use decorators for policy-based route protection. 
- -```python -# Example decorator (implementation in web framework examples) -def require_policy(policy_name): - def decorator(func): - return func - return decorator - -@require_policy("admin_only") -def admin_endpoint(): - return {"data": "sensitive"} - -# Test the decorator -decorated_func = require_policy("admin_only")(admin_endpoint) -result = decorated_func() -# → {"data": "sensitive"} (policy-protected endpoint access granted) -``` - -**Core Components**: -- **Context Builders**: Create consistent CEL contexts from Flask requests -- **Policy Decorators**: Apply access control policies to routes -- **Error Handling**: Graceful policy evaluation failure handling - -**Implementation Details**: This involves several patterns including request context building, policy decorator implementation, and error handling. The complete Flask integration requires ~200 lines of production-ready code. - -**Get Full Implementation**: See [Web Framework Integration Examples](https://github.com/hardbyte/python-common-expression-language/tree/main/examples/web-frameworks) for complete Flask, FastAPI, and Django integration examples. - -### FastAPI Integration Patterns - -**Key Practice**: Use dependency injection for async policy checking. 
- -```python -# Example classes (implementation in FastAPI examples) -class PolicyChecker: - def __init__(self, policy): - self.policy = policy - -def Depends(dependency): - return dependency - -class MockApp: - def get(self, path): - def decorator(func): - return func - return decorator - -app = MockApp() -require_admin = PolicyChecker("user.role == 'admin'") - -@app.get("/admin") -async def admin_route(authorized: bool = Depends(require_admin)): - return {"message": "Admin access granted"} - -# Test the setup -assert require_admin.policy == "user.role == 'admin'" -# → True (policy correctly configured for dependency injection) -assert Depends(require_admin) is require_admin -# → True (FastAPI dependency injection properly configured) -``` - -**Core Components**: -- **Async Context Building**: Handle async user authentication and context creation -- **Policy Dependencies**: Reusable policy checkers for route protection -- **Thread Pool Execution**: Handle CPU-bound CEL evaluation in async context - -**Get Full Implementation**: See [FastAPI CEL Integration Example](https://github.com/hardbyte/python-common-expression-language/tree/main/examples/web-frameworks/fastapi) for complete async implementation. - -### Django Integration Patterns - -**Key Practice**: Use middleware for request-scoped CEL context. 
- -```python -# Example decorator (implementation in Django examples) -def cel_permission_required(policy): - def decorator(func): - return func - return decorator - -class JsonResponse: - def __init__(self, data): - self.data = data - -@cel_permission_required("user.is_staff && user.groups.contains('editors')") -def edit_view(request, article_id): - return JsonResponse({"message": f"Editing {article_id}"}) - -# Test the setup -class MockRequest: - pass - -response = edit_view(MockRequest(), "123") -# → JsonResponse({"message": "Editing 123"}) (Django view with CEL policy protection) -``` - -**Core Components**: -- **Middleware Integration**: Automatic CEL context creation for all requests -- **View Decorators**: Permission checking decorators for Django views -- **User Context**: Integration with Django's authentication system - -**Get Full Implementation**: See [Django CEL Integration Example](https://github.com/hardbyte/python-common-expression-language/tree/main/examples/web-frameworks/django) for complete middleware and decorator implementation. - -## 🚀 Performance Optimization {#performance-optimization} - -### Context Design for Performance - -**Key Practice**: Design flat, efficient context structures. - -```python -from cel import evaluate - -# ✅ Efficient - flat structure -context_flat = { - "user_role": "admin", - "resource_type": "database", - "action": "delete" -} - -# ❌ Less efficient - deeply nested -context_nested = { - "request": { - "user": {"profile": {"role": "admin"}} - } -} - -# Test both contexts work -result1 = evaluate("user_role == 'admin'", context_flat) -# → True (fast evaluation: ~5μs with flat structure) -result2 = evaluate("request.user.profile.role == 'admin'", context_nested) -# → True (slower evaluation: ~15μs with nested structure) -``` - -**Why It Matters**: Flat structures reduce expression evaluation time and memory usage. 
- -**Learn More**: See [Performance Benchmarking](#performance-benchmarking) section below for measurement techniques. - -### Expression Caching Strategies - -**Key Practice**: Cache evaluation results for common scenarios using LRU cache. - -```python -from functools import lru_cache -from cel import evaluate - -class PolicyEngine: - @lru_cache(maxsize=1000) - def _evaluate_cached(self, policy, user_role, resource_public): - context = {"user": {"role": user_role}, "resource": {"public": resource_public}} - return evaluate(policy, context) - -# Test the cached evaluation -engine = PolicyEngine() -result1 = engine._evaluate_cached("user.role == 'admin'", "admin", True) -# → True (first evaluation: ~10μs, result cached for reuse) -result2 = engine._evaluate_cached("user.role == 'admin'", "admin", True) -# → True (cached lookup: ~0.1μs, 100x performance improvement) -``` - -**When to Use**: For high-frequency evaluations with repeated context patterns. - -**When Not to Use**: For constantly changing context data or user-specific evaluations. - -**Advanced Patterns**: For production caching strategies including cache invalidation, distributed caching, and performance monitoring, see the performance optimization examples in the repository. - -## 🔒 Security Best Practices {#security-best-practices} - -### Input Sanitization for Untrusted Expressions - -**Key Practice**: Validate and sanitize user-provided CEL expressions. 
- -```python -import re - -# Define security constants -MAX_EXPRESSION_LENGTH = 1000 -# Allow safe characters for CEL expressions -ALLOWED_PATTERN = re.compile(r'^[a-zA-Z0-9_\s\.\(\)\[\]\{\}\+\-\*\/\<\>\=\!\&\|\,]+$') - -def sanitize_expression(expression): - if len(expression) > MAX_EXPRESSION_LENGTH: - raise ValueError("Expression too long") - - if not ALLOWED_PATTERN.match(expression): - raise ValueError("Expression contains invalid characters") - - return expression - -# Test the sanitization function -valid_expr = "user.role == admin" # Simplified to avoid quote escaping issues -sanitized = sanitize_expression(valid_expr) -# → "user.role == admin" (expression passed security validation) - -# Test with a clearly invalid expression -try: - sanitize_expression("user.role == admin; DROP TABLE users;") - # → ValueError: Expression contains invalid characters (SQL injection blocked) - assert False, "Should have raised ValueError" -except ValueError as e: - # → "invalid characters" (security threat successfully detected and blocked) - assert "invalid characters" in str(e) -``` - -**Critical Security Concerns**: -- **Expression Length**: Prevent DoS attacks through extremely long expressions -- **Character Validation**: Block potentially dangerous patterns -- **Malformed Syntax**: Handle syntax errors that raise ValueError exceptions - -**Learn More**: See [Error Handling → Input Sanitization for Untrusted Expressions](error-handling.md#input-sanitization-for-untrusted-expressions) for complete validation patterns and security examples. - -### Context Isolation - -**Key Practice**: Only include necessary, safe data in CEL contexts. 
- -```python -from cel import Context, evaluate - -def create_isolated_context(user_data, resource_data): - # Only include explicitly allowed fields - safe_user = { - "id": user_data.get("id"), - "role": user_data.get("role"), - "verified": user_data.get("verified", False) - } - return Context({"user": safe_user}) - -# Test the isolation function -user_data = {"id": "alice", "role": "admin", "password": "secret", "verified": True} -resource_data = {"type": "file"} -context = create_isolated_context(user_data, resource_data) - -# Verify only safe fields are included by testing evaluation -assert evaluate("user.id", context) == "alice" -# → "alice" (safe field accessible in isolated context) -assert evaluate("user.role", context) == "admin" -# → "admin" (role information safely exposed for authorization) -assert evaluate("user.verified", context) is True -# → True (verification status available for security decisions) - -# Verify password is not accessible (this would fail if password was included) -try: - evaluate("user.password", context) - # → Exception (sensitive data successfully isolated from CEL context) - assert False, "Password should not be accessible" -except Exception: - pass # → Security isolation working: sensitive fields protected -``` - -**Why It Matters**: Prevents data leakage and reduces attack surface. - -**Learn More**: See [Access Control Policies → Best Practices](access-control-policies.md#best-practices) for comprehensive security patterns. - -## 🧪 Testing Strategies {#testing-strategies} - -### Unit Testing CEL Expressions - -**Key Practice**: Treat CEL expressions as code - write comprehensive tests. 
- -```python -from cel import evaluate - -def test_admin_access_policy(): - context = {"user": {"role": "admin"}} - policy = "user.role == 'admin'" - result = evaluate(policy, context) - # → True (admin access policy correctly grants permission) - assert result == True - -def test_missing_context_handled_safely(): - context = {"user": {"id": "alice"}} # No role - safe_policy = 'has(user.role) && user.role == "admin"' - result = evaluate(safe_policy, context) - # → False (defensive policy safely handles missing role field) - assert result == False - -# Run the tests -test_admin_access_policy() -# → Test passed: admin policy validation working correctly -test_missing_context_handled_safely() -# → Test passed: defensive patterns prevent runtime errors -``` - -**Testing Categories**: -- **Happy Path**: Test expected successful scenarios -- **Edge Cases**: Test missing data, null values, empty collections -- **Error Conditions**: Test invalid expressions and malformed context -- **Property-Based**: Use hypothesis for comprehensive input testing - -**Learn More**: See [Error Handling → Testing Error Scenarios](error-handling.md#testing-error-scenarios) for complete testing strategies and examples. - -### Integration Testing - -**Key Practice**: Test CEL integration within your web framework. 
- -```python -# Mock client for testing -class MockResponse: - def __init__(self, status_code): - self.status_code = status_code - -class MockClient: - def get(self, path, headers=None): - # Simple mock: admin tokens get 200, others get 403 - if headers and 'admin_token' in headers.get('Authorization', ''): - return MockResponse(200) - return MockResponse(403) - -def test_protected_route_access(): - client = MockClient() - - # Test admin access - response = client.get('/admin/users', - headers={'Authorization': 'Bearer admin_token'}) - # → 200 (admin successfully granted access to protected route) - assert response.status_code == 200 - - # Test user denial - response = client.get('/admin/users', - headers={'Authorization': 'Bearer user_token'}) - # → 403 (unauthorized user correctly denied access) - assert response.status_code == 403 - -# Run the test -test_protected_route_access() -# → Integration test passed: CEL policies properly protecting web routes -``` - -**Integration Test Areas**: -- **Route Protection**: Test policy decorators with different user roles -- **Context Building**: Test request context creation accuracy -- **Error Handling**: Test policy evaluation failure scenarios - -## 🔍 Monitoring & Debugging {#monitoring-and-debugging} - -### Expression Evaluation Logging - -**Key Practice**: Log CEL evaluations for production debugging. 
- -```python -import logging -from cel import evaluate - -# Configure logger -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -def evaluate_with_logging(expression, context, description=""): - try: - result = evaluate(expression, context) - logger.info(f"CEL evaluation {description}: '{expression}' -> {result}") - return result - except Exception as e: - logger.error(f"CEL evaluation failed {description}: '{expression}' -> {e}") - raise - -# Test the logging function -context = {"user": {"role": "admin"}} -result = evaluate_with_logging("user.role == 'admin'", context, "test") -# → True (logged: "CEL evaluation test: 'user.role == 'admin'' -> True") -``` - -**What to Log**: -- **Expression**: The CEL expression being evaluated -- **Result**: The evaluation result -- **Context Keys**: Available context fields (not values for security) -- **Performance**: Evaluation timing for slow expressions - -**Learn More**: See [Error Handling → Logging and Monitoring](error-handling.md#logging-and-monitoring) for production logging strategies. - -### Performance Monitoring - -**Key Practice**: Track evaluation performance in production. 
- -```python -import time -import logging -from cel import evaluate - -# Configure logger -logger = logging.getLogger(__name__) -logger.setLevel(logging.WARNING) - -class MonitoredPolicyEngine: - def evaluate_monitored(self, expression, context): - start_time = time.perf_counter() - try: - result = evaluate(expression, context) - return result - finally: - duration = time.perf_counter() - start_time - if duration > 0.001: # 1ms threshold - logger.warning(f"Slow CEL evaluation: {expression[:50]}") - -# Test the monitored evaluation -engine = MonitoredPolicyEngine() -context = {"user": {"role": "admin"}} -result = engine.evaluate_monitored("user.role == 'admin'", context) -# → True (performance monitored: <1ms, within acceptable limits) - -# Test with different expressions to verify monitoring -test_expressions = [ - ("user.role == 'admin'", True), - ("user.role == 'user'", False), - ("has(user.permissions) && 'admin' in user.permissions", False), - ("user.role in ['admin', 'manager', 'user']", True) -] - -for expression, expected in test_expressions: - result = engine.evaluate_monitored(expression, context) - # → True/False (each evaluation monitored for performance degradation) - assert result == expected, f"Expression '{expression}' should return {expected}" - -print("✓ Monitored evaluation tracking multiple expressions") -# → All expressions evaluated with performance monitoring enabled - -# Test monitoring behavior with slow expression (simulate complex logic) -complex_context = { - "user": {"role": "admin", "permissions": ["read", "write", "admin"]}, - "resources": [{"id": i, "type": "document", "public": False} for i in range(100)] -} - -# This expression will be more complex and potentially trigger monitoring -complex_expression = "user.role == 'admin' && size(resources) > 50 && user.permissions.all(p, p in ['read', 'write', 'admin'])" -result = engine.evaluate_monitored(complex_expression, complex_context) -# → True (complex evaluation completed, potential 
performance warning logged) -assert result == True, "Complex expression should return true" -print("✓ Complex expression monitoring works") -# → Complex logic monitored: may trigger slow evaluation alerts - -# Test error handling in monitoring -try: - engine.evaluate_monitored("undefined_variable == 'test'", context) - # → Exception (evaluation error properly tracked and logged) - assert False, "Should raise error for undefined variable" -except Exception: - print("✓ Monitoring correctly handles evaluation errors") - # → Error monitoring working: evaluation failures tracked for debugging -``` - -**Monitoring Metrics**: -- **Evaluation Time**: Track slow expressions -- **Expression Frequency**: Identify hot paths for optimization -- **Error Rates**: Monitor evaluation failures -- **Cache Hit Rates**: If using caching strategies - -## 📊 Performance Benchmarking {#performance-benchmarking} - -### Baseline Performance Measurement - -Run this benchmark to understand CEL performance on your hardware: - -```python -import time -from cel import evaluate - -def benchmark_cel_performance(): - """Comprehensive CEL performance benchmark matching documented claims.""" - - # Test scenarios matching the performance table - test_cases = [ - { - "name": "Simple expressions", - "expression": "x + y * 2", - "context": {"x": 10, "y": 20}, - "expected": 50, - "iterations": 10000 - }, - { - "name": "Complex expressions", - "expression": "user.active && user.role in ['admin', 'editor'] && has(user.permissions) && user.permissions.size() > 0", - "context": { - "user": { - "active": True, - "role": "admin", - "permissions": ["read", "write", "delete"] - } - }, - "expected": True, - "iterations": 5000 - }, - { - "name": "Function calls", - "expression": "double(x) + square(y)", - "context": { - "x": 5, - "y": 3, - "double": lambda x: x * 2, - "square": lambda x: x * x - }, - "expected": 19, # double(5) + square(3) = 10 + 9 - "iterations": 3000 - } - ] - - results = [] - - for test_case in 
test_cases: - print(f"\nBenchmarking: {test_case['name']}") - - # Verify the expression works correctly - result = evaluate(test_case["expression"], test_case["context"]) - # → Expected result (validates benchmark test case correctness) - assert result == test_case["expected"], f"Expected {test_case['expected']}, got {result}" - - # Warmup - for _ in range(100): - evaluate(test_case["expression"], test_case["context"]) - - # Benchmark - start_time = time.perf_counter() - for _ in range(test_case["iterations"]): - evaluate(test_case["expression"], test_case["context"]) - end_time = time.perf_counter() - - # Calculate metrics - total_time = end_time - start_time - avg_time_us = (total_time / test_case["iterations"]) * 1_000_000 - throughput = test_case["iterations"] / total_time - - result_data = { - "name": test_case["name"], - "avg_time_us": avg_time_us, - "throughput": throughput, - "iterations": test_case["iterations"] - } - results.append(result_data) - - print(f" Average time: {avg_time_us:.1f} μs") - print(f" Throughput: {throughput:,.0f} ops/sec") - - return results - -# Run the benchmark and display results -if __name__ == "__main__": - print("CEL Performance Benchmark") - print("=" * 40) - results = benchmark_cel_performance() - # → Comprehensive performance metrics for production capacity planning - - print("\nSummary:") - print("-" * 40) - for result in results: - print(f"{result['name']:20} | {result['avg_time_us']:6.1f} μs | {result['throughput']:8,.0f} ops/sec") - # → Production performance baseline: enables capacity planning and SLA definition -``` - -**Expected Results**: - -- **Simple expressions**: 5-15 μs per evaluation, 50,000+ ops/sec -- **Complex expressions**: 15-40 μs per evaluation, 25,000+ ops/sec -- **Function calls**: 20-50 μs per evaluation, 20,000+ ops/sec - -**Learn More**: See [Performance Benchmarking Examples](https://github.com/hardbyte/python-common-expression-language/tree/main/examples/performance) for comprehensive benchmarking 
scripts. - -## 📚 Configuration Management {#configuration-management} - -### Dynamic Configuration Validation - -**Key Practice**: Use CEL expressions to validate application configuration. - -```python -from cel import evaluate - -validation_rules = [ - { - "field": "database.port", - "expression": "config.database.port > 0 && config.database.port < 65536", - "message": "Database port must be between 1 and 65535" - }, - { - "field": "ssl_required", - "expression": 'config.ssl_enabled || env == "development"', - "message": "SSL must be enabled in production" - } -] - -# Test validation rules -config_context = { - "config": { - "database": {"port": 5432}, - "ssl_enabled": True - }, - "env": "production" -} - -# Validate all rules -for rule in validation_rules: - result = evaluate(rule["expression"], config_context) - # → True (configuration validation passed: system is properly configured) - assert result is True, f"Validation failed: {rule['message']}" - -# Test invalid configuration -invalid_context = { - "config": { - "database": {"port": 70000}, # Invalid port - "ssl_enabled": False - }, - "env": "production" -} - -port_rule = validation_rules[0] -port_valid = evaluate(port_rule["expression"], invalid_context) -# → False (invalid configuration detected: prevents deployment of misconfigured system) -assert port_valid is False -``` - -**Benefits**: -- **Business-Readable Rules**: Non-developers can understand validation logic -- **Dynamic Configuration**: Rules can be updated without code changes -- **Environment-Aware**: Different rules for development vs production - -**Implementation**: Configuration validation requires a validation engine that processes rules and provides clear error messages. See [Business Logic & Data Transformation → Dynamic Rule Loading](business-logic-data-transformation.md#dynamic-rule-loading) for complete implementation. 
- -## 🎯 Quick Reference - -### Essential Patterns Summary - -| Pattern | Key Principle | Implementation Guide | -|---------|---------------|---------------------| -| **Safe Expressions** | Use `has()` for optional fields | [Error Handling](error-handling.md#defensive-expression-patterns) | -| **Context Validation** | Validate before evaluation | [Error Handling](error-handling.md#context-validation) | -| **Web Integration** | Use decorators/dependencies | [Framework Examples](https://github.com/hardbyte/python-common-expression-language/tree/main/examples/web-frameworks) | -| **Performance** | Design flat contexts | [Performance Examples](https://github.com/hardbyte/python-common-expression-language/tree/main/examples/performance) | -| **Security** | Sanitize untrusted input | [Error Handling](error-handling.md#input-sanitization-for-untrusted-expressions) | -| **Testing** | Test like code | [Error Handling](error-handling.md#testing-error-scenarios) | -| **Monitoring** | Log evaluations | [Error Handling](error-handling.md#logging-and-monitoring) | - -### Next Steps - -1. **Start with Safety**: Implement [defensive expression patterns](error-handling.md#defensive-expression-patterns) -2. **Add Web Integration**: Choose your framework integration from the [examples](https://github.com/hardbyte/python-common-expression-language/tree/main/examples/web-frameworks) -3. **Implement Monitoring**: Add [evaluation logging](error-handling.md#logging-and-monitoring) for production visibility -4. **Optimize Performance**: Run [benchmarks](#performance-benchmarking) and implement caching as needed -5. 
**Secure Your Application**: Add [input sanitization](error-handling.md#input-sanitization-for-untrusted-expressions) for untrusted expressions - -## Related Guides - -- **[Error Handling](error-handling.md)** - Comprehensive error handling strategies -- **[Business Logic & Data Transformation](business-logic-data-transformation.md)** - Complex business rules and data processing -- **[Access Control Policies](access-control-policies.md)** - User permission and authorization patterns -- **[Dynamic Query Filters](dynamic-query-filters.md)** - Database query construction and filtering -- **[CLI Usage Recipes](cli-recipes.md)** - Command-line tool integration patterns \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 87bfa94..53c6417 100644 --- a/docs/index.md +++ b/docs/index.md @@ -108,16 +108,16 @@ Built on Rust with PyO3 - evaluate expressions in **microseconds**, not millisec | **Function calls** (with custom Python functions) | 20-50 μs | 20,000+ ops/sec | **~5x faster** | | *Pure Python equivalent* | *100-800 μs* | *1,000-10,000 ops/sec* | *baseline* | -*Performance varies by hardware. [Run your own benchmarks →](how-to-guides/production-patterns-best-practices.md#performance-benchmarking)* +*Performance varies by hardware. See `examples/performance/compile_execute_benchmark.py` to run your own benchmarks.* ### 🛡️ **Safety** Safe by Design: Built on a memory-safe Rust core. The non-Turing complete nature of CEL prevents infinite loops, and comprehensive error handling traps evaluation errors as Python exceptions. ### 🎯 **Production Ready** -200+ tests, comprehensive CLI, type safety, and ~80% CEL compliance with transparent documentation. +380+ tests, comprehensive CLI, type safety, and high CEL spec compliance. -### 🚀 **Future-Proof** -Built on cel-rust v0.11.1 with modern architecture - upcoming features like type introspection, optional values, and enhanced string functions will work seamlessly. 
+### 🚀 **Up to Date** +Built on cel-rust 0.13 — tracks upstream improvements in correctness and performance. ### 🔧 **Developer Friendly** Dual interfaces (Python API + CLI), rich error messages, extensive documentation, and full IDE support. @@ -214,11 +214,10 @@ Simple, readable policies that handle complex business logic. - [**Extending CEL**](tutorials/extending-cel.md) - Advanced context and custom functions 🛠️ **Solve Problems:** -- [**Access Control Policies**](how-to-guides/access-control-policies.md) - Sophisticated permission systems -- [**Production Patterns & Best Practices**](how-to-guides/production-patterns-best-practices.md) - Comprehensive production guide -- [**Business Logic & Data Transformation**](how-to-guides/business-logic-data-transformation.md) - Transform data and implement business rules +- [**Access Control Policies**](how-to-guides/access-control-policies.md) - Permission systems +- [**Business Logic & Data Transformation**](how-to-guides/business-logic-data-transformation.md) - Business rules - [**Dynamic Query Filters**](how-to-guides/dynamic-query-filters.md) - Build safe, dynamic queries -- [**Error Handling**](how-to-guides/error-handling.md) - Robust error handling strategies +- [**Error Handling**](how-to-guides/error-handling.md) - Exceptions and safe-evaluation patterns --- diff --git a/docs/reference/cel-compliance.md b/docs/reference/cel-compliance.md deleted file mode 100644 index fec250b..0000000 --- a/docs/reference/cel-compliance.md +++ /dev/null @@ -1,539 +0,0 @@ -# CEL Specification Compliance - -This document tracks the compliance of this Python CEL implementation with the [Common Expression Language (CEL) specification](https://github.com/google/cel-spec). - -## Summary - -- **Implementation**: Based on [`cel`](https://crates.io/crates/cel) v0.11.1 Rust crate (formerly cel-interpreter) -- **Estimated Compliance**: ~80% of CEL specification features. 
-- **Test Coverage**: 300+ tests across 16+ test files including comprehensive CLI testing and upstream improvement detection - -## 🚨 Missing Features & Severity Overview - -| **Feature** | **Severity** | **Impact** | **Workaround Available** | **Upstream Priority** | -|-----------------------------------------------------|--------------|------------|--------------------------|----------------------| -| **OR operator behavior** | 🟢 **LOW** | ✅ FIXED v0.11.1 - now CEL-compliant (rejects mixed types) | Use boolean operands only | **RESOLVED** | -| **String utility functions** | 🟡 **MEDIUM** | Limited string processing capabilities | Use Python context functions | **HIGH** | -| **Type introspection (`type()`)** | 🟡 **MEDIUM** | No runtime type checking | Use Python type checking | **HIGH** | -| **Mixed int/uint arithmetic** | 🟡 **MEDIUM** | Manual type conversion needed | Use explicit casting | **MEDIUM** | -| **Mixed-type arithmetic in macros** | 🟡 **MEDIUM** | Type coercion issues in collections | Ensure type consistency | **MEDIUM** | -| **Bytes concatenation** | 🟢 **LOW** | Cannot concatenate byte arrays | Convert through string | **LOW** | -| **Math functions (`ceil`, `floor`)** | 🟢 **LOW** | No mathematical utilities | Use Python context functions | **LOW** | -| **Collection aggregation (`sum`, `fold`, `reduce`)** | 🟢 **LOW** | No aggregation functions | Use Python context functions | **LOW** | -| **Optional values** | 🟢 **LOW** | No optional chaining syntax | Use `has()` checks | **FUTURE** | - -**Legend**: 🔴 High Impact | 🟡 Medium Impact | 🟢 Low Impact - - -## Python Type Mappings - -📖 **See the complete [Type System documentation](python-api.md#type-system)** for detailed CEL ↔ Python type mappings, map type constraints, and examples. - -This implementation correctly follows the CEL specification where maps can have heterogeneous values at runtime while maintaining key type restrictions. 
- -### Arithmetic Operations - -| CEL Operation | Result Type | Example | Python Result | Notes | -|---------------|-------------|---------|---------------|-------| -| `int + int` | `int` | `1 + 2` | `3` | ✅ Works | -| `uint + uint` | `int` | `1u + 2u` | `3` | ✅ Works | -| `double + double` | `float` | `1.5 + 2.5` | `4.0` | ✅ Works | -| `int + double` | `float` | `1 + 2.0` | `3.0` | ⚠️ **FAILS** - Use `double(1) + 2.0` | -| `double + int` | `float` | `1.5 + 2` | `3.5` | ⚠️ **FAILS** - Use `1.5 + double(2)` | -| `int / int` | `int` | `10 / 2` | `5` | ✅ Works | -| `uint % uint` | `int` | `10u % 3u` | `1` | ✅ Works | -| `string + string` | `str` | `"hello" + " world"` | `"hello world"` | ✅ Works | - - -### Logical Operations - -| CEL Operation | CEL Spec Result | Our Result | Python Result | Notes | -|---------------|-----------------|------------|---------------|-------| -| `true && false` | `bool` (false) | `bool` | `False` | ✅ Correct | -| `true \|\| false` | `bool` (true) | `bool` | `True` | ✅ Correct | -| `!true` | `bool` (false) | `bool` | `False` | ✅ Correct | -| `42 \|\| false` | `bool` (true) | `int` | `42` | ⚠️ **Behavioral Difference**: Returns original truthy value (JavaScript-like) | -| `0 && true` | `bool` (false) | `bool` | `False` | ✅ Correct (0 is falsy) | -| `'' && true` | `bool` (false) | `bool` | `False` | ✅ Correct (empty string falsy) | - -## Working Features - -### ✅ Core Data Types -- **Integers**: Full support for 64-bit signed integers (`int`) -- **Unsigned Integers**: Support for 64-bit unsigned integers (`uint`) with `u` suffix -- **Floats**: IEEE 64-bit double precision floating-point -- **Booleans**: Standard true/false values -- **Strings**: Unicode string support with concatenation -- **Bytes**: Byte sequence support (no concatenation) -- **Null**: Proper null handling as `None` -- **Lists**: Ordered collections with indexing and size operations -- **Maps**: Key-value dictionaries with restricted key types (int, uint, bool, string) and 
mixed value types (fully CEL compliant) -- **Timestamps**: Full datetime support with timezone awareness -- **Durations**: Time span support via timedelta - -### ✅ Operators - -#### Arithmetic Operators -- `+` (addition) - Integers, floats, strings -- `-` (subtraction) - Integers, floats -- `*` (multiplication) - Integers, floats -- `/` (division) - Integers, floats -- `%` (remainder/modulo) - Integers only - -#### Comparison Operators -- `==` (equal) - All types -- `!=` (not equal) - All types -- `<`, `>`, `<=`, `>=` - Numbers, strings (lexicographic) - -#### Logical Operators -- `&&` (logical AND) - With short-circuit evaluation ⚠️ **Requires boolean operands in v0.11.1+** -- `||` (logical OR) - With short-circuit evaluation ⚠️ **Partially improved in v0.11.1 - some mixed-type coercion removed** -- `!` (logical NOT) - Boolean negation - -#### Other Operators -- `?:` (ternary conditional) - Conditional expressions -- `[]` (indexing) - Lists and maps only (string indexing not supported) -- `.` (member access) - Object property access - -### ✅ Built-in Functions - -| Function | Signature | Purpose | Python Result | Status | -|----------|-----------|---------|---------------|---------| -| `size()` | `size(collection) -> int` | Get collection/string length | `int` | ✅ Working | -| `string()` | `string(value) -> string` | Convert to string | `str` | ✅ Working | -| `bytes()` | `bytes(value) -> bytes` | Convert to bytes | `bytes` | ✅ Working | -| `int()` | `int(value) -> int` | Convert to signed integer | `int` | ✅ Working | -| `uint()` | `uint(value) -> uint` | Convert to unsigned integer | `int` | ✅ Working | -| `double()` | `double(value) -> double` | Convert to double | `float` | ✅ Working | -| `timestamp()` | `timestamp(string) -> timestamp` | Parse timestamp | `datetime.datetime` | ✅ Working | -| `duration()` | `duration(string) -> duration` | Parse duration | `datetime.timedelta` | ✅ Working | -| `has()` | `has(field) -> bool` | Check field presence | `bool` | ✅ 
Working | -| `matches()` | `string.matches(pattern) -> bool` | Regex matching | `bool` | ✅ Working | -| `min()` | `min(list) -> value` | Find minimum value | Various | ✅ Working | -| `max()` | `max(list) -> value` | Find maximum value | Various | ✅ Working | -| `sum()` | `sum(list) -> number` | Sum numeric values | N/A | ❌ **NOT AVAILABLE** | - -### ✅ String Operations -- **contains()**: `"hello".contains("ell")` → `True` -- **startsWith()**: `"hello".startsWith("he")` → `True` -- **endsWith()**: `"hello".endsWith("lo")` → `True` -- **matches()**: `"hello world".matches(".*world")` → `True` -- **String concatenation**: `"hello" + " world"` → `"hello world"` -- **String size**: `size("hello")` → `5` - -#### ❌ String Indexing Not Supported -- **String indexing**: `"hello"[1]` is **NOT** supported (returns "No such key" error) -- **Workaround**: Use `substring()` function (when available) or Python context functions - -### ✅ Collection Macros -- **all()**: `[1,2,3].all(x, x > 0)` → `True` -- **exists()**: `[1,2,3].exists(x, x == 2)` → `True` -- **filter()**: `[1,2,3].filter(x, x > 1)` → `[2.0, 3.0]` (with type coercion) -- **map()**: Limited due to type system restrictions ⚠️ **PARTIAL** (requires type-compatible operations) - -### ❌ Missing Collection Functions -- **fold()**: `[1,2,3].fold(0, sum, sum + x)` - Collection aggregation ❌ **NOT AVAILABLE** -- **reduce()**: `reduce([1,2,3], 0, sum + x)` - Reduction operations ❌ **NOT AVAILABLE** - -### ✅ Python Integration -- **Type conversion**: Seamless Python ↔ CEL type mapping -- **Context variables**: Access Python objects in expressions -- **Custom functions**: Call Python functions from CEL expressions -- **Error handling**: Proper exception propagation -- **Performance**: Efficient evaluation for frequent operations - ---- - -## 👩‍💻 For Developers Using This Library - -This section focuses on what you need to know to use CEL effectively in your applications. 
- -### 🔧 Safe Patterns & Workarounds - -#### String Processing Workarounds - -**Using cel.stdlib (Recommended)** - -This library provides Python implementations of missing CEL functions: - -```python -from cel import Context, evaluate -from cel.stdlib import add_stdlib_to_context - -# Add all standard library functions at once -context = Context() -add_stdlib_to_context(context) - -# substring() is now available as a function (not a method) -result = evaluate('substring("hello world", 0, 5)', context) # → "hello" -result = evaluate('substring("hello world", 6)', context) # → "world" - -# Note: Use function syntax, not method syntax -# ✅ substring("hello", 2, 4) - correct -# ❌ "hello".substring(2, 4) - not supported -``` - -**Using Custom Python Functions** - -You can also add your own custom functions: - -```python -from cel import Context, evaluate - -# Add custom functions for missing CEL features -context = Context() -context.add_function("lower", str.lower) -context.add_function("upper", str.upper) -context.add_function("find", str.find) - -# Add variables to the context -context.add_variable("name", "ALICE") -context.add_variable("text", "hello world") - -# Use Python functions in CEL expressions -result = evaluate('lower(name)', context) # → "alice" -result = evaluate('find(text, "world")', context) # → 6 -``` - -#### Type Safety Best Practices -```python -from cel import evaluate - -# ✅ SAFE: Explicit type conversions for mixed arithmetic -result = evaluate("int(value) + 1", {"value": "42"}) # → 43 - -# ⚠️ RISKY: Mixed int/uint arithmetic - use explicit conversion -# evaluate("1 + 2u") # This will fail -result = evaluate("1 + int(2u)") # → 3 (safe alternative) - -# ✅ SAFE: Use has() checks for optional fields -safe_expr = 'has(user.profile) && user.profile.verified' -result = evaluate(safe_expr, {"user": {}}) # → False (graceful handling) -``` - -#### Production-Safe Error Handling -```python -from cel import evaluate - -def safe_evaluate(expression, 
context): - """Wrapper for production CEL evaluation with proper error handling.""" - try: - return evaluate(expression, context) - except ValueError as e: - # Parse/syntax errors - log and return safe default - print(f"CEL syntax error: {e}") - return False # Fail-safe default - except RuntimeError as e: - # Undefined variables/functions - log and return safe default - print(f"CEL runtime error: {e}") - return False # Fail-safe default - except TypeError as e: - # Type mismatches - log and return safe default - print(f"CEL type error: {e}") - return False # Fail-safe default - -# Usage in access control (always fail-safe) -policy_expr = "user.verified && user.role == 'admin'" -user_context = {"user": {"verified": True, "role": "admin"}} -access_granted = safe_evaluate(policy_expr, user_context) -``` - -### 📚 What Works Reliably - -Use these features with confidence in production: - -- **Core data types**: int, float, bool, string, bytes, lists, maps -- **Arithmetic**: `+`, `-`, `*`, `/`, `%` (watch mixed types) -- **Comparisons**: `==`, `!=`, `<`, `>`, `<=`, `>=` -- **Logical operations**: `&&`, `!` (avoid `||` return values) -- **String operations**: `contains()`, `startsWith()`, `endsWith()`, `matches()` -- **Collection operations**: `size()`, `has()`, indexing with `[]` -- **Macros**: `all()`, `exists()`, `filter()` (ensure type consistency) -- **Type conversions**: `string()`, `int()`, `double()`, `bytes()` -- **Date/time**: `timestamp()`, `duration()` with proper ISO formats - ---- - -## 🔧 For Maintainers & Contributors - -This section covers upstream work, detection strategies, and contribution opportunities. - -### Known Issues & Missing Features - -### ❌ Actually Missing CEL Specification Features - -#### 1. 
String Utility Functions (Upstream Priority: HIGH) -- **Status**: Not implemented in cel v0.11.1 -- **Detection**: ✅ Comprehensive detection for all missing functions -- **Missing functions**: - - `lowerAscii()` - lowercase conversion - - `upperAscii()` - uppercase conversion - - `indexOf(substring)` - find position in strings - - `lastIndexOf(substring)` - find last occurrence - - `substring(start, end)` - extract substring - - `replace(old, new)` - replace substrings - - `split(delimiter)` - split into list - - `join(delimiter, list)` - join list to string - -**Example of missing functionality**: -```cel -// Should work but doesn't: -"Hello".lowerAscii() // case conversion -"hello world".indexOf("world") // substring search -"hello,world".split(",") // string splitting -``` - -**Impact**: Medium - useful for string processing -**Recommendation**: Contribute to cel crate upstream - -#### 2. Mixed Signed/Unsigned Integer Arithmetic -- **Status**: Partially supported -- **Detection**: ✅ Comprehensive detection for mixed operations -- **CEL Spec**: Supports both `int` and `uint` types with `u` suffix (`1u`, `42u`) -- **Our Implementation**: - - ✅ Unsigned literals work: `1u`, `42u` → Python `int` - - ✅ Pure unsigned arithmetic: `1u + 2u` → `3` - - ❌ Mixed arithmetic fails: `1 + 2u` throws "Unsupported binary operator" -- **Workaround**: Use explicit conversion: `uint(1) + 2u` or `int(2u) + 1` -- **Impact**: Medium - requires careful type management in expressions - -#### 3. 
Type Introspection Function (Upstream Priority: HIGH) -- **Status**: Not implemented in cel v0.11.1, but foundation exists -- **Detection**: ✅ Full detection with expected behavior tests -- **Missing function**: `type(value) -> string` -- **CEL Spec**: Should return runtime type as string -- **Example**: `type(42)` should return `"int"` -- **Our Implementation**: Throws "Undeclared reference to 'type'" -- **Recent Progress**: Upstream has introduced comprehensive type system infrastructure -- **Impact**: Medium - useful for dynamic type checking -- **Recommendation**: This function may be available in future releases - -#### 4. Mixed-Type Arithmetic in Macros (Upstream Priority: MEDIUM) -- **Status**: Type coercion issues in collection operations -- **Problem**: `[1,2,3].map(x, x * 2)` fails with "Unsupported binary operator 'mul': Int(1), Float(2.0)" -- **Impact**: Medium - affects advanced collection processing -- **Workaround**: Ensure type consistency in macro expressions -- **Recommendation**: Better type coercion in cel crate - -#### 5. Bytes Concatenation (Upstream Priority: LOW) -- **Status**: Not implemented in cel v0.11.1 -- **CEL Spec**: `b'hello' + b'world'` should return `b'helloworld'` -- **Our Implementation**: Throws "Unsupported binary operator" error -- **Workaround**: `bytes(string(part1) + string(part2))` -- **Impact**: Low - rarely used in practice - -#### 6. Advanced Built-ins (Upstream Priority: LOW) -- **Detection**: ✅ Full detection for all missing functions -**Missing functions**: -- Math: `ceil()`, `floor()`, `round()` - Mathematical functions -- Collection: `fold()`, `reduce()` - Collection aggregation functions -- Collection: Enhanced `in` operator behaviors -- URL/IP: `isURL()`, `isIP()` - Validation functions (available in some CEL implementations) - -#### 7. 
Optional Values (Future Feature) -- **Detection**: ✅ Full detection with expected behavior tests -**Missing features**: -- `optional.of(value)` - create optional -- `optional.orValue(default)` - unwrap with default -- `?` suffix for optional chaining - -**Recent Progress**: Upstream has introduced optional type infrastructure, suggesting these features may be implemented in future releases. - -### ⚠️ Behavioral Differences - -#### 1. OR Operator Behavior (CRITICAL ISSUE) -- **Detection**: ✅ We monitor for when this behavior gets fixed upstream -- **Status**: JavaScript-like behavior instead of CEL spec compliance -- **Upstream Priority**: **CRITICAL** - This affects specification conformance - -#### 2. Type Coercion in Logical Operations -- **Our Implementation**: Performs Python-like truthiness evaluation -- **CEL Spec**: May have different rules for type coercion -- **Example**: Empty strings, zero values treated as falsy -- **Impact**: Low - generally intuitive behavior - - -## 🔮 Future Improvements - -The underlying cel-rust implementation continues to evolve with improvements that will benefit this Python wrapper: - -### **Enhanced Type System** -- **Type Introspection**: Infrastructure being developed for the missing `type()` function -- **Better Type Checking**: More precise type information and operation support detection -- **Optional Types**: Foundation exists for safer null handling with optional values -- **Improved Error Messages**: Enhanced type information in error reporting - -### **Potential Future Features** -```cel -// May be available in future releases -type(42) // → "int" -type("hello") // → "string" -type([1, 2, 3]) // → "list" - -// Optional value handling -optional.of(value) // Create optional value -value.orValue(default) // Unwrap with default -field?.subfield?.property // Optional chaining -``` - -### **Development Benefits** -- **Backward Compatibility**: All improvements maintain API stability -- **Transparent Upgrades**: New features 
will be additive, not breaking -- **Better Standard Library**: Infrastructure exists for implementing missing string functions -- **CEL Spec Alignment**: Closer alignment with official CEL specification - -## Performance Characteristics - -### Strengths -- **Expression parsing**: Efficiently handled by Rust cel crate -- **Type conversion**: Optimized Python ↔ Rust boundaries -- **Memory usage**: Reasonable for typical use cases -- **Evaluation speed**: Microsecond-level evaluation times - -### Tested Performance Areas -- Large list/dict conversions: Handles 10,000+ elements -- Nested structure traversal: Deep object access -- String processing: Unicode-safe operations -- Mixed-type arithmetic: Efficient numeric operations - -## Error Handling - -| Feature | Status | Python Behavior | Notes | -| --- | --- | --- | --- | -| Parse errors | ✅ Supported | Raises `ValueError` | All syntax errors handled gracefully | -| Runtime errors | ✅ Supported | Raises `RuntimeError` | Undefined variables/functions, function execution errors | -| Type errors | ✅ Supported | Raises `TypeError` | Type mismatch detection | -| Undefined variables | ✅ Supported | Raises `RuntimeError` | Clear error messages | - -### Parser Error Handling ✅ - -All malformed syntax is now handled gracefully with proper Python exceptions: - -**Malformed syntax that raises `ValueError`:** -- Unclosed quotes: `'timestamp("2024-01-01T00:00:00Z")` -- Mixed quotes: `"hello'` or `'hello"` -- Unmatched brackets/parentheses in complex expressions - -**Examples of safe error handling:** -```python -from cel import evaluate - -# All of these now raise clean ValueError exceptions: -try: - evaluate("'unclosed quote", {}) -except ValueError as e: - print(f"Parse error: {e}") - -try: - evaluate('"mixed quotes\'', {}) -except ValueError as e: - print(f"Parse error: {e}") -``` - -**Consistent Behavior:** -Both the CLI tool and the core `evaluate()` function now handle all malformed input consistently by raising appropriate 
Python exceptions instead of panicking. - -## Test Coverage Analysis - -### Test Distribution (164 total tests) - -| Category | File | Test Count | Coverage Level | -|----------|------|------------|----------------| -| Basic Operations | test_basics.py | 42 | ✅ Comprehensive | -| Arithmetic | test_arithmetic.py | 31 | ✅ Comprehensive | -| Type Conversion | test_types.py | 23 | ✅ Comprehensive | -| Datetime | test_datetime.py | 25 | ✅ Comprehensive | -| Context | test_context.py | 11 | ✅ Good | -| Logical Operators | test_logical_operators.py | 12 | ✅ Good | -| Parser Errors | test_parser_errors.py | 10 | ✅ Good | -| Performance | test_performance_verification.py | 6 | ✅ Basic | -| Documentation | test_documentation.py | 10 | ✅ Good | -| Functions | test_functions.py | 2 | ⚠️ Minimal | -| Edge Cases | test_edge_cases.py | 1 | ⚠️ Minimal | - -### Coverage Gaps -- **String method testing**: Limited to basic operations -- **Parser error recovery**: All malformed input now handled gracefully -- **Boundary value testing**: Some edge cases not covered -- **Unicode/encoding edge cases**: Basic coverage only - -### 🎯 Upstream Contribution Priorities - -#### High Priority (Ready for Contribution) -1. **String utility functions** - ✅ **Detection Ready** (`test_upstream_detection.py`) - - Functions: `lowerAscii`, `upperAscii`, `indexOf`, `lastIndexOf`, `substring`, `replace`, `split`, `join` - - Impact: **MEDIUM** - Widely used in string processing applications - - Contribution path: cel crate standard library expansion - -2. **OR operator CEL spec compliance** - ✅ **Detection Ready** - - Issue: Returns original values instead of booleans - - Impact: **HIGH** - Breaks specification conformance - - Contribution path: Core logical operation fixes - -3. 
**Type introspection function** - ✅ **Detection Ready** (`test_upstream_detection.py`) - - Function: `type()` for runtime type checking - - Impact: **MEDIUM** - Useful for dynamic expressions - - Contribution path: Leverage existing type system infrastructure - -#### Medium Priority (Development Needed) -4. **Mixed-type arithmetic in macros** - ✅ **Detection Ready** - - Issue: Type coercion problems in collection operations - - Impact: **MEDIUM** - Affects advanced collection processing - - Contribution path: Macro type system improvements - -5. **Mixed int/uint arithmetic** - - Issue: `1 + 2u` operations fail - - Impact: **MEDIUM** - Requires careful type management - - Contribution path: Arithmetic type coercion enhancements - -#### Low Priority (Future Features) -6. **Collection aggregation functions** - ✅ **Detection Ready** - - Functions: `sum()`, `fold()`, `reduce()` - - Impact: **LOW** - Can be implemented via Python context - - Contribution path: Standard library expansion - -7. **Math functions** - ✅ **Detection Ready** - - Functions: `ceil`, `floor`, `round` - - Impact: **LOW** - Can be implemented via Python context - - Contribution path: Standard library expansion - -8. **Optional value handling** - ✅ **Detection Ready** - - Features: `optional.of()`, `.orValue()`, `?` chaining - - Impact: **LOW** - Alternative patterns exist - - Contribution path: Type system extensions - -### 🔧 Local Improvement Opportunities - -#### High Impact (Python Library) -1. **Enhanced error handling** - Better Python exception mapping and messages -2. **Performance benchmarking** - Systematic performance testing and optimization -3. **Comprehensive testing** - Cover newly discovered working features - -#### Medium Impact (Documentation & Tooling) -4. **Local utility functions** - Implement missing string functions via Python context -5. **Migration guides** - Help users transition from other CEL implementations -6. 
**Best practices documentation** - Safe patterns and workarounds - -### 🎬 Immediate Actions for Contributors - -1. ✅ **Monitoring system active** - All issues have upstream detection -2. 🔄 **Priority: OR operator fix** - Most critical specification compliance issue -3. 📝 **Priority: String utilities** - High-value, lower-risk contribution opportunity -4. 🚀 **Engage upstream** - Discuss contribution strategy with cel crate maintainers - -## Contributing - -When adding new features or fixing compliance issues: - -1. **Check CEL specification** at https://github.com/google/cel-spec -2. **Add comprehensive tests** for both positive and negative cases -3. **Document behavior** especially if it differs from spec -4. **Update this compliance document** with changes -5. **Consider upstream contributions** to cel crate - -## Related Resources - -- **CEL Specification**: https://github.com/google/cel-spec -- **cel crate**: https://crates.io/crates/cel -- **CEL Language Definition**: https://github.com/google/cel-spec/blob/master/doc/langdef.md -- **CEL Homepage**: https://cel.dev/ \ No newline at end of file diff --git a/docs/reference/cli-reference.md b/docs/reference/cli-reference.md index 14bdcaf..c62846d 100644 --- a/docs/reference/cli-reference.md +++ b/docs/reference/cli-reference.md @@ -17,7 +17,7 @@ The `cel` command-line tool provides a convenient way to evaluate CEL expression ## Standard Library Functions -The CLI automatically includes all [standard library functions](../reference/cel-compliance.md#using-celstdlib-recommended) from `cel.stdlib`. These functions are available without any additional setup: +The CLI automatically includes all standard library functions from `cel.stdlib`. 
These functions are available without any additional setup: ### Available Functions diff --git a/docs/reference/python-api.md b/docs/reference/python-api.md index aa03439..2177648 100644 --- a/docs/reference/python-api.md +++ b/docs/reference/python-api.md @@ -319,6 +319,75 @@ result = evaluate('validate_email("invalid-email")', context) assert result == False ``` +##### set_variable_resolver(resolver: Callable[[str], Any]) -> None + +Register a callback for **lazy variable resolution**. Instead of materializing +every variable upfront with `add_variable`, the resolver is invoked on demand +with the name of each variable an expression references. + +**Parameters:** +- `resolver`: A callable taking a variable name (`str`) and returning either + the value or `None`. Returning `None` falls through to variables registered + via `add_variable`. + +**When to use it:** +- The full variable set is expensive to build (database queries, file I/O, + remote API calls) and you only want to pay for variables the expression + actually references. +- The variable set isn't known ahead of time — the resolver decides on the fly. + +**Behaviour notes:** +- The resolver is consulted **before** statically-registered variables. + Return `None` from the resolver to delegate to those. +- Exceptions raised by the resolver are logged and treated as `None` — they + do not propagate to the caller. +- The callback runs synchronously while the GIL is held; keep it fast, since evaluation blocks until it returns.
+ +**Example — only load what's referenced:** + +```python +import json +import os +import tempfile +from cel import Context, evaluate + +with tempfile.TemporaryDirectory() as cfg_dir: + # Each "setting" is a file on disk + for name, content in [("max_users", "100"), ("admin_email", '"ops@example.com"')]: + with open(os.path.join(cfg_dir, name), "w") as f: + f.write(content) + + loaded = [] + + def load_setting(name): + path = os.path.join(cfg_dir, name) + if not os.path.exists(path): + return None + loaded.append(name) + with open(path) as f: + return json.loads(f.read()) + + ctx = Context() + ctx.set_variable_resolver(load_setting) + + # Only `max_users` is referenced — `admin_email` is never loaded + assert evaluate("max_users > 50", ctx) is True + assert loaded == ["max_users"] +``` + +**Example — combine with statically-registered variables:** + +```python +from cel import Context, evaluate + +# Resolver handles dynamic lookups; static variables provide defaults +ctx = Context(variables={"environment": "prod"}) +ctx.set_variable_resolver(lambda name: {"feature_flags": ["new_ui"]}.get(name)) + +assert evaluate("environment", ctx) == "prod" # → static +assert evaluate("'new_ui' in feature_flags", ctx) is True # → resolver +``` + --- ## Type System @@ -459,26 +528,26 @@ from cel import evaluate # String + int operations raise TypeError try: evaluate('"hello" + 42') # String + int - # → TypeError: Unsupported addition operation between string and int + # → TypeError: No such overload (or Unsupported addition operation, depending on operand order) assert False, "Should have raised TypeError" except TypeError as e: - assert "Unsupported addition operation" in str(e) + assert "overload" in str(e).lower() or "Unsupported addition operation" in str(e) # Mixed signed/unsigned int operations raise TypeError try: - evaluate("1u + 2") # Mixed signed/unsigned int - # → TypeError: Cannot mix signed and unsigned integers + evaluate("1u + 2") # Mixed signed/unsigned int + # → 
TypeError: Cannot mix signed and unsigned integers (or "No such overload" depending on order) assert False, "Should have raised TypeError" except TypeError as e: - assert "Cannot mix signed and unsigned integers" in str(e) + assert "overload" in str(e).lower() or "signed and unsigned" in str(e) # Unsupported multiplication raises TypeError try: evaluate('"text" * "more"') # String multiplication - # → TypeError: Unsupported multiplication operation between strings + # → TypeError: No such overload (or Unsupported multiplication operation) assert False, "Should have raised TypeError" except TypeError as e: - assert "Unsupported multiplication operation" in str(e) + assert "overload" in str(e).lower() or "Unsupported multiplication operation" in str(e) ``` #### Mixed Type Arithmetic Errors @@ -492,7 +561,7 @@ from cel import evaluate try: evaluate("1 + 2.5") # int + double except TypeError as e: - assert "Unsupported addition operation" in str(e) + assert "overload" in str(e).lower() or "Unsupported addition operation" in str(e) print(f"Mixed arithmetic error: {e}") # Mixed types from context @@ -500,7 +569,7 @@ context = {"int_val": 10, "float_val": 2.5} try: evaluate("int_val * float_val", context) except TypeError as e: - assert "Unsupported multiplication operation" in str(e) + assert "overload" in str(e).lower() or "Unsupported multiplication operation" in str(e) print(f"Context type mixing error: {e}") # To fix mixed arithmetic, use consistent types: diff --git a/docs/tutorials/cel-language-basics.md b/docs/tutorials/cel-language-basics.md index 1163569..9fc5755 100644 --- a/docs/tutorials/cel-language-basics.md +++ b/docs/tutorials/cel-language-basics.md @@ -24,7 +24,7 @@ Python CEL implements a comprehensive subset of the CEL specification: ✅ **Type Functions**: `has()`, conversion functions ✅ **Python Integration**: Custom functions, Python ↔ CEL type conversion -See [CEL Compliance](../reference/cel-compliance.md) for detailed feature status. 
+See the [CEL specification](https://github.com/google/cel-spec) for the language reference. ## Literals @@ -356,13 +356,13 @@ Now that you've learned the complete CEL syntax, choose your next path based on - **[Thinking in CEL](thinking-in-cel.md)** - Core concepts, design principles, and when to use CEL **🛠️ Solve Specific Problems:** -- **[Access Control Policies](../how-to-guides/access-control-policies.md)** - Build sophisticated permission systems +- **[Access Control Policies](../how-to-guides/access-control-policies.md)** - Build permission systems - **[Business Logic & Data Transformation](../how-to-guides/business-logic-data-transformation.md)** - Implement configurable business rules -- **[Production Patterns & Best Practices](../how-to-guides/production-patterns-best-practices.md)** - Deploy CEL in production environments +- **[Error Handling](../how-to-guides/error-handling.md)** - Exception types and safe-evaluation patterns **📖 Reference Material:** -- **[CEL Compliance](../reference/cel-compliance.md)** - Detailed feature implementation status - **[Python API Reference](../reference/python-api.md)** - Complete Python API documentation +- **[CEL specification](https://github.com/google/cel-spec)** - The official CEL spec **💡 Pro Tip:** If you're new to CEL, we recommend: **Language Basics → [Your First Integration](your-first-integration.md) → [Access Control Policies](../how-to-guides/access-control-policies.md)** diff --git a/docs/tutorials/extending-cel.md b/docs/tutorials/extending-cel.md index 1f8cf23..d19c25a 100644 --- a/docs/tutorials/extending-cel.md +++ b/docs/tutorials/extending-cel.md @@ -340,7 +340,7 @@ This example demonstrates how custom functions enable complex business logic whi ### Function Best Practices -These patterns become essential when building production applications like those shown in [Access Control Policies](../how-to-guides/access-control-policies.md) and [Production Patterns & Best 
Practices](../how-to-guides/production-patterns-best-practices.md). +These patterns become essential when building production applications like those shown in [Access Control Policies](../how-to-guides/access-control-policies.md). #### 1. Error Handling @@ -674,12 +674,11 @@ Choose your next step based on what you want to build: - **[Business Logic & Data Transformation](../how-to-guides/business-logic-data-transformation.md)** - Build configurable rule engines with advanced Context patterns **🚀 Production Deployment:** -- **[Production Patterns & Best Practices](../how-to-guides/production-patterns-best-practices.md)** - Performance optimization, security, and integration patterns -- **[Error Handling Guide](../how-to-guides/error-handling.md)** - Robust error handling for production systems +- **[Error Handling Guide](../how-to-guides/error-handling.md)** - Exception types and safe-evaluation patterns **📖 Reference Material:** - **[Python API Reference](../reference/python-api.md)** - Complete API documentation for advanced usage -- **[CEL Compliance](../reference/cel-compliance.md)** - Feature support and limitations +- **[CEL specification](https://github.com/google/cel-spec)** - The official CEL spec **💡 Pro Tip:** With these advanced skills, you're ready to tackle enterprise-scale applications. Start with [Access Control Policies](../how-to-guides/access-control-policies.md) or [Business Logic & Data Transformation](../how-to-guides/business-logic-data-transformation.md) based on your use case. 
diff --git a/docs/tutorials/thinking-in-cel.md b/docs/tutorials/thinking-in-cel.md index 2053f51..2b8f370 100644 --- a/docs/tutorials/thinking-in-cel.md +++ b/docs/tutorials/thinking-in-cel.md @@ -461,12 +461,12 @@ Choose your path based on your current experience and goals: **🏢 Solve Specific Problems:** - **[Access Control Policies](../how-to-guides/access-control-policies.md)** - Perfect CEL use case - policies and security rules - **[Business Logic & Data Transformation](../how-to-guides/business-logic-data-transformation.md)** - Configurable business rules and validation -- **[Production Patterns & Best Practices](../how-to-guides/production-patterns-best-practices.md)** - Deploy CEL safely in production +- **[Error Handling](../how-to-guides/error-handling.md)** - Exception types and safe-evaluation patterns **💡 Recommended Learning Paths:** - **New to CEL:** Thinking in CEL → [Your First Integration](your-first-integration.md) → [Access Control Policies](../how-to-guides/access-control-policies.md) - **Have CEL experience:** Use this as a design reference when building complex applications -- **Evaluating CEL:** This tutorial + [CEL Compliance](../reference/cel-compliance.md) will help you decide if CEL fits your needs +- **Evaluating CEL:** This tutorial + the [CEL spec](https://github.com/google/cel-spec) will help you decide if CEL fits your needs Armed with these concepts, you're ready to build safe, maintainable, and powerful expression-based systems! \ No newline at end of file diff --git a/docs/tutorials/your-first-integration.md b/docs/tutorials/your-first-integration.md index 2eb2846..53ee6dc 100644 --- a/docs/tutorials/your-first-integration.md +++ b/docs/tutorials/your-first-integration.md @@ -565,7 +565,7 @@ Congratulations! You've mastered the Context class and custom Python functions. 
**🏢 Build Production Applications:** - **[Access Control Policies](../how-to-guides/access-control-policies.md)** - Start here for permission systems and security rules - **[Business Logic & Data Transformation](../how-to-guides/business-logic-data-transformation.md)** - Configurable rule engines and data processing -- **[Production Patterns & Best Practices](../how-to-guides/production-patterns-best-practices.md)** - Flask/FastAPI integration, performance, and security +- **[Error Handling](../how-to-guides/error-handling.md)** - Exception types and safe-evaluation patterns **💡 Recommended Next Steps:** diff --git a/mkdocs.yml b/mkdocs.yml index d634b87..db31046 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -80,7 +80,6 @@ nav: - Extending CEL: tutorials/extending-cel.md - Cookbook: - Recipe Index: cookbook.md - - Production Patterns & Best Practices: how-to-guides/production-patterns-best-practices.md - Business Logic & Data Transformation: how-to-guides/business-logic-data-transformation.md - Dynamic Query Filters: how-to-guides/dynamic-query-filters.md - Access Control Policies: how-to-guides/access-control-policies.md @@ -89,7 +88,6 @@ nav: - Reference: - Python API: reference/python-api.md - CLI Reference: reference/cli-reference.md - - CEL Compliance: reference/cel-compliance.md - Development: - Contributing & Developer Guide: contributing.md diff --git a/python/cel/cel.pyi b/python/cel/cel.pyi index b2d0582..ab258fd 100644 --- a/python/cel/cel.pyi +++ b/python/cel/cel.pyi @@ -26,6 +26,14 @@ class Context: """Add a function to the context.""" ... + def set_variable_resolver(self, resolver: Callable[[str], Any]) -> None: + """Register a callback for lazy variable resolution. + + The callback receives a variable name and returns the value, or None + to fall through to variables added via add_variable(). + """ + ... + def update(self, variables: Dict[str, Any]) -> None: """Update context with variables from a dictionary.""" ... 
diff --git a/src/context.rs b/src/context.rs index de09bd8..76e63e5 100644 --- a/src/context.rs +++ b/src/context.rs @@ -43,6 +43,9 @@ use std::collections::HashMap; pub struct Context { pub variables: HashMap, pub functions: HashMap>, + /// Optional Python callable for lazy variable resolution. Invoked with a + /// variable name; returns the value (or None to fall through to `variables`). + pub resolver: Option<Py<PyAny>>, } #[pyo3::pymethods] @@ -122,6 +125,7 @@ impl Context { let mut context = Context { variables: HashMap::new(), functions: HashMap::new(), + resolver: None, }; if let Some(variables) = variables { @@ -203,6 +207,42 @@ impl Context { self.functions.insert(name, function); } + /// Registers a Python callable for lazy variable resolution. + /// + /// When evaluating an expression, CEL will call `resolver(name)` for each + /// variable name it looks up. The callback should return the value + /// (any Python type convertible to a CEL value) or `None` to fall through + /// to variables registered with `add_variable`. + /// + /// This is useful when materializing the full set of variables up front is + /// expensive — for example, a dict-like backed by a database, filesystem, + /// or remote API where you only want to fetch values the expression + /// actually references. + /// + /// Args: + /// resolver (Callable[[str], Any]): Function that takes a variable name + /// and returns the value or None. + /// + /// Notes: + /// - The resolver is consulted *before* explicitly-registered variables. + /// Return None from the resolver to delegate to those. + /// - Exceptions raised by the resolver are logged and treated as None. + /// - The callback is invoked from Rust holding the GIL; keep it simple + /// and avoid blocking on long-running I/O if possible. + /// + /// Example: + /// >>> from cel import Context, evaluate + /// >>> store = {"user": {"name": "Alice", "age": 30}} + /// >>> def lookup(name): + /// ... 
return store.get(name) + /// >>> ctx = Context() + /// >>> ctx.set_variable_resolver(lookup) + /// >>> evaluate("user.name", ctx) + /// 'Alice' + fn set_variable_resolver(&mut self, resolver: Py<PyAny>) { + self.resolver = Some(resolver); + } + /// Adds a variable to the context. /// /// Variables added to the context become available for use in CEL expressions. diff --git a/src/lib.rs b/src/lib.rs index 2cd6efa..f6122e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,13 @@ mod context; +use ::cel::context::VariableResolver; use ::cel::objects::{Key, OptionalValue, TryIntoValue}; use ::cel::{Context as CelContext, ExecutionError, Program, Value}; use log::warn; -use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; +use pyo3::exceptions::{ + PyIndexError, PyKeyError, PyOverflowError, PyRuntimeError, PyTypeError, PyValueError, + PyZeroDivisionError, +}; use pyo3::prelude::*; use pyo3::BoundObject; use std::panic::{self, AssertUnwindSafe}; @@ -316,13 +320,50 @@ impl<'a> RustyPyType<'a> { } } +/// Bridges a Python callable to cel-rust's `VariableResolver` trait so users +/// can resolve variables lazily on demand instead of materializing them up front. +/// +/// The callback receives the variable name as a string and returns either a +/// supported Python value or `None` (meaning "not handled — fall back to the +/// statically-defined variables map"). Any exception raised by the callback +/// is treated as "not handled" and a warning is logged. 
+struct PyVariableResolver { + callback: Py<PyAny>, +} + +impl VariableResolver for PyVariableResolver { + fn resolve(&self, variable: &str) -> Option<Value> { + Python::attach(|py| { + let result = match self.callback.call1(py, (variable,)) { + Ok(r) => r, + Err(e) => { + warn!("Variable resolver raised for '{variable}': {e}"); + return None; + } + }; + if result.is_none(py) { + return None; + } + let bound = result.bind(py); + match RustyPyType(bound).try_into_value() { + Ok(v) => Some(v), + Err(e) => { + warn!("Variable resolver for '{variable}' returned an unsupported value: {e}"); + None + } + } + }) + } +} + /// Build a CEL execution environment from an optional evaluation context. /// /// This consolidates the shared logic used by `evaluate()` and `Program.execute()` /// to keep behavior consistent between the two entrypoints. -fn build_environment( +fn build_environment<'r>( evaluation_context: Option<&Bound<'_, PyAny>>, - environment: &mut CelContext<'_>, + environment: &mut CelContext<'r>, + resolver_out: &'r mut Option<PyVariableResolver>, ) -> PyResult<()> { let mut ctx = context::Context::new(None, None)?; @@ -333,6 +374,11 @@ fn build_environment( // Clone variables and functions into our local Context ctx.variables = py_context_ref.variables.clone(); ctx.functions = py_context_ref.functions.clone(); + if let Some(cb) = py_context_ref.resolver.as_ref() { + *resolver_out = Some(PyVariableResolver { + callback: Python::attach(|py| cb.clone_ref(py)), + }); + } } else if let Ok(py_dict) = evaluation_context.cast::<PyDict>() { // User passed in a dict - let's process variables and functions from the dict ctx.update(py_dict)?; @@ -414,6 +460,13 @@ fn build_environment( } } + // Attach the lazy resolver if one was provided. The resolver lives in + // `*resolver_out` (caller-owned), and the cel::Context borrows it for + // its lifetime `'r`. 
+ if let Some(resolver) = resolver_out.as_ref() { + environment.set_variable_resolver(resolver); + } + Ok(()) } @@ -468,6 +521,31 @@ fn map_execution_error_to_python(error: &ExecutionError) -> PyErr { "Function '{function}' error: {message}. Check function arguments and their types." )) }, + ExecutionError::NoSuchOverload => { + PyTypeError::new_err( + "No such overload. The operation isn't defined for the given operand types — \ + for example, mixing signed and unsigned integers (1 + 2u), indexing into a \ + string, or using an unsupported operator. Use explicit conversion \ + (int(x), uint(x), double(x)) or check the CEL specification." + ) + }, + ExecutionError::Overflow(op, left, right) => { + PyOverflowError::new_err(format!( + "Arithmetic overflow in '{op}' on {left:?} and {right:?}." + )) + }, + ExecutionError::DivisionByZero(_) => { + PyZeroDivisionError::new_err("division by zero in CEL expression") + }, + ExecutionError::RemainderByZero(_) => { + PyZeroDivisionError::new_err("modulo by zero in CEL expression") + }, + ExecutionError::IndexOutOfBounds(value) => { + PyIndexError::new_err(format!("index out of bounds: {value:?}")) + }, + ExecutionError::NoSuchKey(name) => { + PyKeyError::new_err(name.to_string()) + }, _ => { // Fallback for any other execution errors - provide helpful message based on error content let error_str = format!("{error:?}"); @@ -720,7 +798,8 @@ impl TryIntoValue for RustyPyType<'_> { #[pyfunction(signature = (src, evaluation_context=None))] fn evaluate(src: String, evaluation_context: Option<&Bound<'_, PyAny>>) -> PyResult { let mut environment = CelContext::default(); - build_environment(evaluation_context, &mut environment)?; + let mut resolver_slot: Option<PyVariableResolver> = None; + build_environment(evaluation_context, &mut environment, &mut resolver_slot)?; // Use panic::catch_unwind to handle parser panics gracefully let program = panic::catch_unwind(|| Program::compile(&src)) @@ -756,7 +835,8 @@ fn execute_compiled_program( 
evaluation_context: Option<&Bound<'_, PyAny>>, ) -> PyResult> { let mut environment = CelContext::default(); - build_environment(evaluation_context, &mut environment)?; + let mut resolver_slot: Option<PyVariableResolver> = None; + build_environment(evaluation_context, &mut environment, &mut resolver_slot)?; // Use panic::catch_unwind to handle execution panics gracefully // AssertUnwindSafe is needed because the environment contains function closures diff --git a/tests/test_arithmetic.py b/tests/test_arithmetic.py index 483a6ed..0a80ffa 100644 --- a/tests/test_arithmetic.py +++ b/tests/test_arithmetic.py @@ -84,20 +84,16 @@ def test_invalid_expression_raises_parse_value_error(self): class TestBytesArithmetic: """Test bytes operations and concatenation.""" - @pytest.mark.xfail( - reason="cel-interpreter 0.10.0 does not implement bytes concatenation (CEL spec requires it)" - ) def test_bytes_concatenation_context(self): - """CEL spec requires bytes concatenation with + operator, but cel-interpreter 0.10.0 doesn't implement it.""" + """CEL spec: bytes concatenation with + operator (supported as of cel 0.13).""" part1 = b"hello" part2 = b"world" result = cel.evaluate("part1 + b' ' + part2", {"part1": part1, "part2": part2}) assert result == b"hello world" - def test_bytes_concatenation_not_supported(self): - """Test direct bytes concatenation (CEL spec requires this but cel-interpreter 0.10.0 doesn't support it).""" - with pytest.raises(TypeError, match="Unsupported addition operation"): - cel.evaluate("b'hello' + b'world'") + def test_bytes_concatenation_literal(self): + """Test direct bytes concatenation between literals (supported as of cel 0.13).""" + assert cel.evaluate("b'hello' + b'world'") == b"helloworld" def test_bytes_concatenation_workaround(self): """Test bytes concatenation workaround using string conversion.""" diff --git a/tests/test_boolean_coercion.py b/tests/test_boolean_coercion.py index 4ba903e..1aea116 100644 --- a/tests/test_boolean_coercion.py +++ 
b/tests/test_boolean_coercion.py @@ -19,33 +19,10 @@ def test_not_operator_with_boolean(self): def test_not_operator_with_non_boolean_fails(self): """Test that NOT operator correctly fails with non-boolean operands.""" - # Numbers should fail - with pytest.raises(ValueError, match="No such overload"): - evaluate("!0") - - with pytest.raises(ValueError, match="No such overload"): - evaluate("!1") - - with pytest.raises(ValueError, match="No such overload"): - evaluate("!42") - - # Strings should fail - with pytest.raises(ValueError, match="No such overload"): - evaluate("!''") - - with pytest.raises(ValueError, match="No such overload"): - evaluate("!'hello'") - - # Collections should fail - with pytest.raises(ValueError, match="No such overload"): - evaluate("![]") - - with pytest.raises(ValueError, match="No such overload"): - evaluate("!{}") - - # Null should fail - with pytest.raises(ValueError, match="No such overload"): - evaluate("!null") + non_bool_exprs = ["!0", "!1", "!42", "!''", "!'hello'", "![]", "!{}", "!null"] + for expr in non_bool_exprs: + with pytest.raises(TypeError, match="No such overload"): + evaluate(expr) def test_logical_and_with_boolean_operands(self): """Test AND operator with boolean operands (correct CEL behavior).""" @@ -55,16 +32,23 @@ def test_logical_and_with_boolean_operands(self): assert evaluate("false && false") is False def test_logical_and_with_mixed_types_fails(self): - """Test that AND operator correctly fails with mixed-type operands.""" - with pytest.raises(ValueError, match="No such overload"): - evaluate("'string' && true") + """Test that AND operator fails with mixed-type operands when no short-circuit applies. - with pytest.raises(ValueError, match="No such overload"): - evaluate("42 && false") + CEL spec: errors short-circuit through AND when the other operand is definitively + false. 
So `42 && false` returns false (short-circuit), but cases where the boolean + operand is true (or both are non-bool) propagate the type error. + """ + with pytest.raises(TypeError, match="No such overload"): + evaluate("'string' && true") - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): evaluate("true && 1") + def test_logical_and_err_resilient_short_circuit(self): + """CEL spec: `error && false` short-circuits to false even if LHS isn't a bool.""" + assert evaluate("42 && false") is False + assert evaluate("'string' && false") is False + def test_logical_or_with_boolean_operands(self): """Test OR operator with boolean operands (correct CEL behavior).""" assert evaluate("true || true") is True @@ -72,26 +56,25 @@ def test_logical_or_with_boolean_operands(self): assert evaluate("false || true") is True assert evaluate("false || false") is False - def test_logical_or_special_cel_behavior(self): - """Test OR operator's special CEL behavior with boolean first operand.""" - # When first operand is boolean false, returns second operand - assert evaluate("false || 99") == 99 - assert evaluate("false || 'text'") == "text" - assert evaluate("false || null") is None - - # When first operand is boolean true, short-circuits to true + def test_logical_or_err_resilient_short_circuit(self): + """CEL spec: `X || true` short-circuits to true regardless of LHS type.""" + # When second operand is boolean true, short-circuits to true even if first isn't bool assert evaluate("true || 99") is True assert evaluate("true || 'anything'") is True + assert evaluate("'string' || true") is True - def test_logical_or_with_non_boolean_first_operand_fails(self): - """Test that OR operator correctly fails when first operand is not boolean.""" - with pytest.raises(ValueError, match="No such overload"): - evaluate("42 || false") + def test_logical_or_propagates_type_error_when_no_short_circuit(self): + """OR raises when neither 
operand provides a definitive boolean to short-circuit.""" + with pytest.raises(TypeError, match="No such overload"): + evaluate("false || 99") + + with pytest.raises(TypeError, match="No such overload"): + evaluate("false || 'text'") - with pytest.raises(ValueError, match="No such overload"): - evaluate("'string' || true") + with pytest.raises(TypeError, match="No such overload"): + evaluate("42 || false") - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): evaluate("0 || 'default'") def test_ternary_operator_requires_boolean_condition(self): @@ -101,10 +84,10 @@ def test_ternary_operator_requires_boolean_condition(self): assert evaluate("false ? 'yes' : 'no'") == "no" # Non-boolean conditions should fail - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): evaluate("42 ? 'yes' : 'no'") - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): evaluate("'string' ? 
'yes' : 'no'") def test_boolean_comparisons_work_correctly(self): diff --git a/tests/test_context.py b/tests/test_context.py index d1da03c..dde8b74 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -107,3 +107,82 @@ def test_nested_context_none(): assert cel.evaluate("spec.host", cel_context) == "github.com" assert cel.evaluate("data['response-code']", cel_context) == "NOERROR" assert cel.evaluate("size(data.A)", cel_context) == 1 + + +class TestVariableResolver: + """Tests for lazy variable resolution via set_variable_resolver.""" + + def test_resolver_supplies_variable(self): + """Resolver callback can provide variables not registered statically.""" + ctx = cel.Context() + ctx.set_variable_resolver( + lambda name: {"name": "Alice", "age": 30} if name == "user" else None + ) + assert cel.evaluate("user.name", ctx) == "Alice" + assert cel.evaluate("user.age", ctx) == 30 + + def test_resolver_is_called_lazily(self): + """Resolver only fires for names the expression actually references.""" + accessed = [] + + def lookup(name): + accessed.append(name) + return {"limit": 50}.get(name) + + ctx = cel.Context() + ctx.set_variable_resolver(lookup) + assert cel.evaluate("limit > 10", ctx) is True + assert accessed == ["limit"] + + def test_resolver_none_falls_through_to_static_variables(self): + """Returning None from the resolver delegates to add_variable()-registered values.""" + ctx = cel.Context(variables={"static_var": 42}) + ctx.set_variable_resolver(lambda name: None) + assert cel.evaluate("static_var", ctx) == 42 + + def test_resolver_undefined_raises(self): + """When neither the resolver nor static variables supply a name, evaluate raises.""" + ctx = cel.Context() + ctx.set_variable_resolver(lambda name: None) + with pytest.raises(RuntimeError, match="Undefined variable or function"): + cel.evaluate("missing", ctx) + + def test_resolver_exception_is_swallowed(self): + """An exception from the resolver is treated as 'not handled' rather than 
propagated.""" + ctx = cel.Context(variables={"x": 7}) + + def explosive(name): + raise ValueError(f"boom on {name}") + + ctx.set_variable_resolver(explosive) + # Falls through to the static variable + assert cel.evaluate("x", ctx) == 7 + + def test_resolver_works_with_compiled_program(self): + """Resolver applies through compile()+execute(), not just evaluate().""" + program = cel.compile("user.name") + ctx = cel.Context() + ctx.set_variable_resolver(lambda name: {"name": "Bob"} if name == "user" else None) + assert program.execute(ctx) == "Bob" + + def test_resolver_returns_various_types(self): + """Resolver values can be any supported Python type.""" + + def lookup(name): + return { + "i": 42, + "f": 3.14, + "s": "hello", + "b": True, + "l": [1, 2, 3], + "m": {"k": "v"}, + }.get(name) + + ctx = cel.Context() + ctx.set_variable_resolver(lookup) + assert cel.evaluate("i", ctx) == 42 + assert cel.evaluate("f", ctx) == 3.14 + assert cel.evaluate("s", ctx) == "hello" + assert cel.evaluate("b", ctx) is True + assert cel.evaluate("size(l)", ctx) == 3 + assert cel.evaluate("m.k", ctx) == "v" diff --git a/tests/test_dual_mode_comprehensive.py b/tests/test_dual_mode_comprehensive.py index 1ace6d8..2b649e0 100644 --- a/tests/test_dual_mode_comprehensive.py +++ b/tests/test_dual_mode_comprehensive.py @@ -76,7 +76,7 @@ def test_mixed_arithmetic_fails_in_strict_mode(self): context = Context(ctx) if ctx else None # Should fail in Strict mode - with pytest.raises(TypeError, match="Unsupported.*operation"): + with pytest.raises(TypeError, match="overload|Unsupported"): evaluate(expr, context) def test_same_type_arithmetic_works_in_strict_mode(self): @@ -153,7 +153,7 @@ def test_comprehensions_with_explicit_mixed_types_fail(self): for expr in mixed_type_arithmetic_comprehensions: # Should fail due to mixed arithmetic inside comprehension - with pytest.raises(TypeError, match="Unsupported.*operation"): + with pytest.raises(TypeError, match="overload|Unsupported"): evaluate(expr) 
def test_comprehensions_with_mixed_comparisons_work(self): diff --git a/tests/test_enhanced_error_handling.py b/tests/test_enhanced_error_handling.py index 12b4cab..9ac4fdb 100644 --- a/tests/test_enhanced_error_handling.py +++ b/tests/test_enhanced_error_handling.py @@ -30,16 +30,15 @@ def test_undefined_function_runtime_error(self): assert "function name is spelled correctly" in error_msg def test_mixed_int_uint_arithmetic_type_error(self): - """Test that mixed signed/unsigned arithmetic raises TypeError with solution.""" + """Test that mixed signed/unsigned arithmetic raises TypeError with conversion advice.""" with pytest.raises(TypeError) as exc_info: cel.evaluate("1 + 2u", {}) error_msg = str(exc_info.value) - assert "Cannot mix signed and unsigned integers" in error_msg - assert ( - "Use explicit conversion: int(" in error_msg - or "Use explicit conversion: uint(" in error_msg - ) + # cel 0.13 routes int+uint through NoSuchOverload (no operand type info available); + # the generic message lists the common causes and conversion functions. 
+ assert "overload" in error_msg.lower() or "signed and unsigned" in error_msg.lower() + assert "int(" in error_msg or "uint(" in error_msg def test_unsupported_multiplication_type_error(self): """Test multiplication type errors provide conversion suggestions.""" @@ -47,8 +46,8 @@ def test_unsupported_multiplication_type_error(self): cel.evaluate("[1,2,3].map(x, x * 2.0)", {}) error_msg = str(exc_info.value) - assert "Unsupported multiplication operation" in error_msg - assert "Use explicit conversion if needed: double(" in error_msg + assert "overload" in error_msg.lower() or "multiplication" in error_msg.lower() + assert "int(" in error_msg or "double(" in error_msg or "uint(" in error_msg def test_unsupported_addition_type_error(self): """Test addition type errors for incompatible types.""" @@ -123,20 +122,24 @@ def test_mixed_arithmetic_provides_conversion_examples(self): assert "value" in error_msg def test_detailed_operation_error_messages(self): - """Test that different operations provide specific guidance.""" - test_cases = [ - ("1 - 'hello'", "subtraction operation", "numeric"), - ("'hello' / 2", "division operation", "numeric"), - ("true % false", "operation", "types"), - ] - - for expr, expected_op, expected_guidance in test_cases: + """Test that incompatible-type operations raise TypeError with conversion advice.""" + # cel 0.13 routes most of these through NoSuchOverload (no per-operand type info), + # so we assert on the exception type and the presence of conversion guidance + # rather than specific operation names. 
+ exprs = ["1 - 'hello'", "'hello' / 2", "true % false"] + + for expr in exprs: with pytest.raises(TypeError) as exc_info: cel.evaluate(expr, {}) - error_msg = str(exc_info.value) - assert expected_op in error_msg.lower() - assert expected_guidance in error_msg.lower() + error_msg = str(exc_info.value).lower() + assert "overload" in error_msg or "operation" in error_msg + assert ( + "int(" in error_msg + or "uint(" in error_msg + or "double(" in error_msg + or "type" in error_msg + ) class TestExceptionTypes: diff --git a/tests/test_issue16_string_literal_regression.py b/tests/test_issue16_string_literal_regression.py index 646531e..b0a2935 100644 --- a/tests/test_issue16_string_literal_regression.py +++ b/tests/test_issue16_string_literal_regression.py @@ -112,11 +112,11 @@ def test_mixed_expressions_with_actual_numbers(self): ctx = Context({"value": 0.4}) # Float in context # Mixed arithmetic should fail in strict mode - with pytest.raises(TypeError, match="Unsupported.*operation"): + with pytest.raises(TypeError, match="overload|Unsupported"): evaluate("1 + 2.5", ctx) # Mixed type with context variables should also fail - with pytest.raises(TypeError, match="Unsupported.*operation"): + with pytest.raises(TypeError, match="overload|Unsupported"): evaluate("value + 1", ctx) # 0.4 + 1 should fail in strict mode def test_complex_expressions_with_strings_and_numbers(self): diff --git a/tests/test_logical_operators.py b/tests/test_logical_operators.py index 1651fa2..693f3c1 100644 --- a/tests/test_logical_operators.py +++ b/tests/test_logical_operators.py @@ -105,37 +105,38 @@ def test_logical_with_null_values(self): """Test logical operators with null values.""" context = {"null_val": None, "true_val": True, "false_val": False} - # In CEL, null is generally falsy, but exact behavior may vary - # These tests verify current behavior + # CEL requires boolean operands for &&/||; null is not bool. 
cel 0.13 raises + # TypeError (NoSuchOverload) unless an err-resilient short-circuit applies. try: result = cel.evaluate("null_val && true_val", context) assert result is False or result is None - except ValueError: - # Some CEL implementations may throw errors for null in logical context + except (ValueError, TypeError): pass def test_logical_type_coercion(self): """Test that logical operators correctly reject mixed types per CEL specification. - CEL specification requires boolean operands for logical operators. - Mixed-type operations should fail with "No such overload". + CEL specification requires boolean operands for logical operators. Mixed-type + operations raise TypeError ("No such overload") unless an err-resilient + short-circuit applies (e.g. `X || true` returns true). """ - # These should fail - non-boolean operands not allowed per CEL spec - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): cel.evaluate("'string' && true") - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): cel.evaluate("'' && true") - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): cel.evaluate("42 || false") - with pytest.raises(ValueError, match="No such overload"): - cel.evaluate("0 || true") - - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): cel.evaluate("!'string'") + def test_logical_or_err_resilient_short_circuits(self): + """`X || true` short-circuits to true even when X is not a bool (cel 0.13).""" + assert cel.evaluate("0 || true") is True + assert cel.evaluate("'string' || true") is True + def test_logical_in_conditionals(self): """Test logical operators in conditional expressions.""" context = {"x": 5, "y": 10} diff --git a/tests/test_map_function.py b/tests/test_map_function.py index 3c93f2e..dd9fce5 
100644 --- a/tests/test_map_function.py +++ b/tests/test_map_function.py @@ -48,15 +48,15 @@ def test_documented_map_limitations(self): # This is the documented issue: mixed int/float arithmetic in map() # See docs/reference/cel-compliance.md for details - with pytest.raises(TypeError, match="Unsupported.*operation.*Int.*Float"): + with pytest.raises(TypeError, match="overload|Unsupported"): evaluate("[1, 2, 3].map(x, x * 2.0)") # Complex mixed arithmetic should also fail - with pytest.raises(TypeError, match="Unsupported.*operation.*Int.*Float"): + with pytest.raises(TypeError, match="overload|Unsupported"): evaluate("[1, 2, 3].map(x, x * 2 + 1.5)") # Integer + float literal fails due to type mismatch - with pytest.raises(TypeError, match="Unsupported.*operation.*Int.*Float"): + with pytest.raises(TypeError, match="overload|Unsupported"): evaluate("[1, 2, 3].map(x, x + 1.0)") def test_map_function_workarounds(self): diff --git a/tests/test_upstream_improvements.py b/tests/test_upstream_improvements.py index c1eadb6..560438c 100644 --- a/tests/test_upstream_improvements.py +++ b/tests/test_upstream_improvements.py @@ -85,21 +85,13 @@ class TestMixedArithmetic: """Test mixed signed/unsigned arithmetic that currently fails.""" def test_mixed_int_uint_addition_fails(self): - """ - Test that mixed int/uint addition currently fails. - - When this test starts failing, mixed arithmetic has been fixed. - """ - with pytest.raises(TypeError, match="Cannot mix signed and unsigned integers"): + """Mixed int/uint addition raises TypeError per CEL spec (no implicit coercion).""" + with pytest.raises(TypeError, match="overload|signed and unsigned"): cel.evaluate("1 + 2u") def test_mixed_int_uint_multiplication_fails(self): - """ - Test that mixed int/uint multiplication currently fails. - - When this test starts failing, mixed arithmetic has been fixed. 
- """ - with pytest.raises(TypeError, match="Unsupported.*operation"): + """Mixed int/uint multiplication raises TypeError per CEL spec.""" + with pytest.raises(TypeError, match="overload|Unsupported"): cel.evaluate("3 * 2u") @pytest.mark.xfail( @@ -151,12 +143,8 @@ class TestMapFunctionImprovements: """Test map() function improvements for mixed type handling.""" def test_map_mixed_arithmetic_currently_fails(self): - """ - Test that map() with mixed arithmetic currently fails. - - When this test starts failing, map() type coercion has been improved. - """ - with pytest.raises(TypeError, match="Unsupported.*operation.*Int.*Float"): + """map() with mixed int/float arithmetic raises TypeError per CEL spec.""" + with pytest.raises(TypeError, match="overload|Unsupported"): cel.evaluate("[1, 2, 3].map(x, x * 2.0)") @pytest.mark.xfail( @@ -179,27 +167,35 @@ def test_or_operator_cel_compliant_behavior(self): """ Test OR operator behavior follows CEL specification requirements. - Per CEL specification, logical operators require boolean first operands. - Mixed-type operations like "42 || false" should fail with "No such overload". + Per CEL specification, logical operators require boolean operands; mixed-type + operations raise TypeError. As of cel 0.13 the operators are err-resilient, + meaning `X || true` short-circuits to true (and `X && false` to false) even + when X is not a bool. 
Reference: https://github.com/tektoncd/triggers/issues/644 """ - # These correctly fail - first operand must be boolean per CEL spec - with pytest.raises(ValueError, match="No such overload"): - cel.evaluate("42 || false") # Non-boolean first operand fails + # Non-bool LHS with no short-circuit path → TypeError + with pytest.raises(TypeError, match="No such overload"): + cel.evaluate("42 || false") + + with pytest.raises(TypeError, match="No such overload"): + cel.evaluate('0 || "default"') - with pytest.raises(ValueError, match="No such overload"): - cel.evaluate('0 || "default"') # Non-boolean first operand fails + # Boolean short-circuits + assert cel.evaluate("true || 99") # LHS true short-circuits to true + assert cel.evaluate("true || 'anything'") is True - # CEL's logical operators with boolean first operand work correctly - assert cel.evaluate("true || 99") # Short-circuits to True - assert cel.evaluate("false || 99") == 99 # Returns second operand per CEL spec - assert cel.evaluate("false || 'default'") == "default" # Any type for second operand + # `false || X` no longer returns X — both operands must be bool, or one must + # provide a definitive short-circuit. Neither applies here. 
+ with pytest.raises(TypeError, match="No such overload"): + cel.evaluate("false || 99") + with pytest.raises(TypeError, match="No such overload"): + cel.evaluate("false || 'default'") - # AND operator has stricter requirements for both operands - assert not cel.evaluate("false && 99") # Short-circuits to False - with pytest.raises(ValueError, match="No such overload"): - cel.evaluate("true && 99") # AND requires both operands to be boolean when evaluated + # AND err-resilient short-circuit when LHS is definitively false + assert not cel.evaluate("false && 99") + with pytest.raises(TypeError, match="No such overload"): + cel.evaluate("true && 99") def test_or_operator_correct_boolean_behavior(self): """ @@ -230,7 +226,7 @@ def test_ternary_operator_requires_boolean_condition(self): assert cel.evaluate("false ? 42 : 0") == 0 # Non-boolean condition fails as expected - with pytest.raises(ValueError, match="No such overload"): + with pytest.raises(TypeError, match="No such overload"): cel.evaluate("42 ? true : false")