diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py index 7d5a7a94..92ba1476 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille.py +++ b/diff_diff/chaisemartin_dhaultfoeuille.py @@ -143,9 +143,26 @@ def _validate_and_aggregate_to_cells( Raises ------ ValueError - On missing columns, NaN treatment / outcome values, non-numeric - treatment / outcome that cannot be coerced, or non-binary raw - treatment values. + On missing columns; NaN values in any of the ``group``, ``time``, + ``treatment``, or ``outcome`` columns (``group`` and ``time`` are + rejected pre-``groupby`` because ``groupby`` silently drops NaN + keys, which would change the estimation sample without warning); + non-numeric treatment / outcome that cannot be coerced via + ``pd.to_numeric``; or within-cell-varying treatment (any + ``(group, time)`` cell where ``d_min != d_max``, since fuzzy DiD + is out of scope and deferred to a separate dCdH 2018 paper). + Integer-coded non-binary treatment (the ``by_path`` / + ``paths_of_interest`` requirement) is enforced separately at + ``fit()`` time, not here at aggregation time — this helper + accepts continuous ``d_gt`` cell means and lets ``fit()`` decide + whether the integer-only contract applies. + + Under the survey-weighted path (``weights`` is not ``None``), + zero-weight rows are pre-filtered before any NaN / coercion / + within-cell validation per the ``SurveyDesign.subpopulation()`` + out-of-sample contract — invalid values in zero-weight rows + therefore do NOT raise. NaN / coercion / within-cell checks + still apply to all positive-weight rows. """ # 1. 
Required columns missing = [c for c in (outcome, group, time, treatment) if c not in data.columns] diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 21286864..8c6dd470 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -625,16 +625,17 @@ class ChaisemartinDHaultfoeuilleResults: ) # Per-path joint sup-t simultaneous-band metadata. Keyed by path # tuple; each entry holds `{"crit_value", "alpha", "n_bootstrap", - # "method", "n_valid_horizons"}`. Populated when `by_path` is a - # positive int AND `n_bootstrap > 0`. The joint band itself is - # written per-horizon as `cband_conf_int` on - # `path_effects[path]["horizons"][l]` (mirrors the OVERALL - # `event_study_effects[l]["cband_conf_int"]` pattern at - # `chaisemartin_dhaultfoeuille.py:2865-2875`). Empty-state contract: - # `None` when not requested (no bootstrap or `by_path is None`); `{}` - # when requested but no path passed both gates (>=2 valid horizons - # AND a strict majority — more than 50% — of finite sup-t draws). - # The bands cover joint inference + # "method", "n_valid_horizons"}`. Populated when `n_bootstrap > 0` AND + # either `by_path` is a positive int or `paths_of_interest` is + # non-empty. The joint band itself is written per-horizon as + # `cband_conf_int` on `path_effects[path]["horizons"][l]` (mirrors + # the OVERALL `event_study_effects[l]["cband_conf_int"]` pattern + # populated alongside the bootstrap propagation in + # `chaisemartin_dhaultfoeuille.py::fit`). Empty-state contract: + # `None` when not requested (no bootstrap, or both `by_path` and + # `paths_of_interest` are `None`); `{}` when requested but no path + # passed both gates (>=2 valid horizons AND a strict majority — more + # than 50% — of finite sup-t draws). The bands cover joint inference # WITHIN a single path across horizons; they do NOT provide # simultaneous coverage across paths. 
path_sup_t_bands: Optional[Dict[Tuple[int, ...], Dict[str, Any]]] = field(