Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,26 @@ pip install -e .
### Usage

```python
from alsgls import als_gls, simulate_sur, nll_per_row, XB_from_Blist
from alsgls import ALSGLS, ALSGLSSystem, simulate_sur

Xs_tr, Y_tr, Xs_te, Y_te = simulate_sur(N_tr=240, N_te=120, K=60, p=3, k=4)
B, F, D, mem, _ = als_gls(Xs_tr, Y_tr, k=4)
Yhat_te = XB_from_Blist(Xs_te, B)
nll = nll_per_row(Y_te - Yhat_te, F, D)

# Scikit-learn style estimator
est = ALSGLS(rank="auto", max_sweeps=12)
est.fit(Xs_tr, Y_tr)
test_score = est.score(Xs_te, Y_te) # negative test NLL per observation

# Statsmodels-style system interface
system = {f"eq{j}": (Y_tr[:, j], Xs_tr[j]) for j in range(Y_tr.shape[1])}
sys_model = ALSGLSSystem(system, rank="auto")
sys_results = sys_model.fit()
params = sys_results.params_as_series() # pandas optional
```

See `examples/compare_als_vs_em.py` for a complete ALS versus EM comparison.
See `examples/compare_als_vs_em.py` for a complete ALS versus EM comparison. The
`benchmarks/compare_sur.py` script contrasts ALS-GLS with `statsmodels` and
`linearmodels` SUR implementations on matched simulation grids while recording
peak memory (via Memray, Fil, or the POSIX RSS high-water mark).

### Documentation and notebooks

Expand Down Expand Up @@ -63,3 +74,24 @@ To show the magnitude, we ran a Monte‑Carlo experiment with N = 300 observat

Statistically, the two estimators are indistinguishable (paired‑test p ≥ 0.14). Computationally, ALS needs only a few megabytes whereas EM needs tens to hundreds.

### Defaults, tuning knobs, and failure modes

- **Rank (`k`)** – By default the high-level APIs pick `min(8, ceil(K / 10))`, a
conservative fraction of the number of equations. Increase `rank` if the
cross-equation correlation matrix is slow to decay; decrease it when the
diagonal dominates.
- **ALS ridge terms (`lam_F`, `lam_B`)** – Defaults to `1e-3` for both the
latent-factor and regression updates; raise them slightly (e.g. `1e-2`) if CG
struggles to converge or the NLL trace plateaus early.
- **Noise floor (`d_floor`)** – Keeps the diagonal component positive; the
default `1e-8` protects against breakdowns when an equation is nearly
deterministic. Increase it in highly ill-conditioned settings.
- **Stopping criteria** – ALS stops when the relative drop in NLL per sweep is
below `1e-6` (configurable via `rel_tol`) or after `max_sweeps`. Inspect
`info["nll_trace"]` to diagnose stagnation.
- **Possible failures** – Large condition numbers or nearly-collinear regressors
can make the β-step CG solve slow; adjust `cg_tol`/`cg_maxit`, add stronger
ridge, or re-scale predictors. If `info["accept_t"]` stays at zero and the
NLL does not improve, the factor rank may be too large relative to the sample
size.

16 changes: 14 additions & 2 deletions alsgls/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
from .als import als_gls
from .api import ALSGLS, ALSGLSSystem, ALSGLSSystemResults
from .em import em_gls
from .metrics import mse, nll_per_row
from .sim import simulate_sur, simulate_gls
from .ops import XB_from_Blist
from .sim import simulate_gls, simulate_sur

__all__ = ["als_gls", "em_gls", "mse", "nll_per_row", "simulate_sur", "simulate_gls", "XB_from_Blist"]
__all__ = [
"ALSGLS",
"ALSGLSSystem",
"ALSGLSSystemResults",
"XB_from_Blist",
"als_gls",
"em_gls",
"mse",
"nll_per_row",
"simulate_gls",
"simulate_sur",
]
6 changes: 5 additions & 1 deletion alsgls/als.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def als_gls(
*,
scale_correct: bool = True,
scale_floor: float = 1e-8,
rel_tol: float = 1e-6,
):
"""
Alternating-least-squares GLS with low-rank-plus-diagonal covariance.
Expand Down Expand Up @@ -51,6 +52,7 @@ def als_gls(
cg_tol : float CG relative tolerance
scale_correct : bool if True, try guarded MLE scale fix on Σ each sweep
scale_floor : float min scalar for scale correction
rel_tol : float relative NLL improvement threshold for early stopping

Returns
-------
Expand Down Expand Up @@ -81,6 +83,8 @@ def als_gls(
raise ValueError(f"k must be between 1 and min(K={K}, N={N})")
if lam_F < 0 or lam_B < 0:
raise ValueError("Regularization parameters must be non-negative")
if rel_tol < 0:
raise ValueError("rel_tol must be non-negative")

p_list = [X.shape[1] for X in Xs]

Expand Down Expand Up @@ -242,7 +246,7 @@ def try_with_scale(F_try, D_try):
# Convergence: stop if relative improvement w.r.t previous post-Σ NLL is tiny
rel_impr = (nll_prev - nll_curr) / max(1.0, abs(nll_prev))
nll_prev = nll_curr
if rel_impr < 1e-6:
if rel_impr < rel_tol:
break

# Memory estimate: F (K×k) + D (K) + U (N×k) doubles
Expand Down
Loading
Loading