From 3db14539c669806805be81f5348ad776d14e2928 Mon Sep 17 00:00:00 2001
From: William Kearney <william.kearney@uni-potsdam.de>
Date: Thu, 28 May 2026 11:59:15 +0200
Subject: [PATCH] PPS: Implement rhohat

This PR provides an implementation of the rhohat estimator. Not all of
the functionality of the MATLAB version is supported. Plotting is, as
usual, left to the user, and we return just the estimates of the
intensity and its confidence intervals. The ecdf function that is used
in rhohat is replaced by a ksdensity function that only does the
kernel density estimation required by rhohat. The ecdf function can be
added separately if necessary (the MATLAB ecdf function may try to do
too many things). ksdensity is also simplified by having it return all
of the bootstrap samples. Weighted datasets are not yet supported.

At the moment, the bandwidth selected by scipy.stats.gaussian_kde by
default is somewhat larger than that selected by MATLAB, especially
for small datasets.

Signed-off-by: William Kearney <william.kearney@uni-potsdam.de>
---
 src/topotoolbox/pps.py | 156 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)

diff --git a/src/topotoolbox/pps.py b/src/topotoolbox/pps.py
index ecbeafb..06afc9b 100644
--- a/src/topotoolbox/pps.py
+++ b/src/topotoolbox/pps.py
@@ -7,6 +7,8 @@
 import numpy as np
 import numpy.typing as npt
 
+import scipy.stats as st
+
 from .stream_object import StreamObject
 
 
@@ -147,3 +149,157 @@ def plotdz(self, z, distance=None,
         ax.autoscale_view(scalex=scalex, scaley=scaley)
 
         return ax
+
+    def get_covariate(self, covariate=None):
+        """Extract a covariate node attribute list
+
+        Parameters
+        ----------
+        covariate: array_like, optional
+          The covariate to be extracted. If it is not supplied, the
+          upstream distance is returned. Otherwise, it should be
+          either a node attribute list, a compatible GridObject or a
+          compatible 2D np.ndarray, which will be selected at the
+          points of the point process.
+
+        """
+        if covariate is None:
+            return self.s.upstream_distance()
+
+        return self.s.ezgetnal(covariate)
+
+    def ksdensity(self, covariate=None, n_bootstraps=0, rng=np.random.default_rng()):
+        """Estimate the distribution of covariate at the points using
+        kernel density estimation.
+
+        Parameters
+        ----------
+        covariate: array_like, optional
+          The covariate of interest. If it is not supplied, the
+          upstream distance is used. Otherwise, it should be
+          either a node attribute list, a compatible GridObject or a
+          compatible 2D np.ndarray, which will be selected at the
+          points of the point process.
+
+        n_bootstraps: int, optional
+          The number of bootstrap samples used to compute confidence
+          intervals. If not provided, no bootstrap samples are
+          returned.
+
+        rng: np.random.Generator
+          A random number generator used to select bootstrap
+          samples. Numpy's default_rng() is used by default.
+
+        Returns
+        -------
+        x: np.ndarray
+           The grid of points at which the densities are
+           evaluated. The grid is based on the range of the covariate.
+
+        n: np.ndarray
+           The density estimate restricted to the points of the PPS
+
+        nb: np.ndarray
+           The density estimate throughout the stream network.
+
+        ns: np.ndarray
+           An array of size (npoints, n_bootstraps) containing the
+           bootstrap sampled density estimates.
+
+        bw: float
+           The covariance factor used in the kernel density estimate routine.
+
+        """
+        c = self.get_covariate(covariate)
+
+        n_ks = st.gaussian_kde(c[self.pp])
+        bw = n_ks.covariance_factor()
+        nb_ks= st.gaussian_kde(c, bw_method=bw)
+
+        x = np.linspace(np.min(c), np.max(c), 1000)
+
+        n = n_ks.pdf(x)
+        nb = nb_ks.pdf(x)
+
+        csim = rng.choice(c[self.pp], (self.npoints, n_bootstraps))
+        ns = np.zeros((1000, n_bootstraps))
+        for i in range(n_bootstraps):
+            ns_ks = st.gaussian_kde(csim[:,i], bw_method=bw)
+            ns[:,i] = ns_ks.pdf(x)
+
+        return (x, n, nb, ns, bw)
+
+    def rhohat(self, covariate=None, alpha=0.05, n_bootstraps=1000, rng=np.random.default_rng()):
+        """Nonparametric estimation of point pattern dependence on covariate
+
+        Parameters
+        ----------
+        covariate: array_like, optional
+          The covariate of interest. If it is not supplied, the
+          upstream distance is used. Otherwise, it should be
+          either a node attribute list, a compatible GridObject or a
+          compatible 2D np.ndarray, which will be selected at the
+          points of the point process.
+
+        alpha: float, optional
+          The confidence level of the returned bootstrap confidence
+          intervals. Default is 0.05
+
+        n_bootstraps: int, optional
+          The number of bootstrap samples. Default is 1000.
+
+        rng: np.random.Generator
+          A random number generator used to select bootstrap
+          samples. Numpy's default_rng() is used by default.
+
+
+        Returns
+        -------
+        x: np.ndarray
+           The grid of points at which the densities are
+           evaluated. The grid is based on the range of the covariate.
+
+        rho: np.ndarray
+           The estimated intensity
+
+        rhol: np.ndarray
+           The lower limit of the bootstrapped confidence interval
+
+        rhou: np.ndarray
+           The upper limit of the bootstrapped confidence interval
+
+        Example
+        -------
+        .. plot ::
+
+           >>> import topotoolbox
+           >>> import matplotlib.pyplot as plt
+           >>> dem = topotoolbox.load_dem('bigtujunga')
+           >>> fd = topotoolbox.FlowObject(dem)
+           >>> s = topotoolbox.StreamObject(fd, threshold=1000)
+           >>> s = s.klargestconncomps(1)
+           >>> kp = s.knickpointfinder(dem, tolerance=30.0)
+           >>> p = topotoolbox.PPS.from_nal(s, kp)
+           >>> a = fd.flow_accumulation()
+           >>> c = s.chitransform(a)
+           >>> x, rho, rhol, rhou = p.rhohat(c)
+           >>> fig, ax = plt.subplots(1, 1)
+           >>> _ = ax.plot(x, rho)
+           >>> _ = ax.fill_between(x, rhol, rhou, alpha=0.5)
+           >>> plt.show()
+        """
+        c = self.get_covariate(covariate)
+
+        x, n, nb, ns, _ = self.ksdensity(c, n_bootstraps=n_bootstraps, rng=rng)
+
+        intensity = self.intensity()
+
+        nq = np.quantile(ns, [alpha/2, 1-alpha/2], axis=1)
+        nl = nq[0, :]
+        nu = nq[1, :]
+
+        rho = intensity * n / nb
+        rhou = intensity * nu / nb
+        rhol = intensity * nl / nb
+
+        return (x, rho, rhol, rhou)