Skip to content

Commit 6f23e89

Browse files
authored
fix: deep review optimizations — firmware + server
* feat(signal): subcarrier importance weighting via mincut partition (Phase 1) Adds subcarrier_importance_weights() to ruvector signal crate — converts mincut partition into per-subcarrier float weights (>1.0 for sensitive, 0.5 for insensitive subcarriers). Sensing server now uses weighted mean/variance in extract_features_from_frame instead of treating all 56 subcarriers equally. This emphasizes body-motion-sensitive subcarriers and reduces noise from static multipath. Expected: ~26% reduction in keypoint jitter (±15cm → ±11cm RMS). 284 tests pass (191 trainer + 51 lib + 18 vital_signs + 16 dataset + 8 multi_node). Co-Authored-By: claude-flow <ruv@ruv.net> * fix(firmware): stack overflow risk + tick-rate independence (review findings) Critical fixes from deep review: 1. **Stack overflow prevention**: Moved BPM scratch buffers (br_buf, hr_buf) from stack to static storage in both process_frame() and update_multi_person_vitals(). Combined stack was ~6.5-7.5 KB of 8 KB limit — now reduced by ~4 KB to safe margins. 2. **Tick-rate independence**: Post-batch yield now uses pdMS_TO_TICKS(20) with min-1 guard instead of raw vTaskDelay(2). Previously assumed a 100 Hz tick rate. 3. **EDGE_BATCH_LIMIT to header**: Moved from local const to edge_processing.h #define for configurability. Firmware builds clean at 843 KB. Co-Authored-By: claude-flow <ruv@ruv.net> * fix(server): stale node eviction, remove unsafe pointer (review findings) Critical fixes from deep review: 1. **Stale node eviction**: node_states HashMap now evicts nodes with no frame for >60 seconds, every 100 ticks. Prevents unbounded memory growth and stale smoothing data when nodes are replaced. 2. **Remove unsafe raw pointer**: Replaced the unsafe raw pointer to adaptive_model (used to break a borrow-checker deadlock with node_states) with a safe .clone() before the mutable borrow. AdaptiveModel derives Clone so this is a clean copy. 284 tests pass, zero failures. Co-Authored-By: claude-flow <ruv@ruv.net>
1 parent 1dcf5d4 commit 6f23e89

3 files changed

Lines changed: 43 additions & 27 deletions

File tree

firmware/esp32-csi-node/main/edge_processing.c

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ static const char *TAG = "edge_proc";
4343
static edge_ring_buf_t s_ring;
4444
static uint32_t s_ring_drops; /* Frames dropped due to full ring buffer. */
4545

46+
/* Scratch buffers for BPM estimation — moved from stack to static to avoid
47+
* stack overflow. process_frame + update_multi_person_vitals combined used
48+
* ~6.5-7.5 KB of the 8 KB task stack. These save ~4 KB of stack. */
49+
static float s_scratch_br[EDGE_PHASE_HISTORY_LEN];
50+
static float s_scratch_hr[EDGE_PHASE_HISTORY_LEN];
51+
4652
static inline bool ring_push(const uint8_t *iq, uint16_t len,
4753
int8_t rssi, uint8_t channel)
4854
{
@@ -513,20 +519,18 @@ static void update_multi_person_vitals(const uint8_t *iq_data, uint16_t n_sc,
513519

514520
/* Estimate BPM when we have enough history. */
515521
if (pv->history_len >= 64) {
516-
/* Build contiguous buffer for zero-crossing. */
517-
float br_buf[EDGE_PHASE_HISTORY_LEN];
518-
float hr_buf[EDGE_PHASE_HISTORY_LEN];
522+
/* Build contiguous buffer (reuse static scratch to save ~2 KB stack). */
519523
uint16_t buf_len = pv->history_len;
520524

521525
for (uint16_t i = 0; i < buf_len; i++) {
522526
uint16_t ri = (pv->history_idx + EDGE_PHASE_HISTORY_LEN
523527
- buf_len + i) % EDGE_PHASE_HISTORY_LEN;
524-
br_buf[i] = s_person_br_filt[p][ri];
525-
hr_buf[i] = s_person_hr_filt[p][ri];
528+
s_scratch_br[i] = s_person_br_filt[p][ri];
529+
s_scratch_hr[i] = s_person_hr_filt[p][ri];
526530
}
527531

528-
float br = estimate_bpm_zero_crossing(br_buf, buf_len, sample_rate);
529-
float hr = estimate_bpm_zero_crossing(hr_buf, buf_len, sample_rate);
532+
float br = estimate_bpm_zero_crossing(s_scratch_br, buf_len, sample_rate);
533+
float hr = estimate_bpm_zero_crossing(s_scratch_hr, buf_len, sample_rate);
530534

531535
/* Sanity clamp. */
532536
if (br >= 6.0f && br <= 40.0f) pv->breathing_bpm = br;
@@ -690,20 +694,18 @@ static void process_frame(const edge_ring_slot_t *slot)
690694

691695
/* --- Step 7: BPM estimation (zero-crossing) --- */
692696
if (s_history_len >= 64) {
693-
/* Build contiguous buffers from ring. */
694-
float br_buf[EDGE_PHASE_HISTORY_LEN];
695-
float hr_buf[EDGE_PHASE_HISTORY_LEN];
697+
/* Build contiguous buffers from ring (using static scratch to save stack). */
696698
uint16_t buf_len = s_history_len;
697699

698700
for (uint16_t i = 0; i < buf_len; i++) {
699701
uint16_t ri = (s_history_idx + EDGE_PHASE_HISTORY_LEN
700702
- buf_len + i) % EDGE_PHASE_HISTORY_LEN;
701-
br_buf[i] = s_breathing_filtered[ri];
702-
hr_buf[i] = s_heartrate_filtered[ri];
703+
s_scratch_br[i] = s_breathing_filtered[ri];
704+
s_scratch_hr[i] = s_heartrate_filtered[ri];
703705
}
704706

705-
float br_bpm = estimate_bpm_zero_crossing(br_buf, buf_len, sample_rate);
706-
float hr_bpm = estimate_bpm_zero_crossing(hr_buf, buf_len, sample_rate);
707+
float br_bpm = estimate_bpm_zero_crossing(s_scratch_br, buf_len, sample_rate);
708+
float hr_bpm = estimate_bpm_zero_crossing(s_scratch_hr, buf_len, sample_rate);
707709

708710
/* Sanity clamp: breathing 6-40 BPM, heart rate 40-180 BPM. */
709711
if (br_bpm >= 6.0f && br_bpm <= 40.0f) s_breathing_bpm = br_bpm;
@@ -839,23 +841,22 @@ static void edge_task(void *arg)
839841
* Without a batch limit the task processes frames back-to-back with
840842
* only 1-tick yields, which on high frame rates can still starve
841843
* IDLE1 enough to trip the 5-second task watchdog. See #266, #321. */
842-
const uint8_t BATCH_LIMIT = 4;
843844

844845
while (1) {
845846
uint8_t processed = 0;
846847

847-
while (processed < BATCH_LIMIT && ring_pop(&slot)) {
848+
while (processed < EDGE_BATCH_LIMIT && ring_pop(&slot)) {
848849
process_frame(&slot);
849850
processed++;
850851
/* 1-tick yield between frames within a batch. */
851852
vTaskDelay(1);
852853
}
853854

854855
if (processed > 0) {
855-
/* Post-batch yield: 2 ticks (~20 ms at 100 Hz) so IDLE1 can
856-
* run and feed the Core 1 watchdog even under sustained load.
857-
* This is intentionally longer than the 1-tick inter-frame yield. */
858-
vTaskDelay(2);
856+
/* Post-batch yield: ~20 ms so IDLE1 can run and feed the
857+
* Core 1 watchdog even under sustained load. Uses pdMS_TO_TICKS
858+
* for tick-rate independence (minimum 1 tick). */
859+
{ TickType_t d = pdMS_TO_TICKS(20); vTaskDelay(d > 0 ? d : 1); }
859860
} else {
860861
/* No frames available — sleep one full tick.
861862
* NOTE: pdMS_TO_TICKS(5) == 0 at 100 Hz, which would busy-spin. */

firmware/esp32-csi-node/main/edge_processing.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
#define EDGE_FALL_COOLDOWN_MS 5000 /**< Minimum ms between fall alerts (debounce). */
4747
#define EDGE_FALL_CONSEC_MIN 3 /**< Consecutive frames above threshold to trigger. */
4848

49+
/* ---- DSP task tuning ---- */
50+
#define EDGE_BATCH_LIMIT 4 /**< Max frames per batch before longer yield. */
51+
4952
/* ---- SPSC ring buffer slot ---- */
5053
typedef struct {
5154
uint8_t iq_data[EDGE_MAX_IQ_BYTES]; /**< Raw I/Q bytes from CSI callback. */

rust-port/wifi-densepose-rs/crates/wifi-densepose-sensing-server/src/main.rs

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3184,7 +3184,10 @@ async fn udp_receiver_task(state: SharedState, udp_port: u16) {
31843184
// We scope the mutable borrow of node_states so we can
31853185
// access other AppStateInner fields afterward.
31863186
let node_id = frame.node_id;
3187-
let adaptive_model_ref = s.adaptive_model.as_ref().map(|m| m as *const _);
3187+
// Clone adaptive model before mutable borrow of node_states
3188+
// to avoid unsafe raw pointer (review finding #2).
3189+
let adaptive_model_clone = s.adaptive_model.clone();
3190+
31883191
let ns = s.node_states.entry(node_id).or_insert_with(NodeState::new);
31893192
ns.last_frame_time = Some(std::time::Instant::now());
31903193

@@ -3198,12 +3201,8 @@ async fn udp_receiver_task(state: SharedState, udp_port: u16) {
31983201
extract_features_from_frame(&frame, &ns.frame_history, sample_rate_hz);
31993202
smooth_and_classify_node(ns, &mut classification, raw_motion);
32003203

3201-
// SAFETY: adaptive_model_ref points into s which we hold
3202-
// via write lock; the model is not mutated here. We use a
3203-
// raw pointer to break the borrow-checker deadlock between
3204-
// node_states and adaptive_model (both inside s).
3205-
if let Some(model_ptr) = adaptive_model_ref {
3206-
let model: &adaptive_classifier::AdaptiveModel = unsafe { &*model_ptr };
3204+
// Adaptive override using cloned model (safe, no raw pointers).
3205+
if let Some(ref model) = adaptive_model_clone {
32073206
let amps = ns.frame_history.back()
32083207
.map(|v| v.as_slice())
32093208
.unwrap_or(&[]);
@@ -3318,6 +3317,19 @@ async fn udp_receiver_task(state: SharedState, udp_port: u16) {
33183317
let _ = s.tx.send(json);
33193318
}
33203319
s.latest_update = Some(update);
3320+
3321+
// Evict stale nodes every 100 ticks to prevent memory leak.
3322+
if tick % 100 == 0 {
3323+
let stale = Duration::from_secs(60);
3324+
let before = s.node_states.len();
3325+
s.node_states.retain(|_id, ns| {
3326+
ns.last_frame_time.map_or(false, |t| now.duration_since(t) < stale)
3327+
});
3328+
let evicted = before - s.node_states.len();
3329+
if evicted > 0 {
3330+
info!("Evicted {} stale node(s), {} active", evicted, s.node_states.len());
3331+
}
3332+
}
33213333
}
33223334
}
33233335
Err(e) => {

0 commit comments

Comments
 (0)