From b9ed72b12478f9f6f068c2c92241d0a7fe6f6954 Mon Sep 17 00:00:00 2001
From: Lee <lee@example.com>
Date: Fri, 15 May 2026 15:20:01 +0900
Subject: [PATCH] Harden daemon lifecycle and clarify reports

---
 CHANGELOG.md                          |  14 ++++
 README.md                             |  28 +++++--
 projects/bare-metal-1.0/handoff.ko.md |  89 +++++++++++------------
 projects/bare-metal-1.0/status.ko.md  | 101 +++++++++++++-------------
 src/gpu_usage_audit/__main__.py       |  69 ++++++++++++++----
 src/gpu_usage_audit/nvml.py           |  14 +++-
 src/gpu_usage_audit/render.py         |  32 +++++---
 src/gpu_usage_audit/report.py         |  99 +++++++++++++++++--------
 tests/test_render.py                  |  36 ++++++---
 tests/test_report.py                  |  45 ++++++++++--
 tests/test_smoke.py                   |  80 +++++++++++++++++++-
 11 files changed, 432 insertions(+), 175 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 64932f3..ec2267a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog
 
+## Unreleased
+
+- Hardened `gua status` and `gua stop` so a stale PID file can no longer make
+  them act on an unrelated live process.
+- Clarified report output by explaining sample units, classification rules,
+  interval-dependent GPU-hours, and heatmap density.
+- Split §2 from generic "Waste" into idle-held capacity and truly-idle
+  capacity. The equivalent-GPU figures now use GPUs present in the report
+  window instead of the entire database.
+- Made §4 Top identities aggregate by identity/GPU/tick before converting to
+  GPU-hours, so reports may show lower per-user GPU-hours when one user has
+  multiple processes on the same GPU at the same tick.
+- Added a once-per-GPU warning when the NVML process list is unavailable, since idle-held may then be understated.
+
 ## 1.0.1 - 2026-05-15
 
 - Made `gua` the documented command surface for daemon, report, demo, and doctor output.
diff --git a/README.md b/README.md
index 814c999..d593cbc 100644
--- a/README.md
+++ b/README.md
@@ -81,26 +81,33 @@ $ gua report --since 1h --interval 30s
 gua — lab-a100 (bare, driver 560.35.05)  Window: 1:00:00
 
 §1 Headline
+  basis: one sample = one GPU card at one daemon tick
+  rules: active >=10% util; idle-held <10% util with >100 MB process memory
   █████████▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░
   active       █   15.7%
   idle-held    ▒   45.1%       ← this is the number conventional tools miss
   truly-idle   ░   39.2%
   (51 samples)
 
-§2 Waste
-  ~0.43 GPU-hours idle, ~2.53 GPUs equivalently unused
+§2 Idle capacity
+  converted from card-ticks to GPU-hours using the report --interval
+  idle-held: ~0.19 GPU-hours, ~1.35 GPUs equivalently unavailable
+  truly-idle: ~0.17 GPU-hours, ~1.18 GPUs equivalently free
 
 §3 Per-GPU
+  per-card share of samples in the same three states
   GPU-0     active  47.1%  idle-held  35.3%  truly-idle  17.6%
   GPU-1     active   0.0%  idle-held 100.0%  truly-idle   0.0%
   GPU-2     active   0.0%  idle-held   0.0%  truly-idle 100.0%
 
 §4 Top identities
-  identity              gpu-hours   idle-held
-  alice                      0.42       42.9%
-  bob                        0.28      100.0%
+  one identity counts once per GPU/tick after its processes are summed
+  identity              gpu-hours   idle-held   samples
+  alice                      0.42       42.9%        51
+  bob                        0.28      100.0%        34
 
 §5 Time-of-day heatmap (UTC)
+  darker means higher active share; blank means no samples
         0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
   Mon               .
 ```
@@ -108,7 +115,10 @@ gua — lab-a100 (bare, driver 560.35.05)  Window: 1:00:00
 The 3-bar collapses every card × every tick over the window into the
 active / idle-held / truly-idle split. **`idle-held` rows are the
 embarrassing category**: a process is holding GPU memory but the SM
-utilization is below 10%.
+utilization is below 10%. §2 converts those card-ticks into GPU-hours
+with `--interval`; §4 groups process rows by identity, GPU, and tick
+before ranking users, so multiple same-user processes on one GPU/tick
+count once.
 
 ## Demo (no GPU required)
 
@@ -185,7 +195,7 @@ point remains installed for compatibility, but new examples use `gua`.
 | -------- | ----------------------------------------------------------- |
 | `daemon` | Starts the collector in the background. Samples real NVML telemetry on every tick and writes to a new database. NVIDIA host required. |
 | `start`  | Alias for `gua daemon`. |
-| `status` | Shows whether the background collector PID is still running. |
+| `status` | Shows whether the background collector PID is still running. Also clears a stale PID file when it points to a missing or unrelated process. |
 | `stop`   | Stops the background collector with SIGTERM. |
 | `report` | One-shot read against the accumulated database. Safe to run **while the daemon is still writing** — SQLite WAL mode handles the concurrency. |
 | `demo`   | Self-contained showcase. Records N fake ticks and immediately prints the report. No GPU, no second shell, no operational meaning — just to see the output shape. |
@@ -213,6 +223,8 @@ By default, `gua daemon` returns after the collector starts. Each tick is
 written to the log file; on shutdown the cumulative row count is written
 there too. `gua daemon --foreground` prints the tick summaries directly
 to the terminal and exits on Ctrl+C, SIGTERM, or `systemctl stop`.
+`gua status` and `gua stop` verify that the PID file points to the
+managed collector before acting on it; stale PID files are cleared.
 
 ### `report`
 
@@ -227,7 +239,7 @@ gua report [--db PATH] [--since D] [--interval D] [--width N]
   of oldest sample), so passing a huge `--since` is the same as "all
   data". Units: `ms`, `s`, `m`, `h`, `d` (no `w`; use `7d`).
 - `--interval D` (default `30s`) — **must match what the daemon used**.
-  This is how §2 (Waste) and §4 (Top identities) convert tick counts
+  This is how §2 (Idle capacity) and §4 (Top identities) convert tick counts
   to GPU-hours. Mismatched intervals → wrong GPU-hours.
 - `--width N` (default `60`) — width of the §1 three-bar in characters.
 
diff --git a/projects/bare-metal-1.0/handoff.ko.md b/projects/bare-metal-1.0/handoff.ko.md
index f14b0ee..7970eab 100644
--- a/projects/bare-metal-1.0/handoff.ko.md
+++ b/projects/bare-metal-1.0/handoff.ko.md
@@ -4,81 +4,78 @@
 
 ## 이어받을 때 먼저 볼 것
 
-- `projects/bare-metal-1.0/plan.ko.md`: 범위와 PR A-D 계획의 source of truth.
-- `projects/bare-metal-1.0/status.ko.md`: 현재 완료/대기 상태와 마지막 검증 결과.
-- `README.md`: 실제 사용자 문서와 release/install/runbook 표면.
-- `pyproject.toml`: 현재 package version과 dependency 정책.
+- `projects/bare-metal-1.0/status.ko.md`: 현재 완료 상태, 1.0.1 검증 결과, cleanup 리뷰 결과.
+- `README.md`: 실제 사용자 문서와 release/install/runbook/report 표면.
+- `src/gpu_usage_audit/__main__.py`: `gua` CLI, background daemon lifecycle, PID handling.
+- `src/gpu_usage_audit/report.py`: report SQL 집계.
+- `src/gpu_usage_audit/render.py`: 사람이 읽는 report 출력 렌더링.
 - `.github/workflows/release.yml`: tag release, GitHub Release, PyPI publish 경로.
 
 ## 고정된 결정
 
 - 1.0은 단일 로컬 베어메탈 NVIDIA 호스트만 본다.
-- Kubernetes, Slurm, Docker/Podman fallback, remote node, managed
-  `gua start/status/stop/uninstall`은 1.0 사용자 표면에서 제외한다.
+- Kubernetes, Slurm, Docker/Podman fallback, remote node, cluster-wide report는 1.0 범위 밖이다.
 - `nvidia-ml-py`는 기본 dependency다.
 - `gpu-usage-audit[nvml]` extra는 compatibility를 위해 빈 alias로 남긴다.
 - DB schema는 v1을 유지한다: `host`, `gpu_sample`, `proc_sample`.
 - 기본 DB는 `/tmp/gua.db`다.
+- `gua daemon`은 기본 백그라운드 실행이다.
+- `gua daemon --foreground`는 systemd/debugging 용도다.
+- `gua start`는 `gua daemon` alias다.
+- `gua status`와 `gua stop`은 pid file 기반 background collector 관리용이다.
 - `daemon`은 기존 DB 파일이 있으면 실패한다.
 - `report`는 DB 파일이 없으면 실패한다.
-- `gua`의 사용자 표면은 `doctor`만 남긴다.
-- auto-runtime proposal/project 문서는 삭제했다. Kubernetes/Slurm/Docker/Podman
-  확장을 다시 시작하려면 새 proposal로 시작한다.
+- `daemon`과 `demo`는 host row의 `env_kind`를 항상 `"bare"`로 기록한다.
+- auto-runtime proposal/project 문서는 삭제했다. Kubernetes/Slurm/Docker/Podman 확장을 다시
+  시작하려면 새 proposal로 시작한다.
 
 ## 현재 상태
 
 - PR A: implemented in PR #9.
 - PR B: implemented in PR #10.
-- Post-1.0 cleanup: 완료. auto-runtime 문서와 `RuntimePlan`/env detection
-  잔재를 제거했다.
-- PR C: implemented in release prep.
-- PR D: 진행 중. 현재 버전은 `1.0.0`으로 bump했고, local build/wheel smoke는
-  통과했다. NVIDIA host acceptance와 tag publish가 남았다.
+- Post-1.0 cleanup: completed in PR #11.
+- Bare-metal 1.0 release: completed in PR #12 and tag `v1.0.0`.
+- 1.0.1 command surface/background daemon release: completed in PR #13 and tag `v1.0.1`.
+- GitHub Release `v1.0.1`: published.
+- PyPI `gpu-usage-audit 1.0.1`: published.
+- NVIDIA host acceptance: 사용자가 실제 host에서 수집 정상 동작을 확인했다.
 
-마지막 로컬 검증은 모두 통과했다.
+## 마지막 로컬 검증
 
 ```sh
 uv run ruff check
 uv run ruff format --check
 uv run mypy
 uv run pytest
-uv build --out-dir /tmp/gua-dist-1.0.0-prep
-bash scripts/smoke-dist-wheel.sh /tmp/gua-dist-1.0.0-prep/gpu_usage_audit-1.0.0-py3-none-any.whl
-env GITHUB_REF_NAME=v1.0.0 uv run python scripts/check-tag-version.py
+uv build --out-dir /tmp/gua-dist-1.0.1-status
+bash scripts/smoke-dist-wheel.sh /tmp/gua-dist-1.0.1-status/gpu_usage_audit-1.0.1-py3-none-any.whl
+env GITHUB_REF_NAME=v1.0.1 uv run python scripts/check-tag-version.py
 ```
 
-cleanup 후 결과는 `pytest` 107 passed, `mypy` 25 source files, `ruff format`
-26 files 기준이다. release prep에서는 `/tmp/gua-dist-1.0.0-prep`로 build와
-wheel smoke를 확인한다.
+결과는 `pytest` 114 passed, `mypy` 25 source files, `ruff format` 26 files 기준이다.
+
+## 현재 cleanup PR 방향
+
+- `/tmp/gua.pid`가 PID 재사용으로 다른 프로세스를 가리킬 수 있으므로 `status`/`stop` 전에
+  해당 PID가 실제 managed `gpu_usage_audit daemon` 프로세스인지 확인한다.
+- report §2는 low-util 전체를 "waste"로 합치지 말고 `idle-held`와 `truly-idle`을 분리한다.
+- report §4는 process row가 아니라 identity/GPU/tick 단위로 먼저 접어서 사용자별 GPU-hours를 계산한다.
+- report 출력 자체에 sample 의미, classification rule, `--interval` 의존성, heatmap 의미를 짧게 노출한다.
+- NVML process list 조회 실패는 idle-held를 과소평가할 수 있으므로 warning으로 남긴다.
+- `projects/bare-metal-1.0/*` 문서는 1.0.1 완료 상태로 갱신한다.
 
 ## 주의할 점
 
-- 현재 로컬 개발 머신은 NVIDIA host가 아니다. `gua doctor`가 unsupported를 내는 것은
-  정상이다.
-- `/tmp/gua.db`가 이미 존재한다. 기본 경로 daemon 테스트는 이 파일 때문에 실패하는
-  것이 기대 동작이다.
-- 실제 1.0 acceptance는 NVIDIA 베어메탈 호스트에서만 닫을 수 있다.
-- `daemon`과 `demo`는 host row의 `env_kind`를 항상 `"bare"`로 기록한다. 1.0은
-  container/k8s runtime 감지를 하지 않는다.
-- PR C를 닫기 전에 문서만 보고 끝내지 말고, 기존 DB 존재/부재 error UX가 README와
-  CLI 출력에서 서로 같은 메시지를 주는지 확인한다.
-- PR D에서 tag를 만들기 전에는 `env GITHUB_REF_NAME=v1.0.0 uv run python
-  scripts/check-tag-version.py`가 통과해야 한다.
+- 현재 로컬 개발 머신은 NVIDIA host가 아니다. `gua doctor`가 unsupported를 내는 것은 정상이다.
+- `/tmp/gua.db`가 이미 존재한다. 기본 경로 daemon 실행이 거부되는 것은 기대 동작이다.
+- `report --interval`은 daemon 수집 interval과 같아야 GPU-hours가 맞다.
+- SQLite WAL sidecar(`*.db-wal`, `*.db-shm`)는 마지막 connection이 닫히면 정리된다.
+- 1.0.2를 자를 경우 `env GITHUB_REF_NAME=v1.0.2 uv run python scripts/check-tag-version.py`가
+  통과해야 한다.
 
 ## 다음 세션 추천 순서
 
 1. `git status --short`로 사용자 변경 여부를 먼저 확인한다.
-2. `projects/bare-metal-1.0/status.ko.md`를 읽고 마지막 검증 이후 차이를 확인한다.
-3. NVIDIA host acceptance를 실행한다.
-4. release prep PR을 main에 머지한다.
-5. `v1.0.0` tag를 push하기 전 아래를 다시 실행한다.
-
-```sh
-uv run ruff check
-uv run ruff format --check
-uv run mypy
-uv run pytest
-uv build
-bash scripts/smoke-dist-wheel.sh
-env GITHUB_REF_NAME=v1.0.0 uv run python scripts/check-tag-version.py
-```
+2. cleanup PR의 CI 결과와 review comments를 확인한다.
+3. 필요하면 report wording을 실제 운영자가 읽기 쉬운 형태로 한 번 더 다듬는다.
+4. merge 후 patch release가 필요하면 version bump와 changelog를 별도 PR로 처리한다.
diff --git a/projects/bare-metal-1.0/status.ko.md b/projects/bare-metal-1.0/status.ko.md
index a1503d4..a9535b4 100644
--- a/projects/bare-metal-1.0/status.ko.md
+++ b/projects/bare-metal-1.0/status.ko.md
@@ -4,29 +4,30 @@
 
 ## 요약
 
-Bare Metal 1.0은 단일 NVIDIA 베어메탈 호스트만 대상으로 하는 방향으로 정리되어
-있다. PR A/B/C와 post-1.0 cleanup은 완료됐고, 현재는 PR D release prep을
-진행 중이다.
+Bare Metal 1.0은 단일 NVIDIA 베어메탈 호스트만 대상으로 하는 형태로 1.0.1까지
+릴리스됐다. `v1.0.1` GitHub Release와 PyPI publish는 완료됐고, 사용자가 실제
+NVIDIA host에서 telemetry 수집이 정상 동작하는 것도 확인했다.
 
-cleanup 시작 시 워크트리는 깨끗했다.
+현재 작업은 1.0.1 이후 코드 퀄리티 cleanup이다. 주요 초점은 background daemon
+PID 안전성, report 의미 가시성, 내부 문서 정합성이다.
 
 ## 구현 상태
 
 | 영역 | 상태 | 메모 |
 | --- | --- | --- |
-| Scope reset | 완료 | Kubernetes/Slurm/Docker/remote/managed runtime 표면 제거. |
+| Scope reset | 완료 | Kubernetes/Slurm/Docker/remote runtime 표면 제거. |
 | `gua doctor` | 완료 | 현재 머신의 `/dev/nvidia*`, `nvidia-smi -L`, NVML, DB path만 진단. |
 | Packaging UX | 완료 | `nvidia-ml-py`가 기본 dependency이고 `nvml` extra는 빈 compatibility alias. |
-| `daemon`/`report` DB UX | 구현됨 | 기본 DB는 `/tmp/gua.db`; daemon은 기존 DB를 거부하고 report는 없는 DB를 거부. |
-| README bare-metal 문서 | 완료 | 2-shell flow, systemd 예시, 운영 notes가 들어가 있음. |
-| Post-1.0 cleanup | 완료 | auto-runtime proposal/project 문서, k8s/docker env 감지, `RuntimePlan` 잔재 제거. |
-| PR C closure | 완료 | runbook과 기존 DB 존재/부재 UX가 README/CLI에 반영됨. |
-| PR D release prep | 진행 중 | package version은 `1.0.0`; local build/wheel smoke 완료, tag publish가 남음. |
-| NVIDIA host acceptance | 미검증 | 현재 로컬 머신에는 NVIDIA device/driver가 없어 실제 host 수집 loop는 확인하지 못함. |
+| `gua` command surface | 완료 | `doctor`, `daemon`, `start`, `status`, `stop`, `report`, `demo` 제공. |
+| Background daemon UX | 완료 | `gua daemon`은 기본 백그라운드 실행, `--foreground`는 systemd/debug용. |
+| `daemon`/`report` DB UX | 완료 | 기본 DB는 `/tmp/gua.db`; daemon은 기존 DB를 거부하고 report는 없는 DB를 거부. |
+| README bare-metal 문서 | 완료 | install, runbook, systemd 예시, 운영 notes가 1.0.1 기준. |
+| Release | 완료 | `v1.0.1` tag, GitHub Release, PyPI publish 완료. |
+| NVIDIA host acceptance | 완료 | 실제 NVIDIA host에서 수집 정상 동작 확인. |
 
-## 검증 결과
+## 마지막 확인 결과
 
-2026-05-15 release prep 로컬 검증:
+2026-05-15 1.0.1 상태 확인:
 
 ```sh
 git status --short
@@ -34,43 +35,50 @@ uv run ruff check
 uv run ruff format --check
 uv run mypy
 uv run pytest
-uv build --out-dir /tmp/gua-dist-1.0.0-prep
-bash scripts/smoke-dist-wheel.sh /tmp/gua-dist-1.0.0-prep/gpu_usage_audit-1.0.0-py3-none-any.whl
-env GITHUB_REF_NAME=v1.0.0 uv run python scripts/check-tag-version.py
+env GITHUB_REF_NAME=v1.0.1 uv run python scripts/check-tag-version.py
+uv build --out-dir /tmp/gua-dist-1.0.1-status
+bash scripts/smoke-dist-wheel.sh /tmp/gua-dist-1.0.1-status/gpu_usage_audit-1.0.1-py3-none-any.whl
 ```
 
 결과:
 
-- `git status --short`: release prep 변경분만 존재.
+- 작업트리 clean.
 - `ruff check`: pass.
 - `ruff format --check`: 26 files already formatted.
 - `mypy`: no issues in 25 source files.
-- `pytest`: 107 passed.
+- `pytest`: 114 passed.
+- tag-version check: `v1.0.1`과 `pyproject.toml` version 일치.
 - `uv build`: sdist/wheel build 성공.
 - wheel smoke: 성공.
-- tag-version check: `v1.0.0`과 `pyproject.toml` version 일치.
-
-2026-05-15 release prep 변경:
-
-- `pyproject.toml` / `uv.lock` package version을 `1.0.0`으로 갱신.
-- README status와 GitHub Release asset 예시를 `v1.0.0` 기준으로 갱신.
-- `CHANGELOG.md`에 1.0.0 release notes 추가.
-
-## 이번 cleanup 변경
-
-- `proposals/design-auto-runtime*.md` 삭제.
-- `projects/auto-runtime-audit/plan*.md` 삭제.
-- `src/gpu_usage_audit/env.py`와 `tests/test_env.py` 삭제.
-- `daemon`/`demo`는 1.0 계약대로 host `env_kind`를 `"bare"`로 직접 기록.
-- `RuntimePlan` 모델 제거. `gua doctor`는 내부 `DoctorPlan`으로 host/unsupported,
-  reasons, blockers, warnings만 유지.
-- `DoctorPlan` JSON에서 post-1.0 placeholder였던 `scheduler`, `telemetry`,
-  `confidence`, `required_privileges`, `actions` 필드 제거.
+- Release workflow: `v1.0.1` success.
+- PyPI latest: `gpu-usage-audit 1.0.1`.
+
+## 1.0.1에서 바뀐 점
+
+- `gua`를 documented command surface로 정리했다.
+- `gua daemon`은 collector를 백그라운드로 시작한다.
+- `gua daemon --foreground`는 systemd와 debugging 용도로 유지한다.
+- `gua start`, `gua status`, `gua stop`을 추가했다.
+- README의 install/run/report 예시는 `gua` 기준으로 정리됐다.
+
+## 현재 cleanup 리뷰 결과
+
+- `/tmp/gua.pid` 숫자만 믿고 `gua stop`이 SIGTERM을 보내면 PID 재사용 시 다른
+  프로세스를 건드릴 수 있다. pid가 실제 `python -m gpu_usage_audit daemon`
+  프로세스인지 확인해야 한다.
+- §2 report가 `idle-held`와 `truly-idle`을 모두 "idle/waste"로 합쳐 보여주면
+  제품 메시지가 흐려진다. 사용자가 못 쓰는 용량과 실제 빈 용량을 분리해야 한다.
+- §4 Top identities는 process row를 바로 세면 같은 사용자의 여러 프로세스가
+  같은 GPU/tick에서 과대계상될 수 있다. identity/GPU/tick 단위로 먼저 접어야 한다.
+- report는 "sample"의 의미, threshold, `--interval` 의존성을 출력 자체에서 더
+  잘 설명해야 한다.
+- NVML process list를 읽지 못하는 경우 low-util GPU가 `truly-idle`처럼 보일 수
+  있으므로 최소한 경고가 필요하다.
 
 ## 로컬 `doctor` 상태
 
-현재 개발 머신은 NVIDIA host가 아니므로 `uv run gua doctor --json`은
-`unsupported`가 정상 결과다.
+현재 개발 머신은 NVIDIA host가 아니므로 `uv run gua doctor`는 `unsupported`가
+정상 결과다.
 
 관찰된 blocker:
 
@@ -79,18 +87,11 @@ env GITHUB_REF_NAME=v1.0.0 uv run python scripts/check-tag-version.py
 - NVML init 실패: `libnvidia-ml.so.1` 없음.
 - `/tmp/gua.db`가 이미 있어 daemon은 기본 경로로 시작하지 않음.
 
-이 결과는 로컬 환경 한계이며, 제품 regression으로 보지는 않는다. 실제 acceptance는
-NVIDIA 베어메탈 호스트에서 다시 실행해야 한다.
+이 결과는 로컬 환경 한계이며, 제품 regression으로 보지 않는다.
 
 ## 다음 작업
 
-1. NVIDIA host에서 acceptance command를 실행한다.
-2. release prep PR을 main에 머지한다.
-3. `v1.0.0` tag를 push해서 GitHub Release와 PyPI publish workflow를 실행한다.
-
-```sh
-uv tool install gpu-usage-audit
-gua doctor
-gpu-usage-audit daemon --interval 30s
-gpu-usage-audit report --since 1h --interval 30s
-```
+1. cleanup PR에서 PID 검증, report 가시성, 문서 정합성을 반영한다.
+2. `uv run ruff check`, `uv run ruff format --check`, `uv run mypy`, `uv run pytest`를
+   다시 실행한다.
+3. 필요하면 1.0.2 patch release 후보로 묶는다.
diff --git a/src/gpu_usage_audit/__main__.py b/src/gpu_usage_audit/__main__.py
index 84fb731..c4f299a 100644
--- a/src/gpu_usage_audit/__main__.py
+++ b/src/gpu_usage_audit/__main__.py
@@ -48,17 +48,17 @@
 from .render import (
     render_headline,
     render_heatmap,
+    render_idle_capacity,
     render_per_gpu,
     render_top_identities,
-    render_waste,
 )
 from .report import (
     load_headline,
     load_heatmap,
     load_host,
+    load_idle_capacity,
     load_per_gpu,
     load_top_identities,
-    load_waste,
 )
 from .tier import FakeTier
 
@@ -137,7 +137,7 @@ def build_parser() -> argparse.ArgumentParser:
         "--interval",
         type=_duration,
         default=timedelta(seconds=30),
-        help="Daemon tick interval — for §2 Waste / §4 time conversion [default: 30s]",
+        help="Daemon tick interval — for §2 Idle capacity / §4 time conversion [default: 30s]",
     )
     p_report.add_argument(
         "--width",
@@ -206,7 +206,7 @@ def _add_report_args(parser: argparse.ArgumentParser) -> None:
         "--interval",
         type=_duration,
         default=timedelta(seconds=30),
-        help="Daemon tick interval — for §2 Waste / §4 time conversion [default: 30s]",
+        help="Daemon tick interval — for §2 Idle capacity / §4 time conversion [default: 30s]",
     )
     parser.add_argument(
         "--width",
@@ -355,10 +355,15 @@ def _cmd_gua_start(args: argparse.Namespace) -> int:
     log_path = Path(args.log_file)
 
     existing_pid = _read_pid(pid_path)
-    if existing_pid is not None and _pid_alive(existing_pid):
-        print(f"gua daemon: already running (pid {existing_pid})")
-        return 0
     if existing_pid is not None:
+        if _pid_alive(existing_pid) and _pid_is_managed_daemon(existing_pid):
+            print(f"gua daemon: already running (pid {existing_pid})")
+            return 0
+        if _pid_alive(existing_pid):
+            print(
+                f"gua daemon: pid {existing_pid} belongs to another process; "
+                "clearing stale pid file"
+            )
         _unlink_if_exists(pid_path)
 
     if db_path.exists():
@@ -418,13 +423,20 @@ def _cmd_gua_status(args: argparse.Namespace) -> int:
     if pid is None:
         print("gua daemon: not running")
         return 0
-    if _pid_alive(pid):
+    if _pid_alive(pid) and _pid_is_managed_daemon(pid):
         print(f"gua daemon: running (pid {pid})")
         print(f"  pid file: {pid_path}")
         print(f"  log: {log_path}")
         return 0
-    print(f"gua daemon: not running (stale pid {pid})")
-    _unlink_if_exists(pid_path)
+    if _pid_alive(pid):
+        _unlink_if_exists(pid_path)
+        print(
+            f"gua daemon: not running (pid {pid} belongs to another process; "
+            "cleared stale pid file)"
+        )
+    else:
+        print(f"gua daemon: not running (stale pid {pid})")
+        _unlink_if_exists(pid_path)
     return 0
 
 
@@ -438,7 +450,17 @@ def _cmd_gua_stop(args: argparse.Namespace) -> int:
         _unlink_if_exists(pid_path)
         print(f"gua daemon: not running (removed stale pid {pid})")
         return 0
+    if not _pid_is_managed_daemon(pid):
+        _unlink_if_exists(pid_path)
+        print(
+            f"gua daemon: not running (pid {pid} belongs to another process; "
+            "cleared stale pid file)"
+        )
+        return 0
 
+    # The identity check above closes the common stale-PID-file case. A tiny
+    # check-then-kill race remains if the process exits and the OS reuses the
+    # PID before SIGTERM; avoiding that needs a stronger lock model.
     try:
         os.kill(pid, signal.SIGTERM)
     except PermissionError:
@@ -525,12 +547,12 @@ def _cmd_report(args: argparse.Namespace) -> int:
         cutoff = datetime.now(UTC) - args.since
         host = load_host(conn)
         headline = load_headline(conn, cutoff)
-        waste = load_waste(conn, cutoff, args.interval)
+        idle_capacity = load_idle_capacity(conn, cutoff, args.interval)
         per_gpu = load_per_gpu(conn, cutoff)
         top = load_top_identities(conn, cutoff, args.interval)
         heat = load_heatmap(conn, cutoff)
         render_headline(sys.stdout, host, headline, args.since, args.width)
-        render_waste(sys.stdout, waste)
+        render_idle_capacity(sys.stdout, idle_capacity)
         render_per_gpu(sys.stdout, per_gpu)
         render_top_identities(sys.stdout, top)
         render_heatmap(sys.stdout, heat)
@@ -586,7 +608,7 @@ def _cmd_demo(args: argparse.Namespace) -> int:
         cutoff = datetime.now(UTC) - window
         loaded_host = load_host(conn)
         render_headline(sys.stdout, loaded_host, load_headline(conn, cutoff), window, width=60)
-        render_waste(sys.stdout, load_waste(conn, cutoff, args.interval))
+        render_idle_capacity(sys.stdout, load_idle_capacity(conn, cutoff, args.interval))
         render_per_gpu(sys.stdout, load_per_gpu(conn, cutoff))
         render_top_identities(sys.stdout, load_top_identities(conn, cutoff, args.interval))
         render_heatmap(sys.stdout, load_heatmap(conn, cutoff))
@@ -677,6 +699,27 @@ def _pid_alive(pid: int) -> bool:
     return True
 
 
+def _pid_is_managed_daemon(pid: int) -> bool:
+    """Return True for the subprocess shape created by `_cmd_gua_start`.
+
+    Keep this in sync with the spawn command in `_cmd_gua_start`; status/stop
+    use it to avoid acting on unrelated processes from stale PID files.
+    """
+    args = _read_proc_cmdline(pid)
+    for i, arg in enumerate(args):
+        if arg == "-m" and args[i + 1 : i + 3] == ["gpu_usage_audit", "daemon"]:
+            return True
+    return False
+
+
+def _read_proc_cmdline(pid: int) -> list[str]:
+    try:
+        raw = Path(f"/proc/{pid}/cmdline").read_bytes()
+    except OSError:
+        return []
+    return [part.decode("utf-8", errors="replace") for part in raw.split(b"\0") if part]
+
+
 def _unlink_if_exists(path: Path) -> None:
     with contextlib.suppress(FileNotFoundError):
         path.unlink()
diff --git a/src/gpu_usage_audit/nvml.py b/src/gpu_usage_audit/nvml.py
index a6bde86..27ef14b 100644
--- a/src/gpu_usage_audit/nvml.py
+++ b/src/gpu_usage_audit/nvml.py
@@ -10,11 +10,14 @@
 from __future__ import annotations
 
 import contextlib
+import logging
 from datetime import datetime
 from typing import Any
 
 from .model import GPUSample, ProcSample, Snapshot
 
+logger = logging.getLogger(__name__)
+
 
 class NVMLNotAvailableError(RuntimeError):
     """pynvml 미설치 또는 NVML 초기화 실패. 사용자 facing 메시지로도 사용."""
@@ -59,6 +62,7 @@ class NVMLTier:
     def __init__(self) -> None:
         self._nvml: Any | None = None  # pynvml ModuleType
         self._initialized = False
+        self._process_list_warning_uuids: set[str] = set()
 
     def __enter__(self) -> NVMLTier:
         return self
@@ -97,7 +101,15 @@ def collect(self, ts: datetime) -> Snapshot:
             # 해당 카드의 process list 만 비우고 진행.
             try:
                 running = nvml.nvmlDeviceGetComputeRunningProcesses(h)
-            except nvml.NVMLError:
+            except nvml.NVMLError as e:
+                if uuid not in self._process_list_warning_uuids:
+                    logger.warning(
+                        "NVML process list unavailable for %s; idle-held classification "
+                        "may be understated: %s",
+                        uuid,
+                        e,
+                    )
+                    self._process_list_warning_uuids.add(uuid)
                 running = []
 
             for p in running:
diff --git a/src/gpu_usage_audit/render.py b/src/gpu_usage_audit/render.py
index c3881b2..b9a82c9 100644
--- a/src/gpu_usage_audit/render.py
+++ b/src/gpu_usage_audit/render.py
@@ -1,4 +1,4 @@
-"""§1~§5 report 렌더링. Go v0.1.0 의 renderHeadline/Waste/PerGPU/Top/Heatmap 동등.
+"""§1~§5 report 렌더링.
 
 색 의존성 회피 — TTY/isatty 토글 없이 *글자 자체* (█/▒/░) 로 세 분류를
 시각적으로 구분. 파일 redirect 시에도 출력이 깨끗.
@@ -13,7 +13,7 @@
 from typing import TextIO
 
 from .model import HostRow
-from .report import Headline, HeatmapCell, PerGPU, TopIdentity, Waste
+from .report import Headline, HeatmapCell, IdleCapacity, PerGPU, TopIdentity
 
 # 카테고리별로 *다른 글자* 를 써서 색깔 없이도 시각적 구분이 되게.
 GLYPH_ACTIVE = "█"  # 가장 진한 블록
@@ -48,6 +48,11 @@ def render_headline(
         print(f"gua — {host.hostname} ({ctx})  Window: {since}\n", file=w)
 
     print("§1 Headline", file=w)
+    print("  basis: one sample = one GPU card at one daemon tick", file=w)
+    print(
+        "  rules: active >=10% util; idle-held <10% util with >100 MB process memory",
+        file=w,
+    )
     if h.samples == 0:
         print("  (no samples in window)", file=w)
         return
@@ -63,15 +68,21 @@ def render_headline(
     print(f"  ({h.samples} samples)", file=w)
 
 
-def render_waste(w: TextIO, waste: Waste) -> None:
+def render_idle_capacity(w: TextIO, idle_capacity: IdleCapacity) -> None:
     print(file=w)
-    print("§2 Waste", file=w)
-    if waste.samples == 0:
+    print("§2 Idle capacity", file=w)
+    print("  converted from card-ticks to GPU-hours using the report --interval", file=w)
+    if idle_capacity.samples == 0:
         print("  (no samples in window)", file=w)
         return
     print(
-        f"  ~{waste.idle_gpu_hours:.2f} GPU-hours idle, "
-        f"~{waste.equiv_unused:.2f} GPUs equivalently unused",
+        f"  idle-held: ~{idle_capacity.idle_held_gpu_hours:.2f} GPU-hours, "
+        f"~{idle_capacity.idle_held_equiv_gpus:.2f} GPUs equivalently unavailable",
+        file=w,
+    )
+    print(
+        f"  truly-idle: ~{idle_capacity.truly_idle_gpu_hours:.2f} GPU-hours, "
+        f"~{idle_capacity.truly_idle_equiv_gpus:.2f} GPUs equivalently free",
         file=w,
     )
 
@@ -79,6 +90,7 @@ def render_waste(w: TextIO, waste: Waste) -> None:
 def render_per_gpu(w: TextIO, rows: list[PerGPU]) -> None:
     print(file=w)
     print("§3 Per-GPU", file=w)
+    print("  per-card share of samples in the same three states", file=w)
     if not rows:
         print("  (no GPU cards in window)", file=w)
         return
@@ -94,13 +106,14 @@ def render_per_gpu(w: TextIO, rows: list[PerGPU]) -> None:
 def render_top_identities(w: TextIO, rows: list[TopIdentity]) -> None:
     print(file=w)
     print("§4 Top identities", file=w)
+    print("  one identity counts once per GPU/tick after its processes are summed", file=w)
     if not rows:
         print("  (no processes in window)", file=w)
         return
-    print(f"  {'identity':<20} {'gpu-hours':>10}  {'idle-held':>10}", file=w)
+    print(f"  {'identity':<20} {'gpu-hours':>10}  {'idle-held':>10}  {'samples':>8}", file=w)
     for r in rows:
         print(
-            f"  {r.identity:<20} {r.gpu_hours:>10.2f}  {r.idle_held * 100:>9.1f}%",
+            f"  {r.identity:<20} {r.gpu_hours:>10.2f}  {r.idle_held * 100:>9.1f}%  {r.samples:>8}",
             file=w,
         )
 
@@ -108,6 +121,7 @@ def render_top_identities(w: TextIO, rows: list[TopIdentity]) -> None:
 def render_heatmap(w: TextIO, cells: list[HeatmapCell]) -> None:
     print(file=w)
     print("§5 Time-of-day heatmap (UTC)", file=w)
+    print("  darker means higher active share; blank means no samples", file=w)
     if not cells:
         print("  (no samples in window)", file=w)
         return
diff --git a/src/gpu_usage_audit/report.py b/src/gpu_usage_audit/report.py
index a05a032..89b449d 100644
--- a/src/gpu_usage_audit/report.py
+++ b/src/gpu_usage_audit/report.py
@@ -83,41 +83,69 @@ def load_headline(conn: sqlite3.Connection, cutoff: datetime) -> Headline:
     )
 
 
-# ── §2 Waste ────────────────────────────────────────────────────
+# ── §2 Idle capacity ─────────────────────────────────────────────
 #
-# idle 틱 수 × interval(초) / 3600 = idle GPU-시간.
-# equiv_unused = idle 비율 × 카드 수. "8장 중 3.2장이 통째로 놀았다" 식.
+# low-util 틱 수 × interval(초) / 3600 = GPU-시간.
+# idle-held 와 truly-idle 을 분리한다. 둘 다 util<10 이지만 의미가 다르다:
+# idle-held 는 프로세스 메모리가 카드를 잡고 있어 다른 사용자가 쓰기 어렵고,
+# truly-idle 은 실제로 비어 있는 용량이다.
+# equiv_gpus = 상태 비율 × 카드 수. "8장 중 3.2장이 해당 상태였다" 식.
 # 카드 수는 *gpu_sample 에서 distinct* 로 추론 — v2 는 별도 gpu 인벤토리
 # 테이블이 없음 (단순화). interval 은 Python 에서 인자로 받는다 —
 # 데몬과 report 가 *같은* interval 을 약속해야 의미가 맞음.
-WASTE_QUERY = """
+IDLE_CAPACITY_QUERY = """
+WITH s AS (
+    SELECT gs.gpu_uuid, gs.ts, gs.util_pct,
+           COALESCE(SUM(ps.mem_used_mb), 0) AS proc_mem_mb
+    FROM gpu_sample gs
+    LEFT JOIN proc_sample ps
+        ON ps.gpu_uuid = gs.gpu_uuid AND ps.ts = gs.ts
+    WHERE gs.ts >= ?
+    GROUP BY gs.gpu_uuid, gs.ts
+),
+gpu_count AS (
+    SELECT COUNT(DISTINCT gpu_uuid) AS n FROM s
+)
 SELECT
-    SUM(CASE WHEN util_pct < 10 THEN 1 ELSE 0 END) * ? / 3600.0          AS idle_gpu_hours,
+    SUM(CASE WHEN util_pct < 10 AND proc_mem_mb >  100 THEN 1 ELSE 0 END) * ? / 3600.0 AS idle_held_gpu_hours,
+    SUM(CASE WHEN util_pct < 10 AND proc_mem_mb <= 100 THEN 1 ELSE 0 END) * ? / 3600.0 AS truly_idle_gpu_hours,
     CASE WHEN COUNT(*) = 0 THEN 0.0
-         ELSE SUM(CASE WHEN util_pct < 10 THEN 1.0 ELSE 0.0 END) / COUNT(*)
-              * (SELECT COUNT(DISTINCT gpu_uuid) FROM gpu_sample)
-    END                                                                  AS equiv_unused,
-    COUNT(*)                                                             AS samples
-FROM gpu_sample
-WHERE ts >= ?
+         ELSE SUM(CASE WHEN util_pct < 10 AND proc_mem_mb > 100 THEN 1.0 ELSE 0.0 END) / COUNT(*)
+              * (SELECT n FROM gpu_count)
+    END AS idle_held_equiv_gpus,
+    CASE WHEN COUNT(*) = 0 THEN 0.0
+         ELSE SUM(CASE WHEN util_pct < 10 AND proc_mem_mb <= 100 THEN 1.0 ELSE 0.0 END) / COUNT(*)
+              * (SELECT n FROM gpu_count)
+    END AS truly_idle_equiv_gpus,
+    COUNT(*) AS samples
+FROM s
 """
 
 
 @dataclass(slots=True)
-class Waste:
-    idle_gpu_hours: float = 0.0
-    equiv_unused: float = 0.0
+class IdleCapacity:
+    idle_held_gpu_hours: float = 0.0
+    truly_idle_gpu_hours: float = 0.0
+    idle_held_equiv_gpus: float = 0.0
+    truly_idle_equiv_gpus: float = 0.0
     samples: int = 0
 
 
-def load_waste(conn: sqlite3.Connection, cutoff: datetime, interval: timedelta) -> Waste:
-    row = conn.execute(WASTE_QUERY, (interval.total_seconds(), _ts(cutoff))).fetchone()
+def load_idle_capacity(
+    conn: sqlite3.Connection,
+    cutoff: datetime,
+    interval: timedelta,
+) -> IdleCapacity:
+    interval_s = interval.total_seconds()
+    row = conn.execute(IDLE_CAPACITY_QUERY, (_ts(cutoff), interval_s, interval_s)).fetchone()
     if row is None:
-        return Waste()
-    idle_h, equiv, samples = row
-    return Waste(
-        idle_gpu_hours=idle_h or 0.0,
-        equiv_unused=equiv or 0.0,
+        return IdleCapacity()
+    idle_held_h, truly_idle_h, idle_held_equiv, truly_idle_equiv, samples = row
+    return IdleCapacity(
+        idle_held_gpu_hours=idle_held_h or 0.0,
+        truly_idle_gpu_hours=truly_idle_h or 0.0,
+        idle_held_equiv_gpus=idle_held_equiv or 0.0,
+        truly_idle_equiv_gpus=truly_idle_equiv or 0.0,
         samples=samples,
     )
 
@@ -173,14 +201,25 @@ def load_per_gpu(conn: sqlite3.Connection, cutoff: datetime) -> list[PerGPU]:
 #
 # 누가 GPU-시간을 가장 많이 소비했나 + 그 중 idle-held 비율.
 # COALESCE 로 NULL loginuid_user 를 'unknown' 으로 묶음.
+# 같은 identity 가 같은 GPU/tick 에 여러 프로세스를 띄워도 한 번만 센다.
 TOP_IDENTITIES_QUERY = """
+WITH owned AS (
+    SELECT
+        COALESCE(loginuid_user, 'unknown') AS identity,
+        gpu_uuid,
+        ts,
+        SUM(mem_used_mb) AS mem_used_mb
+    FROM proc_sample
+    WHERE ts >= ?
+    GROUP BY identity, gpu_uuid, ts
+)
 SELECT
-    COALESCE(ps.loginuid_user, 'unknown')                                                      AS identity,
-    COUNT(*) * ? / 3600.0                                                                      AS gpu_hours,
-    AVG(CASE WHEN gs.util_pct < 10 AND ps.mem_used_mb > 100 THEN 1.0 ELSE 0.0 END)             AS idle_held
-FROM proc_sample ps
-JOIN gpu_sample gs ON gs.gpu_uuid = ps.gpu_uuid AND gs.ts = ps.ts
-WHERE ps.ts >= ?
+    owned.identity                                                                  AS identity,
+    COUNT(*) * ? / 3600.0                                                           AS gpu_hours,
+    AVG(CASE WHEN gs.util_pct < 10 AND owned.mem_used_mb > 100 THEN 1.0 ELSE 0.0 END) AS idle_held,
+    COUNT(*)                                                                        AS samples
+FROM owned
+JOIN gpu_sample gs ON gs.gpu_uuid = owned.gpu_uuid AND gs.ts = owned.ts
 GROUP BY identity
 ORDER BY gpu_hours DESC
 LIMIT 10
@@ -192,6 +231,7 @@ class TopIdentity:
     identity: str
     gpu_hours: float
     idle_held: float
+    samples: int
 
 
 def load_top_identities(
@@ -200,14 +240,15 @@ def load_top_identities(
     interval: timedelta,
 ) -> list[TopIdentity]:
     out: list[TopIdentity] = []
-    for identity, gpu_hours, idle_held in conn.execute(
-        TOP_IDENTITIES_QUERY, (interval.total_seconds(), _ts(cutoff))
+    for identity, gpu_hours, idle_held, samples in conn.execute(
+        TOP_IDENTITIES_QUERY, (_ts(cutoff), interval.total_seconds())
     ):
         out.append(
             TopIdentity(
                 identity=identity,
                 gpu_hours=gpu_hours or 0.0,
                 idle_held=idle_held or 0.0,
+                samples=samples,
             )
         )
     return out
diff --git a/tests/test_render.py b/tests/test_render.py
index 90ab7f6..ea5adf0 100644
--- a/tests/test_render.py
+++ b/tests/test_render.py
@@ -16,11 +16,11 @@
     GLYPH_TRULY_IDLE,
     render_headline,
     render_heatmap,
+    render_idle_capacity,
     render_per_gpu,
     render_top_identities,
-    render_waste,
 )
-from gpu_usage_audit.report import Headline, HeatmapCell, PerGPU, TopIdentity, Waste
+from gpu_usage_audit.report import Headline, HeatmapCell, IdleCapacity, PerGPU, TopIdentity
 
 
 def _render(fn, *args, **kwargs) -> str:  # type: ignore[no-untyped-def]
@@ -74,15 +74,28 @@ def test_render_headline_no_host_row() -> None:
 # ── §2 ──────────────────────────────────────────────────────────
 
 
-def test_render_waste() -> None:
-    out = _render(render_waste, Waste(idle_gpu_hours=0.43, equiv_unused=2.53, samples=51))
-    assert "§2 Waste" in out
-    assert "0.43" in out
-    assert "2.53" in out
+def test_render_idle_capacity() -> None:
+    out = _render(
+        render_idle_capacity,
+        IdleCapacity(
+            idle_held_gpu_hours=0.31,
+            truly_idle_gpu_hours=0.12,
+            idle_held_equiv_gpus=1.53,
+            truly_idle_equiv_gpus=1.00,
+            samples=51,
+        ),
+    )
+    assert "§2 Idle capacity" in out
+    assert "idle-held" in out
+    assert "truly-idle" in out
+    assert "0.31" in out
+    assert "1.53" in out
+    assert "0.12" in out
+    assert "1.00" in out
 
 
-def test_render_waste_empty() -> None:
-    out = _render(render_waste, Waste())
+def test_render_idle_capacity_empty() -> None:
+    out = _render(render_idle_capacity, IdleCapacity())
     assert "(no samples in window)" in out
 
 
@@ -113,14 +126,15 @@ def test_render_top_identities() -> None:
     out = _render(
         render_top_identities,
         [
-            TopIdentity(identity="bob", gpu_hours=0.42, idle_held=1.0),
-            TopIdentity(identity="alice", gpu_hours=0.28, idle_held=0.0),
+            TopIdentity(identity="bob", gpu_hours=0.42, idle_held=1.0, samples=5),
+            TopIdentity(identity="alice", gpu_hours=0.28, idle_held=0.0, samples=3),
         ],
     )
     assert "§4 Top identities" in out
     assert "bob" in out
     assert "alice" in out
     assert "100.0%" in out and "0.0%" in out
+    assert "samples" in out
 
 
 def test_render_top_identities_empty() -> None:
diff --git a/tests/test_report.py b/tests/test_report.py
index 161ed69..3b2aaef 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -26,9 +26,9 @@
     load_headline,
     load_heatmap,
     load_host,
+    load_idle_capacity,
     load_per_gpu,
     load_top_identities,
-    load_waste,
 )
 
 INTERVAL = timedelta(seconds=10)
@@ -138,14 +138,16 @@ def test_load_headline_cutoff_past_all(db_loaded: sqlite3.Connection) -> None:
     assert h.samples == 0
 
 
-# ── load_waste ──────────────────────────────────────────────────
+# ── load_idle_capacity ──────────────────────────────────────────
 
 
-def test_load_waste(db_loaded: sqlite3.Connection) -> None:
-    w = load_waste(db_loaded, BASE, INTERVAL)
-    assert w.samples == 8
-    assert _close(w.idle_gpu_hours, 6 * 10 / 3600)
-    assert _close(w.equiv_unused, 1.5)
+def test_load_idle_capacity(db_loaded: sqlite3.Connection) -> None:
+    idle_capacity = load_idle_capacity(db_loaded, BASE, INTERVAL)
+    assert idle_capacity.samples == 8
+    assert _close(idle_capacity.idle_held_gpu_hours, 4 * 10 / 3600)
+    assert _close(idle_capacity.truly_idle_gpu_hours, 2 * 10 / 3600)
+    assert _close(idle_capacity.idle_held_equiv_gpus, 1.0)
+    assert _close(idle_capacity.truly_idle_equiv_gpus, 0.5)
 
 
 # ── load_per_gpu ────────────────────────────────────────────────
@@ -177,9 +179,38 @@ def test_load_top_identities(db_loaded: sqlite3.Connection) -> None:
     assert bob.identity == "bob"
     assert _close(bob.gpu_hours, 4 * 10 / 3600)
     assert _close(bob.idle_held, 1.0)
+    assert bob.samples == 4
     assert alice.identity == "alice"
     assert _close(alice.gpu_hours, 2 * 10 / 3600)
     assert _close(alice.idle_held, 0.0)
+    assert alice.samples == 2
+
+
+def test_load_top_identities_collapses_same_identity_on_same_gpu_tick(tmp_path: Path) -> None:
+    conn = open_db(tmp_path / "top-collapse.db")
+    try:
+        write_snapshot(
+            conn,
+            BASE,
+            _fixture_host(),
+            Snapshot(
+                gpus=[GPUSample(uuid="GPU-0", util_pct=2)],
+                procs=[
+                    ProcSample(gpu_uuid="GPU-0", pid=100, mem_used_mb=70000, loginuid_user="alice"),
+                    ProcSample(gpu_uuid="GPU-0", pid=101, mem_used_mb=200, loginuid_user="alice"),
+                ],
+            ),
+        )
+
+        rows = load_top_identities(conn, BASE, INTERVAL)
+        assert len(rows) == 1
+        alice = rows[0]
+        assert alice.identity == "alice"
+        assert alice.samples == 1
+        assert _close(alice.gpu_hours, 10 / 3600)
+        assert _close(alice.idle_held, 1.0)
+    finally:
+        conn.close()
 
 
 # ── load_heatmap ────────────────────────────────────────────────
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 8298a9d..61b7fb5 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -21,6 +21,8 @@
     DEFAULT_DB_PATH,
     DISPLAY_COMMAND_ENV,
     _duration,
+    _pid_is_managed_daemon,
+    _read_proc_cmdline,
     build_gua_parser,
     build_parser,
     gua_main,
@@ -322,6 +324,76 @@ def test_gua_status_and_stop_are_idempotent_without_pid_file(
     assert "not running" in capsys.readouterr().out
 
 
+def test_gua_status_removes_live_pid_that_is_not_gua_daemon(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    pid_file = tmp_path / "gua.pid"
+    pid_file.write_text("4242\n", encoding="utf-8")
+
+    monkeypatch.setattr("gpu_usage_audit.__main__._pid_alive", lambda _pid: True)
+    monkeypatch.setattr("gpu_usage_audit.__main__._pid_is_managed_daemon", lambda _pid: False)
+
+    rc = gua_main(["status", "--pid-file", str(pid_file)])
+
+    assert rc == 0
+    assert not pid_file.exists()
+    out = capsys.readouterr().out
+    assert "not running" in out
+    assert "belongs to another process" in out
+
+
+def test_gua_stop_does_not_signal_live_pid_that_is_not_gua_daemon(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    pid_file = tmp_path / "gua.pid"
+    pid_file.write_text("4242\n", encoding="utf-8")
+    kill_calls: list[tuple[int, int]] = []
+
+    monkeypatch.setattr("gpu_usage_audit.__main__._pid_alive", lambda _pid: True)
+    monkeypatch.setattr("gpu_usage_audit.__main__._pid_is_managed_daemon", lambda _pid: False)
+    monkeypatch.setattr(
+        "gpu_usage_audit.__main__.os.kill",
+        lambda pid, sig: kill_calls.append((pid, sig)),
+    )
+
+    rc = gua_main(["stop", "--pid-file", str(pid_file)])
+
+    assert rc == 0
+    assert kill_calls == []
+    assert not pid_file.exists()
+    out = capsys.readouterr().out
+    assert "not running" in out
+    assert "belongs to another process" in out
+
+
+@pytest.mark.parametrize(
+    ("argv", "want"),
+    [
+        ([sys.executable, "-m", "gpu_usage_audit", "daemon", "--db", "/tmp/gua.db"], True),
+        ([sys.executable, "-m", "gpu_usage_audit", "report"], False),
+        ([sys.executable, "-m", "other_module", "daemon"], False),
+        (["gua", "daemon", "--foreground"], False),
+        ([sys.executable, "-m"], False),
+        ([], False),
+    ],
+)
+def test_pid_is_managed_daemon_matches_background_spawn_shape(
+    monkeypatch: pytest.MonkeyPatch,
+    argv: list[str],
+    want: bool,
+) -> None:
+    monkeypatch.setattr("gpu_usage_audit.__main__._read_proc_cmdline", lambda _pid: argv)
+    assert _pid_is_managed_daemon(4242) is want
+
+
+def test_read_proc_cmdline_returns_empty_for_missing_pid() -> None:
+    assert _read_proc_cmdline(999_999_999) == []
+
+
 def _fake_doctor_report(*, db_path: str | Path = DEFAULT_DB_PATH) -> DoctorReport:
     return DoctorReport(
         generated_at=datetime(2026, 5, 14, 0, 0, tzinfo=UTC),
@@ -397,7 +469,13 @@ def test_demo_command_records_and_prints_report(
     captured = capsys.readouterr()
     assert rc == 0
     # §1~§5 다 등장.
-    for section in ("§1 Headline", "§2 Waste", "§3 Per-GPU", "§4 Top identities", "§5"):
+    for section in (
+        "§1 Headline",
+        "§2 Idle capacity",
+        "§3 Per-GPU",
+        "§4 Top identities",
+        "§5",
+    ):
         assert section in captured.out, f"{section} not in demo output"
     # DB 파일 생성됐는지.
     assert db_path.exists()