Devnil434 · kashviporwal-byte · May 21, 2026 · coderabbitai · May 21, 2026 · coderabbitai
diff --git a/benchmark.py b/benchmark.py
@@ -8,7 +8,7 @@
 import cv2
 import urllib.request
 from libs.config.settings import settings
-from ultralytics import YOLO  # <-- FIX: Yeh line missing thi, ab model load ho jayega!
+from ultralytics import YOLO  # <-- FIX: Loaded successfully
 
 class PipelineBenchmark:
     def __init__(self, redis_url=settings.REDIS_URL):
@@ -46,14 +46,24 @@ def monitor_memory(self):
                 pass
             time.sleep(0.05)
 
-    def run_full_pipeline_benchmark(self, model_path, video_source="data/sample_videos/sample.mp4", num_frames=100, img_size=320):
+    def run_full_pipeline_benchmark(self, model_path, video_source="data/sample_videos/sample.mp4", num_frames=100, img_size=320, device=None):
         # 1. Reset run state and metrics at start (CodeRabbit State Fix)
         self._stop_memory_monitor = False
         self.peak_ram = 0
         for key in self.metrics:
             self.metrics[key].clear()
 
-        print(f"\n🚀 Starting End-to-End Pipeline Performance Benchmark using model: {model_path}...")
+        # Dynamic device determination
+        if device is None:
+            if model_path.endswith(".engine"):
+                device = "cuda"
+            else:
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        print(f"\n🚀 Starting End-to-End Pipeline Performance Benchmark...")
+        print(f"   Model: {model_path}")
+        print(f"   Device: {device.upper()}")
+        print(f"   Frames: {num_frames}")
 
         # Cross-Machine Reproducibility Check: Video download automation fallback
         if not os.path.exists(video_source) and video_source == "data/sample_videos/sample.mp4":
@@ -83,8 +93,10 @@ def run_full_pipeline_benchmark(self, model_path, video_source="data/sample_vide
             model = YOLO(model_path, task='detect')
             # Warmup frames setup
             fake_tensor = torch.rand(1, 3, img_size, img_size)
+            if "cuda" in device.lower():
+                fake_tensor = fake_tensor.cuda()
             for _ in range(5):
-                model.predict(fake_tensor, verbose=False, device='cpu', imgsz=img_size)
+                model.predict(fake_tensor, verbose=False, device=device, imgsz=img_size)
             use_real_model = True
             print("✨ Real YOLO model successfully loaded into the benchmark pipeline!")
         except Exception as e:
@@ -116,11 +128,13 @@ def run_full_pipeline_benchmark(self, model_path, video_source="data/sample_vide
 
                 if frame_to_process is None:
                     frame_to_process = torch.rand(1, 3, img_size, img_size)
+                    if "cuda" in device.lower():
+                        frame_to_process = frame_to_process.cuda()
 
                 # 1. Measure Detection Speed
                 t0 = time.time()
                 if use_real_model:
-                    model.predict(frame_to_process, verbose=False, device='cpu', imgsz=img_size)
+                    model.predict(frame_to_process, verbose=False, device=device, imgsz=img_size)
                 else:
                     time.sleep(0.015) 
                 self.metrics["detection_times"].append(time.time() - t0)
@@ -164,7 +178,7 @@ def run_full_pipeline_benchmark(self, model_path, video_source="data/sample_vide
                 cap.release()
 
         total_duration = time.time() - start_total
-        self.generate_report(total_duration, model_path, "Real Video Asset" if use_real_video else "Synthetic Tensor Stream")
+        return self.generate_report(total_duration, model_path, "Real Video Asset" if use_real_video else "Synthetic Tensor Stream")
 
     def generate_report(self, total_duration, model_used, source_used):
         avg_det_time = np.mean(self.metrics["detection_times"])
@@ -177,7 +191,6 @@ def generate_report(self, total_duration, model_used, source_used):
 
         os.makedirs("docs/benchmarks", exist_ok=True)
 
-        # Dynamic logic for Mermaid timelines (CodeRabbit Dynamic Timeline Fix)
         det_ms = avg_det_time * 1000
         track_ms = avg_track
         redis_ms = avg_redis
@@ -227,10 +240,95 @@ def generate_report(self, total_duration, model_used, source_used):
         print("\n🏆 Benchmark ran successfully!")
         print(f"📊 Workload Source Verified: {source_used}")
         print("📁 Report generated at: docs/benchmarks/pipeline_benchmark.md")
+
+        return {
+            "model": model_used,
+            "fps": fps,
+            "latency_ms": det_ms,
+            "e2e_ms": avg_e2e,
+            "ram_mb": self.peak_ram
+        }
+
+def run_comparative_benchmark(benchrunner, models, num_frames=100):
+    """Benchmark every model in `models` (format label -> file path) whose file exists via `benchrunner`, then write a Markdown comparison to docs/benchmarks/comparison_report.md; returns None."""
+    results = []  # one metrics dict per model whose benchmark call returned without raising
+    print("\n🔍 Initiating Cross-Format Model Benchmark Comparison...")
+
+    for label, path in models.items():
+        if os.path.exists(path):
+            try:
+                # Use CUDA for TensorRT engines or whenever CUDA is available; otherwise fall back to CPU
+                device = "cuda" if path.endswith(".engine") or torch.cuda.is_available() else "cpu"
+                res = benchrunner.run_full_pipeline_benchmark(model_path=path, num_frames=num_frames, device=device)
+                res["format"] = label  # tag the metrics with the label shown in the table's first column
+                results.append(res)
+            except Exception as e:  # report the failure and continue with the remaining models
+                print(f"❌ Failed to run benchmark for {label} ({path}): {e}")
+        else:
+            print(f"⚠️ Skipping comparison for format '{label}' since file was not found at '{path}'.")
+
+    if not results:
+        print("❌ No models were successfully benchmarked.")
+        return  # nothing to compare, so no report file is written
+
+    # Build one Markdown table row per benchmarked model
+    table_rows = []
+    for r in results:
+        table_rows.append(
+            f"| **{r['format']}** | `{r['model']}` | {r['fps']:.2f} | {r['latency_ms']:.2f} ms | {r['e2e_ms']:.2f} ms | {r['ram_mb']:.1f} MB |"
+        )
+
+    comparison_md = (
+        f"# Consolidated Model Format Comparison Report\n\n"
+        f"Generated automatically on: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
+        f"This report compares the performance of Eagle core detection using various model formats on the current hardware.\n\n"
+        f"## Performance Summary\n\n"
+        f"| Model Format | Model Path | Throughput (FPS) | Detection Latency | E2E Latency | Peak RAM Usage |\n"
+        f"| :--- | :--- | :--- | :--- | :--- | :--- |\n"
+        + "\n".join(table_rows) + "\n\n"
+        f"### Hardware / Environmental Diagnostics\n"
+        f"- **CUDA Available:** `{torch.cuda.is_available()}`\n"
+        f"- **Active GPU:** `{torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None (CPU)'}`\n\n"
+        f"### Summary Analysis\n"
+        f"- **TensorRT (.engine)** provides compiled CUDA-kernel optimization for the absolute lowest possible latency and highest FPS throughput on NVIDIA devices.\n"
+        f"- **ONNX (.onnx)** formats offer standardized execution via ONNX Runtime with substantial speedups compared to raw PyTorch CPU inference.\n"
+        f"- **PyTorch (.pt)** files serve as the robust development standard and baseline framework.\n"
+    )
+
+    os.makedirs("docs/benchmarks", exist_ok=True)
+    report_path = "docs/benchmarks/comparison_report.md"
+    with open(report_path, "w", encoding="utf-8") as f:  # overwrites any report from a previous run
+        f.write(comparison_md)
+
+    print("\n==========================================================================")
+    print("🏆 Consolidated Cross-Format Comparison Complete!")
+    print(f"📁 Comparison report generated at: {report_path}")
+    print("==========================================================================")
+
 
 if __name__ == "__main__":
-    int8_path = "yolov8n_int8_openvino_model" 
+    import argparse
+    parser = argparse.ArgumentParser(description="Run Eagle performance benchmarks")
+    parser.add_argument("--model", type=str, default=None, help="Path to a specific model to benchmark")
+    parser.add_argument("--compare", action="store_true", help="Run comparisons across formats (.pt, .onnx, .engine)")  # opt-in: store_true defaults to False, keeping the final else branch reachable
+    parser.add_argument("--frames", type=int, default=100, help="Number of frames to benchmark")
+    args = parser.parse_args()
+
     REDIS_ENV_URL = os.getenv("REDIS_URL", settings.REDIS_URL)
-
     benchrunner = PipelineBenchmark(redis_url=REDIS_ENV_URL)
-    benchrunner.run_full_pipeline_benchmark(model_path=int8_path, num_frames=100)
+
+    if args.model:
+        # An explicit --model takes precedence over --compare
+        benchrunner.run_full_pipeline_benchmark(model_path=args.model, num_frames=args.frames)
+    elif args.compare:
+        # Candidate model files, resolved relative to the working directory; missing ones are skipped
+        candidate_models = {
+            "PyTorch (.pt)": "yolov8n.pt",
+            "ONNX (.onnx)": "yolov8n.onnx",
+            "TensorRT (.engine)": "yolov8n.engine"
+        }
+        run_comparative_benchmark(benchrunner, candidate_models, num_frames=args.frames)
+    else:
+        # Neither --model nor --compare given: single run on the default model path
+        int8_path = "yolov8n_int8_openvino_model"
+        benchrunner.run_full_pipeline_benchmark(model_path=int8_path, num_frames=args.frames)
diff --git a/docs/tensorrt_conversion.md b/docs/tensorrt_conversion.md
@@ -0,0 +1,101 @@
+# TensorRT Compilation & Optimization Guide
+
+This guide covers installing dependencies, converting models into high-performance TensorRT `.engine` formats, and running optimized inference using Eagle’s smart automatic fallback protocol.
+
+---
+
+## 🚀 Why TensorRT?
+
+NVIDIA TensorRT is a high-performance deep learning inference library that optimizes neural network models for deployment on NVIDIA GPUs and Jetson hardware. Utilizing `.engine` formats provides:
+
+* **Up to 5x Faster Inference**: Highly optimized CUDA kernels tailored directly to your GPU.
+* **Low Latency & High FPS**: Crucial for real-time surveillance and anomaly detection.
+* **FP16 Half-Precision Optimization**: Reduces memory footprint and can roughly double processing speed on GPUs with FP16 support, with negligible accuracy loss.
+* **Dynamic Batching & Memory Efficiency**: Reduces GPU memory (VRAM) usage.
+
+---
+
+## 🛠️ 1. Installation & Setup
+
+To compile and execute `.engine` models, your host machine requires the CUDA Toolkit, cuDNN, TensorRT, and the PyCUDA Python API.
+
+### Step A: Install NVIDIA Drivers & CUDA Toolkit
+1. Download and install compatible **NVIDIA GPU Drivers** from [NVIDIA Driver Downloads](https://www.nvidia.com/Download/index.aspx).
+2. Download and install **CUDA Toolkit 11.8 or 12.x** from the [CUDA Toolkit Archive](https://developer.nvidia.com/cuda-toolkit-archive).
+3. Ensure CUDA is added to your environment `PATH` variables. Verify by running:
+   ```bash
+   nvcc --version
+   ```
+
+### Step B: Install cuDNN
+1. Download **cuDNN** (matching your CUDA version) from the [cuDNN Download Portal](https://developer.nvidia.com/cudnn).
+2. Copy cuDNN headers and libraries into your local CUDA Toolkit directory.
+
+### Step C: Install TensorRT
+1. Download **NVIDIA TensorRT** matching your CUDA version from [TensorRT Portal](https://developer.nvidia.com/tensorrt).
+2. Follow the installation guide to unzip and add TensorRT binaries to your system library path.
+3. Install the TensorRT Python package, either from PyPI as shown below or from the wheel matching your Python version in the `python/` directory of the TensorRT package:
+   ```bash
+   pip install tensorrt
+   ```
+
+### Step D: Install PyCUDA
+PyCUDA is required for low-level memory copies (DMA transfer coordination) on NVIDIA GPUs.
+```bash
+pip install pycuda
+```
+
+---
+
+## 📦 2. Model Conversion Using `export_tensorrt.py`
+
+We have provided a streamlined conversion script in `scripts/export_tensorrt.py` to automate the compilation of `.pt` or `.onnx` models into `.engine` format.
+
+### Basic Compilation (Recommended FP16)
+To compile a PyTorch YOLOv8 baseline model using optimized **FP16 half-precision**, run:
+```bash
+python scripts/export_tensorrt.py --model yolov8n.pt --fp16
+```
+This automatically compiles the model and saves a newly optimized `yolov8n.engine` file in the same directory!
+
+### Command Parameters
+| Flag | Type | Default | Description |
+| :--- | :--- | :--- | :--- |
+| `--model` | `str` | `yolov8n.pt` | Path to the source `.pt` or `.onnx` model file to compile. |
+| `--fp16` | `bool` | `True` | Enables FP16 half-precision optimization (highly recommended). |
+| `--int8` | `bool` | `False` | Enables INT8 quantization (requires calibrating dataset). |
+| `--imgsz` | `int` | `640` | Resolution width/height of input frames (default: 640). |
+| `--device` | `str` | `cuda:0` | GPU device used for compilation (default: `cuda:0`). |
+
+---
+
+## 🧠 3. Smart Automatic Fallback Execution
+
+You **do not need** to modify your application logic or worry about crashing on non-GPU/non-TensorRT machines. The system implements a **smart fallback routing layer**:
+
+1. **Auto-Search**: The `Detector` class checks if a matching `.engine` file exists in the directory of your configured model (e.g. if `yolov8n.pt` is requested, it looks for `yolov8n.engine`).
+2. **Auto-Promote**: If the `.engine` model is present and CUDA/TensorRT drivers are available, the detector automatically loads the optimized TensorRT engine for accelerated performance.
+3. **Resilient Fallback**: If the `.engine` file is missing, corrupted, compiled on a different GPU, or if TensorRT is not supported on the host system, the code:
+   - Prints a non-blocking warning log: `Failed to load TensorRT engine. Triggering automatic fallback...`
+   - Automatically loads the baseline `.pt` or `.onnx` file and continues normal execution without interruption.
+
+---
+
+## 📊 4. Running the Performance Benchmarks
+
+To measure the latency and FPS throughput improvements, we have upgraded `benchmark.py` to test and compare multiple formats.
+
+### Run Multi-Format Comparative Benchmark:
+```bash
+python benchmark.py --compare
+```
+
+This runs the simulated video pipeline once for each of `yolov8n.pt`, `yolov8n.onnx`, and `yolov8n.engine` found in the working directory (missing files are skipped), and generates a unified report under:
+`docs/benchmarks/comparison_report.md`
+
+### Benchmark a Single Specific Model:
+```bash
+python benchmark.py --model yolov8n.engine
+```
+The report is written to:
+`docs/benchmarks/pipeline_benchmark.md`