-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3.prometheus_exporter.py
More file actions
83 lines (70 loc) · 3.11 KB
/
3.prometheus_exporter.py
File metadata and controls
83 lines (70 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from fastapi import FastAPI, Request, Response
from prometheus_client import make_asgi_app, Counter, Histogram, Gauge, Summary
import time
import random
import uvicorn
# Create the FastAPI app and mount a /metrics endpoint for Prometheus scraping.
app = FastAPI()
metrics_app = make_asgi_app()
app.mount("/metrics", metrics_app)
# Advanced criterion: at least 10 distinct metrics.
# 1. Total HTTP requests, labelled by method, path and response status (Counter).
REQUEST_COUNT = Counter('app_requests_total', 'Total HTTP requests', ['method', 'endpoint', 'http_status'])
# 2. Per-endpoint request latency distribution in seconds (Histogram).
REQUEST_LATENCY = Histogram('app_request_latency_seconds', 'HTTP request latency', ['endpoint'])
# 3. Model inference duration in seconds (Histogram).
INFERENCE_TIME = Histogram('model_inference_time_seconds', 'Time taken to run model inference')
# 4. Mocked CPU usage percentage — randomly generated, not a real reading (Gauge).
CPU_USAGE = Gauge('system_cpu_usage_percent', 'Mock of system CPU usage')
# 5. Mocked memory usage in bytes — randomly generated, not a real reading (Gauge).
MEMORY_USAGE = Gauge('system_memory_usage_bytes', 'Mock of system memory usage')
# 6. Number of HTTP requests currently in flight (Gauge).
ACTIVE_CONNECTIONS = Gauge('app_active_connections', 'Number of active connections')
# 7. Total unhandled exceptions raised while serving requests (Counter).
EXCEPTION_COUNT = Counter('app_exceptions_total', 'Total number of exceptions raised')
# 8. Predictions emitted, labelled by predicted class (Counter).
PREDICTION_CLASS_COUNT = Counter('model_predictions_total', 'Total predictions by class', ['class'])
# 9. Request payload size distribution in bytes (Summary).
PAYLOAD_SIZE = Summary('app_payload_size_bytes', 'Size of request payload in bytes')
# 10. Simulated inference failures (Counter).
FAILED_INFERENCE = Counter('model_failed_inference_total', 'Total failed inferences')
@app.middleware("http")
async def monitor_requests(request: Request, call_next):
    """Record request count, latency, in-flight connections, exception count,
    and mock CPU/memory gauges for every HTTP request.

    Metrics are updated in ``finally`` so they are recorded even when the
    downstream handler raises.
    """
    ACTIVE_CONNECTIONS.inc()
    # perf_counter() is monotonic — unlike time.time(), it cannot go backwards
    # on wall-clock adjustments, so latency can never be negative.
    start_time = time.perf_counter()
    # Mocked system readings (no psutil dependency) refreshed on each request.
    CPU_USAGE.set(random.uniform(10.0, 80.0))
    MEMORY_USAGE.set(random.uniform(500e6, 2e9))
    try:
        response = await call_next(request)
        status_code = response.status_code
    except Exception:
        status_code = 500
        EXCEPTION_COUNT.inc()
        # Bare `raise` preserves the original traceback; `raise e` would
        # rebuild it from the re-raise site.
        raise
    finally:
        latency = time.perf_counter() - start_time
        REQUEST_LATENCY.labels(endpoint=request.url.path).observe(latency)
        REQUEST_COUNT.labels(method=request.method, endpoint=request.url.path, http_status=status_code).inc()
        ACTIVE_CONNECTIONS.dec()
    return response
@app.get("/")
def read_root():
    """Health-check root endpoint confirming the API is running."""
    payload = {"message": "SML API is running"}
    return payload
@app.post("/predict")
async def predict_dummy(request: Request):
    """Dummy inference endpoint.

    Records the request payload size and inference duration, simulates a ~5%
    failure rate (HTTP 500), and otherwise returns a random class label.
    """
    # Observe the raw request body size before "inference".
    body = await request.body()
    PAYLOAD_SIZE.observe(len(body))
    with INFERENCE_TIME.time():
        # Simulate inference latency. The original used time.sleep(), which
        # blocks the event loop inside an `async def` endpoint and stalls all
        # concurrent requests — asyncio.sleep() yields control instead.
        await asyncio.sleep(random.uniform(0.1, 0.5))
        # Simulated 5% failure rate.
        if random.random() < 0.05:
            FAILED_INFERENCE.inc()
            return Response(status_code=500, content="Inference Failed")
        prediction = random.choice(['malignant', 'benign'])
        PREDICTION_CLASS_COUNT.labels(prediction).inc()
        return {"prediction": prediction}
if __name__ == '__main__':
    # Start the exporter server on port 8000, bound to all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=8000)