-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
44 lines (37 loc) · 1.5 KB
/
app.py
File metadata and controls
44 lines (37 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import time

import torch
import uvicorn
from fastapi import FastAPI, Response
from prometheus_client import CONTENT_TYPE_LATEST, Counter, Histogram, generate_latest
from pydantic import BaseModel

from model import VAE, load_model
# Path to the serialized VAE state dict; overridable via the MODEL_PATH env var.
MODEL_PATH = os.getenv('MODEL_PATH', 'models/vae_demo.pt')

# Build the model on CPU with the architecture the checkpoint was trained with.
# NOTE(review): n_features/latent_dim must match the checkpoint — confirm against training config.
model = VAE(n_features=10, latent_dim=4)
if os.path.exists(MODEL_PATH):
    # map_location='cpu' lets a checkpoint saved on GPU load on CPU-only hosts;
    # without it torch.load raises when CUDA is unavailable.
    model.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))
model.eval()

# Prometheus collectors exposed via the /metrics endpoint.
REQUESTS = Counter('mosul_model_requests_total', 'Total model requests')
ANOMALIES = Counter('mosul_model_anomalies_total', 'Total detected anomalies')
LATENCY = Histogram('mosul_model_latency_seconds', 'Model latency seconds')

app = FastAPI(title='Mosul Model Service with Metrics')
class FeaturePayload(BaseModel):
    """Request body for /predict.

    `features` is the flat numeric feature vector fed to the VAE.
    Typed as list[float] so pydantic validates/coerces elements up front
    (torch.tensor(..., dtype=torch.float32) requires numerics anyway);
    a bare `list` accepted arbitrary payloads and deferred the failure
    into torch with an opaque error.
    """
    features: list[float]
@app.get('/metrics')
def metrics():
    """Expose Prometheus metrics in the text exposition format.

    Bug fix: the original returned a Flask-style `(body, status, headers)`
    tuple, which FastAPI does not interpret — it serialized the tuple as a
    JSON array with a `application/json` content type, so Prometheus could
    not scrape it. A `Response` sets the body and content type correctly.
    """
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)
@app.post('/predict')
async def predict(payload: FeaturePayload):
    """Score a feature vector for anomaly via VAE reconstruction error.

    The anomaly score combines mean-squared reconstruction error with a
    penalty on the latent norm; scores above ANOMALY_THRESHOLD are counted
    as anomalies. Returns the score, its components, and request latency.
    """
    ANOMALY_THRESHOLD = 0.05  # empirically chosen — TODO confirm against validation data

    REQUESTS.inc()
    # perf_counter is monotonic; time.time() can jump (NTP) and skew latency.
    start = time.perf_counter()

    # Shape (1, n_features): the model expects a batch dimension.
    x = torch.tensor(payload.features, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        recon, z, mu, logvar = model(x)
        recon_error = torch.mean((recon - x) ** 2).item()
        z_norm = torch.norm(z).item()

    # Weight 0.1 keeps the latent-norm term a secondary signal.
    score = recon_error + 0.1 * z_norm

    latency = time.perf_counter() - start
    LATENCY.observe(latency)
    if score > ANOMALY_THRESHOLD:
        ANOMALIES.inc()

    return {
        'anomaly_score': score,
        'recon_error': recon_error,
        'z_norm': z_norm,
        'latency': latency,
    }
# Run the service directly (development entry point); binds all interfaces.
if __name__ == '__main__':
    uvicorn.run(app, host='0.0.0.0', port=8000)