Files
proxpanel/backend/src/routes/monitoring.routes.ts

392 lines
12 KiB
TypeScript

import {
AlertChannel,
HealthCheckTargetType,
HealthCheckType,
MonitoringAlertStatus,
Severity
} from "@prisma/client";
import { Router } from "express";
import { z } from "zod";
import { HttpError } from "../lib/http-error";
import { prisma } from "../lib/prisma";
import { toPrismaJsonValue } from "../lib/prisma-json";
import { authorize, isTenantScopedUser, requireAuth } from "../middleware/auth";
import { logAudit } from "../services/audit.service";
import {
clusterResourceForecast,
createAlertRule,
createHealthCheckDefinition,
evaluateAlertRulesNow,
faultyDeploymentInsights,
listAlertEvents,
listAlertNotifications,
listAlertRules,
listHealthCheckResults,
listHealthChecks,
monitoringOverview,
runHealthCheckNow,
updateAlertRule,
updateHealthCheckDefinition
} from "../services/monitoring.service";
// Express router that every monitoring route in this module is registered on.
const router = Router();
/**
 * Validation schema for the request body of health check definitions.
 *
 * Only `name` (at least 2 characters) and `target_type` are required; every
 * other field is optional. Percentage thresholds are limited to 0..100, the
 * I/O and network thresholds only need to be non-negative, latency thresholds
 * are integers >= 1 and `schedule_minutes` is an integer in 1..1440.
 */
const healthCheckSchema = (() => {
  // Factories (rather than shared instances) so each field gets its own validator.
  const optionalText = () => z.string().optional();
  const optionalPercent = () => z.number().min(0).max(100).optional();
  const optionalNonNegative = () => z.number().min(0).optional();
  const optionalLatencyMs = () => z.number().int().min(1).optional();

  return z.object({
    name: z.string().min(2),
    description: optionalText(),
    target_type: z.nativeEnum(HealthCheckTargetType),
    check_type: z.nativeEnum(HealthCheckType).optional(),
    tenant_id: optionalText(),
    vm_id: optionalText(),
    node_id: optionalText(),
    cpu_warn_pct: optionalPercent(),
    cpu_critical_pct: optionalPercent(),
    ram_warn_pct: optionalPercent(),
    ram_critical_pct: optionalPercent(),
    disk_warn_pct: optionalPercent(),
    disk_critical_pct: optionalPercent(),
    disk_io_read_warn: optionalNonNegative(),
    disk_io_read_critical: optionalNonNegative(),
    disk_io_write_warn: optionalNonNegative(),
    disk_io_write_critical: optionalNonNegative(),
    network_in_warn: optionalNonNegative(),
    network_in_critical: optionalNonNegative(),
    network_out_warn: optionalNonNegative(),
    network_out_critical: optionalNonNegative(),
    latency_warn_ms: optionalLatencyMs(),
    latency_critical_ms: optionalLatencyMs(),
    schedule_minutes: z.number().int().min(1).max(1440).optional(),
    enabled: z.boolean().optional(),
    metadata: z.record(z.unknown()).optional()
  });
})();
/**
 * Validation schema for the request body of alert rules.
 *
 * Only `name` (at least 2 characters) is required. Percentage thresholds are
 * limited to 0..100, I/O and network thresholds only need to be non-negative,
 * `consecutive_breaches` is an integer in 1..20 and
 * `evaluation_window_minutes` is an integer in 1..1440.
 */
const alertRuleSchema = (() => {
  // Factories (rather than shared instances) so each field gets its own validator.
  const optionalText = () => z.string().optional();
  const optionalPercent = () => z.number().min(0).max(100).optional();
  const optionalNonNegative = () => z.number().min(0).optional();

  return z.object({
    name: z.string().min(2),
    description: optionalText(),
    tenant_id: optionalText(),
    vm_id: optionalText(),
    node_id: optionalText(),
    cpu_threshold_pct: optionalPercent(),
    ram_threshold_pct: optionalPercent(),
    disk_threshold_pct: optionalPercent(),
    disk_io_read_threshold: optionalNonNegative(),
    disk_io_write_threshold: optionalNonNegative(),
    network_in_threshold: optionalNonNegative(),
    network_out_threshold: optionalNonNegative(),
    consecutive_breaches: z.number().int().min(1).max(20).optional(),
    evaluation_window_minutes: z.number().int().min(1).max(1440).optional(),
    severity: z.nativeEnum(Severity).optional(),
    channels: z.array(z.nativeEnum(AlertChannel)).optional(),
    enabled: z.boolean().optional(),
    metadata: z.record(z.unknown()).optional()
  });
})();
/**
 * Loads a virtual machine by id and verifies the caller may reference it.
 *
 * @param vmId - id of the virtual machine to look up.
 * @param req - request-like object carrying the authenticated `user`.
 * @returns the VM's `id`, `tenant_id` and `name`.
 * @throws HttpError 404 (`VM_NOT_FOUND`) when no VM has that id.
 * @throws HttpError 403 (`TENANT_SCOPE_VIOLATION`) when the caller is
 *   tenant-scoped, has a tenant id, and the VM belongs to a different tenant.
 */
async function ensureVmTenantScope(vmId: string, req: Pick<Express.Request, "user">) {
  const machine = await prisma.virtualMachine.findUnique({
    where: { id: vmId },
    select: { id: true, tenant_id: true, name: true }
  });

  if (!machine) {
    throw new HttpError(404, "VM not found", "VM_NOT_FOUND");
  }

  // Only callers that are tenant-scoped AND carry a tenant id are restricted.
  // NOTE(review): a tenant-scoped caller without a tenant id is not restricted
  // here — confirm that combination cannot occur.
  const callerTenantId = isTenantScopedUser(req) ? req.user?.tenant_id : undefined;
  if (callerTenantId && machine.tenant_id !== callerTenantId) {
    throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
  }

  return machine;
}
/**
 * Returns the tenant id the caller is confined to, if any.
 *
 * @param req - request-like object carrying the authenticated `user`.
 * @returns the user's `tenant_id` when `isTenantScopedUser(req)` holds and the
 *   id is set; `undefined` otherwise (a nullish id is normalised to `undefined`).
 */
function scopedTenantId(req: Pick<Express.Request, "user">) {
  if (!isTenantScopedUser(req)) {
    return undefined;
  }
  return req.user?.tenant_id ?? undefined;
}
/**
 * Reads the `tenant_id` query-string parameter.
 *
 * @param req - object with an optional `query` map.
 * @returns the parameter when it is a string (including the empty string);
 *   `undefined` when it is missing or has any other type (for example an array).
 */
function queryTenantId(req: { query?: Record<string, unknown> }) {
  const candidate = req.query?.tenant_id;
  if (typeof candidate !== "string") {
    return undefined;
  }
  return candidate;
}
/**
 * GET /overview — returns the monitoring overview.
 *
 * Runs behind `requireAuth` and `authorize("security:read")`. The overview is
 * restricted to the caller's tenant when the caller is tenant-scoped; the
 * `tenant_id` query parameter is not consulted on this route.
 */
router.get("/overview", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req);
    const overview = await monitoringOverview({ tenant_id: tenantId });
    return res.json(overview);
  } catch (err) {
    // Hand every failure to the downstream error handler.
    return next(err);
  }
});
/**
 * GET /health-checks — lists health check definitions as `{ data }`.
 *
 * Query parameters:
 * - `tenant_id`: used only when the caller is not confined to a tenant.
 * - `enabled`: any string value enables the filter; only the exact string
 *   "true" maps to `true`, every other string maps to `false`.
 */
router.get("/health-checks", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req) ?? queryTenantId(req);

    const enabledParam = req.query.enabled;
    let enabled: boolean | undefined = undefined;
    if (typeof enabledParam === "string") {
      enabled = enabledParam === "true";
    }

    const data = await listHealthChecks({ tenant_id: tenantId, enabled });
    return res.json({ data });
  } catch (err) {
    return next(err);
  }
});
/**
 * POST /health-checks — creates a health check definition and writes an audit
 * entry, then answers 201 with the created record.
 *
 * The body is validated with `healthCheckSchema`. When a `vm_id` is supplied
 * the VM must exist and be within the caller's tenant scope. A tenant-scoped
 * caller's own tenant id takes precedence over a `tenant_id` in the body.
 */
router.post("/health-checks", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const body = healthCheckSchema.parse(req.body ?? {});

    const { vm_id: targetVmId } = body;
    if (targetVmId) {
      await ensureVmTenantScope(targetVmId, req);
    }

    const created = await createHealthCheckDefinition({
      ...body,
      tenant_id: scopedTenantId(req) ?? body.tenant_id,
      created_by: req.user?.email
    });

    // The audit entry records the validated body as submitted by the client.
    await logAudit({
      action: "monitoring.health_check.create",
      resource_type: "SECURITY",
      resource_id: created.id,
      resource_name: created.name,
      actor_email: req.user!.email,
      actor_role: req.user!.role,
      details: toPrismaJsonValue(body),
      ip_address: req.ip
    });

    return res.status(201).json(created);
  } catch (err) {
    return next(err);
  }
});
/**
 * PATCH /health-checks/:id — partially updates a health check definition and
 * answers with the updated record.
 *
 * Every field of `healthCheckSchema` is optional on this route. Failures are
 * raised as HttpError and forwarded to `next`:
 * - 404 `HEALTH_CHECK_NOT_FOUND` when no check has the given id;
 * - 403 `TENANT_SCOPE_VIOLATION` when a tenant-confined caller targets a check
 *   that does not belong to their tenant, or references a VM outside it.
 */
router.patch("/health-checks/:id", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const payload = healthCheckSchema.partial().parse(req.body ?? {});
    const existing = await prisma.serverHealthCheck.findUnique({
      where: { id: req.params.id },
      select: {
        id: true,
        tenant_id: true
      }
    });
    if (!existing) {
      throw new HttpError(404, "Health check not found", "HEALTH_CHECK_NOT_FOUND");
    }

    // A tenant-confined caller may only modify checks that already belong to
    // their tenant. The comparison is exact on purpose: a check with no
    // tenant_id used to slip past the guard and was then re-assigned to the
    // caller's tenant by the update below.
    const callerTenantId = scopedTenantId(req);
    if (callerTenantId && existing.tenant_id !== callerTenantId) {
      throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
    }

    if (payload.vm_id) {
      await ensureVmTenantScope(payload.vm_id, req);
    }

    const updated = await updateHealthCheckDefinition(req.params.id, {
      ...payload,
      // The caller's own tenant wins over any tenant_id sent in the body.
      tenant_id: callerTenantId ?? payload.tenant_id
    });
    return res.json(updated);
  } catch (error) {
    return next(error);
  }
});
/**
 * POST /health-checks/:id/run — runs one health check immediately and answers
 * with whatever `runHealthCheckNow` returns.
 *
 * Raises HttpError 404 (`HEALTH_CHECK_NOT_FOUND`) for an unknown id and 403
 * (`TENANT_SCOPE_VIOLATION`) when a tenant-scoped caller targets a check owned
 * by a different tenant; both are forwarded to `next`.
 */
router.post("/health-checks/:id/run", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const check = await prisma.serverHealthCheck.findUnique({
      where: { id: req.params.id },
      select: { id: true, tenant_id: true }
    });
    if (!check) {
      throw new HttpError(404, "Health check not found", "HEALTH_CHECK_NOT_FOUND");
    }

    // NOTE(review): checks without a tenant_id stay runnable by tenant-scoped
    // callers — confirm that is intended.
    const callerTenantId = isTenantScopedUser(req) ? req.user?.tenant_id : undefined;
    const ownedByOtherTenant = callerTenantId && check.tenant_id && check.tenant_id !== callerTenantId;
    if (ownedByOtherTenant) {
      throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
    }

    const outcome = await runHealthCheckNow(check.id);
    return res.json(outcome);
  } catch (err) {
    return next(err);
  }
});
/**
 * GET /health-checks/:id/results — lists recorded results of one health check
 * as `{ data }`.
 *
 * Query parameters:
 * - `limit`: positive integer. A missing, non-numeric, fractional or
 *   non-positive value is ignored and `undefined` is passed on, exactly as when
 *   the parameter is absent.
 *
 * Raises HttpError 404 (`HEALTH_CHECK_NOT_FOUND`) for an unknown id and 403
 * (`TENANT_SCOPE_VIOLATION`) when a tenant-scoped caller targets a check owned
 * by a different tenant; both are forwarded to `next`.
 */
router.get("/health-checks/:id/results", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const existing = await prisma.serverHealthCheck.findUnique({
      where: { id: req.params.id },
      select: { id: true, tenant_id: true }
    });
    if (!existing) {
      throw new HttpError(404, "Health check not found", "HEALTH_CHECK_NOT_FOUND");
    }
    // NOTE(review): checks without a tenant_id stay readable by tenant-scoped
    // callers — confirm that is intended.
    if (isTenantScopedUser(req) && req.user?.tenant_id && existing.tenant_id && existing.tenant_id !== req.user.tenant_id) {
      throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
    }

    // Number() yields NaN for "abc", 0 for "" and keeps fractions; none of
    // those is a usable row limit, so they are dropped instead of passed on.
    const requestedLimit = typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
    const limit =
      requestedLimit !== undefined && Number.isInteger(requestedLimit) && requestedLimit > 0
        ? requestedLimit
        : undefined;

    const data = await listHealthCheckResults(existing.id, limit);
    return res.json({ data });
  } catch (error) {
    return next(error);
  }
});
/**
 * GET /alerts/rules — lists alert rules as `{ data }`.
 *
 * Query parameters:
 * - `tenant_id`: used only when the caller is not confined to a tenant.
 * - `enabled`: any string value enables the filter; only the exact string
 *   "true" maps to `true`, every other string maps to `false`.
 */
router.get("/alerts/rules", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req) ?? queryTenantId(req);

    const enabledParam = req.query.enabled;
    let enabled: boolean | undefined = undefined;
    if (typeof enabledParam === "string") {
      enabled = enabledParam === "true";
    }

    const data = await listAlertRules({ tenant_id: tenantId, enabled });
    return res.json({ data });
  } catch (err) {
    return next(err);
  }
});
/**
 * POST /alerts/rules — creates an alert rule and writes an audit entry, then
 * answers 201 with the created rule.
 *
 * The body is validated with `alertRuleSchema`. When a `vm_id` is supplied the
 * VM must exist and be within the caller's tenant scope. A tenant-scoped
 * caller's own tenant id takes precedence over a `tenant_id` in the body.
 */
router.post("/alerts/rules", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const body = alertRuleSchema.parse(req.body ?? {});

    const { vm_id: targetVmId } = body;
    if (targetVmId) {
      await ensureVmTenantScope(targetVmId, req);
    }

    const created = await createAlertRule({
      ...body,
      tenant_id: scopedTenantId(req) ?? body.tenant_id,
      created_by: req.user?.email
    });

    // The audit entry records the validated body as submitted by the client.
    await logAudit({
      action: "monitoring.alert_rule.create",
      resource_type: "SECURITY",
      resource_id: created.id,
      resource_name: created.name,
      actor_email: req.user!.email,
      actor_role: req.user!.role,
      details: toPrismaJsonValue(body),
      ip_address: req.ip
    });

    return res.status(201).json(created);
  } catch (err) {
    return next(err);
  }
});
/**
 * PATCH /alerts/rules/:id — partially updates an alert rule and answers with
 * the updated rule.
 *
 * Every field of `alertRuleSchema` is optional on this route. Failures are
 * raised as HttpError and forwarded to `next`:
 * - 404 `ALERT_RULE_NOT_FOUND` when no rule has the given id;
 * - 403 `TENANT_SCOPE_VIOLATION` when a tenant-confined caller targets a rule
 *   that does not belong to their tenant, or references a VM outside it.
 */
router.patch("/alerts/rules/:id", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const payload = alertRuleSchema.partial().parse(req.body ?? {});
    const existing = await prisma.monitoringAlertRule.findUnique({
      where: { id: req.params.id },
      select: {
        id: true,
        tenant_id: true
      }
    });
    if (!existing) {
      throw new HttpError(404, "Alert rule not found", "ALERT_RULE_NOT_FOUND");
    }

    // A tenant-confined caller may only modify rules that already belong to
    // their tenant. The comparison is exact on purpose: a rule with no
    // tenant_id used to slip past the guard and was then re-assigned to the
    // caller's tenant by the update below.
    const callerTenantId = scopedTenantId(req);
    if (callerTenantId && existing.tenant_id !== callerTenantId) {
      throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
    }

    if (payload.vm_id) {
      await ensureVmTenantScope(payload.vm_id, req);
    }

    const updated = await updateAlertRule(req.params.id, {
      ...payload,
      // The caller's own tenant wins over any tenant_id sent in the body.
      tenant_id: callerTenantId ?? payload.tenant_id
    });
    return res.json(updated);
  } catch (error) {
    return next(error);
  }
});
/**
 * GET /alerts/events — lists alert events as `{ data }`.
 *
 * Query parameters:
 * - `tenant_id`: used only when the caller is not confined to a tenant.
 * - `status`: matched case-insensitively against `MonitoringAlertStatus`;
 *   unknown values are ignored.
 * - `limit`: positive integer. A missing, non-numeric, fractional or
 *   non-positive value is ignored and `undefined` is passed on, exactly as when
 *   the parameter is absent.
 */
router.get("/alerts/events", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const statusRaw = typeof req.query.status === "string" ? req.query.status.toUpperCase() : undefined;
    const status = Object.values(MonitoringAlertStatus).includes(statusRaw as MonitoringAlertStatus)
      ? (statusRaw as MonitoringAlertStatus)
      : undefined;

    // Number() yields NaN for "abc", 0 for "" and keeps fractions; none of
    // those is a usable row limit, so they are dropped like an unknown status.
    const requestedLimit = typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
    const limit =
      requestedLimit !== undefined && Number.isInteger(requestedLimit) && requestedLimit > 0
        ? requestedLimit
        : undefined;

    const data = await listAlertEvents({
      tenant_id: scopedTenantId(req) ?? queryTenantId(req),
      status,
      limit
    });
    return res.json({ data });
  } catch (error) {
    return next(error);
  }
});
/**
 * GET /alerts/notifications — lists alert notifications as `{ data }`.
 *
 * Query parameters:
 * - `tenant_id`: used only when the caller is not confined to a tenant.
 * - `limit`: positive integer. A missing, non-numeric, fractional or
 *   non-positive value is ignored and `undefined` is passed on, exactly as when
 *   the parameter is absent.
 */
router.get("/alerts/notifications", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    // Number() yields NaN for "abc", 0 for "" and keeps fractions; none of
    // those is a usable row limit, so they are dropped instead of passed on.
    const requestedLimit = typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
    const limit =
      requestedLimit !== undefined && Number.isInteger(requestedLimit) && requestedLimit > 0
        ? requestedLimit
        : undefined;

    const data = await listAlertNotifications({
      tenant_id: scopedTenantId(req) ?? queryTenantId(req),
      limit
    });
    return res.json({ data });
  } catch (error) {
    return next(error);
  }
});
/**
 * POST /alerts/evaluate — triggers an immediate evaluation of alert rules and
 * answers with whatever `evaluateAlertRulesNow` returns.
 *
 * The caller's tenant id is passed along when the caller is tenant-scoped;
 * otherwise `undefined` is passed.
 */
router.post("/alerts/evaluate", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req);
    const outcome = await evaluateAlertRulesNow(tenantId);
    return res.json(outcome);
  } catch (err) {
    return next(err);
  }
});
/**
 * GET /insights/faulty-deployments — answers with the result of
 * `faultyDeploymentInsights`.
 *
 * Query parameters:
 * - `tenant_id`: used only when the caller is not confined to a tenant.
 * - `days`: positive finite number. A missing, non-numeric, infinite or
 *   non-positive value is ignored and `undefined` is passed on, exactly as when
 *   the parameter is absent.
 */
router.get("/insights/faulty-deployments", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    // Number() yields NaN for "abc" and 0 for ""; neither describes a usable
    // look-back window, so such values are dropped instead of passed on.
    const requestedDays = typeof req.query.days === "string" ? Number(req.query.days) : undefined;
    const days =
      requestedDays !== undefined && Number.isFinite(requestedDays) && requestedDays > 0
        ? requestedDays
        : undefined;

    const data = await faultyDeploymentInsights({
      days,
      tenant_id: scopedTenantId(req) ?? queryTenantId(req)
    });
    return res.json(data);
  } catch (error) {
    return next(error);
  }
});
/**
 * GET /insights/cluster-forecast — answers with the result of
 * `clusterResourceForecast`.
 *
 * Query parameters:
 * - `tenant_id`: used only when the caller is not confined to a tenant.
 * - `horizon_days`: positive finite number. A missing, non-numeric, infinite
 *   or non-positive value is ignored and `undefined` is passed on, exactly as
 *   when the parameter is absent.
 */
router.get("/insights/cluster-forecast", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    // Number() yields NaN for "abc" and 0 for ""; neither describes a usable
    // forecast horizon, so such values are dropped instead of passed on.
    const requestedHorizon =
      typeof req.query.horizon_days === "string" ? Number(req.query.horizon_days) : undefined;
    const horizon =
      requestedHorizon !== undefined && Number.isFinite(requestedHorizon) && requestedHorizon > 0
        ? requestedHorizon
        : undefined;

    const data = await clusterResourceForecast({
      horizon_days: horizon,
      tenant_id: scopedTenantId(req) ?? queryTenantId(req)
    });
    return res.json(data);
  } catch (error) {
    return next(error);
  }
});
// Default export: the router carrying the monitoring routes registered above.
export default router;