392 lines
12 KiB
TypeScript
392 lines
12 KiB
TypeScript
import {
|
|
AlertChannel,
|
|
HealthCheckTargetType,
|
|
HealthCheckType,
|
|
MonitoringAlertStatus,
|
|
Severity
|
|
} from "@prisma/client";
|
|
import { Router } from "express";
|
|
import { z } from "zod";
|
|
import { HttpError } from "../lib/http-error";
|
|
import { prisma } from "../lib/prisma";
|
|
import { toPrismaJsonValue } from "../lib/prisma-json";
|
|
import { authorize, isTenantScopedUser, requireAuth } from "../middleware/auth";
|
|
import { logAudit } from "../services/audit.service";
|
|
import {
|
|
clusterResourceForecast,
|
|
createAlertRule,
|
|
createHealthCheckDefinition,
|
|
evaluateAlertRulesNow,
|
|
faultyDeploymentInsights,
|
|
listAlertEvents,
|
|
listAlertNotifications,
|
|
listAlertRules,
|
|
listHealthCheckResults,
|
|
listHealthChecks,
|
|
monitoringOverview,
|
|
runHealthCheckNow,
|
|
updateAlertRule,
|
|
updateHealthCheckDefinition
|
|
} from "../services/monitoring.service";
|
|
|
|
// Express router that the monitoring route handlers in this module are registered on.
const router = Router();
|
|
|
|
/**
 * Request-body schema for health check definitions.
 *
 * Only `name` (at least 2 characters) and `target_type` are required; every
 * other field is optional. The PATCH route derives its schema from this one
 * via `.partial()`.
 */
const healthCheckSchema = (() => {
  // Optional number constrained to the 0..100 range (the *_pct fields).
  const percent = () => z.number().min(0).max(100).optional();
  // Optional number that must be >= 0 (disk I/O and network thresholds).
  const nonNegative = () => z.number().min(0).optional();
  // Optional integer that must be >= 1 (the latency_*_ms fields).
  const positiveInt = () => z.number().int().min(1).optional();
  // Optional free-form identifier or text.
  const optionalText = () => z.string().optional();

  return z.object({
    name: z.string().min(2),
    description: optionalText(),
    target_type: z.nativeEnum(HealthCheckTargetType),
    check_type: z.nativeEnum(HealthCheckType).optional(),
    tenant_id: optionalText(),
    vm_id: optionalText(),
    node_id: optionalText(),
    cpu_warn_pct: percent(),
    cpu_critical_pct: percent(),
    ram_warn_pct: percent(),
    ram_critical_pct: percent(),
    disk_warn_pct: percent(),
    disk_critical_pct: percent(),
    disk_io_read_warn: nonNegative(),
    disk_io_read_critical: nonNegative(),
    disk_io_write_warn: nonNegative(),
    disk_io_write_critical: nonNegative(),
    network_in_warn: nonNegative(),
    network_in_critical: nonNegative(),
    network_out_warn: nonNegative(),
    network_out_critical: nonNegative(),
    latency_warn_ms: positiveInt(),
    latency_critical_ms: positiveInt(),
    schedule_minutes: z.number().int().min(1).max(1440).optional(),
    enabled: z.boolean().optional(),
    metadata: z.record(z.unknown()).optional()
  });
})();
|
|
|
|
/**
 * Request-body schema for alert rules.
 *
 * Only `name` (at least 2 characters) is required; every other field is
 * optional. The PATCH route derives its schema from this one via `.partial()`.
 */
const alertRuleSchema = (() => {
  // Optional number constrained to the 0..100 range (the *_threshold_pct fields).
  const percent = () => z.number().min(0).max(100).optional();
  // Optional number that must be >= 0 (disk I/O and network thresholds).
  const nonNegative = () => z.number().min(0).optional();
  // Optional free-form identifier or text.
  const optionalText = () => z.string().optional();

  return z.object({
    name: z.string().min(2),
    description: optionalText(),
    tenant_id: optionalText(),
    vm_id: optionalText(),
    node_id: optionalText(),
    cpu_threshold_pct: percent(),
    ram_threshold_pct: percent(),
    disk_threshold_pct: percent(),
    disk_io_read_threshold: nonNegative(),
    disk_io_write_threshold: nonNegative(),
    network_in_threshold: nonNegative(),
    network_out_threshold: nonNegative(),
    consecutive_breaches: z.number().int().min(1).max(20).optional(),
    evaluation_window_minutes: z.number().int().min(1).max(1440).optional(),
    severity: z.nativeEnum(Severity).optional(),
    channels: z.array(z.nativeEnum(AlertChannel)).optional(),
    enabled: z.boolean().optional(),
    metadata: z.record(z.unknown()).optional()
  });
})();
|
|
|
|
/**
 * Loads a VM by id and enforces tenant isolation for the requesting user.
 *
 * @param vmId - Id of the virtual machine to look up.
 * @param req - Request object; only `user` is read.
 * @returns The VM's `id`, `tenant_id` and `name`.
 * @throws HttpError 404 (`VM_NOT_FOUND`) when no VM has that id.
 * @throws HttpError 403 (`TENANT_SCOPE_VIOLATION`) when a tenant-scoped user
 *   targets a VM that is not owned by their tenant, or has no tenant of their
 *   own to compare against.
 */
async function ensureVmTenantScope(vmId: string, req: Pick<Express.Request, "user">) {
  const vm = await prisma.virtualMachine.findUnique({
    where: { id: vmId },
    select: {
      id: true,
      tenant_id: true,
      name: true
    }
  });

  if (!vm) {
    throw new HttpError(404, "VM not found", "VM_NOT_FOUND");
  }

  if (isTenantScopedUser(req)) {
    const userTenantId = req.user?.tenant_id;

    // Fail closed: a tenant-scoped user without a tenant_id must not skip the
    // ownership comparison and thereby gain access to every tenant's VMs.
    if (!userTenantId || vm.tenant_id !== userTenantId) {
      throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
    }
  }

  return vm;
}
|
|
|
|
/**
 * Returns the tenant id that the caller is restricted to, if any.
 *
 * @param req - Request object; only `user` is read.
 * @returns The user's `tenant_id` when `isTenantScopedUser(req)` is true and a
 *   tenant id is present; `undefined` otherwise.
 */
function scopedTenantId(req: Pick<Express.Request, "user">) {
  if (!isTenantScopedUser(req)) {
    return undefined;
  }

  // Normalise a null/absent tenant id to undefined.
  return req.user?.tenant_id ?? undefined;
}
|
|
|
|
/**
 * Reads the `tenant_id` query-string parameter.
 *
 * @param req - Object carrying an optional parsed `query` map.
 * @returns The parameter when it is a single string value; `undefined` when it
 *   is missing or has any other type (for example an array).
 */
function queryTenantId(req: { query?: Record<string, unknown> }) {
  const candidate = req.query?.tenant_id;

  if (typeof candidate === "string") {
    return candidate;
  }

  return undefined;
}
|
|
|
|
// GET /overview — returns the monitoring overview, limited to the caller's
// tenant when the caller is tenant-scoped. Errors are forwarded to `next`.
router.get("/overview", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req);
    const overview = await monitoringOverview({ tenant_id: tenantId });

    return res.json(overview);
  } catch (err) {
    return next(err);
  }
});
|
|
|
|
// GET /health-checks — lists health check definitions as `{ data }`.
// A tenant-scoped caller is always filtered to their own tenant; otherwise the
// optional `tenant_id` query parameter is used. `enabled` is only applied when
// it is a single string value, and is true only for the literal "true".
router.get("/health-checks", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req) ?? queryTenantId(req);
    const { enabled } = req.query;
    const enabledFilter = typeof enabled === "string" ? enabled === "true" : undefined;

    const checks = await listHealthChecks({
      tenant_id: tenantId,
      enabled: enabledFilter
    });

    return res.json({ data: checks });
  } catch (err) {
    return next(err);
  }
});
|
|
|
|
// POST /health-checks — validates the body, creates a health check definition,
// writes an audit entry and responds 201 with the created record.
// Responds 401 when no authenticated user is attached to the request; schema
// failures and VM scope violations are forwarded to `next`.
router.post("/health-checks", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    // Resolve the actor once, before any write. Previously `req.user!` was
    // asserted only after the record had been created, so a missing user left
    // a created record with no audit entry and an opaque 500.
    const actor = req.user;
    if (!actor) {
      throw new HttpError(401, "Authentication required", "UNAUTHORIZED");
    }

    const payload = healthCheckSchema.parse(req.body ?? {});

    if (payload.vm_id) {
      await ensureVmTenantScope(payload.vm_id, req);
    }

    // A tenant-scoped caller's own tenant overrides any tenant_id in the body.
    const tenantId = scopedTenantId(req) ?? payload.tenant_id;
    const check = await createHealthCheckDefinition({
      ...payload,
      tenant_id: tenantId,
      created_by: actor.email
    });

    await logAudit({
      action: "monitoring.health_check.create",
      resource_type: "SECURITY",
      resource_id: check.id,
      resource_name: check.name,
      actor_email: actor.email,
      actor_role: actor.role,
      details: toPrismaJsonValue(payload),
      ip_address: req.ip
    });

    return res.status(201).json(check);
  } catch (error) {
    return next(error);
  }
});
|
|
|
|
// PATCH /health-checks/:id — applies a partial update to a health check.
// Responds 404 when the check does not exist and 403 when a tenant-scoped
// caller targets a check owned by a different tenant.
router.patch("/health-checks/:id", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const changes = healthCheckSchema.partial().parse(req.body ?? {});
    const checkId = req.params.id;

    const current = await prisma.serverHealthCheck.findUnique({
      where: { id: checkId },
      select: { id: true, tenant_id: true }
    });

    if (!current) {
      throw new HttpError(404, "Health check not found", "HEALTH_CHECK_NOT_FOUND");
    }

    // NOTE(review): a record with no tenant_id passes this check, and the
    // update below then assigns it the caller's tenant for tenant-scoped
    // users — confirm that is intended.
    if (isTenantScopedUser(req)) {
      const callerTenantId = req.user?.tenant_id;

      if (callerTenantId && current.tenant_id && current.tenant_id !== callerTenantId) {
        throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
      }
    }

    if (changes.vm_id) {
      await ensureVmTenantScope(changes.vm_id, req);
    }

    // A tenant-scoped caller's own tenant overrides any tenant_id in the body.
    const tenantId = scopedTenantId(req) ?? changes.tenant_id;
    const updated = await updateHealthCheckDefinition(checkId, {
      ...changes,
      tenant_id: tenantId
    });

    return res.json(updated);
  } catch (err) {
    return next(err);
  }
});
|
|
|
|
// POST /health-checks/:id/run — runs one health check immediately and returns
// the result. Responds 404 when the check does not exist and 403 when a
// tenant-scoped caller targets a check owned by a different tenant.
router.post("/health-checks/:id/run", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const check = await prisma.serverHealthCheck.findUnique({
      where: { id: req.params.id },
      select: { id: true, tenant_id: true }
    });

    if (!check) {
      throw new HttpError(404, "Health check not found", "HEALTH_CHECK_NOT_FOUND");
    }

    // Checks without a tenant_id are not restricted by this comparison.
    if (isTenantScopedUser(req)) {
      const callerTenantId = req.user?.tenant_id;

      if (callerTenantId && check.tenant_id && check.tenant_id !== callerTenantId) {
        throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
      }
    }

    const outcome = await runHealthCheckNow(check.id);

    return res.json(outcome);
  } catch (err) {
    return next(err);
  }
});
|
|
|
|
// GET /health-checks/:id/results — lists stored results for one health check
// as `{ data }`. Responds 404 when the check does not exist and 403 when a
// tenant-scoped caller targets a check owned by a different tenant. The
// optional `limit` query parameter is forwarded to `listHealthCheckResults`.
router.get("/health-checks/:id/results", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const existing = await prisma.serverHealthCheck.findUnique({
      where: { id: req.params.id },
      select: { id: true, tenant_id: true }
    });

    if (!existing) {
      throw new HttpError(404, "Health check not found", "HEALTH_CHECK_NOT_FOUND");
    }

    if (isTenantScopedUser(req) && req.user?.tenant_id && existing.tenant_id && existing.tenant_id !== req.user.tenant_id) {
      throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
    }

    // Number("abc") is NaN and Number("") is 0, neither of which is a usable
    // limit. Blank or non-numeric input is treated as "no limit supplied", and
    // fractional input is truncated to an integer.
    const limitText = typeof req.query.limit === "string" ? req.query.limit.trim() : "";
    const limitValue = limitText === "" ? Number.NaN : Number(limitText);
    const limit = Number.isFinite(limitValue) ? Math.trunc(limitValue) : undefined;

    const data = await listHealthCheckResults(existing.id, limit);
    return res.json({ data });
  } catch (error) {
    return next(error);
  }
});
|
|
|
|
// GET /alerts/rules — lists alert rules as `{ data }`.
// A tenant-scoped caller is always filtered to their own tenant; otherwise the
// optional `tenant_id` query parameter is used. `enabled` is only applied when
// it is a single string value, and is true only for the literal "true".
router.get("/alerts/rules", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req) ?? queryTenantId(req);
    const { enabled } = req.query;
    const enabledFilter = typeof enabled === "string" ? enabled === "true" : undefined;

    const rules = await listAlertRules({
      tenant_id: tenantId,
      enabled: enabledFilter
    });

    return res.json({ data: rules });
  } catch (err) {
    return next(err);
  }
});
|
|
|
|
// POST /alerts/rules — validates the body, creates an alert rule, writes an
// audit entry and responds 201 with the created record.
// Responds 401 when no authenticated user is attached to the request; schema
// failures and VM scope violations are forwarded to `next`.
router.post("/alerts/rules", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    // Resolve the actor once, before any write. Previously `req.user!` was
    // asserted only after the rule had been created, so a missing user left a
    // created rule with no audit entry and an opaque 500.
    const actor = req.user;
    if (!actor) {
      throw new HttpError(401, "Authentication required", "UNAUTHORIZED");
    }

    const payload = alertRuleSchema.parse(req.body ?? {});

    if (payload.vm_id) {
      await ensureVmTenantScope(payload.vm_id, req);
    }

    // A tenant-scoped caller's own tenant overrides any tenant_id in the body.
    const tenantId = scopedTenantId(req) ?? payload.tenant_id;
    const rule = await createAlertRule({
      ...payload,
      tenant_id: tenantId,
      created_by: actor.email
    });

    await logAudit({
      action: "monitoring.alert_rule.create",
      resource_type: "SECURITY",
      resource_id: rule.id,
      resource_name: rule.name,
      actor_email: actor.email,
      actor_role: actor.role,
      details: toPrismaJsonValue(payload),
      ip_address: req.ip
    });

    return res.status(201).json(rule);
  } catch (error) {
    return next(error);
  }
});
|
|
|
|
// PATCH /alerts/rules/:id — applies a partial update to an alert rule.
// Responds 404 when the rule does not exist and 403 when a tenant-scoped
// caller targets a rule owned by a different tenant.
router.patch("/alerts/rules/:id", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const changes = alertRuleSchema.partial().parse(req.body ?? {});
    const ruleId = req.params.id;

    const current = await prisma.monitoringAlertRule.findUnique({
      where: { id: ruleId },
      select: { id: true, tenant_id: true }
    });

    if (!current) {
      throw new HttpError(404, "Alert rule not found", "ALERT_RULE_NOT_FOUND");
    }

    // NOTE(review): a rule with no tenant_id passes this check, and the update
    // below then assigns it the caller's tenant for tenant-scoped users —
    // confirm that is intended.
    if (isTenantScopedUser(req)) {
      const callerTenantId = req.user?.tenant_id;

      if (callerTenantId && current.tenant_id && current.tenant_id !== callerTenantId) {
        throw new HttpError(403, "Access denied for tenant scope", "TENANT_SCOPE_VIOLATION");
      }
    }

    if (changes.vm_id) {
      await ensureVmTenantScope(changes.vm_id, req);
    }

    // A tenant-scoped caller's own tenant overrides any tenant_id in the body.
    const tenantId = scopedTenantId(req) ?? changes.tenant_id;
    const updated = await updateAlertRule(ruleId, {
      ...changes,
      tenant_id: tenantId
    });

    return res.json(updated);
  } catch (err) {
    return next(err);
  }
});
|
|
|
|
// GET /alerts/events — lists alert events as `{ data }`.
// `status` is upper-cased and applied only when it matches a
// MonitoringAlertStatus value; unrecognised values are ignored. A
// tenant-scoped caller is always filtered to their own tenant; otherwise the
// optional `tenant_id` query parameter is used.
router.get("/alerts/events", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    const statusRaw = typeof req.query.status === "string" ? req.query.status.toUpperCase() : undefined;
    const status = Object.values(MonitoringAlertStatus).includes(statusRaw as MonitoringAlertStatus)
      ? (statusRaw as MonitoringAlertStatus)
      : undefined;

    // Number("abc") is NaN and Number("") is 0, neither of which is a usable
    // limit. Blank or non-numeric input is treated as "no limit supplied", and
    // fractional input is truncated to an integer.
    const limitText = typeof req.query.limit === "string" ? req.query.limit.trim() : "";
    const limitValue = limitText === "" ? Number.NaN : Number(limitText);
    const limit = Number.isFinite(limitValue) ? Math.trunc(limitValue) : undefined;

    const data = await listAlertEvents({
      tenant_id: scopedTenantId(req) ?? queryTenantId(req),
      status,
      limit
    });
    return res.json({ data });
  } catch (error) {
    return next(error);
  }
});
|
|
|
|
// GET /alerts/notifications — lists alert notifications as `{ data }`.
// A tenant-scoped caller is always filtered to their own tenant; otherwise the
// optional `tenant_id` query parameter is used.
router.get("/alerts/notifications", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    // Number("abc") is NaN and Number("") is 0, neither of which is a usable
    // limit. Blank or non-numeric input is treated as "no limit supplied", and
    // fractional input is truncated to an integer.
    const limitText = typeof req.query.limit === "string" ? req.query.limit.trim() : "";
    const limitValue = limitText === "" ? Number.NaN : Number(limitText);
    const limit = Number.isFinite(limitValue) ? Math.trunc(limitValue) : undefined;

    const data = await listAlertNotifications({
      tenant_id: scopedTenantId(req) ?? queryTenantId(req),
      limit
    });
    return res.json({ data });
  } catch (error) {
    return next(error);
  }
});
|
|
|
|
// POST /alerts/evaluate — triggers an immediate alert-rule evaluation and
// returns its result. The caller's tenant id is passed when the caller is
// tenant-scoped; otherwise `undefined` is passed.
router.post("/alerts/evaluate", requireAuth, authorize("security:manage"), async (req, res, next) => {
  try {
    const tenantId = scopedTenantId(req);
    const outcome = await evaluateAlertRulesNow(tenantId);

    return res.json(outcome);
  } catch (err) {
    return next(err);
  }
});
|
|
|
|
// GET /insights/faulty-deployments — returns faulty-deployment insights.
// The optional `days` query parameter is forwarded to the service. A
// tenant-scoped caller is always filtered to their own tenant; otherwise the
// optional `tenant_id` query parameter is used.
router.get("/insights/faulty-deployments", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    // Number("abc") is NaN and Number("") is 0; blank or non-numeric input is
    // treated as "not supplied" instead of being forwarded to the service.
    const daysText = typeof req.query.days === "string" ? req.query.days.trim() : "";
    const daysValue = daysText === "" ? Number.NaN : Number(daysText);
    const days = Number.isFinite(daysValue) ? daysValue : undefined;

    const data = await faultyDeploymentInsights({
      days,
      tenant_id: scopedTenantId(req) ?? queryTenantId(req)
    });
    return res.json(data);
  } catch (error) {
    return next(error);
  }
});
|
|
|
|
// GET /insights/cluster-forecast — returns the cluster resource forecast.
// The optional `horizon_days` query parameter is forwarded to the service. A
// tenant-scoped caller is always filtered to their own tenant; otherwise the
// optional `tenant_id` query parameter is used.
router.get("/insights/cluster-forecast", requireAuth, authorize("security:read"), async (req, res, next) => {
  try {
    // Number("abc") is NaN and Number("") is 0; blank or non-numeric input is
    // treated as "not supplied" instead of being forwarded to the service.
    const horizonText = typeof req.query.horizon_days === "string" ? req.query.horizon_days.trim() : "";
    const horizonValue = horizonText === "" ? Number.NaN : Number(horizonText);
    const horizon = Number.isFinite(horizonValue) ? horizonValue : undefined;

    const data = await clusterResourceForecast({
      horizon_days: horizon,
      tenant_id: scopedTenantId(req) ?? queryTenantId(req)
    });
    return res.json(data);
  } catch (error) {
    return next(error);
  }
});
|
|
|
|
// Default export: the router carrying every route registered in this module.
export default router;
|