Agent: "Create Grafana dashboard for:
- Service overview with SLIs
- Detailed performance metrics
- Infrastructure monitoring"
"title" : "Service Overview" ,
"uid" : "service-overview" ,
"tags" : [ "production" , "sli" ],
"datasource" : "$datasource" ,
"query" : "label_values(kube_namespace_created, namespace)" ,
"datasource" : "$datasource" ,
"query" : "label_values(up{namespace=~ \" $namespace \" }, job)" ,
"title" : "Service Level Indicators" ,
"gridPos" : { "h" : 8 , "w" : 24 , "x" : 0 , "y" : 0 },
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 0 , "y" : 1 },
"expr" : "sum(rate(http_requests_total{status!~ \" 5.. \" ,namespace=~ \" $namespace \" ,job=~ \" $service \" }[5m])) / sum(rate(http_requests_total{namespace=~ \" $namespace \" ,job=~ \" $service \" }[5m]))" ,
"legendFormat" : "Availability"
{ "color" : "red" , "value" : null },
{ "color" : "yellow" , "value" : 0.99 },
{ "color" : "green" , "value" : 0.995 }
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 6 , "y" : 1 },
"expr" : "sum(rate(http_requests_total{namespace=~ \" $namespace \" ,job=~ \" $service \" }[5m])) by (status)" ,
"legendFormat" : "{{status}}"
{ "format" : "reqps" , "label" : "Requests/sec" }
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 12 , "y" : 1 },
"expr" : "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{namespace=~ \" $namespace \" ,job=~ \" $service \" }[5m])) by (le))" ,
{ "format" : "s" , "label" : "Latency" }
"params" : [ "A" , "5m" , "now" ]
"name" : "High P95 Latency" ,
"noDataState" : "no_data" ,
"gridPos" : { "h" : 8 , "w" : 6 , "x" : 18 , "y" : 1 },
"expr" : "sum(rate(http_requests_total{status=~ \" 5.. \" ,namespace=~ \" $namespace \" ,job=~ \" $service \" }[5m])) / sum(rate(http_requests_total{namespace=~ \" $namespace \" ,job=~ \" $service \" }[5m]))" ,
"legendFormat" : "Error Rate"
{ "color" : "green" , "value" : null },
{ "color" : "yellow" , "value" : 0.01 },
{ "color" : "red" , "value" : 0.05 }
"title" : "Infrastructure Metrics" ,
"gridPos" : { "h" : 8 , "w" : 24 , "x" : 0 , "y" : 9 },
"gridPos" : { "h" : 8 , "w" : 8 , "x" : 0 , "y" : 10 },
"expr" : "sum(rate(container_cpu_usage_seconds_total{namespace=~ \" $namespace \" ,pod=~ \" .*$service.* \" }[5m])) by (pod)" ,
"legendFormat" : "{{pod}}"
"gridPos" : { "h" : 8 , "w" : 8 , "x" : 8 , "y" : 10 },
"expr" : "sum(container_memory_working_set_bytes{namespace=~ \" $namespace \" ,pod=~ \" .*$service.* \" }) by (pod)" ,
"legendFormat" : "{{pod}}"
"gridPos" : { "h" : 8 , "w" : 8 , "x" : 16 , "y" : 10 },
"expr" : "sum(rate(container_network_receive_bytes_total{namespace=~ \" $namespace \" ,pod=~ \" .*$service.* \" }[5m])) by (pod)" ,
"legendFormat" : "RX {{pod}}"
"expr" : "-sum(rate(container_network_transmit_bytes_total{namespace=~ \" $namespace \" ,pod=~ \" .*$service.* \" }[5m])) by (pod)" ,
"legendFormat" : "TX {{pod}}"