267 lines
6.7 KiB
Markdown
267 lines
6.7 KiB
Markdown
# 024-monitoring.mdc (Deep Reference)
|
||
|
||
> 该文件为原始详细规范归档,供 Tier 3 按需读取。
|
||
|
||
---
|
||
|
||
|
||
# 📊 Logging & Error Monitoring Standards (Hyperf + Vue 3)
|
||
|
||
## 日志规范
|
||
|
||
### 后端结构化日志 (Hyperf Monolog)
|
||
|
||
```php
|
||
<?php

// config/autoload/logger.php
//
// Logger channel definitions. Each channel writes to a daily-rotated file and
// formats every record as a JSON object so the output is machine-parseable.
// The opening tag must come first: anything placed before it is emitted as
// output and breaks the strict_types declaration below.

declare(strict_types=1);

use Monolog\Formatter\JsonFormatter;
use Monolog\Handler\RotatingFileHandler;
use Monolog\Level;

return [
    // Application channel: Info and above, 30 rotated files kept.
    'default' => [
        'handler' => [
            'class' => RotatingFileHandler::class,
            'constructor' => [
                'filename' => BASE_PATH . '/runtime/logs/hyperf.log',
                'maxFiles' => 30,
                'level' => Level::Info,
            ],
        ],
        'formatter' => [
            'class' => JsonFormatter::class,
            'constructor' => [
                'batchMode' => JsonFormatter::BATCH_MODE_JSON,
                'appendNewline' => true,
                'includeStacktraces' => true,
            ],
        ],
    ],
    // SQL channel: 14 rotated files kept.
    // NOTE(review): Debug level is meant for development; this document forbids
    // debug-level logging in production, so raise the level there.
    'sql' => [
        'handler' => [
            'class' => RotatingFileHandler::class,
            'constructor' => [
                'filename' => BASE_PATH . '/runtime/logs/sql.log',
                'maxFiles' => 14,
                'level' => Level::Debug,
            ],
        ],
        // Same JSON formatter as the default channel so every log file shares
        // one structured format.
        'formatter' => [
            'class' => JsonFormatter::class,
            'constructor' => [
                'batchMode' => JsonFormatter::BATCH_MODE_JSON,
                'appendNewline' => true,
                'includeStacktraces' => true,
            ],
        ],
    ],
];
|
||
```
|
||
|
||
### 日志级别规则
|
||
|
||
| 级别 | 使用场景 | 示例 |
|
||
|------|---------|------|
|
||
| `debug` | 开发调试,生产禁用 | 函数入参、SQL 查询 |
|
||
| `info` | 关键业务事件 | 用户登录、订单创建、审批通过 |
|
||
| `warning` | 非预期但可恢复 | 缓存未命中、重试、Token 即将过期 |
|
||
| `error` | 需要处理的错误 | API 调用失败、数据库异常、队列失败 |
|
||
| `critical` | 服务无法继续运行 | 启动失败、关键依赖丢失 |
|
||
|
||
### 请求链路追踪 (TraceId)
|
||
|
||
```php
|
||
<?php

// app/Middleware/TraceIdMiddleware.php

declare(strict_types=1);

namespace App\Middleware;

use Hyperf\Context\Context;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\MiddlewareInterface;
use Psr\Http\Server\RequestHandlerInterface;

/**
 * Attaches a trace id to every request.
 *
 * The id is taken from the incoming X-Trace-Id header when that header holds a
 * well-formed value; otherwise a random 32-character hex id is generated. The
 * id is stored in the Context under 'trace_id' and echoed back on the response
 * in the X-Trace-Id header.
 */
class TraceIdMiddleware implements MiddlewareInterface
{
    /**
     * Accepted shape of a caller-supplied trace id: 1-64 characters drawn from
     * letters, digits, '_', '.', ':' and '-'. The header is untrusted input
     * that ends up in log records and in a response header, so anything else
     * is discarded.
     */
    private const TRACE_ID_PATTERN = '/\A[A-Za-z0-9_.:-]{1,64}\z/';

    /**
     * @param ServerRequestInterface  $request incoming request, read for X-Trace-Id
     * @param RequestHandlerInterface $handler next handler in the chain
     *
     * @return ResponseInterface the handler's response with X-Trace-Id set
     */
    public function process(ServerRequestInterface $request, RequestHandlerInterface $handler): ResponseInterface
    {
        $incoming = $request->getHeaderLine('X-Trace-Id');

        // Missing, empty or malformed header: fall back to a generated id.
        $traceId = preg_match(self::TRACE_ID_PATTERN, $incoming) === 1
            ? $incoming
            : bin2hex(random_bytes(16));

        Context::set('trace_id', $traceId);

        $response = $handler->handle($request);

        return $response->withHeader('X-Trace-Id', $traceId);
    }
}
|
||
```
|
||
|
||
```php
|
||
// Usage in Service
// Collect the structured fields first so the log call itself stays short.
$logContext = [
    'trace_id' => Context::get('trace_id'),
    'order_id' => $order->id,
    'user_id' => Context::get('current_user')?->id,
];

$this->logger->info('Order created', $logContext);
|
||
```
|
||
|
||
### 禁止事项
|
||
|
||
```
|
||
✗ 不在日志中记录密码、Token、信用卡号等敏感信息
|
||
✗ 不使用 var_dump / dd / echo 替代结构化日志
|
||
✗ 不在循环中记录 debug 日志(性能影响)
|
||
✗ 不记录完整的请求/响应体(可能含敏感数据)
|
||
✗ 生产环境禁止 debug 级别日志
|
||
```
|
||
|
||
---
|
||
|
||
## 前端错误监控 (Vue 3)
|
||
|
||
### 全局错误捕获
|
||
|
||
```typescript
|
||
// src/plugins/error-handler.ts
import type { App } from 'vue'

/**
 * Installs global error reporting for the application.
 *
 * Registers a Vue `errorHandler` that logs component errors (message, stack,
 * component name and Vue's info string) and a window listener that logs
 * unhandled promise rejections.
 *
 * @param app - the Vue application instance to attach the handler to
 */
export function setupErrorHandler(app: App): void {
  app.config.errorHandler = (error, instance, info) => {
    console.error('Vue component error:', {
      error: error instanceof Error ? error.message : String(error),
      // Keep the stack so the failure can be traced back to its source.
      stack: error instanceof Error ? error.stack : undefined,
      component: instance?.$options?.name,
      info,
    })
    // Report to Sentry or custom error service
  }

  window.addEventListener('unhandledrejection', (event) => {
    console.error('Unhandled promise rejection:', event.reason)
  })
}
|
||
```
|
||
|
||
### Sentry 集成 (Vue 3 版)
|
||
|
||
```typescript
|
||
// src/plugins/sentry.ts
import * as Sentry from '@sentry/vue'

// Lower-cased names of request headers that must be stripped from an event
// before it is sent.
const SENSITIVE_HEADERS = ['authorization', 'cookie']

/**
 * Initialises Sentry for production builds only.
 *
 * Outside production (`import.meta.env.PROD` false) this is a no-op.
 *
 * @param app - the Vue application instance passed to `Sentry.init`
 * @param router - the router handed to the browser tracing integration
 */
export function setupSentry(app, router) {
  if (!import.meta.env.PROD) {
    return
  }

  Sentry.init({
    app,
    dsn: import.meta.env.VITE_SENTRY_DSN,
    environment: import.meta.env.MODE,
    integrations: [
      Sentry.browserTracingIntegration({ router }),
    ],
    tracesSampleRate: 0.1,
    beforeSend(event) {
      const headers = event.request?.headers
      if (headers) {
        // Header names are case-insensitive, so compare in lower case;
        // deleting only the exact key 'authorization' would leave
        // 'Authorization' in the event.
        for (const name of Object.keys(headers)) {
          if (SENSITIVE_HEADERS.includes(name.toLowerCase())) {
            delete headers[name]
          }
        }
      }
      return event
    },
  })
}
|
||
```
|
||
|
||
---
|
||
|
||
## 服务端监控
|
||
|
||
### Swoole 进程监控
|
||
|
||
```php
|
||
// Health check endpoint
/**
 * Returns a status payload built from the server's runtime statistics.
 *
 * Reads `$this->server->stats()` and exposes uptime in seconds, the connection
 * and request counters, the coroutine count and the worker count.
 *
 * @return ResponseInterface whatever `$this->success()` produces for the payload
 */
#[RequestMapping(path: '/admin/health', methods: ['GET'])]
public function health(): ResponseInterface
{
    $stats = $this->server->stats();

    // Uptime is the time elapsed since the recorded start timestamp.
    $uptimeSeconds = time() - $stats['start_time'];

    $payload = [
        'status' => 'ok',
        'uptime' => $uptimeSeconds,
        'connections' => $stats['connection_num'],
        'requests' => $stats['request_count'],
        'coroutine_num' => $stats['coroutine_num'],
        'worker_num' => $stats['worker_num'],
    ];

    return $this->success($payload);
}
|
||
```
|
||
|
||
### MySQL 慢查询监控
|
||
|
||
```sql
|
||
-- Enable the slow query log.
-- The destination file and threshold are set before the log is switched on,
-- so no slow query is written to the default log file first.
-- NOTE: SET GLOBAL values are lost on server restart, and the new
-- long_query_time applies only to connections opened after the change.
SET GLOBAL slow_query_log_file = '/var/log/mysql/slow.log';
SET GLOBAL long_query_time = 1;
SET GLOBAL slow_query_log = 'ON';
|
||
```
|
||
|
||
```php
|
||
// Hyperf SQL logging (development environment)
// config/autoload/databases.php
// NOTE(review): this fragment only sets an option of the `gen:model` command;
// nothing here writes SQL statements to a log. Presumably a query-executed
// listener writing to the 'sql' logger channel is also needed - confirm.
'commands' => [
    'gen:model' => ['with_comments' => true],
],
|
||
```
|
||
|
||
### Redis 监控
|
||
|
||
```bash
|
||
# Key metrics
# Cache hit/miss counters are reported in the "stats" section.
redis-cli INFO stats | grep -E 'keyspace_hits|keyspace_misses'
# connected_clients is reported in the "clients" section, not in "stats".
redis-cli INFO clients | grep connected_clients
redis-cli INFO memory | grep used_memory_human
|
||
```
|
||
|
||
---
|
||
|
||
## 告警策略
|
||
|
||
### 告警优先级
|
||
|
||
| 优先级 | 触发条件 | 响应时间 | 通知方式 |
|
||
|--------|---------|---------|---------|
|
||
| P0 - 紧急 | 服务不可用、数据丢失 | 15 分钟内 | 电话 + 企业微信 |
|
||
| P1 - 高 | 错误率 > 5%、P99 > 5s | 1 小时内 | 企业微信 |
|
||
| P2 - 中 | 错误率 > 1%、异常流量 | 4 小时内 | 邮件 |
|
||
| P3 - 低 | 资源使用率异常 | 次日 | 日报 |
|
||
|
||
### 监控指标
|
||
|
||
```
|
||
# 业务指标
|
||
- API 请求成功率(目标 > 99.9%)
|
||
- API P50/P95/P99 响应时间
|
||
- 每分钟错误数(Error Rate)
|
||
|
||
# 系统指标
|
||
- CPU / 内存使用率(告警阈值:80%)
|
||
- Swoole Worker 协程数(告警阈值:达到 max_coroutine 上限的 90%)
|
||
- MySQL 连接池使用率(告警阈值:70%)
|
||
- Redis 内存使用率(告警阈值:70%)
|
||
- 磁盘使用率(告警阈值:85%)
|
||
- 队列堆积数(告警阈值:1000)
|
||
```
|
||
|
||
## 日志保留策略
|
||
|
||
| 环境 | 保留周期 | 存储方式 |
|
||
|------|---------|---------|
|
||
| 开发 | 7 天 | 本地文件 (runtime/logs/) |
|
||
| 测试 | 30 天 | 对象存储 |
|
||
| 生产 | 90 天(error 保留 1 年) | ELK / CloudWatch |
|
||
|
||
## 检查清单
|
||
|
||
- [ ] 所有 API 路由有请求日志,且日志携带 trace_id(trace_id 由 TraceId 中间件注入)
|
||
- [ ] 错误捕获后记录完整 stack trace
|
||
- [ ] traceId 贯穿整个请求链路
|
||
- [ ] 敏感字段已从日志中过滤
|
||
- [ ] 生产环境 debug 日志已关闭
|
||
- [ ] 关键业务事件有对应告警规则
|
||
- [ ] 前端有全局错误捕获
|
||
- [ ] Swoole 健康检查端点可用
|