Files
boss/docs/source-material/ops_repair_swimlane_cn.svg
2026-03-26 23:16:56 +08:00

167 lines
10 KiB
XML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<svg width="2200" height="1420" viewBox="0 0 2200 1420" fill="none" xmlns="http://www.w3.org/2000/svg" xml:lang="zh-Hans">
<!-- Root canvas is 2200x1420 user units. xml:lang declares that the text content is Simplified Chinese. -->
<!-- <title>/<desc> give the document an accessible name and summary; they repeat the visible heading and subtitle. -->
<!-- role="img" is deliberately not set: it would hide the diagram's own text from assistive technology. -->
<title>运维层与运维审计层抢修流程泳道图</title>
<desc>覆盖动态巡检、日志命名、在线审批修复、主控离线紧急接管、修复复验与主控恢复回放。</desc>
<defs>
  <!-- Page backdrop: diagonal two-stop gradient, referenced as url(#bg). -->
  <linearGradient id="bg" gradientUnits="userSpaceOnUse" x1="160" y1="40" x2="2040" y2="1380">
    <stop offset="0" stop-color="#F8FBFF"/>
    <stop offset="1" stop-color="#EEF5FF"/>
  </linearGradient>
  <!-- Soft drop shadow, referenced as url(#shadow). The filter region spans the whole 2200x1420 canvas. -->
  <filter id="shadow" filterUnits="userSpaceOnUse" x="0" y="0" width="2200" height="1420" color-interpolation-filters="sRGB">
    <feDropShadow dx="0" dy="14" stdDeviation="18" flood-color="#1F2937" flood-opacity="0.10"/>
  </filter>
  <style>
    /* Shared base for every text class: the `font` shorthand sets the common
       family stack (and resets the remaining font properties); the rules
       below override only weight, size and fill per class. */
    .title, .subtitle, .stage, .lane, .laneSub, .cardTitle, .cardText, .tech, .footer {
      font: 500 13px -apple-system, BlinkMacSystemFont, "Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif;
    }
    /* Diagram heading and its one-line summary */
    .title     { font-weight: 700; font-size: 36px; fill: #15253B; }
    .subtitle  { font-size: 17px; fill: #51657C; }
    /* Column (stage) headers */
    .stage     { font-weight: 700; font-size: 20px; fill: #17314D; }
    /* Row (lane) name and its caption */
    .lane      { font-weight: 700; font-size: 18px; fill: #132238; }
    .laneSub   { fill: #5A6E84; }
    /* Card heading, body sentence and blue technology line */
    .cardTitle { font-weight: 700; font-size: 16px; fill: #132238; }
    .cardText  { fill: #44586E; }
    .tech      { font-weight: 700; font-size: 12px; fill: #0F62FE; }
    /* Footnote at the bottom of the canvas */
    .footer    { font-size: 14px; fill: #60758C; }
  </style>
</defs>
<!-- Full-canvas rounded backdrop painted with the #bg gradient -->
<rect x="0" y="0" width="2200" height="1420" rx="36" fill="url(#bg)"/>
<!-- Diagram heading and one-line scope summary -->
<text class="title" x="90" y="92">运维层与运维审计层抢修流程泳道图</text>
<text class="subtitle" x="90" y="126">覆盖动态巡检、日志命名、在线审批修复、主控离线紧急接管、修复复验与主控恢复回放。</text>
<!-- Stage headers: six white pills (y 162..238), one per process stage / column. -->
<g filter="url(#shadow)">
  <rect x="250" y="162" width="280" height="76" rx="20" fill="#FFFFFF"/>
  <rect x="550" y="162" width="280" height="76" rx="20" fill="#FFFFFF"/>
  <rect x="850" y="162" width="280" height="76" rx="20" fill="#FFFFFF"/>
  <rect x="1150" y="162" width="280" height="76" rx="20" fill="#FFFFFF"/>
  <rect x="1450" y="162" width="280" height="76" rx="20" fill="#FFFFFF"/>
  <rect x="1750" y="162" width="330" height="76" rx="20" fill="#FFFFFF"/>
</g>
<!-- Each label is anchored at its pill's horizontal centre (pill x + width / 2) with
     text-anchor="middle", so it stays centred whichever fallback font is used.
     Hand-tuned start offsets depend on the rendered glyph widths and drift off-centre. -->
<g text-anchor="middle">
  <text x="390" y="207" class="stage">1. 动态巡检</text>
  <text x="690" y="207" class="stage">2. 异常分类</text>
  <text x="990" y="207" class="stage">3. 审批 / 授权</text>
  <text x="1290" y="207" class="stage">4. 修复执行</text>
  <text x="1590" y="207" class="stage">5. 复验</text>
  <text x="1915" y="207" class="stage">6. 汇报与回放</text>
</g>
<!-- Lane backgrounds: five slightly translucent white bands, 184 tall, spaced 204 apart. -->
<!-- The fill is set once on the group and inherited by every band. -->
<g opacity="0.96" fill="#FFFFFF">
  <rect x="60" y="280" width="2060" height="184" rx="26"/>
  <rect x="60" y="484" width="2060" height="184" rx="26"/>
  <rect x="60" y="688" width="2060" height="184" rx="26"/>
  <rect x="60" y="892" width="2060" height="184" rx="26"/>
  <rect x="60" y="1096" width="2060" height="184" rx="26"/>
</g>
<!-- Grid: thin divider lines between columns and between lanes. -->
<g stroke="#D8E2EF" stroke-width="2">
  <!-- Six vertical dividers from y=280 to y=1280, one sub-path per column boundary -->
  <path d="M230 280V1280 M530 280V1280 M830 280V1280 M1130 280V1280 M1430 280V1280 M1730 280V1280"/>
  <!-- Four horizontal dividers from x=60 to x=2120, one sub-path per lane boundary -->
  <path d="M60 484H2120 M60 688H2120 M60 892H2120 M60 1096H2120"/>
</g>
<!-- Lane labels: each lane has a tinted pill, a bold name and a smaller caption underneath. -->
<!-- NOTE(review): some pills/captions look wider than the 60..230 label column and may cross the
     x=230 divider, depending on the rendered font — confirm visually before changing the layout. -->
<g>
  <!-- Lane 1 -->
  <rect fill="#E8F1FF" x="78" y="310" width="134" height="48" rx="18"/>
  <text class="lane" x="102" y="340">主 Agent</text>
  <text class="laneSub" x="84" y="374">在线审批与接收回放</text>
</g>
<g>
  <!-- Lane 2 -->
  <rect fill="#EAF9EE" x="78" y="514" width="144" height="48" rx="18"/>
  <text class="lane" x="102" y="544">Ops Agent</text>
  <text class="laneSub" x="84" y="578">巡检、分类、执行修复</text>
</g>
<g>
  <!-- Lane 3 -->
  <rect fill="#F4E8FF" x="78" y="718" width="188" height="48" rx="18"/>
  <text class="lane" x="102" y="748">Ops Audit Agent</text>
  <text class="laneSub" x="84" y="782">监督、判权、复验、紧急决策</text>
</g>
<g>
  <!-- Lane 4 -->
  <rect fill="#FEEAEA" x="78" y="922" width="162" height="48" rx="18"/>
  <text class="lane" x="102" y="952">终端服务层</text>
  <text class="laneSub" x="84" y="986">主控 / Worker / 网关 / 硬件桥</text>
</g>
<g>
  <!-- Lane 5 -->
  <rect fill="#E4F6FD" x="78" y="1126" width="176" height="48" rx="18"/>
  <text class="lane" x="102" y="1156">数据与容灾层</text>
  <text class="laneSub" x="84" y="1190">Ops Ledger / Event / Standby</text>
</g>
<!-- Cards: one white rounded panel (136 tall) per process step, placed by lane (row) and stage (column). -->
<!-- All panels share a single drop-shadow group; they are listed lane by lane to mirror the text below. -->
<g filter="url(#shadow)" fill="#FFFFFF">
  <!-- Lane 1 -->
  <rect x="1752" y="302" width="320" height="136" rx="20"/>
  <!-- Lane 2 -->
  <rect x="252" y="506" width="258" height="136" rx="20"/>
  <rect x="552" y="506" width="258" height="136" rx="20"/>
  <rect x="852" y="506" width="258" height="136" rx="20"/>
  <rect x="1152" y="506" width="258" height="136" rx="20"/>
  <!-- Lane 3 -->
  <rect x="552" y="710" width="258" height="136" rx="20"/>
  <rect x="852" y="710" width="258" height="136" rx="20"/>
  <rect x="1452" y="710" width="258" height="136" rx="20"/>
  <!-- Lane 4 -->
  <rect x="1152" y="914" width="258" height="136" rx="20"/>
  <rect x="1452" y="914" width="258" height="136" rx="20"/>
  <!-- Lane 5 -->
  <rect x="1152" y="1118" width="258" height="136" rx="20"/>
  <rect x="1752" y="1118" width="320" height="136" rx="20"/>
</g>
<!-- Card text. SVG <text> never wraps on its own, so every body sentence and technology line
     is split into explicit <tspan> lines short enough for the card's inner width
     (card width minus the 20-unit inset on each side).
     Baselines relative to the card top: heading +32; body +60 / +77 / +94; technology +112 / +128.
     The technology label is separated from its value with a colon, and the two clauses that
     ran straight into "Ops ..." get the comma they were missing. -->

<!-- Lane 1 / stage 6 -->
<text x="1772" y="334" class="cardTitle">主控每小时检查运维层</text>
<text class="cardText">
  <tspan x="1772" y="362">确认各节点 Ops Agent / Ops Audit Agent</tspan>
  <tspan x="1772" y="379">是否在线且最近巡检成功。</tspan>
</text>
<text class="tech">
  <tspan x="1772" y="414">技术:Master Health Check、</tspan>
  <tspan x="1772" y="430">Ops Node Matrix</tspan>
</text>

<!-- Lane 2 / stage 1 -->
<text x="272" y="538" class="cardTitle">动态巡检模式</text>
<text class="cardText">
  <tspan x="272" y="566">高频使用时每 5 分钟巡检,</tspan>
  <tspan x="272" y="583">系统空闲时每 1 小时巡检。</tspan>
</text>
<text class="tech">
  <tspan x="272" y="618">技术:Ops Policy Engine、</tspan>
  <tspan x="272" y="634">Mode Switch Rules</tspan>
</text>

<!-- Lane 2 / stage 2 (the dotted key pattern is longer than one line, so it breaks after a dot) -->
<text x="572" y="538" class="cardTitle">日志归一与故障聚类</text>
<text class="cardText">
  <tspan x="572" y="566">按 `LAYER.DOMAIN.COMPONENT.</tspan>
  <tspan x="572" y="583">ACTION.ERROR_CODE` 归类,</tspan>
  <tspan x="572" y="600">生成最小证据包。</tspan>
</text>
<text class="tech">
  <tspan x="572" y="618">技术:fault_key、</tspan>
  <tspan x="572" y="634">Runbook Map、Log Index</tspan>
</text>

<!-- Lane 2 / stage 3 -->
<text x="872" y="538" class="cardTitle">主控在线时请求审批</text>
<text class="cardText">
  <tspan x="872" y="566">主 Agent 在线且可响应时,</tspan>
  <tspan x="872" y="583">Ops Agent 先提修复建议,</tspan>
  <tspan x="872" y="600">不能越权直接修。</tspan>
</text>
<text class="tech">
  <tspan x="872" y="618">技术:Approval Request、</tspan>
  <tspan x="872" y="634">Repair Ticket</tspan>
</text>

<!-- Lane 2 / stage 4 -->
<text x="1172" y="538" class="cardTitle">执行修复 Runbook</text>
<text class="cardText">
  <tspan x="1172" y="566">重启服务、切换账号、恢复心跳、</tspan>
  <tspan x="1172" y="583">释放孤儿租约、重建连接等。</tspan>
</text>
<text class="tech">
  <tspan x="1172" y="618">技术:Runbook Executor、</tspan>
  <tspan x="1172" y="634">Safe Action Policy</tspan>
</text>

<!-- Lane 3 / stage 2 -->
<text x="572" y="742" class="cardTitle">监督 Ops Agent 是否越权</text>
<text class="cardText">
  <tspan x="572" y="770">确认主控是否在线、</tspan>
  <tspan x="572" y="787">当前动作是否在自动修复白名单、</tspan>
  <tspan x="572" y="804">是否需要升级。</tspan>
</text>
<text class="tech">
  <tspan x="572" y="822">技术:Ops Audit Policy、</tspan>
  <tspan x="572" y="838">Privilege Gate</tspan>
</text>

<!-- Lane 3 / stage 3 -->
<text x="872" y="742" class="cardTitle">主控失联时做最终判断</text>
<text class="cardText">
  <tspan x="872" y="770">确认主控失联后,Ops Audit Agent</tspan>
  <tspan x="872" y="787">联合 Chief Ops Audit Agent</tspan>
  <tspan x="872" y="804">决定是否抢修。</tspan>
</text>
<text class="tech">
  <tspan x="872" y="822">技术:Emergency Authority、</tspan>
  <tspan x="872" y="838">Offline Quorum</tspan>
</text>

<!-- Lane 3 / stage 5 -->
<text x="1472" y="742" class="cardTitle">复验修复是否真的成功</text>
<text class="cardText">
  <tspan x="1472" y="770">看原故障是否消失、</tspan>
  <tspan x="1472" y="787">服务是否恢复、事件流是否恢复、</tspan>
  <tspan x="1472" y="804">是否有副作用。</tspan>
</text>
<text class="tech">
  <tspan x="1472" y="822">技术:Ops Audit Verification、</tspan>
  <tspan x="1472" y="838">Synthetic Probe</tspan>
</text>

<!-- Lane 4 / stage 4 -->
<text x="1172" y="946" class="cardTitle">被修复对象恢复服务</text>
<text class="cardText">
  <tspan x="1172" y="974">主控、Worker、Thread Gateway、</tspan>
  <tspan x="1172" y="991">Audit Orchestrator、Test Rig、</tspan>
  <tspan x="1172" y="1008">gptpluscontrol 等恢复。</tspan>
</text>
<text class="tech">
  <tspan x="1172" y="1026">技术:Service Supervisor、</tspan>
  <tspan x="1172" y="1042">Health Probe</tspan>
</text>

<!-- Lane 4 / stage 5 -->
<text x="1472" y="946" class="cardTitle">产生新健康状态与证据</text>
<text class="cardText">
  <tspan x="1472" y="974">生成复验日志、关键快照、</tspan>
  <tspan x="1472" y="991">队列状态、额度状态和回放材料。</tspan>
</text>
<text class="tech">
  <tspan x="1472" y="1026">技术:Evidence Collector、</tspan>
  <tspan x="1472" y="1042">Health Snapshot</tspan>
</text>

<!-- Lane 5 / stage 4 -->
<text x="1172" y="1150" class="cardTitle">修复账本留痕</text>
<text class="cardText">
  <tspan x="1172" y="1178">写入 repair ticket、动作、</tspan>
  <tspan x="1172" y="1195">复验结果、主控是否在线、</tspan>
  <tspan x="1172" y="1212">后续风险。</tspan>
</text>
<text class="tech">
  <tspan x="1172" y="1230">技术:Ops Ledger、</tspan>
  <tspan x="1172" y="1246">Event Store、Postgres</tspan>
</text>

<!-- Lane 5 / stage 6 -->
<text x="1772" y="1150" class="cardTitle">恢复后回放给主控</text>
<text class="cardText">
  <tspan x="1772" y="1178">主控恢复在线后,自动收到故障摘要、</tspan>
  <tspan x="1772" y="1195">修复动作、复验结论和剩余风险。</tspan>
</text>
<text class="tech">
  <tspan x="1772" y="1230">技术:Repair Report Sync、</tspan>
  <tspan x="1772" y="1246">Recovery Replay</tspan>
</text>
<!-- Flow arrows: connectors between cards, colour-coded by stroke. -->
<!-- Arrowheads, one marker per stroke colour. Sizes are in user units (markerUnits="userSpaceOnUse").
     refX="6" places the triangle so that its tip sits 8 units past the end of the path. -->
<defs>
  <marker id="arrowGreen" markerUnits="userSpaceOnUse" markerWidth="14" markerHeight="12" viewBox="0 0 14 12" refX="6" refY="6" orient="auto">
    <path d="M0 0L14 6L0 12Z" fill="#22C55E" stroke="none"/>
  </marker>
  <marker id="arrowRed" markerUnits="userSpaceOnUse" markerWidth="14" markerHeight="12" viewBox="0 0 14 12" refX="6" refY="6" orient="auto">
    <path d="M0 0L14 6L0 12Z" fill="#EF4444" stroke="none"/>
  </marker>
  <marker id="arrowPurple" markerUnits="userSpaceOnUse" markerWidth="14" markerHeight="12" viewBox="0 0 14 12" refX="6" refY="6" orient="auto">
    <path d="M0 0L14 6L0 12Z" fill="#A855F7" stroke="none"/>
  </marker>
  <marker id="arrowBlue" markerUnits="userSpaceOnUse" markerWidth="14" markerHeight="12" viewBox="0 0 14 12" refX="6" refY="6" orient="auto">
    <path d="M0 0L14 6L0 12Z" fill="#0EA5E9" stroke="none"/>
  </marker>
</defs>
<!-- Every connector starts on the source card's edge and stops 8 units short of the target card,
     so the arrowhead tip lands exactly on the target edge. The arrowhead follows the direction in
     which each path is drawn (first point = source card, last point = target card). -->
<g stroke-linecap="round" stroke-width="5">
  <!-- Left-to-right hand-offs along the Ops Agent lane (stages 1 to 4) -->
  <path d="M510 574H544" stroke="#22C55E" marker-end="url(#arrowGreen)"/>
  <path d="M810 574H844" stroke="#22C55E" marker-end="url(#arrowGreen)"/>
  <path d="M1110 574H1144" stroke="#22C55E" marker-end="url(#arrowGreen)"/>
  <!-- Lane 4: stage-4 card to stage-5 card -->
  <path d="M1410 982H1444" stroke="#EF4444" marker-end="url(#arrowRed)"/>
  <!-- Lane-3 stage-5 card to the lane-5 stage-6 card. The elbow turns at x=1792 and enters the
       target from the top; a vertical run at x=1752 would lie on the target card's left border. -->
  <path d="M1710 778H1792V1110" stroke="#A855F7" marker-end="url(#arrowPurple)"/>
  <!-- Stage 6: lane-1 card down to lane-5 card -->
  <path d="M2012 438V1110" stroke="#0EA5E9" marker-end="url(#arrowBlue)"/>
  <!-- Stage 3: lane-2 card down to lane-3 card -->
  <path d="M980 642V702" stroke="#A855F7" marker-end="url(#arrowPurple)"/>
  <!-- Stage 4: lane-2 card down to lane-4 card, then lane-4 card down to lane-5 card -->
  <path d="M1280 642V906" stroke="#EF4444" marker-end="url(#arrowRed)"/>
  <path d="M1280 1050V1110" stroke="#0EA5E9" marker-end="url(#arrowBlue)"/>
  <!-- Stage 5: lane-3 card down to lane-4 card -->
  <path d="M1610 846V906" stroke="#EF4444" marker-end="url(#arrowRed)"/>
</g>
<!-- Footnote naming the diagram file. Only the file name is shown: an absolute path from one
     developer's machine is meaningless to other readers and exposes a local directory layout. -->
<text x="90" y="1376" class="footer">图文件:ops_repair_swimlane_cn.svg</text>
</svg>