From fa3a7c78e02428401ec4162d09e789c6b9f8abc9 Mon Sep 17 00:00:00 2001 From: Symphony Agent Date: Fri, 17 Apr 2026 17:39:32 +0000 Subject: [PATCH] feat(server): phase 4 polish and observability (SYM-24) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Intent Harden the AgentProbe server surfaces delivered in Phases 1–3 for long-running daily operation by adding in-process observability adapters, SSE reconnect guarantees, dashboard keyboard navigation and empty/error/loading polish, deterministic latency-budget and soak harnesses, and the supporting operational docs. ## Behavior changes - `src/runtime/server/observability/` now ships a narrow metrics registry (`server.http.requests`, `server.runs.active`, `server.runs.started_total`, `server.runs.finished_total`, `server.sse.connections`), a span recorder (`server.run.start.validation`, `server.run.controller.execute`, `server.run.suite.boot`), a structured logger, and redaction helpers for config/startup output. - `startAgentProbeServer` emits a single redacted `server.startup` log line, attaches per-request logs with `method`, `route`, `status`, `duration_ms`, and `request_id`, tags run-controller logs with `runId` and preset id, and exposes the observability handle on `StartedServer`. - SSE responses emit `retry:` directives, honor `Last-Event-ID` from both header and `last_event_id` query parameter, keep proxy-friendly headers (`no-store`, `no-transform`, `x-accel-buffering: no`, `keep-alive`), and guarantee exactly one terminal event (`run_finished`, `run_cancelled`, or `run_failed`) per run — including replays for historical runs whose ring buffer has been evicted. - Dashboard adds global keyboard shortcuts (`/`, `j`/`k`, `g r`, `g p`, `g s`), a filter input on the Runs page with its own empty state, and dedicated empty/error/loading affordances on the suites and filtered-runs surfaces. Shortcuts are suppressed in form inputs, textareas, selects, contenteditable targets, and when modifier keys are held. Focus rings are visible on list rows and the filter input. - New repo entrypoints: `bun run latency-budget` prints p50/p95/p99 for `GET /`, `GET /api/runs`, `POST /api/runs`, and SSE first-event on seeded local data; `bun run soak` runs a fast CI soak by default (`--manual` extends to ~1h) and emits the operator summary documented in `docs/RELIABILITY.md`. ## Documentation - `docs/RELIABILITY.md` lists the shipped metrics, spans, and latency budgets plus the SSE hardening contract and soak-harness modes. - `docs/playbooks/agent-probe-server.md` adds proxy SSE + nginx buffering guidance, backup/restore, migration recovery, dashboard cache behaviour, request-id troubleshooting, and the keyboard shortcut reference. - `docs/product-specs/platform.md`, `current-state.md`, and `e2e-checklist.md` gain Phase 4 acceptance scenarios and their coverage mapping. ## Validation - [x] `bun run docs:validate` - [x] `bun run typecheck` - [x] `bun run test` - [x] `bun run test:e2e` - [x] `bun run dashboard:build` - [x] `bun run fast-feedback` - [x] `bun run soak --duration-ms 5000 --runs 30 --sse-connections 2` (active runs at shutdown == 0, no failures) - [x] `bun run latency-budget --samples 5 --report-only` (all surfaces well under budget on loopback) Co-Authored-By: Claude Opus 4.7 (1M context) --- dashboard/dist/index.html | 26 +- dashboard/src/App.tsx | 235 ++++++++++++---- dashboard/src/hooks/useKeyboardShortcuts.ts | 121 +++++++++ dashboard/src/styles.css | 37 +++ docs/RELIABILITY.md | 66 +++++ docs/playbooks/agent-probe-server.md | 117 ++++++++ docs/product-specs/current-state.md | 7 + docs/product-specs/e2e-checklist.md | 7 + docs/product-specs/platform.md | 83 ++++++ package.json | 4 +- scripts/latency-budget.ts | 205 ++++++++++++++ scripts/soak.ts | 255 ++++++++++++++++++ src/runtime/server/app-server.ts | 69 +++-- .../server/controllers/run-controller.ts | 98 ++++++- src/runtime/server/observability/index.ts | 48 ++++ src/runtime/server/observability/logger.ts | 97 +++++++ src/runtime/server/observability/metrics.ts | 142 ++++++++++ src/runtime/server/observability/redaction.ts | 53 ++++ src/runtime/server/observability/spans.ts | 108 ++++++++ src/runtime/server/routes/sse.ts | 104 +++++-- src/runtime/server/streams/events.ts | 15 ++ .../integration/server/sse-reconnect.test.ts | 162 +++++++++++ .../dashboard/keyboard-shortcuts.test.tsx | 146 ++++++++++ .../unit/server/observability/logger.test.ts | 56 ++++ .../unit/server/observability/metrics.test.ts | 73 +++++ .../server/observability/redaction.test.ts | 54 ++++ tests/unit/server/observability/spans.test.ts | 54 ++++ 27 files changed, 2340 insertions(+), 102 deletions(-) create mode 100644 dashboard/src/hooks/useKeyboardShortcuts.ts create mode 100644 scripts/latency-budget.ts create mode 100644 scripts/soak.ts create mode 100644 src/runtime/server/observability/index.ts create mode 100644 src/runtime/server/observability/logger.ts create mode 100644 src/runtime/server/observability/metrics.ts create mode 100644 src/runtime/server/observability/redaction.ts create mode 100644 src/runtime/server/observability/spans.ts create mode 100644 tests/integration/server/sse-reconnect.test.ts create mode 100644 tests/unit/dashboard/keyboard-shortcuts.test.tsx create mode 100644 tests/unit/server/observability/logger.test.ts create mode 100644 tests/unit/server/observability/metrics.test.ts create mode 100644 tests/unit/server/observability/redaction.test.ts create mode 100644 tests/unit/server/observability/spans.test.ts diff --git a/dashboard/dist/index.html b/dashboard/dist/index.html index 31e5062..def75a0 100644 --- a/dashboard/dist/index.html +++ b/dashboard/dist/index.html @@ -4,7 +4,7 @@ AgentProbe Dashboard - - +`+a.stack}}var $u=Object.prototype.hasOwnProperty,ku=s.unstable_scheduleCallback,Wu=s.unstable_cancelCallback,Wo=s.unstable_shouldYield,Fo=s.unstable_requestPaint,ut=s.unstable_now,Io=s.unstable_getCurrentPriorityLevel,js=s.unstable_ImmediatePriority,xs=s.unstable_UserBlockingPriority,Tn=s.unstable_NormalPriority,Po=s.unstable_LowPriority,Es=s.unstable_IdlePriority,lh=s.log,th=s.unstable_setDisableYieldValue,Ma=null,it=null;function le(l){if(typeof lh=="function"&&th(l),it&&typeof it.setStrictMode=="function")try{it.setStrictMode(Ma,l)}catch{}}var ct=Math.clz32?Math.clz32:nh,eh=Math.log,ah=Math.LN2;function nh(l){return l>>>=0,l===0?32:31-(eh(l)/ah|0)|0}var zn=256,An=262144,On=4194304;function Te(l){var t=l&42;if(t!==0)return t;switch(l&-l){case 1:return 1;case 2:return 2;case 4:return 4;case 8:return 8;case 16:return 16;case 32:return 32;case 64:return 64;case 128:return 128;case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:return l&261888;case 262144:case 524288:case 1048576:case 2097152:return l&3932160;case 4194304:case 8388608:case 16777216:case 33554432:return l&62914560;case 67108864:return 67108864;case 134217728:return 134217728;case 268435456:return 268435456;case 536870912:return 536870912;case 1073741824:return 0;default:return l}}function Mn(l,t,e){var a=l.pendingLanes;if(a===0)return 0;var n=0,u=l.suspendedLanes,c=l.pingedLanes;l=l.warmLanes;var f=a&134217727;return f!==0?(a=f&~u,a!==0?n=Te(a):(c&=f,c!==0?n=Te(c):e||(e=f&~l,e!==0&&(n=Te(e))))):(f=a&~u,f!==0?n=Te(f):c!==0?n=Te(c):e||(e=a&~l,e!==0&&(n=Te(e)))),n===0?0:t!==0&&t!==n&&(t&u)===0&&(u=n&-n,e=t&-t,u>=e||u===32&&(e&4194048)!==0)?t:n}function Da(l,t){return(l.pendingLanes&~(l.suspendedLanes&~l.pingedLanes)&t)===0}function uh(l,t){switch(l){case 1:case 2:case 4:case 8:case 64:return t+250;case 16:case 32:case 128:case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return t+5e3;case 4194304:case 8388608:case 16777216:case 33554432:return-1;case 67108864:case 134217728:case 268435456:case 536870912:case 1073741824:return-1;default:return-1}}function Ns(){var l=On;return On<<=1,(On&62914560)===0&&(On=4194304),l}function Fu(l){for(var t=[],e=0;31>e;e++)t.push(l);return t}function Ra(l,t){l.pendingLanes|=t,t!==268435456&&(l.suspendedLanes=0,l.pingedLanes=0,l.warmLanes=0)}function ih(l,t,e,a,n,u){var c=l.pendingLanes;l.pendingLanes=e,l.suspendedLanes=0,l.pingedLanes=0,l.warmLanes=0,l.expiredLanes&=e,l.entangledLanes&=e,l.errorRecoveryDisabledLanes&=e,l.shellSuspendCounter=0;var f=l.entanglements,d=l.expirationTimes,p=l.hiddenUpdates;for(e=c&~e;0"u")return null;try{return l.activeElement||l.body}catch{return l.body}}var oh=/[\n"\\]/g;function gt(l){return l.replace(oh,function(t){return"\\"+t.charCodeAt(0).toString(16)+" "})}function ai(l,t,e,a,n,u,c,f){l.name="",c!=null&&typeof c!="function"&&typeof c!="symbol"&&typeof c!="boolean"?l.type=c:l.removeAttribute("type"),t!=null?c==="number"?(t===0&&l.value===""||l.value!=t)&&(l.value=""+yt(t)):l.value!==""+yt(t)&&(l.value=""+yt(t)):c!=="submit"&&c!=="reset"||l.removeAttribute("value"),t!=null?ni(l,c,yt(t)):e!=null?ni(l,c,yt(e)):a!=null&&l.removeAttribute("value"),n==null&&u!=null&&(l.defaultChecked=!!u),n!=null&&(l.checked=n&&typeof n!="function"&&typeof n!="symbol"),f!=null&&typeof f!="function"&&typeof f!="symbol"&&typeof f!="boolean"?l.name=""+yt(f):l.removeAttribute("name")}function qs(l,t,e,a,n,u,c,f){if(u!=null&&typeof u!="function"&&typeof u!="symbol"&&typeof u!="boolean"&&(l.type=u),t!=null||e!=null){if(!(u!=="submit"&&u!=="reset"||t!=null)){ei(l);return}e=e!=null?""+yt(e):"",t=t!=null?""+yt(t):e,f||t===l.value||(l.value=t),l.defaultValue=t}a=a??n,a=typeof a!="function"&&typeof a!="symbol"&&!!a,l.checked=f?l.checked:!!a,l.defaultChecked=!!a,c!=null&&typeof c!="function"&&typeof c!="symbol"&&typeof c!="boolean"&&(l.name=c),ei(l)}function ni(l,t,e){t==="number"&&Un(l.ownerDocument)===l||l.defaultValue===""+e||(l.defaultValue=""+e)}function We(l,t,e,a){if(l=l.options,t){t={};for(var n=0;n"u"||typeof window.document>"u"||typeof window.document.createElement>"u"),fi=!1;if(qt)try{var Ba={};Object.defineProperty(Ba,"passive",{get:function(){fi=!0}}),window.addEventListener("test",Ba,Ba),window.removeEventListener("test",Ba,Ba)}catch{fi=!1}var ee=null,ri=null,Hn=null;function Vs(){if(Hn)return Hn;var l,t=ri,e=t.length,a,n="value"in ee?ee.value:ee.textContent,u=n.length;for(l=0;l=Ga),Ws=" ",Fs=!1;function Is(l,t){switch(l){case"keyup":return Qh.indexOf(t.keyCode)!==-1;case"keydown":return t.keyCode!==229;case"keypress":case"mousedown":case"focusout":return!0;default:return!1}}function Ps(l){return l=l.detail,typeof l=="object"&&"data"in l?l.data:null}var la=!1;function Lh(l,t){switch(l){case"compositionend":return Ps(t);case"keypress":return t.which!==32?null:(Fs=!0,Ws);case"textInput":return l=t.data,l===Ws&&Fs?null:l;default:return null}}function Zh(l,t){if(la)return l==="compositionend"||!vi&&Is(l,t)?(l=Vs(),Hn=ri=ee=null,la=!1,l):null;switch(l){case"paste":return null;case"keypress":if(!(t.ctrlKey||t.altKey||t.metaKey)||t.ctrlKey&&t.altKey){if(t.char&&1=t)return{node:e,offset:t-l};l=a}l:{for(;e;){if(e.nextSibling){e=e.nextSibling;break l}e=e.parentNode}e=void 0}e=sf(e)}}function rf(l,t){return l&&t?l===t?!0:l&&l.nodeType===3?!1:t&&t.nodeType===3?rf(l,t.parentNode):"contains"in l?l.contains(t):l.compareDocumentPosition?!!(l.compareDocumentPosition(t)&16):!1:!1}function df(l){l=l!=null&&l.ownerDocument!=null&&l.ownerDocument.defaultView!=null?l.ownerDocument.defaultView:window;for(var t=Un(l.document);t instanceof l.HTMLIFrameElement;){try{var e=typeof t.contentWindow.location.href=="string"}catch{e=!1}if(e)l=t.contentWindow;else break;t=Un(l.document)}return t}function Si(l){var t=l&&l.nodeName&&l.nodeName.toLowerCase();return t&&(t==="input"&&(l.type==="text"||l.type==="search"||l.type==="tel"||l.type==="url"||l.type==="password")||t==="textarea"||l.contentEditable==="true")}var Fh=qt&&"documentMode"in document&&11>=document.documentMode,ta=null,bi=null,Za=null,pi=!1;function of(l,t,e){var a=e.window===e?e.document:e.nodeType===9?e:e.ownerDocument;pi||ta==null||ta!==Un(a)||(a=ta,"selectionStart"in a&&Si(a)?a={start:a.selectionStart,end:a.selectionEnd}:(a=(a.ownerDocument&&a.ownerDocument.defaultView||window).getSelection(),a={anchorNode:a.anchorNode,anchorOffset:a.anchorOffset,focusNode:a.focusNode,focusOffset:a.focusOffset}),Za&&La(Za,a)||(Za=a,a=Au(bi,"onSelect"),0>=c,n-=c,Mt=1<<32-ct(t)+n|e<k?(el=X,X=null):el=X.sibling;var sl=j(y,X,b[k],z);if(sl===null){X===null&&(X=el);break}l&&X&&sl.alternate===null&&t(y,X),h=u(sl,h,k),cl===null?Z=sl:cl.sibling=sl,cl=sl,X=el}if(k===b.length)return e(y,X),ul&&Gt(y,k),Z;if(X===null){for(;kk?(el=X,X=null):el=X.sibling;var Ne=j(y,X,sl.value,z);if(Ne===null){X===null&&(X=el);break}l&&X&&Ne.alternate===null&&t(y,X),h=u(Ne,h,k),cl===null?Z=Ne:cl.sibling=Ne,cl=Ne,X=el}if(sl.done)return e(y,X),ul&&Gt(y,k),Z;if(X===null){for(;!sl.done;k++,sl=b.next())sl=A(y,sl.value,z),sl!==null&&(h=u(sl,h,k),cl===null?Z=sl:cl.sibling=sl,cl=sl);return ul&&Gt(y,k),Z}for(X=a(X);!sl.done;k++,sl=b.next())sl=x(X,y,k,sl.value,z),sl!==null&&(l&&sl.alternate!==null&&X.delete(sl.key===null?k:sl.key),h=u(sl,h,k),cl===null?Z=sl:cl.sibling=sl,cl=sl);return l&&X.forEach(function(gv){return t(y,gv)}),ul&&Gt(y,k),Z}function gl(y,h,b,z){if(typeof b=="object"&&b!==null&&b.type===Tl&&b.key===null&&(b=b.props.children),typeof b=="object"&&b!==null){switch(b.$$typeof){case dl:l:{for(var Z=b.key;h!==null;){if(h.key===Z){if(Z=b.type,Z===Tl){if(h.tag===7){e(y,h.sibling),z=n(h,b.props.children),z.return=y,y=z;break l}}else if(h.elementType===Z||typeof Z=="object"&&Z!==null&&Z.$$typeof===il&&qe(Z)===h.type){e(y,h.sibling),z=n(h,b.props),ka(z,b),z.return=y,y=z;break l}e(y,h);break}else t(y,h);h=h.sibling}b.type===Tl?(z=Re(b.props.children,y.mode,z,b.key),z.return=y,y=z):(z=Kn(b.type,b.key,b.props,null,y.mode,z),ka(z,b),z.return=y,y=z)}return c(y);case I:l:{for(Z=b.key;h!==null;){if(h.key===Z)if(h.tag===4&&h.stateNode.containerInfo===b.containerInfo&&h.stateNode.implementation===b.implementation){e(y,h.sibling),z=n(h,b.children||[]),z.return=y,y=z;break l}else{e(y,h);break}else t(y,h);h=h.sibling}z=zi(b,y.mode,z),z.return=y,y=z}return c(y);case il:return b=qe(b),gl(y,h,b,z)}if(nt(b))return G(y,h,b,z);if(Yl(b)){if(Z=Yl(b),typeof Z!="function")throw Error(r(150));return b=Z.call(b),J(y,h,b,z)}if(typeof b.then=="function")return gl(y,h,In(b),z);if(b.$$typeof===al)return gl(y,h,$n(y,b),z);Pn(y,b)}return typeof b=="string"&&b!==""||typeof b=="number"||typeof b=="bigint"?(b=""+b,h!==null&&h.tag===6?(e(y,h.sibling),z=n(h,b),z.return=y,y=z):(e(y,h),z=Ti(b,y.mode,z),z.return=y,y=z),c(y)):e(y,h)}return function(y,h,b,z){try{$a=0;var Z=gl(y,h,b,z);return oa=null,Z}catch(X){if(X===da||X===Wn)throw X;var cl=ft(29,X,null,y.mode);return cl.lanes=z,cl.return=y,cl}finally{}}}var Ge=Hf(!0),Bf=Hf(!1),ce=!1;function Gi(l){l.updateQueue={baseState:l.memoizedState,firstBaseUpdate:null,lastBaseUpdate:null,shared:{pending:null,lanes:0,hiddenCallbacks:null},callbacks:null}}function Qi(l,t){l=l.updateQueue,t.updateQueue===l&&(t.updateQueue={baseState:l.baseState,firstBaseUpdate:l.firstBaseUpdate,lastBaseUpdate:l.lastBaseUpdate,shared:l.shared,callbacks:null})}function se(l){return{lane:l,tag:0,payload:null,callback:null,next:null}}function fe(l,t,e){var a=l.updateQueue;if(a===null)return null;if(a=a.shared,(rl&2)!==0){var n=a.pending;return n===null?t.next=t:(t.next=n.next,n.next=t),a.pending=t,t=Vn(l),bf(l,null,e),t}return Zn(l,a,t,e),Vn(l)}function Wa(l,t,e){if(t=t.updateQueue,t!==null&&(t=t.shared,(e&4194048)!==0)){var a=t.lanes;a&=l.pendingLanes,e|=a,t.lanes=e,Ts(l,e)}}function Xi(l,t){var e=l.updateQueue,a=l.alternate;if(a!==null&&(a=a.updateQueue,e===a)){var n=null,u=null;if(e=e.firstBaseUpdate,e!==null){do{var c={lane:e.lane,tag:e.tag,payload:e.payload,callback:null,next:null};u===null?n=u=c:u=u.next=c,e=e.next}while(e!==null);u===null?n=u=t:u=u.next=t}else n=u=t;e={baseState:a.baseState,firstBaseUpdate:n,lastBaseUpdate:u,shared:a.shared,callbacks:a.callbacks},l.updateQueue=e;return}l=e.lastBaseUpdate,l===null?e.firstBaseUpdate=t:l.next=t,e.lastBaseUpdate=t}var Li=!1;function Fa(){if(Li){var l=ra;if(l!==null)throw l}}function Ia(l,t,e,a){Li=!1;var n=l.updateQueue;ce=!1;var u=n.firstBaseUpdate,c=n.lastBaseUpdate,f=n.shared.pending;if(f!==null){n.shared.pending=null;var d=f,p=d.next;d.next=null,c===null?u=p:c.next=p,c=d;var N=l.alternate;N!==null&&(N=N.updateQueue,f=N.lastBaseUpdate,f!==c&&(f===null?N.firstBaseUpdate=p:f.next=p,N.lastBaseUpdate=d))}if(u!==null){var A=n.baseState;c=0,N=p=d=null,f=u;do{var j=f.lane&-536870913,x=j!==f.lane;if(x?(tl&j)===j:(a&j)===j){j!==0&&j===fa&&(Li=!0),N!==null&&(N=N.next={lane:0,tag:f.tag,payload:f.payload,callback:null,next:null});l:{var G=l,J=f;j=t;var gl=e;switch(J.tag){case 1:if(G=J.payload,typeof G=="function"){A=G.call(gl,A,j);break l}A=G;break l;case 3:G.flags=G.flags&-65537|128;case 0:if(G=J.payload,j=typeof G=="function"?G.call(gl,A,j):G,j==null)break l;A=U({},A,j);break l;case 2:ce=!0}}j=f.callback,j!==null&&(l.flags|=64,x&&(l.flags|=8192),x=n.callbacks,x===null?n.callbacks=[j]:x.push(j))}else x={lane:j,tag:f.tag,payload:f.payload,callback:f.callback,next:null},N===null?(p=N=x,d=A):N=N.next=x,c|=j;if(f=f.next,f===null){if(f=n.shared.pending,f===null)break;x=f,f=x.next,x.next=null,n.lastBaseUpdate=x,n.shared.pending=null}}while(!0);N===null&&(d=A),n.baseState=d,n.firstBaseUpdate=p,n.lastBaseUpdate=N,u===null&&(n.shared.lanes=0),me|=c,l.lanes=c,l.memoizedState=A}}function qf(l,t){if(typeof l!="function")throw Error(r(191,l));l.call(t)}function Yf(l,t){var e=l.callbacks;if(e!==null)for(l.callbacks=null,l=0;lu?u:8;var c=_.T,f={};_.T=f,cc(l,!1,t,e);try{var d=n(),p=_.S;if(p!==null&&p(f,d),d!==null&&typeof d=="object"&&typeof d.then=="function"){var N=im(d,a);tn(l,t,N,mt(l))}else tn(l,t,a,mt(l))}catch(A){tn(l,t,{then:function(){},status:"rejected",reason:A},mt())}finally{B.p=u,c!==null&&f.types!==null&&(c.types=f.types),_.T=c}}function om(){}function uc(l,t,e,a){if(l.tag!==5)throw Error(r(476));var n=yr(l).queue;vr(l,n,t,K,e===null?om:function(){return gr(l),e(a)})}function yr(l){var t=l.memoizedState;if(t!==null)return t;t={memoizedState:K,baseState:K,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Zt,lastRenderedState:K},next:null};var e={};return t.next={memoizedState:e,baseState:e,baseQueue:null,queue:{pending:null,lanes:0,dispatch:null,lastRenderedReducer:Zt,lastRenderedState:e},next:null},l.memoizedState=t,l=l.alternate,l!==null&&(l.memoizedState=t),t}function gr(l){var t=yr(l);t.next===null&&(t=l.alternate.memoizedState),tn(l,t.next.queue,{},mt())}function ic(){return Kl(bn)}function Sr(){return Rl().memoizedState}function br(){return Rl().memoizedState}function hm(l){for(var t=l.return;t!==null;){switch(t.tag){case 24:case 3:var e=mt();l=se(e);var a=fe(t,l,e);a!==null&&(et(a,t,e),Wa(a,t,e)),t={cache:Hi()},l.payload=t;return}t=t.return}}function mm(l,t,e){var a=mt();e={lane:a,revertLane:0,gesture:null,action:e,hasEagerState:!1,eagerState:null,next:null},fu(l)?jr(t,e):(e=Ni(l,t,e,a),e!==null&&(et(e,l,a),xr(e,t,a)))}function pr(l,t,e){var a=mt();tn(l,t,e,a)}function tn(l,t,e,a){var n={lane:a,revertLane:0,gesture:null,action:e,hasEagerState:!1,eagerState:null,next:null};if(fu(l))jr(t,n);else{var u=l.alternate;if(l.lanes===0&&(u===null||u.lanes===0)&&(u=t.lastRenderedReducer,u!==null))try{var c=t.lastRenderedState,f=u(c,e);if(n.hasEagerState=!0,n.eagerState=f,st(f,c))return Zn(l,t,n,0),pl===null&&Ln(),!1}catch{}finally{}if(e=Ni(l,t,n,a),e!==null)return et(e,l,a),xr(e,t,a),!0}return!1}function cc(l,t,e,a){if(a={lane:2,revertLane:Gc(),gesture:null,action:a,hasEagerState:!1,eagerState:null,next:null},fu(l)){if(t)throw Error(r(479))}else t=Ni(l,e,a,2),t!==null&&et(t,l,2)}function fu(l){var t=l.alternate;return l===$||t!==null&&t===$}function jr(l,t){ma=eu=!0;var e=l.pending;e===null?t.next=t:(t.next=e.next,e.next=t),l.pending=t}function xr(l,t,e){if((e&4194048)!==0){var a=t.lanes;a&=l.pendingLanes,e|=a,t.lanes=e,Ts(l,e)}}var en={readContext:Kl,use:uu,useCallback:zl,useContext:zl,useEffect:zl,useImperativeHandle:zl,useLayoutEffect:zl,useInsertionEffect:zl,useMemo:zl,useReducer:zl,useRef:zl,useState:zl,useDebugValue:zl,useDeferredValue:zl,useTransition:zl,useSyncExternalStore:zl,useId:zl,useHostTransitionStatus:zl,useFormState:zl,useActionState:zl,useOptimistic:zl,useMemoCache:zl,useCacheRefresh:zl};en.useEffectEvent=zl;var Er={readContext:Kl,use:uu,useCallback:function(l,t){return kl().memoizedState=[l,t===void 0?null:t],l},useContext:Kl,useEffect:ir,useImperativeHandle:function(l,t,e){e=e!=null?e.concat([l]):null,cu(4194308,4,rr.bind(null,t,l),e)},useLayoutEffect:function(l,t){return cu(4194308,4,l,t)},useInsertionEffect:function(l,t){cu(4,2,l,t)},useMemo:function(l,t){var e=kl();t=t===void 0?null:t;var a=l();if(Qe){le(!0);try{l()}finally{le(!1)}}return e.memoizedState=[a,t],a},useReducer:function(l,t,e){var a=kl();if(e!==void 0){var n=e(t);if(Qe){le(!0);try{e(t)}finally{le(!1)}}}else n=t;return a.memoizedState=a.baseState=n,l={pending:null,lanes:0,dispatch:null,lastRenderedReducer:l,lastRenderedState:n},a.queue=l,l=l.dispatch=mm.bind(null,$,l),[a.memoizedState,l]},useRef:function(l){var t=kl();return l={current:l},t.memoizedState=l},useState:function(l){l=lc(l);var t=l.queue,e=pr.bind(null,$,t);return t.dispatch=e,[l.memoizedState,e]},useDebugValue:ac,useDeferredValue:function(l,t){var e=kl();return nc(e,l,t)},useTransition:function(){var l=lc(!1);return l=vr.bind(null,$,l.queue,!0,!1),kl().memoizedState=l,[!1,l]},useSyncExternalStore:function(l,t,e){var a=$,n=kl();if(ul){if(e===void 0)throw Error(r(407));e=e()}else{if(e=t(),pl===null)throw Error(r(349));(tl&127)!==0||Vf(a,t,e)}n.memoizedState=e;var u={value:e,getSnapshot:t};return n.queue=u,ir(Jf.bind(null,a,u,l),[l]),a.flags|=2048,ya(9,{destroy:void 0},Kf.bind(null,a,u,e,t),null),e},useId:function(){var l=kl(),t=pl.identifierPrefix;if(ul){var e=Dt,a=Mt;e=(a&~(1<<32-ct(a)-1)).toString(32)+e,t="_"+t+"R_"+e,e=au++,0<\/script>",u=u.removeChild(u.firstChild);break;case"select":u=typeof a.is=="string"?c.createElement("select",{is:a.is}):c.createElement("select"),a.multiple?u.multiple=!0:a.size&&(u.size=a.size);break;default:u=typeof a.is=="string"?c.createElement(n,{is:a.is}):c.createElement(n)}}u[Zl]=t,u[Wl]=a;l:for(c=t.child;c!==null;){if(c.tag===5||c.tag===6)u.appendChild(c.stateNode);else if(c.tag!==4&&c.tag!==27&&c.child!==null){c.child.return=c,c=c.child;continue}if(c===t)break l;for(;c.sibling===null;){if(c.return===null||c.return===t)break l;c=c.return}c.sibling.return=c.return,c=c.sibling}t.stateNode=u;l:switch(wl(u,n,a),n){case"button":case"input":case"select":case"textarea":a=!!a.autoFocus;break l;case"img":a=!0;break l;default:a=!1}a&&Kt(t)}}return xl(t),jc(t,t.type,l===null?null:l.memoizedProps,t.pendingProps,e),null;case 6:if(l&&t.stateNode!=null)l.memoizedProps!==a&&Kt(t);else{if(typeof a!="string"&&t.stateNode===null)throw Error(r(166));if(l=L.current,ca(t)){if(l=t.stateNode,e=t.memoizedProps,a=null,n=Vl,n!==null)switch(n.tag){case 27:case 5:a=n.memoizedProps}l[Zl]=t,l=!!(l.nodeValue===e||a!==null&&a.suppressHydrationWarning===!0||Ld(l.nodeValue,e)),l||ue(t,!0)}else l=Ou(l).createTextNode(a),l[Zl]=t,t.stateNode=l}return xl(t),null;case 31:if(e=t.memoizedState,l===null||l.memoizedState!==null){if(a=ca(t),e!==null){if(l===null){if(!a)throw Error(r(318));if(l=t.memoizedState,l=l!==null?l.dehydrated:null,!l)throw Error(r(557));l[Zl]=t}else Ue(),(t.flags&128)===0&&(t.memoizedState=null),t.flags|=4;xl(t),l=!1}else e=Di(),l!==null&&l.memoizedState!==null&&(l.memoizedState.hydrationErrors=e),l=!0;if(!l)return t.flags&256?(dt(t),t):(dt(t),null);if((t.flags&128)!==0)throw Error(r(558))}return xl(t),null;case 13:if(a=t.memoizedState,l===null||l.memoizedState!==null&&l.memoizedState.dehydrated!==null){if(n=ca(t),a!==null&&a.dehydrated!==null){if(l===null){if(!n)throw Error(r(318));if(n=t.memoizedState,n=n!==null?n.dehydrated:null,!n)throw Error(r(317));n[Zl]=t}else Ue(),(t.flags&128)===0&&(t.memoizedState=null),t.flags|=4;xl(t),n=!1}else n=Di(),l!==null&&l.memoizedState!==null&&(l.memoizedState.hydrationErrors=n),n=!0;if(!n)return t.flags&256?(dt(t),t):(dt(t),null)}return dt(t),(t.flags&128)!==0?(t.lanes=e,t):(e=a!==null,l=l!==null&&l.memoizedState!==null,e&&(a=t.child,n=null,a.alternate!==null&&a.alternate.memoizedState!==null&&a.alternate.memoizedState.cachePool!==null&&(n=a.alternate.memoizedState.cachePool.pool),u=null,a.memoizedState!==null&&a.memoizedState.cachePool!==null&&(u=a.memoizedState.cachePool.pool),u!==n&&(a.flags|=2048)),e!==l&&e&&(t.child.flags|=8192),mu(t,t.updateQueue),xl(t),null);case 4:return bl(),l===null&&Zc(t.stateNode.containerInfo),xl(t),null;case 10:return Xt(t.type),xl(t),null;case 19:if(O(Dl),a=t.memoizedState,a===null)return xl(t),null;if(n=(t.flags&128)!==0,u=a.rendering,u===null)if(n)nn(a,!1);else{if(Al!==0||l!==null&&(l.flags&128)!==0)for(l=t.child;l!==null;){if(u=tu(l),u!==null){for(t.flags|=128,nn(a,!1),l=u.updateQueue,t.updateQueue=l,mu(t,l),t.subtreeFlags=0,l=e,e=t.child;e!==null;)pf(e,l),e=e.sibling;return q(Dl,Dl.current&1|2),ul&&Gt(t,a.treeForkCount),t.child}l=l.sibling}a.tail!==null&&ut()>bu&&(t.flags|=128,n=!0,nn(a,!1),t.lanes=4194304)}else{if(!n)if(l=tu(u),l!==null){if(t.flags|=128,n=!0,l=l.updateQueue,t.updateQueue=l,mu(t,l),nn(a,!0),a.tail===null&&a.tailMode==="hidden"&&!u.alternate&&!ul)return xl(t),null}else 2*ut()-a.renderingStartTime>bu&&e!==536870912&&(t.flags|=128,n=!0,nn(a,!1),t.lanes=4194304);a.isBackwards?(u.sibling=t.child,t.child=u):(l=a.last,l!==null?l.sibling=u:t.child=u,a.last=u)}return a.tail!==null?(l=a.tail,a.rendering=l,a.tail=l.sibling,a.renderingStartTime=ut(),l.sibling=null,e=Dl.current,q(Dl,n?e&1|2:e&1),ul&&Gt(t,a.treeForkCount),l):(xl(t),null);case 22:case 23:return dt(t),Vi(),a=t.memoizedState!==null,l!==null?l.memoizedState!==null!==a&&(t.flags|=8192):a&&(t.flags|=8192),a?(e&536870912)!==0&&(t.flags&128)===0&&(xl(t),t.subtreeFlags&6&&(t.flags|=8192)):xl(t),e=t.updateQueue,e!==null&&mu(t,e.retryQueue),e=null,l!==null&&l.memoizedState!==null&&l.memoizedState.cachePool!==null&&(e=l.memoizedState.cachePool.pool),a=null,t.memoizedState!==null&&t.memoizedState.cachePool!==null&&(a=t.memoizedState.cachePool.pool),a!==e&&(t.flags|=2048),l!==null&&O(Be),null;case 24:return e=null,l!==null&&(e=l.memoizedState.cache),t.memoizedState.cache!==e&&(t.flags|=2048),Xt(Ul),xl(t),null;case 25:return null;case 30:return null}throw Error(r(156,t.tag))}function bm(l,t){switch(Oi(t),t.tag){case 1:return l=t.flags,l&65536?(t.flags=l&-65537|128,t):null;case 3:return Xt(Ul),bl(),l=t.flags,(l&65536)!==0&&(l&128)===0?(t.flags=l&-65537|128,t):null;case 26:case 27:case 5:return _n(t),null;case 31:if(t.memoizedState!==null){if(dt(t),t.alternate===null)throw Error(r(340));Ue()}return l=t.flags,l&65536?(t.flags=l&-65537|128,t):null;case 13:if(dt(t),l=t.memoizedState,l!==null&&l.dehydrated!==null){if(t.alternate===null)throw Error(r(340));Ue()}return l=t.flags,l&65536?(t.flags=l&-65537|128,t):null;case 19:return O(Dl),null;case 4:return bl(),null;case 10:return Xt(t.type),null;case 22:case 23:return dt(t),Vi(),l!==null&&O(Be),l=t.flags,l&65536?(t.flags=l&-65537|128,t):null;case 24:return Xt(Ul),null;case 25:return null;default:return null}}function wr(l,t){switch(Oi(t),t.tag){case 3:Xt(Ul),bl();break;case 26:case 27:case 5:_n(t);break;case 4:bl();break;case 31:t.memoizedState!==null&&dt(t);break;case 13:dt(t);break;case 19:O(Dl);break;case 10:Xt(t.type);break;case 22:case 23:dt(t),Vi(),l!==null&&O(Be);break;case 24:Xt(Ul)}}function un(l,t){try{var e=t.updateQueue,a=e!==null?e.lastEffect:null;if(a!==null){var n=a.next;e=n;do{if((e.tag&l)===l){a=void 0;var u=e.create,c=e.inst;a=u(),c.destroy=a}e=e.next}while(e!==n)}}catch(f){ml(t,t.return,f)}}function oe(l,t,e){try{var a=t.updateQueue,n=a!==null?a.lastEffect:null;if(n!==null){var u=n.next;a=u;do{if((a.tag&l)===l){var c=a.inst,f=c.destroy;if(f!==void 0){c.destroy=void 0,n=t;var d=e,p=f;try{p()}catch(N){ml(n,d,N)}}}a=a.next}while(a!==u)}}catch(N){ml(t,t.return,N)}}function $r(l){var t=l.updateQueue;if(t!==null){var e=l.stateNode;try{Yf(t,e)}catch(a){ml(l,l.return,a)}}}function kr(l,t,e){e.props=Xe(l.type,l.memoizedProps),e.state=l.memoizedState;try{e.componentWillUnmount()}catch(a){ml(l,t,a)}}function cn(l,t){try{var e=l.ref;if(e!==null){switch(l.tag){case 26:case 27:case 5:var a=l.stateNode;break;case 30:a=l.stateNode;break;default:a=l.stateNode}typeof e=="function"?l.refCleanup=e(a):e.current=a}}catch(n){ml(l,t,n)}}function Rt(l,t){var e=l.ref,a=l.refCleanup;if(e!==null)if(typeof a=="function")try{a()}catch(n){ml(l,t,n)}finally{l.refCleanup=null,l=l.alternate,l!=null&&(l.refCleanup=null)}else if(typeof e=="function")try{e(null)}catch(n){ml(l,t,n)}else e.current=null}function Wr(l){var t=l.type,e=l.memoizedProps,a=l.stateNode;try{l:switch(t){case"button":case"input":case"select":case"textarea":e.autoFocus&&a.focus();break l;case"img":e.src?a.src=e.src:e.srcSet&&(a.srcset=e.srcSet)}}catch(n){ml(l,l.return,n)}}function xc(l,t,e){try{var a=l.stateNode;Xm(a,l.type,e,t),a[Wl]=t}catch(n){ml(l,l.return,n)}}function Fr(l){return l.tag===5||l.tag===3||l.tag===26||l.tag===27&&be(l.type)||l.tag===4}function Ec(l){l:for(;;){for(;l.sibling===null;){if(l.return===null||Fr(l.return))return null;l=l.return}for(l.sibling.return=l.return,l=l.sibling;l.tag!==5&&l.tag!==6&&l.tag!==18;){if(l.tag===27&&be(l.type)||l.flags&2||l.child===null||l.tag===4)continue l;l.child.return=l,l=l.child}if(!(l.flags&2))return l.stateNode}}function Nc(l,t,e){var a=l.tag;if(a===5||a===6)l=l.stateNode,t?(e.nodeType===9?e.body:e.nodeName==="HTML"?e.ownerDocument.body:e).insertBefore(l,t):(t=e.nodeType===9?e.body:e.nodeName==="HTML"?e.ownerDocument.body:e,t.appendChild(l),e=e._reactRootContainer,e!=null||t.onclick!==null||(t.onclick=Bt));else if(a!==4&&(a===27&&be(l.type)&&(e=l.stateNode,t=null),l=l.child,l!==null))for(Nc(l,t,e),l=l.sibling;l!==null;)Nc(l,t,e),l=l.sibling}function vu(l,t,e){var a=l.tag;if(a===5||a===6)l=l.stateNode,t?e.insertBefore(l,t):e.appendChild(l);else if(a!==4&&(a===27&&be(l.type)&&(e=l.stateNode),l=l.child,l!==null))for(vu(l,t,e),l=l.sibling;l!==null;)vu(l,t,e),l=l.sibling}function Ir(l){var t=l.stateNode,e=l.memoizedProps;try{for(var a=l.type,n=t.attributes;n.length;)t.removeAttributeNode(n[0]);wl(t,a,e),t[Zl]=l,t[Wl]=e}catch(u){ml(l,l.return,u)}}var Jt=!1,Bl=!1,_c=!1,Pr=typeof WeakSet=="function"?WeakSet:Set,Ql=null;function pm(l,t){if(l=l.containerInfo,Jc=Bu,l=df(l),Si(l)){if("selectionStart"in l)var e={start:l.selectionStart,end:l.selectionEnd};else l:{e=(e=l.ownerDocument)&&e.defaultView||window;var a=e.getSelection&&e.getSelection();if(a&&a.rangeCount!==0){e=a.anchorNode;var n=a.anchorOffset,u=a.focusNode;a=a.focusOffset;try{e.nodeType,u.nodeType}catch{e=null;break l}var c=0,f=-1,d=-1,p=0,N=0,A=l,j=null;t:for(;;){for(var x;A!==e||n!==0&&A.nodeType!==3||(f=c+n),A!==u||a!==0&&A.nodeType!==3||(d=c+a),A.nodeType===3&&(c+=A.nodeValue.length),(x=A.firstChild)!==null;)j=A,A=x;for(;;){if(A===l)break t;if(j===e&&++p===n&&(f=c),j===u&&++N===a&&(d=c),(x=A.nextSibling)!==null)break;A=j,j=A.parentNode}A=x}e=f===-1||d===-1?null:{start:f,end:d}}else e=null}e=e||{start:0,end:0}}else e=null;for(wc={focusedElem:l,selectionRange:e},Bu=!1,Ql=t;Ql!==null;)if(t=Ql,l=t.child,(t.subtreeFlags&1028)!==0&&l!==null)l.return=t,Ql=l;else for(;Ql!==null;){switch(t=Ql,u=t.alternate,l=t.flags,t.tag){case 0:if((l&4)!==0&&(l=t.updateQueue,l=l!==null?l.events:null,l!==null))for(e=0;e title"))),wl(u,a,e),u[Zl]=l,Gl(u),a=u;break l;case"link":var c=uo("link","href",n).get(a+(e.href||""));if(c){for(var f=0;fgl&&(c=gl,gl=J,J=c);var y=ff(f,J),h=ff(f,gl);if(y&&h&&(x.rangeCount!==1||x.anchorNode!==y.node||x.anchorOffset!==y.offset||x.focusNode!==h.node||x.focusOffset!==h.offset)){var b=A.createRange();b.setStart(y.node,y.offset),x.removeAllRanges(),J>gl?(x.addRange(b),x.extend(h.node,h.offset)):(b.setEnd(h.node,h.offset),x.addRange(b))}}}}for(A=[],x=f;x=x.parentNode;)x.nodeType===1&&A.push({element:x,left:x.scrollLeft,top:x.scrollTop});for(typeof f.focus=="function"&&f.focus(),f=0;fe?32:e,_.T=null,e=Rc,Rc=null;var u=ye,c=Ft;if(ql=0,ja=ye=null,Ft=0,(rl&6)!==0)throw Error(r(331));var f=rl;if(rl|=4,rd(u.current),cd(u,u.current,c,e),rl=f,hn(0,!1),it&&typeof it.onPostCommitFiberRoot=="function")try{it.onPostCommitFiberRoot(Ma,u)}catch{}return!0}finally{B.p=n,_.T=a,Ad(l,t)}}function Md(l,t,e){t=bt(e,t),t=dc(l.stateNode,t,2),l=fe(l,t,2),l!==null&&(Ra(l,2),Ut(l))}function ml(l,t,e){if(l.tag===3)Md(l,l,e);else for(;t!==null;){if(t.tag===3){Md(t,l,e);break}else if(t.tag===1){var a=t.stateNode;if(typeof t.type.getDerivedStateFromError=="function"||typeof a.componentDidCatch=="function"&&(ve===null||!ve.has(a))){l=bt(e,l),e=Dr(2),a=fe(t,e,2),a!==null&&(Rr(e,a,t,l),Ra(a,2),Ut(a));break}}t=t.return}}function Bc(l,t,e){var a=l.pingCache;if(a===null){a=l.pingCache=new Em;var n=new Set;a.set(t,n)}else n=a.get(t),n===void 0&&(n=new Set,a.set(t,n));n.has(e)||(Ac=!0,n.add(e),l=Am.bind(null,l,t,e),t.then(l,l))}function Am(l,t,e){var a=l.pingCache;a!==null&&a.delete(t),l.pingedLanes|=l.suspendedLanes&e,l.warmLanes&=~e,pl===l&&(tl&e)===e&&(Al===4||Al===3&&(tl&62914560)===tl&&300>ut()-Su?(rl&2)===0&&xa(l,0):Oc|=e,pa===tl&&(pa=0)),Ut(l)}function Dd(l,t){t===0&&(t=Ns()),l=De(l,t),l!==null&&(Ra(l,t),Ut(l))}function Om(l){var t=l.memoizedState,e=0;t!==null&&(e=t.retryLane),Dd(l,e)}function Mm(l,t){var e=0;switch(l.tag){case 31:case 13:var a=l.stateNode,n=l.memoizedState;n!==null&&(e=n.retryLane);break;case 19:a=l.stateNode;break;case 22:a=l.stateNode._retryCache;break;default:throw Error(r(314))}a!==null&&a.delete(t),Dd(l,e)}function Dm(l,t){return ku(l,t)}var _u=null,Na=null,qc=!1,Tu=!1,Yc=!1,Se=0;function Ut(l){l!==Na&&l.next===null&&(Na===null?_u=Na=l:Na=Na.next=l),Tu=!0,qc||(qc=!0,Um())}function hn(l,t){if(!Yc&&Tu){Yc=!0;do for(var e=!1,a=_u;a!==null;){if(l!==0){var n=a.pendingLanes;if(n===0)var u=0;else{var c=a.suspendedLanes,f=a.pingedLanes;u=(1<<31-ct(42|l)+1)-1,u&=n&~(c&~f),u=u&201326741?u&201326741|1:u?u|2:0}u!==0&&(e=!0,Hd(a,u))}else u=tl,u=Mn(a,a===pl?u:0,a.cancelPendingCommit!==null||a.timeoutHandle!==-1),(u&3)===0||Da(a,u)||(e=!0,Hd(a,u));a=a.next}while(e);Yc=!1}}function Rm(){Rd()}function Rd(){Tu=qc=!1;var l=0;Se!==0&&Zm()&&(l=Se);for(var t=ut(),e=null,a=_u;a!==null;){var n=a.next,u=Ud(a,t);u===0?(a.next=null,e===null?_u=n:e.next=n,n===null&&(Na=e)):(e=a,(l!==0||(u&3)!==0)&&(Tu=!0)),a=n}ql!==0&&ql!==5||hn(l),Se!==0&&(Se=0)}function Ud(l,t){for(var e=l.suspendedLanes,a=l.pingedLanes,n=l.expirationTimes,u=l.pendingLanes&-62914561;0f)break;var N=d.transferSize,A=d.initiatorType;N&&Zd(A)&&(d=d.responseEnd,c+=N*(d"u"?null:document;function to(l,t,e){var a=_a;if(a&&typeof t=="string"&&t){var n=gt(t);n='link[rel="'+l+'"][href="'+n+'"]',typeof e=="string"&&(n+='[crossorigin="'+e+'"]'),lo.has(n)||(lo.add(n),l={rel:l,crossOrigin:e,href:t},a.querySelector(n)===null&&(t=a.createElement("link"),wl(t,"link",l),Gl(t),a.head.appendChild(t)))}}function Im(l){It.D(l),to("dns-prefetch",l,null)}function Pm(l,t){It.C(l,t),to("preconnect",l,t)}function lv(l,t,e){It.L(l,t,e);var a=_a;if(a&&l&&t){var n='link[rel="preload"][as="'+gt(t)+'"]';t==="image"&&e&&e.imageSrcSet?(n+='[imagesrcset="'+gt(e.imageSrcSet)+'"]',typeof e.imageSizes=="string"&&(n+='[imagesizes="'+gt(e.imageSizes)+'"]')):n+='[href="'+gt(l)+'"]';var u=n;switch(t){case"style":u=Ta(l);break;case"script":u=za(l)}_t.has(u)||(l=U({rel:"preload",href:t==="image"&&e&&e.imageSrcSet?void 0:l,as:t},e),_t.set(u,l),a.querySelector(n)!==null||t==="style"&&a.querySelector(gn(u))||t==="script"&&a.querySelector(Sn(u))||(t=a.createElement("link"),wl(t,"link",l),Gl(t),a.head.appendChild(t)))}}function tv(l,t){It.m(l,t);var e=_a;if(e&&l){var a=t&&typeof t.as=="string"?t.as:"script",n='link[rel="modulepreload"][as="'+gt(a)+'"][href="'+gt(l)+'"]',u=n;switch(a){case"audioworklet":case"paintworklet":case"serviceworker":case"sharedworker":case"worker":case"script":u=za(l)}if(!_t.has(u)&&(l=U({rel:"modulepreload",href:l},t),_t.set(u,l),e.querySelector(n)===null)){switch(a){case"audioworklet":case"paintworklet":case"serviceworker":case"sharedworker":case"worker":case"script":if(e.querySelector(Sn(u)))return}a=e.createElement("link"),wl(a,"link",l),Gl(a),e.head.appendChild(a)}}}function ev(l,t,e){It.S(l,t,e);var a=_a;if(a&&l){var n=$e(a).hoistableStyles,u=Ta(l);t=t||"default";var c=n.get(u);if(!c){var f={loading:0,preload:null};if(c=a.querySelector(gn(u)))f.loading=5;else{l=U({rel:"stylesheet",href:l,"data-precedence":t},e),(e=_t.get(u))&&ls(l,e);var d=c=a.createElement("link");Gl(d),wl(d,"link",l),d._p=new Promise(function(p,N){d.onload=p,d.onerror=N}),d.addEventListener("load",function(){f.loading|=1}),d.addEventListener("error",function(){f.loading|=2}),f.loading|=4,Du(c,t,a)}c={type:"stylesheet",instance:c,count:1,state:f},n.set(u,c)}}}function av(l,t){It.X(l,t);var e=_a;if(e&&l){var a=$e(e).hoistableScripts,n=za(l),u=a.get(n);u||(u=e.querySelector(Sn(n)),u||(l=U({src:l,async:!0},t),(t=_t.get(n))&&ts(l,t),u=e.createElement("script"),Gl(u),wl(u,"link",l),e.head.appendChild(u)),u={type:"script",instance:u,count:1,state:null},a.set(n,u))}}function nv(l,t){It.M(l,t);var e=_a;if(e&&l){var a=$e(e).hoistableScripts,n=za(l),u=a.get(n);u||(u=e.querySelector(Sn(n)),u||(l=U({src:l,async:!0,type:"module"},t),(t=_t.get(n))&&ts(l,t),u=e.createElement("script"),Gl(u),wl(u,"link",l),e.head.appendChild(u)),u={type:"script",instance:u,count:1,state:null},a.set(n,u))}}function eo(l,t,e,a){var n=(n=L.current)?Mu(n):null;if(!n)throw Error(r(446));switch(l){case"meta":case"title":return null;case"style":return typeof e.precedence=="string"&&typeof e.href=="string"?(t=Ta(e.href),e=$e(n).hoistableStyles,a=e.get(t),a||(a={type:"style",instance:null,count:0,state:null},e.set(t,a)),a):{type:"void",instance:null,count:0,state:null};case"link":if(e.rel==="stylesheet"&&typeof e.href=="string"&&typeof e.precedence=="string"){l=Ta(e.href);var u=$e(n).hoistableStyles,c=u.get(l);if(c||(n=n.ownerDocument||n,c={type:"stylesheet",instance:null,count:0,state:{loading:0,preload:null}},u.set(l,c),(u=n.querySelector(gn(l)))&&!u._p&&(c.instance=u,c.state.loading=5),_t.has(l)||(e={rel:"preload",as:"style",href:e.href,crossOrigin:e.crossOrigin,integrity:e.integrity,media:e.media,hrefLang:e.hrefLang,referrerPolicy:e.referrerPolicy},_t.set(l,e),u||uv(n,l,e,c.state))),t&&a===null)throw Error(r(528,""));return c}if(t&&a!==null)throw Error(r(529,""));return null;case"script":return t=e.async,e=e.src,typeof e=="string"&&t&&typeof t!="function"&&typeof t!="symbol"?(t=za(e),e=$e(n).hoistableScripts,a=e.get(t),a||(a={type:"script",instance:null,count:0,state:null},e.set(t,a)),a):{type:"void",instance:null,count:0,state:null};default:throw Error(r(444,l))}}function Ta(l){return'href="'+gt(l)+'"'}function gn(l){return'link[rel="stylesheet"]['+l+"]"}function ao(l){return U({},l,{"data-precedence":l.precedence,precedence:null})}function uv(l,t,e,a){l.querySelector('link[rel="preload"][as="style"]['+t+"]")?a.loading=1:(t=l.createElement("link"),a.preload=t,t.addEventListener("load",function(){return a.loading|=1}),t.addEventListener("error",function(){return a.loading|=2}),wl(t,"link",e),Gl(t),l.head.appendChild(t))}function za(l){return'[src="'+gt(l)+'"]'}function Sn(l){return"script[async]"+l}function no(l,t,e){if(t.count++,t.instance===null)switch(t.type){case"style":var a=l.querySelector('style[data-href~="'+gt(e.href)+'"]');if(a)return t.instance=a,Gl(a),a;var n=U({},e,{"data-href":e.href,"data-precedence":e.precedence,href:null,precedence:null});return a=(l.ownerDocument||l).createElement("style"),Gl(a),wl(a,"style",n),Du(a,e.precedence,l),t.instance=a;case"stylesheet":n=Ta(e.href);var u=l.querySelector(gn(n));if(u)return t.state.loading|=4,t.instance=u,Gl(u),u;a=ao(e),(n=_t.get(n))&&ls(a,n),u=(l.ownerDocument||l).createElement("link"),Gl(u);var c=u;return c._p=new Promise(function(f,d){c.onload=f,c.onerror=d}),wl(u,"link",a),t.state.loading|=4,Du(u,e.precedence,l),t.instance=u;case"script":return u=za(e.src),(n=l.querySelector(Sn(u)))?(t.instance=n,Gl(n),n):(a=e,(n=_t.get(u))&&(a=U({},e),ts(a,n)),l=l.ownerDocument||l,n=l.createElement("script"),Gl(n),wl(n,"link",a),l.head.appendChild(n),t.instance=n);case"void":return null;default:throw Error(r(443,t.type))}else t.type==="stylesheet"&&(t.state.loading&4)===0&&(a=t.instance,t.state.loading|=4,Du(a,e.precedence,l));return t.instance}function Du(l,t,e){for(var a=e.querySelectorAll('link[rel="stylesheet"][data-precedence],style[data-precedence]'),n=a.length?a[a.length-1]:null,u=n,c=0;c title"):null)}function iv(l,t,e){if(e===1||t.itemProp!=null)return!1;switch(l){case"meta":case"title":return!0;case"style":if(typeof t.precedence!="string"||typeof t.href!="string"||t.href==="")break;return!0;case"link":if(typeof t.rel!="string"||typeof t.href!="string"||t.href===""||t.onLoad||t.onError)break;switch(t.rel){case"stylesheet":return l=t.disabled,typeof t.precedence=="string"&&l==null;default:return!0}case"script":if(t.async&&typeof t.async!="function"&&typeof t.async!="symbol"&&!t.onLoad&&!t.onError&&t.src&&typeof t.src=="string")return!0}return!1}function co(l){return!(l.type==="stylesheet"&&(l.state.loading&3)===0)}function cv(l,t,e,a){if(e.type==="stylesheet"&&(typeof a.media!="string"||matchMedia(a.media).matches!==!1)&&(e.state.loading&4)===0){if(e.instance===null){var n=Ta(a.href),u=t.querySelector(gn(n));if(u){t=u._p,t!==null&&typeof t=="object"&&typeof t.then=="function"&&(l.count++,l=Uu.bind(l),t.then(l,l)),e.state.loading|=4,e.instance=u,Gl(u);return}u=t.ownerDocument||t,a=ao(a),(n=_t.get(n))&&ls(a,n),u=u.createElement("link"),Gl(u);var c=u;c._p=new Promise(function(f,d){c.onload=f,c.onerror=d}),wl(u,"link",a),e.instance=u}l.stylesheets===null&&(l.stylesheets=new Map),l.stylesheets.set(e,t),(t=e.state.preload)&&(e.state.loading&3)===0&&(l.count++,e=Uu.bind(l),t.addEventListener("load",e),t.addEventListener("error",e))}}var es=0;function sv(l,t){return l.stylesheets&&l.count===0&&Hu(l,l.stylesheets),0es?50:800)+t);return l.unsuspend=e,function(){l.unsuspend=null,clearTimeout(a),clearTimeout(n)}}:null}function Uu(){if(this.count--,this.count===0&&(this.imgCount===0||!this.waitingForImages)){if(this.stylesheets)Hu(this,this.stylesheets);else if(this.unsuspend){var l=this.unsuspend;this.unsuspend=null,l()}}}var Cu=null;function Hu(l,t){l.stylesheets=null,l.unsuspend!==null&&(l.count++,Cu=new Map,t.forEach(fv,l),Cu=null,Uu.call(l))}function fv(l,t){if(!(t.state.loading&4)){var e=Cu.get(l);if(e)var a=e.get(null);else{e=new Map,Cu.set(l,e);for(var n=l.querySelectorAll("link[data-precedence],style[data-precedence]"),u=0;u"u"||typeof __REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE!="function"))try{__REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE(s)}catch(o){console.error(o)}}return s(),ds.exports=_v(),ds.exports}var zv=Tv();function gs(s){return s==null?0:Math.max(0,Math.min(100,Math.round(s*100)))}function Av({d:s}){const o=gs(s.avg);return i.jsxs("div",{className:"avg-dim",children:[i.jsxs("div",{className:"avg-dim-header",children:[i.jsx("span",{className:"avg-dim-name",children:s.dimension_name}),i.jsx("span",{className:"avg-dim-score",children:s.avg.toFixed(2)})]}),i.jsx("div",{className:"dim-bar",children:i.jsx("div",{className:"dim-fill",style:{width:`${o}%`}})}),i.jsxs("div",{className:"avg-dim-range",children:[s.min.toFixed(2)," – ",s.max.toFixed(2)," (",s.n," runs)"]})]})}function Ov({avg:s,onSelectRun:o}){const v=Object.entries(s.failure_modes),r=s.n>0?(s.pass_count/s.n*100).toFixed(0):"0";return i.jsx("tr",{children:i.jsx("td",{colSpan:6,style:{padding:0},children:i.jsxs("div",{className:"avg-detail",children:[i.jsxs("div",{className:"avg-detail-header",children:[i.jsx("div",{className:"avg-detail-title",children:s.scenario_name??s.base_id}),i.jsx("div",{className:"avg-detail-subtitle",children:s.base_id})]}),i.jsxs("div",{className:"avg-detail-stats",children:[i.jsxs("div",{className:"avg-stat",children:[i.jsxs("div",{className:"avg-stat-value",style:{color:"var(--green)"},children:[r,"%"]}),i.jsx("div",{className:"avg-stat-label",children:"Pass Rate"})]}),i.jsxs("div",{className:"avg-stat",children:[i.jsxs("div",{className:"avg-stat-value",children:[s.pass_count,"/",s.n]}),i.jsx("div",{className:"avg-stat-label",children:"Pass/Total"})]}),i.jsxs("div",{className:"avg-stat",children:[i.jsx("div",{className:"avg-stat-value",children:s.avg.toFixed(3)}),i.jsx("div",{className:"avg-stat-label",children:"Mean Score"})]}),i.jsxs("div",{className:"avg-stat",children:[i.jsx("div",{className:"avg-stat-value",children:s.spread.toFixed(3)}),i.jsx("div",{className:"avg-stat-label",children:"Spread"})]})]}),v.length>0&&i.jsxs("div",{className:"avg-section",children:[i.jsx("div",{className:"section-label",children:"Failure Modes"}),i.jsx("div",{className:"avg-failure-modes",children:v.map(([g,E])=>i.jsxs("span",{className:"avg-failure-pill",children:[g," ",i.jsxs("span",{className:"avg-failure-count",children:["×",E]})]},g))})]}),s.dimensions.length>0&&i.jsxs("div",{className:"avg-section",children:[i.jsx("div",{className:"section-label",children:"Dimension Averages"}),i.jsx("div",{className:"avg-dims-grid",children:s.dimensions.map(g=>i.jsx(Av,{d:g},g.dimension_id))})]}),s.judge_notes.length>0&&i.jsxs("div",{className:"avg-section",children:[i.jsxs("div",{className:"section-label",children:["Judge Notes (",s.judge_notes.length,")"]}),i.jsx("div",{className:"avg-notes",children:s.judge_notes.map((g,E)=>i.jsxs("div",{className:"avg-note",children:[i.jsxs("span",{className:"avg-note-num",children:["#",E+1]}),g]},E))})]}),i.jsxs("div",{className:"avg-section",children:[i.jsx("div",{className:"section-label",children:"Individual Runs"}),i.jsx("div",{className:"avg-runs",children:s.ordinals.map((g,E)=>i.jsxs("button",{type:"button",className:"avg-run-btn",onClick:()=>o(g),children:["Run #",E+1]},g))})]})]})})})}function Bo({averages:s,onSelectRun:o}){const[v,r]=D.useState(null);return s.length===0?null:i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"section-title",children:["Averages (across repeats)"," ",i.jsx("span",{style:{color:"var(--muted)",fontWeight:400,fontSize:12},children:"(click to expand)"})]}),i.jsxs("table",{children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"Scenario"}),i.jsx("th",{style:{textAlign:"right"},children:"Avg"}),i.jsx("th",{style:{textAlign:"right"},children:"Min"}),i.jsx("th",{style:{textAlign:"right"},children:"Max"}),i.jsx("th",{style:{textAlign:"right"},children:"Spread"}),i.jsx("th",{style:{textAlign:"right"},children:"N"})]})}),i.jsx("tbody",{children:s.map(g=>i.jsxs(D.Fragment,{children:[i.jsxs("tr",{className:`${g.avg>=.7?"avg-pass":"avg-fail"} clickable-row`,onClick:()=>r(v===g.base_id?null:g.base_id),children:[i.jsxs("td",{children:[g.base_id,i.jsx("span",{className:"avg-expand-icon",style:{marginLeft:6,display:"inline-block",transform:v===g.base_id?"rotate(90deg)":"none",transition:"transform .15s"},children:"▸"})]}),i.jsx("td",{style:{textAlign:"right"},children:g.avg.toFixed(3)}),i.jsx("td",{style:{textAlign:"right"},children:g.min.toFixed(2)}),i.jsx("td",{style:{textAlign:"right"},children:g.max.toFixed(2)}),i.jsx("td",{style:{textAlign:"right"},children:g.spread.toFixed(3)}),i.jsx("td",{style:{textAlign:"right"},children:g.n})]}),v===g.base_id&&i.jsx(Ov,{avg:g,onSelectRun:o})]},g.base_id))})]})]})}function Ro(s){return s?{authorization:`Bearer ${s}`}:{}}function Mv(s){return s===null?"—":s.toFixed(2)}function Uo(s){if(s===null)return"—";const o=s.toFixed(2);return s>0?`+${o}`:o}function Dv(s){return s==="pass"?"PASS":s==="fail"?"FAIL":s==="harness_fail"?"HARNESS":s==="error"?"ERROR":s==="missing"?"—":s==="running"?"RUN":s}function Rv(s){const v=new URLSearchParams(s).get("run_ids");return v?v.split(",").map(r=>r.trim()).filter(Boolean):[]}function Uv(s){return new URLSearchParams(s).get("only")==="changes"}function Cv(s,o){const v=new URLSearchParams;s.length>0&&v.set("run_ids",s.join(",")),o&&v.set("only","changes");const r=v.toString(),E=`/compare${r?`?${r}`:""}`;window.location.pathname+window.location.search!==E&&window.history.replaceState(null,"",E)}function Hv({token:s,apiBase:o=""}){const[v,r]=D.useState(()=>Rv(window.location.search)),[g,E]=D.useState(()=>Uv(window.location.search)),[C,R]=D.useState(null),[T,S]=D.useState(null),[Y,U]=D.useState(!1),[V,dl]=D.useState(v.length<2),[I,Tl]=D.useState([]),[_l,Sl]=D.useState(new Set(v)),Xl=_l.size>=2&&_l.size<=10,al=D.useCallback(async()=>{try{const H=await fetch(`${o}/api/runs?limit=100`,{headers:Ro(s)});if(!H.ok)throw new Error(`/api/runs returned ${H.status}`);const il=await H.json();Tl(il.runs.map(nl=>({runId:nl.runId,status:nl.status,label:nl.label??null,startedAt:nl.startedAt})))}catch(H){S(H instanceof Error?H.message:String(H))}},[o,s]);D.useEffect(()=>{al()},[al]),D.useEffect(()=>{if(v.length<2){R(null);return}let H=!1;U(!0),S(null);const il=new URLSearchParams({run_ids:v.join(",")});return fetch(`${o}/api/comparisons?${il.toString()}`,{headers:Ro(s)}).then(async nl=>{if(!nl.ok){const Ll=await nl.json();throw new Error(Ll.error?.message??`/api/comparisons returned ${nl.status}`)}return await nl.json()}).then(nl=>{H||R(nl)}).catch(nl=>{H||(S(nl instanceof Error?nl.message:String(nl)),R(null))}).finally(()=>{H||U(!1)}),()=>{H=!0}},[o,v,s]),D.useEffect(()=>{Cv(v,g)},[v,g]);const P=D.useCallback(H=>{const il=H.target.value;Sl(nl=>{const Ll=new Set(nl);return Ll.has(il)?Ll.delete(il):Ll.add(il),Ll})},[]),Ol=D.useCallback(H=>{H.preventDefault();const il=Array.from(_l);if(il.length<2){S("Select at least 2 runs to compare.");return}if(il.length>10){S("Select at most 10 runs to compare.");return}S(null),r(il),dl(!1)},[_l]),El=D.useMemo(()=>C?g?C.scenarios.filter(H=>H.status_change!=="unchanged"):C.scenarios:[],[C,g]);return i.jsxs("div",{className:"compare-view",children:[i.jsxs("header",{className:"compare-header",children:[i.jsx("h1",{children:"Compare Runs"}),i.jsxs("div",{className:"compare-actions",children:[i.jsx("button",{type:"button",onClick:()=>{Sl(new Set(v)),dl(H=>!H)},children:V?"Hide picker":"Choose runs"}),i.jsxs("label",{children:[i.jsx("input",{type:"checkbox",checked:g,onChange:H=>E(H.target.checked)}),"Only changes"]})]})]}),T&&i.jsx("div",{className:"compare-error",children:T}),Y&&i.jsx("div",{className:"compare-loading",children:"Loading comparison…"}),V&&i.jsxs("form",{className:"compare-picker",onSubmit:Ol,children:[i.jsxs("p",{children:["Select 2–10 runs to compare. Currently selected:"," ",i.jsx("strong",{children:_l.size})]}),i.jsx("ul",{children:I.map(H=>i.jsx("li",{children:i.jsxs("label",{children:[i.jsx("input",{type:"checkbox",value:H.runId,checked:_l.has(H.runId),onChange:P}),i.jsx("code",{children:H.runId}),i.jsxs("span",{className:"compare-run-label",children:[H.label?` · ${H.label}`:""," · ",H.status," ·"," ",new Date(H.startedAt).toLocaleString()]})]})},H.runId))}),i.jsx("button",{type:"submit",disabled:!Xl,children:"Apply"})]}),C&&i.jsxs(i.Fragment,{children:[i.jsxs("section",{className:"compare-summary","data-sticky":"true",children:[i.jsxs("div",{children:[i.jsx("strong",{children:"Alignment:"})," ",i.jsx("code",{children:C.alignment})]}),i.jsxs("div",{children:[i.jsx("strong",{children:"Total:"})," ",C.summary.total_scenarios]}),i.jsxs("div",{children:[i.jsx("strong",{children:"Regressed:"})," ",C.summary.scenarios_regressed]}),i.jsxs("div",{children:[i.jsx("strong",{children:"Improved:"})," ",C.summary.scenarios_improved]}),i.jsxs("div",{children:[i.jsx("strong",{children:"Mixed/missing:"})," ",C.summary.scenarios_missing_in_some]}),i.jsxs("div",{children:[i.jsx("strong",{children:"Δ avg score:"})," ",Uo(C.summary.average_score_delta)]})]}),i.jsxs("table",{className:"compare-table",children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"Scenario"}),C.runs.map(H=>i.jsxs("th",{children:[i.jsx("div",{children:i.jsx("code",{children:H.run_id.slice(0,10)})}),i.jsx("div",{className:"compare-run-meta",children:H.label??H.status})]},H.run_id)),i.jsx("th",{children:"Δ score"}),i.jsx("th",{children:"Change"})]})}),i.jsx("tbody",{children:El.map(H=>i.jsxs("tr",{className:`compare-row compare-row--${H.status_change}`,children:[i.jsxs("td",{children:[i.jsx("div",{children:i.jsx("strong",{children:H.scenario_name??H.scenario_id})}),i.jsxs("div",{className:"compare-scenario-meta",children:[i.jsx("code",{children:H.scenario_id}),H.file?` · ${H.file}`:""]})]}),C.runs.map(il=>{const nl=H.entries[il.run_id];return i.jsxs("td",{className:`compare-cell compare-cell--${nl?.status??"missing"}`,title:nl?.reason??void 0,children:[i.jsx("div",{children:Dv(nl?.status??"missing")}),i.jsx("div",{className:"compare-score",children:Mv(nl?.score??null)})]},il.run_id)}),i.jsx("td",{children:Uo(H.delta_score)}),i.jsx("td",{children:H.status_change})]},H.alignment_key))})]}),El.length===0&&i.jsx("p",{className:"compare-hint",children:"No aligned scenario rows match this comparison."})]}),!C&&!Y&&v.length<2&&i.jsx("p",{className:"compare-hint",children:"Select at least two runs above to load a comparison."})]})}function Bv(s){const o={};for(const r of s.tool_calls??[]){const g=r.turn_index??-1;o[g]||(o[g]=[]),o[g].push(r)}const v={};for(const r of s.checkpoints??[]){const g=r.preceding_turn_index??-1;v[g]||(v[g]=[]),v[g].push(r)}return(s.turns??[]).map(r=>({...r,tool_calls:o[r.turn_index]??[],checkpoints:v[r.turn_index]??[]}))}const qv=/session_id:\s*(\S+)|reset_policy:\s*(\S+)|time_offset:\s*(\S+)|user_id:\s*(\S+)/g;function Yv(s){const o={};for(const v of s.matchAll(qv))v[1]&&(o.session_id=v[1]),v[2]&&(o.reset_policy=v[2]),v[3]&&(o.time_offset=v[3]),v[4]&&(o.user_id=v[4]);return o}function Gv(s){return s.role==="system"&&typeof s.content=="string"&&s.content.startsWith("--- Session boundary")}function Qv({turn:s}){const o=Yv(s.content??"");return i.jsxs("div",{className:"turn turn-boundary",children:[i.jsxs("div",{className:"turn-header",children:[i.jsx("span",{className:"turn-role role-boundary",children:"Session Boundary"}),i.jsxs("span",{className:"turn-meta",children:["Turn ",s.turn_index]})]}),i.jsx("div",{className:"boundary-pills",children:Object.entries(o).map(([v,r])=>i.jsxs("span",{className:"boundary-pill",children:[i.jsxs("span",{className:"pill-label",children:[v,":"]})," ",r]},v))})]})}function Xv({tc:s}){return i.jsxs("div",{className:"tool-call",children:[i.jsx("div",{className:"tool-name",children:s.name}),s.args!=null&&i.jsx("pre",{className:"tool-args",children:JSON.stringify(s.args,null,2)})]})}function Lv({cp:s}){const o=s.passed?"cp-pass":"cp-fail";return i.jsxs("div",{className:`checkpoint ${o}`,children:[i.jsxs("div",{className:"cp-header",children:[i.jsxs("span",{children:["Checkpoint ",s.checkpoint_index]}),i.jsx("span",{className:"cp-status",children:s.passed?"PASS":"FAIL"})]}),(s.failures??[]).map((v,r)=>i.jsx("div",{className:"cp-failure",children:v},r))]})}function Zv({turn:s}){const o={user:"role-user",assistant:"role-assistant"},v={user:"turn-user",assistant:"turn-assistant"},r={user:"User",assistant:"Assistant",system:"System",inject:"Inject",checkpoint:"Checkpoint"};return i.jsxs("div",{className:`turn ${v[s.role]??"turn-system"}`,children:[i.jsxs("div",{className:"turn-header",children:[i.jsx("span",{className:`turn-role ${o[s.role]??"role-system"}`,children:r[s.role]??s.role}),s.source&&i.jsx("span",{className:"turn-source",children:s.source}),i.jsxs("span",{className:"turn-meta",children:["Turn ",s.turn_index]})]}),s.content&&i.jsx("div",{className:"turn-content",children:s.content}),(s.tool_calls??[]).length>0&&i.jsxs("div",{className:"tool-calls",children:[i.jsx("div",{className:"section-label",children:"Tool Calls"}),s.tool_calls?.map((g,E)=>i.jsx(Xv,{tc:g},E))]}),(s.checkpoints??[]).length>0&&i.jsx("div",{className:"checkpoints",children:s.checkpoints?.map((g,E)=>i.jsx(Lv,{cp:g},E))})]})}function qo({detail:s}){const o=Bv(s);return i.jsx("div",{children:o.map((v,r)=>Gv(v)?i.jsx(Qv,{turn:v},r):i.jsx(Zv,{turn:v},r))})}function Vv({d:s}){const o=gs(s.normalized_score),v=s.raw_score!=null?`${s.raw_score}${s.scale_points!=null?`/${s.scale_points}`:""}`:"n/a";return i.jsxs("div",{className:"dimension",children:[i.jsxs("div",{className:"dim-header",children:[i.jsxs("div",{children:[i.jsx("div",{className:"dim-name",children:s.dimension_name}),i.jsx("div",{className:"dim-id",children:s.dimension_id})]}),i.jsxs("div",{className:"dim-score-block",children:[i.jsx("div",{className:"dim-score",children:v}),s.weight!=null&&i.jsxs("div",{className:"dim-weight",children:["Weight ",s.weight]})]})]}),i.jsx("div",{className:"dim-bar",children:i.jsx("div",{className:"dim-fill",style:{width:`${o}%`}})}),s.reasoning&&i.jsx("div",{className:"dim-reasoning",children:s.reasoning}),(s.evidence??[]).length>0&&i.jsx("div",{className:"dim-evidence",children:s.evidence?.map((r,g)=>i.jsx("div",{className:"evidence-item",children:r},g))})]})}function Yo({detail:s}){const o=s.judge_dimension_scores??[],v=s.judge?.overall_notes,r=s.judge?.output;return i.jsxs("div",{children:[v&&i.jsxs("div",{className:"overall-notes",children:[i.jsx("div",{className:"section-label",children:"Overall Notes"}),i.jsx("div",{className:"notes-text",children:v})]}),o.length>0?o.map((g,E)=>i.jsx(Vv,{d:g},E)):i.jsx("div",{className:"no-data",children:"No rubric dimensions recorded."}),r&&i.jsxs("details",{className:"judge-raw",children:[i.jsx("summary",{children:"Raw Judge Output"}),i.jsx("pre",{children:JSON.stringify(r,null,2)})]})]})}function Go({detail:s,onClose:o}){const[v,r]=D.useState("conversation"),g=s.status==="running",E=s.overall_score!=null?s.overall_score.toFixed(2):g?"...":"n/a",C=s.pass_threshold!=null?s.pass_threshold.toFixed(2):"n/a",R=g?"RUNNING":s.passed?"PASS":"FAIL",T=g?"detail-running":s.passed?"detail-pass":"detail-fail",S=typeof s.judge?.output=="object"&&s.judge?.output!=null?s.judge.output.failure_mode_detected:null;return i.jsxs(i.Fragment,{children:[i.jsx("div",{className:"detail-backdrop open",role:"presentation",onClick:o,onKeyDown:Y=>{Y.key==="Escape"&&o()}}),i.jsx("div",{className:"detail-overlay open",children:i.jsxs("div",{className:"detail-panel",children:[i.jsxs("div",{className:"detail-top",children:[i.jsx("button",{type:"button",className:"detail-close",onClick:o,children:"×"}),i.jsxs("div",{className:`detail-score-header ${T}`,children:[i.jsxs("div",{className:"detail-title-block",children:[i.jsxs("div",{className:"detail-name",children:[s.scenario_name,g&&i.jsxs("span",{className:"live-badge",style:{marginLeft:12,verticalAlign:"middle"},children:[i.jsx("span",{className:"live-dot"})," LIVE"]})]}),i.jsxs("div",{className:"detail-sid",children:[s.scenario_id,s.user_id?` / ${s.user_id}`:""]})]}),i.jsxs("div",{className:"detail-score-block",children:[i.jsxs("div",{className:"detail-score-group",children:[i.jsx("div",{className:"detail-score-label",children:"Score"}),i.jsx("div",{className:"detail-score-value",children:E})]}),i.jsxs("div",{className:"detail-score-group",children:[i.jsx("div",{className:"detail-score-label",children:"Threshold"}),i.jsx("div",{className:"detail-score-value",children:C})]}),i.jsxs("div",{className:"detail-score-group",children:[i.jsx("div",{className:"detail-score-label",children:"Status"}),i.jsx("div",{className:"detail-score-value",children:R})]}),typeof S=="string"&&S&&i.jsxs("div",{className:"detail-score-group",children:[i.jsx("div",{className:"detail-score-label",children:"Failure"}),i.jsx("div",{className:"detail-score-value",children:S})]})]}),i.jsx("div",{className:"detail-bar",children:i.jsx("div",{className:"detail-bar-fill",style:{width:`${gs(s.overall_score)}%`}})})]}),i.jsxs("div",{className:"detail-tabs",children:[i.jsx("button",{type:"button",className:`tab-btn${v==="conversation"?" tab-active":""}`,onClick:()=>r("conversation"),children:"Conversation"}),i.jsx("button",{type:"button",className:`tab-btn${v==="rubric"?" tab-active":""}`,onClick:()=>r("rubric"),children:"Rubric"})]})]}),i.jsx("div",{className:"detail-body",children:v==="conversation"?i.jsx(qo,{detail:s}):i.jsx(Yo,{detail:s})})]})})]})}function Qo({data:s}){const o=s.total||1,v=s.passed/o*100,r=s.failed/o*100,g=s.running/o*100;return i.jsxs("div",{className:"progress-bar",style:{display:"flex"},children:[i.jsx("div",{className:"progress-fill progress-pass",style:{width:`${v}%`}}),i.jsx("div",{className:"progress-fill progress-fail",style:{width:`${r}%`}}),i.jsx("div",{className:"progress-fill progress-running",style:{width:`${g}%`}})]})}const Kv={pending:"PENDING",running:"RUNNING",pass:"PASS",fail:"FAIL",error:"ERROR"};function Jv({scenario:s}){const[o,v]=D.useState(Date.now()),r=D.useRef(void 0);if(D.useEffect(()=>{if(s.status==="running"&&s.started_at!=null)return r.current=setInterval(()=>v(Date.now()),1e3),()=>clearInterval(r.current);clearInterval(r.current)},[s.status,s.started_at]),s.started_at==null)return i.jsx(i.Fragment,{children:"-"});if(s.finished_at!=null)return i.jsxs(i.Fragment,{children:[(s.finished_at-s.started_at).toFixed(1),"s"]});const g=o/1e3-s.started_at;return i.jsx(i.Fragment,{children:g>0?`${g.toFixed(0)}s`:"-"})}function Xo({data:s,onSelect:o}){return i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"section-title",children:["Scenarios"," ",i.jsx("span",{style:{color:"var(--muted)",fontWeight:400,fontSize:12},children:"(click completed rows to inspect)"})]}),i.jsxs("table",{children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"ID"}),i.jsx("th",{children:"Name"}),i.jsx("th",{children:"Status"}),i.jsx("th",{style:{textAlign:"right"},children:"Score"}),i.jsx("th",{style:{textAlign:"right"},children:"Duration"}),i.jsx("th",{children:"Error"})]})}),i.jsx("tbody",{children:s.scenarios.map((v,r)=>{const g=r in s.details;return i.jsxs("tr",{className:`status-${v.status}${g?" clickable-row":""}`,onClick:g?()=>o(r):void 0,children:[i.jsx("td",{className:"id-cell",children:v.scenario_id}),i.jsx("td",{children:v.scenario_name??""}),i.jsx("td",{className:"status-badge",children:i.jsx("span",{children:Kv[v.status]??v.status.toUpperCase()})}),i.jsx("td",{className:"score-cell",children:v.score!=null?v.score.toFixed(2):"-"}),i.jsx("td",{className:"duration-cell",children:i.jsx(Jv,{scenario:v})}),i.jsx("td",{children:v.error&&i.jsx("span",{className:"error-text",title:v.error,children:v.error.slice(0,60)})})]},`${v.scenario_id}-${r}`)})})]})]})}function wv(s,o){const[v,r]=D.useState(0);return D.useEffect(()=>{if(o)return;const g=performance.now(),E=setInterval(()=>{r((performance.now()-g)/1e3)},500);return()=>{clearInterval(E),r(0)}},[s,o]),o?s:s+v}function $v(s){const o=Math.floor(s/60),v=Math.floor(s%60);return`${o}m ${v}s`}function Ve({value:s,label:o,color:v}){return i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",style:{color:v},children:s}),i.jsx("div",{className:"stat-label",children:o})]})}function Lo({data:s}){const o=wv(s.elapsed,s.all_done),v=s.total>0?Math.round(s.done/s.total*100):0;return i.jsxs("div",{className:"stats",children:[i.jsx(Ve,{value:`${s.done}/${s.total}`,label:"Completed",color:"var(--text)"}),i.jsx(Ve,{value:`${s.passed}`,label:"Passed",color:"var(--green)"}),i.jsx(Ve,{value:`${s.failed}`,label:"Failed",color:"var(--red)"}),i.jsx(Ve,{value:`${s.errored}`,label:"Errors",color:"var(--amber)"}),i.jsx(Ve,{value:`${s.running}`,label:"Running",color:"var(--blue)"}),i.jsx(Ve,{value:$v(o),label:"Elapsed",color:"var(--muted)"}),i.jsx(Ve,{value:`${v}%`,label:"Progress",color:"var(--indigo)"})]})}const kv=2e3;function Wv(){const[s,o]=D.useState(null),[v,r]=D.useState(null),g=D.useRef(!1);return D.useEffect(()=>{let E,C=!1;async function R(){try{const T=await fetch("/api/state");if(!T.ok)throw new Error(`HTTP ${T.status}`);const S=await T.json();if(C)return;o(S),r(null),g.current=S.all_done}catch(T){if(C)return;r(T instanceof Error?T.message:"Unknown error")}}return R(),E=setInterval(()=>{g.current||R()},kv),()=>{C=!0,clearInterval(E)}},[]),{data:s,error:v}}const Fv=800;function Iv(s){if(!s||!(s instanceof Element))return!1;const o=s.tagName;return o==="INPUT"||o==="TEXTAREA"||o==="SELECT"?!0:o==="BUTTON"?!1:!!s.isContentEditable}function Co(s){const{shortcuts:o,isEnabled:v}=s,r=s.sequenceTimeoutMs??Fv,g={pending:[],lastAt:0};return E=>{if(E.defaultPrevented||E.ctrlKey||E.metaKey||E.altKey||v&&!v()||Iv(E.target))return;const C=Date.now();C-g.lastAt>r&&(g.pending=[]),g.lastAt=C;const R=E.key,T=[...g.pending,R],S=o.find(V=>V.sequence!==void 0&&V.sequence.length===T.length&&V.sequence.every((dl,I)=>dl===T[I]));if(S){g.pending=[],E.preventDefault(),S.run(E);return}if(o.some(V=>V.sequence!==void 0&&V.sequence.length>T.length&&V.sequence.slice(0,T.length).every((dl,I)=>dl===T[I]))){g.pending=T,E.preventDefault();return}const U=o.find(V=>V.sequence===void 0&&V.key===R);if(U){g.pending=[],E.preventDefault(),U.run(E);return}g.pending=[]}}function Pv(s){const o=D.useRef(s);o.current=s,D.useEffect(()=>{const v={current:Co(o.current)};let r=o.current.shortcuts;const g=E=>{r!==o.current.shortcuts&&(v.current=Co(o.current),r=o.current.shortcuts),v.current(E)};return window.addEventListener("keydown",g),()=>window.removeEventListener("keydown",g)},[])}const vs="agentprobe:server-token";class Zo extends Error{status;constructor(o,v){super(v),this.name="ApiError",this.status=o}}function Vo(){try{return window.sessionStorage.getItem(vs)??""}catch{return""}}function ly(s){try{s?window.sessionStorage.setItem(vs,s):window.sessionStorage.removeItem(vs)}catch{}}function ty(s,o){if(!s||typeof s!="object")return o;const v=s.error;if(!v||typeof v!="object")return o;const r=v.message;return typeof r=="string"&&r?r:o}async function ey(s,o,v={}){const r={accept:"application/json"},g=new Headers(v.headers);for(const[T,S]of g.entries())r[T]=S;o&&(r.authorization=`Bearer ${o}`);const E=await fetch(s,{...v,headers:r}),C=await E.text();let R=null;if(C)try{R=JSON.parse(C)}catch{R=C}if(!E.ok)throw new Zo(E.status,ty(R,`HTTP ${E.status}`));return R}function Zu(s){if(!s)return null;const o=Date.parse(s);return Number.isNaN(o)?null:o/1e3}function ay(s,o){const v=Zu(s);if(v==null)return 0;const r=Zu(o)??Date.now()/1e3;return Math.max(0,r-v)}function ny(s){return s.status==="running"?"running":s.status==="pending"?"pending":s.status==="error"||s.status==="runtime_error"?"error":s.passed===!0?"pass":s.passed===!1?"fail":"pending"}function uy(s){if(!s)return null;if(typeof s=="string")return s;if(typeof s=="object"){const o=s.message;return typeof o=="string"?o:JSON.stringify(s)}return String(s)}function iy(s){return{dimension_id:String(s.dimension_id??""),dimension_name:String(s.dimension_name??s.dimension_id??""),raw_score:typeof s.raw_score=="number"?s.raw_score:Number(s.raw_score),scale_points:s.scale_points==null?null:Number(s.scale_points),normalized_score:s.normalized_score==null?null:Number(s.normalized_score),weight:s.weight==null?null:Number(s.weight),reasoning:typeof s.reasoning=="string"?s.reasoning:"",evidence:Array.isArray(s.evidence)?s.evidence.map(String):[]}}function Ko(s){return{scenario_id:s.scenarioId,scenario_name:s.scenarioName,user_id:s.userId??void 0,passed:s.passed===!0,overall_score:s.overallScore??null,pass_threshold:s.passThreshold??null,status:s.status,judge:s.judge?{provider:s.judge.provider??void 0,model:s.judge.model??void 0,temperature:s.judge.temperature??void 0,max_tokens:s.judge.maxTokens??void 0,overall_notes:s.judge.overallNotes??void 0,output:s.judge.output&&typeof s.judge.output=="object"&&!Array.isArray(s.judge.output)?s.judge.output:void 0}:void 0,turns:s.turns??[],tool_calls:s.toolCalls??[],checkpoints:s.checkpoints??[],judge_dimension_scores:(s.judgeDimensionScores??[]).map(iy),expectations:s.expectations,error:s.error,counts:s.counts?{turn_count:s.counts.turnCount,assistant_turn_count:s.counts.assistantTurnCount,tool_call_count:s.counts.toolCallCount,checkpoint_count:s.counts.checkpointCount}:void 0}}function cy(s){const o=s.scenarios.map(E=>({scenario_id:E.scenarioId,scenario_name:E.scenarioName,status:ny(E),score:E.overallScore??null,error:uy(E.error),started_at:Zu(E.startedAt),finished_at:Zu(E.completedAt)})),v={};for(const E of s.scenarios)v[E.ordinal]=Ko(E);const r=o.filter(E=>E.status==="running").length,g=o.filter(E=>E.status!=="running"&&E.status!=="pending").length;return{total:s.aggregateCounts.scenarioTotal||o.length,elapsed:ay(s.startedAt,s.completedAt),passed:s.aggregateCounts.scenarioPassedCount,failed:s.aggregateCounts.scenarioFailedCount,errored:s.aggregateCounts.scenarioErroredCount,running:r,done:g,all_done:!!s.completedAt||r===0,scenarios:o,details:v,averages:[]}}function sy(){const[s,o]=D.useState(window.location.pathname);D.useEffect(()=>{const r=()=>o(window.location.pathname);return window.addEventListener("popstate",r),()=>window.removeEventListener("popstate",r)},[]);const v=D.useCallback(r=>{window.history.pushState({},"",r),o(window.location.pathname)},[]);return{pathname:s,navigate:v}}function fy(s){D.useEffect(()=>{const o=v=>{if(v.defaultPrevented||!(v.target instanceof Element))return;const r=v.target.closest("a");if(!r)return;const g=r.getAttribute("href");!g?.startsWith("/")||g.startsWith("//")||g.startsWith("/api/")||r.target||(v.preventDefault(),s(g))};return document.addEventListener("click",o),()=>document.removeEventListener("click",o)},[s])}function Pt({label:s="Loading..."}){return i.jsx("div",{className:"server-empty",children:s})}function Ho(s){const o=Array.from(document.querySelectorAll('[data-keynav="row"]'));if(o.length===0)return;const v=document.activeElement?.closest('[data-keynav="row"]'),r=v?o.indexOf(v):-1,g=r===-1?s>0?0:o.length-1:Math.max(0,Math.min(o.length-1,r+s)),E=o[g];if(!E)return;const R=E.querySelector('[data-keynav-link="true"]')??E;R.focus?.(),typeof R.scrollIntoView=="function"&&R.scrollIntoView({block:"nearest",inline:"nearest"})}function Tt({message:s}){return i.jsx("div",{className:"server-error",children:s})}function Jo({run:s}){const o=s.status==="running"?"status-running":s.passed===!0?"status-pass":s.passed===!1?"status-fail":"status-pending",v=s.status==="completed"&&s.passed!=null?s.passed?"pass":"fail":s.status;return i.jsx("span",{className:`${o} status-badge`,children:i.jsx("span",{children:v.toUpperCase()})})}function Ss({runs:s}){return s.length===0?i.jsx("div",{className:"server-empty",children:"No runs recorded."}):i.jsxs("table",{children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"Run"}),i.jsx("th",{children:"Status"}),i.jsx("th",{children:"Preset"}),i.jsx("th",{children:"Started"}),i.jsx("th",{style:{textAlign:"right"},children:"Passed"})]})}),i.jsx("tbody",{children:s.map((o,v)=>i.jsxs("tr",{className:"clickable-row","data-keynav":"row","data-keynav-index":v,children:[i.jsx("td",{className:"id-cell",children:i.jsx("a",{href:`/runs/${encodeURIComponent(o.runId)}`,"data-keynav-link":"true",children:o.runId})}),i.jsx("td",{children:i.jsx(Jo,{run:o})}),i.jsx("td",{children:o.preset??"-"}),i.jsx("td",{children:o.startedAt}),i.jsxs("td",{className:"score-cell",children:[o.aggregateCounts.scenarioPassedCount,"/",o.aggregateCounts.scenarioTotal]})]},o.runId))})]})}function wo({token:s,onTokenChange:o,authRequired:v}){const[r,g]=D.useState(s);return D.useEffect(()=>{g(s)},[s]),i.jsxs("form",{className:"server-token-form",onSubmit:E=>{E.preventDefault(),o(r.trim())},children:[i.jsx("label",{htmlFor:"server-token",children:v?"Bearer token required":"Bearer token"}),i.jsxs("div",{className:"server-token-row",children:[i.jsx("input",{id:"server-token",type:"password",value:r,onChange:E=>g(E.currentTarget.value),placeholder:"token"}),i.jsx("button",{type:"submit",children:"Save"}),s&&i.jsx("button",{type:"button",className:"secondary",onClick:()=>o(""),children:"Clear"})]})]})}function ry(s,o){return D.useCallback(async(v,r)=>{try{return await ey(v,s,r)}catch(g){throw g instanceof Zo&&g.status===401&&o(),g}},[s,o])}function dy({request:s}){const[o,v]=D.useState(null),[r,g]=D.useState(null),[E,C]=D.useState(null);if(D.useEffect(()=>{let S=!1;return Promise.all([s("/api/runs?limit=5"),s("/api/suites")]).then(([Y,U])=>{S||(v(Y),g(U),C(null))}).catch(Y=>{S||C(Y instanceof Error?Y.message:String(Y))}),()=>{S=!0}},[s]),E)return i.jsx(Tt,{message:E});if(!o||!r)return i.jsx(Pt,{});const R=o.runs.filter(S=>S.passed===!0).length,T=o.runs.filter(S=>S.passed===!1).length;return i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"stats",children:[i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:o.total}),i.jsx("div",{className:"stat-label",children:"Runs"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",style:{color:"var(--green)"},children:R}),i.jsx("div",{className:"stat-label",children:"Recent Passed"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",style:{color:"var(--red)"},children:T}),i.jsx("div",{className:"stat-label",children:"Recent Failed"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",style:{color:"var(--indigo)"},children:r.suites.length}),i.jsx("div",{className:"stat-label",children:"Suites"})]})]}),i.jsx("div",{className:"section-title",children:"Latest Runs"}),i.jsx(Ss,{runs:o.runs})]})}function oy({request:s}){const[o,v]=D.useState(null),[r,g]=D.useState(null),[E,C]=D.useState("");D.useEffect(()=>{let S=!1;return s("/api/runs").then(Y=>{S||(v(Y),g(null))}).catch(Y=>{S||g(Y instanceof Error?Y.message:String(Y))}),()=>{S=!0}},[s]);const R=E.trim().toLowerCase(),T=D.useMemo(()=>o?R===""?o.runs:o.runs.filter(S=>[S.runId,S.preset??"",S.label??"",S.status].join(" ").toLowerCase().includes(R)):[],[o,R]);return i.jsxs(i.Fragment,{children:[i.jsx("div",{className:"section-title",children:"Runs"}),i.jsxs("div",{className:"server-filter-row",children:[i.jsx("label",{htmlFor:"runs-search",className:"sr-only",children:"Filter runs"}),i.jsx("input",{id:"runs-search",className:"server-filter-input",type:"search",value:E,onChange:S=>C(S.currentTarget.value),placeholder:"Filter by id, preset, label, status ( / )","aria-label":"Filter runs"})]}),r?i.jsx(Tt,{message:r}):o?o.runs.length===0?i.jsx("div",{className:"server-empty",role:"status",children:"No runs recorded yet."}):T.length===0?i.jsxs("div",{className:"server-empty",role:"status",children:['No runs match "',E,'".']}):i.jsx(Ss,{runs:T}):i.jsx(Pt,{})]})}function hy({runId:s,request:o,token:v}){const[r,g]=D.useState(null),[E,C]=D.useState(null),[R,T]=D.useState(null),[S,Y]=D.useState(!1),U=D.useRef(o),V=D.useRef(s),dl=D.useRef(!0);U.current=o,V.current=s,D.useEffect(()=>(dl.current=!0,()=>{dl.current=!1}),[]);const I=D.useCallback(async()=>{const al=s;try{const P=await U.current(`/api/runs/${encodeURIComponent(al)}`);if(!dl.current||V.current!==al)return;g(P.run),C(null)}catch(P){if(!dl.current||V.current!==al)return;C(P instanceof Error?P.message:String(P))}},[s]),Tl=D.useRef(I);Tl.current=I,D.useEffect(()=>{g(null),C(null),T(null),I()},[I]),D.useEffect(()=>{const al=v?`?access_token=${encodeURIComponent(v)}`:"",P=new EventSource(`/api/runs/${encodeURIComponent(s)}/events${al}`),Ol=()=>{Tl.current()},El=()=>{Ol(),P.close()};return P.addEventListener("snapshot",Ol),P.addEventListener("suite_started",Ol),P.addEventListener("scenario_started",Ol),P.addEventListener("scenario_finished",Ol),P.addEventListener("scenario_error",Ol),P.addEventListener("run_finished",El),P.addEventListener("run_cancelled",El),P.addEventListener("run_error",El),()=>P.close()},[s,v]);const _l=async()=>{Y(!0),C(null);try{await o(`/api/runs/${encodeURIComponent(s)}/cancel`,{method:"POST"}),await I()}catch(al){C(al instanceof Error?al.message:String(al))}finally{Y(!1)}},Sl=D.useMemo(()=>r?cy(r):null,[r]),Xl=R!=null&&Sl?Sl.details[R]??null:null;return E?i.jsx(Tt,{message:E}):!r||!Sl?i.jsx(Pt,{}):i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"server-heading-row",children:[i.jsxs("div",{children:[i.jsx("div",{className:"server-eyebrow",children:"Run"}),i.jsx("h1",{children:r.runId})]}),i.jsxs("div",{className:"server-form-actions",children:[r.status==="running"&&i.jsx("button",{type:"button",className:"secondary",onClick:()=>void _l(),disabled:S,children:S?"Cancelling...":"Cancel"}),i.jsx("a",{href:`/api/runs/${encodeURIComponent(r.runId)}/report.html`,children:"HTML report"})]})]}),i.jsx(Lo,{data:Sl}),i.jsx(Qo,{data:Sl}),i.jsx(Xo,{data:Sl,onSelect:T}),i.jsx("div",{className:"server-link-strip",children:r.scenarios.map(al=>i.jsxs("a",{href:`/runs/${encodeURIComponent(r.runId)}/scenarios/${al.ordinal}`,children:["Scenario ",al.ordinal]},al.ordinal))}),i.jsx(Bo,{averages:Sl.averages,onSelectRun:T}),Xl&&i.jsx(Go,{detail:Xl,onClose:()=>T(null)})]})}function my({runId:s,ordinal:o,request:v}){const[r,g]=D.useState(null),[E,C]=D.useState(null);if(D.useEffect(()=>{let T=!1;return v(`/api/runs/${encodeURIComponent(s)}/scenarios/${encodeURIComponent(o)}`).then(S=>{T||(g(S),C(null))}).catch(S=>{T||C(S instanceof Error?S.message:String(S))}),()=>{T=!0}},[v,s,o]),E)return i.jsx(Tt,{message:E});if(!r)return i.jsx(Pt,{});const R=Ko(r.scenario);return i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"server-heading-row",children:[i.jsxs("div",{children:[i.jsx("div",{className:"server-eyebrow",children:i.jsx("a",{href:`/runs/${encodeURIComponent(r.run.runId)}`,children:r.run.runId})}),i.jsx("h1",{children:R.scenario_name})]}),i.jsx(Jo,{run:{...r.run,exitCode:null,preset:null,aggregateCounts:{scenarioTotal:1,scenarioPassedCount:R.passed?1:0,scenarioFailedCount:R.passed?0:1,scenarioErroredCount:R.status==="error"?1:0}}})]}),i.jsxs("div",{className:"server-detail-grid",children:[i.jsxs("section",{children:[i.jsx("div",{className:"section-title",children:"Conversation"}),i.jsx(qo,{detail:R})]}),i.jsxs("section",{children:[i.jsx("div",{className:"section-title",children:"Rubric"}),i.jsx(Yo,{detail:R})]})]})]})}function vy({request:s}){const[o,v]=D.useState(null),[r,g]=D.useState(null),[E,C]=D.useState(null);return D.useEffect(()=>{let R=!1;return Promise.all([s("/api/suites"),s("/api/scenarios")]).then(([T,S])=>{R||(v(T),g(S),C(null))}).catch(T=>{R||C(T instanceof Error?T.message:String(T))}),()=>{R=!0}},[s]),E?i.jsx(Tt,{message:E}):!o||!r?i.jsx(Pt,{}):i.jsxs(i.Fragment,{children:[i.jsx("div",{className:"server-heading-row",children:i.jsxs("div",{children:[i.jsx("div",{className:"server-eyebrow",children:"Data Root"}),i.jsx("h1",{children:o.data_path})]})}),o.errors.length>0&&i.jsx(Tt,{message:`${o.errors.length} suite files had validation errors.`}),i.jsx("div",{className:"section-title",children:"Suites"}),o.suites.length===0?i.jsx("div",{className:"server-empty",role:"status",children:"No suites discovered under this data root."}):i.jsxs("table",{children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"Suite"}),i.jsx("th",{children:"Schema"}),i.jsx("th",{children:"Path"}),i.jsx("th",{style:{textAlign:"right"},children:"Objects"})]})}),i.jsx("tbody",{children:o.suites.map(R=>i.jsxs("tr",{children:[i.jsx("td",{className:"id-cell",children:R.id}),i.jsx("td",{children:R.schema}),i.jsx("td",{children:R.relativePath}),i.jsx("td",{className:"score-cell",children:R.objectCount})]},R.id))})]}),i.jsx("div",{className:"section-title",children:"Scenarios"}),r.scenarios.length===0?i.jsx("div",{className:"server-empty",role:"status",children:"No scenarios discovered."}):i.jsxs("table",{children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"Scenario"}),i.jsx("th",{children:"Name"}),i.jsx("th",{children:"Suite"}),i.jsx("th",{children:"Tags"}),i.jsx("th",{children:"Rubric"})]})}),i.jsx("tbody",{children:r.scenarios.map(R=>i.jsxs("tr",{children:[i.jsx("td",{className:"id-cell",children:R.id}),i.jsx("td",{children:R.name}),i.jsx("td",{children:R.suiteId}),i.jsx("td",{children:R.tags.join(", ")||"-"}),i.jsx("td",{children:R.rubric??"-"})]},`${R.suiteId}:${R.id}`))})]})]})}function Vu(s,o){return{method:s,headers:{"content-type":"application/json"},body:o===void 0?void 0:JSON.stringify(o)}}function yy({request:s,navigate:o}){const[v,r]=D.useState(null),[g,E]=D.useState(null),[C,R]=D.useState(null),[T,S]=D.useState(""),[Y,U]=D.useState(""),[V,dl]=D.useState(""),[I,Tl]=D.useState(""),[_l,Sl]=D.useState(new Set),[Xl,al]=D.useState(""),[P,Ol]=D.useState(!1),[El,H]=D.useState(2),[il,nl]=D.useState(1),[Ll,vt]=D.useState(!0),[Yl,Ct]=D.useState(!1),[at,nt]=D.useState(""),[_,B]=D.useState(null),[K,fl]=D.useState(!1);D.useEffect(()=>{let M=!1;return Promise.all([s("/api/suites"),s("/api/scenarios"),s("/api/presets")]).then(([L,ll,Ml])=>{M||(r(L),E(ll),R(Ml),S(L.suites.find(bl=>bl.schema==="endpoints")?.relativePath??""),U(L.suites.find(bl=>bl.schema==="personas")?.relativePath??""),dl(L.suites.find(bl=>bl.schema==="rubrics")?.relativePath??""))}).catch(L=>{M||B(L instanceof Error?L.message:String(L))}),()=>{M=!0}},[s]);const ol=D.useMemo(()=>g?g.scenarios.filter(M=>_l.has(`${M.sourcePath}::${M.id}`)).map(M=>({file:M.sourcePath,id:M.id})):[],[g,_l]);if(_)return i.jsx(Tt,{message:_});if(!v||!g||!C)return i.jsx(Pt,{});const m=v.suites.filter(M=>M.schema==="endpoints"),O=v.suites.filter(M=>M.schema==="personas"),q=v.suites.filter(M=>M.schema==="rubrics"),Q=async M=>{M.preventDefault(),fl(!0),B(null);try{const L={enabled:P,limit:P?El:void 0},ll=I?await s(`/api/presets/${encodeURIComponent(I)}/runs`,Vu("POST",{label:Xl||void 0,overrides:{parallel:L,repeat:il,dry_run:Ll}})):await s("/api/runs",Vu("POST",{endpoint:T,personas:Y,rubric:V,selection:ol,parallel:L,repeat:il,dry_run:Ll,label:Xl||void 0,save_as_preset:Yl&&at.trim()?{name:at.trim()}:void 0}));o(`/runs/${encodeURIComponent(ll.run_id)}`)}catch(L){B(L instanceof Error?L.message:String(L))}finally{fl(!1)}};return i.jsxs(i.Fragment,{children:[i.jsx("div",{className:"server-heading-row",children:i.jsxs("div",{children:[i.jsx("div",{className:"server-eyebrow",children:"Start"}),i.jsx("h1",{children:"Run Builder"})]})}),i.jsxs("form",{className:"server-form",onSubmit:Q,children:[i.jsxs("label",{children:["Preset",i.jsxs("select",{value:I,onChange:M=>Tl(M.currentTarget.value),children:[i.jsx("option",{value:"",children:"Ad-hoc"}),C.presets.map(M=>i.jsx("option",{value:M.id,children:M.name},M.id))]})]}),i.jsxs("div",{className:"server-form-grid",children:[i.jsxs("label",{children:["Endpoint",i.jsx("select",{value:T,onChange:M=>S(M.currentTarget.value),disabled:!!I,children:m.map(M=>i.jsx("option",{value:M.relativePath,children:M.relativePath},M.id))})]}),i.jsxs("label",{children:["Personas",i.jsx("select",{value:Y,onChange:M=>U(M.currentTarget.value),disabled:!!I,children:O.map(M=>i.jsx("option",{value:M.relativePath,children:M.relativePath},M.id))})]}),i.jsxs("label",{children:["Rubric",i.jsx("select",{value:V,onChange:M=>dl(M.currentTarget.value),disabled:!!I,children:q.map(M=>i.jsx("option",{value:M.relativePath,children:M.relativePath},M.id))})]})]}),!I&&i.jsxs("div",{className:"scenario-picker",children:[i.jsxs("div",{className:"server-form-actions",children:[i.jsx("span",{className:"section-label",children:"Scenarios"}),i.jsx("button",{type:"button",className:"secondary",onClick:()=>Sl(new Set(g.scenarios.map(M=>`${M.sourcePath}::${M.id}`))),children:"Select all"})]}),g.scenarios.slice(0,80).map(M=>{const L=`${M.sourcePath}::${M.id}`;return i.jsxs("label",{className:"check-row",children:[i.jsx("input",{type:"checkbox",checked:_l.has(L),onChange:ll=>{const Ml=new Set(_l);ll.currentTarget.checked?Ml.add(L):Ml.delete(L),Sl(Ml)}}),i.jsx("span",{children:M.id}),i.jsx("span",{children:M.sourcePath})]},L)})]}),i.jsxs("div",{className:"server-form-grid",children:[i.jsxs("label",{children:["Label",i.jsx("input",{value:Xl,onChange:M=>al(M.currentTarget.value)})]}),i.jsxs("label",{children:["Repeat",i.jsx("input",{type:"number",min:1,value:il,onChange:M=>nl(Number(M.currentTarget.value))})]}),i.jsxs("label",{children:["Parallel limit",i.jsx("input",{type:"number",min:1,value:El,onChange:M=>H(Number(M.currentTarget.value)),disabled:!P})]})]}),i.jsxs("div",{className:"server-toggle-row",children:[i.jsxs("label",{children:[i.jsx("input",{type:"checkbox",checked:Ll,onChange:M=>vt(M.currentTarget.checked)}),"Dry run"]}),i.jsxs("label",{children:[i.jsx("input",{type:"checkbox",checked:P,onChange:M=>Ol(M.currentTarget.checked)}),"Parallel"]}),!I&&i.jsxs("label",{children:[i.jsx("input",{type:"checkbox",checked:Yl,onChange:M=>Ct(M.currentTarget.checked)}),"Save preset"]})]}),Yl&&!I&&i.jsxs("label",{children:["Preset name",i.jsx("input",{value:at,onChange:M=>nt(M.currentTarget.value)})]}),i.jsx("div",{className:"server-form-actions",children:i.jsx("button",{type:"submit",disabled:K,children:K?"Starting...":"Start run"})})]})]})}function gy({request:s,navigate:o}){const[v,r]=D.useState(null),[g,E]=D.useState(null);D.useEffect(()=>{let R=!1;return s("/api/presets").then(T=>{R||r(T)}).catch(T=>{R||E(T instanceof Error?T.message:String(T))}),()=>{R=!0}},[s]);const C=async R=>{try{const T=await s(`/api/presets/${encodeURIComponent(R.id)}/runs`,Vu("POST"));o(`/runs/${encodeURIComponent(T.run_id)}`)}catch(T){E(T instanceof Error?T.message:String(T))}};return g?i.jsx(Tt,{message:g}):v?i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"server-heading-row",children:[i.jsxs("div",{children:[i.jsx("div",{className:"server-eyebrow",children:"Presets"}),i.jsx("h1",{children:"Saved Runs"})]}),i.jsx("a",{href:"/start",children:"New run"})]}),v.presets.length===0?i.jsx("div",{className:"server-empty",children:"No presets saved."}):i.jsxs("table",{children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"Name"}),i.jsx("th",{children:"Scenarios"}),i.jsx("th",{children:"Repeat"}),i.jsx("th",{children:"Last run"}),i.jsx("th",{})]})}),i.jsx("tbody",{children:v.presets.map(R=>i.jsxs("tr",{children:[i.jsx("td",{className:"id-cell",children:i.jsx("a",{href:`/presets/${encodeURIComponent(R.id)}`,children:R.name})}),i.jsx("td",{children:R.selection.length}),i.jsx("td",{children:R.repeat}),i.jsx("td",{children:R.last_run?.status??"-"}),i.jsx("td",{className:"score-cell",children:i.jsx("button",{type:"button",onClick:()=>void C(R),children:"Run"})})]},R.id))})]})]}):i.jsx(Pt,{})}function Sy({presetId:s,request:o,navigate:v}){const[r,g]=D.useState(null),[E,C]=D.useState(null),[R,T]=D.useState(null);D.useEffect(()=>{let U=!1;return Promise.all([o(`/api/presets/${encodeURIComponent(s)}`),o(`/api/presets/${encodeURIComponent(s)}/runs`)]).then(([V,dl])=>{U||(g(V),C(dl))}).catch(V=>{U||T(V instanceof Error?V.message:String(V))}),()=>{U=!0}},[o,s]);const S=async()=>{try{const U=await o(`/api/presets/${encodeURIComponent(s)}/runs`,Vu("POST"));v(`/runs/${encodeURIComponent(U.run_id)}`)}catch(U){T(U instanceof Error?U.message:String(U))}},Y=async()=>{try{await o(`/api/presets/${encodeURIComponent(s)}`,{method:"DELETE"}),v("/presets")}catch(U){T(U instanceof Error?U.message:String(U))}};return R?i.jsx(Tt,{message:R}):!r||!E?i.jsx(Pt,{}):i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"server-heading-row",children:[i.jsxs("div",{children:[i.jsx("div",{className:"server-eyebrow",children:"Preset"}),i.jsx("h1",{children:r.preset.name})]}),i.jsxs("div",{className:"server-form-actions",children:[i.jsx("button",{type:"button",onClick:()=>void S(),children:"Run"}),i.jsx("button",{type:"button",className:"secondary",onClick:()=>void Y(),children:"Delete"})]})]}),r.warnings.map(U=>i.jsx(Tt,{message:U.message},`${U.file}:${U.id}`)),i.jsxs("div",{className:"server-settings",children:[i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:r.preset.selection.length}),i.jsx("div",{className:"stat-label",children:"Scenarios"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:r.preset.repeat}),i.jsx("div",{className:"stat-label",children:"Repeat"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:r.preset.dry_run?"on":"off"}),i.jsx("div",{className:"stat-label",children:"Dry Run"})]})]}),i.jsx("div",{className:"section-title",children:"Selection"}),i.jsxs("table",{children:[i.jsx("thead",{children:i.jsxs("tr",{children:[i.jsx("th",{children:"Scenario"}),i.jsx("th",{children:"File"})]})}),i.jsx("tbody",{children:r.preset.selection.map(U=>i.jsxs("tr",{children:[i.jsx("td",{className:"id-cell",children:U.id}),i.jsx("td",{children:U.file})]},`${U.file}:${U.id}`))})]}),i.jsx("div",{className:"section-title",children:"Runs"}),E.runs.length>=2&&i.jsx("p",{className:"compare-cta",children:i.jsx("a",{href:`/compare?run_ids=${encodeURIComponent(E.runs[0]?.runId??"")},${encodeURIComponent(E.runs[1]?.runId??"")}`,children:"Compare last two runs →"})}),i.jsx(Ss,{runs:E.runs})]})}function by({token:s,onTokenChange:o}){const[v,r]=D.useState(null),[g,E]=D.useState(null),[C,R]=D.useState(null);return D.useEffect(()=>{let T=!1;return Promise.all([fetch("/healthz").then(S=>S.json()),fetch("/readyz").then(S=>S.json())]).then(([S,Y])=>{T||(r(S),E(Y),R(null))}).catch(S=>{T||R(S instanceof Error?S.message:String(S))}),()=>{T=!0}},[]),i.jsxs(i.Fragment,{children:[C&&i.jsx(Tt,{message:C}),i.jsxs("div",{className:"server-settings",children:[i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:v?.status??"-"}),i.jsx("div",{className:"stat-label",children:"Health"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:g?.status??"-"}),i.jsx("div",{className:"stat-label",children:"Readiness"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:v?.version??"-"}),i.jsx("div",{className:"stat-label",children:"Version"})]}),i.jsxs("div",{className:"stat",children:[i.jsx("div",{className:"stat-value",children:g?.db_url?"sqlite":"-"}),i.jsx("div",{className:"stat-label",children:"Database"})]})]}),i.jsx(wo,{token:s,onTokenChange:o})]})}function py(){const{pathname:s,navigate:o}=sy(),[v,r]=D.useState(Vo),[g,E]=D.useState(!1),C=ry(v,D.useCallback(()=>E(!0),[]));fy(o);const R=D.useMemo(()=>[{key:"/",description:"Focus search input",run:()=>{const Y=document.querySelector('[data-keynav-search="true"], #runs-search');Y&&(Y.focus(),Y.select?.())}},{key:"j",description:"Move selection down in list",run:()=>Ho(1)},{key:"k",description:"Move selection up in list",run:()=>Ho(-1)},{sequence:["g","r"],key:"r",description:"Go to Runs",run:()=>o("/runs")},{sequence:["g","p"],key:"p",description:"Go to Presets",run:()=>o("/presets")},{sequence:["g","s"],key:"s",description:"Go to Start run",run:()=>o("/start")}],[o]);Pv({shortcuts:R});const T=D.useCallback(Y=>{ly(Y),r(Y),E(!1)},[]),S=(()=>{if(s==="/"||s==="/index.html")return i.jsx(dy,{request:C});if(s==="/runs")return i.jsx(oy,{request:C});if(s==="/start")return i.jsx(yy,{request:C,navigate:o});if(s==="/presets")return i.jsx(gy,{request:C,navigate:o});if(s==="/suites")return i.jsx(vy,{request:C});if(s==="/settings")return i.jsx(by,{token:v,onTokenChange:T});const Y=s.match(/^\/runs\/([^/]+)\/scenarios\/([0-9]+)$/);if(Y)return i.jsx(my,{runId:decodeURIComponent(Y[1]??""),ordinal:Y[2]??"0",request:C});const U=s.match(/^\/runs\/([^/]+)$/);if(U)return i.jsx(hy,{runId:decodeURIComponent(U[1]??""),request:C,token:v});const V=s.match(/^\/presets\/([^/]+)$/);return V?i.jsx(Sy,{presetId:decodeURIComponent(V[1]??""),request:C,navigate:o}):i.jsx(Tt,{message:"Page not found."})})();return i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"header server-header",children:[i.jsxs("div",{children:[i.jsx("h1",{children:"AgentProbe"}),i.jsx("div",{className:"server-subtitle",children:"Server"})]}),i.jsxs("nav",{className:"server-nav",children:[i.jsx("a",{className:s==="/"?"active":"",href:"/",children:"Overview"}),i.jsx("a",{className:s==="/start"?"active":"",href:"/start",children:"Start"}),i.jsx("a",{className:s.startsWith("/runs")?"active":"",href:"/runs",children:"Runs"}),i.jsx("a",{className:s.startsWith("/presets")?"active":"",href:"/presets",children:"Presets"}),i.jsx("a",{className:s.startsWith("/suites")?"active":"",href:"/suites",children:"Suites"}),i.jsx("a",{className:s==="/settings"?"active":"",href:"/settings",children:"Settings"})]})]}),g&&i.jsx(wo,{token:v,onTokenChange:T,authRequired:!0}),S]})}function jy(){const{data:s,error:o}=Wv(),[v,r]=D.useState(null);if(o&&!s)return i.jsxs("div",{style:{padding:48,textAlign:"center",color:"var(--muted)"},children:[i.jsx("div",{style:{fontSize:16,marginBottom:8},children:"Waiting for run to start..."}),i.jsx("div",{style:{fontSize:12},children:o})]});if(!s)return i.jsx("div",{style:{padding:48,textAlign:"center",color:"var(--muted)"},children:"Loading..."});const g=v!=null?s.details[v]??null:null;return i.jsxs(i.Fragment,{children:[i.jsxs("div",{className:"header",children:[i.jsx("h1",{children:"AgentProbe Live Dashboard"}),i.jsxs("span",{className:"live-badge",children:[i.jsx("span",{className:s.all_done?"done-dot":"live-dot"}),s.all_done?"COMPLETE":"LIVE"]})]}),i.jsx(Lo,{data:s}),i.jsx(Qo,{data:s}),i.jsx(Xo,{data:s,onSelect:r}),i.jsx(Bo,{averages:s.averages,onSelectRun:r}),i.jsxs("div",{className:"footer",children:["AgentProbe Dashboard · ",s.done,"/",s.total," scenarios"]}),g&&i.jsx(Go,{detail:g,onClose:()=>r(null)})]})}function xy(){const[s,o]=D.useState("detecting"),[v,r]=D.useState(typeof window<"u"?window.location.pathname:"/");return D.useEffect(()=>{const g=()=>r(window.location.pathname);return window.addEventListener("popstate",g),()=>window.removeEventListener("popstate",g)},[]),D.useEffect(()=>{let g=!1;return fetch("/api/state",{headers:{accept:"application/json"}}).then(E=>{g||o(E.ok?"live":"server")}).catch(()=>{g||o("server")}),()=>{g=!0}},[]),v==="/compare"?i.jsx(Hv,{token:Vo()||null}):s==="detecting"?i.jsx(Pt,{label:"Starting dashboard..."}):s==="live"?i.jsx(jy,{}):i.jsx(py,{})}const $o=document.getElementById("root");if(!$o)throw new Error("Missing #root element");zv.createRoot($o).render(i.jsx(D.StrictMode,{children:i.jsx(xy,{})})); +
diff --git a/dashboard/src/App.tsx b/dashboard/src/App.tsx index b07fe87..932f260 100644 --- a/dashboard/src/App.tsx +++ b/dashboard/src/App.tsx @@ -15,6 +15,10 @@ import { RubricView } from "./components/RubricView.tsx"; import { ScenarioTable } from "./components/ScenarioTable.tsx"; import { StatsBar } from "./components/StatsBar.tsx"; import { useDashboard } from "./hooks/useDashboard.ts"; +import { + type KeyboardShortcut, + useKeyboardShortcuts, +} from "./hooks/useKeyboardShortcuts.ts"; import type { DashboardData, DimensionScore, @@ -471,6 +475,31 @@ function Loading({ label = "Loading..." }: { label?: string }) { return
{label}
; } +function moveKeynavRow(delta: number): void { + const rows = Array.from( + document.querySelectorAll('[data-keynav="row"]'), + ); + if (rows.length === 0) return; + const activeRow = document.activeElement?.closest( + '[data-keynav="row"]', + ); + const currentIndex = activeRow ? rows.indexOf(activeRow) : -1; + const nextIndex = + currentIndex === -1 + ? delta > 0 + ? 0 + : rows.length - 1 + : Math.max(0, Math.min(rows.length - 1, currentIndex + delta)); + const target = rows[nextIndex]; + if (!target) return; + const link = target.querySelector('[data-keynav-link="true"]'); + const focusTarget = link ?? target; + focusTarget.focus?.(); + if (typeof (focusTarget as HTMLElement).scrollIntoView === "function") { + focusTarget.scrollIntoView({ block: "nearest", inline: "nearest" }); + } +} + function ErrorBanner({ message }: { message: string }) { return
{message}
; } @@ -513,10 +542,20 @@ function RunsTable({ runs }: { runs: RunSummary[] }) { - {runs.map((run) => ( - + {runs.map((run, index) => ( + - {run.runId} + + {run.runId} + @@ -666,6 +705,7 @@ function OverviewView({ request }: { request: ServerRequest }) { function RunsView({ request }: { request: ServerRequest }) { const [data, setData] = useState(null); const [error, setError] = useState(null); + const [filter, setFilter] = useState(""); useEffect(() => { let cancelled = false; @@ -684,13 +724,55 @@ function RunsView({ request }: { request: ServerRequest }) { }; }, [request]); - if (error) return ; - if (!data) return ; + const filterLower = filter.trim().toLowerCase(); + const filteredRuns = useMemo(() => { + if (!data) return [] as RunSummary[]; + if (filterLower === "") return data.runs; + return data.runs.filter((run) => { + const haystack = [ + run.runId, + run.preset ?? "", + run.label ?? "", + run.status, + ] + .join(" ") + .toLowerCase(); + return haystack.includes(filterLower); + }); + }, [data, filterLower]); return ( <>
Runs
- +
+ + setFilter(event.currentTarget.value)} + placeholder="Filter by id, preset, label, status ( / )" + aria-label="Filter runs" + /> +
+ {error ? ( + + ) : !data ? ( + + ) : data.runs.length === 0 ? ( +
+ No runs recorded yet. +
+ ) : filteredRuns.length === 0 ? ( +
+ No runs match "{filter}". +
+ ) : ( + + )} ); } @@ -975,49 +1057,61 @@ function SuitesView({ request }: { request: ServerRequest }) { /> )}
Suites
- - - - - - - - - - - {suites.suites.map((suite) => ( - - - - - + {suites.suites.length === 0 ? ( +
+ No suites discovered under this data root. +
+ ) : ( +
SuiteSchemaPathObjects
{suite.id}{suite.schema}{suite.relativePath}{suite.objectCount}
+ + + + + + - ))} - -
SuiteSchemaPathObjects
+ + + {suites.suites.map((suite) => ( + + {suite.id} + {suite.schema} + {suite.relativePath} + {suite.objectCount} + + ))} + + + )}
Scenarios
- - - - - - - - - - - - {scenarios.scenarios.map((scenario) => ( - - - - - - + {scenarios.scenarios.length === 0 ? ( +
+ No scenarios discovered. +
+ ) : ( +
ScenarioNameSuiteTagsRubric
{scenario.id}{scenario.name}{scenario.suiteId}{scenario.tags.join(", ") || "-"}{scenario.rubric ?? "-"}
+ + + + + + + - ))} - -
ScenarioNameSuiteTagsRubric
+ + + {scenarios.scenarios.map((scenario) => ( + + {scenario.id} + {scenario.name} + {scenario.suiteId} + {scenario.tags.join(", ") || "-"} + {scenario.rubric ?? "-"} + + ))} + + + )} ); } @@ -1639,6 +1733,55 @@ function ServerDashboard() { useLocalLinkInterception(navigate); + const shortcuts = useMemo( + () => [ + { + key: "/", + description: "Focus search input", + run: () => { + const search = document.querySelector( + '[data-keynav-search="true"], #runs-search', + ); + if (search) { + search.focus(); + search.select?.(); + } + }, + }, + { + key: "j", + description: "Move selection down in list", + run: () => moveKeynavRow(1), + }, + { + key: "k", + description: "Move selection up in list", + run: () => moveKeynavRow(-1), + }, + { + sequence: ["g", "r"], + key: "r", + description: "Go to Runs", + run: () => navigate("/runs"), + }, + { + sequence: ["g", "p"], + key: "p", + description: "Go to Presets", + run: () => navigate("/presets"), + }, + { + sequence: ["g", "s"], + key: "s", + description: "Go to Start run", + run: () => navigate("/start"), + }, + ], + [navigate], + ); + + useKeyboardShortcuts({ shortcuts }); + const onTokenChange = useCallback((nextToken: string) => { writeStoredToken(nextToken); setToken(nextToken); diff --git a/dashboard/src/hooks/useKeyboardShortcuts.ts b/dashboard/src/hooks/useKeyboardShortcuts.ts new file mode 100644 index 0000000..7aba3a8 --- /dev/null +++ b/dashboard/src/hooks/useKeyboardShortcuts.ts @@ -0,0 +1,121 @@ +import { useEffect, useRef } from "react"; + +export type KeyboardShortcut = { + key: string; + sequence?: string[]; + description: string; + run: (event: KeyboardEvent) => void; +}; + +export type KeyboardShortcutHandlerOptions = { + shortcuts: KeyboardShortcut[]; + isEnabled?: () => boolean; + sequenceTimeoutMs?: number; +}; + +const DEFAULT_SEQUENCE_TIMEOUT_MS = 800; + +export function shouldIgnoreKeyboardEvent(target: EventTarget | null): boolean { + if (!target || !(target instanceof Element)) { + return false; + } + const tag = target.tagName; + if (tag === "INPUT" || tag === "TEXTAREA" || tag === "SELECT") { + return true; + } + if (tag === "BUTTON") { + return false; + } + const htmlEl = target as HTMLElement; + if (htmlEl.isContentEditable) { + return true; + } + return false; +} + +export function createKeyboardDispatcher( + options: KeyboardShortcutHandlerOptions, +): (event: KeyboardEvent) => void { + const { shortcuts, isEnabled } = options; + const timeoutMs = options.sequenceTimeoutMs ?? DEFAULT_SEQUENCE_TIMEOUT_MS; + const state = { pending: [] as string[], lastAt: 0 }; + + return (event: KeyboardEvent) => { + if (event.defaultPrevented) return; + if (event.ctrlKey || event.metaKey || event.altKey) return; + if (isEnabled && !isEnabled()) return; + if (shouldIgnoreKeyboardEvent(event.target)) return; + + const now = Date.now(); + if (now - state.lastAt > timeoutMs) { + state.pending = []; + } + state.lastAt = now; + + const currentKey = event.key; + const nextSequence = [...state.pending, currentKey]; + + const sequenceMatch = shortcuts.find( + (shortcut) => + shortcut.sequence !== undefined && + shortcut.sequence.length === nextSequence.length && + shortcut.sequence.every((key, idx) => key === nextSequence[idx]), + ); + if (sequenceMatch) { + state.pending = []; + event.preventDefault(); + sequenceMatch.run(event); + return; + } + + const hasSequencePrefix = shortcuts.some( + (shortcut) => + shortcut.sequence !== undefined && + shortcut.sequence.length > nextSequence.length && + shortcut.sequence + .slice(0, nextSequence.length) + .every((key, idx) => key === nextSequence[idx]), + ); + + if (hasSequencePrefix) { + state.pending = nextSequence; + event.preventDefault(); + return; + } + + const singleMatch = shortcuts.find( + (shortcut) => + shortcut.sequence === undefined && shortcut.key === currentKey, + ); + if (singleMatch) { + state.pending = []; + event.preventDefault(); + singleMatch.run(event); + return; + } + + state.pending = []; + }; +} + +export function useKeyboardShortcuts( + options: KeyboardShortcutHandlerOptions, +): void { + const optionsRef = useRef(options); + optionsRef.current = options; + useEffect(() => { + const dispatcherRef: { current: (event: KeyboardEvent) => void } = { + current: createKeyboardDispatcher(optionsRef.current), + }; + let previousShortcuts = optionsRef.current.shortcuts; + const handler = (event: KeyboardEvent): void => { + if (previousShortcuts !== optionsRef.current.shortcuts) { + dispatcherRef.current = createKeyboardDispatcher(optionsRef.current); + previousShortcuts = optionsRef.current.shortcuts; + } + dispatcherRef.current(event); + }; + window.addEventListener("keydown", handler); + return () => window.removeEventListener("keydown", handler); + }, []); +} diff --git a/dashboard/src/styles.css b/dashboard/src/styles.css index 4b069af..d8010f1 100644 --- a/dashboard/src/styles.css +++ b/dashboard/src/styles.css @@ -103,6 +103,43 @@ body { padding: 16px; color: var(--muted); } +.server-filter-row { + display: flex; + gap: 8px; + align-items: center; + margin: 8px 0 12px; +} +.server-filter-input { + flex: 1; + min-width: 200px; + background: var(--bg); + border: 1px solid var(--border); + border-radius: 6px; + color: var(--text); + padding: 8px 10px; +} +.server-filter-input:focus { + outline: 2px solid var(--indigo); + outline-offset: 1px; +} +.sr-only { + position: absolute; + width: 1px; + height: 1px; + margin: -1px; + border: 0; + padding: 0; + overflow: hidden; + clip: rect(0 0 0 0); + clip-path: inset(50%); + white-space: nowrap; +} +[data-keynav="row"]:focus-within, +[data-keynav="row"]:focus, +[data-keynav-link="true"]:focus { + outline: 2px solid var(--indigo); + outline-offset: -2px; +} .server-error { color: var(--amber); border-color: rgba(245, 158, 11, 0.35); diff --git a/docs/RELIABILITY.md b/docs/RELIABILITY.md index d01322c..3630e4b 100644 --- a/docs/RELIABILITY.md +++ b/docs/RELIABILITY.md @@ -40,3 +40,69 @@ The initial repository-level budgets are: - Include the smallest stable identifiers needed for tracing a failure. - Log enough context to reproduce the path, but not guessed or unvalidated payloads. + +## Server metrics, spans, and budgets (Phase 4) + +The AgentProbe server ships a narrow in-process metrics registry and span +recorder so operators can introspect a running server without depending on an +external collector. All adapters live under +`src/runtime/server/observability/`. + +### Shipped counters + +| Name | Labels | Purpose | +| --- | --- | --- | +| `server.http.requests` | `method`, `route`, `status` | Per-request volume by outcome. | +| `server.runs.started_total` | `preset` | Runs accepted by the run controller. | +| `server.runs.finished_total` | `preset` | Runs that reached a terminal state. | + +### Shipped gauges + +| Name | Purpose | +| --- | --- | +| `server.runs.active` | Active runs tracked by the controller. | +| `server.sse.connections` | Open SSE subscribers across all runs. | + +### Shipped spans + +| Name | Where | Purpose | +| --- | --- | --- | +| `server.run.start.validation` | `RunController.start` | Validates OpenRouter configuration and suite conflicts. | +| `server.run.controller.execute` | `RunController.execute` | End-to-end run execution wrapper. | +| `server.run.suite.boot` | `RunController.execute` | Time from controller accept to first suite/scenario event. | + +### Latency budgets + +Run `bun run latency-budget --samples 25` to populate these numbers against +seeded local data. Budgets are `p95` unless noted. CI is expected to stay +well below the budget on loopback; degraded values should be investigated +before shipping. + +| Surface | Budget (p95) | +| --- | --- | +| `GET /` (dashboard index) | 150 ms | +| `GET /api/runs` | 150 ms | +| `POST /api/runs` (validation rejection) | 200 ms | +| SSE first-event latency | 200 ms | + +### SSE hardening contract + +- Every SSE response emits `retry: 2000` on connect and periodic heartbeat + comments every 15 seconds. +- Terminal events (`run_finished`, `run_cancelled`, `run_failed`) are emitted + exactly once per run and close the stream after dispatch. +- `Last-Event-ID` is honored from both the standard `Last-Event-ID` header and + the `last_event_id` query parameter. +- Historical runs resolve terminal state on replay even when the ring buffer + has been dropped. +- Proxy-safe headers (`cache-control: no-store, no-transform`, + `x-accel-buffering: no`, `connection: keep-alive`) are set on every stream + response. + +### Soak harness + +`bun run soak --duration-ms 10000 --runs 50 --sse-connections 3` is the fast +CI mode: it verifies that no active runs, no stuck streams, and no request +failures remain at shutdown. The `--manual` flag extends the defaults to a +~1h soak and emits the run/failure/RSS/event-lag/latency/connection summary +line for PR evidence. diff --git a/docs/playbooks/agent-probe-server.md b/docs/playbooks/agent-probe-server.md index 5e37715..7ecaac8 100644 --- a/docs/playbooks/agent-probe-server.md +++ b/docs/playbooks/agent-probe-server.md @@ -308,3 +308,120 @@ The HTTP endpoint: The dashboard reads `?run_ids=a,b[&only=changes]` so shared deep links survive refresh, and the preset detail view surfaces a "Compare last two runs" CTA that pre-selects the two most recent runs for the preset. + +## Phase 4: Observability, SSE Hardening, and Operational Polish + +### Tracing a request by id + +Every HTTP response carries an `x-request-id` header (injected from the +incoming `x-request-id` when present, generated otherwise). Structured logs +emit `request_id` on the same line as `method`, `path`, `route`, `status`, and +`duration_ms`, so an operator can pipe a request id through `grep` to find the +single lifecycle of a request. + +```bash +curl -fsS \ + -H "Authorization: Bearer $AGENTPROBE_SERVER_TOKEN" \ + -H "x-request-id: incident-2026-04-17-01" \ + http://127.0.0.1:7878/api/runs | cat +grep incident-2026-04-17-01 server.log +``` + +Set `AGENTPROBE_SERVER_LOG_FORMAT=json` (or pass `--log-format json`) to emit +JSON lines for stream processors. Text remains the default for interactive +bring-up. + +### Metrics, spans, and startup log + +- `docs/RELIABILITY.md#server-metrics-spans-and-budgets-phase-4` lists every + counter, gauge, and span name the server emits along with the expected + labels. Adapters are in-process only — no external collector is required. +- The server's first log line (`server.startup`) includes the redacted config + summary. Tokens and database passwords are replaced with + `[redacted]:c`. Use it to confirm bind host, port, CORS origins, and + backend without leaking secrets. + +### Latency budgets + +`bun run latency-budget --samples 25` boots the server against a synthetic data +root, samples the indexed surfaces, and prints p50/p95/p99 per surface. Use +`--report-only` to log without failing the process when you are diagnosing +rather than gating. Shipping budgets live in `docs/RELIABILITY.md`. + +### Soak harness + +- CI mode: `bun run soak --duration-ms 10000 --runs 50 --sse-connections 3` + proves that no active runs, no stuck streams, and no request failures + remain when the server stops. +- Manual mode: `bun run soak --manual` targets about one hour, repeatedly + launches synthetic runs, reconnects SSE streams, and browses history. The + emitted JSON summary (runs, failures, RSS trend, event lag, request + latency, open connections at shutdown) is the PR evidence artifact. + +### SSE proxying notes + +AgentProbe's SSE endpoint is designed to be safe behind standard reverse +proxies: + +- Every response sets `cache-control: no-store, no-transform`, + `x-accel-buffering: no`, and `connection: keep-alive`. +- A `retry: 2000` directive is emitted on every reconnect so misbehaving + client libraries still back off. +- Heartbeat comments flow every 15 seconds on idle streams so NAT and idle + timeouts do not silently sever the connection. + +Minimum nginx snippet for a reverse proxy that preserves the contract: + +```nginx +location /api/runs/ { + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_buffering off; + proxy_cache off; + proxy_read_timeout 1h; + chunked_transfer_encoding on; + proxy_pass http://agentprobe/api/runs/; +} +``` + +`proxy_buffering off` and the request header `Connection ""` are load-bearing: +nginx's default buffered mode will starve the browser's EventSource until the +buffer fills. + +### Backup, restore, and migration recovery + +- SQLite: `sqlite3 .agentprobe/runs.sqlite3 ".backup backup.sqlite3"` while the + server runs. Restore by stopping the server, copying the backup into place, + and starting again. +- Postgres: use `pg_dump -Fc` daily and store dumps off-host. Restore with + `pg_restore -d agentprobe backup.dump` after stopping writers. +- Migration failure: `agentprobe db:migrate` applies schema changes. On + failure, capture the CLI output, roll back to the previous dump, and + rerun the migration after addressing the root cause. Phase 4 does not + change the migration surface; see the Phase 3 section above for the Postgres + boot gate behaviour. + +### Dashboard cache behaviour + +- `/healthz`, `/readyz`, `/api/session`, `/api/runs`, `/api/runs/:id`, and + SSE responses all set `cache-control: no-store` (or equivalent) so + reverse proxies never serve stale state to operators. +- Static assets under the dashboard bundle (`/*.js`, `/*.css`) inherit the + Bun static file cache and may be cached on the CDN. Rebuild the dashboard + bundle (`bun run dashboard:build`) to invalidate fingerprinted asset URLs. +- Local storage holds the bearer token under + `agentprobe:server-token`; clear it with the Settings view when rotating. + +### Dashboard keyboard shortcuts + +| Shortcut | Action | +| --- | --- | +| `/` | Focus the runs-page search input. | +| `j` / `k` | Move focus to the next / previous list row. | +| `g r` | Navigate to Runs. | +| `g p` | Navigate to Presets. | +| `g s` | Navigate to Start run. | + +Shortcuts are suppressed while typing in `INPUT`, `TEXTAREA`, `SELECT`, or a +`contenteditable` element, and while any of `Ctrl`, `Meta`, or `Alt` is held. +Every shortcut-backed action still has a visible nav link or button. diff --git a/docs/product-specs/current-state.md b/docs/product-specs/current-state.md index 25571fd..8a2f390 100644 --- a/docs/product-specs/current-state.md +++ b/docs/product-specs/current-state.md @@ -35,6 +35,13 @@ Last validated against `platform.md`: 2026-04-17 - [x] Docker image boots safely with SQLite-on-volume persistence - [x] Database URL credentials stay redacted in operator-visible output - [x] Docker Compose readiness waits for server readiness +- [x] Server observability adapters emit counters, gauges, and spans without an external collector +- [x] Startup and per-request logs redact secrets and preserve request IDs +- [x] SSE streams emit heartbeats, retry hints, and exactly-once terminal events +- [x] Dashboard keyboard shortcuts coexist with form typing +- [x] Dashboard views render empty, error, and loading states for every major surface +- [x] Latency budget checks run deterministically against seeded local data +- [x] Soak harness produces CI and manual evidence with the required summary ## Notes diff --git a/docs/product-specs/e2e-checklist.md b/docs/product-specs/e2e-checklist.md index e621204..b6d27e9 100644 --- a/docs/product-specs/e2e-checklist.md +++ b/docs/product-specs/e2e-checklist.md @@ -32,3 +32,10 @@ Derived from `platform.md`. Every scenario should have a coverage owner. | Docker image boots safely with SQLite-on-volume persistence | `Dockerfile` + `docker-compose.yml` + `docs/playbooks/agent-probe-server.md` | ✅ covered | | Database URL credentials stay redacted in operator-visible output | `tests/unit/persistence/url.test.ts` + `tests/unit/server/config.test.ts` | ✅ covered | | Docker Compose readiness waits for server readiness | `docker-compose.yml` + `docs/playbooks/agent-probe-server.md` + `docker compose config` | ✅ covered | +| Server observability adapters emit counters, gauges, and spans without an external collector | `tests/unit/server/observability/metrics.test.ts` + `tests/unit/server/observability/spans.test.ts` + `tests/integration/server/sse-reconnect.test.ts` | ✅ covered | +| Startup and per-request logs redact secrets and preserve request IDs | `tests/unit/server/observability/redaction.test.ts` + `tests/unit/server/observability/logger.test.ts` | ✅ covered | +| SSE streams emit heartbeats, retry hints, and exactly-once terminal events | `tests/integration/server/sse-reconnect.test.ts` + `tests/unit/server/streams.test.ts` | ✅ covered | +| Dashboard keyboard shortcuts coexist with form typing | `tests/unit/dashboard/keyboard-shortcuts.test.tsx` | ✅ covered | +| Dashboard views render empty, error, and loading states for every major surface | `tests/unit/dashboard-app.test.tsx` + `tests/unit/dashboard/compare-view.test.tsx` + manual dashboard pass | ⏳ unit covered; browser pass logged on PR | +| Latency budget checks run deterministically against seeded local data | `scripts/latency-budget.ts` + `docs/RELIABILITY.md` | ✅ covered | +| Soak harness produces CI and manual evidence with the required summary | `scripts/soak.ts` + `docs/RELIABILITY.md` + `docs/playbooks/agent-probe-server.md` | ✅ covered | diff --git a/docs/product-specs/platform.md b/docs/product-specs/platform.md index aa12328..f185ced 100644 --- a/docs/product-specs/platform.md +++ b/docs/product-specs/platform.md @@ -292,3 +292,86 @@ configuration errors that include the database URL **Then** the output redacts the password component for any URL scheme that contains credentials, including percent-encoded and reserved password characters, and never exposes the raw configured password. + +### Server observability adapters emit counters, gauges, and spans without an external collector + +**Given** an `agentprobe start-server` instance running without any external +metrics collector configured +**When** the server handles HTTP traffic, accepts runs, and streams SSE events +**Then** the in-process registry emits `server.http.requests`, +`server.runs.started_total`, and `server.runs.finished_total` counters (with +`method`, `route`, `status`, and `preset` labels where applicable), updates the +`server.runs.active` and `server.sse.connections` gauges as lifecycle events +occur, records `server.run.start.validation`, `server.run.controller.execute`, +and `server.run.suite.boot` spans, and exposes a snapshot API for tests and +operators without requiring a collector to be installed. + +### Startup and per-request logs redact secrets and preserve request IDs + +**Given** an `agentprobe start-server` instance configured with a bearer token +and a database URL that contains credentials +**When** the server logs its startup configuration, an HTTP request, or a run +controller lifecycle event +**Then** the startup log emits a single `server.startup` line that masks +token-like fields and redacts userinfo credentials, every request log carries +`method`, `route`, `status`, `duration_ms`, and `request_id`, and run +controller logs tag each event with `run_id` and `preset_id` so a single +request or run can be traced through the pipeline by ID. + +### SSE streams emit heartbeats, retry hints, and exactly-once terminal events + +**Given** a client subscribed to `GET /api/runs/:runId/events` for an active or +historical run +**When** the server accepts the connection and the run proceeds to a terminal +state +**Then** the server emits a `retry:` directive on connect, periodic heartbeat +comments on idle streams, replays buffered events after any `Last-Event-ID` +supplied via the standard header or the `last_event_id` query parameter, sets +`cache-control: no-store, no-transform`, `x-accel-buffering: no`, and +`connection: keep-alive` on every response, and emits exactly one terminal +event (`run_finished`, `run_cancelled`, or `run_failed`) before closing the +stream even when replaying a historical run whose ring buffer has been +evicted. + +### Dashboard keyboard shortcuts coexist with form typing + +**Given** an operator on any dashboard page +**When** they press `/`, `j`, `k`, or the `g r` / `g p` / `g s` chord +**Then** the browser focuses the runs search input, moves the keyboard focus +between list rows, or navigates to the Runs, Presets, or Start-run routes +respectively; shortcuts are ignored while typing in `INPUT`, `TEXTAREA`, +`SELECT`, or a `contenteditable` element, ignored when any of `Ctrl`, `Meta`, +or `Alt` is held, and every shortcut-backed action remains reachable through a +visible nav link or button. + +### Dashboard views render empty, error, and loading states for every major surface + +**Given** a dashboard view loading data from the server +**When** the view is awaiting a response, receives an empty result set, +receives a filter that does not match any rows, or encounters an HTTP error +**Then** the runs, presets, compare, settings, suites, and auth surfaces all +render a dedicated empty, error, or loading affordance rather than a blank +layout, and the runs page provides a filter input that collapses to an empty +state explaining that the current filter term returned no rows. + +### Latency budget checks run deterministically against seeded local data + +**Given** a developer or CI job running `bun run latency-budget` +**When** the harness boots a loopback server against a synthetic data root and +samples `GET /`, `GET /api/runs`, `POST /api/runs`, and SSE first-event +latencies +**Then** the harness prints per-surface p50/p95/p99 values, compares them +against the budgets recorded in `docs/RELIABILITY.md`, and exits non-zero when +any p95 exceeds its budget unless `--report-only` is set. + +### Soak harness produces CI and manual evidence with the required summary + +**Given** a developer or CI job running `bun run soak` +**When** the harness executes in default CI mode or with `--manual` for a +longer window +**Then** the harness repeatedly starts synthetic runs, reconnects SSE streams, +and browses history; the CI run verifies that no active runs, no stuck +streams, and no HTTP failures remain at shutdown; and every mode emits a +JSON summary with run count, failures, RSS start/end samples, event lag, +request latency p95, and open-connection counts at shutdown that can be +attached to PR evidence. diff --git a/package.json b/package.json index 0af0131..e51b1f4 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,9 @@ "typecheck": "bunx tsc --noEmit && bun run --cwd dashboard typecheck", "dashboard:dev": "bun run --cwd dashboard dev", "dashboard:build": "bun run --cwd dashboard build", - "validate:setup": "./scripts/validate-setup.sh" + "validate:setup": "./scripts/validate-setup.sh", + "soak": "bun scripts/soak.ts", + "latency-budget": "bun scripts/latency-budget.ts" }, "dependencies": { "nunjucks": "^3.2.4", diff --git a/scripts/latency-budget.ts b/scripts/latency-budget.ts new file mode 100644 index 0000000..3eb23ef --- /dev/null +++ b/scripts/latency-budget.ts @@ -0,0 +1,205 @@ +#!/usr/bin/env bun +/* eslint-disable no-console */ +import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { + type StartedServer, + startAgentProbeServer, +} from "../src/runtime/server/app-server.ts"; +import { buildServerConfig } from "../src/runtime/server/config.ts"; + +type SampleKind = + | "static.index" + | "api.runs.list" + | "api.runs.create" + | "sse.first_event"; + +type Budget = { + kind: SampleKind; + label: string; + p95Ms: number; +}; + +const BUDGETS: Budget[] = [ + { kind: "static.index", label: "GET / (index.html)", p95Ms: 150 }, + { kind: "api.runs.list", label: "GET /api/runs", p95Ms: 150 }, + { + kind: "api.runs.create", + label: "POST /api/runs (validation error)", + p95Ms: 200, + }, + { + kind: "sse.first_event", + label: "SSE /api/runs/:id/events first byte", + p95Ms: 200, + }, +]; + +function writeMinimalSuite(root: string): string { + const data = join(root, "data"); + mkdirSync(data, { recursive: true }); + writeFileSync( + join(data, "endpoint.yaml"), + [ + "transport: http", + "connection:", + " base_url: http://example.test", + "request:", + " method: POST", + ' url: "{{ base_url }}/chat"', + " body_template: '{}'", + "response:", + " format: text", + ' content_path: "$"', + "", + ].join("\n"), + "utf8", + ); + return data; +} + +function parseFlag(args: string[], name: string, fallback: number): number { + const index = args.indexOf(name); + if (index === -1) return fallback; + const raw = args[index + 1]; + const parsed = Number(raw); + if (!Number.isFinite(parsed) || parsed <= 0) return fallback; + return parsed; +} + +function percentile(values: number[], pct: number): number { + if (values.length === 0) return 0; + const sorted = [...values].sort((a, b) => a - b); + const rank = Math.min(sorted.length - 1, Math.ceil(sorted.length * pct) - 1); + return sorted[Math.max(0, rank)] ?? 0; +} + +async function timeRequest( + fn: () => Promise, + samples: number, +): Promise { + const results: number[] = []; + for (let i = 0; i < samples; i++) { + const t0 = performance.now(); + try { + await fn(); + } catch { + // Errors are expected for some paths (e.g., validation-error runs). + } + results.push(performance.now() - t0); + } + return results; +} + +async function measureSseFirstEvent( + server: StartedServer, + samples: number, +): Promise { + const results: number[] = []; + for (let i = 0; i < samples; i++) { + const runId = `latency-sse-${i}`; + server.streamHub.publish({ + runId, + kind: "run_progress", + payload: { kind: "scenario_started" }, + }); + const t0 = performance.now(); + const response = await fetch(`${server.url}/api/runs/${runId}/events`); + const reader = response.body?.getReader(); + if (!reader) continue; + await reader.read(); + const duration = performance.now() - t0; + await reader.cancel(); + results.push(duration); + } + return results; +} + +async function main(): Promise { + const args = process.argv.slice(2); + const samples = parseFlag(args, "--samples", 20); + const strict = !args.includes("--report-only"); + + const root = mkdtempSync(join(tmpdir(), "agentprobe-latency-")); + const dataPath = writeMinimalSuite(root); + const dbPath = join(root, "runs.sqlite3"); + + const server = await startAgentProbeServer( + buildServerConfig({ + args: [ + "--host", + "127.0.0.1", + "--port", + "0", + "--data", + dataPath, + "--db", + dbPath, + ], + env: {}, + }), + ); + + try { + const staticSamples = await timeRequest(async () => { + const response = await fetch(`${server.url}/`); + await response.text(); + }, samples); + + const listSamples = await timeRequest(async () => { + const response = await fetch(`${server.url}/api/runs`); + await response.json(); + }, samples); + + const createSamples = await timeRequest(async () => { + const response = await fetch(`${server.url}/api/runs`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: "{}", + }); + await response.text(); + }, samples); + + const sseSamples = await measureSseFirstEvent(server, samples); + + const recordings: Record = { + "static.index": staticSamples, + "api.runs.list": listSamples, + "api.runs.create": createSamples, + "sse.first_event": sseSamples, + }; + + let exceeded = 0; + console.log("kind\tlabel\tp50_ms\tp95_ms\tp99_ms\tbudget_ms\tstatus"); + for (const budget of BUDGETS) { + const values = recordings[budget.kind]; + const p50 = percentile(values, 0.5); + const p95 = percentile(values, 0.95); + const p99 = percentile(values, 0.99); + const ok = p95 <= budget.p95Ms; + if (!ok) exceeded += 1; + console.log( + [ + budget.kind, + budget.label, + p50.toFixed(2), + p95.toFixed(2), + p99.toFixed(2), + budget.p95Ms.toString(), + ok ? "ok" : "over", + ].join("\t"), + ); + } + + if (exceeded > 0 && strict) { + console.error(`\n${exceeded} budget(s) exceeded.`); + process.exitCode = 1; + } + } finally { + await server.stop(); + } +} + +void main(); diff --git a/scripts/soak.ts b/scripts/soak.ts new file mode 100644 index 0000000..3ea15e9 --- /dev/null +++ b/scripts/soak.ts @@ -0,0 +1,255 @@ +#!/usr/bin/env bun +/* eslint-disable no-console */ +import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { + type StartedServer, + startAgentProbeServer, +} from "../src/runtime/server/app-server.ts"; +import { buildServerConfig } from "../src/runtime/server/config.ts"; +import { METRIC_NAMES } from "../src/runtime/server/observability/index.ts"; + +type Mode = "ci" | "manual"; + +type SoakOptions = { + mode: Mode; + durationMs: number; + runs: number; + sseConnections: number; +}; + +function parseOptions(args: string[]): SoakOptions { + const mode: Mode = args.includes("--manual") ? "manual" : "ci"; + const durationFlag = args.indexOf("--duration-ms"); + const durationMs = + durationFlag !== -1 + ? Math.max(1_000, Number(args[durationFlag + 1] ?? "")) + : mode === "manual" + ? 60 * 60 * 1_000 + : 10_000; + const runsFlag = args.indexOf("--runs"); + const runs = + runsFlag !== -1 + ? Math.max(1, Number(args[runsFlag + 1] ?? "")) + : mode === "manual" + ? 500 + : 50; + const sseFlag = args.indexOf("--sse-connections"); + const sseConnections = + sseFlag !== -1 + ? Math.max(1, Number(args[sseFlag + 1] ?? "")) + : mode === "manual" + ? 5 + : 3; + return { mode, durationMs, runs, sseConnections }; +} + +function writeMinimalSuite(root: string): string { + const data = join(root, "data"); + mkdirSync(data, { recursive: true }); + writeFileSync( + join(data, "endpoint.yaml"), + [ + "transport: http", + "connection:", + " base_url: http://example.test", + "request:", + " method: POST", + ' url: "{{ base_url }}/chat"', + " body_template: '{}'", + "response:", + " format: text", + ' content_path: "$"', + "", + ].join("\n"), + "utf8", + ); + return data; +} + +type RssSample = { ts: number; rssMb: number }; + +function rssMb(): number { + return process.memoryUsage.rss() / 1024 / 1024; +} + +async function openSse(server: StartedServer, runId: string) { + const response = await fetch(`${server.url}/api/runs/${runId}/events`); + const reader = response.body?.getReader(); + if (!reader) return; + let firstEventLag = Number.NaN; + const start = performance.now(); + const read = async () => { + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + if (Number.isNaN(firstEventLag) && value && value.length > 0) { + firstEventLag = performance.now() - start; + } + } + } catch { + // stream ended + } + }; + void read(); + return { + cancel: async () => { + try { + await reader.cancel(); + } catch { + // ignore + } + }, + firstEventLag: () => firstEventLag, + }; +} + +async function runSoak(options: SoakOptions): Promise { + const root = mkdtempSync(join(tmpdir(), "agentprobe-soak-")); + const dataPath = writeMinimalSuite(root); + const dbPath = join(root, "runs.sqlite3"); + const server = await startAgentProbeServer( + buildServerConfig({ + args: [ + "--host", + "127.0.0.1", + "--port", + "0", + "--data", + dataPath, + "--db", + dbPath, + ], + env: {}, + }), + ); + + const endBy = Date.now() + options.durationMs; + let runsStarted = 0; + let failures = 0; + const rssSamples: RssSample[] = [{ ts: Date.now(), rssMb: rssMb() }]; + + const latencies: number[] = []; + let openConnections = 0; + const sseHandles: Awaited>[] = []; + + const ssePool = async (): Promise => { + for (let i = 0; i < options.sseConnections; i++) { + const runId = `soak-sse-${i}`; + server.streamHub.publish({ + runId, + kind: "run_progress", + payload: { kind: "scenario_started" }, + }); + const handle = await openSse(server, runId); + if (handle) sseHandles.push(handle); + openConnections += 1; + } + }; + + await ssePool(); + + while (Date.now() < endBy && runsStarted < options.runs) { + const runId = `soak-run-${runsStarted}`; + const t0 = performance.now(); + server.streamHub.publish({ + runId, + kind: "run_started", + payload: { + run_id: runId, + label: null, + preset_id: null, + trigger: "soak", + }, + }); + // Simulate progress and terminal event. + server.streamHub.publish({ + runId, + kind: "scenario_started", + payload: { scenario_id: "synthetic" }, + }); + server.streamHub.publish({ + runId, + kind: "run_finished", + payload: { kind: "run_finished", run_id: runId }, + }); + latencies.push(performance.now() - t0); + runsStarted += 1; + + if (runsStarted % 25 === 0) { + rssSamples.push({ ts: Date.now(), rssMb: rssMb() }); + } + + try { + const response = await fetch(`${server.url}/api/runs`); + await response.json(); + } catch { + failures += 1; + } + // Yield to the event loop so stream subscribers can drain. + await new Promise((resolve) => setImmediate(resolve)); + } + + rssSamples.push({ ts: Date.now(), rssMb: rssMb() }); + + const activeRuns = server.observability.metrics.getGauge( + METRIC_NAMES.runsActive, + ); + const httpCounters = server.observability.metrics + .snapshot() + .counters.filter((entry) => entry.name === METRIC_NAMES.httpRequests) + .reduce((total, entry) => total + entry.value, 0); + const openSseGauge = server.observability.metrics.getGauge( + METRIC_NAMES.sseConnections, + ); + + const firstEventLags = sseHandles + .map((handle) => handle?.firstEventLag() ?? Number.NaN) + .filter((value) => !Number.isNaN(value)); + const avgLag = + firstEventLags.length > 0 + ? firstEventLags.reduce((sum, value) => sum + value, 0) / + firstEventLags.length + : 0; + const p95Latency = latencies.length + ? ([...latencies].sort((a, b) => a - b)[ + Math.floor(latencies.length * 0.95) + ] ?? 0) + : 0; + + for (const handle of sseHandles) { + if (handle) await handle.cancel(); + } + + await server.stop(); + + const summary = { + mode: options.mode, + duration_ms: Date.now() - (endBy - options.durationMs), + runs: runsStarted, + failures, + http_requests: httpCounters, + open_sse_connections_at_shutdown: openSseGauge, + active_runs_at_shutdown: activeRuns, + event_lag_ms_avg: Math.round(avgLag * 100) / 100, + request_latency_ms_p95: Math.round(p95Latency * 100) / 100, + rss_mb_start: rssSamples[0]?.rssMb ?? 0, + rss_mb_end: rssSamples[rssSamples.length - 1]?.rssMb ?? 0, + rss_mb_samples: rssSamples.length, + }; + + console.log(JSON.stringify(summary, null, 2)); + + if (activeRuns !== 0) { + console.error( + `soak harness: expected 0 active runs at shutdown, got ${activeRuns}`, + ); + process.exitCode = 1; + } + void openConnections; +} + +void runSoak(parseOptions(process.argv.slice(2))); diff --git a/src/runtime/server/app-server.ts b/src/runtime/server/app-server.ts index 0bcbf62..f514238 100644 --- a/src/runtime/server/app-server.ts +++ b/src/runtime/server/app-server.ts @@ -17,6 +17,13 @@ import { PresetController } from "./controllers/preset-controller.ts"; import { RunController } from "./controllers/run-controller.ts"; import { SuiteController } from "./controllers/suite-controller.ts"; import { ensureRequestId, errorResponse } from "./http-helpers.ts"; +import { + createObservability, + type Logger, + METRIC_NAMES, + type Observability, + summarizeServerConfig, +} from "./observability/index.ts"; import { handleCompareRuns } from "./routes/comparisons.ts"; import { handleHealthz, handleReadyz, handleSession } from "./routes/health.ts"; import { @@ -58,6 +65,7 @@ export type ServerContext = { comparisonController: ComparisonController; repository: PersistenceRepository; streamHub: StreamHub; + observability: Observability; requestId: string; startedAt: number; version: string; @@ -69,6 +77,7 @@ export type StartedServer = { port: number; streamHub: StreamHub; suiteController: SuiteController; + observability: Observability; stop: () => Promise; }; @@ -332,37 +341,36 @@ function withCorsHeaders( } function logRequest( - config: ServerConfig, + logger: Logger, request: Request, response: Response, durationMs: number, requestId: string, + matchedRoute: string | undefined, ): void { const pathname = new URL(request.url).pathname; - if (config.logFormat === "json") { - const payload = { - ts: new Date().toISOString(), - level: "info", - component: "agentprobe.server", - method: request.method, - path: pathname, - status: response.status, - duration_ms: Math.round(durationMs), - request_id: requestId, - }; - process.stderr.write(`${JSON.stringify(payload)}\n`); - return; - } - process.stderr.write( - `[server] ${request.method} ${pathname} -> ${response.status} (${durationMs.toFixed( - 1, - )}ms) rid=${requestId}\n`, - ); + logger.info("http.request", { + method: request.method, + path: pathname, + route: matchedRoute ?? null, + status: response.status, + duration_ms: Math.round(durationMs), + request_id: requestId, + }); } +export type StartAgentProbeServerOptions = { + observability?: Observability; +}; + export async function startAgentProbeServer( config: ServerConfig, + options: StartAgentProbeServerOptions = {}, ): Promise { + const observability = + options.observability ?? createObservability({ format: config.logFormat }); + const { logger, metrics } = observability; + const repository: RecordingRepository = createRecordingRepository( config.dbUrl, ); @@ -381,11 +389,19 @@ export async function startAgentProbeServer( repository, suiteController, streamHub, + observability, }); const comparisonController = createComparisonController({ repository }); const routes = buildRoutes(); const startedAt = Date.now(); + logger.info("server.startup", { + version: SERVER_VERSION, + config: summarizeServerConfig(config), + }); + metrics.setGauge(METRIC_NAMES.runsActive, 0); + metrics.setGauge(METRIC_NAMES.sseConnections, 0); + const baseContext = { config, presetController, @@ -394,6 +410,7 @@ export async function startAgentProbeServer( comparisonController, repository, streamHub, + observability, startedAt, version: SERVER_VERSION, }; @@ -403,13 +420,16 @@ export async function startAgentProbeServer( const url = new URL(request.url); const t0 = performance.now(); let response: Response; + let routeLabel: string | undefined; const context: ServerContext = { ...baseContext, requestId }; try { if (request.method === "OPTIONS" && isApiPath(url.pathname)) { response = preflightResponse(request, config); + routeLabel = "OPTIONS"; } else { const matched = matchRoute(routes, request.method, url.pathname); if (matched) { + routeLabel = matched.route.pattern.source; if (matched.route.requiresAuth && config.token) { if (!verifyBearerToken(request, config.token)) { response = errorResponse({ @@ -466,7 +486,13 @@ export async function startAgentProbeServer( response = withCorsHeaders(request, config, response); } - logRequest(config, request, response, performance.now() - t0, requestId); + const duration = performance.now() - t0; + logRequest(logger, request, response, duration, requestId, routeLabel); + metrics.incrementCounter(METRIC_NAMES.httpRequests, 1, { + method: request.method, + route: routeLabel ?? "unmatched", + status: response.status, + }); return response; }; @@ -491,6 +517,7 @@ export async function startAgentProbeServer( port, streamHub, suiteController, + observability, stop, }; } diff --git a/src/runtime/server/controllers/run-controller.ts b/src/runtime/server/controllers/run-controller.ts index 460c538..5aafce3 100644 --- a/src/runtime/server/controllers/run-controller.ts +++ b/src/runtime/server/controllers/run-controller.ts @@ -14,6 +14,14 @@ import type { RunProgressEvent, } from "../../../shared/types/contracts.ts"; import { AgentProbeConfigError } from "../../../shared/utils/errors.ts"; +import { + type Logger, + METRIC_NAMES, + type MetricsRegistry, + type Observability, + SPAN_NAMES, + type SpanRecorder, +} from "../observability/index.ts"; import type { StreamHub } from "../streams/hub.ts"; import { HttpInputError, @@ -163,14 +171,32 @@ function parseOverrides( export class RunController { private readonly activeByRunId = new Map(); private readonly activeBySuiteKey = new Map(); + private readonly logger: Logger; + private readonly metrics: MetricsRegistry | undefined; + private readonly spans: SpanRecorder | undefined; constructor( private readonly options: { repository: RecordingRepository; suiteController: SuiteController; streamHub: StreamHub; + observability?: Observability; }, - ) {} + ) { + this.logger = options.observability + ? options.observability.logger.child("agentprobe.run", {}) + : ({ + log: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + child() { + return this; + }, + } as Logger); + this.metrics = options.observability?.metrics; + this.spans = options.observability?.spans; + } assertRunnable(): void { ensureOpenRouterConfigured(); @@ -309,15 +335,32 @@ export class RunController { } start(spec: RunSpec): StartRunResult { - const client = ensureOpenRouterConfigured(); + const validationScope = this.spans?.start(SPAN_NAMES.runStartValidation, { + preset_id: spec.presetId ?? null, + }); + let client: OpenAiResponsesClient; + try { + client = ensureOpenRouterConfigured(); + } catch (error) { + validationScope?.setStatus( + "error", + error instanceof Error ? error : new Error(String(error)), + ); + validationScope?.end(); + throw error; + } const suiteKey = this.suiteKey(spec); if (this.activeBySuiteKey.has(suiteKey)) { + validationScope?.setStatus("error", new Error("conflict")); + validationScope?.end(); throw new HttpInputError( 409, "conflict", "A run with the same resolved suite key is already active.", ); } + validationScope?.setStatus("ok"); + validationScope?.end(); const abortController = new AbortController(); const recorder = this.options.repository.createRecorder(); @@ -345,9 +388,28 @@ export class RunController { }; this.activeByRunId.set(runId, active); this.activeBySuiteKey.set(suiteKey, active); + + this.metrics?.incrementCounter(METRIC_NAMES.runsStartedTotal, 1, { + preset: spec.presetId ?? "none", + }); + this.metrics?.adjustGauge(METRIC_NAMES.runsActive, 1); + this.logger.info("run.started", { + run_id: runId, + preset_id: spec.presetId ?? null, + label: spec.label ?? null, + }); + void promise.finally(() => { this.activeByRunId.delete(runId); this.activeBySuiteKey.delete(suiteKey); + this.metrics?.adjustGauge(METRIC_NAMES.runsActive, -1); + this.metrics?.incrementCounter(METRIC_NAMES.runsFinishedTotal, 1, { + preset: spec.presetId ?? "none", + }); + this.logger.info("run.finished", { + run_id: runId, + preset_id: spec.presetId ?? null, + }); }); this.options.streamHub.publish({ @@ -373,6 +435,21 @@ export class RunController { suiteKey: string; }, ): Promise { + const executeScope = this.spans?.start(SPAN_NAMES.runControllerExecute, { + preset_id: spec.presetId ?? null, + dry_run: spec.dryRun, + repeat: spec.repeat, + }); + const bootScope = this.spans?.start(SPAN_NAMES.runSuiteBoot, { + preset_id: spec.presetId ?? null, + }); + let bootEnded = false; + const completeBoot = (): void => { + if (bootEnded) return; + bootEnded = true; + bootScope?.setStatus("ok"); + bootScope?.end(); + }; try { await runSuite({ endpoint: spec.endpoint, @@ -391,6 +468,12 @@ export class RunController { if (!runId) { return; } + if ( + event.kind === "suite_started" || + event.kind === "scenario_started" + ) { + completeBoot(); + } this.options.streamHub.publish({ runId, kind: @@ -416,6 +499,12 @@ export class RunController { const runId = options.recorder.runId; const failure = normalizeError(error); writeRunExecutorErrorLog(runId, failure); + this.logger.error("run.error", { + run_id: runId ?? null, + preset_id: spec.presetId ?? null, + error_type: failure.name || "Error", + error_message: failure.message, + }); if (runId) { try { options.recorder.recordRunError(failure, { @@ -436,6 +525,11 @@ export class RunController { }, }); } + executeScope?.setStatus("error", failure); + bootScope?.setStatus("error", failure); + } finally { + completeBoot(); + executeScope?.end(); } } diff --git a/src/runtime/server/observability/index.ts b/src/runtime/server/observability/index.ts new file mode 100644 index 0000000..ff974cb --- /dev/null +++ b/src/runtime/server/observability/index.ts @@ -0,0 +1,48 @@ +import type { LogFormat } from "../config.ts"; +import { createLogger, type Logger } from "./logger.ts"; +import { MetricsRegistry, SERVER_METRIC_NAMES } from "./metrics.ts"; +import { SERVER_SPAN_NAMES, SpanRecorder } from "./spans.ts"; + +export type Observability = { + logger: Logger; + metrics: MetricsRegistry; + spans: SpanRecorder; +}; + +export function createObservability(options: { + format: LogFormat; + component?: string; + metrics?: MetricsRegistry; + spans?: SpanRecorder; +}): Observability { + const logger = createLogger({ + component: options.component ?? "agentprobe.server", + format: options.format, + }); + return { + logger, + metrics: options.metrics ?? new MetricsRegistry(), + spans: options.spans ?? new SpanRecorder(), + }; +} + +export type { LogFields, Logger, LogLevel } from "./logger.ts"; +export { createLogger } from "./logger.ts"; +export type { + CounterSnapshot, + GaugeSnapshot, + MetricLabels, + MetricsSnapshot, +} from "./metrics.ts"; +export { MetricsRegistry, SERVER_METRIC_NAMES } from "./metrics.ts"; +export { + isSecretKey, + redactRecord, + redactSecretValue, + summarizeServerConfig, +} from "./redaction.ts"; +export type { SpanRecord, SpanScope } from "./spans.ts"; +export { SERVER_SPAN_NAMES, SpanRecorder } from "./spans.ts"; + +export const METRIC_NAMES = SERVER_METRIC_NAMES; +export const SPAN_NAMES = SERVER_SPAN_NAMES; diff --git a/src/runtime/server/observability/logger.ts b/src/runtime/server/observability/logger.ts new file mode 100644 index 0000000..e06d482 --- /dev/null +++ b/src/runtime/server/observability/logger.ts @@ -0,0 +1,97 @@ +import type { LogFormat } from "../config.ts"; + +export type LogLevel = "debug" | "info" | "warn" | "error"; + +export type LogFields = Record; + +export type Logger = { + log(level: LogLevel, event: string, fields?: LogFields): void; + info(event: string, fields?: LogFields): void; + warn(event: string, fields?: LogFields): void; + error(event: string, fields?: LogFields): void; + child(component: string, baseFields?: LogFields): Logger; +}; + +type Sink = (line: string) => void; + +const LEVEL_LABEL: Record = { + debug: "debug", + info: "info", + warn: "warn", + error: "error", +}; + +function defaultSink(line: string): void { + process.stderr.write(`${line}\n`); +} + +function formatText( + level: LogLevel, + component: string, + event: string, + fields: LogFields, +): string { + const parts: string[] = [`[${component}]`, `${LEVEL_LABEL[level]}`, event]; + for (const [key, value] of Object.entries(fields)) { + parts.push(`${key}=${formatTextValue(value)}`); + } + return parts.join(" "); +} + +function formatTextValue(value: unknown): string { + if (value === null || value === undefined) return "-"; + if (typeof value === "string") { + return value.includes(" ") ? `"${value.replace(/"/g, '\\"')}"` : value; + } + if (typeof value === "number" || typeof value === "boolean") { + return String(value); + } + return JSON.stringify(value); +} + +export function createLogger(options: { + component: string; + format: LogFormat; + baseFields?: LogFields; + sink?: Sink; +}): Logger { + const sink = options.sink ?? defaultSink; + const baseFields = { ...(options.baseFields ?? {}) }; + const component = options.component; + const format = options.format; + + const log = ( + level: LogLevel, + event: string, + fields: LogFields = {}, + ): void => { + const merged = { ...baseFields, ...fields }; + if (format === "json") { + const payload = { + ts: new Date().toISOString(), + level: LEVEL_LABEL[level], + component, + event, + ...merged, + }; + sink(JSON.stringify(payload)); + } else { + sink(formatText(level, component, event, merged)); + } + }; + + return { + log, + info: (event, fields) => log("info", event, fields), + warn: (event, fields) => log("warn", event, fields), + error: (event, fields) => log("error", event, fields), + child(childComponent: string, childFields?: LogFields): Logger { + return createLogger({ + component: childComponent, + format, + baseFields: { ...baseFields, ...(childFields ?? {}) }, + sink, + }); + }, + }; +} diff --git a/src/runtime/server/observability/metrics.ts b/src/runtime/server/observability/metrics.ts new file mode 100644 index 0000000..21454f1 --- /dev/null +++ b/src/runtime/server/observability/metrics.ts @@ -0,0 +1,142 @@ +export type MetricLabels = Readonly>; + +export type CounterSnapshot = { + name: string; + value: number; + labels: Record; +}; + +export type GaugeSnapshot = CounterSnapshot; + +export type MetricsSnapshot = { + counters: CounterSnapshot[]; + gauges: GaugeSnapshot[]; +}; + +function labelKey(labels: MetricLabels | undefined): string { + if (!labels) return ""; + const entries = Object.entries(labels) + .map(([k, v]) => [k, String(v)] as const) + .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0)); + return entries.map(([k, v]) => `${k}=${v}`).join("|"); +} + +function cloneLabels(labels: MetricLabels | undefined): MetricLabels { + if (!labels) return {}; + return { ...labels }; +} + +export class MetricsRegistry { + private readonly counters = new Map>(); + private readonly counterLabels = new Map>(); + private readonly gauges = new Map>(); + private readonly gaugeLabels = new Map>(); + + incrementCounter(name: string, value = 1, labels?: MetricLabels): void { + const key = labelKey(labels); + let perName = this.counters.get(name); + if (!perName) { + perName = new Map(); + this.counters.set(name, perName); + } + perName.set(key, (perName.get(key) ?? 0) + value); + let labelMap = this.counterLabels.get(name); + if (!labelMap) { + labelMap = new Map(); + this.counterLabels.set(name, labelMap); + } + if (!labelMap.has(key)) { + labelMap.set(key, cloneLabels(labels)); + } + } + + setGauge(name: string, value: number, labels?: MetricLabels): void { + const key = labelKey(labels); + let perName = this.gauges.get(name); + if (!perName) { + perName = new Map(); + this.gauges.set(name, perName); + } + perName.set(key, value); + let labelMap = this.gaugeLabels.get(name); + if (!labelMap) { + labelMap = new Map(); + this.gaugeLabels.set(name, labelMap); + } + if (!labelMap.has(key)) { + labelMap.set(key, cloneLabels(labels)); + } + } + + adjustGauge(name: string, delta: number, labels?: MetricLabels): void { + const key = labelKey(labels); + let perName = this.gauges.get(name); + if (!perName) { + perName = new Map(); + this.gauges.set(name, perName); + } + perName.set(key, (perName.get(key) ?? 0) + delta); + let labelMap = this.gaugeLabels.get(name); + if (!labelMap) { + labelMap = new Map(); + this.gaugeLabels.set(name, labelMap); + } + if (!labelMap.has(key)) { + labelMap.set(key, cloneLabels(labels)); + } + } + + getCounter(name: string, labels?: MetricLabels): number { + const key = labelKey(labels); + return this.counters.get(name)?.get(key) ?? 0; + } + + getGauge(name: string, labels?: MetricLabels): number { + const key = labelKey(labels); + return this.gauges.get(name)?.get(key) ?? 0; + } + + snapshot(): MetricsSnapshot { + const counters: CounterSnapshot[] = []; + for (const [name, perName] of this.counters.entries()) { + const labelMap = this.counterLabels.get(name); + for (const [key, value] of perName.entries()) { + counters.push({ + name, + value, + labels: { ...(labelMap?.get(key) ?? {}) }, + }); + } + } + const gauges: GaugeSnapshot[] = []; + for (const [name, perName] of this.gauges.entries()) { + const labelMap = this.gaugeLabels.get(name); + for (const [key, value] of perName.entries()) { + gauges.push({ + name, + value, + labels: { ...(labelMap?.get(key) ?? {}) }, + }); + } + } + return { + counters: counters.sort((a, b) => a.name.localeCompare(b.name)), + gauges: gauges.sort((a, b) => a.name.localeCompare(b.name)), + }; + } + + reset(): void { + this.counters.clear(); + this.counterLabels.clear(); + this.gauges.clear(); + this.gaugeLabels.clear(); + } +} + +export const SERVER_METRIC_NAMES = { + httpRequests: "server.http.requests", + runsActive: "server.runs.active", + runsStartedTotal: "server.runs.started_total", + runsFinishedTotal: "server.runs.finished_total", + sseConnections: "server.sse.connections", +} as const; diff --git a/src/runtime/server/observability/redaction.ts b/src/runtime/server/observability/redaction.ts new file mode 100644 index 0000000..54734e0 --- /dev/null +++ b/src/runtime/server/observability/redaction.ts @@ -0,0 +1,53 @@ +import { redactDbUrl } from "../../../providers/persistence/url.ts"; +import type { ServerConfig } from "../config.ts"; + +const REDACTED = "[redacted]"; +const SECRET_KEY_PATTERN = + /(token|secret|key|password|authorization|api[_-]?key)/i; + +export function redactSecretValue(value: string | undefined | null): string { + if (value === undefined || value === null || value === "") { + return ""; + } + if (value.length <= 4) { + return REDACTED; + } + return `${REDACTED}:${value.length}c`; +} + +export function isSecretKey(key: string): boolean { + return SECRET_KEY_PATTERN.test(key); +} + +export function redactRecord( + record: Record, +): Record { + const out: Record = {}; + for (const [key, value] of Object.entries(record)) { + if (typeof value === "string" && isSecretKey(key)) { + out[key] = redactSecretValue(value); + } else if (value && typeof value === "object" && !Array.isArray(value)) { + out[key] = redactRecord(value as Record); + } else { + out[key] = value; + } + } + return out; +} + +export function summarizeServerConfig( + config: ServerConfig, +): Record { + return { + host: config.host, + port: config.port, + data_path: config.dataPath, + db_url: redactDbUrl(config.dbUrl), + dashboard_dist: config.dashboardDist ?? null, + token: config.token ? redactSecretValue(config.token) : null, + cors_origins: config.corsOrigins, + unsafe_expose: config.unsafeExpose, + open_browser: config.openBrowser, + log_format: config.logFormat, + }; +} diff --git a/src/runtime/server/observability/spans.ts b/src/runtime/server/observability/spans.ts new file mode 100644 index 0000000..1bd8de5 --- /dev/null +++ b/src/runtime/server/observability/spans.ts @@ -0,0 +1,108 @@ +export type SpanRecord = { + name: string; + startedAt: number; + durationMs: number; + attributes: Record; + status: "ok" | "error"; + error?: { type: string; message: string }; +}; + +export type SpanScope = { + setAttribute(key: string, value: unknown): void; + setStatus(status: "ok" | "error", error?: Error): void; + end(): void; + readonly name: string; + readonly startedAt: number; +}; + +export class SpanRecorder { + private readonly records: SpanRecord[] = []; + private readonly capacity: number; + + constructor(options: { capacity?: number } = {}) { + this.capacity = options.capacity ?? 1024; + } + + start(name: string, attributes: Record = {}): SpanScope { + const startedAt = performance.now(); + const attrs: Record = { ...attributes }; + let status: "ok" | "error" = "ok"; + let recordedError: Error | undefined; + let ended = false; + + const finalize = (): void => { + if (ended) return; + ended = true; + const record: SpanRecord = { + name, + startedAt, + durationMs: performance.now() - startedAt, + attributes: { ...attrs }, + status, + }; + if (status === "error" && recordedError) { + record.error = { + type: recordedError.name || "Error", + message: recordedError.message, + }; + } + this.records.push(record); + if (this.records.length > this.capacity) { + this.records.splice(0, this.records.length - this.capacity); + } + }; + + return { + name, + startedAt, + setAttribute(key: string, value: unknown): void { + attrs[key] = value; + }, + setStatus(next: "ok" | "error", error?: Error): void { + status = next; + if (error) { + recordedError = error; + } + }, + end: finalize, + }; + } + + async withSpan( + name: string, + attributes: Record, + fn: (scope: SpanScope) => Promise | T, + ): Promise { + const scope = this.start(name, attributes); + try { + const result = await fn(scope); + scope.setStatus("ok"); + return result; + } catch (error) { + scope.setStatus( + "error", + error instanceof Error ? error : new Error(String(error)), + ); + throw error; + } finally { + scope.end(); + } + } + + snapshot(): SpanRecord[] { + return this.records.map((record) => ({ + ...record, + attributes: { ...record.attributes }, + })); + } + + reset(): void { + this.records.length = 0; + } +} + +export const SERVER_SPAN_NAMES = { + runStartValidation: "server.run.start.validation", + runControllerExecute: "server.run.controller.execute", + runSuiteBoot: "server.run.suite.boot", +} as const; diff --git a/src/runtime/server/routes/sse.ts b/src/runtime/server/routes/sse.ts index 009bb60..5a1f09d 100644 --- a/src/runtime/server/routes/sse.ts +++ b/src/runtime/server/routes/sse.ts @@ -1,13 +1,17 @@ import type { RunRecord } from "../../../shared/types/contracts.ts"; import type { ServerContext } from "../app-server.ts"; import { errorResponse } from "../http-helpers.ts"; +import { METRIC_NAMES } from "../observability/index.ts"; import { formatSseEvent, formatSseKeepalive, + formatSseRetry, + isTerminalEvent, type RunEvent, } from "../streams/events.ts"; -const KEEPALIVE_INTERVAL_MS = 15_000; +export const SSE_KEEPALIVE_INTERVAL_MS = 15_000; +export const SSE_RECONNECT_RETRY_MS = 2_000; function snapshotPayloadForRun(run: RunRecord): RunEvent["payload"] { return { @@ -33,7 +37,9 @@ function snapshotPayloadForRun(run: RunRecord): RunEvent["payload"] { }; } -function parseLastEventId(value: string | null): number | undefined { +function parseLastEventId( + value: string | null | undefined, +): number | undefined { if (!value) { return undefined; } @@ -44,22 +50,38 @@ function parseLastEventId(value: string | null): number | undefined { return parsed; } +function pickLastEventId(request: Request, url: URL): number | undefined { + const headerValue = + request.headers.get("last-event-id") ?? + request.headers.get("Last-Event-ID"); + const queryValue = url.searchParams.get("last_event_id"); + return parseLastEventId(headerValue) ?? parseLastEventId(queryValue); +} + +function terminalEventForHistorical( + run: RunRecord, +): RunEvent["kind"] | undefined { + if (run.status === "running") return undefined; + if (run.status === "cancelled") return "run_cancelled"; + if (run.status === "errored" || run.status === "failed") return "run_failed"; + return "run_finished"; +} + export async function handleRunSse( request: Request, context: ServerContext, params: { runId: string }, ): Promise { - const lastEventId = parseLastEventId(request.headers.get("last-event-id")); + const url = new URL(request.url); + const lastEventId = pickLastEventId(request, url); const { runId } = params; const historicalRun: RunRecord | undefined = context.config.dbUrl ? await context.repository.getRun(runId) : undefined; - // Replay any buffered events (after last-event-id if provided). const replayEvents = context.streamHub.replay(runId, lastEventId); - // If neither a buffered stream nor a historical run exist, treat as 404. if (!historicalRun && replayEvents.length === 0) { return errorResponse({ status: 404, @@ -70,12 +92,16 @@ export async function handleRunSse( } const encoder = new TextEncoder(); + const metrics = context.observability.metrics; let unsubscribe: (() => void) | undefined; let keepalive: ReturnType | undefined; + let teardown: (() => void) | undefined; const stream = new ReadableStream({ start(controller) { let closed = false; + let connectionCounted = false; + let terminalSent = false; const safeEnqueue = (chunk: string): void => { try { controller.enqueue(encoder.encode(chunk)); @@ -96,7 +122,12 @@ export async function handleRunSse( clearInterval(keepalive); keepalive = undefined; } + if (connectionCounted) { + metrics.adjustGauge(METRIC_NAMES.sseConnections, -1); + connectionCounted = false; + } }; + teardown = cleanup; const close = (): void => { cleanup(); try { @@ -106,13 +137,39 @@ export async function handleRunSse( } }; + metrics.adjustGauge(METRIC_NAMES.sseConnections, 1); + connectionCounted = true; + + // Always advise reconnect retry interval to the browser. + safeEnqueue(formatSseRetry(SSE_RECONNECT_RETRY_MS)); + + const dispatchEvent = (event: RunEvent): void => { + safeEnqueue(formatSseEvent(event)); + if (isTerminalEvent(event) && !terminalSent) { + terminalSent = true; + queueMicrotask(close); + } + }; + if (replayEvents.length > 0) { for (const event of replayEvents) { - safeEnqueue(formatSseEvent(event)); + dispatchEvent(event); } - if (historicalRun && historicalRun.status !== "running") { - queueMicrotask(close); - return; + if (!terminalSent && historicalRun) { + const terminalKind = terminalEventForHistorical(historicalRun); + if (terminalKind) { + const terminalEvent = context.streamHub.publish({ + runId, + kind: terminalKind, + payload: { + run_id: runId, + source: "historical_terminal", + status: historicalRun.status, + }, + }); + dispatchEvent(terminalEvent); + return; + } } } else if (historicalRun) { const snapshot = context.streamHub.publish({ @@ -120,20 +177,32 @@ export async function handleRunSse( kind: "snapshot", payload: snapshotPayloadForRun(historicalRun), }); - safeEnqueue(formatSseEvent(snapshot)); - if (historicalRun.status !== "running") { - queueMicrotask(close); + dispatchEvent(snapshot); + const terminalKind = terminalEventForHistorical(historicalRun); + if (terminalKind && !terminalSent) { + const terminalEvent = context.streamHub.publish({ + runId, + kind: terminalKind, + payload: { + run_id: runId, + source: "historical_terminal", + status: historicalRun.status, + }, + }); + dispatchEvent(terminalEvent); return; } } + if (terminalSent) return; + unsubscribe = context.streamHub.subscribe(runId, (event) => { - safeEnqueue(formatSseEvent(event)); + dispatchEvent(event); }); keepalive = setInterval(() => { safeEnqueue(formatSseKeepalive()); - }, KEEPALIVE_INTERVAL_MS); + }, SSE_KEEPALIVE_INTERVAL_MS); if (request.signal) { request.signal.addEventListener("abort", () => { @@ -142,11 +211,8 @@ export async function handleRunSse( } }, cancel() { - if (unsubscribe) { - unsubscribe(); - } - if (keepalive) { - clearInterval(keepalive); + if (teardown) { + teardown(); } }, }); diff --git a/src/runtime/server/streams/events.ts b/src/runtime/server/streams/events.ts index f99f19f..f6af637 100644 --- a/src/runtime/server/streams/events.ts +++ b/src/runtime/server/streams/events.ts @@ -7,12 +7,23 @@ export type RunEventKind = | "run_progress" | "run_finished" | "run_cancelled" + | "run_failed" | "run_error" | "scenario_started" | "scenario_finished" | "scenario_error" | "log"; +export const TERMINAL_EVENT_KINDS: ReadonlySet = new Set([ + "run_finished", + "run_cancelled", + "run_failed", +]); + +export function isTerminalEvent(event: RunEvent): boolean { + return TERMINAL_EVENT_KINDS.has(event.kind); +} + export type RunEvent = { id: number; runId: string; @@ -35,3 +46,7 @@ export function formatSseEvent(event: RunEvent): string { export function formatSseKeepalive(): string { return `: keepalive ${new Date().toISOString()}\n\n`; } + +export function formatSseRetry(retryMs: number): string { + return `retry: ${Math.max(0, Math.floor(retryMs))}\n\n`; +} diff --git a/tests/integration/server/sse-reconnect.test.ts b/tests/integration/server/sse-reconnect.test.ts new file mode 100644 index 0000000..f3c8153 --- /dev/null +++ b/tests/integration/server/sse-reconnect.test.ts @@ -0,0 +1,162 @@ +import { afterEach, describe, expect, test } from "bun:test"; +import { mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +import { + type StartedServer, + startAgentProbeServer, +} from "../../../src/runtime/server/app-server.ts"; +import { buildServerConfig } from "../../../src/runtime/server/config.ts"; +import { METRIC_NAMES } from "../../../src/runtime/server/observability/index.ts"; +import { makeTempDir } from "../../unit/support.ts"; + +function writeMinimalData(root: string): string { + const data = join(root, "data"); + mkdirSync(data, { recursive: true }); + writeFileSync( + join(data, "endpoint.yaml"), + [ + "transport: http", + "connection:", + " base_url: http://example.test", + "request:", + " method: POST", + ' url: "{{ base_url }}/chat"', + " body_template: '{}'", + "response:", + " format: text", + ' content_path: "$"', + "", + ].join("\n"), + "utf8", + ); + return data; +} + +async function readEnoughBytes( + reader: ReadableStreamDefaultReader>, + decoder: TextDecoder, + predicate: (collected: string) => boolean, + maxChunks = 10, +): Promise { + let collected = ""; + for (let i = 0; i < maxChunks; i++) { + const result = await reader.read(); + if (result.done) break; + collected += decoder.decode(result.value, { stream: true }); + if (predicate(collected)) break; + } + return collected; +} + +async function startServer(servers: StartedServer[]): Promise { + const root = makeTempDir("sse-reconnect"); + const data = writeMinimalData(root); + const dbPath = join(root, "runs.sqlite3"); + const args = [ + "--host", + "127.0.0.1", + "--port", + "0", + "--data", + data, + "--db", + dbPath, + ]; + const server = await startAgentProbeServer( + buildServerConfig({ args, env: {} }), + ); + servers.push(server); + return server; +} + +describe("sse hardening", () => { + const servers: StartedServer[] = []; + + afterEach(async () => { + for (const server of servers.splice(0)) { + await server.stop(); + } + }); + + test("replays missed events using last_event_id query and emits terminal close", async () => { + const server = await startServer(servers); + const runId = "run-reconnect"; + + server.streamHub.publish({ + runId, + kind: "run_started", + payload: { run_id: runId, label: null, preset_id: null, trigger: "test" }, + }); + server.streamHub.publish({ + runId, + kind: "run_progress", + payload: { kind: "scenario_started", scenario_id: "smoke" }, + }); + + const url = `${server.url}/api/runs/${runId}/events?last_event_id=1`; + const response = await fetch(url); + expect(response.status).toBe(200); + expect(response.headers.get("cache-control")).toContain("no-store"); + expect(response.headers.get("x-accel-buffering")).toBe("no"); + expect(response.headers.get("connection")).toBe("keep-alive"); + + const reader = response.body?.getReader() as + | ReadableStreamDefaultReader> + | undefined; + expect(reader).toBeDefined(); + if (!reader) return; + const decoder = new TextDecoder(); + + server.streamHub.publish({ + runId, + kind: "run_finished", + payload: { kind: "run_finished", run_id: runId }, + }); + + const collected = await readEnoughBytes(reader, decoder, (text) => + text.includes("event: run_finished"), + ); + await reader.cancel(); + + expect(collected).toContain("retry: 2000"); + expect(collected).toContain("event: run_progress"); + expect(collected).not.toContain("event: run_started"); + expect(collected).toContain("event: run_finished"); + }); + + test("metrics record http requests and active sse connections", async () => { + const server = await startServer(servers); + + await fetch(`${server.url}/healthz`); + const snapshot = server.observability.metrics.snapshot(); + const requestEntries = snapshot.counters.filter( + (entry) => entry.name === METRIC_NAMES.httpRequests, + ); + expect(requestEntries.length).toBeGreaterThan(0); + expect(server.observability.metrics.getGauge(METRIC_NAMES.runsActive)).toBe( + 0, + ); + + const runId = "run-metrics"; + server.streamHub.publish({ + runId, + kind: "run_progress", + payload: { kind: "scenario_started" }, + }); + const events = await fetch(`${server.url}/api/runs/${runId}/events`); + const reader = events.body?.getReader() as + | ReadableStreamDefaultReader> + | undefined; + expect(reader).toBeDefined(); + if (!reader) return; + const decoder = new TextDecoder(); + await readEnoughBytes(reader, decoder, (text) => + text.includes("event: run_progress"), + ); + expect( + server.observability.metrics.getGauge(METRIC_NAMES.sseConnections), + ).toBe(1); + await reader.cancel(); + }); +}); diff --git a/tests/unit/dashboard/keyboard-shortcuts.test.tsx b/tests/unit/dashboard/keyboard-shortcuts.test.tsx new file mode 100644 index 0000000..03cf3e5 --- /dev/null +++ b/tests/unit/dashboard/keyboard-shortcuts.test.tsx @@ -0,0 +1,146 @@ +import { beforeEach, describe, expect, test } from "bun:test"; +import { Window } from "happy-dom"; + +const dashboardWindow = new Window({ url: "http://localhost/" }); +const dashboardDocument = dashboardWindow.document; +(globalThis as Record).window = dashboardWindow; +(globalThis as Record).document = dashboardDocument; +(globalThis as Record).KeyboardEvent = dashboardWindow.KeyboardEvent; +(globalThis as Record).Element = dashboardWindow.Element; +(globalThis as Record).HTMLElement = dashboardWindow.HTMLElement; + +import { + createKeyboardDispatcher, + shouldIgnoreKeyboardEvent, +} from "../../../dashboard/src/hooks/useKeyboardShortcuts.ts"; + +describe("keyboard shortcut dispatcher", () => { + beforeEach(() => { + dashboardDocument.body.innerHTML = ""; + }); + + function dispatch( + handler: (event: KeyboardEvent) => void, + key: string, + target?: EventTarget, + ): KeyboardEvent { + const event = new dashboardWindow.KeyboardEvent("keydown", { + key, + bubbles: true, + cancelable: true, + }) as unknown as KeyboardEvent; + if (target) { + Object.defineProperty(event, "target", { value: target }); + } + handler(event); + return event; + } + + test("ignores keys dispatched from text inputs", () => { + const input = dashboardDocument.createElement("input"); + input.type = "text"; + dashboardDocument.body.appendChild(input); + expect(shouldIgnoreKeyboardEvent(input)).toBeTrue(); + + let calls = 0; + const handler = createKeyboardDispatcher({ + shortcuts: [ + { + key: "j", + description: "down", + run: () => { + calls += 1; + }, + }, + ], + }); + dispatch(handler, "j", input); + expect(calls).toBe(0); + }); + + test("fires single-key shortcut and calls preventDefault", () => { + let calls = 0; + const handler = createKeyboardDispatcher({ + shortcuts: [ + { + key: "/", + description: "focus", + run: () => { + calls += 1; + }, + }, + ], + }); + const event = dispatch(handler, "/"); + expect(calls).toBe(1); + expect(event.defaultPrevented).toBeTrue(); + }); + + test("fires g r sequence only after both keys", () => { + let navigated = 0; + const handler = createKeyboardDispatcher({ + shortcuts: [ + { + sequence: ["g", "r"], + key: "r", + description: "go runs", + run: () => { + navigated += 1; + }, + }, + ], + }); + dispatch(handler, "g"); + expect(navigated).toBe(0); + dispatch(handler, "r"); + expect(navigated).toBe(1); + }); + + test("resets pending sequence after timeout", () => { + let navigated = 0; + const handler = createKeyboardDispatcher({ + shortcuts: [ + { + sequence: ["g", "r"], + key: "r", + description: "go runs", + run: () => { + navigated += 1; + }, + }, + ], + sequenceTimeoutMs: 1, + }); + dispatch(handler, "g"); + const now = Date.now(); + while (Date.now() - now < 5) { + // Spin the event loop past the tiny timeout. + } + dispatch(handler, "r"); + expect(navigated).toBe(0); + }); + + test("ignores when a modifier key is held", () => { + let calls = 0; + const handler = createKeyboardDispatcher({ + shortcuts: [ + { + key: "j", + description: "down", + run: () => { + calls += 1; + }, + }, + ], + }); + const event = new dashboardWindow.KeyboardEvent("keydown", { + key: "j", + ctrlKey: true, + bubbles: true, + cancelable: true, + }) as unknown as KeyboardEvent; + handler(event); + expect(calls).toBe(0); + expect(event.defaultPrevented).toBeFalse(); + }); +}); diff --git a/tests/unit/server/observability/logger.test.ts b/tests/unit/server/observability/logger.test.ts new file mode 100644 index 0000000..ba6c9d9 --- /dev/null +++ b/tests/unit/server/observability/logger.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, test } from "bun:test"; + +import { createLogger } from "../../../../src/runtime/server/observability/logger.ts"; + +describe("createLogger", () => { + test("emits JSON lines with merged base fields", () => { + const lines: string[] = []; + const logger = createLogger({ + component: "agentprobe.server", + format: "json", + sink: (line) => lines.push(line), + baseFields: { request_id: "rid-1" }, + }); + logger.info("http.request", { method: "GET", status: 200 }); + expect(lines).toHaveLength(1); + const parsed = JSON.parse(lines[0] ?? ""); + expect(parsed.event).toBe("http.request"); + expect(parsed.component).toBe("agentprobe.server"); + expect(parsed.request_id).toBe("rid-1"); + expect(parsed.method).toBe("GET"); + expect(parsed.status).toBe(200); + expect(parsed.level).toBe("info"); + }); + + test("text format emits component, level, event prefix", () => { + const lines: string[] = []; + const logger = createLogger({ + component: "agentprobe.run", + format: "text", + sink: (line) => lines.push(line), + }); + logger.error("run.error", { run_id: "abc" }); + expect(lines[0]).toContain("[agentprobe.run]"); + expect(lines[0]).toContain("error"); + expect(lines[0]).toContain("run.error"); + expect(lines[0]).toContain("run_id=abc"); + }); + + test("child loggers inherit base fields and override component", () => { + const lines: string[] = []; + const parent = createLogger({ + component: "agentprobe.server", + format: "json", + sink: (line) => lines.push(line), + baseFields: { request_id: "rid-1" }, + }); + const child = parent.child("agentprobe.run", { run_id: "abc" }); + child.warn("run.slow", { duration_ms: 1500 }); + const parsed = JSON.parse(lines[0] ?? ""); + expect(parsed.component).toBe("agentprobe.run"); + expect(parsed.request_id).toBe("rid-1"); + expect(parsed.run_id).toBe("abc"); + expect(parsed.duration_ms).toBe(1500); + expect(parsed.level).toBe("warn"); + }); +}); diff --git a/tests/unit/server/observability/metrics.test.ts b/tests/unit/server/observability/metrics.test.ts new file mode 100644 index 0000000..0e26e5c --- /dev/null +++ b/tests/unit/server/observability/metrics.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, test } from "bun:test"; + +import { + MetricsRegistry, + SERVER_METRIC_NAMES, +} from "../../../../src/runtime/server/observability/metrics.ts"; + +describe("MetricsRegistry", () => { + test("increments counters and groups by labels", () => { + const metrics = new MetricsRegistry(); + metrics.incrementCounter(SERVER_METRIC_NAMES.httpRequests, 1, { + method: "GET", + route: "/api/runs", + status: 200, + }); + metrics.incrementCounter(SERVER_METRIC_NAMES.httpRequests, 1, { + method: "GET", + route: "/api/runs", + status: 200, + }); + metrics.incrementCounter(SERVER_METRIC_NAMES.httpRequests, 1, { + method: "POST", + route: "/api/runs", + status: 202, + }); + + expect( + metrics.getCounter(SERVER_METRIC_NAMES.httpRequests, { + method: "GET", + route: "/api/runs", + status: 200, + }), + ).toBe(2); + const snapshot = metrics.snapshot(); + const requestCounters = snapshot.counters.filter( + (entry) => entry.name === SERVER_METRIC_NAMES.httpRequests, + ); + expect(requestCounters).toHaveLength(2); + }); + + test("gauges track active and total separately", () => { + const metrics = new MetricsRegistry(); + metrics.adjustGauge(SERVER_METRIC_NAMES.runsActive, 1); + metrics.adjustGauge(SERVER_METRIC_NAMES.runsActive, 1); + metrics.adjustGauge(SERVER_METRIC_NAMES.runsActive, -1); + metrics.incrementCounter(SERVER_METRIC_NAMES.runsStartedTotal, 1); + metrics.incrementCounter(SERVER_METRIC_NAMES.runsFinishedTotal, 1); + + expect(metrics.getGauge(SERVER_METRIC_NAMES.runsActive)).toBe(1); + expect(metrics.getCounter(SERVER_METRIC_NAMES.runsStartedTotal)).toBe(1); + expect(metrics.getCounter(SERVER_METRIC_NAMES.runsFinishedTotal)).toBe(1); + }); + + test("snapshot is sorted by name", () => { + const metrics = new MetricsRegistry(); + metrics.incrementCounter("z.alpha", 1); + metrics.incrementCounter("a.alpha", 1); + const snapshot = metrics.snapshot(); + expect(snapshot.counters.map((entry) => entry.name)).toEqual([ + "a.alpha", + "z.alpha", + ]); + }); + + test("reset clears all state", () => { + const metrics = new MetricsRegistry(); + metrics.incrementCounter("c", 5); + metrics.setGauge("g", 3); + metrics.reset(); + expect(metrics.snapshot().counters).toEqual([]); + expect(metrics.snapshot().gauges).toEqual([]); + }); +}); diff --git a/tests/unit/server/observability/redaction.test.ts b/tests/unit/server/observability/redaction.test.ts new file mode 100644 index 0000000..abdb0f7 --- /dev/null +++ b/tests/unit/server/observability/redaction.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, test } from "bun:test"; + +import type { ServerConfig } from "../../../../src/runtime/server/config.ts"; +import { + isSecretKey, + redactRecord, + redactSecretValue, + summarizeServerConfig, +} from "../../../../src/runtime/server/observability/redaction.ts"; + +describe("redaction", () => { + test("redactSecretValue masks long secrets and keeps length signal", () => { + expect(redactSecretValue("abcd1234")).toBe("[redacted]:8c"); + expect(redactSecretValue("ab")).toBe("[redacted]"); + expect(redactSecretValue(undefined)).toBe(""); + expect(redactSecretValue(null)).toBe(""); + }); + + test("isSecretKey covers common credential patterns", () => { + expect(isSecretKey("token")).toBeTrue(); + expect(isSecretKey("api_key")).toBeTrue(); + expect(isSecretKey("Authorization")).toBeTrue(); + expect(isSecretKey("port")).toBeFalse(); + }); + + test("redactRecord redacts nested secret keys", () => { + const out = redactRecord({ + host: "127.0.0.1", + auth: { token: "supersecretvalue", scope: "read" }, + }); + expect(out.host).toBe("127.0.0.1"); + const auth = out.auth as Record; + expect(auth.token).toBe("[redacted]:16c"); + expect(auth.scope).toBe("read"); + }); + + test("summarizeServerConfig redacts token and db url", () => { + const config: ServerConfig = { + host: "127.0.0.1", + port: 7878, + dataPath: "/tmp/data", + dbUrl: "postgres://user:secretpw@db.example/agentprobe", + dashboardDist: undefined, + token: "tok_super_long_value", + corsOrigins: [], + unsafeExpose: false, + openBrowser: false, + logFormat: "json", + }; + const summary = summarizeServerConfig(config); + expect(summary.token).toBe("[redacted]:20c"); + expect(String(summary.db_url)).not.toContain("secretpw"); + }); +}); diff --git a/tests/unit/server/observability/spans.test.ts b/tests/unit/server/observability/spans.test.ts new file mode 100644 index 0000000..568eaa3 --- /dev/null +++ b/tests/unit/server/observability/spans.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, test } from "bun:test"; + +import { + SERVER_SPAN_NAMES, + SpanRecorder, +} from "../../../../src/runtime/server/observability/spans.ts"; + +describe("SpanRecorder", () => { + test("records duration and ok status for synchronous work", () => { + const recorder = new SpanRecorder(); + const scope = recorder.start(SERVER_SPAN_NAMES.runStartValidation, { + preset_id: "preset-1", + }); + scope.setAttribute("note", "ok"); + scope.setStatus("ok"); + scope.end(); + const records = recorder.snapshot(); + expect(records).toHaveLength(1); + const [record] = records; + expect(record).toBeDefined(); + if (!record) return; + expect(record.name).toBe(SERVER_SPAN_NAMES.runStartValidation); + expect(record.status).toBe("ok"); + expect(record.attributes).toMatchObject({ + preset_id: "preset-1", + note: "ok", + }); + expect(record.durationMs).toBeGreaterThanOrEqual(0); + }); + + test("withSpan captures errors and rethrows", async () => { + const recorder = new SpanRecorder(); + await expect( + recorder.withSpan("server.test", {}, () => { + throw new Error("boom"); + }), + ).rejects.toThrow("boom"); + const records = recorder.snapshot(); + expect(records).toHaveLength(1); + const [record] = records; + expect(record).toBeDefined(); + if (!record) return; + expect(record.status).toBe("error"); + expect(record.error?.message).toBe("boom"); + }); + + test("end is idempotent", () => { + const recorder = new SpanRecorder(); + const scope = recorder.start("s"); + scope.end(); + scope.end(); + expect(recorder.snapshot()).toHaveLength(1); + }); +});