11// Copyright (c) Microsoft. All rights reserved.
22
3+ using System . ComponentModel ;
34using System . Text . Json . Serialization ;
45using Microsoft . Agents . AI ;
56using Microsoft . Extensions . AI ;
@@ -27,11 +28,19 @@ public SampleVerifier(ChatClient? chatClient = null)
2728 instructions : """
2829 You are a test output verifier. You will be given:
2930 1. The actual stdout output of a program
30- 2. A list of expectations about what the output should contain or demonstrate
31+ 2. The stderr output (if any)
32+ 3. A list of expectations about what the output should contain or demonstrate
3133
3234 Your job is to determine whether the actual output satisfies each expectation.
3335 Be reasonable — the output comes from an LLM so exact wording won't match, but the
3436 semantic intent should be clearly satisfied.
37+
38+ In your response, you MUST:
39+ - Always provide ai_reasoning with a brief overall assessment.
40+ - Always provide exactly one entry in expectation_results for each expectation,
41+ in the same order as the input list.
42+ - For each expectation_results entry, echo the expectation text in the expectation
43+ field and explain your assessment in the detail field, citing evidence from the output.
3544 """ ,
3645 name : "OutputVerifier" ) ;
3746 }
@@ -78,7 +87,7 @@ public async Task<VerificationResult> VerifyAsync(SampleDefinition sample, Sampl
7887 }
7988 else
8089 {
81- var aiResult = await this . VerifyWithAIAsync ( run . Stdout , sample . ExpectedOutputDescription ) ;
90+ var aiResult = await this . VerifyWithAIAsync ( run . Stdout , run . Stderr , sample . ExpectedOutputDescription ) ;
8291 aiReasoning = aiResult . Reasoning ;
8392
8493 foreach ( var unmet in aiResult . UnmetExpectations )
@@ -100,16 +109,28 @@ public async Task<VerificationResult> VerifyAsync(SampleDefinition sample, Sampl
100109 }
101110
102111 private async Task < ( string Reasoning , List < string > UnmetExpectations ) > VerifyWithAIAsync (
103- string actualOutput ,
112+ string stdout ,
113+ string stderr ,
104114 string [ ] expectations )
105115 {
106116 var expectationList = string . Join ( "\n " , expectations . Select ( ( e , i ) => $ " { i + 1 } . { e } ") ) ;
117+
118+ var stderrSection = string . IsNullOrWhiteSpace ( stderr )
119+ ? ""
120+ : $ """
121+
122+ Stderr output:
123+ ---
124+ { Truncate ( stderr , 2000 ) }
125+ ---
126+ """ ;
127+
107128 var prompt = $ """
108129 Actual program output:
109130 ---
110- { Truncate ( actualOutput , 4000 ) }
131+ { Truncate ( stdout , 4000 ) }
111132 ---
112-
133+ { stderrSection }
113134 Expectations to verify:
114135 { expectationList }
115136
@@ -126,7 +147,9 @@ Does the output satisfy all expectations?
126147 return ( $ "AI verification returned null result. Raw: { response . Text } ", [ "AI verification returned null result." ] ) ;
127148 }
128149
129- var reasoning = result . Reasoning ?? "(no reasoning provided)" ;
150+ var reasoning = string . IsNullOrWhiteSpace ( result . AIReasoning )
151+ ? "(no reasoning provided)"
152+ : result . AIReasoning ;
130153
131154 // Collect unmet expectations as individual failures
132155 var unmet = new List < string > ( ) ;
@@ -174,12 +197,14 @@ internal sealed class AIVerificationResponse
174197 public bool Pass { get ; set ; }
175198
176199 /// <summary>Brief explanation of the overall assessment.</summary>
177- [ JsonPropertyName ( "reasoning" ) ]
178- public string ? Reasoning { get ; set ; }
200+ [ JsonPropertyName ( "ai_reasoning" ) ]
201+ [ Description ( "Always required. Brief explanation of the overall assessment, covering all expectations." ) ]
202+ public string AIReasoning { get ; set ; } = string . Empty ;
179203
180204 /// <summary>Per-expectation results.</summary>
181205 [ JsonPropertyName ( "expectation_results" ) ]
182- public List < ExpectationResult > ? ExpectationResults { get ; set ; }
206+ [ Description ( "Always required. One entry per expectation, in the same order as the input list." ) ]
207+ public List < ExpectationResult > ExpectationResults { get ; set ; } = [ ] ;
183208}
184209
185210/// <summary>
@@ -190,13 +215,15 @@ internal sealed class ExpectationResult
190215{
191216 /// <summary>The expectation text that was evaluated.</summary>
192217 [ JsonPropertyName ( "expectation" ) ]
193- public string ? Expectation { get ; set ; }
218+ [ Description ( "Echo back the expectation text being evaluated." ) ]
219+ public string Expectation { get ; set ; } = string . Empty ;
194220
195221 /// <summary>Whether this expectation was met.</summary>
196222 [ JsonPropertyName ( "met" ) ]
197223 public bool Met { get ; set ; }
198224
199225 /// <summary>Detail about how the expectation was or was not met.</summary>
200226 [ JsonPropertyName ( "detail" ) ]
201- public string ? Detail { get ; set ; }
227+ [ Description ( "Explain how the expectation was or was not met, citing specific evidence from the output." ) ]
228+ public string Detail { get ; set ; } = string . Empty ;
202229}
0 commit comments