Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/dotnet-verify-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ jobs:
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

- name: Build solution
working-directory: dotnet
shell: bash
run: dotnet build agent-framework-dotnet.slnx -f net10.0 --warnaserror

- name: Run verify-samples
id: verify
working-directory: dotnet
Expand Down
11 changes: 11 additions & 0 deletions dotnet/.github/skills/verify-samples-tool/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@ The `verify-samples` project (`dotnet/eng/verify-samples/`) is an automated tool

## Running verify-samples

**Important:** By default, verify-samples runs each sample with `--no-build`, so samples must already be built. Either pass `--build` to build samples during the run, or build the solution first:

```bash
cd dotnet
dotnet build agent-framework-dotnet.slnx -f net10.0
```

Then run verify-samples:

```bash
# Run all samples across all categories
dotnet run --project eng/verify-samples -- --log results.log --csv results.csv

Expand All @@ -24,6 +31,10 @@ dotnet run --project eng/verify-samples -- Agent_Step02_StructuredOutput Agent_S
# Control parallelism (default 8)
dotnet run --project eng/verify-samples -- --parallel 8 --log results.log

# Build samples during run (skips the need for a prior build step)
# Use with caution: multiple samples are built in parallel, which may cause build conflicts
dotnet run --project eng/verify-samples -- --build --log results.log

# Combine options
dotnet run --project eng/verify-samples -- --category 03-workflows --parallel 4 --log results.log --csv results.csv --md results.md
```
Expand Down
5 changes: 4 additions & 1 deletion dotnet/eng/verify-samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
// dotnet run -- --log results.log # Write sequential log to file
// dotnet run -- --csv results.csv # Write CSV summary to file
// dotnet run -- --md results.md # Write Markdown summary to file
// dotnet run -- --build # Build samples during run (default: --no-build)
// Note: By default, this tool expects sample build outputs to already exist.
// Pre-build the solution before running, or pass --build; otherwise samples fail because their build output is missing.
//
// Required environment variables (for AI-powered samples):
// AZURE_OPENAI_ENDPOINT
Expand Down Expand Up @@ -63,7 +66,7 @@
// Run all samples
var reporter = new ConsoleReporter();
var verifier = new SampleVerifier(chatClient);
var orchestrator = new VerificationOrchestrator(verifier, reporter, dotnetRoot, TimeSpan.FromMinutes(3), logWriter);
var orchestrator = new VerificationOrchestrator(verifier, reporter, dotnetRoot, TimeSpan.FromMinutes(3), logWriter, buildSamples: options.BuildSamples);

var run = await orchestrator.RunAllAsync(options.Samples, options.MaxParallelism);

Expand Down
13 changes: 11 additions & 2 deletions dotnet/eng/verify-samples/SampleRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,32 @@ internal static class SampleRunner
{
/// <summary>
/// Runs <c>dotnet run --framework net10.0</c> in the given project directory.
/// When <paramref name="build"/> is false (the default), <c>--no-build</c> is passed
/// to skip building, assuming the project was pre-built.
/// </summary>
public static Task<SampleRunResult> RunAsync(
string projectPath,
TimeSpan timeout,
bool build = false,
CancellationToken cancellationToken = default)
=> RunAsync(projectPath, "run --framework net10.0", timeout, inputs: null, inputDelayMs: 0, cancellationToken: cancellationToken);
=> RunAsync(projectPath, DotnetRunArgs(build), timeout, inputs: null, inputDelayMs: 0, cancellationToken: cancellationToken);

/// <summary>
/// Runs <c>dotnet run --framework net10.0</c> with stdin inputs.
/// When <paramref name="build"/> is false (the default), <c>--no-build</c> is passed
/// to skip building, assuming the project was pre-built.
/// </summary>
public static Task<SampleRunResult> RunAsync(
string projectPath,
TimeSpan timeout,
string?[]? inputs,
int inputDelayMs = 2000,
bool build = false,
CancellationToken cancellationToken = default)
=> RunAsync(projectPath, "run --framework net10.0", timeout, inputs, inputDelayMs, cancellationToken);
=> RunAsync(projectPath, DotnetRunArgs(build), timeout, inputs, inputDelayMs, cancellationToken);

/// <summary>
/// Selects the <c>dotnet</c> argument string used to run a sample targeting <c>net10.0</c>.
/// </summary>
/// <param name="build">
/// <see langword="true"/> to omit <c>--no-build</c>; <see langword="false"/> to include it.
/// </param>
/// <returns>The <c>run</c> argument string, with or without <c>--no-build</c>.</returns>
private static string DotnetRunArgs(bool build)
{
    if (build)
    {
        return "run --framework net10.0";
    }

    return "run --no-build --framework net10.0";
}
Comment thread
westey-m marked this conversation as resolved.
Outdated

/// <summary>
/// Runs an arbitrary <c>dotnet</c> command in the given working directory.
Expand Down
49 changes: 38 additions & 11 deletions dotnet/eng/verify-samples/SampleVerifier.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright (c) Microsoft. All rights reserved.

using System.ComponentModel;
using System.Text.Json.Serialization;
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
Expand Down Expand Up @@ -27,11 +28,19 @@ public SampleVerifier(ChatClient? chatClient = null)
instructions: """
You are a test output verifier. You will be given:
1. The actual stdout output of a program
2. A list of expectations about what the output should contain or demonstrate
2. The stderr output (if any)
3. A list of expectations about what the output should contain or demonstrate

Your job is to determine whether the actual output satisfies each expectation.
Be reasonable — the output comes from an LLM so exact wording won't match, but the
semantic intent should be clearly satisfied.

In your response, you MUST:
- Always provide ai_reasoning with a brief overall assessment.
- Always provide exactly one entry in expectation_results for each expectation,
in the same order as the input list.
- For each expectation_results entry, echo the expectation text in the expectation
field and explain your assessment in the detail field, citing evidence from the output.
""",
name: "OutputVerifier");
}
Expand Down Expand Up @@ -78,7 +87,7 @@ public async Task<VerificationResult> VerifyAsync(SampleDefinition sample, Sampl
}
else
{
var aiResult = await this.VerifyWithAIAsync(run.Stdout, sample.ExpectedOutputDescription);
var aiResult = await this.VerifyWithAIAsync(run.Stdout, run.Stderr, sample.ExpectedOutputDescription);
aiReasoning = aiResult.Reasoning;

foreach (var unmet in aiResult.UnmetExpectations)
Expand All @@ -100,16 +109,28 @@ public async Task<VerificationResult> VerifyAsync(SampleDefinition sample, Sampl
}

private async Task<(string Reasoning, List<string> UnmetExpectations)> VerifyWithAIAsync(
string actualOutput,
string stdout,
string stderr,
string[] expectations)
{
var expectationList = string.Join("\n", expectations.Select((e, i) => $" {i + 1}. {e}"));

var stderrSection = string.IsNullOrWhiteSpace(stderr)
? ""
: $"""

Stderr output:
---
{Truncate(stderr, 2000)}
---
""";

var prompt = $"""
Actual program output:
---
{Truncate(actualOutput, 4000)}
{Truncate(stdout, 4000)}
---

{stderrSection}
Expectations to verify:
{expectationList}

Expand All @@ -126,7 +147,9 @@ Does the output satisfy all expectations?
return ($"AI verification returned null result. Raw: {response.Text}", ["AI verification returned null result."]);
}

var reasoning = result.Reasoning ?? "(no reasoning provided)";
var reasoning = string.IsNullOrWhiteSpace(result.AIReasoning)
? "(no reasoning provided)"
: result.AIReasoning;

// Collect unmet expectations as individual failures
var unmet = new List<string>();
Expand Down Expand Up @@ -174,12 +197,14 @@ internal sealed class AIVerificationResponse
public bool Pass { get; set; }

/// <summary>Brief explanation of the overall assessment.</summary>
[JsonPropertyName("reasoning")]
public string? Reasoning { get; set; }
[JsonPropertyName("ai_reasoning")]
[Description("Always required. Brief explanation of the overall assessment, covering all expectations.")]
public string AIReasoning { get; set; } = string.Empty;

/// <summary>Per-expectation results.</summary>
[JsonPropertyName("expectation_results")]
public List<ExpectationResult>? ExpectationResults { get; set; }
[Description("Always required. One entry per expectation, in the same order as the input list.")]
public List<ExpectationResult> ExpectationResults { get; set; } = [];
}

/// <summary>
Expand All @@ -190,13 +215,15 @@ internal sealed class ExpectationResult
{
/// <summary>The expectation text that was evaluated.</summary>
[JsonPropertyName("expectation")]
public string? Expectation { get; set; }
[Description("Echo back the expectation text being evaluated.")]
public string Expectation { get; set; } = string.Empty;

/// <summary>Whether this expectation was met.</summary>
[JsonPropertyName("met")]
public bool Met { get; set; }

/// <summary>Detail about how the expectation was or was not met.</summary>
[JsonPropertyName("detail")]
public string? Detail { get; set; }
[Description("Explain how the expectation was or was not met, citing specific evidence from the output.")]
public string Detail { get; set; } = string.Empty;
}
9 changes: 6 additions & 3 deletions dotnet/eng/verify-samples/VerificationOrchestrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,22 @@ internal sealed class VerificationOrchestrator
private readonly LogFileWriter? _logWriter;
private readonly string _dotnetRoot;
private readonly TimeSpan _timeout;
private readonly bool _buildSamples;

public VerificationOrchestrator(
SampleVerifier verifier,
ConsoleReporter reporter,
string dotnetRoot,
TimeSpan timeout,
LogFileWriter? logWriter = null)
LogFileWriter? logWriter = null,
bool buildSamples = false)
{
this._verifier = verifier;
this._reporter = reporter;
this._logWriter = logWriter;
this._dotnetRoot = dotnetRoot;
this._timeout = timeout;
this._buildSamples = buildSamples;
}

/// <summary>
Expand Down Expand Up @@ -136,8 +139,8 @@ private async Task RunSingleAsync(

var projectPath = Path.Combine(this._dotnetRoot, sample.ProjectPath);
var run = sample.Inputs.Length > 0
? await SampleRunner.RunAsync(projectPath, this._timeout, sample.Inputs, sample.InputDelayMs)
: await SampleRunner.RunAsync(projectPath, this._timeout);
? await SampleRunner.RunAsync(projectPath, this._timeout, sample.Inputs, sample.InputDelayMs, build: this._buildSamples)
: await SampleRunner.RunAsync(projectPath, this._timeout, build: this._buildSamples);

log.Add($"[{sample.Name}] Completed ({run.Elapsed.TotalSeconds:F1}s, exit={run.ExitCode})");
this._reporter.WriteLineWithPrefix(
Expand Down
20 changes: 20 additions & 0 deletions dotnet/eng/verify-samples/VerifyOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ internal sealed class VerifyOptions
/// </summary>
public string? LogFilePath { get; init; }

/// <summary>
/// When true, samples are built as part of <c>dotnet run</c>.
/// When false (the default), <c>--no-build</c> is passed, assuming a prior build step.
/// </summary>
public bool BuildSamples { get; init; }

/// <summary>
/// The filtered list of samples to process.
/// </summary>
Expand Down Expand Up @@ -55,6 +61,7 @@ internal sealed class VerifyOptions
var logFilePath = ExtractArg(argList, "--log");
var csvFilePath = ExtractArg(argList, "--csv");
var markdownFilePath = ExtractArg(argList, "--md");
var buildSamples = ExtractFlag(argList, "--build");

int maxParallelism = 8;
var parallelArg = ExtractArg(argList, "--parallel");
Expand Down Expand Up @@ -105,6 +112,7 @@ internal sealed class VerifyOptions
LogFilePath = logFilePath,
CsvFilePath = csvFilePath,
MarkdownFilePath = markdownFilePath,
BuildSamples = buildSamples,
Samples = samples,
};
}
Expand All @@ -128,4 +136,16 @@ internal sealed class VerifyOptions
list.RemoveRange(idx, 2);
return value;
}

/// <summary>
/// Removes a value-less switch from the argument list and reports whether it was present.
/// </summary>
/// <param name="list">The mutable argument list; every entry equal to <paramref name="flag"/> is removed from it.</param>
/// <param name="flag">The exact switch text to look for (compared case-sensitively).</param>
/// <returns>
/// <see langword="true"/> if at least one occurrence of <paramref name="flag"/> was found and removed;
/// otherwise <see langword="false"/>, in which case <paramref name="list"/> is left unchanged.
/// </returns>
private static bool ExtractFlag(List<string> list, string flag)
{
    // Remove every occurrence rather than only the first, so a repeated switch
    // (e.g. "--build --build") cannot remain in the list after extraction.
    return list.RemoveAll(arg => arg == flag) > 0;
}
}
Loading