Skip to content

Commit 3aa5ffa

Browse files
AnHeuermann and claude committed
Overhaul reference comparison: new error metrics, DataFrames CSV, richer HTML table
Replace the per-point `_check_point` / `error_fn` dispatch with two vectorised helpers (`_absolute_error`, `_scaled_relative_error`) and a globally-scaled relative tolerance check. `abs_tol` is now optional (`nothing` = disabled, shown as ∞ in the HTML); when set it acts as an independent hard threshold rather than a per-point floor.

Switch diff-CSV output from manual text-mode writing to DataFrames.jl + CSV.jl; the new format includes `_abserr` and `_relerr` columns for every failing signal.

Improve the `_diff.html` variable-coverage table:
- Add "Max Abs Error" (scientific notation) and "Max Rel Error" (percent) columns for both passing and failing signals.
- Read per-signal max errors for failing signals directly from the diff CSV.
- Hide the abs-tolerance from the meta line when `abs_tol` is `nothing`.

Thread `settings::CompareSettings` explicitly through `test_model` and `main`; remove the module-level `_CMP_SETTINGS` global and the `configure_comparison!` / `compare_settings` API.

Co-Authored-By: claude-sonnet-4-6 <noreply@anthropic.com>
1 parent ca8fd0d commit 3aa5ffa

5 files changed

Lines changed: 112 additions & 197 deletions

File tree

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ authors = ["AnHeuermann"]
55

66
[deps]
77
BaseModelica = "a17d5099-185d-4ff5-b5d3-51aa4569e56d"
8+
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
9+
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
810
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
911
DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
1012
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"

src/BaseModelicaLibraryTesting.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,7 @@ include("pipeline.jl")
2222

2323
# Shared types and constants
2424
export ModelResult, CompareSettings, RunInfo
25-
export LIBRARY, LIBRARY_VERSION, CMP_REL_TOL, CMP_ABS_TOL
26-
27-
# Comparison configuration
28-
export configure_comparison!, compare_settings
25+
export CMP_REL_TOL, CMP_ABS_TOL
2926

3027
# Pipeline phases
3128
export run_export # Phase 1: Base Modelica export via OMC

src/compare.jl

Lines changed: 93 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# ── Variable name utilities ────────────────────────────────────────────────────
22

3+
import CSV
4+
import DataFrames
35
import ModelingToolkit
46
import Printf: @sprintf
57

@@ -102,120 +104,6 @@ function _install_assets(results_root::String)
102104
end
103105
end
104106

105-
# ── Comparison settings and error functions ────────────────────────────────────
106-
107-
"""Module-level default comparison settings. Modify via `configure_comparison!`."""
108-
const _CMP_SETTINGS = CompareSettings()
109-
110-
"""
111-
_check_relative(s, r, ref_scale, cfg) → Bool
112-
113-
Classic relative-error check. Passes when
114-
115-
|s − r| ≤ max(rel_tol · |r|, abs_tol)
116-
117-
This is the traditional approach used by many validation tools. It works well
118-
when the signal stays well away from zero, but may produce false failures at
119-
zero crossings because the per-point tolerance shrinks to `abs_tol ≈ 0` when
120-
`r ≈ 0`.
121-
"""
122-
function _check_relative(s::Real, r::Real, ::Real, cfg::CompareSettings)::Bool
123-
abs(s - r) <= max(cfg.rel_tol * abs(r), cfg.abs_tol)
124-
end
125-
126-
"""
127-
_check_mixed(s, r, ref_scale, cfg) → Bool
128-
129-
Scale-aware relative-error check (default). Passes when
130-
131-
|s − r| ≤ max(rel_tol · |r|, rel_tol · ref_scale, abs_tol)
132-
133-
The middle term (`rel_tol · ref_scale`) provides an amplitude-proportional
134-
absolute floor. Near zero crossings the tolerance is set by the peak magnitude
135-
of the reference signal rather than the near-zero instantaneous value, so
136-
physically correct simulations are not falsely rejected.
137-
"""
138-
function _check_mixed(s::Real, r::Real, ref_scale::Real, cfg::CompareSettings)::Bool
139-
abs(s - r) <= max(cfg.rel_tol * abs(r), cfg.rel_tol * ref_scale, cfg.abs_tol)
140-
end
141-
142-
"""
143-
_check_absolute(s, r, ref_scale, cfg) → Bool
144-
145-
Pure absolute check. Passes when
146-
147-
|s − r| ≤ abs_tol
148-
149-
Useful when all compared signals have known, small magnitudes or when a
150-
signal-independent tolerance threshold is required.
151-
"""
152-
function _check_absolute(s::Real, r::Real, ::Real, cfg::CompareSettings)::Bool
153-
abs(s - r) <= cfg.abs_tol
154-
end
155-
156-
"""
157-
_check_point(s, r, ref_scale, cfg) → Bool
158-
159-
Dispatch to the error function selected by `cfg.error_fn`.
160-
161-
| `error_fn` | Description |
162-
|:--------------|:----------------------------------------------------|
163-
| `:mixed` | Scale-aware relative error (default, recommended) |
164-
| `:relative` | Classic relative error (may fail at zero crossings) |
165-
| `:absolute` | Pure absolute error |
166-
"""
167-
function _check_point(s::Real, r::Real, ref_scale::Real, cfg::CompareSettings)::Bool
168-
fn = cfg.error_fn
169-
fn === :mixed && return _check_mixed(s, r, ref_scale, cfg)
170-
fn === :relative && return _check_relative(s, r, ref_scale, cfg)
171-
fn === :absolute && return _check_absolute(s, r, ref_scale, cfg)
172-
throw(ArgumentError(
173-
"Unknown error_fn $(repr(fn)); choose :mixed, :relative, or :absolute"))
174-
end
175-
176-
"""
177-
configure_comparison!(; rel_tol, abs_tol, error_fn) → CompareSettings
178-
179-
Update the module-level comparison settings in-place and return them.
180-
181-
# Keyword arguments
182-
183-
- `rel_tol` — maximum allowed relative error. Default: `$(CMP_REL_TOL)` (2 %).
184-
- `abs_tol` — hard absolute-error floor applied when signals are near zero.
185-
Default: `$(CMP_ABS_TOL)`.
186-
- `error_fn` — selects the point-wise check function. One of:
187-
- `:mixed` — scale-aware relative error (default, recommended);
188-
- `:relative` — classic relative error (may reject valid zero-crossing signals);
189-
- `:absolute` — pure absolute error.
190-
191-
# Example
192-
193-
```julia
194-
configure_comparison!(rel_tol = 0.01, error_fn = :relative)
195-
```
196-
"""
197-
function configure_comparison!(;
198-
rel_tol :: Union{Float64,Nothing} = nothing,
199-
abs_tol :: Union{Float64,Nothing} = nothing,
200-
error_fn :: Union{Symbol,Nothing} = nothing,
201-
)
202-
isnothing(rel_tol) || (_CMP_SETTINGS.rel_tol = rel_tol)
203-
isnothing(abs_tol) || (_CMP_SETTINGS.abs_tol = abs_tol)
204-
isnothing(error_fn) || (_CMP_SETTINGS.error_fn = error_fn)
205-
return _CMP_SETTINGS
206-
end
207-
208-
"""
209-
compare_settings() → CompareSettings
210-
211-
Return the current module-level comparison settings.
212-
213-
Pass the returned object (or a freshly constructed `CompareSettings(...)`) to
214-
`compare_with_reference` via the `settings` keyword to override the defaults
215-
for a single call without changing the global state.
216-
"""
217-
compare_settings() = _CMP_SETTINGS
218-
219107
# ── Interactive diff HTML ──────────────────────────────────────────────────────
220108

221109
"""
@@ -235,32 +123,42 @@ The page references `../../assets/dygraph.min.*` relative to its location.
235123
`_install_assets` is called automatically.
236124
"""
237125
function write_diff_html(model_dir::String, model::String;
238-
diff_csv_path::String = "",
239-
pass_sigs::Vector{String} = String[],
240-
skip_sigs::Vector{String} = String[])
126+
diff_csv_path::String = "",
127+
pass_sigs::Vector{String} = String[],
128+
skip_sigs::Vector{String} = String[],
129+
pass_max_abs_error::Dict{String,Float64} = Dict{String,Float64}(),
130+
pass_max_rel_error::Dict{String,Float64} = Dict{String,Float64}(),
131+
settings::CompareSettings = CompareSettings())
241132
short_name = split(model, ".")[end]
242133
html_path = joinpath(model_dir, "$(short_name)_diff.html")
243134
results_root = dirname(dirname(abspath(model_dir))) # …/files/<model> → …
244135
_install_assets(results_root)
245136

246-
# Read fail_sigs and CSV content from the diff CSV (may not exist).
247-
fail_sigs = String[]
248-
csv_js = ""
137+
# Read fail_sigs, per-signal max errors, and CSV content from the diff CSV.
138+
fail_sigs = String[]
139+
max_abs_error = Dict{String,Float64}()
140+
max_rel_error = Dict{String,Float64}()
141+
csv_js = ""
249142
if !isempty(diff_csv_path) && isfile(diff_csv_path)
250-
lines = readlines(diff_csv_path)
251-
if length(lines) >= 1
252-
headers = [replace(strip(h), "\"" => "") for h in split(lines[1], ",")]
253-
for h in headers
254-
length(h) > 4 && h[end-3:end] == "_ref" && push!(fail_sigs, h[1:end-4])
255-
end
256-
csv_text = read(diff_csv_path, String)
257-
csv_js = replace(replace(csv_text, "\\" => "\\\\"), "`" => "\\`")
143+
df = CSV.read(diff_csv_path, DataFrames.DataFrame)
144+
for col in names(df)
145+
endswith(col, "_ref") && push!(fail_sigs, col[1:end-4])
146+
end
147+
for sig in fail_sigs
148+
max_abs_error[sig] = maximum(df[!, "$(sig)_abserr"])
149+
max_rel_error[sig] = maximum(df[!, "$(sig)_relerr"])
258150
end
151+
csv_text = read(diff_csv_path, String)
152+
csv_js = replace(replace(csv_text, "\\" => "\\\\"), "`" => "\\`")
259153
end
260154

261155
# ── Meta block ──────────────────────────────────────────────────────────────
262-
tol_str = "(rel &#x2264; $(round(Int, _CMP_SETTINGS.rel_tol * 100))%," *
263-
" abs &#x2264; $(_CMP_SETTINGS.abs_tol))"
156+
tol_str = if settings.abs_tol === nothing
157+
"(rel &#x2264; $(round(Int, settings.rel_tol * 100))%)"
158+
else
159+
"(rel &#x2264; $(round(Int, settings.rel_tol * 100))%," *
160+
" abs &#x2264; $(settings.abs_tol))"
161+
end
264162
csv_link = isempty(fail_sigs) ? "" :
265163
""" &nbsp;&middot;&nbsp; <a href="$(short_name)_diff.csv">Download diff CSV</a>"""
266164
skip_note = isempty(skip_sigs) ? "" :
@@ -277,23 +175,30 @@ function write_diff_html(model_dir::String, model::String;
277175
n_total = n_found + length(skip_sigs)
278176
th = "border:1px solid #ccc;padding:3px 10px;background:#eee;text-align:left;"
279177
td = "border:1px solid #ccc;padding:3px 10px;"
178+
tdr = td * "text-align:right;"
280179
rows = String[]
281180
for sig in pass_sigs
282181
push!(rows, "<tr style=\"background:#d4edda\"><td style=\"$td\">$sig</td>" *
283-
"<td style=\"$td\">&#10003; pass</td></tr>")
182+
"<td style=\"$td\">&#10003; pass</td>" *
183+
"<td style=\"$tdr\">$(@sprintf("%.4e", pass_max_abs_error[sig]))</td>" *
184+
"<td style=\"$tdr\">$(@sprintf("%.2f%%", pass_max_rel_error[sig] * 100))</td></tr>")
284185
end
285186
for sig in fail_sigs
286187
push!(rows, "<tr style=\"background:#f8d7da\"><td style=\"$td\">$sig</td>" *
287-
"<td style=\"$td\">&#10007; fail</td></tr>")
188+
"<td style=\"$td\">&#10007; fail</td>" *
189+
"<td style=\"$tdr\">$(@sprintf("%.4e", max_abs_error[sig]))</td>" *
190+
"<td style=\"$tdr\">$(@sprintf("%.2f%%", max_rel_error[sig] * 100))</td></tr>")
288191
end
289192
for sig in skip_sigs
290193
push!(rows, "<tr style=\"background:#fff3cd\"><td style=\"$td\">$sig</td>" *
291-
"<td style=\"$td\">not found in simulation</td></tr>")
194+
"<td style=\"$td\">not found in simulation</td>" *
195+
"<td style=\"$tdr\">&#x2014;</td><td style=\"$tdr\">&#x2014;</td></tr>")
292196
end
293197
"""<h2 style="font-size:1.1em;margin-top:2em;">Variable Coverage """ *
294198
"""&#x2014; $n_found of $n_total reference signal(s) found</h2>""" *
295199
"""<table style="border-collapse:collapse;font-size:13px;">""" *
296-
"""<thead><tr><th style="$th">Signal</th><th style="$th">Status</th></tr></thead>""" *
200+
"""<thead><tr><th style="$th">Signal</th><th style="$th">Status</th>""" *
201+
"""<th style="$th">Max Abs Error</th><th style="$th">Max Rel Error</th></tr></thead>""" *
297202
"""<tbody>$(join(rows))</tbody></table>"""
298203
end
299204

@@ -312,6 +217,27 @@ end
312217

313218
# ── Reference comparison ───────────────────────────────────────────────────────
314219

"""
    _absolute_error(actual, reference) -> Vector

Return the element-wise absolute error `|actual[i] - reference[i]|`.

# Arguments
- `actual`: values under test.
- `reference`: reference values, one per element of `actual`.

# Returns
A newly allocated vector with one entry per input element (empty when both inputs
are empty). Its element type is whatever `abs(a - r)` yields for the input element
types.

# Throws
- `DimensionMismatch` if `actual` and `reference` differ in length.
"""
function _absolute_error(
    actual::AbstractVector{<:Real}, reference::AbstractVector{<:Real}
)
    # Broadcasting on its own would silently stretch a length-1 vector across the
    # other argument, so mismatched lengths are rejected explicitly.
    length(actual) == length(reference) || throw(DimensionMismatch(
        "actual has length $(length(actual)), " *
        "reference has length $(length(reference))",
    ))
    return abs.(actual .- reference)
end
228+
"""
    _scaled_relative_error(actual, reference) -> Vector

Return the element-wise absolute error between `actual` and `reference`, divided by
a single global scale: the maximum absolute value of `reference`, floored at `eps()`
so that an all-zero (or empty) reference never causes a division by zero.

# Arguments
- `actual`: values under test.
- `reference`: reference values, one per element of `actual`.

# Returns
A newly allocated vector with one entry per input element (empty when both inputs
are empty).

# Throws
- `DimensionMismatch` if `actual` and `reference` differ in length.
"""
function _scaled_relative_error(
    actual::AbstractVector{<:Real}, reference::AbstractVector{<:Real}
)
    # Broadcasting on its own would silently stretch a length-1 vector across the
    # other argument, so mismatched lengths are rejected explicitly.
    length(actual) == length(reference) || throw(DimensionMismatch(
        "actual has length $(length(actual)), " *
        "reference has length $(length(reference))",
    ))
    # `maximum` throws on an empty collection; treat an empty reference as having
    # zero amplitude so the `eps()` floor applies. `maximum(abs, x)` also avoids
    # materialising the temporary `abs.(x)` array.
    peak = isempty(reference) ? 0.0 : maximum(abs, reference)
    reference_scale = max(peak, eps())
    return abs.(actual .- reference) ./ reference_scale
end
240+
315241
"""
316242
_eval_sim(sol, accessor, t) → Float64
317243
@@ -354,16 +280,14 @@ is written whenever there are failures or skipped signals.
354280
355281
# Keyword arguments
356282
- `settings` — a `CompareSettings` instance controlling tolerances and the
357-
error function. Defaults to the module-level settings returned
358-
by `compare_settings()`. Use `configure_comparison!` to change
359-
the defaults, or pass a local `CompareSettings(...)` here.
283+
error function.
360284
"""
361285
function compare_with_reference(
362286
sol,
363287
ref_csv_path::String,
364288
model_dir::String,
365289
model::String;
366-
settings::CompareSettings = _CMP_SETTINGS,
290+
settings::CompareSettings = CompareSettings(),
367291
)::Tuple{Int,Int,Int,String}
368292

369293
times, ref_data = _read_ref_csv(ref_csv_path)
@@ -409,7 +333,12 @@ function compare_with_reference(
409333
pass_sigs = String[]
410334
fail_sigs = String[]
411335
skip_sigs = String[]
412-
fail_scales = Dict{String,Float64}()
336+
pass_max_abs_error = Dict{String, Float64}()
337+
pass_max_rel_error = Dict{String, Float64}()
338+
fail_ref_vals = Dict{String, Vector{Float64}}()
339+
fail_sim_vals = Dict{String, Vector{Float64}}()
340+
fail_abs_error = Dict{String, Vector{Float64}}()
341+
fail_scaled_rel_error = Dict{String, Vector{Float64}}()
413342

414343
for sig in signals
415344
haskey(ref_data, sig) || continue # signal absent from ref CSV entirely
@@ -424,10 +353,6 @@ function compare_with_reference(
424353
ref_vals = ref_data[sig][valid_mask]
425354
n_total += 1
426355

427-
# Peak |ref| — used as amplitude floor so relative error stays finite
428-
# near zero crossings.
429-
ref_scale = isempty(ref_vals) ? 0.0 : maximum(abs, ref_vals)
430-
431356
# Interpolate simulation at reference time points.
432357
sim_vals = [_eval_sim(sol, accessor, t) for t in t_ref]
433358

@@ -438,16 +363,24 @@ function compare_with_reference(
438363
continue
439364
end
440365

441-
pass = all(zip(sim_vals, ref_vals)) do (s, r)
442-
_check_point(s, r, ref_scale, settings)
443-
end
366+
# Check absolute error and globally scaled relative error
367+
abs_error = _absolute_error(sim_vals, ref_vals)
368+
scaled_rel_error = _scaled_relative_error(sim_vals, ref_vals)
369+
370+
pass = (settings.abs_tol === nothing || maximum(abs_error) < settings.abs_tol) &&
371+
maximum(scaled_rel_error) < settings.rel_tol
444372

445373
if pass
446374
n_pass += 1
447375
push!(pass_sigs, sig)
376+
pass_max_abs_error[sig] = maximum(abs_error)
377+
pass_max_rel_error[sig] = maximum(scaled_rel_error)
448378
else
449379
push!(fail_sigs, sig)
450-
fail_scales[sig] = ref_scale
380+
fail_ref_vals[sig] = ref_vals
381+
fail_sim_vals[sig] = sim_vals
382+
fail_abs_error[sig] = abs_error
383+
fail_scaled_rel_error[sig] = scaled_rel_error
451384
end
452385
end
453386

@@ -457,35 +390,25 @@ function compare_with_reference(
457390
diff_csv = ""
458391
if !isempty(fail_sigs)
459392
diff_csv = joinpath(model_dir, "$(short_name)_diff.csv")
460-
open(diff_csv, "w") do f
461-
cols = ["time"]
462-
for sig in fail_sigs
463-
push!(cols, "$(sig)_ref", "$(sig)_sim", "$(sig)_relerr")
464-
end
465-
println(f, join(cols, ","))
466-
for (ti, t) in enumerate(t_ref)
467-
row = [@sprintf("%.10g", t)]
468-
for sig in fail_sigs
469-
ref_vals = ref_data[sig][valid_mask]
470-
r = ref_vals[ti]
471-
s = _eval_sim(sol, var_access[_normalize_var(sig)], t)
472-
ref_scale = get(fail_scales, sig, 0.0)
473-
relerr = abs(s - r) / max(abs(r), ref_scale, settings.abs_tol)
474-
push!(row, @sprintf("%.10g", r),
475-
@sprintf("%.10g", s),
476-
@sprintf("%.6g", relerr))
477-
end
478-
println(f, join(row, ","))
479-
end
393+
df = DataFrames.DataFrame("time" => t_ref)
394+
for sig in fail_sigs
395+
df[!, "$(sig)_ref"] = fail_ref_vals[sig]
396+
df[!, "$(sig)_sim"] = fail_sim_vals[sig]
397+
df[!, "$(sig)_abserr"] = fail_abs_error[sig]
398+
df[!, "$(sig)_relerr"] = fail_scaled_rel_error[sig]
480399
end
400+
CSV.write(diff_csv, df)
481401
end
482402

483403
# ── Write detail HTML whenever there is anything worth showing ───────────────
484404
if !isempty(fail_sigs) || !isempty(skip_sigs)
485405
write_diff_html(model_dir, model;
486-
diff_csv_path = diff_csv,
487-
pass_sigs = pass_sigs,
488-
skip_sigs = skip_sigs)
406+
diff_csv_path = diff_csv,
407+
pass_sigs = pass_sigs,
408+
skip_sigs = skip_sigs,
409+
pass_max_abs_error = pass_max_abs_error,
410+
pass_max_rel_error = pass_max_rel_error,
411+
settings = settings)
489412
end
490413

491414
return n_total, n_pass, length(skip_sigs), diff_csv

0 commit comments

Comments
 (0)