Avatar for the Aureliolo user
Aureliolo
synthorg
BlogDocsChangelog

feat(evals): #1980 spine -- scoring + data contract for golden-company benchmark

#2025Merged
Comparing
feat/1980-golden-benchmark
(
2c25a0a
) with
main
(
f60e9ed
)
CodSpeed Performance Gauge
0%
Untouched
33
Skipped
21

Benchmarks

54 total
test_normalize_utc
tests/benchmarks/test_persistence_datetime.py
CodSpeed Performance Gauge
+1%
12.6 µs12.5 µs
test_diversity_penalty_50
tests/benchmarks/test_memory_ranking.py
CodSpeed Performance Gauge
+1%
16.5 ms16.4 ms
test_compute_cost_per_1k
tests/benchmarks/test_budget_aggregation.py
CodSpeed Performance Gauge
+1%
19.3 µs19.2 µs
test_parse_iso_utc_single
tests/benchmarks/test_persistence_datetime.py
CodSpeed Performance Gauge
+1%
17.1 µs17 µs
test_coerce_row_timestamp_string
tests/benchmarks/test_persistence_datetime.py
CodSpeed Performance Gauge
0%
21.1 µs21.1 µs
test_agent_task_scorer_batch_10
tests/benchmarks/test_routing_scorer.py
CodSpeed Performance Gauge
0%
1.5 ms1.5 ms
test_compute_token_speedup_ratio
tests/benchmarks/test_coordination_metrics.py
CodSpeed Performance Gauge
0%
39 µs38.9 µs
test_compute_straggler_gap_20_agents
tests/benchmarks/test_coordination_metrics.py
CodSpeed Performance Gauge
0%
149 µs148.8 µs
test_coerce_row_timestamp_datetime
tests/benchmarks/test_persistence_datetime.py
CodSpeed Performance Gauge
0%
24.5 µs24.4 µs
test_format_iso_utc_single
tests/benchmarks/test_persistence_datetime.py
CodSpeed Performance Gauge
0%
31 µs31 µs
test_scrub_medium_mixed
tests/benchmarks/test_observability_scrubber.py
CodSpeed Performance Gauge
0%
284 µs283.8 µs
test_bigram_jaccard_short
tests/benchmarks/test_memory_ranking.py
CodSpeed Performance Gauge
0%
62.9 µs62.8 µs
test_scrub_adversarial
tests/benchmarks/test_observability_scrubber.py
CodSpeed Performance Gauge
0%
842.9 µs842.3 µs
test_bigram_jaccard_long
tests/benchmarks/test_memory_ranking.py
CodSpeed Performance Gauge
0%
239 µs238.9 µs
test_sum_tokens_2000
tests/benchmarks/test_budget_aggregation.py
CodSpeed Performance Gauge
0%
1.6 ms1.6 ms
test_rate_efficiency
tests/benchmarks/test_budget_optimizer.py
CodSpeed Performance Gauge
0%
16.6 µs16.6 µs
test_rank_memories_100
tests/benchmarks/test_memory_ranking.py
CodSpeed Performance Gauge
0%
1.8 ms1.8 ms
test_rank_memories_with_shared
tests/benchmarks/test_memory_ranking.py
CodSpeed Performance Gauge
0%
2.6 ms2.6 ms
test_parse_iso_utc_batch_96
tests/benchmarks/test_persistence_datetime.py
CodSpeed Performance Gauge
0%
149.3 µs149.4 µs
test_compute_message_overhead
tests/benchmarks/test_coordination_metrics.py
CodSpeed Performance Gauge
0%
35.4 µs35.5 µs
test_fuse_ranked_lists_5x200
tests/benchmarks/test_memory_ranking.py
CodSpeed Performance Gauge
0%
13 ms13 ms
test_group_by_agent_500
tests/benchmarks/test_budget_aggregation.py
CodSpeed Performance Gauge
0%
291.7 µs292.2 µs
test_compute_window_costs_12
tests/benchmarks/test_budget_optimizer.py
CodSpeed Performance Gauge
0%
349.2 µs349.8 µs
test_rank_memories_1000
tests/benchmarks/test_memory_ranking.py
CodSpeed Performance Gauge
0%
16.3 ms16.4 ms
test_format_iso_utc_batch_96
tests/benchmarks/test_persistence_datetime.py
CodSpeed Performance Gauge
0%
452.9 µs453.6 µs

Commits

Click on a commit to change the comparison range
Base
main
f60e9ed
-0.08%
test: drop origin framing from test_aggregate_uses_penalty_table_floor_consistently docstring
6dfda3a
8 days ago
by Aureliolo
-0.01%
fix: babysit round 7, 12 coderabbit findings on head 6dfda3a9
e57d7a0
8 days ago
by Aureliolo
+0.06%
fix: babysit round 8, persist per-brief score_floor in BriefResult
2c25a0a
8 days ago
by Aureliolo
© 2026 CodSpeed Technology
Home Terms Privacy Docs