Class: RubricLLM::Report
- Inherits: Object
- Inheritance chain: RubricLLM::Report < Object
- Defined in:
- lib/rubric_llm/report.rb
Instance Attribute Summary
-
#duration ⇒ Object
readonly
Returns the value of attribute duration.
-
#results ⇒ Object
readonly
Returns the value of attribute results.
Instance Method Summary
- #export_csv(path) ⇒ Object
- #export_json(path) ⇒ Object
- #failures(threshold: 0.8) ⇒ Object
-
#initialize(results:, duration: nil) ⇒ Report
constructor
A new instance of Report.
- #metric_stats ⇒ Object
-
#scores_for(metric) ⇒ Object
Scores for a single metric across all results (for Comparison).
- #summary ⇒ Object
- #to_json ⇒ Object
- #worst(n) ⇒ Object
Constructor Details
#initialize(results:, duration: nil) ⇒ Report
Returns a new instance of Report.
9 10 11 12 |
# File 'lib/rubric_llm/report.rb', line 9

# Builds a report over a batch of evaluation results.
#
# @param results [Array] per-sample evaluation results
# @param duration [Numeric, nil] wall-clock seconds the run took, if measured
def initialize(results:, duration: nil)
  @results, @duration = results, duration
end
Instance Attribute Details
#duration ⇒ Object (readonly)
Returns the value of attribute duration.
7 8 9 |
# File 'lib/rubric_llm/report.rb', line 7

# @return [Object] the recorded run duration (readonly)
def duration = @duration
#results ⇒ Object (readonly)
Returns the value of attribute results.
7 8 9 |
# File 'lib/rubric_llm/report.rb', line 7

# @return [Object] the per-sample evaluation results (readonly)
def results = @results
Instance Method Details
#export_csv(path) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/rubric_llm/report.rb', line 40

# Writes the report as a CSV table, one row per evaluated sample.
# Columns: question, answer, overall score, then one column per metric.
#
# @param path [String] destination file path
# @return [void]
def export_csv(path)
  require "csv" # optional dependency — add `gem "csv"` to your Gemfile if missing
  metric_names = all_metric_names
  header = ["question", "answer", "overall"] + metric_names
  rows = results.map do |result|
    base = [result.sample[:question], result.sample[:answer], result.overall]
    base + metric_names.map { |name| result.scores[name] }
  end
  CSV.open(path, "w") do |csv|
    csv << header
    rows.each { |row| csv << row }
  end
end
#export_json(path) ⇒ Object
56 57 58 |
# File 'lib/rubric_llm/report.rb', line 56

# Writes the report to disk as pretty-printed JSON.
#
# @param path [String] destination file path
# @return [void]
def export_json(path)
  json = JSON.pretty_generate(serializable_hash)
  File.write(path, json)
end
#failures(threshold: 0.8) ⇒ Object
22 23 24 |
# File 'lib/rubric_llm/report.rb', line 22

# Results that did not meet the pass threshold.
#
# @param threshold [Float] minimum passing score
# @return [Array] the failing results
def failures(threshold: 0.8)
  results.select { |result| !result.pass?(threshold:) }
end
#metric_stats ⇒ Object
14 15 16 |
# File 'lib/rubric_llm/report.rb', line 14

# Per-metric aggregate statistics, computed once and cached.
#
# @return [Object] whatever compute_stats produces (memoized)
def metric_stats
  return @metric_stats if @metric_stats
  @metric_stats = compute_stats
end
#scores_for(metric) ⇒ Object
Scores for a single metric across all results (for Comparison).
65 66 67 |
# File 'lib/rubric_llm/report.rb', line 65

# Scores for a single metric across all results (for Comparison).
#
# @param metric [Object] the metric key to collect
# @return [Array] one score (possibly nil) per result, in result order
def scores_for(metric)
  results.map { _1.scores[metric] }
end
#summary ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/rubric_llm/report.rb', line 26

# Human-readable multi-line summary: sample count, optional duration,
# and one aligned stats line per metric.
#
# @return [String] the formatted report text
def summary
  header = ["RubricLLM Evaluation Report", "=" * 40, "Samples: #{results.size}"]
  header << "Duration: #{format('%.1f', duration)}s" if duration
  stat_lines = metric_stats.map do |metric, stats|
    format(" %-20s mean=%.3f std=%.3f min=%.3f max=%.3f n=%d",
           metric, stats[:mean], stats[:std], stats[:min], stats[:max], stats[:count])
  end
  (header + stat_lines).join("\n")
end
#to_json ⇒ Object
60 61 62 |
# File 'lib/rubric_llm/report.rb', line 60

# Serializes the report as a compact JSON string (JSON protocol hook;
# extra generator arguments are forwarded untouched).
#
# @return [String] the report as JSON
def to_json(*args) = JSON.generate(serializable_hash, *args)
#worst(n) ⇒ Object
18 19 20 |
# File 'lib/rubric_llm/report.rb', line 18

# The n lowest-scoring results, worst first (unscored results sort last).
#
# @param n [Integer] how many results to return
# @return [Array] up to n results in ascending overall order
def worst(n)
  results.min_by(n) { |result| result.overall || Float::INFINITY }
end