Class: RubricLLM::Comparison

Inherits:
Object
  • Object
show all
Defined in:
lib/rubric_llm/comparison.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(report_a, report_b) ⇒ Comparison

Returns a new instance of Comparison.



7
8
9
10
11
12
13
14
15
16
# File 'lib/rubric_llm/comparison.rb', line 7

def initialize(report_a, report_b)
  @report_a = report_a
  @report_b = report_b

  return if report_a.results.size == report_b.results.size

  warn "[RubricLLM] Comparison reports have different sizes " \
       "(#{report_a.results.size} vs #{report_b.results.size}). " \
       "Unmatched pairs will be dropped."
end

Instance Attribute Details

#report_a ⇒ Object (readonly)

Returns the value of attribute report_a.



5
6
7
# File 'lib/rubric_llm/comparison.rb', line 5

# Reader for the first report passed to the constructor.
#
# @return [Object] the value held in +@report_a+ (nil if never assigned)
def report_a = @report_a

#report_b ⇒ Object (readonly)

Returns the value of attribute report_b.



5
6
7
# File 'lib/rubric_llm/comparison.rb', line 5

# Reader for the second report passed to the constructor.
#
# @return [Object] the value held in +@report_b+ (nil if never assigned)
def report_b = @report_b

Instance Method Details

#results ⇒ Object



18
19
20
# File 'lib/rubric_llm/comparison.rb', line 18

# Memoized comparison results.
#
# The first call delegates to +compute_results+ and caches the value in
# +@results+; later calls return the cached value as long as it is truthy.
#
# @return [Object] whatever +compute_results+ produced
def results
  return @results if @results

  @results = compute_results
end

#significant_improvements(alpha: 0.05) ⇒ Object



36
37
38
# File 'lib/rubric_llm/comparison.rb', line 36

# Lists the metrics whose change is both significant and positive.
#
# A metric qualifies when its +:p_value+ is strictly below +alpha+ and its
# +:delta+ is positive.
#
# @param alpha [Numeric] significance threshold (exclusive upper bound on the p-value)
# @return [Array] keys of +results+ that qualify, in iteration order
def significant_improvements(alpha: 0.05)
  results.each_with_object([]) do |(metric, stats), improved|
    improved << metric if stats[:p_value] < alpha && stats[:delta].positive?
  end
end

#significant_regressions(alpha: 0.05) ⇒ Object



40
41
42
# File 'lib/rubric_llm/comparison.rb', line 40

# Lists the metrics whose change is both significant and negative.
#
# A metric qualifies when its +:p_value+ is strictly below +alpha+ and its
# +:delta+ is negative.
#
# @param alpha [Numeric] significance threshold (exclusive upper bound on the p-value)
# @return [Array] keys of +results+ that qualify, in iteration order
def significant_regressions(alpha: 0.05)
  regressed = results.each_pair.select do |_metric, stats|
    stats[:p_value] < alpha && stats[:delta].negative?
  end
  regressed.map(&:first)
end

#summary ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/rubric_llm/comparison.rb', line 22

# Renders the comparison as a plain-text table.
#
# The output starts with a title, a 70-character "=" rule, a column header
# and a 70-character "-" rule, followed by one formatted row per entry in
# +results+ (metric, mean A, mean B, signed delta, p-value, significance).
#
# @return [String] the table, with lines joined by "\n" and no trailing newline
def summary
  rule_width = 70
  row_format = "%-20s %8.3f %8.3f %+8.3f %10.4f %4s"

  header = [
    "A/B Comparison",
    "=" * rule_width,
    "Metric                      A        B    Delta    p-value  Sig",
    "-" * rule_width
  ]

  rows = results.map do |metric, stats|
    format(row_format,
           metric, stats[:mean_a], stats[:mean_b], stats[:delta], stats[:p_value], stats[:significance])
  end

  (header + rows).join("\n")
end