Module: RubricLLM

Defined in:
lib/rubric_llm.rb,
lib/rubric_llm/judge.rb,
lib/rubric_llm/rspec.rb,
lib/rubric_llm/config.rb,
lib/rubric_llm/errors.rb,
lib/rubric_llm/report.rb,
lib/rubric_llm/result.rb,
lib/rubric_llm/version.rb,
lib/rubric_llm/minitest.rb,
lib/rubric_llm/evaluator.rb,
lib/rubric_llm/comparison.rb,
lib/rubric_llm/metrics/base.rb,
lib/rubric_llm/retrieval_result.rb,
lib/rubric_llm/metrics/relevance.rb,
lib/rubric_llm/metrics/correctness.rb,
lib/rubric_llm/metrics/faithfulness.rb,
lib/rubric_llm/metrics/context_recall.rb,
lib/rubric_llm/metrics/factual_accuracy.rb,
lib/rubric_llm/metrics/context_precision.rb

Defined Under Namespace

Modules: Assertions, Metrics, RSpecMatchers
Classes: Comparison, Config, ConfigurationError, Error, Evaluator, Judge, JudgeError, Report, Result, RetrievalResult

Constant Summary

VERSION = "0.1.1"

Class Method Summary

.compare(report_a, report_b) ⇒ Object
.config ⇒ Object
.configure {|new_config| ... } ⇒ Object
.evaluate(question:, answer:, context: [], ground_truth: nil, metrics: nil, config: self.config, custom_prompt: nil) ⇒ Object
.evaluate_batch(dataset, metrics: nil, config: self.config, custom_prompt: nil, concurrency: nil) ⇒ Object
.evaluate_retrieval(retrieved:, relevant:) ⇒ Object
.reset_configuration! ⇒ Object

Class Method Details

.compare(report_a, report_b) ⇒ Object

Compare two Reports with paired t-tests.

comparison = RubricLLM.compare(report_a, report_b)


# File 'lib/rubric_llm.rb', line 77

def compare(report_a, report_b)
  Comparison.new(report_a, report_b)
end
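
A typical flow builds one Report per configuration from the same dataset and then compares them. A minimal sketch; dataset and candidate_prompt are placeholders for your own data, not part of the API:

require "rubric_llm"

# Score the same samples twice, once per prompt, then compare the reports.
report_a = RubricLLM.evaluate_batch(dataset)
report_b = RubricLLM.evaluate_batch(dataset, custom_prompt: candidate_prompt)

comparison = RubricLLM.compare(report_a, report_b)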

.config ⇒ Object



# File 'lib/rubric_llm.rb', line 24

def config
  @config ||= Config.new
end
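
config returns the memoized global configuration, building a default Config on first access and reusing it afterwards:

require "rubric_llm"

config = RubricLLM.config          # builds Config.new on first call and caches it
config.equal?(RubricLLM.config)    # => true, the same object is returned thereafter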

.configure {|new_config| ... } ⇒ Object

Yields:

  • (new_config)


# File 'lib/rubric_llm.rb', line 28

def configure
  new_config = Config.new(**config.to_h)
  yield(new_config)
  new_config.validate!
  @config = new_config
end
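
configure copies the current settings into a fresh Config, yields it to the block, validates it, and only swaps it in if validation passes, so a validation failure (presumably a ConfigurationError) leaves the previous configuration intact. A minimal sketch; concurrency is the only setting visible on this page, and the attribute writer is assumed to exist on Config:

require "rubric_llm"

RubricLLM.configure do |config|
  # Assumed attribute writer; other Config options depend on your Config class.
  config.concurrency = 4
end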

.evaluate(question:, answer:, context: [], ground_truth: nil, metrics: nil, config: self.config, custom_prompt: nil) ⇒ Object

Evaluate a single sample against all (or selected) metrics.

result = RubricLLM.evaluate(
  question: "What is the capital of France?",
  answer: "Paris",
  context: ["Paris is the capital of France."],
  ground_truth: "Paris"
)


# File 'lib/rubric_llm.rb', line 47

def evaluate(question:, answer:, context: [], ground_truth: nil, metrics: nil,
             config: self.config, custom_prompt: nil)
  config = apply_custom_prompt(config, custom_prompt)
  evaluator = Evaluator.new(config:, metrics:)
  evaluator.call(question:, answer:, context:, ground_truth:)
end
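
The metrics: keyword restricts the run to a subset of metrics; judging by the file list above, the gem ships relevance, correctness, faithfulness, context_recall, factual_accuracy and context_precision. How the metrics: argument names them (symbols, strings, or metric classes) is an assumption in this sketch:

# Assumption: metrics can be selected by symbol name; adjust to whatever
# form Evaluator.new(metrics:) actually expects in your version.
result = RubricLLM.evaluate(
  question: "What is the capital of France?",
  answer: "Paris",
  context: ["Paris is the capital of France."],
  metrics: [:faithfulness, :relevance]
)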

.evaluate_batch(dataset, metrics: nil, config: self.config, custom_prompt: nil, concurrency: nil) ⇒ Object

Evaluate a batch of samples and return a Report.

report = RubricLLM.evaluate_batch(dataset)
report = RubricLLM.evaluate_batch(dataset, concurrency: 4)


# File 'lib/rubric_llm.rb', line 58

def evaluate_batch(dataset, metrics: nil, config: self.config, custom_prompt: nil, concurrency: nil)
  config = apply_custom_prompt(config, custom_prompt)
  pool_size = concurrency || config.concurrency
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  results = if pool_size > 1
              evaluate_batch_threaded(dataset, config:, metrics:, pool_size:)
            else
              evaluator = Evaluator.new(config:, metrics:)
              dataset.map { |sample| evaluate_sample(evaluator, sample) }
            end

  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
  Report.new(results:, duration:)
end
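
Each element of dataset is a single sample. The keys it needs are not shown on this page, but since every sample is forwarded to the same evaluator used by evaluate, a hash mirroring evaluate's keywords is the assumed shape here:

# Assumed sample shape: one hash per sample, using the same keys
# that RubricLLM.evaluate accepts.
dataset = [
  {
    question: "What is the capital of France?",
    answer: "Paris",
    context: ["Paris is the capital of France."],
    ground_truth: "Paris"
  }
]

report = RubricLLM.evaluate_batch(dataset, concurrency: 4)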

.evaluate_retrieval(retrieved:, relevant:) ⇒ Object

Evaluate retrieval quality without LLM calls.

result = RubricLLM.evaluate_retrieval(retrieved: [...], relevant: [...])


# File 'lib/rubric_llm.rb', line 84

def evaluate_retrieval(retrieved:, relevant:)
  RetrievalResult.new(retrieved:, relevant:)
end
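
Because the scoring is done entirely by RetrievalResult, this call makes no LLM requests. A filled-in version of the example above, with hypothetical document IDs:

result = RubricLLM.evaluate_retrieval(
  retrieved: ["doc_1", "doc_3", "doc_7"],  # what the retriever returned
  relevant:  ["doc_1", "doc_2", "doc_3"]   # the ground-truth relevant documents
)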

.reset_configuration! ⇒ Object



# File 'lib/rubric_llm.rb', line 35

def reset_configuration!
  @config = nil
end
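
reset_configuration! clears the memoized configuration so the next config call builds a fresh default Config, which is handy between tests:

# e.g. in a test teardown, to undo any configure calls made during the test:
RubricLLM.reset_configuration!
RubricLLM.config  # the next access builds and memoizes a new default Config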