Class: Transformers::SquadExample

Inherits:
Object
  • Object
show all
Defined in:
lib/transformers/data/processors/squad.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(qas_id, question_text, context_text, answer_text, start_position_character, title, answers: [], is_impossible: false) ⇒ SquadExample

Returns a new instance of SquadExample.



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/transformers/data/processors/squad.rb', line 19

# Builds a single question-answering example and pre-computes a
# whitespace tokenization of its context.
#
# The context is split into whitespace-delimited tokens (+@doc_tokens+) and,
# for every character of the context, the index of the token that character
# belongs to is recorded (+@char_to_word_offset+). Whitespace characters map
# to the token that precedes them; leading whitespace maps to -1 because no
# token exists yet.
#
# @param qas_id identifier of the example (stored as-is)
# @param question_text the question (stored as-is)
# @param context_text [String] the passage; iterated with +each_char+
# @param answer_text [String, nil] the answer; only its +length+ is read, and
#   only when a start character is given and the example is not impossible
# @param start_position_character [Integer, nil] character index in
#   +context_text+ where the answer starts, or nil when unknown
# @param title title of the example (stored as-is)
# @param answers list of answers (stored as-is)
# @param is_impossible [Boolean] when true, no answer span is computed
# @return [SquadExample] a new instance of SquadExample
def initialize(
  qas_id,
  question_text,
  context_text,
  answer_text,
  start_position_character,
  title,
  answers: [],
  is_impossible: false
)
  @qas_id = qas_id
  @question_text = question_text
  @context_text = context_text
  @answer_text = answer_text
  @title = title
  @is_impossible = is_impossible
  @answers = answers

  # Default span; overwritten below only for answerable examples that carry
  # a start character.
  @start_position = 0
  @end_position = 0

  doc_tokens = []
  char_to_word_offset = []
  prev_is_whitespace = true

  # Split on whitespace so that different tokens may be attributed to their original position.
  @context_text.each_char do |c|
    if _is_whitespace(c)
      prev_is_whitespace = true
    else
      if prev_is_whitespace
        doc_tokens << c
      else
        # Append in place: each_char yields a fresh String per character, so
        # the token is never shared with @context_text.
        doc_tokens.last << c
      end
      prev_is_whitespace = false
    end
    char_to_word_offset << (doc_tokens.length - 1)
  end

  @doc_tokens = doc_tokens
  @char_to_word_offset = char_to_word_offset

  # The word-level answer span is only computed when a start character is
  # supplied and the example is answerable.
  return if start_position_character.nil? || is_impossible

  # Index of the answer's last character. It is kept from falling before the
  # start character: with an empty answer it would otherwise be start - 1,
  # which is -1 for start 0 and would wrap around to the last word.
  answer_end_character = [
    start_position_character + answer_text.length - 1,
    start_position_character
  ].max

  @start_position = char_to_word_offset[start_position_character]
  @end_position = char_to_word_offset[
    [answer_end_character, char_to_word_offset.length - 1].min
  ]
end

Instance Attribute Details

#context_text ⇒ Object (readonly)

Returns the value of attribute context_text.



17
18
19
# File 'lib/transformers/data/processors/squad.rb', line 17

# Read-only accessor for the +@context_text+ instance variable.
#
# @return the current value of +@context_text+
def context_text = @context_text

#question_text ⇒ Object (readonly)

Returns the value of attribute question_text.



17
18
19
# File 'lib/transformers/data/processors/squad.rb', line 17

# Read-only accessor for the +@question_text+ instance variable.
#
# @return the current value of +@question_text+
def question_text = @question_text

Instance Method Details

#_is_whitespace(c) ⇒ Object



70
71
72
# File 'lib/transformers/data/processors/squad.rb', line 70

# Tells whether +c+ counts as whitespace for tokenization purposes.
#
# A space, tab, carriage return or newline qualifies, as does a character
# whose code point is 0x202F (NARROW NO-BREAK SPACE).
#
# @param c [String] the character to classify
# @return [Boolean] true when +c+ is one of the characters above
def _is_whitespace(c)
  case c
  when " ", "\t", "\r", "\n"
    true
  else
    c.ord == 0x202F
  end
end