Class: EPUB::Searcher::Publication

Inherits:

Object

Object
EPUB::Searcher::Publication

show all

Defined in: lib/epub/searcher/publication.rb

Class Method Summary collapse

Instance Method Summary collapse

#initialize(package) ⇒ Publication constructor

A new instance of Publication.
#search_by_cfi(cfi) ⇒ Array

Path in EPUB Rendition.
#search_element(css: nil, xpath: nil, namespaces: {}) ⇒ Array<Hash>

@todo: Refactoring.
#search_text(word, algorithm: :seamless) ⇒ Array<Result>

Constructor Details

#initialize(package) ⇒ `Publication`

Returns a new instance of Publication.



23
24
25

# File 'lib/epub/searcher/publication.rb', line 23

# Creates a searcher bound to a single package.
#
# The package is stored as-is in @package; every search method on this
# object reads the spine (and, for CFI lookup, the other package models)
# from it.
#
# @param package [EPUB::Publication::Package] package to search in
#   (type taken from the documented search results — confirm against callers)
def initialize(package)
  @package = package
end

Class Method Details

.search_by_cfi(package, cfi) ⇒ `Object`



18
19
20

# File 'lib/epub/searcher/publication.rb', line 18

# Convenience entry point: builds a throwaway searcher for +package+ and
# delegates to the instance-level #search_by_cfi.
#
# @param package the package handed to the constructor
# @param cfi the CFI forwarded unchanged to the instance method
# @return whatever the instance-level #search_by_cfi returns
def search_by_cfi(package, cfi)
  searcher = new(package)
  searcher.search_by_cfi(cfi)
end

.search_element(package, css: nil, xpath: nil, namespaces: {}) ⇒ `Object`



14
15
16

# File 'lib/epub/searcher/publication.rb', line 14

# Convenience entry point: builds a throwaway searcher for +package+ and
# delegates to the instance-level #search_element with the same criteria.
#
# @param package the package handed to the constructor
# @param css [String, nil] CSS selector, forwarded unchanged
# @param xpath [String, nil] XPath expression, forwarded unchanged
# @param namespaces [Hash] namespace mapping, forwarded unchanged
# @return whatever the instance-level #search_element returns
def search_element(package, css: nil, xpath: nil, namespaces: {})
  criteria = {css: css, xpath: xpath, namespaces: namespaces}
  new(package).search_element(**criteria)
end

.search_text(package, word, **options) ⇒ `Object`



10
11
12

# File 'lib/epub/searcher/publication.rb', line 10

# Convenience entry point: builds a throwaway searcher for +package+ and
# delegates to the instance-level #search_text.
#
# @param package the package handed to the constructor
# @param word the search term, forwarded unchanged
# @param options keyword options forwarded unchanged to the instance method
# @return whatever the instance-level #search_text returns
def search_text(package, word, **options)
  searcher = new(package)
  searcher.search_text(word, **options)
end

Instance Method Details

#search_by_cfi(cfi) ⇒ `Array`

TODO:

Use XHTML module

TODO:

Handle CFI with offset

TODO:

Handle range CFI

Note:

Currently can only handle a location CFI without an offset

Returns Path in EPUB Rendition.

Parameters:

cfi (EPUB::CFI)

Returns:

(Array) —

Path in EPUB Rendition

Raises:

(NotImplementedError)

# File 'lib/epub/searcher/publication.rb', line 93

# Resolves a location CFI to a node inside one of the spine's content documents.
#
# The first path of the CFI is interpreted inside the package (it must point
# at the spine and then at one itemref); the second path is walked inside the
# itemref's content document, starting from the document root.
#
# @note Currently handles only location CFIs without an offset.
# @todo Use XHTML module
# @todo Handle CFI with offset
# @todo Handle range CFI
# @param cfi [EPUB::CFI]
# @return [Array] two-element array +[itemref, node]+. +node+ is nil when a
#   step of the in-document path points past the available nodes.
# @raise [NotImplementedError] when the first step does not address the spine,
#   or when the CFI has a third path (e.g. one descending into an <iframe>)
# @raise [ArgumentError] when the in-package path has more than two steps
def search_by_cfi(cfi)
  path_in_package = cfi.paths.first
  spine = @package.spine
  # CFI element steps are even numbers: step value 2n addresses the n-th
  # child element, hence the `value / 2 - 1` index conversions below.
  model = [@package.metadata, @package.manifest, spine, @package.guide, @package.bindings].compact[path_in_package.steps.first.value / 2 - 1]
  raise NotImplementedError, "Currently, #{__method__} supports spine only(#{cfi})" unless model == spine
  raise ArgumentError, "Cannot identify <itemref>'s child" if path_in_package.steps.length > 2
  # Reject unsupported CFIs before loading and walking the content document.
  raise NotImplementedError, "Currently, #{__method__} doesn't support deeper DOM tree such as including <iframe>" if cfi.paths[2]

  step_to_itemref = path_in_package.steps[1]
  itemref = spine.itemrefs[step_to_itemref.value / 2 - 1]

  doc = itemref.item.content_document.nokogiri
  path_in_doc = cfi.paths[1]
  current_node = doc.root
  path_in_doc.steps.each do |step|
    if step.element?
      current_node = current_node.elements.to_a[step.value / 2 - 1]
      # An out-of-range element step leaves nothing to descend into; stop
      # here instead of calling #elements on nil at the next step.
      break unless current_node
    else
      # Non-element (odd) step: the node sits right after the preceding
      # element; index -1 means it precedes the first element.
      element_index = (step.value - 1) / 2 - 1
      if element_index == -1
        current_node = current_node.children.first
      else
        prev = current_node.elements.to_a[element_index]
        break unless prev
        current_node = prev.next_sibling
        break unless current_node
      end
    end
  end

  [itemref, current_node]
end

#search_element(css: nil, xpath: nil, namespaces: {}) ⇒ `Array<Hash>`

@todo: Refactoring

Returns:

(Array<Hash>) —
An array of search results. Each result is composed of:
- :element: [REXML::Element, Oga::XML::Element, Nokogiri::XML::Element] Found element
- :itemref: [EPUB::Publication::Package::Spine::Itemref] Itemref that element’s document belongs to
- :location: [EPUB::CFI::Location] CFI that indicates the element
- :package: [EPUB::Publication::Package] Package that the element belongs to

Raises:

(ArgumentError)

# File 'lib/epub/searcher/publication.rb', line 49

# Finds elements matching an XPath expression or a CSS selector in every
# spine item that has a content document.
#
# When both +xpath+ and +css+ are given, +xpath+ is used. The XPath branch
# parses the document through Parser::XMLDocument; the CSS branch needs the
# document's Nokogiri representation.
#
# @todo Refactoring
# @param css [String, nil] CSS selector
# @param xpath [String, nil] XPath expression
# @param namespaces [Hash] extra namespaces, merged over EPUB::NAMESPACES
# @return [Array<Hash>] one hash per found element, with the keys
#   +:location+, +:package+, +:itemref+ and +:element+
# @raise [ArgumentError] when both +css+ and +xpath+ are nil
# @raise [RuntimeError] when +css+ is used and loading Nokogiri raises LoadError
def search_element(css: nil, xpath: nil, namespaces: {})
  raise ArgumentError, 'Both css and xpath are nil' if [css, xpath].all?(&:nil?)

  merged_namespaces = EPUB::NAMESPACES.merge(namespaces)
  hits = []

  # CFI element steps are even: the n-th child element is addressed by 2n.
  spine_position = EPUB::Publication::Package::CONTENT_MODELS.index(:spine) + 1
  spine_step = EPUB::CFI::Step.new(spine_position * 2)

  @package.spine.each_itemref.with_index do |itemref, position|
    id_assertion = (EPUB::CFI::IDAssertion.new(itemref.id) if itemref.id)
    itemref_step = EPUB::CFI::Step.new((position + 1) * 2, id_assertion)
    itemref_path = EPUB::CFI::Path.new([spine_step, itemref_step])

    document = itemref.item.content_document
    next unless document

    if xpath
      parsed = Parser::XMLDocument.new(document.read)
      matches = parsed.each_element_by_xpath(xpath, merged_namespaces)
    else
      begin
        parsed = document.nokogiri
      rescue LoadError
        raise "#{self.class}##{__method__} with `css` argument requires Nokogiri gem for now. Install Nokogiri and then try again."
      end
      matches = parsed.css(css)
    end

    matches.each do |element|
      # find_path is defined elsewhere in the class; presumably it yields the
      # CFI path from the document down to the element — confirm there.
      element_path = find_path(element)
      hits << {
        location: EPUB::CFI::Location.new([itemref_path, element_path]),
        package: @package,
        itemref: itemref,
        element: element
      }
    end
  end

  hits
end

#search_text(word, algorithm: :seamless) ⇒ `Array<Result>`

Returns:

(Array<Result>)

# File 'lib/epub/searcher/publication.rb', line 28

# Searches every spine item for +word+ and collects the hits in spine order.
#
# Each item is read, wrapped in a Parser::XMLDocument and handed to the
# search algorithm registered under +algorithm+ in XHTML::ALGORITHMS. Every
# sub-result is re-rooted by prefixing its parent steps with the spine step
# and the step of the itemref it was found in.
#
# @param word the term passed to the algorithm's +search_text+
# @param algorithm [Symbol] key into XHTML::ALGORITHMS
# @return [Array<Result>]
def search_text(word, algorithm: :seamless)
  spine = @package.spine
  spine_step = Result::Step.new(:element, 2, {name: 'spine', id: spine.id})
  hits = []

  spine.each_itemref.with_index do |itemref, position|
    itemref_step = Result::Step.new(:itemref, position, {id: itemref.id})
    searcher = XHTML::ALGORITHMS[algorithm]
    document = Parser::XMLDocument.new(itemref.item.read)
    leading_steps = [spine_step, itemref_step]

    searcher.search_text(document, word).each do |match|
      hits << Result.new(leading_steps + match.parent_steps, match.start_steps, match.end_steps)
    end
  end

  hits
end

Class: EPUB::Searcher::Publication

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(package) ⇒ Publication

Class Method Details

.search_by_cfi(package, cfi) ⇒ Object

.search_element(package, css: nil, xpath: nil, namespaces: {}) ⇒ Object

.search_text(package, word, **options) ⇒ Object

Instance Method Details

#search_by_cfi(cfi) ⇒ Array

#search_element(css: nil, xpath: nil, namespaces: {}) ⇒ Array<Hash>

#search_text(word, algorithm: :seamless) ⇒ Array<Result>

#initialize(package) ⇒ `Publication`

.search_by_cfi(package, cfi) ⇒ `Object`

.search_element(package, css: nil, xpath: nil, namespaces: {}) ⇒ `Object`

.search_text(package, word, **options) ⇒ `Object`

#search_by_cfi(cfi) ⇒ `Array`

#search_element(css: nil, xpath: nil, namespaces: {}) ⇒ `Array<Hash>`

#search_text(word, algorithm: :seamless) ⇒ `Array<Result>`