Class: Parser::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/parser.rb

Constant Summary collapse

DELIMITER =
'hr'
METHOD_TYPE_REGEX =
/Method:|Resource:/
REST_METHOD_TYPE =
/Resource:/
RPC_METHOD_TYPE =
/Method:/

Class Method Summary collapse

Class Method Details

.parse_doc(category, file) ⇒ Object

The documentation is a flat structure: HRs separate resources, and all elements for each resource are siblings. This method walks through the document and pulls the data out into a structured hash suitable for further parsing. There are better ways to do this parsing; this is the quick, brute-force way. Note: this implementation uses parse exceptions to indicate the end of parsing. This is fragile but works for now.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/parser.rb', line 35

# Parses the HTML documentation in +file+ into a list of per-resource hashes.
#
# The children of '#mainContent' are walked twice: a first pass over every
# node (REST), and a second pass that only starts acting once an <h1> whose
# text matches /RPC API/ has been seen (RPC). In both passes every DELIMITER
# node starts a new datum that is handed to parse_chunk, and the datum built
# for the previous DELIMITER (if any) is appended to the result.
#
# @param category [Object] accepted for interface compatibility; not read here
# @param file [String] path of the HTML file to parse
# @return [Array<Hash>] the data collected from both passes, in document order
#   per pass (REST pass results first, then RPC pass results)
def parse_doc(category, file)
  # File.read opens, reads and closes the file; File.open(file).read left the
  # handle open until garbage collection.
  doc = Nokogiri::HTML(File.read(file))

  structured = []

  # Template copied for every resource; parse_chunk is expected to replace
  # these placeholder values.
  datum_structure = {
    type: 'This will be rest or rpc',
    name: 'This will be a resource name',
    path: 'This will be the resource path',
    methods: 'This will be a list of supported methods',
    description: 'This will be a description of the resource',
    relationships: 'This will be the relationships',
    parameters: 'This will be the parameters',
    fields: 'This will be the fields'
  }

  # Builds the datum for one DELIMITER node. Any StandardError raised by
  # parse_chunk yields nil: parse failures are deliberately treated as
  # "nothing to record here" rather than as fatal errors.
  build_datum = lambda do |node|
    begin
      parse_chunk(datum_structure.dup, node)
    rescue StandardError
      nil
    end
  end

  nodes = doc.css('#mainContent').children

  # NOTE(review): in both passes a datum is only appended when the *next*
  # DELIMITER is reached, so the datum built for the last DELIMITER of a pass
  # is never added to the result. Presumably this relies on the trailing
  # chunk failing to parse - confirm before changing.

  # Parse REST docs.
  current_datum = nil
  nodes.each do |node|
    next unless node.name == DELIMITER

    structured << current_datum if current_datum
    current_datum = build_datum.call(node)
  end

  # Parse RPC docs.
  rpc_found = false
  current_datum = nil
  nodes.each do |node|
    # Skip forward until we get to the RPC section.
    rpc_found ||= node.name == 'h1' && node.text =~ /RPC API/
    next unless rpc_found
    next unless node.name == DELIMITER

    structured << current_datum if current_datum
    current_datum = build_datum.call(node)
  end

  structured
end