Module: RubyTube::Parser
- Defined in:
- lib/rubytube/parser.rb
Class Method Summary
- .find_object_from_startpoint(html, start_point) ⇒ Object
- .parse_for_object(html, preceding_regex) ⇒ Object
- .parse_for_object_from_startpoint(html, start_point) ⇒ Object
- .throttling_array_split(js_array) ⇒ Object
Class Method Details
.find_object_from_startpoint(html, start_point) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/rubytube/parser.rb', line 17
#
# Extracts the bracket-balanced JavaScript object or array literal that
# begins at +start_point+ in +html+.
#
# The scanner keeps a stack of open contexts ("{", "[", double-quoted string,
# regex literal) and stops as soon as the stack is empty.
#
# @param html [String] text containing the object
# @param start_point [Integer] index of the opening "{" or "["
# @return [String] the text from the opening bracket through its matching closer
#   (or through the end of +html+ if the object is never closed)
# @raise [HTMLParseError] if the character at +start_point+ is not "{" or "["
def find_object_from_startpoint(html, start_point)
  # A start_point past the end slices to nil; normalise it so the guard below
  # reports the problem as an HTMLParseError.
  html = html[start_point..-1].to_s
  unless ["{", "["].include?(html[0])
    raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
  end

  context_closers = { "{" => "}", "[" => "]", '"' => '"', "/" => "/" }
  # A "/" opens a regex literal only when the previous significant character
  # is one of these; otherwise it is treated as a division operator.
  regex_preceders = ["(", ",", "=", ":", "[", "!", "&", "|", "?", "{", "}", ";"]

  stack = [html[0]]
  # The opening bracket is the last significant character seen so far.
  last_char = html[0]
  curr_char = nil
  i = 1

  while i < html.length && !stack.empty?
    # Remember the previous character unless it was whitespace.
    last_char = curr_char unless curr_char.nil? || [" ", "\n"].include?(curr_char)
    curr_char = html[i]
    curr_context = stack.last

    if curr_char == context_closers[curr_context]
      stack.pop
    elsif ['"', "/"].include?(curr_context)
      # Inside a string or regex only escapes matter: skip the escaped character.
      i += 1 if curr_char == "\\"
    elsif context_closers.key?(curr_char)
      division = curr_char == "/" && !regex_preceders.include?(last_char)
      stack.push(curr_char) unless division
    end
    i += 1
  end

  html[0...i]
end
.parse_for_object(html, preceding_regex) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 |
# File 'lib/rubytube/parser.rb', line 5
#
# Finds the first match of +preceding_regex+ in +html+ and passes the index
# just past that match to parse_for_object_from_startpoint.
#
# @param html [String] text to search
# @param preceding_regex [String, Regexp] pattern handed to Regexp.new
# @return [Object] the result of parse_for_object_from_startpoint
# @raise [HTMLParseError] if the pattern does not match anywhere in +html+
def parse_for_object(html, preceding_regex)
  match_data = Regexp.new(preceding_regex).match(html)
  raise HTMLParseError, "No matches for regex #{preceding_regex}" unless match_data

  parse_for_object_from_startpoint(html, match_data.end(0))
end
.parse_for_object_from_startpoint(html, start_point) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/rubytube/parser.rb', line 65
#
# Extracts the bracket-balanced JavaScript object or array literal that
# begins at +start_point+ in +html+.
#
# The scanner keeps a stack of open contexts ("{", "[", single- and
# double-quoted strings, regex literals) and stops once the stack is empty.
#
# @param html [String] text containing the object
# @param start_point [Integer] index of the opening "{" or "["
# @return [String] the text from the opening bracket through its matching closer
#   (or through the end of +html+ if the object is never closed)
# @raise [HTMLParseError] if the character at +start_point+ is not "{" or "["
def parse_for_object_from_startpoint(html, start_point)
  # A start_point past the end slices to nil; normalise it so the guard below
  # reports the problem as an HTMLParseError.
  html = html[start_point..-1].to_s
  unless ["{", "["].include?(html[0])
    raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
  end

  # Keys must be Strings: they are compared against single characters of html.
  context_closers = {
    "{" => "}",
    "[" => "]",
    '"' => '"',
    "'" => "'",
    "/" => "/" # JavaScript regex
  }
  # Strings and regex literals can contain context openers *and* closers, so
  # inside them only the matching closer and backslash escapes are honoured.
  literal_contexts = ['"', "'", "/"]
  # A "/" opens a regex literal only when the previous significant character
  # is one of these; otherwise it is treated as a division operator.
  regex_preceders = ["(", ",", "=", ":", "[", "!", "&", "|", "?", "{", "}", ";"]

  # The first character MUST be an opening bracket, so it seeds the stack and
  # scanning starts at the second character.
  stack = [html[0]]
  last_char = html[0]
  curr_char = nil
  i = 1

  while i < html.length && !stack.empty?
    # Remember the previous character unless it was whitespace.
    last_char = curr_char unless curr_char.nil? || [" ", "\n"].include?(curr_char)
    curr_char = html[i]
    curr_context = stack.last

    if curr_char == context_closers[curr_context]
      # Reached the closer of the current context.
      stack.pop
    elsif literal_contexts.include?(curr_context)
      # A backslash escapes the next character, so skip it.
      i += 1 if curr_char == "\\"
    elsif context_closers.key?(curr_char)
      division = curr_char == "/" && !regex_preceders.include?(last_char)
      stack << curr_char unless division
    end
    i += 1
  end

  html[0...i]
end
.throttling_array_split(js_array) ⇒ Object
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/rubytube/parser.rb', line 122
#
# Splits the source text of a JavaScript array literal into the source text
# of its elements.
#
# Elements are separated on commas, except that an element starting with
# "function" is taken whole (parameter list plus the body returned by
# find_object_from_startpoint), since function bodies may contain commas.
#
# @param js_array [String] array source; its first character (the opening
#   "[") is dropped, and the final character is dropped from the last
#   comma-less element (presumably the closing "]")
# @return [Array<String>] the element source strings, in order
def throttling_array_split(js_array)
  results = []
  # Slicing past the end yields nil; treat that as "nothing left".
  curr_substring = js_array[1..-1].to_s
  func_regex = /function\([^)]*\)/

  until curr_substring.empty?
    if curr_substring.start_with?("function")
      # NOTE(review): assumes a "function(...)" parameter list is present
      # whenever the element starts with "function" - confirm against callers.
      match_end = func_regex.match(curr_substring).end(0)
      function_text = find_object_from_startpoint(curr_substring, match_end)
      full_function_def = curr_substring[0, match_end + function_text.length]
      results << full_function_def
      # Skip the function plus the single separator character that follows it.
      curr_substring = curr_substring[(full_function_def.length + 1)..-1].to_s
    else
      comma_index = curr_substring.index(",")
      if comma_index
        results << curr_substring[0, comma_index]
        curr_substring = curr_substring[(comma_index + 1)..-1]
      else
        # Last element: everything except the final character.
        results << curr_substring[0, curr_substring.length - 1]
        curr_substring = ""
      end
    end
  end

  results
end