Class: Watson::Parser
Overview
Dir/File parser class. Contains all necessary methods to parse through files and directories for specified tags and generate a data structure containing the issues found.
Constant Summary collapse
# Default comment markers per file extension.
#
# Keys are file extensions including the leading dot; values are the comment
# openers recognised for that extension. get_comment_type merges this table
# with the user's @config.type_list (user entries win) and parse_file builds
# its line-matching regex from the returned markers.
#
# The table is deep-frozen: the hash, every marker array and every marker
# string are immutable, so a caller that receives one of these arrays cannot
# accidentally alter the shared defaults.
COMMENT_DEFINITIONS = {
  '.cpp'    => ['//', '/*'],        # C++
  '.cxx'    => ['//', '/*'],
  '.cc'     => ['//', '/*'],
  '.hpp'    => ['//', '/*'],
  '.hxx'    => ['//', '/*'],
  '.c'      => ['//', '/*'],        # C
  '.h'      => ['//', '/*'],
  '.java'   => ['//', '/*', '/**'], # Java
  '.class'  => ['//', '/*', '/**'],
  '.cs'     => ['//', '/*'],        # C#
  '.scss'   => ['//', '/*'],        # SASS SCSS
  '.sass'   => ['//', '/*'],        # SASS SCSS
  '.js'     => ['//', '/*'],        # JavaScript
  '.php'    => ['//', '/*', '#'],   # PHP
  '.m'      => ['//', '/*'],        # ObjectiveC
  '.mm'     => ['//', '/*'],
  '.go'     => ['//', '/*'],        # Go(lang)
  '.scala'  => ['//', '/*'],        # Scala
  '.erl'    => ['%%', '%'],         # Erlang
  '.f'      => ['!'],               # Fortran
  '.f90'    => ['!'],               # Fortran
  '.F'      => ['!'],               # Fortran
  '.F90'    => ['!'],               # Fortran
  '.hs'     => ['--'],              # Haskell
  '.sh'     => ['#'],               # Bash
  '.rb'     => ['#'],               # Ruby
  '.haml'   => ['-#'],              # Haml
  '.pl'     => ['#'],               # Perl
  '.pm'     => ['#'],
  '.t'      => ['#'],
  '.py'     => ['#'],               # Python
  '.coffee' => ['#'],               # CoffeeScript
  '.zsh'    => ['#'],               # Zsh
  '.clj'    => [';;'],              # Clojure
  # NOTE(review): standard SQL line comments start with '--'; '---' is kept
  # as-is here - confirm whether the third dash is intentional.
  '.sql'    => ['---', '//', '#'],  # SQL and PL types
  '.lua'    => ['--', '--[['],      # Lua
  '.vim'    => ['"'],               # VimL
  '.md'     => ['<!--'],            # Markdown
  '.html'   => ['<!--'],            # HTML
  '.el'     => [';'],               # Emacslisp
  '.sqf'    => ['//', '/*'],        # SQF
  '.sqs'    => [';'],               # SQS
  '.d'      => ['//', '/*'],        # D
  '.tex'    => ['%'],               # LaTex
  '.hbs'    => ['{{!--'],           # Handlebars
  '.twig'   => ['{#']               # Twig
}.each_value { |_markers| _markers.each(&:freeze).freeze }.freeze
Constants included from Watson
BLUE, BOLD, CYAN, GRAY, GREEN, MAGENTA, RED, RESET, UNDERLINE, VERSION, WHITE, YELLOW
Instance Method Summary collapse
-
#get_comment_type(filename) ⇒ Object
Get comment syntax for given file.
-
#initialize(config) ⇒ Parser
constructor
Initialize the parser with the current watson config.
-
#parse_dir(dir, depth) ⇒ Object
Parse through specified directory and find all subdirs and files.
-
#parse_file(filename) ⇒ Object
Parse through individual files looking for issue tags, then generate a formatted issue hash.
-
#run ⇒ Object
Begins parsing of files / dirs specified in the initial dir/file lists.
Methods included from Watson
Constructor Details
#initialize(config) ⇒ Parser
Initialize the parser with the current watson config
67 68 69 70 71 72 73 74 |
# File 'lib/watson/parser.rb', line 67

# Create a parser bound to the given watson configuration.
#
# config - the current watson config object; kept in @config, which the
#          other parser methods read from
def initialize(config)
  # [review] - Not sure if passing config here is best way to access it

  # Trace method entry
  debug_print("#{ self } : #{ __method__ }\n")

  @config = config
end
Instance Method Details
#get_comment_type(filename) ⇒ Object
Get comment syntax for given file
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 |
# File 'lib/watson/parser.rb', line 413

# Get the comment syntax for a given file.
#
# The default COMMENT_DEFINITIONS table is merged with @config.type_list
# (entries from the config override the defaults). Every dot-separated
# segment after the first one in the file's base name is treated as a
# candidate extension, and candidates are checked from last to first so the
# outermost extension wins (index.html.haml resolves to '.haml', not '.html').
# Only the base name is inspected, so dots in directory names are ignored.
#
# filename - path or name of the file to classify
#
# Returns the Array of comment markers registered for the first recognised
# extension, or false when no extension is recognised (including an empty
# or nil filename).
def get_comment_type(filename)
  # Identify method entry
  debug_print "#{ self } : #{ __method__ }\n"

  # Merge config file type list with defaults
  _comments = COMMENT_DEFINITIONS.merge(@config.type_list)

  # drop(1) (unlike [1..-1]) yields [] instead of nil when there is nothing
  # to split, so an empty name falls through to the "not found" path
  _extensions = File.basename(filename.to_s).split('.').drop(1)

  # Check candidates in reverse order, last extension first
  _extensions.reverse_each do |_ext|
    _key = ".#{ _ext }"
    return _comments[_key] if _comments.key?(_key)
  end

  debug_print "Couldn't find any recognized extension type\n"
  false
end
#parse_dir(dir, depth) ⇒ Object
Parse through specified directory and find all subdirs and files
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
# File 'lib/watson/parser.rb', line 118

# Parse through the specified directory and find all subdirs and files.
#
# Files directly inside dir are handed to parse_file; subdirectories are
# parsed recursively, subject to @config.parse_depth (0 means no limit).
# Entries matching any pattern in @config.ignore_list are skipped. Every
# subdirectory that has been handled is appended to @config.ignore_list as
# an anchored pattern covering that directory and everything below it, so it
# is not parsed a second time by a later call.
#
# dir   - directory to parse; a leading "./" (or a bare ".") is dropped
#         before globbing, and a missing trailing "/" is added
# depth - depth of the caller; subdirectories found here are at depth + 1
#
# Returns a Hash with :curdir (dir as given), :files (Array of parse_file
# results) and :subdirs (Array of nested parse_dir results), or false when
# Watson::FS.check_dir rejects dir.
def parse_dir(dir, depth)
  # Identify method entry
  debug_print "#{ self } : #{ __method__ }\n"

  # Error check on input
  unless Watson::FS.check_dir(dir)
    print "Unable to open #{ dir }, exiting\n"
    return false
  end
  debug_print "Opened #{ dir } for parsing\n"
  debug_print "Parsing through all files/directories in #{ dir }\n"

  # Remove a leading "./" (or a bare "."). Anchored so that "../foo/" and
  # ".hidden/" keep their leading dot instead of turning into another path.
  _glob_dir = dir.sub(%r{\A\.(?:/+|\z)}, '')
  # The glob patterns below are appended directly, so the prefix has to end
  # with a separator
  _glob_dir += '/' unless _glob_dir.empty? || _glob_dir.end_with?('/')
  debug_print "_glob_dir: #{_glob_dir}\n"

  # An entry is ignored when any ignore pattern matches it
  _ignored = lambda do |_path|
    @config.ignore_list.any? { |_ignore| _path.match(_ignore) }
  end

  # Go through directory to find all files
  _completed_files = []
  Dir.glob("#{ _glob_dir }{*,.*}").select { |_fn| File.file?(_fn) }.sort.each do |_entry|
    debug_print "Entry: #{_entry} is a file\n"

    # [review] - Warning to user when file is ignored? (outside of debug_print)
    next if _ignored.call(_entry)

    debug_print "Parsing #{ _entry }\n"
    _completed_files.push(parse_file(_entry))
  end

  ## Depth limit logic
  # Current depth is depth of previous parse_dir (passed in as second param) + 1
  _cur_depth = depth + 1
  # Normalised once so that 0 and "0" both mean "no limit"
  _max_depth = @config.parse_depth.to_i

  # Go through directory to find all subdirs
  # NOTE(review): unlike the file glob above, this pattern has a space before
  # ".*", so dot-directories are not picked up. Left unchanged because
  # enabling it would also require filtering "." and ".." - confirm intent.
  _completed_dirs = []
  Dir.glob("#{ _glob_dir }{*, .*}").select { |_fn| File.directory?(_fn) }.sort.each do |_entry|
    debug_print "Entry: #{ _entry } is a dir\n"

    # If directory is on the ignore list then skip
    next if _ignored.call(_entry)
    debug_print "#{ _entry } was not on ignorelist, adding\n"

    debug_print "Current Folder depth: #{ _cur_depth }\n"

    if _max_depth.zero?
      # No limit on subdir parsing
      debug_print "No max depth, parsing directory\n"
      _completed_dirs.push(parse_dir("#{ _entry }/", _cur_depth))
    elsif _cur_depth <= _max_depth
      debug_print "Depth less than max dept (from config), parsing directory\n"
      _completed_dirs.push(parse_dir("#{ _entry }/", _cur_depth))
    else
      debug_print "Depth greater than max depth, ignoring\n"
    end

    # Add directory to ignore list so it isn't repeated again accidentally.
    # Ignore entries are matched as regexes, so the name is escaped and
    # anchored: it covers this directory and its contents, but not unrelated
    # paths that merely contain the name (e.g. "app" vs "myapp").
    @config.ignore_list.push("\\A#{ Regexp.escape(_entry) }(?:/|\\z)")
  end

  # Create hash to hold all parsed files and directories
  {
    :curdir  => dir,
    :files   => _completed_files,
    :subdirs => _completed_dirs
  }
end
#parse_file(filename) ⇒ Object
Parse through individual files looking for issue tags, then generate a formatted issue hash.
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 |
# File 'lib/watson/parser.rb', line 227

# Parse through an individual file looking for issue tags, then generate a
# formatted issue hash.
#
# A line counts as an issue when it starts with one or more of the file's
# comment characters followed by text matching @config.tag_format, where TAG
# captures the tag name and COMMENT captures the title. Tags are compared
# lower-cased against @config.tag_list; unknown tags are reported on stdout
# and skipped. @config.issue_count is incremented for every issue recorded.
#
# filename - path of the file to parse; stored unchanged as the relative path
#
# Returns false when Watson::FS.check_file rejects the file. Otherwise
# returns a Hash with :relative_path, :absolute_path, :has_issues and one
# Array per entry of @config.tag_list (keyed by the tag itself). Each issue
# is a Hash with :line_number (1-based), :title, :context, :tag, :path and
# :md5 (digest of tag, relative path and title).
def parse_file(filename)
  # Identify method entry
  debug_print "#{ self } : #{ __method__ }\n"

  _relative_path = filename
  _absolute_path = File.absolute_path(filename)

  # Error check on input, use input filename to make sure relative path is correct
  unless Watson::FS.check_file(_relative_path)
    print "Unable to open #{ _relative_path }, exiting\n"
    return false
  end
  debug_print "Opened #{ _relative_path } for parsing\n"
  debug_print "Short path: #{ _relative_path }\n"

  # Get filetype and set corresponding comment type
  _comment_type = get_comment_type(_relative_path)
  unless _comment_type
    debug_print "Using default (#) comment type\n"
    _comment_type = ['#']
  end

  # Escape out comment type for safety
  _comment_type = _comment_type.map { |_marker| Regexp.escape(_marker) }.join('|')
  debug_print "Comment type #{ _comment_type }\n"

  # [review] - It is possible to embed the valid tags in the regexp,
  # with a ~5% performance gain, but this would loose the warning about
  # unrecognized tags.
  _tag_format = Regexp.escape(@config.tag_format).gsub('\\ ', ' ')
  _tag_format_regex = _tag_format
                      .gsub('TAG', '(\w+)')
                      .gsub('COMMENT', '(.+)')
                      .gsub(' ', '\s+')

  # The markers are interpolated into a character class, so any run of
  # comment characters (and whitespace) is accepted in front of the tag
  _comment_regex = /^(?:\s*[#{_comment_type}]+\s*)+#{_tag_format_regex}/
  # Same leading part on its own; used to inspect only the comment opener
  _marker_regex = /\A(?:\s*[#{_comment_type}]+\s*)+/
  debug_print "Comment regex: #{_comment_regex}\n"

  # Read the entire file into an array of lines
  # Use an array so we can look ahead when creating issues later
  _data = File.read(_absolute_path).encode('UTF-8', :invalid => :replace).lines.to_a

  # Initialize issue list hash
  _issue_list = {
    :relative_path => _relative_path,
    :absolute_path => _absolute_path,
    :has_issues    => false
  }
  @config.tag_list.each do |_tag|
    debug_print "Creating array named #{ _tag }\n"
    # [review] - Use to_sym to make tag into symbol instead of string?
    _issue_list[_tag] = []
  end

  # Loop through all lines in the file and look for issues
  _data.each_with_index do |_line, _i|
    begin
      _mtch = _line.match(_comment_regex)
    rescue ArgumentError
      debug_print "Could not encode to UTF-8, non-text\n"
      next
    end
    next unless _mtch

    # Set tag
    _tag = _mtch[1].downcase

    # Make sure that the tag that was found is something we accept
    # If not, skip it but tell user about an unrecognized tag
    unless @config.tag_list.include?(_tag)
      formatter = Printer.new(@config).build_formatter
      formatter.print_status "+", GREEN
      print "Unknown tag [#{ _tag }] found, ignoring\n"
      print " You might want to include it in your RC or with the -t/--tags flag\n"
      next
    end

    # Found a valid match (with recognized tag)
    # Set flag for this issue_list (for file) to indicate that
    _issue_list[:has_issues] = true

    # If the comment was opened with an HTML/Handlebars/Twig style marker,
    # remove the closing -->, --}} or #} from the end of the title.
    # Only the comment opener decides this and only the trailing terminator
    # is removed, so '<', '>', '{' and '}' inside a title are left alone.
    # Whitespace around the terminator is kept.
    _title = _mtch[2]
    if _line[_marker_regex].to_s =~ /[<{]/
      _title = _title.sub(/(?:--)?#?[>}]+(?=\s*\z)/, '')
    end

    debug_print "Issue found\n"
    debug_print "Tag: #{ _tag }\n"
    debug_print "Issue: #{ _title }\n"

    # Create hash for each issue found
    _issue = {
      :line_number => _i + 1,
      :title       => _title
    }

    # Grab context of issue specified by Config param. The range is
    # inclusive: the issue line plus the next context_depth + 1 lines.
    _context = _data[_i..(_i + @config.context_depth + 1)]

    # Count the leading spaces/tabs/newlines of each context line and keep
    # the smallest count. Used to preserve relative indentation in the post.
    _indent = _context.map { |_line_sub| _line_sub[/\A[ \t\n]*/].length }.min

    # Print old _context
    debug_print "\n\n Old Context \n"
    debug_print PP.pp(_context, '')
    debug_print "\n\n"

    # Trim the context lines to be left aligned but maintain indentation
    # Then add a single \t to the beginning so the Markdown is pretty on GitHub/Bitbucket
    _context.map! { |_line_sub| "\t#{ _line_sub.slice(_indent..-1) }" }

    # Print new _context
    debug_print("\n\n New Context \n")
    debug_print PP.pp(_context, '')
    debug_print("\n\n")

    _issue[:context] = _context

    # These are accessible from _issue_list, but we pass individual issues
    # to the remote poster, so we need this here to reference them for GitHub/Bitbucket
    _issue[:tag]  = _tag
    _issue[:path] = _relative_path

    # Generate md5 hash for each specific issue (for bookkeeping)
    _issue[:md5] = ::Digest::MD5.hexdigest("#{ _tag }, #{ _relative_path }, #{ _title }")
    debug_print "#{ _issue }\n"

    # [todo] - Figure out a way to queue up posts so user has a progress bar?
    # [review] - Use _tag string as symbol reference in hash or keep as string?
    _issue_list[_tag].push(_issue)

    # Increment issue counter for posting status
    @config.issue_count = @config.issue_count.next
  end

  debug_print "\nIssue list: #{ _issue_list }\n"

  _issue_list
end
#run ⇒ Object
Begins parsing of files / dirs specified in the initial dir/file lists
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/watson/parser.rb', line 79

# Begin parsing of the files / dirs named in the config's initial lists.
#
# Files from @config.file_list are parsed only when @config.cl_entry_set is
# truthy; directories from @config.dir_list are always parsed. Both lists
# are processed in sorted order. The combined result is handed to
# Remote.post_structure before being returned.
#
# Returns a Hash with :files (parse_file results) and :subdirs (parse_dir
# results).
def run
  # Trace method entry
  debug_print "#{ self } : #{ __method__ }\n"

  # Files given on the command line come first (an empty list yields [])
  _parsed_files =
    if @config.cl_entry_set
      @config.file_list.sort.map { |_file| parse_file(_file) }
    else
      []
    end

  # Then every requested directory; 0 is the starting depth handed to parse_dir
  _parsed_dirs = @config.dir_list.sort.map { |_dir| parse_dir(_dir, 0) }

  # Overall hash for everything that was parsed
  _structure = { :files => _parsed_files, :subdirs => _parsed_dirs }

  debug_print "_structure dump\n\n"
  debug_print PP.pp(_structure, '')
  debug_print "\n\n"

  # Pass structure to poster with count as 0
  Remote.post_structure(_structure, @config, 0)

  _structure
end