class Watson::Parser

Dir/File parser class. Contains all the methods necessary to parse through files and directories for specified tags and generate a data structure containing the issues found.



Public Class Methods

new(config)

Initialize the parser with the current watson config

# File lib/watson/parser.rb, line 20
# Create a parser bound to the given watson configuration.
#
# config - the watson config object; it is stored in @config and read by the
#          other methods of this class.
def initialize(config)
# [review] - Not sure if passing config here is best way to access it

        # Identify method entry
        debug_print "#{ self } : #{ __method__ }\n"

        @config = config
end

Public Instance Methods

get_comment_type(filename)

Get comment syntax for given file

# File lib/watson/parser.rb, line 352
# Determine the single-line comment marker for a file from its extension.
#
# Unrecognized trailing extensions (the ".bak" in "file.cpp.bak", the ".1" in
# "file.cpp.1") are stripped one at a time until a known extension turns up
# or none are left.
#
# filename - String file name or path to inspect; the caller's string is
#            neither mutated nor reassigned.
#
# Returns "//" for C / C++ / Java / C# extensions, "#" for Bash / Ruby /
# Perl / Python extensions, or false when no recognized extension is found
# (the caller decides which default to use).
def get_comment_type(filename)

        # Identify method entry
        debug_print "#{ self } : #{ __method__ }\n"

        # Work on a local copy of the reference so the parameter is left alone
        _name = filename

        loop do
                # Grab the last file extension (.something)
                _mtch = _name.match(/(\.(\w+))$/)
                debug_print "Extension: #{ _mtch }\n"

                # Break if we don't find a match
                break if _mtch.nil?

                # Determine file type
                case _mtch[0]
                # C / C++, Java, C#
                # [todo] - Add /* style comment
                when ".cpp", ".cc", ".c", ".hpp", ".h",
                     ".java", ".class", ".cs"
                        debug_print "Comment type is: //\n"
                        return "//"

                # Bash, Ruby, Perl, Python
                when ".sh", ".rb", ".pl", ".py"
                        debug_print "Comment type is: #\n"
                        return "#"

                # Can't recognize extension, drop it and keep looping
                # in case of .bk, .1, etc
                else
                        _name = _name.sub(/(\.(\w+))$/, "")
                        debug_print "Didn't recognize, searching #{ _name }\n"
                end
        end

        # We didn't find any matches from the filename, signal failure
        # Deal with what default to use in calling method
        debug_print "Couldn't find any recognized extension type\n"
        false
end
parse_dir(dir, depth)

Parse through specified directory and find all subdirs and files

# File lib/watson/parser.rb, line 68
# Parse every file in a directory and recurse into its subdirectories.
#
# dir   - String directory path; it is interpolated straight into the glob
#         pattern, so it is expected to end with "/" (recursive calls pass
#         "#{ entry }/").
# depth - Integer depth of the calling parse_dir (0 for the initial call).
#
# Files matching an entry of @config.ignore_list are skipped. Subdirectories
# are parsed while the current depth does not exceed @config.parse_depth
# (0 means no limit). Every subdirectory visited is appended to
# @config.ignore_list.
#
# Returns a Hash with :curdir (the dir argument), :files (parse_file results)
# and :subdirs (nested parse_dir results), or false if Watson::FS.check_dir
# rejects dir.
def parse_dir(dir, depth)

        # Identify method entry
        debug_print "#{ self } : #{ __method__ }\n"

        # Error check on input
        unless Watson::FS.check_dir(dir)
                print "Unable to open #{ dir }, exiting\n"
                return false
        end
        debug_print "Opened #{ dir } for parsing\n"
        debug_print "Parsing through all files/directories in #{ dir }\n"

        # [review] - Shifted away from single Dir.glob loop to separate for dir/file
        #                     This duplicates code but is much better for readability
        #                     Not sure which is preferred?

        # Remove leading . or ./
        _glob_dir = dir.gsub(/^\.(\/?)/, '')
        debug_print "_glob_dir: #{_glob_dir}\n"

        # Go through directory to find all files
        # Create new array to hold all parsed files
        _completed_files = Array.new
        Dir.glob("#{ _glob_dir }{*,.*}").select { | _fn | File.file?(_fn) }.sort.each do | _entry |
                debug_print "Entry: #{_entry} is a file\n"

                # [review] - Warning to user when file is ignored? (outside of debug_print)
                # Check against ignore list; stop at the first matching rule
                _ignored = @config.ignore_list.any? do | _ignore |
                        if _ignore.start_with?("*")
                                # *.type rule: the text after "*" is used as a regex
                                # NOTE(review): the text is interpolated unescaped and
                                # unanchored, so "." matches any character anywhere in
                                # the path - confirm whether a suffix match was meant
                                _entry.match(/#{ _ignore[1..-1] }/)
                        else
                                # Verbose rule: exact relative or absolute path
                                _entry == _ignore || File.absolute_path(_entry) == _ignore
                        end
                end

                if _ignored
                        debug_print "#{ _entry } is on the ignore list, skipping\n"
                        next
                end

                # Entry survived filtering, parse it and push into file array
                debug_print "Parsing #{ _entry }\n"
                _completed_files.push(parse_file(_entry))
        end

        # Normalize once so a String depth from the config behaves like an Integer
        _max_depth = @config.parse_depth.to_i

        # Go through directory to find all subdirs
        # Create new array to hold all parsed subdirs
        # NOTE(review): the space in "{*, .*}" means dot-directories are not
        # matched; it is kept as-is because a plain ".*" would also match "."
        # and recurse into the same directory - confirm the intent
        _completed_dirs = Array.new
        Dir.glob("#{ _glob_dir }{*, .*}").select { | _fn | File.directory?(_fn) }.sort.each do | _entry |
                debug_print "Entry: #{ _entry } is a dir\n"

                ## Depth limit logic
                # Current depth is depth of previous parse_dir (passed in as second param) + 1
                _cur_depth = depth + 1
                debug_print "Current Folder depth: #{ _cur_depth }\n"

                # If Config.parse_depth is 0, no limit on subdir parsing
                if _max_depth == 0
                        debug_print "No max depth, parsing directory\n"
                        _completed_dirs.push(parse_dir("#{ _entry }/", _cur_depth))
                # If current depth is within limit (set in config), parse directory and pass depth
                elsif _cur_depth <= _max_depth
                        debug_print "Depth less than max dept (from config), parsing directory\n"
                        _completed_dirs.push(parse_dir("#{ _entry }/", _cur_depth))
                # Else, depth is greater than limit, ignore the directory
                else
                        debug_print "Depth greater than max depth, ignoring\n"
                end

                # Add directory to ignore list so it isn't repeated again accidentally
                @config.ignore_list.push(_entry)
        end

        # [review] - Not sure if Dir.glob requires a explicit directory/file close?
        # Create hash to hold all parsed files and directories
        _structure = Hash.new
        _structure[:curdir] = dir
        _structure[:files] = _completed_files
        _structure[:subdirs]  = _completed_dirs
        return _structure
end
parse_file(filename)

Parse through individual files looking for issue tags, then generate formatted issue hash

# File lib/watson/parser.rb, line 176
# Scan one file for issue tags and build the issue list for it.
#
# A line counts as an issue when it consists of comment characters and
# whitespace followed by "[tag] - title". Tags not present in
# @config.tag_list are reported to the user and skipped. When
# @config.remote_valid is set, each issue is also handed to the GitHub and/or
# Bitbucket poster, depending on @config.github_valid / bitbucket_valid.
#
# filename - String path of the file to parse (used as the relative path).
#
# Returns a Hash with :relative_path, :absolute_path, :has_issues and one
# Array of issue hashes per configured tag (keyed by the tag string), or
# false if Watson::FS.check_file rejects the file. Each issue hash carries
# :line_number, :title, :context, :tag, :path and :md5.
def parse_file(filename)
# [review] - Rename method input param to filename (more verbose?)

        # Identify method entry
        debug_print "#{ self } : #{ __method__ }\n"

        _relative_path = filename
        _absolute_path = File.absolute_path(filename)

        # Error check on input, use input filename to make sure relative path is correct
        unless Watson::FS.check_file(_relative_path)
                print "Unable to open #{ _relative_path }, exiting\n"
                return false
        end
        debug_print "Opened #{ _relative_path } for parsing\n"
        debug_print "Short path: #{ _relative_path }\n"

        # Get filetype and set corresponding comment type
        _comment_type = get_comment_type(_relative_path)
        unless _comment_type
                debug_print "Using default (#) comment type\n"
                _comment_type = "#"
        end

        # Read the entire file into an array of lines
        # Use an array so we can look ahead when creating issues later
        # File.readlines opens, reads and closes the file in one call, so no
        # handle is left open
        _data = File.readlines(_absolute_path)

        # Initialize issue list hash
        _issue_list = Hash.new
        _issue_list[:relative_path] = _relative_path
        _issue_list[:absolute_path] = _absolute_path
        _issue_list[:has_issues] = false
        @config.tag_list.each do | _tag |
                debug_print "Creating array named #{ _tag }\n"
                # [review] - Use to_sym to make tag into symbol instead of string?
                _issue_list[_tag] = Array.new
        end

        # Comment line with [tag] - text (any comb of space and comment chars acceptable)
        # Built once here because it does not change from line to line
        _tag_format = /^[#{ _comment_type }+?\s+?]+\[(\w+)\]\s+-\s+(.+)/

        # Loop through all array elements (lines in file) and look for issues
        _data.each_with_index do | _line, _i |

                _mtch = _line.encode('UTF-8', :invalid => :replace).match(_tag_format)
                unless _mtch
                        debug_print "No valid tag found in line, skipping\n"
                        next
                end

                # Set tag
                _tag = _mtch[1]

                # Make sure that the tag that was found is something we accept
                # If not, skip it but tell user about an unrecognized tag
                unless @config.tag_list.include?(_tag)
                        Printer.print_status "!", RED
                        print "Unknown tag [#{ _tag }] found, ignoring\n"
                        print "      You might want to include it in your RC or with the -t/--tags flag\n"
                        next
                end

                # Found a valid match (with recognized tag)
                # Set flag for this issue_list (for file) to indicate that
                _issue_list[:has_issues] = true

                _title = _mtch[2]
                debug_print "Issue found\n"
                debug_print "Tag: #{ _tag }\n"
                debug_print "Issue: #{ _title }\n"

                # Create hash for each issue found
                _issue = Hash.new
                _issue[:line_number] = _i + 1
                _issue[:title] = _title

                # Grab context of issue specified by Config param (+1 to include issue itself)
                # NOTE(review): the range is inclusive, so this yields the issue line
                # plus context_depth + 1 following lines - confirm that is intended
                _context = _data[_i..(_i + @config.context_depth + 1)]

                # Count the leading indentation (spaces, tabs, newline) of each
                # context line; used to preserve relative indentation in the post
                _cut = _context.map { | _ctx_line | _ctx_line[/\A[ \t\n]*/].size }
                _shared_indent = _cut.min
                debug_print "Indent per context line: #{ _cut }\n"

                # Print old _context
                debug_print "\n\n Old Context \n"
                debug_print PP.pp(_context, "")
                debug_print "\n\n"

                # Trim the context lines to be left aligned but maintain indentation
                # Then add a single \t to the beginning so the Markdown is pretty on GitHub/Bitbucket
                _context.map! { | _ctx_line | "\t#{ _ctx_line.slice(_shared_indent .. -1) }" }

                # Print new _context
                debug_print("\n\n New Context \n")
                debug_print PP.pp(_context, "")

                _issue[:context] = _context

                # These are accessible from _issue_list, but we pass individual issues
                # to the remote poster, so we need this here to reference them for GitHub/Bitbucket
                _issue[:tag] = _tag
                _issue[:path] = _relative_path

                # Generate md5 hash for each specific issue (for bookkeeping)
                _issue[:md5] = ::Digest::MD5.hexdigest("#{ _tag }, #{ _relative_path }, #{ _title }")
                debug_print "#{ _issue }\n"

                # [todo] - Figure out a way to queue up posts so user has a progress bar?
                # That way user can tell that wait is because of http calls not app
                # If GitHub is valid, pass _issue to GitHub poster function
                # [review] - Keep Remote as a static method and pass config every time?
                #                    Or convert to a regular class and make an instance with @config
                if @config.remote_valid
                        if @config.github_valid
                                debug_print "GitHub is valid, posting issue\n"
                                Remote::GitHub.post_issue(_issue, @config)
                        else
                                debug_print "GitHub invalid, not posting issue\n"
                        end

                        if @config.bitbucket_valid
                                debug_print "Bitbucket is valid, posting issue\n"
                                Remote::Bitbucket.post_issue(_issue, @config)
                        else
                                debug_print "Bitbucket invalid, not posting issue\n"
                        end
                end

                # [review] - Use _tag string as symbol reference in hash or keep as string?
                # Look into to_sym to keep format of all _issue params the same
                _issue_list[_tag].push( _issue )
        end

        # [review] - Return of parse_file is different than watson-perl
        # Not sure which makes more sense, ruby version seems simpler
        # perl version might have to stay since hash scoping is weird in perl
        debug_print "\nIssue list: #{ _issue_list }\n"

        return _issue_list
end
run()

Begins parsing of files / dirs specified in the initial dir/file lists

# File lib/watson/parser.rb, line 32
# Begin parsing the files and directories listed in the config.
#
# Files from @config.file_list are parsed only when @config.cl_entry_set is
# truthy; directories from @config.dir_list are always parsed, starting at
# depth 0. Both lists are processed in sorted order.
#
# Returns a Hash with :files (parse_file results) and :subdirs (parse_dir
# results).
def run

        # Identify method entry
        debug_print "#{ self } : #{ __method__ }\n"

        # Go through all files added from CL (sort them first)
        # If empty, sort and each will do nothing, no errors
        _completed_files = Array.new
        if @config.cl_entry_set
                @config.file_list.sort.each do | _file |
                        _completed_files.push(parse_file(_file))
                end
        end

        # Then go through all the specified directories
        # Initial parse depth to parse_dir is 0
        _completed_dirs = @config.dir_list.sort.map { | _dir | parse_dir(_dir, 0) }

        # Create overall hash for parsed files
        _structure = Hash.new
        _structure[:files] = _completed_files
        _structure[:subdirs]  = _completed_dirs

        debug_print "_structure dump\n\n"
        debug_print PP.pp(_structure, "")
        debug_print "\n\n"
        return _structure
end