Re: [PATCH v5] BaseTools/Scripts/PatchCheck.py: Extended patch style check for c code

From: Jordan Justen <jordan.l.justen@intel.com>
To: Daniil Egranov <daniil.egranov@arm.com>,  edk2-devel@lists.01.org
Cc: liming.gao@intel.com,  leif.lindholm@linaro.org
Subject: Re: [PATCH v5] BaseTools/Scripts/PatchCheck.py: Extended patch style check for c code
Date: Wed, 14 Dec 2016 18:22:45 -0800	[thread overview]
Message-ID: <148176856528.3449.15339945413772613831@jljusten-ivb> (raw)
In-Reply-To: <1481638518-37028-1-git-send-email-daniil.egranov@arm.com>

The patch subject seems too generic, but I think that is because the
patch is doing too many things at once.

(See more comments below.)

On 2016-12-13 06:15:18, Daniil Egranov wrote:
> Corrected code checking for multi-line and commented lines. Both 
> multi-line and open comment flags will be reset when leaving
> diff "+" area of the patch.
> Changed version of the tool to 0.2.
>  
> Contributed-under: TianoCore Contribution Agreement 1.0
> Signed-off-by: Daniil Egranov <daniil.egranov@arm.com>
> ---
> Changelog:
> 
> v4
> Corrected maximum code line size to 120 characters.
> 
> v3
> Corrected space detection before parentheses. 
> 
> v2:
> Fixed several indentation cases
> 
> v1:
> Fixed reporting signature error for a cover letter.
> Fixed reporting line size error for a file change information
> included in the commit message.
> Fixed line number reported in PatchCheck error messages. It
> points to the correct line in the diff file.
> The patch extends style checking for c code:
>  Added check for code indentation.
>  Added report line size greater than 80 characters.
>  Added checking for space before '('.
>  Added checking for space before '{'.
>  Added checking for '}' to be on a new line and have spaces
>  for "} else {" or "} while ()" cases.
> 
>  BaseTools/Scripts/PatchCheck.py | 263 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 238 insertions(+), 25 deletions(-)
> 
> diff --git a/BaseTools/Scripts/PatchCheck.py b/BaseTools/Scripts/PatchCheck.py
> index 7c30082..2406601 100755
> --- a/BaseTools/Scripts/PatchCheck.py
> +++ b/BaseTools/Scripts/PatchCheck.py
> @@ -15,7 +15,7 @@
>  
>  from __future__ import print_function
>  
> -VersionNumber = '0.1'
> +VersionNumber = '0.2'
>  __copyright__ = "Copyright (c) 2015 - 2016, Intel Corporation  All rights reserved."
>  
>  import email
> @@ -32,7 +32,7 @@ class Verbose:
>  class CommitMessageCheck:
>      """Checks the contents of a git commit message."""
>  
> -    def __init__(self, subject, message):
> +    def __init__(self, subject, message, message_offset, cover):
>          self.ok = True
>  
>          if subject is None and  message is None:
> @@ -41,9 +41,15 @@ class CommitMessageCheck:
>  
>          self.subject = subject
>          self.msg = message
> +        self.msg_offset = message_offset
> +        self.cover = cover
> +
> +        if not cover:
> +            self.check_contributed_under()
> +            self.check_signed_off_by()
> +        else:
> +            print('The commit message is cover letter.')

I think this patch is doing way too many things for a single patch.
For example, I think this cover letter check should be a separate
patch.

>  
> -        self.check_contributed_under()
> -        self.check_signed_off_by()
>          self.check_misc_signatures()
>          self.check_overall_format()
>          self.report_message_result()
> @@ -180,6 +186,9 @@ class CommitMessageCheck:
>          for sig in self.sig_types:
>              self.find_signatures(sig)
>  
> +    diff_change_info_re = \
> +        re.compile(r'.*\|\s+(\d|Bin)*\s.*[\+\-]')
> +
>      def check_overall_format(self):
>          lines = self.msg.splitlines()
>  
> @@ -197,9 +206,10 @@ class CommitMessageCheck:
>              self.error('Empty commit message!')
>              return
>  
> -        if count >= 1 and len(lines[0]) >= 72:
> +        if count >= 1 and len(lines[0]) > 72:

I think you are reverting e61406708c83f96d3dc2e8899716cce43c058491
here, but I don't think you meant too. If you had smaller, focused
patches, it would less likely for things like this to get lost in the
noise...

>              self.error('First line of commit message (subject line) ' +
> -                       'is too long.')
> +                       'is too long (%d) (max 72 characters):' %(len(lines[0])))
> +            print(lines[0], '\n')

Why is the extra print added here?

>  
>          if count >= 1 and len(lines[0].strip()) == 0:
>              self.error('First line of commit message (subject line) ' +
> @@ -210,10 +220,13 @@ class CommitMessageCheck:
>                         'empty.')
>  
>          for i in range(2, count):
> -            if (len(lines[i]) >= 76 and
> +            if (len(lines[i]) > 76 and
>                  len(lines[i].split()) > 1 and
> -                not lines[i].startswith('git-svn-id:')):
> -                self.error('Line %d of commit message is too long.' % (i + 1))
> +                not lines[i].startswith('git-svn-id:') and
> +                self.diff_change_info_re.search(lines[i]) is None):
> +                self.error('Line %d of commit message is too long (%d) (max 76 characters):' \

I don't think you need the '\' here since python keeps looking on
lines if it is within '(' like in the function call to self.error. In
fact, I think it is best to try to avoid code that requires the '\',
although in rare cases, it might be okay.

> +                           % (i + self.msg_offset - 1, len(lines[i])))
> +                print(lines[i], '\n')
>  
>          last_sig_line = None
>          for i in range(count - 1, 0, -1):
> @@ -236,13 +249,18 @@ class CommitMessageCheck:
>  class GitDiffCheck:
>      """Checks the contents of a git diff."""
>  
> -    def __init__(self, diff):
> +    def __init__(self, diff, offset):
>          self.ok = True
>          self.format_ok = True
>          self.lines = diff.splitlines(True)
>          self.count = len(self.lines)
>          self.line_num = 0
>          self.state = START
> +        self.comments_open = False
> +        self.multiline_string = False
> +        self.current_indent_size = 0
> +        self.parentheses_count = 0
> +        self.offset = offset
>          while self.line_num < self.count and self.format_ok:
>              line_num = self.line_num
>              self.run()
> @@ -255,6 +273,12 @@ class GitDiffCheck:
>          if self.ok:
>              print('The code passed all checks.')
>  
> +    def clean_counts(self):
> +        self.current_indent_size = -1
> +        self.parentheses_count = 0
> +        self.comments_open = False
> +        self.multiline_string = False
> +
>      def run(self):
>          line = self.lines[self.line_num]
>  
> @@ -283,6 +307,20 @@ class GitDiffCheck:
>          elif self.state == PRE_PATCH:
>              if line.startswith('+++ b/'):
>                  self.set_filename(line[6:].rstrip())
> +                print("Checking patch for %s ...\n" %(self.hunk_filename))

Should be:

                print("Checking patch for %s ...\n" % self.hunk_filename)

> +                if self.hunk_filename.endswith(".c") or \
> +                   self.hunk_filename.endswith(".h"):

In this case, rather than the '\', can you use:

                if (self.hunk_filename.endswith(".c") or
                    self.hunk_filename.endswith(".h")):

> +                    self.file_type = "c_type"
> +                elif  self.hunk_filename.endswith(".dec") or \
> +                      self.hunk_filename.endswith(".dsc") or \
> +                      self.hunk_filename.endswith(".fdf"):
> +                    self.file_type = "edk2_type"
> +                elif  self.hunk_filename.endswith(".S"):
> +                    self.file_type = "asm_arm_type"
> +                elif  self.hunk_filename.endswith(".asm"):
> +                    self.file_type = "asm_type"
> +                else:
> +                    self.file_type = "other_type"
>              if line.startswith('@@ '):
>                  self.state = PATCH
>                  self.binary = False
> @@ -296,18 +334,27 @@ class GitDiffCheck:
>                          ok = True
>                  if not ok:
>                      self.format_error("didn't find diff hunk marker (@@)")
> +
>              self.line_num += 1
>          elif self.state == PATCH:
>              if self.binary:
> +                self.clean_counts()
>                  pass
>              if line.startswith('-'):
> +                self.clean_counts()
>                  pass
>              elif line.startswith('+'):
> -                self.check_added_line(line[1:])
> +                # indentation resets every time we leave "+" diff area
> +                self.check_added_line(line[1:], self.line_num + self.offset)
>              elif line.startswith(r'\ No newline '):
> +                self.clean_counts()
>                  pass
>              elif not line.startswith(' '):
> +                self.clean_counts()
>                  self.format_error("unexpected patch line")
> +            else:
> +                self.clean_counts()
> +
>              self.line_num += 1
>  
>      pre_patch_prefixes = (
> @@ -336,7 +383,7 @@ class GitDiffCheck:
>          lines = [ msg ]
>          if self.hunk_filename is not None:
>              lines.append('File: ' + self.hunk_filename)
> -        lines.append('Line: ' + line)
> +        lines.append('Line: ' + line + '\n')
>  
>          self.error(*lines)
>  
> @@ -348,28 +395,171 @@ class GitDiffCheck:
>                     ''',
>                     re.VERBOSE)
>  
> -    def check_added_line(self, line):
> +    space_check_parentheses_re = re.compile(r'[a-zA-Z0-9_]\S*\((?<!\)\()(?<!\(\().*')

This line is more than 80 columns.

I prefer multi-line re.VERBOSE regular expressions. It also allows you
to break up bigger regexes onto multiple lines.

> +    space_check_brace_open_re = re.compile(r'[a-zA-Z0-9_]\S*\{')
> +    space_check_brace_close_re = re.compile(r'[\}]\S.*')
> +    brace_open_check_re = re.compile(r'[a-zA-Z0-9_]*\{')
> +    brace_close_check_re = re.compile(r'.*\}.*')

It looks like the brace_open/close_check_re regexes are not used. ?

For some sophisticated style checks, I think we would need to look at
the full source file. (Not just the diff.) It would quickly become
pretty challenging to write such a tool, since it is practically a
compiler. Perhaps a tool based on libclang?

I think that any checks that require multiple lines probably should be
beyond the scope of PatchCheck.py

Some of the checks you are proposing might be fine, but could you
separate them each into separate patches?

Thanks,

-Jordan

> +    multiline_check_re = re.compile(r'[a-zA-Z0-9_]*(\\|\&|\,|\+|\-|\*|\||\=)\Z')
> +
> +    def check_added_line(self, line, line_num):
>          eol = ''
>          for an_eol in self.line_endings:
>              if line.endswith(an_eol):
>                  eol = an_eol
>                  line = line[:-len(eol)]
>  
> -        stripped = line.rstrip()
> +        # empty "+" line, skip it
> +        if len(line) == 0:
> +            return
> +
> +        rstripped = line.rstrip()
> +        lstripped = line.lstrip()
>  
>          if self.force_crlf and eol != '\r\n':
> -            self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
> +            self.added_line_error('Line ending (%s) is not CRLF:' % repr(eol),
>                                    line)
>          if '    ' in line:
> -            self.added_line_error('Tab character used', line)
> -        if len(stripped) < len(line):
> -            self.added_line_error('Trailing whitespace found', line)
> +            self.added_line_error('Tab character used at line %d:' \
> +                                  %(line_num), line)
> +        if len(rstripped) < len(line):
> +            self.added_line_error('Trailing whitespace found at line %d:' \
> +                                  %(line_num), line)
> +        # the file type specific checks
> +        if self.file_type == "c_type":
> +
> +            # set initial indentation as the number of spaces after the first "+"
> +            # indentation does not apply on lines with 0 characters
> +            # adjust the indentation if first "+" has "}"
> +            if self.current_indent_size == -1:
> +                if len(lstripped) != 0:
> +                    self.current_indent_size = len(line) - len(lstripped)
> +                else:
> +                    self.clean_counts()
>  
> -        mo = self.old_debug_re.search(line)
> -        if mo is not None:
> -            self.added_line_error('EFI_D_' + mo.group(1) + ' was used, '
> -                                  'but DEBUG_' + mo.group(1) +
> -                                  ' is now recommended', line)
> +                if lstripped.startswith("}"):
> +                     self.current_indent_size = line.find("}") + 2
> +
> +            indent_size = len(line) - len(lstripped)
> +            indent_size_adjust = 0
> +            force_brace_check = False
> +
> +            if len(line) > 120:
> +                self.added_line_error('Line %d is too long (%d) (max 120 characters):' \
> +                                  %(line_num, len(line)), line)
> +
> +            # skip comments for code checking
> +            if lstripped.startswith("//"):
> +                return
> +            if lstripped.startswith("/*") and not self.comments_open:
> +                self.comments_open = True
> +
> +            if rstripped.endswith("*/") and self.comments_open:
> +                self.comments_open = False
> +                return
> +
> +            if rstripped.endswith("*/") and not self.comments_open:
> +                # found unknown close comment, reset the indentation as
> +                # no idea where it started
> +                self.current_indent_size = -1
> +                return
> +
> +            if not self.comments_open:
> +                mo = self.old_debug_re.search(line)
> +                if mo is not None:
> +                    self.added_line_error('EFI_D_' + mo.group(1) + ' was used, '
> +                                      'but DEBUG_' + mo.group(1) +
> +                                      ' is now recommended at line %d' \
> +                                      %(line_num), line)
> +
> +                # track down multiple braces in the same line
> +                braces_count = line.count('{') - line.count('}')
> +
> +                # track down a special case with "} else ..{" braces
> +                if line.find("else") != -1:
> +                    force_brace_check = True
> +
> +                # check for closing brace which affects indentation
> +                if braces_count < 0 and not force_brace_check:
> +                    self.current_indent_size -= 2 * abs(braces_count)
> +                elif force_brace_check:
> +                    if line.find("}") != -1:
> +                        self.current_indent_size -= 2
> +                        braces_count = 1
> +
> +                # for the switch()/case:, adjust "case:" and "default:" indentation
> +                # as they aligned with the "switch()" indentation
> +                if lstripped.startswith("case ") or \
> +                    lstripped.startswith("default:"):
> +                    indent_size_adjust = -2
> +
> +                # indentation code check related to the diff structure
> +                # do not check code indentation between parentheses
> +                if indent_size != self.current_indent_size + indent_size_adjust \
> +                    and len(line) != 0 and self.parentheses_count == 0 \
> +                    and not self.multiline_string:
> +                        self.added_line_error('Invalid indentation at line %d. Expecting %d but found %d spaces:' \
> +                        % (line_num, self.current_indent_size + indent_size_adjust, \
> +                           indent_size), line)
> +
> +                # check for missing space before open brace
> +                if braces_count > 0 or force_brace_check:
> +                    if line.find("{") != -1:
> +                        self.current_indent_size += 2 * braces_count
> +                    if not line.startswith("{"):
> +                        spb_check = self.space_check_brace_open_re.search(line)
> +                        if spb_check is not None:
> +                            self.added_line_error('Missing space before \'{\' at line %d:' \
> +                                                  %(line_num), line)
> +
> +                # check for missing space before open parenthesis
> +                spo = self.space_check_parentheses_re.search(line)
> +                if spo is not None and not self.multiline_string:
> +                    # check if it's not inside of the double quoted string or
> +                    # (aa)(bb)fff case
> +                    text_str_start = line.find('\"')
> +                    if not (text_str_start != -1 and text_str_start < spo.start()):
> +                        self.added_line_error('Missing space before \'(\' at line %d:' \
> +                                          %(line_num), line)
> +
> +                # check for closing brace cases
> +                if braces_count < 0 or \
> +                    (force_brace_check and line.find("}") != -1):
> +                    if not lstripped.startswith("}") and rstripped.endswith("}"):
> +                        self.added_line_error('The \'}\' is not on its own line at line %d:' \
> +                                                %(line_num), line)
> +                    if not lstripped.endswith("}"):
> +                        scb = self.space_check_brace_close_re.search(lstripped)
> +                        if scb is not None \
> +                            and lstripped[scb.start() + 1] != ';' \
> +                            and lstripped[scb.start() + 1] != ',' \
> +                            and lstripped[scb.start() + 1] != '}':
> +                            self.added_line_error('Missing space after \'}\' at line %d:' \
> +                                                  %(line_num), line)
> +
> +                # track down parentheses, no indentation check between them
> +                if not (self.parentheses_count == 0 and lstripped.startswith(')') \
> +                        and not self.multiline_string and not self.comments_open):
> +                    self.parentheses_count += line.count('(') - line.count(')')
> +
> +                # multiline strings indentation is unknown
> +                if not self.comments_open:
> +                    mlc = self.multiline_check_re.search(line)
> +                    if mlc is not None:
> +                        self.multiline_string = True
> +                    else:
> +                        self.multiline_string = False
> +
> +        elif self.file_type == "edk2_type":
> +            pass
> +        elif self.file_type == "asm_arm_type":
> +            pass
> +        elif self.file_type == "asm_type":
> +            pass
> +        elif self.file_type == "other_type":
> +            pass
> +        else:
> +            pass
>  
>      split_diff_re = re.compile(r'''
>                                     (?P<cmd>
> @@ -410,12 +600,13 @@ class CheckOnePatch:
>          self.patch = patch
>          self.find_patch_pieces()
>  
> -        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg)
> +        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg, \
> +                                       self.commit_msg_offset, self.cover)
>          msg_ok = msg_check.ok
>  
>          diff_ok = True
>          if self.diff is not None:
> -            diff_check = GitDiffCheck(self.diff)
> +            diff_check = GitDiffCheck(self.diff, self.diff_offset)
>              diff_ok = diff_check.ok
>  
>          self.ok = msg_ok and diff_ok
> @@ -458,6 +649,16 @@ class CheckOnePatch:
>                     ''',
>                     re.VERBOSE)
>  
> +    cover_letter_re = re.compile(r'Subject:\s*\[PATCH.*.0/.*',
> +                                  re.IGNORECASE | re.MULTILINE)
> +
> +    def is_cover_letter(self, patch):
> +        cl = self.cover_letter_re.search(patch)
> +        if cl is None:
> +            return False
> +
> +        return True
> +
>      def find_patch_pieces(self):
>          if sys.version_info < (3, 0):
>              patch = self.patch.encode('ascii', 'ignore')
> @@ -465,10 +666,13 @@ class CheckOnePatch:
>              patch = self.patch
>  
>          self.commit_msg = None
> +        self.commit_msg_offset = 0
>          self.stat = None
>          self.commit_subject = None
>          self.commit_prefix = None
>          self.diff = None
> +        self.diff_offset = 0
> +        self.cover = False
>  
>          if patch.startswith('diff --git'):
>              self.diff = patch
> @@ -480,6 +684,14 @@ class CheckOnePatch:
>          assert(parts[0].get_content_type() == 'text/plain')
>          content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')
>  
> +        # find offset of content
> +        self.commit_msg_offset = len(patch.splitlines()) - len(content.splitlines())
> +
> +        # find offset of first diff section
> +        mo = self.git_diff_re.search(patch)
> +        if mo is not None:
> +            self.diff_offset = len(patch[:mo.start()].splitlines()) + 1
> +
>          mo = self.git_diff_re.search(content)
>          if mo is not None:
>              self.diff = content[mo.start():]
> @@ -492,6 +704,7 @@ class CheckOnePatch:
>              self.stat = mo.group('stat')
>              self.commit_msg = mo.group('commit_message')
>  
> +        self.cover = self.is_cover_letter(patch)
>          self.commit_subject = pmail['subject'].replace('\r\n', '')
>          self.commit_subject = self.commit_subject.replace('\n', '')
>          self.commit_subject = self.subject_prefix_re.sub('', self.commit_subject, 1)
> -- 
> 2.7.4
> 
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel