libcamera/utils/checkstyle.py
Laurent Pinchart 5849dd0b17 utils: checkstyle: Add formatter to sort #include statements
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
2020-03-23 13:51:00 +02:00

794 lines
23 KiB
Python
Executable file

#!/usr/bin/python3
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (C) 2018, Google Inc.
#
# Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
#
# checkstyle.py - A patch style checker script based on astyle or clang-format
#
# TODO:
#
# - Support other formatting tools and checkers (cppcheck, cpplint, kwstyle, ...)
# - Split large hunks to minimize context noise
# - Improve style issues counting
#
import argparse
import difflib
import fnmatch
import os.path
import re
import shutil
import subprocess
import sys
astyle_options = (
'-n',
'--style=linux',
'--indent=force-tab=8',
'--attach-namespaces',
'--attach-extern-c',
'--pad-oper',
'--align-pointer=name',
'--align-reference=name',
'--keep-one-line-blocks',
'--max-code-length=120'
)
dependencies = {
'astyle': False,
'clang-format': False,
'git': True,
}
# ------------------------------------------------------------------------------
# Colour terminal handling
#
class Colours:
Default = 0
Black = 0
Red = 31
Green = 32
Yellow = 33
Blue = 34
Magenta = 35
Cyan = 36
LightGrey = 37
DarkGrey = 90
LightRed = 91
LightGreen = 92
Lightyellow = 93
LightBlue = 94
LightMagenta = 95
LightCyan = 96
White = 97
@staticmethod
def fg(colour):
if sys.stdout.isatty():
return '\033[%um' % colour
else:
return ''
@staticmethod
def bg(colour):
if sys.stdout.isatty():
return '\033[%um' % (colour + 10)
else:
return ''
@staticmethod
def reset():
if sys.stdout.isatty():
return '\033[0m'
else:
return ''
# ------------------------------------------------------------------------------
# Diff parsing, handling and printing
#
class DiffHunkSide(object):
"""A side of a diff hunk, recording line numbers"""
def __init__(self, start):
self.start = start
self.touched = []
self.untouched = []
def __len__(self):
return len(self.touched) + len(self.untouched)
class DiffHunk(object):
diff_header_regex = re.compile(r'@@ -([0-9]+),?([0-9]+)? \+([0-9]+),?([0-9]+)? @@')
def __init__(self, line):
match = DiffHunk.diff_header_regex.match(line)
if not match:
raise RuntimeError("Malformed diff hunk header '%s'" % line)
self.__from_line = int(match.group(1))
self.__to_line = int(match.group(3))
self.__from = DiffHunkSide(self.__from_line)
self.__to = DiffHunkSide(self.__to_line)
self.lines = []
def __repr__(self):
s = '%s@@ -%u,%u +%u,%u @@\n' % \
(Colours.fg(Colours.Cyan),
self.__from.start, len(self.__from),
self.__to.start, len(self.__to))
for line in self.lines:
if line[0] == '-':
s += Colours.fg(Colours.Red)
elif line[0] == '+':
s += Colours.fg(Colours.Green)
if line[0] == '-':
spaces = 0
for i in range(len(line)):
if line[-i-1].isspace():
spaces += 1
else:
break
spaces = len(line) - spaces
line = line[0:spaces] + Colours.bg(Colours.Red) + line[spaces:]
s += line
s += Colours.reset()
s += '\n'
return s[:-1]
def append(self, line):
if line[0] == ' ':
self.__from.untouched.append(self.__from_line)
self.__from_line += 1
self.__to.untouched.append(self.__to_line)
self.__to_line += 1
elif line[0] == '-':
self.__from.touched.append(self.__from_line)
self.__from_line += 1
elif line[0] == '+':
self.__to.touched.append(self.__to_line)
self.__to_line += 1
self.lines.append(line.rstrip('\n'))
def intersects(self, lines):
for line in lines:
if line in self.__from.touched:
return True
return False
def side(self, side):
if side == 'from':
return self.__from
else:
return self.__to
def parse_diff(diff):
hunks = []
hunk = None
for line in diff:
if line.startswith('@@'):
if hunk:
hunks.append(hunk)
hunk = DiffHunk(line)
elif hunk is not None:
hunk.append(line)
if hunk:
hunks.append(hunk)
return hunks
# ------------------------------------------------------------------------------
# Style Checkers
#
_style_checkers = []
class StyleCheckerRegistry(type):
def __new__(cls, clsname, bases, attrs):
newclass = super(StyleCheckerRegistry, cls).__new__(cls, clsname, bases, attrs)
if clsname != 'StyleChecker':
_style_checkers.append(newclass)
return newclass
class StyleChecker(metaclass=StyleCheckerRegistry):
def __init__(self):
pass
#
# Class methods
#
@classmethod
def checkers(cls, filename):
for checker in _style_checkers:
if checker.supports(filename):
yield checker
@classmethod
def supports(cls, filename):
for pattern in cls.patterns:
if fnmatch.fnmatch(os.path.basename(filename), pattern):
return True
return False
@classmethod
def all_patterns(cls):
patterns = set()
for checker in _style_checkers:
patterns.update(checker.patterns)
return patterns
class StyleIssue(object):
def __init__(self, line_number, line, msg):
self.line_number = line_number
self.line = line
self.msg = msg
class IncludeChecker(StyleChecker):
patterns = ('*.cpp', '*.h')
headers = ('assert', 'ctype', 'errno', 'fenv', 'float', 'inttypes',
'limits', 'locale', 'math', 'setjmp', 'signal', 'stdarg',
'stddef', 'stdint', 'stdio', 'stdlib', 'string', 'time', 'uchar',
'wchar', 'wctype')
include_regex = re.compile('^#include <c([a-z]*)>')
def __init__(self, content):
super().__init__()
self.__content = content
def check(self, line_numbers):
issues = []
for line_number in line_numbers:
line = self.__content[line_number - 1]
match = IncludeChecker.include_regex.match(line)
if not match:
continue
header = match.group(1)
if header not in IncludeChecker.headers:
continue
issues.append(StyleIssue(line_number, line,
'C compatibility header <%s.h> is preferred' % header))
return issues
class LogCategoryChecker(StyleChecker):
log_regex = re.compile('\\bLOG\((Debug|Info|Warning|Error|Fatal)\)')
patterns = ('*.cpp',)
def __init__(self, content):
super().__init__()
self.__content = content
def check(self, line_numbers):
issues = []
for line_number in line_numbers:
line = self.__content[line_number-1]
if not LogCategoryChecker.log_regex.search(line):
continue
issues.append(StyleIssue(line_number, line, 'LOG() should use categories'))
return issues
class MesonChecker(StyleChecker):
patterns = ('meson.build',)
def __init__(self, content):
super().__init__()
self.__content = content
def check(self, line_numbers):
issues = []
for line_number in line_numbers:
line = self.__content[line_number-1]
if line.find('\t') != -1:
issues.append(StyleIssue(line_number, line, 'meson.build should use spaces for indentation'))
return issues
class Pep8Checker(StyleChecker):
patterns = ('*.py',)
results_regex = re.compile('stdin:([0-9]+):([0-9]+)(.*)')
def __init__(self, content):
super().__init__()
self.__content = content
def check(self, line_numbers):
issues = []
data = ''.join(self.__content).encode('utf-8')
try:
ret = subprocess.run(['pycodestyle', '--ignore=E501', '-'],
input=data, stdout=subprocess.PIPE)
except FileNotFoundError:
issues.append(StyleIssue(0, None, "Please install pycodestyle to validate python additions"))
return issues
results = ret.stdout.decode('utf-8').splitlines()
for item in results:
search = re.search(Pep8Checker.results_regex, item)
line_number = int(search.group(1))
position = int(search.group(2))
msg = search.group(3)
if line_number in line_numbers:
line = self.__content[line_number - 1]
issues.append(StyleIssue(line_number, line, msg))
return issues
class ShellChecker(StyleChecker):
patterns = ('*.sh',)
results_line_regex = re.compile('In - line ([0-9]+):')
def __init__(self, content):
super().__init__()
self.__content = content
def check(self, line_numbers):
issues = []
data = ''.join(self.__content).encode('utf-8')
try:
ret = subprocess.run(['shellcheck', '-Cnever', '-'],
input=data, stdout=subprocess.PIPE)
except FileNotFoundError:
issues.append(StyleIssue(0, None, "Please install shellcheck to validate shell script additions"))
return issues
results = ret.stdout.decode('utf-8').splitlines()
for nr, item in enumerate(results):
search = re.search(ShellChecker.results_line_regex, item)
if search is None:
continue
line_number = int(search.group(1))
line = results[nr + 1]
msg = results[nr + 2]
# Determined, but not yet used
position = msg.find('^') + 1
if line_number in line_numbers:
issues.append(StyleIssue(line_number, line, msg))
return issues
# ------------------------------------------------------------------------------
# Formatters
#
_formatters = []
class FormatterRegistry(type):
def __new__(cls, clsname, bases, attrs):
newclass = super(FormatterRegistry, cls).__new__(cls, clsname, bases, attrs)
if clsname != 'Formatter':
_formatters.append(newclass)
return newclass
class Formatter(metaclass=FormatterRegistry):
enabled = True
def __init__(self):
pass
#
# Class methods
#
@classmethod
def formatters(cls, filename):
for formatter in _formatters:
if not cls.enabled:
continue
if formatter.supports(filename):
yield formatter
@classmethod
def supports(cls, filename):
if not cls.enabled:
return False
for pattern in cls.patterns:
if fnmatch.fnmatch(os.path.basename(filename), pattern):
return True
return False
@classmethod
def all_patterns(cls):
patterns = set()
for formatter in _formatters:
if not cls.enabled:
continue
patterns.update(formatter.patterns)
return patterns
class AStyleFormatter(Formatter):
enabled = False
patterns = ('*.c', '*.cpp', '*.h')
@classmethod
def format(cls, filename, data):
ret = subprocess.run(['astyle', *astyle_options],
input=data.encode('utf-8'), stdout=subprocess.PIPE)
return ret.stdout.decode('utf-8')
class CLangFormatter(Formatter):
enabled = False
patterns = ('*.c', '*.cpp', '*.h')
@classmethod
def format(cls, filename, data):
ret = subprocess.run(['clang-format', '-style=file',
'-assume-filename=' + filename],
input=data.encode('utf-8'), stdout=subprocess.PIPE)
return ret.stdout.decode('utf-8')
class DoxygenFormatter(Formatter):
patterns = ('*.c', '*.cpp')
return_regex = re.compile(' +\\* +\\\\return +[a-z]')
@classmethod
def format(cls, filename, data):
lines = []
in_doxygen = False
for line in data.split('\n'):
if line.find('/**') != -1:
in_doxygen = True
if not in_doxygen:
lines.append(line)
continue
line = cls.return_regex.sub(lambda m: m.group(0)[:-1] + m.group(0)[-1].upper(), line)
if line.find('*/') != -1:
in_doxygen = False
lines.append(line)
return '\n'.join(lines)
class IncludeOrderFormatter(Formatter):
patterns = ('*.cpp', '*.h')
include_regex = re.compile('^#include ["<]([^">]*)[">]')
@classmethod
def format(cls, filename, data):
lines = []
includes = []
# Parse blocks of #include statements, and output them as a sorted list
# when we reach a non #include statement.
for line in data.split('\n'):
match = IncludeOrderFormatter.include_regex.match(line)
if match:
# If the current line is an #include statement, add it to the
# includes group and continue to the next line.
includes.append((line, match.group(1)))
continue
# The current line is not an #include statement, output the sorted
# stashed includes first, and then the current line.
if len(includes):
includes.sort(key=lambda i: i[1])
for include in includes:
lines.append(include[0])
includes = []
lines.append(line)
# In the unlikely case the file ends with an #include statement, make
# sure we output the stashed includes.
if len(includes):
includes.sort(key=lambda i: i[1])
for include in includes:
lines.append(include[0])
includes = []
return '\n'.join(lines)
class StripTrailingSpaceFormatter(Formatter):
patterns = ('*.c', '*.cpp', '*.h', '*.py', 'meson.build')
@classmethod
def format(cls, filename, data):
lines = data.split('\n')
for i in range(len(lines)):
lines[i] = lines[i].rstrip() + '\n'
return ''.join(lines)
# ------------------------------------------------------------------------------
# Style checking
#
class Commit:
def __init__(self, commit):
self.commit = commit
def get_info(self):
# Get the commit title and list of files.
ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-only',
self.commit],
stdout=subprocess.PIPE).stdout.decode('utf-8')
files = ret.splitlines()
# Returning title and files list as a tuple
return files[0], files[1:]
def get_diff(self, top_level, filename):
return subprocess.run(['git', 'diff', '%s~..%s' % (self.commit, self.commit),
'--', '%s/%s' % (top_level, filename)],
stdout=subprocess.PIPE).stdout.decode('utf-8')
def get_file(self, filename):
return subprocess.run(['git', 'show', '%s:%s' % (self.commit, filename)],
stdout=subprocess.PIPE).stdout.decode('utf-8')
class StagedChanges(Commit):
def __init__(self):
Commit.__init__(self, '')
def get_info(self):
ret = subprocess.run(['git', 'diff', '--staged', '--name-only'],
stdout=subprocess.PIPE).stdout.decode('utf-8')
return "Staged changes", ret.splitlines()
def get_diff(self, top_level, filename):
return subprocess.run(['git', 'diff', '--staged', '--',
'%s/%s' % (top_level, filename)],
stdout=subprocess.PIPE).stdout.decode('utf-8')
class Amendment(StagedChanges):
def __init__(self):
StagedChanges.__init__(self)
def get_info(self):
# Create a title using HEAD commit
ret = subprocess.run(['git', 'show', '--pretty=oneline', '--no-patch'],
stdout=subprocess.PIPE).stdout.decode('utf-8')
title = 'Amendment of ' + ret.strip()
# Extract the list of modified files
ret = subprocess.run(['git', 'diff', '--staged', '--name-only', 'HEAD~'],
stdout=subprocess.PIPE).stdout.decode('utf-8')
return title, ret.splitlines()
def get_diff(self, top_level, filename):
return subprocess.run(['git', 'diff', '--staged', 'HEAD~', '--',
'%s/%s' % (top_level, filename)],
stdout=subprocess.PIPE).stdout.decode('utf-8')
def check_file(top_level, commit, filename):
# Extract the line numbers touched by the commit.
diff = commit.get_diff(top_level, filename)
diff = diff.splitlines(True)
commit_diff = parse_diff(diff)
lines = []
for hunk in commit_diff:
lines.extend(hunk.side('to').touched)
# Skip commits that don't add any line.
if len(lines) == 0:
return 0
# Format the file after the commit with all formatters and compute the diff
# between the unformatted and formatted contents.
after = commit.get_file(filename)
formatted = after
for formatter in Formatter.formatters(filename):
formatted = formatter.format(filename, formatted)
after = after.splitlines(True)
formatted = formatted.splitlines(True)
diff = difflib.unified_diff(after, formatted)
# Split the diff in hunks, recording line number ranges for each hunk, and
# filter out hunks that are not touched by the commit.
formatted_diff = parse_diff(diff)
formatted_diff = [hunk for hunk in formatted_diff if hunk.intersects(lines)]
# Check for code issues not related to formatting.
issues = []
for checker in StyleChecker.checkers(filename):
checker = checker(after)
for hunk in commit_diff:
issues += checker.check(hunk.side('to').touched)
# Print the detected issues.
if len(issues) == 0 and len(formatted_diff) == 0:
return 0
print('%s---' % Colours.fg(Colours.Red), filename)
print('%s+++' % Colours.fg(Colours.Green), filename)
if len(formatted_diff):
for hunk in formatted_diff:
print(hunk)
if len(issues):
issues = sorted(issues, key=lambda i: i.line_number)
for issue in issues:
print('%s#%u: %s' % (Colours.fg(Colours.Yellow), issue.line_number, issue.msg))
if issue.line is not None:
print('+%s%s' % (issue.line.rstrip(), Colours.reset()))
return len(formatted_diff) + len(issues)
def check_style(top_level, commit):
title, files = commit.get_info()
separator = '-' * len(title)
print(separator)
print(title)
print(separator)
# Filter out files we have no checker for.
patterns = set()
patterns.update(StyleChecker.all_patterns())
patterns.update(Formatter.all_patterns())
files = [f for f in files if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])]
if len(files) == 0:
print("Commit doesn't touch source files, skipping")
return 0
issues = 0
for f in files:
issues += check_file(top_level, commit, f)
if issues == 0:
print("No style issue detected")
else:
print('---')
print("%u potential style %s detected, please review" % \
(issues, 'issue' if issues == 1 else 'issues'))
return issues
def extract_commits(revs):
"""Extract a list of commits on which to operate from a revision or revision
range.
"""
ret = subprocess.run(['git', 'rev-parse', revs], stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if ret.returncode != 0:
print(ret.stderr.decode('utf-8').splitlines()[0])
return []
revlist = ret.stdout.decode('utf-8').splitlines()
# If the revlist contains more than one item, pass it to git rev-list to list
# each commit individually.
if len(revlist) > 1:
ret = subprocess.run(['git', 'rev-list', *revlist], stdout=subprocess.PIPE)
revlist = ret.stdout.decode('utf-8').splitlines()
revlist.reverse()
return [Commit(x) for x in revlist]
def git_top_level():
"""Get the absolute path of the git top-level directory."""
ret = subprocess.run(['git', 'rev-parse', '--show-toplevel'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if ret.returncode != 0:
print(ret.stderr.decode('utf-8').splitlines()[0])
return None
return ret.stdout.decode('utf-8').strip()
def main(argv):
# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--formatter', '-f', type=str, choices=['astyle', 'clang-format'],
help='Code formatter. Default to clang-format if not specified.')
parser.add_argument('--staged', '-s', action='store_true',
help='Include the changes in the index. Defaults to False')
parser.add_argument('--amend', '-a', action='store_true',
help='Include changes in the index and the previous patch combined. Defaults to False')
parser.add_argument('revision_range', type=str, default=None, nargs='?',
help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.')
args = parser.parse_args(argv[1:])
# Check for required dependencies.
for command, mandatory in dependencies.items():
found = shutil.which(command)
if mandatory and not found:
print("Executable %s not found" % command)
return 1
dependencies[command] = found
if args.formatter:
if not args.formatter in dependencies or \
not dependencies[args.formatter]:
print("Formatter %s not available" % args.formatter)
return 1
formatter = args.formatter
else:
if dependencies['clang-format']:
CLangFormatter.enabled = True
elif dependencies['astyle']:
AStyleFormatter.enabled = True
else:
print("No formatter found, please install clang-format or astyle")
return 1
# Get the top level directory to pass absolute file names to git diff
# commands, in order to support execution from subdirectories of the git
# tree.
top_level = git_top_level()
if top_level is None:
return 1
commits = []
if args.staged:
commits.append(StagedChanges())
if args.amend:
commits.append(Amendment())
# If none of --staged or --amend was passed
if len(commits) == 0:
# And no revisions were passed, then default to HEAD
if not args.revision_range:
args.revision_range = 'HEAD'
if args.revision_range:
commits += extract_commits(args.revision_range)
issues = 0
for commit in commits:
issues += check_style(top_level, commit)
print('')
if issues:
return 1
else:
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv))