summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ben Wiederhake <BenWiederhake.GitHub@gmx.de> 2021-10-26 23:49:28 +0200
committer Brian Gianforcaro <b.gianfo@gmail.com> 2021-10-27 11:35:47 -0700
commit a6ccf6659a706eb985cf5e62cd8e8db05ab18ab9 (patch)
tree a10e10db7af1ab76097c6c76bbb3819ccd14173e
parent 074ce35b3707e23bb02ad4e14694a3113d1a2e4f (diff)
download serenity-a6ccf6659a706eb985cf5e62cd8e8db05ab18ab9.zip
Meta: Reimplement license checker in python
On my machine, the bash version of this script took about 3.4 seconds, and was responsible for essentially all of the time taken by the pre-commit hook.

The Python script is a faithful 1:1 reimplementation; even the regexes are identical. And yet, it takes about 0.02 seconds, making the pre-commit hook lightning fast again. Apparently Python is just faster in this case.

Fun fact:
- Just reading all ~4000 files took bash about 1.2 seconds
- Checking the license took another 1.8 seconds in total
- Checking for math.h took another 0.4 seconds in total
- Checking for '#pragma once' took another 0.4 seconds in total

The timing is highly load-dependent, so these numbers don't exactly add up to 3.4 seconds. However, it's good enough to determine that bash is no longer fit for the purpose of this script.
-rwxr-xr-x Meta/check-style.py 86
-rwxr-xr-x Meta/check-style.sh 70
-rwxr-xr-x Meta/lint-ci.sh 2
3 files changed, 87 insertions, 71 deletions
diff --git a/Meta/check-style.py b/Meta/check-style.py
new file mode 100755
index 0000000000..8b61db5217
--- /dev/null
+++ b/Meta/check-style.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import os
+import re
+import subprocess
+import sys
+
+# Ensure copyright headers match this format and are followed by a blank line:
+# /*
+# * Copyright (c) YYYY(-YYYY), Whatever
+# * ... more of these ...
+# *
+# * SPDX-License-Identifier: BSD-2-Clause
+# */
+GOOD_LICENSE_HEADER_PATTERN = re.compile(
+ '^/\\*\n' +
+ '( \\* Copyright \\(c\\) [0-9]{4}(-[0-9]{4})?, .*\n)+' +
+ ' \\*\n' +
+ ' \\* SPDX-License-Identifier: BSD-2-Clause\n' +
+ ' \\*/\n' +
+ '\n')
+LICENSE_HEADER_CHECK_EXCLUDES = {
+ 'AK/Checked.h',
+ 'AK/Function.h',
+ 'Userland/Libraries/LibC/elf.h',
+ 'Userland/DevTools/HackStudio/LanguageServers/Cpp/Tests/',
+ 'Userland/Libraries/LibCpp/Tests/parser/',
+ 'Userland/Libraries/LibCpp/Tests/preprocessor/'
+}
+
+# We check that "#pragma once" is present
+PRAGMA_ONCE_STRING = '#pragma once'
+
+# We make sure that there's a blank line before and after pragma once
+GOOD_PRAGMA_ONCE_PATTERN = re.compile('(^|\\S\n\n)#pragma once(\n\n\\S.|$)')
+
+# We check that "#include <LibM/math.h>" is not being used
+LIBM_MATH_H_INCLUDE_STRING = '#include <LibM/math.h>'
+
+GIT_LS_FILES = ['git', 'ls-files', '--', '*.cpp', '*.h', ':!:Base', ':!:Kernel/FileSystem/ext2_fs.h']
+
+
+def run():
+ files = subprocess.run(GIT_LS_FILES, check=True, capture_output=True).stdout.decode().strip('\n').split('\n')
+ assert len(files) > 1000
+
+ errors_license = []
+ errors_libm_math_h = []
+ errors_pragma_once_bad = []
+ errors_pragma_once_missing = []
+
+ for filename in files:
+ with open(filename, "r") as f:
+ file_content = f.read()
+ if not any(filename.startswith(forbidden_prefix) for forbidden_prefix in LICENSE_HEADER_CHECK_EXCLUDES):
+ if not GOOD_LICENSE_HEADER_PATTERN.search(file_content):
+ errors_license.append(filename)
+ if LIBM_MATH_H_INCLUDE_STRING in file_content:
+ errors_libm_math_h.append(filename)
+ if filename.endswith('.h'):
+ if GOOD_PRAGMA_ONCE_PATTERN.search(file_content):
+ # Excellent, the formatting is correct.
+ pass
+ elif PRAGMA_ONCE_STRING in file_content:
+ # Bad, the '#pragma once' is present but it's formatted wrong.
+ errors_pragma_once_bad.append(filename)
+ else:
+ # Bad, the '#pragma once' is missing completely.
+ errors_pragma_once_missing.append(filename)
+
+ if errors_license:
+ print("Files with bad licenses:", " ".join(errors_license))
+ if errors_pragma_once_missing:
+ print("Files without #pragma once:", " ".join(errors_pragma_once_missing))
+ if errors_pragma_once_bad:
+ print("Files with a bad #pragma once:", " ".join(errors_pragma_once_bad))
+ if errors_libm_math_h:
+ print("Files including LibM/math.h (include just 'math.h' instead):", " ".join(errors_libm_math_h))
+
+ if errors_license or errors_pragma_once_missing or errors_pragma_once_bad or errors_libm_math_h:
+ sys.exit(1)
+
+
+if __name__ == '__main__':
+ os.chdir(os.path.dirname(__file__) + "/..")
+ run()
diff --git a/Meta/check-style.sh b/Meta/check-style.sh
deleted file mode 100755
index 33ef45d59f..0000000000
--- a/Meta/check-style.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env bash
-
-script_path=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
-cd "$script_path/.." || exit 1
-
-# Ensure copyright headers match this format and are followed by a blank line:
-# /*
-# * Copyright (c) YYYY(-YYYY), Whatever
-# * ... more of these ...
-# *
-# * SPDX-License-Identifier: BSD-2-Clause
-# */
-GOOD_LICENSE_HEADER_PATTERN=$'^/\*\n( \* Copyright \(c\) [0-9]{4}(-[0-9]{4})?, .*\n)+ \*\n \* SPDX-License-Identifier: BSD-2-Clause\n \*/\n\n'
-BAD_LICENSE_HEADER_ERRORS=()
-LICENSE_HEADER_CHECK_EXCLUDES=(AK/Checked.h AK/Function.h Userland/Libraries/LibC/elf.h Userland/DevTools/HackStudio/LanguageServers/Cpp/Tests/* Userland/Libraries/LibCpp/Tests/parser/* Userland/Libraries/LibCpp/Tests/preprocessor/*)
-
-# We check that "#pragma once" is present
-PRAGMA_ONCE_PATTERN='#pragma once'
-MISSING_PRAGMA_ONCE_ERRORS=()
-
-# We make sure that there's a blank line before and after pragma once
-GOOD_PRAGMA_ONCE_PATTERN=$'(^|\\S\n\n)#pragma once(\n\n\\S.|$)'
-BAD_PRAGMA_ONCE_ERRORS=()
-
-# We check that "#include <LibM/math.h>" is not being used
-LIBM_MATH_H_INCLUDE_PATTERN='#include <LibM/math.h>'
-LIBM_MATH_H_INCLUDE_ERRORS=()
-
-while IFS= read -r f; do
- file_content="$(< "$f")"
- if [[ ! "${LICENSE_HEADER_CHECK_EXCLUDES[*]} " =~ $f ]]; then
- if [[ ! "$file_content" =~ $GOOD_LICENSE_HEADER_PATTERN ]]; then
- BAD_LICENSE_HEADER_ERRORS+=("$f")
- fi
- fi
- if [[ "$file_content" =~ $LIBM_MATH_H_INCLUDE_PATTERN ]]; then
- LIBM_MATH_H_INCLUDE_ERRORS+=("$f")
- fi
- if [[ "$f" =~ \.h$ ]]; then
- if [[ ! "$file_content" =~ $PRAGMA_ONCE_PATTERN ]]; then
- MISSING_PRAGMA_ONCE_ERRORS+=("$f")
- elif [[ ! "$file_content" =~ $GOOD_PRAGMA_ONCE_PATTERN ]]; then
- BAD_PRAGMA_ONCE_ERRORS+=("$f")
- fi
- fi
-done < <(git ls-files -- \
- '*.cpp' \
- '*.h' \
- ':!:Base' \
- ':!:Kernel/FileSystem/ext2_fs.h' \
-)
-
-exit_status=0
-if (( ${#BAD_LICENSE_HEADER_ERRORS[@]} )); then
- echo "Files with missing or incorrect license header: ${BAD_LICENSE_HEADER_ERRORS[*]}"
- exit_status=1
-fi
-if (( ${#MISSING_PRAGMA_ONCE_ERRORS[@]} )); then
- echo "Header files missing \"#pragma once\": ${MISSING_PRAGMA_ONCE_ERRORS[*]}"
- exit_status=1
-fi
-if (( ${#BAD_PRAGMA_ONCE_ERRORS[@]} )); then
- echo "\"#pragma once\" should have a blank line before and after in these files: ${BAD_PRAGMA_ONCE_ERRORS[*]}"
- exit_status=1
-fi
-if (( ${#LIBM_MATH_H_INCLUDE_ERRORS[@]} )); then
- echo "\"#include <LibM/math.h>\" should be replaced with just \"#include <math.h>\" in these files: ${LIBM_MATH_H_INCLUDE_ERRORS[*]}"
- exit_status=1
-fi
-exit "$exit_status"
diff --git a/Meta/lint-ci.sh b/Meta/lint-ci.sh
index 452db6902b..785be689d8 100755
--- a/Meta/lint-ci.sh
+++ b/Meta/lint-ci.sh
@@ -24,7 +24,7 @@ for cmd in \
Meta/check-debug-flags.sh \
Meta/check-markdown.sh \
Meta/check-newlines-at-eof.py \
- Meta/check-style.sh \
+ Meta/check-style.py \
Meta/lint-executable-resources.sh \
Meta/lint-ipc-ids.sh \
Meta/lint-keymaps.py \