diff options
author | Ben Wiederhake <BenWiederhake.GitHub@gmx.de> | 2020-08-15 01:11:58 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-08-15 16:49:55 +0200 |
commit | 5f724b6ca1aae3a5a8c7189069649e8a9347cca2 (patch) | |
tree | 661367d6764767e017de7e7c8e10d93cb2a37953 | |
parent | 0df9ddf604002095e27e0116fc9b45980c5a1015 (diff) | |
download | serenity-5f724b6ca1aae3a5a8c7189069649e8a9347cca2.zip |
Travis: Toolchain only depends on headers, not impls
When libstdc++ was added in 4977fd22b874fb9d6d089665e36badd03bcde827, just calling
'make install' was the easiest way to install the headers. And the headers are all
that is needed for libstdc++ to determine the ABI. Since then, BuildIt.sh has been
rewritten again and again, and somehow everyone just silently assumed that
libstdc++ also depends on libc.a and libm.a, because surely it does?
Turns out, it doesn't! This massively reduces the dependencies of libstdc++,
hopefully meaning that the Toolchain doesn't need to be rebuilt so often on Travis.
Furthermore, the old method of trying to determine the dependency tree with
bash/grep/etc. has finally broken anyway:
https://travis-ci.com/github/SerenityOS/serenity/builds/179805569#L567
In summary, this should eliminate most of the Toolchain rebuilds on Travis,
and therefore make Travis build blazingly fast! :^)
-rwxr-xr-x | Toolchain/BuildIt.sh | 24 |
-rwxr-xr-x | Toolchain/ComputeDependenciesHash.sh | 103 |
2 files changed, 35 insertions, 92 deletions
diff --git a/Toolchain/BuildIt.sh b/Toolchain/BuildIt.sh index c5a22fd67a..19c535a2ab 100755 --- a/Toolchain/BuildIt.sh +++ b/Toolchain/BuildIt.sh @@ -19,10 +19,10 @@ MAKE="make" MD5SUM="md5sum" NPROC="nproc" -# Each cache entry is 70 MB. 10 entries are 700 MiB. +# Each cache entry is 70 MB. 5 entries are 350 MiB. # It seems that Travis starts having trouble around a total # cache size of 9 GiB, so I think this is a good amount. -KEEP_CACHE_COUNT=10 +KEEP_CACHE_COUNT=5 if command -v ginstall &>/dev/null; then INSTALL=ginstall @@ -76,19 +76,27 @@ GCC_BASE_URL="http://ftp.gnu.org/gnu/gcc" pushd "$DIR" if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then echo "Checking cached toolchain:" - - DEPS_CONFIG=" + # TODO: This is still overly pessimistic. + DEPS_CONFIG="\ uname=$(uname),TARGET=${TARGET}, BuildItHash=$($MD5SUM "$(basename "$0")"), MAKE=${MAKE},MD5SUM=${MD5SUM},NPROC=${NPROC}, CC=${CC},CXX=${CXX},with_gmp=${with_gmp},LDFLAGS=${LDFLAGS}, BINUTILS_VERSION=${BINUTILS_VERSION},BINUTILS_MD5SUM=${BINUTILS_MD5SUM}, GCC_VERSION=${GCC_VERSION},GCC_MD5SUM=${GCC_MD5SUM}" - echo "Config is:${DEPS_CONFIG}" if ! DEPS_HASH=$("$DIR/ComputeDependenciesHash.sh" "$MD5SUM" <<<"${DEPS_CONFIG}"); then + # Make it stand out more + echo + echo + echo + echo echo "Dependency hashing failed" echo "Will rebuild toolchain from scratch, and NOT SAVE THE RESULT." echo "Someone should look into this, but for now it'll work, albeit inefficient." + echo + echo + echo + echo # Should be empty anyway, but just to make sure: DEPS_HASH="" elif [ -r "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then @@ -238,12 +246,10 @@ pushd "$DIR/Build/" echo "XXX install gcc and libgcc" "$MAKE" install-gcc install-target-libgcc || exit 1 - echo "XXX serenity libc and libm" + echo "XXX serenity libc and libm headers" mkdir -p "$BUILD" pushd "$BUILD" - CXXFLAGS="-DBUILDING_SERENITY_TOOLCHAIN" cmake .. - cmake --build . 
--target LibC - "$INSTALL" -D Libraries/LibC/libc.a Libraries/LibM/libm.a Root/usr/lib/ + mkdir -p Root/usr/include/ SRC_ROOT=$(realpath "$DIR"/..) FILES=$(find "$SRC_ROOT"/Libraries/LibC "$SRC_ROOT"/Libraries/LibM -name '*.h' -print) for header in $FILES; do diff --git a/Toolchain/ComputeDependenciesHash.sh b/Toolchain/ComputeDependenciesHash.sh index 0da2cf2a76..3a4ba36889 100755 --- a/Toolchain/ComputeDependenciesHash.sh +++ b/Toolchain/ComputeDependenciesHash.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -set -eu +set -euo pipefail # This file will need to be run in bash, for now. if [ $# -lt 1 ] ; then @@ -23,89 +23,26 @@ function finish { } trap finish EXIT -# libstdc++ depends on libc and libm, so we pessimistically assume it depends -# on *all* of their implementation and recursive dependencies. -# Scan all files for potential dependencies. -# Thinking in graphs, this computes the edge list: -cat <(find AK/ Libraries/ Services/ Kernel/ -name '*.h') \ - <(find Libraries/LibC/ Libraries/LibM/ -name '*.cpp' ! -name 'Test*.cpp' ) | \ - xargs grep -F '#include ' | \ - sed -r \ - -e 's,^(.*/)([^/]+:)#include "(.*)",\1\2\1\3,' \ - -e 's^#include <(Kernel/.*)>^\1^' \ - -e 's^#include <(AK/.*)>^\1^' \ - -e 's^#include <(Lib[A-Za-z]+/.*)>^Libraries/\1^' \ - -e 's^#include <((bits|netinet|sys|arpa|net)/.*)>^Libraries/LibC/\1^' \ - -e 's^#include <fd_set.h>^Libraries/LibC/fd_set.h^' \ - -e 's^#include <([a-z]{3,10}(_numbers)?\.h)>^Libraries/LibC/\1^' \ - -e 's^#include <([A-Z][a-z]+Server/.*)>^Services/\1^' \ - -e 's^#include <(.*)>^UNRESOLVED_I/\1^' \ - -e 's^#include "(.*)"^UNRESOLVED_L/\1^' > "${DEPLIST_FILE}" -# Some #include's cannot be resolved, like <chrono>. However, these are only -# a problem if they turn up as a transitive dependency of libc and libm. -# We will check for that when the time comes. - -# The initial guess is pessimistic: *all* of libc and libm. -FILE_LIST=$(find Libraries/LibC/ Libraries/LibM/ \( -name '*.cpp' -o -name '*.c' -o -name '*.h' \) ! 
-name 'Test*') -echo "$0: Exploring dependencies of libstdc++" >&2 -FILE_LIST_COMPLETE="n" -# In each iteration, we extend FILE_LIST by the dependencies not listed yet in -# FILE_LIST. Note that the results are always semantically the same, -# but the order depends on the initial `find` runs. -for _ in $(seq 10) ; do - FILE_REGEX=$(echo "${FILE_LIST}" | sed -zr -e 's,\n$,,' -e 's,\.,\\.,g' -e 's,\n,|,g') - FURTHER_FILE_LIST=$(grep -P "^(${FILE_REGEX}):" "${DEPLIST_FILE}" | grep -Pv ":(${FILE_REGEX})\$" | sed -re 's,^.*:(.*)$,\1,' | sort -u) - if [ -n "${FURTHER_FILE_LIST}" ] ; then - # FILE_LIST should grow to a maximum of "number of all .cpp and .c and .h files", - # i.e. roughly 700 lines. This should be managable, even as the project grows. - FILE_LIST="${FILE_LIST} -${FURTHER_FILE_LIST}" - else - FILE_LIST_COMPLETE="y" - break - fi -done -FURTHER_FILE_LIST="" -FILE_REGEX="" -if [ "${FILE_LIST_COMPLETE}" != "y" ] ; then - # Dependency chains might grow very long. Also, if for some reason we fail - # to filter out the already listed files, the FILE_LIST would grow - # exponentially. Both of these unpleasant cases are handled by capping the - # iteration count to 10 and giving up: - echo "$0: Dependencies don't seem to converge, giving up." >&2 - exit 1 -fi - -# Sort for reproducability, -FILE_LIST=$(echo "${FILE_LIST}" | LC_ALL=C sort -u) -if grep -F 'UNRESOLVED' <<EOLIST >&2 ; then -${FILE_LIST} -EOLIST - echo "$0: Unresolved dependency, giving up." - exit 1 -fi - -echo "$0: Computing hashes" >&2 -# "$@" is the md5sum invocation. The piping might hide non-zero exit-codes, +# First, capture the caller's input. +echo "$0: Configuration:" >&2 +cat /dev/stdin | tee /dev/stderr > "${DEPLIST_FILE}" +# "$@" is the md5sum invocation. +"$@" Toolchain/ComputeDependenciesHash.sh | tee /dev/stderr >> "${DEPLIST_FILE}" + +# libstdc++ depends on the *headers* of libc, so we pessimistically assume it depends +# on *all* of them. 
+# This list of files can be cut down considerably: +# strace -ff -e trace=file "make install-target-libstdc++-v3" 2>&1 >/dev/null | perl -ne 's/^[^"]+"(([^\\"]|\\[\\"nt])*)".*/$1/ && print' | sort -u | grep -P 'serenity/Build/Root/usr/include/.*\.h$' +# However, we don't want to risk breaking the build when we upgrade gcc in the future. +# +# If you want to further cut down the Toolchain rebuilds on Travis, +# one way would be to reduce this list somehow. +cd Libraries/LibC/ +find -name '*.h' | sort | xargs "$@" | tee /dev/stderr >> "${DEPLIST_FILE}" + +# The piping might hide non-zero exit-codes, # but thankfully only the first command can reasonably fail. -# Also, abuse the deplist file as a temporary buffer. -cat /dev/stdin > "${DEPLIST_FILE}" -HASHES=$(xargs "$@" <<EOLIST -${FILE_LIST} -Toolchain/ComputeDependenciesHash.sh -${DEPLIST_FILE} -EOLIST -) -# Caller (probably BuildIt.sh) should inject it's own hash via stdin. - -# Mask the temporary (= non-reproducable) name of the DEPLIST_FILE: -HASHES=$(echo "${HASHES}" | sed -re 's,/tmp/serenity_deps_........\.lst,CONFIG,') - -echo "$0: Hashes are:" >&2 -echo "${HASHES}" >&2 echo "$0: Toolchain hash:" >&2 -cat <<EOHASH | "$@" - | cut -f1 -d' ' | tee /dev/stderr -${HASHES} -EOHASH +"$@" "${DEPLIST_FILE}" | cut -f1 -d' ' | tee /dev/stderr echo "$0: Great success!" >&2 |