summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEmilio G. Cota <cota@braap.org>2017-04-26 23:29:24 -0400
committerRichard Henderson <rth@twiddle.net>2017-06-05 09:25:42 -0700
commit6f1653180f5701c6a8f1b35b89a80b1e3260928e (patch)
tree2fca38d3e3019e3df4885ebe34bfae8e25a69e18
parentb4aa297781ceddef79deb0e99da7817551fa89f8 (diff)
downloadqemu-6f1653180f5701c6a8f1b35b89a80b1e3260928e.zip
tb-hash: improve tb_jmp_cache hash function in user mode
Optimizations to cross-page chaining and indirect branches make performance more sensitive to the hit rate of tb_jmp_cache. The constraint of reserving some bits for the page number lowers the achievable quality of the hashing function. However, user-mode does not have this requirement. Thus, with this change we use for user-mode a hashing function that is both faster and of better quality than the previous one. Measurements: Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0. - SPECint06 (test set), x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz 2.2x +-+--------------------------------------------------------------------------------------------------------------+-+ | | | jr | 2x +jr+multhash +....................................................+++++...................................+-+ | jr+hash |$$$ | | |$+$ | | ### $ | 1.8x +-+......................................................................#|#.$...................................+-+ | ++#+# $ | | |# # $ | 1.6x +-+....................................................................***.#.$....................++$$$..........+-+ | $$$ *+* # $ |$+$ | | ++$$$ ### $ * * # $ +++|$ $ | | ++###+$ # # $ * * # $ ### ****## $ | 1.4x +-+...................***+#.$.........***.#.$..........................*.*.#.$...........#+#$$.*++*|#.$..........+-+ | *+* # $ * * # $ * * # $ # # $ * *+# $ | | * * # $ +++++ * * # $ * * # $ *** # $ * * # $ ###$$ | 1.2x +-+...................*.*.#.$.***##$$.*.*.#.$..........................*.*.#.$.........*.*.#.$.*..*.#.$.***+#+$..+-+ | * * # $ *+* # $ * * # $ +++ * * # $ ++###$$ * * # $ * * # $ * * # $ | | ***##$$ * * # $ * * # $ * * # $ ***##$$ ++### * * # $ *** #+$ * * # $ * * # $ * * # $ | | *+*+#+$ ***##$$$ * * # $ * * # $ * * # $ *+* # $ ++####$$ ***+# * * # $ * * # $ * * # $ * * # $ * * # $ | 1x +-++-*+*+#+$+*+*+#-+$+*+*-#+$+*+*+#+$+*+*+#+$+*-*+#+$+***++#+$+*+*+#$$+*+*+#+$+*+*+#+$+*+*-#+$+*+-*+#+$+*+*+#+$-++-+ | * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ | | * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ * * # $ | 0.8x +-+--***##$$-***##$$$-***##$$-***##$$-***##$$-***##$$-***###$$-***##$$-***##$$-***##$$-***##$$-****##$$-***##$$--+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean png: http://imgur.com/4UXTrEc Here I also tried the hash function suggested by Paolo ("multhash"): return ((uint64_t) (pc * 2654435761) >> 32) & (TB_JMP_CACHE_SIZE - 1); As you can see it is just as good as the other new function ("hash"), which is what I ended up going with. - SPECint06 (train set), x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz 2.6x +-+--------------------------------------------------------------------------------------------------------------+-+ | | | jr ### | 2.4x +jr+hash...........................................................................................#.#...........+-+ | # # | | # # | 2.2x +-+................................................................................................#.#...........+-+ | # # | | # # | 2x +-+................................................................................................#.#...........+-+ | **** # | | * * # | 1.8x +-+.............................................................................................*..*.#...........+-+ | +++ * * # | | #### #### * * # | 1.6x +-+......................................####.............................#..#.****..#..........*..*.#...........+-+ | +++ #++# **** # * * # #### * * # | | ### # # * * # * * # # # * * # | 1.4x +-+...................****+#..........****..#..........................*..*..#.*..*..#....#..#..*..*.#...........+-+ | *++* # * * # * * # * * # *** # * * # #### | | * * # #### * * # * * # * * # * * # * * # **** # | 1.2x +-+...................*..*.#..****++#.*..*..#..........................*..*..#.*..*..#..*.*..#..*..*.#..*..*..#..+-+ | ****### * * # * * # * * # * * # * * # * * # * * # * * # | | * * # ***### * * # * * # * * # ****## * * # * * # * * # * * # * * # | 1x +-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+ astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean png: http://imgur.com/ArCbHqo - NBench, x86_64-linux-user. Host: Intel i7-6700K @ 4.00GHz 1.12x +-+-------------------------------------------------------------------------------------------------------------+-+ | | | jr +++ | 1.1x +jr+hash...........................................................####.........................................+-+ | +++#| # | | | #++# | 1.08x +-+................................+++................+++.+++..*****..#.........................................+-+ | | +++ | | * | * # | | | | | | *+++* # | 1.06x +-+................................****###.............|...|...*...*..#.........................+++.............+-+ | *| * |# ****### * * # | | | *| *++# *| * |# * * # #### | 1.04x +-+................................*++*..#............*|.*.|#..*...*..#........................#.|#.............+-+ | * * # *++*++# * * # +++#++# | | * * # * * # * * # | # # +++#### | 1.02x +-+................................*..*..#......+++...*..*..#..*...*..#.....................****..#..*****++#...+-+ | +++ * * # +++ | * * # * * # +++ *| * # *+++* # | | +++ | +++ +++ ++++++ * * # *****### * * # * * # | +++ ++++++ *++* # * * # | 1x +-++-+++++####++****###++++-+####+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-+++####-+*****###++*++*++#++*+-+*++#+-++-+ | *****| # *++* |# *****| # * * # * *++# * * # * * # **** |# * * # * * # * * # | | * | *| # * *++# * | *++# * * # * * # * * # * * # *| *++# * * # * * # * * # | 0.98x +-+...*.|.*++#..*..*..#..*+++*..#..*..*..#..*...*..#..*..*..#..*...*..#..*++*..#..*...*..#..*..*..#..*...*..#...+-+ | *+++* # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.96x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+ ASSIGNMENT BITFIELD FOURFP EMULATION HUFFMAN LU DECOMPOSITIONEURAL NNUMERIC SOSTRING SORT hmean png: http://imgur.com/ZXFX0hJ - NBench, arm-linux-user. Host: Intel i7-4790K @ 4.00GHz 1.3x +-+-------------------------------------------------------------------------------------------------------------+-+ | #### | | jr # # +++ | 1.25x +jr+hash.....................#..#...........................................####................................+-+ | # # # # | | # # # # | 1.2x +-+..........................#..#...........................................#..#................................+-+ | # # # # | | # # # # | 1.15x +-+..........................#..#...........................................#..#................................+-+ | # # #### # # | | # # # # # # | 1.1x +-+..........................#..#..................................#..#.....#..#................................+-+ | # # # # # # +++ | | # # #### # # # # #### | 1.05x +-+..........................#..#...............#..#.....####......#..#.....#..#.........................#..#...+-+ | # # # # # # # # # # +++ # # | | +++ ***** # #### ***** # # # +++# # **** # ****### # # | 1x +-++-+*****###++****+++++*+-+*++#+-****++#-+*+++*-+#+++++#++#++*****++#+-*++*++#-+*****-++++*++*++#++*****++#+-++-+ | * * # * * | * * # * * # * * # **** # * * # * * # * *### * *++# * * # | | * * # * *### * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.95x +-+...*...*..#..*..*.|#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#..*..*..#..*...*..#...+-+ | * * # * * |# * * # * * # * * # * * # * * # * * # * * # * * # * * # | | * * # * * |# * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.9x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+ ASSIGNMENT BITFIELD FOURFP EMULATION HUFFMAN LU DECOMPOSITIONEURAL NNUMERIC SOSTRING SORT hmean png: http://imgur.com/FfD27ey Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1493263764-18657-12-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net>
-rw-r--r--include/exec/tb-hash.h12
1 files changed, 12 insertions, 0 deletions
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 2c27490cb8..b1fe2d0161 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -22,6 +22,8 @@
#include "exec/tb-hash-xx.h"
+#ifdef CONFIG_SOFTMMU
+
/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
addresses on the same page. The top bits are the same. This allows
TLB invalidation to quickly clear a subset of the hash table. */
@@ -45,6 +47,16 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
| (tmp & TB_JMP_ADDR_MASK));
}
+#else
+
+/* In user-mode we can get better hashing because we do not have a TLB */
+static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
+{
+ return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
+}
+
+#endif /* CONFIG_SOFTMMU */
+
static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
{