summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Kling <kling@serenityos.org>2020-07-13 21:00:51 +0200
committerAndreas Kling <kling@serenityos.org>2020-07-13 21:00:51 +0200
commitef84865c8cd106a85be4f993549da9153472507d (patch)
tree3ec0ac98ec932491dd8e06f6d8d681b8427921d4
parentf608b9d89ae0aa54e0912cc8c4f7e883247ce3dd (diff)
downloadserenity-ef84865c8cd106a85be4f993549da9153472507d.zip
LibX86+UserspaceEmulator: Devirtualize and inline more instruction code
Use some template hacks to force GCC to inline more of the instruction decoding stuff into the UserspaceEmulator main execution loop. This is my last optimization for today, and we've gone from ~60 seconds when running "UserspaceEmulator UserspaceEmulator id" to ~8 seconds :^)
-rw-r--r--DevTools/UserspaceEmulator/SoftCPU.h2
-rw-r--r--Libraries/LibX86/Instruction.h72
2 files changed, 44 insertions, 30 deletions
diff --git a/DevTools/UserspaceEmulator/SoftCPU.h b/DevTools/UserspaceEmulator/SoftCPU.h
index 71ae4f3143..bb199b7992 100644
--- a/DevTools/UserspaceEmulator/SoftCPU.h
+++ b/DevTools/UserspaceEmulator/SoftCPU.h
@@ -287,13 +287,13 @@ public:
return 0;
}
-private:
// ^X86::InstructionStream
virtual bool can_read() override { return false; }
virtual u8 read8() override;
virtual u16 read16() override;
virtual u32 read32() override;
+private:
// ^X86::Interpreter
virtual void AAA(const X86::Instruction&) override;
virtual void AAD(const X86::Instruction&) override;
diff --git a/Libraries/LibX86/Instruction.h b/Libraries/LibX86/Instruction.h
index 2cc8fed288..8c575dee76 100644
--- a/Libraries/LibX86/Instruction.h
+++ b/Libraries/LibX86/Instruction.h
@@ -292,7 +292,6 @@ public:
virtual u8 read8() = 0;
virtual u16 read16() = 0;
virtual u32 read32() = 0;
- u32 read(unsigned count);
};
class SimpleInstructionStream final : public InstructionStream {
@@ -378,9 +377,12 @@ private:
String to_string_a16() const;
String to_string_a32() const;
- void decode(InstructionStream&, bool a32);
- void decode16(InstructionStream&);
- void decode32(InstructionStream&);
+ template<typename InstructionStreamType>
+ void decode(InstructionStreamType&, bool a32);
+ template<typename InstructionStreamType>
+ void decode16(InstructionStreamType&);
+ template<typename InstructionStreamType>
+ void decode32(InstructionStreamType&);
template<typename CPU>
LogicalAddress resolve16(const CPU&, Optional<SegmentRegister>);
@@ -413,7 +415,8 @@ private:
class Instruction {
public:
- static Instruction from_stream(InstructionStream&, bool o32, bool a32);
+ template<typename InstructionStreamType>
+ static Instruction from_stream(InstructionStreamType&, bool o32, bool a32);
~Instruction() { }
ALWAYS_INLINE MemoryOrRegisterReference& modrm() const
@@ -503,7 +506,8 @@ public:
String to_string(u32 origin, const SymbolProvider* = nullptr, bool x32 = true) const;
private:
- Instruction(InstructionStream&, bool o32, bool a32);
+ template<typename InstructionStreamType>
+ Instruction(InstructionStreamType&, bool o32, bool a32);
String to_string_internal(u32 origin, const SymbolProvider*, bool x32) const;
@@ -784,7 +788,8 @@ ALWAYS_INLINE u32 MemoryOrRegisterReference::read32(CPU& cpu, const Instruction&
return cpu.read_memory32(address);
}
-ALWAYS_INLINE Instruction Instruction::from_stream(InstructionStream& stream, bool o32, bool a32)
+template<typename InstructionStreamType>
+ALWAYS_INLINE Instruction Instruction::from_stream(InstructionStreamType& stream, bool o32, bool a32)
{
return Instruction(stream, o32, a32);
}
@@ -826,7 +831,8 @@ ALWAYS_INLINE static Optional<SegmentRegister> to_segment_prefix(u8 op)
}
}
-ALWAYS_INLINE Instruction::Instruction(InstructionStream& stream, bool o32, bool a32)
+template<typename InstructionStreamType>
+ALWAYS_INLINE Instruction::Instruction(InstructionStreamType& stream, bool o32, bool a32)
: m_a32(a32)
, m_o32(o32)
{
@@ -905,10 +911,29 @@ ALWAYS_INLINE Instruction::Instruction(InstructionStream& stream, bool o32, bool
m_imm2_bytes = m_descriptor->imm2_bytes_for_address_size(m_a32);
// Consume immediates if present.
- if (m_imm2_bytes)
- m_imm2 = stream.read(m_imm2_bytes);
- if (m_imm1_bytes)
- m_imm1 = stream.read(m_imm1_bytes);
+ switch (m_imm2_bytes) {
+ case 1:
+ m_imm2 = stream.read8();
+ break;
+ case 2:
+ m_imm2 = stream.read16();
+ break;
+ case 4:
+ m_imm2 = stream.read32();
+ break;
+ }
+
+ switch (m_imm1_bytes) {
+ case 1:
+ m_imm1 = stream.read8();
+ break;
+ case 2:
+ m_imm1 = stream.read16();
+ break;
+ case 4:
+ m_imm1 = stream.read32();
+ break;
+ }
m_handler = m_descriptor->handler;
@@ -920,21 +945,8 @@ ALWAYS_INLINE Instruction::Instruction(InstructionStream& stream, bool o32, bool
#endif
}
-ALWAYS_INLINE u32 InstructionStream::read(unsigned count)
-{
- switch (count) {
- case 1:
- return read8();
- case 2:
- return read16();
- case 4:
- return read32();
- }
- ASSERT_NOT_REACHED();
- return 0;
-}
-
-ALWAYS_INLINE void MemoryOrRegisterReference::decode(InstructionStream& stream, bool a32)
+template<typename InstructionStreamType>
+ALWAYS_INLINE void MemoryOrRegisterReference::decode(InstructionStreamType& stream, bool a32)
{
m_a32 = a32;
m_rm = stream.read8();
@@ -972,7 +984,8 @@ ALWAYS_INLINE void MemoryOrRegisterReference::decode(InstructionStream& stream,
}
}
-ALWAYS_INLINE void MemoryOrRegisterReference::decode16(InstructionStream&)
+template<typename InstructionStreamType>
+ALWAYS_INLINE void MemoryOrRegisterReference::decode16(InstructionStreamType&)
{
ASSERT(!m_a32);
@@ -995,7 +1008,8 @@ ALWAYS_INLINE void MemoryOrRegisterReference::decode16(InstructionStream&)
}
}
-ALWAYS_INLINE void MemoryOrRegisterReference::decode32(InstructionStream& stream)
+template<typename InstructionStreamType>
+ALWAYS_INLINE void MemoryOrRegisterReference::decode32(InstructionStreamType& stream)
{
ASSERT(m_a32);