From ebe63de49a95b147980b5b262d6807b32783d83d Mon Sep 17 00:00:00 2001 From: Vas Crabb Date: Mon, 27 Sep 2021 01:21:55 +1000 Subject: [PATCH] util/delegate.cpp: Notes from experiments with clang. --- src/lib/util/delegate.cpp | 129 ++++++++++++++++++++++++++++++-------- 1 file changed, 104 insertions(+), 25 deletions(-) diff --git a/src/lib/util/delegate.cpp b/src/lib/util/delegate.cpp index 6f73c925da1..eff36ba6192 100644 --- a/src/lib/util/delegate.cpp +++ b/src/lib/util/delegate.cpp @@ -148,39 +148,76 @@ delegate_generic_function delegate_mfp_msvc::adjust_this_pointer(delegate_generi std::uint8_t const *func = reinterpret_cast(m_function); while (true) { + // Assumes Windows calling convention, and doesn't consider that + // the "this" pointer could be in RDX if RCX is a pointer to + // space for an oversize scalar result. Since the result area + // is uninitialised on entry, you won't see something that looks + // like a vtable dispatch through RCX in this case - it won't + // behave badly, it just won't bypass virtual call thunks in the + // rare situations where the return type is an oversize scalar. if (0xe9 == func[0]) { // relative jump with 32-bit displacement (typically a resolved PLT entry) LOG("Found relative jump at %p ", func); func += 5 + *reinterpret_cast(func + 1); LOG("redirecting to %p\n", func); + continue; } - else if ((0x48 == func[0]) && (0x8b == func[1]) && (0x01 == func[2]) && (0xff == func[3]) && ((0x20 == func[4]) || (0x60 == func[4]) || (0xa0 == func[4]))) + else if ((0x48 == func[0]) && (0x8b == func[1]) && (0x01 == func[2])) { - // virtual function call thunk - mov rax,QWORD PTR [rcx] ; jmp QWORD PTR [rax+...] - // Assumes Windows calling convention, and doesn't consider - // that the "this" pointer could be in RDX if RCX is a - // pointer to space for an oversize scalar result. 
Since - // the result area is uninitialised on entry, you won't see - // something that looks like a vtable dispatch through RCX - // in this case - it won't behave badly, it just won't - // bypass virtual call thunks in the rare situations where - // the return type is an oversize scalar. - LOG("Found virtual member function thunk at %p ", func); - std::uint8_t const *const vptr = *reinterpret_cast(object); - if (0x20 == func[4]) // no displacement - func = *reinterpret_cast(vptr); - else if (0x60 == func[4]) // 8-bit displacement - func = *reinterpret_cast(vptr + *reinterpret_cast(func + 5)); - else // 32-bit displacement - func = *reinterpret_cast(vptr + *reinterpret_cast(func + 5)); - LOG("redirecting to %p\n", func); - } - else - { - // not something we can easily bypass - break; + if ((0xff == func[3]) && ((0x20 == func[4]) || (0x60 == func[4]) || (0xa0 == func[4]))) + { + // MSVC virtual function call thunk - mov rax,QWORD PTR [rcx] ; jmp QWORD PTR [rax+...] + LOG("Found virtual member function thunk at %p ", func); + std::uint8_t const *const vptr = *reinterpret_cast(object); + if (0x20 == func[4]) // no displacement + func = *reinterpret_cast(vptr); + else if (0x60 == func[4]) // 8-bit displacement + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 5)); + else // 32-bit displacement + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 5)); + LOG("redirecting to %p\n", func); + continue; + } + else if ((0x48 == func[3]) && (0x8b == func[4])) + { + // clang virtual function call thunk - mov rax,QWORD PTR [rcx] ; mov rax,QWORD PTR [rax+...] 
; jmp rax + if ((0x40 == func[5]) && (0x48 == func[7]) && (0xff == func[8]) && (0xe0 == func[9])) + { + // 8-bit displacement + LOG("Found virtual member function thunk at %p ", func); + std::uint8_t const *const vptr = *reinterpret_cast(object); + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 6)); + LOG("redirecting to %p\n", func); + continue; + } + else if ((0x80 == func[5]) && (0x48 == func[10]) && (0xff == func[11]) && (0xe0 == func[12])) + { + // 32-bit displacement + LOG("Found virtual member function thunk at %p ", func); + std::uint8_t const *const vptr = *reinterpret_cast(object); + func = *reinterpret_cast(vptr + *reinterpret_cast(func + 6)); + LOG("redirecting to %p\n", func); + continue; + } + } } + + // clang uses unoptimised thunks if optimisation is disabled + // Without optimisation, clang produces thunks like: + // 50 push rax + // 48 89 0c 24 mov QWORD PTR [rsp],rcx + // 48 8b 0c 24 mov rcx,QWORD PTR [rsp] + // 48 8b 01 mov rax,QWORD PTR [rcx] + // 48 8b 80 xx xx xx xx mov rax,QWORD PTR [rax+...] + // 41 5a pop r10 + // 48 ff e0 jmp rax + // Trying to decode these thunks likely isn't worth the effort. + // Chasing performance in unoptimised builds isn't very useful, + // and the format of these thunks may be fragile. + + // not something we can easily bypass + break; } return reinterpret_cast(std::uintptr_t(func)); #elif defined(__aarch64__) || defined(_M_ARM64) @@ -190,7 +227,19 @@ delegate_generic_function delegate_mfp_msvc::adjust_this_pointer(delegate_generi // Assumes little Endian mode. Instructions are always stored // in little Endian format on AArch64, so if big Endian mode is // to be supported, the values need to be swapped. - if ((0xf9400010 == func[0]) && (0xf9400210 == (func[1] & 0xffc003ff)) && (0xd61f0200 == func[2])) + if ((0x90000010 == (func[0] & 0x9f00001f)) && (0x91000210 == (func[1] & 0xffc003ff)) && (0xd61f0200 == func[2])) + { + // page-relative jump with +/-4GB reach - adrp xip0,... ; add xip0,xip0,#... 
; br xip0 + LOG("Found page-relative jump at %p ", func); + std::int64_t const page = + (std::uint64_t(func[0] & 0x60000000) >> 17) | + (std::uint64_t(func[0] & 0x00ffffe0) << 9) | + ((func[0] & 0x00800000) ? (~std::uint64_t(0) << 33) : 0); + std::uint32_t const offset = (func[1] & 0x003ffc00) >> 10; + func = reinterpret_cast(((std::uintptr_t(func) + page) & (~std::uintptr_t(0) << 12)) + offset); + LOG("redirecting to %p\n", func); + } + else if ((0xf9400010 == func[0]) && (0xf9400210 == (func[1] & 0xffc003ff)) && (0xd61f0200 == func[2])) { // virtual function call thunk - ldr xip0,[x0] ; ldr xip0,[x0,#...] ; br xip0 LOG("Found virtual member function thunk at %p ", func); @@ -203,6 +252,36 @@ delegate_generic_function delegate_mfp_msvc::adjust_this_pointer(delegate_generi // not something we can easily bypass break; } + + // clang uses horribly sub-optimal thunks for AArch64 + // Without optimisation, clang produces thunks like: + // d10143ff sub sp,sp,#80 + // f90027e7 str x7,[sp,#72] + // f90023e6 str x6,[sp,#64] + // f9001fe5 str x5,[sp,#56] + // f9001be4 str x4,[sp,#48] + // f90017e3 str x3,[sp,#40] + // f90013e2 str x2,[sp,#32] + // f9000fe1 str x1,[sp,#24] + // f90007e0 str x0,[sp,#8] + // f94007e0 ldr x0,[sp,#8] + // f9400009 ldr x9,[x0] + // f9400129 ldr x9,[x9,#...] + // 910143ff add sp,sp,#80 + // d61f0120 br x9 + // With optimisation, clang produces thunks like: + // d10103ff sub sp,sp,#64 + // a9008be1 stp x1,x2,[sp,#8] + // a90193e3 stp x3,x4,[sp,#24] + // a9029be5 stp x5,x6,[sp,#40] + // f9001fe7 str x7,[sp,#56] + // f9400009 ldr x9,[x0] + // f9400129 ldr x9,[x9,#...] + // 910103ff add sp,sp,#64 + // d61f0120 br x9 + // It's more effort than it's worth to try decoding these + // thunks. + } return reinterpret_cast(std::uintptr_t(func)); #else