2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -27,7 +27,7 @@ endif()

 # compiler flags
 if (MSVC)
-    set(CMAKE_CXX_FLAGS "/Wall /W4 /EHsc")
+    set(CMAKE_CXX_FLAGS "/W4 /EHsc")
 else()
     # Linux and Apple
     set(CMAKE_CXX_FLAGS "-Wextra -Wall -Wextra -Wconversion -Wdouble-promotion -Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion")
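A side note on the flags kept on the GCC/Clang branch: -Wconversion and -Wdouble-promotion exist to catch silent narrowing like the following (illustrative sketch, not part of the patch):

    #include <cstdint>

    // -Wconversion flags the implicit uint32_t -> uint8_t narrowing here
    std::uint8_t low_byte(std::uint32_t state) {
        return state * 31u; // warning: conversion may lose data
    }

On MSVC, /Wall additionally pulls in informational warnings, including from system headers, so /W4 is the usual practical ceiling; dropping /Wall is presumably about that noise.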
139 changes: 88 additions & 51 deletions src/vmaware.hpp
@@ -9199,17 +9199,18 @@ struct VM {

     /**
      * @brief Check if after raising two traps at the same RIP, a hypervisor interferes with the instruction pointer delivery
-     * @category Windows, x86
+     * @category Windows, x86_64
      * @implements VM::TRAP
      */
     [[nodiscard]] static bool trap() {
         bool hypervisorCaught = false;
-#if (x86)
-        // when a single-step (TF) and hardware breakpoint (DR0) collide, Intel CPUs set both DR6.BS and DR6.B0 to report both events, which help make this detection trick
+#if (x86_64)
+        // when a single-step (TF) and hardware breakpoint (DR0) collide, Intel CPUs set both DR6.BS and DR6.B0 to report both events, which helps make this detection trick work
         // AMD CPUs prioritize the breakpoint, setting only its corresponding bit in DR6 and clearing the single-step bit, which is why this technique is not compatible with AMD
         if (!cpu::is_intel()) {
             return false;
         }
+
         // mobile SKUs can "false flag" this check
         const char* brand = cpu::get_brand();
         for (const char* c = brand; *c; ++c) {
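The whole check hinges on the DR6 contract described in the comment above; a compile-time sketch of the bit test the exception filter performs later (illustrative, not from the diff):

    #include <cstdint>

    constexpr std::uint64_t DR6_B0 = 1ull << 0;   // hardware breakpoint 0 was hit
    constexpr std::uint64_t DR6_BS = 1ull << 14;  // single-step (TF) event

    // on bare-metal Intel, a TF + DR0 collision reports both events at once
    constexpr bool collision_reported(std::uint64_t dr6) {
        return (dr6 & (DR6_BS | DR6_B0)) == (DR6_BS | DR6_B0);
    }

    static_assert(collision_reported(0x4001), "BS and B0 both set");
    static_assert(!collision_reported(0x4000), "BS alone: B0 was swallowed");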
@@ -9222,15 +9223,19 @@
             }
         }
 
-        // push flags, set TF-bit, pop flags, execute a dummy instruction, then return
+        // We must preserve RBX because CPUID clobbers it, and RBX is a non-volatile
+        // register in x64. If we don't restore it, the calling function (VM::check) crashes.
+        // We use MOV R8, RBX instead of PUSH RBX: pushing to the stack without
+        // unwind metadata breaks SEH in x64 (the OS cannot find the handler), causing a crash.
         constexpr u8 trampoline[] = {
-            0x9C,             // pushfq
-            0x81, 0x04, 0x24, // OR DWORD PTR [RSP], 0x10100
+            0x49, 0x89, 0xD8, // mov r8, rbx (save rbx to volatile register r8)
+            0x9C,             // pushfq
+            0x81, 0x04, 0x24, // OR DWORD PTR [RSP], 0x10100 (set TF)
             0x00, 0x01, 0x01, 0x00,
-            0x9D,             // popfq
-            0x0F, 0xA2,       // cpuid (or any other trappable instruction, but this one is ok since it has to be trapped in every x86 hv)
-            0x90, 0x90, 0x90, // NOPs to pad to breakpoint offset
-            0xC3              // ret
+            0x9D,             // popfq
+            0x0F, 0xA2,       // cpuid
+            0x4C, 0x89, 0xC3, // mov rbx, r8 (restore rbx from r8) - trap happens here
+            0xC3              // ret
         };
         SIZE_T trampSize = sizeof(trampoline);
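With the NOP padding gone, the hardware-breakpoint target moves from base + 11 to base + 14; the arithmetic can be cross-checked at compile time (a sketch under the encodings shown above, not part of the patch):

    #include <cstddef>

    // mov r8, rbx (3) + pushfq (1) + or dword ptr [rsp], imm32 (3 + 4)
    // + popfq (1) + cpuid (2) = 14 bytes into the trampoline
    constexpr std::size_t kTrapOffset = 3 + 1 + (3 + 4) + 1 + 2;
    static_assert(kTrapOffset == 14, "DR0 must point at the mov rbx, r8 restore");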

@@ -9257,13 +9262,14 @@
         using NtGetContextThread_t = NTSTATUS(__stdcall*)(HANDLE, PCONTEXT);
         using NtSetContextThread_t = NTSTATUS(__stdcall*)(HANDLE, PCONTEXT);
 
-        const auto pNtAllocateVirtualMemory = reinterpret_cast<NtAllocateVirtualMemory_t>(funcs[0]);
-        const auto pNtProtectVirtualMemory = reinterpret_cast<NtProtectVirtualMemory_t>(funcs[1]);
-        const auto pNtFreeVirtualMemory = reinterpret_cast<NtFreeVirtualMemory_t>(funcs[2]);
-        const auto pNtFlushInstructionCache = reinterpret_cast<NtFlushInstructionCache_t>(funcs[3]);
-        const auto pNtClose = reinterpret_cast<NtClose_t>(funcs[4]);
-        const auto pNtGetContextThread = reinterpret_cast<NtGetContextThread_t>(funcs[5]);
-        const auto pNtSetContextThread = reinterpret_cast<NtSetContextThread_t>(funcs[6]);
+        // volatile ensures these are reloaded from the stack after an SEH unwind when compiled with aggressive optimizations
+        NtAllocateVirtualMemory_t volatile pNtAllocateVirtualMemory = reinterpret_cast<NtAllocateVirtualMemory_t>(funcs[0]);
+        NtProtectVirtualMemory_t volatile pNtProtectVirtualMemory = reinterpret_cast<NtProtectVirtualMemory_t>(funcs[1]);
+        NtFreeVirtualMemory_t volatile pNtFreeVirtualMemory = reinterpret_cast<NtFreeVirtualMemory_t>(funcs[2]);
+        NtFlushInstructionCache_t volatile pNtFlushInstructionCache = reinterpret_cast<NtFlushInstructionCache_t>(funcs[3]);
+        NtClose_t volatile pNtClose = reinterpret_cast<NtClose_t>(funcs[4]);
+        NtGetContextThread_t volatile pNtGetContextThread = reinterpret_cast<NtGetContextThread_t>(funcs[5]);
+        NtSetContextThread_t volatile pNtSetContextThread = reinterpret_cast<NtSetContextThread_t>(funcs[6]);
 
         if (!pNtAllocateVirtualMemory || !pNtProtectVirtualMemory || !pNtFlushInstructionCache ||
             !pNtFreeVirtualMemory || !pNtGetContextThread || !pNtSetContextThread || !pNtClose) {
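The volatile qualifiers above address a well-known MSVC rule: after an SEH unwind, optimized code may read locals out of registers whose contents are no longer valid, unless the locals are volatile. A minimal standalone repro of the pattern (hypothetical, MSVC-only, not from the library):

    #include <windows.h>
    #include <cstdio>

    int main() {
        // volatile: re-read from the stack slot after the unwind, not from
        // a register whose content is unspecified at that point
        int volatile survived = 42;
        __try {
            RaiseException(0xE0000001, 0, 0, nullptr); // user-defined exception code
        }
        __except (EXCEPTION_EXECUTE_HANDLER) {
            std::printf("%d\n", survived); // safe read thanks to volatile
        }
        return 0;
    }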
@@ -9285,7 +9291,8 @@
         ULONG oldProt = 0;
         st = pNtProtectVirtualMemory(hCurrentProcess, &tmpBase, &tmpSz, PAGE_EXECUTE_READ, &oldProt);
         if (!NT_SUCCESS(st)) {
-            PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
+            PVOID freeBase = execMem;
+            SIZE_T freeSize = trampSize;
             pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
             return false;
         }
@@ -9300,66 +9307,96 @@
         const HANDLE hCurrentThread = reinterpret_cast<HANDLE>(-2LL);
 
         if (!NT_SUCCESS(pNtGetContextThread(hCurrentThread, &origCtx))) {
-            PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
+            PVOID freeBase = execMem;
+            SIZE_T freeSize = trampSize;
             pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
             return false;
         }
 
-        // set Dr0 to trampoline+offset (step triggers here)
+        // Set DR0 to trampoline + 14 (instruction: mov rbx, r8)
+        // Offset calculation: mov_r8_rbx(3) + pushfq(1) + or(7) + popfq(1) + cpuid(2) = 14
+        // This is where the single-step trap lands after CPUID, and where we want the collision
+        const uintptr_t expectedTrapAddr = reinterpret_cast<uintptr_t>(execMem) + 14;
+
+        // set Dr0 to trampoline+offset
         CONTEXT dbgCtx = origCtx;
-        const uintptr_t baseAddr = reinterpret_cast<uintptr_t>(execMem);
-        dbgCtx.Dr0 = baseAddr + 11; // single step breakpoint address
-        dbgCtx.Dr7 = 1; // enable local breakpoint 0
+        dbgCtx.Dr0 = expectedTrapAddr; // single step breakpoint address
+        dbgCtx.Dr7 = 1; // enable Local Breakpoint 0
 
         if (!NT_SUCCESS(pNtSetContextThread(hCurrentThread, &dbgCtx))) {
             pNtSetContextThread(hCurrentThread, &origCtx);
-            PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
+            PVOID freeBase = execMem;
+            SIZE_T freeSize = trampSize;
             pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
             return false;
         }
 
-        auto vetExceptions = [&](u32 code, EXCEPTION_POINTERS* info) noexcept -> u8 {
-            // if not single-step, hypervisor likely swatted our trap
-            if (code != static_cast<DWORD>(0x80000004L)) {
-                hypervisorCaught = true;
-                return EXCEPTION_CONTINUE_SEARCH;
-            }
+        // Context structure to pass data to the static SEH handler
+        struct TrapContext {
+            uintptr_t expectedTrapAddr;
+            u8* hitCount;
+            bool* hypervisorCaught;
+        };
 
-            // count breakpoint hits
-            hitCount++;
+        // Static class for SEH filtering, avoiding Release-mode lambda corruption
+        struct SEH_Trap {
+            static LONG Vet(u32 code, EXCEPTION_POINTERS* info, TrapContext* ctx) noexcept {
+                // returns LONG so the filter can yield EXCEPTION_CONTINUE_EXECUTION
+                if (code != static_cast<DWORD>(0x80000004L)) {
+                    return EXCEPTION_CONTINUE_SEARCH;
+                }
 
-            // validate exception address matches our breakpoint location
-            if (reinterpret_cast<uintptr_t>(info->ExceptionRecord->ExceptionAddress) != baseAddr + 11) {
-                hypervisorCaught = true;
-                return EXCEPTION_EXECUTE_HANDLER;
-            }
+                // Verify the exception happened at our calculated offset
+                if (reinterpret_cast<uintptr_t>(info->ExceptionRecord->ExceptionAddress) != ctx->expectedTrapAddr) {
+                    info->ContextRecord->EFlags &= ~0x100; // Clear TF
+                    info->ContextRecord->Dr7 &= ~1;        // Clear DR0 enable
+                    *ctx->hypervisorCaught = true;
+                    return EXCEPTION_CONTINUE_EXECUTION;
+                }
 
-            // check if Trap Flag and DR0 contributed
-            constexpr u64 required_bits = (1ULL << 14) | 1ULL;
-            const u64 status = info->ContextRecord->Dr6;
+                (*ctx->hitCount)++;
 
-            if ((status & required_bits) != required_bits) {
-                if (util::hyper_x() != HYPERV_ARTIFACT_VM)
-                    hypervisorCaught = true; // detects type 1 Hyper-V too, which we consider legitimate
-            }
+                // check if the Trap Flag and DR0 both contributed
+                constexpr u64 required_bits = (1ULL << 14) | 1ULL; // BS | B0
+                const u64 status = info->ContextRecord->Dr6;
 
-            return EXCEPTION_EXECUTE_HANDLER;
+                if ((status & required_bits) != required_bits) {
+                    if (util::hyper_x() != HYPERV_ARTIFACT_VM) // detects type 1 Hyper-V too, which we consider legitimate
+                        *ctx->hypervisorCaught = true;
+                }
+
+                // Clear the Trap Flag to stop single stepping
+                info->ContextRecord->EFlags &= ~0x100;
+
+                // Clear DR7 Local Enable 0 to disable the hardware breakpoint;
+                // if we don't, the next instruction would trigger the breakpoint again immediately
+                info->ContextRecord->Dr7 &= ~1;
+
+                // execution resumes at mov rbx, r8 (the restore), then returns
+                return EXCEPTION_CONTINUE_EXECUTION;
+            }
         };
 
+        TrapContext ctx = { expectedTrapAddr, &hitCount, &hypervisorCaught };
+
         __try {
             reinterpret_cast<void(*)()>(execMem)();
         }
-        __except (vetExceptions(_exception_code(), reinterpret_cast<EXCEPTION_POINTERS*>(_exception_info()))) {
-            // if we didn't hit exactly once, assume hypervisor interference
-            if (hitCount != 1) {
-                hypervisorCaught = true;
-            }
+        __except (SEH_Trap::Vet(_exception_code(), reinterpret_cast<EXCEPTION_POINTERS*>(_exception_info()), &ctx)) {
+            // effectively unreachable: SEH_Trap::Vet only returns CONTINUE_EXECUTION or CONTINUE_SEARCH
         }
 
+        // If the hypervisor swallowed the exception entirely, hitCount will be 0
+        if (hitCount != 1) {
+            hypervisorCaught = true;
+        }
+
         pNtSetContextThread(hCurrentThread, &origCtx);
 
-        PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
+        PVOID freeBase = execMem;
+        SIZE_T freeSize = trampSize;
         pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
 #endif
 #endif
         return hypervisorCaught;
     }
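The lambda-to-static-function move is the core robustness fix here; the resulting filter shape, in miniature (hypothetical sketch, MSVC-only, not the library's code): the filter expression runs first, may patch the thread context, and EXCEPTION_CONTINUE_EXECUTION resumes at the faulting instruction.

    #include <windows.h>

    static LONG Filter(DWORD code, EXCEPTION_POINTERS* info) noexcept {
        if (code != static_cast<DWORD>(EXCEPTION_SINGLE_STEP))
            return EXCEPTION_CONTINUE_SEARCH;  // not ours: keep searching outward
        info->ContextRecord->EFlags &= ~0x100; // clear TF so stepping stops
        return EXCEPTION_CONTINUE_EXECUTION;   // resume the guarded code
    }

    void run_guarded(void (*fn)()) {
        __try {
            fn();
        }
        __except (Filter(GetExceptionCode(), GetExceptionInformation())) {
            // unreachable: the filter never returns EXCEPTION_EXECUTE_HANDLER
        }
    }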
