// Copyright (c) Microsoft Corporation. All rights reserved. // Copyright (c) Lysann Tranvouez. All rights reserved. #include "mach_detours.h" #include "detours_internal.h" #include "arm64/detours_arm64.h" #include #include #include #include #include #include #ifdef __arm64__ #define DETOURS_ARM64 #else #error Unsupported architecture (arm64) #endif //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Trampoline Memory Management typedef struct detour_region { uint32_t signature; struct detour_region* next; // Next region in list of regions. detour_trampoline* free_list_head; // List of free trampolines in this region. } detour_region; // ReSharper disable once CppMultiCharacterLiteral static const uint32_t DETOUR_REGION_SIGNATURE = 'Rrtd'; static const uint32_t DETOUR_REGION_SIZE = 0x10000; static const uint32_t DETOUR_TRAMPOLINES_PER_REGION = (DETOUR_REGION_SIZE / sizeof(detour_trampoline)) - 1; static detour_region* s_regions_head = nullptr; static detour_region* s_default_region = nullptr; static mach_error_t internal_detour_writable_trampoline_regions() { // Mark all the regions as writable. const mach_port_t port = mach_task_self(); for (detour_region* pRegion = s_regions_head; pRegion != NULL; pRegion = pRegion->next) { const mach_error_t error = mach_vm_protect(port, (mach_vm_address_t)pRegion, DETOUR_REGION_SIZE, false, VM_PROT_READ | VM_PROT_WRITE); if (error != err_none) { return error; } } return err_none; } static void internal_detour_runnable_trampoline_regions() { // Mark all the regions as executable. const mach_port_t port = mach_task_self(); for (detour_region* pRegion = s_regions_head; pRegion != NULL; pRegion = pRegion->next) { const mach_error_t error = mach_vm_protect(port, (mach_vm_address_t)pRegion, DETOUR_REGION_SIZE, false, VM_PROT_READ | VM_PROT_EXECUTE); if (error != err_none) { DETOUR_BREAK(); } } } static void internal_detour_free_trampoline(detour_trampoline* trampoline) { detour_region* region = (detour_region*)((uintptr_t)trampoline & ~(uintptr_t)0xffff); memset(trampoline, 0, sizeof(*trampoline)); trampoline->ptr_remain = (uint8_t*)region->free_list_head; region->free_list_head = trampoline; } static bool internal_detour_is_region_empty(detour_region* region) { // Stop if the region isn't a region (this would be bad). if (region->signature != DETOUR_REGION_SIGNATURE) { DETOUR_BREAK(); return false; } uint8_t* region_begin = (uint8_t*)region; uint8_t* region_limit = region_begin + DETOUR_REGION_SIZE; // Stop if any of the trampolines aren't free. detour_trampoline* trampoline = ((detour_trampoline*)region) + 1; for (int i = 0; i < DETOUR_TRAMPOLINES_PER_REGION; i++) { if (trampoline[i].ptr_remain != NULL && (trampoline[i].ptr_remain < region_begin || trampoline[i].ptr_remain >= region_limit)) { return false; } } // OK, the region is empty. return true; } static void internal_detour_free_unused_trampoline_regions() { detour_region** ptr_region_base = &s_regions_head; detour_region* curr_region = s_regions_head; const mach_port_t port = mach_task_self(); while (curr_region) { if (internal_detour_is_region_empty(curr_region)) { *ptr_region_base = curr_region->next; vm_deallocate(port, (vm_address_t)curr_region, DETOUR_REGION_SIZE); s_default_region = nullptr; } else { ptr_region_base = &curr_region->next; } curr_region = *ptr_region_base; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Trampoline Helpers static uint8_t internal_detour_align_from_trampoline(const detour_trampoline* trampoline, const uint8_t offset_trampoline) { for (int32_t n = 0; n < ARRAYSIZE(trampoline->align); n++) { if (trampoline->align[n].offset_trampoline == offset_trampoline) { return trampoline->align[n].offset_target; } } return 0; } static uint8_t internal_detour_align_from_target(const detour_trampoline* trampoline, const uint8_t offset_target) { for (int32_t n = 0; n < ARRAYSIZE(trampoline->align); n++) { if (trampoline->align[n].offset_target == offset_target) { return trampoline->align[n].offset_trampoline; } } return 0; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Transactions typedef enum detour_operation_kind { detour_operation_kind_attach, detour_operation_kind_detach, } detour_operation_kind; typedef struct detour_operation { struct detour_operation* next; detour_operation_kind kind; uint8_t** pointer; uint8_t* target; detour_trampoline* trampoline; vm_prot_t perm; } detour_operation; typedef struct detour_pending_thread { struct detour_pending_thread* next; thread_t thread; } detour_pending_thread; static bool s_ignore_too_small = false; static bool s_retain_regions = false; static _Atomic(thread_t) s_transaction_thread = THREAD_NULL; static detour_operation* s_pending_operations_head = nullptr; static detour_pending_thread* s_pending_threads_head = nullptr; static mach_error_t s_pending_error = err_none; static void** s_pending_error_pointer = nullptr; mach_error_t detour_transaction_begin() { // Make sure only one thread can start a transaction. thread_t expected = THREAD_NULL; // ReSharper disable once CppIncompatiblePointerConversion if (!atomic_compare_exchange_strong(&s_transaction_thread, &expected, mach_thread_self())) { return detour_err_in_progress; } s_pending_operations_head = nullptr; s_pending_threads_head = nullptr; s_pending_error_pointer = nullptr; // Make sure the trampoline pages are writable. s_pending_error = internal_detour_writable_trampoline_regions(); return s_pending_error; } mach_error_t detour_transaction_abort() { if (s_transaction_thread != mach_thread_self()) { return detour_err_wrong_thread; } // Restore all the page permissions. const mach_port_t port = mach_task_self(); for (detour_operation* operation = s_pending_operations_head; operation != nullptr;) { DETOUR_CHECK( mach_vm_protect(port, (mach_vm_address_t)operation->target, operation->trampoline->restore_code_size, false, operation->perm)); if (operation->kind == detour_operation_kind_attach) { if (operation->trampoline) { internal_detour_free_trampoline(operation->trampoline); operation->trampoline = nullptr; } } detour_operation* next = operation->next; free(operation); operation = next; } s_pending_operations_head = nullptr; // Make sure the trampoline pages are no longer writable. internal_detour_runnable_trampoline_regions(); // Resume any suspended threads. for (detour_pending_thread* thread = s_pending_threads_head; thread != nullptr;) { // There is nothing we can do if this fails. DETOUR_CHECK(thread_resume(thread->thread)); detour_pending_thread* next = thread->next; free(thread); thread = next; } s_pending_threads_head = nullptr; s_transaction_thread = THREAD_NULL; return err_none; } mach_error_t detour_transaction_commit() { return detour_transaction_commit_ex(nullptr); } mach_error_t detour_transaction_commit_ex(detour_func_t** out_failed_target) { if (out_failed_target != NULL) { *out_failed_target = s_pending_error_pointer; } if (s_transaction_thread != mach_thread_self()) { return detour_err_wrong_thread; } // If any of the pending operations failed, then we abort the whole transaction. if (s_pending_error != err_none) { DETOUR_BREAK(); detour_transaction_abort(); return s_pending_error; } // Insert or remove each of the detours. for (detour_operation* operation = s_pending_operations_head; operation != nullptr; operation = operation->next) { if (operation->kind == detour_operation_kind_detach) { memcpy(operation->target, operation->trampoline->restore_code, operation->trampoline->restore_code_size); #ifdef DETOURS_ARM64 *operation->pointer = operation->target; #endif } else { DETOUR_TRACE(("detours: trampoline=%p, ptr_remain=%p, ptr_detour=%p, restore_code_size=%u\n", operation->trampoline, operation->trampoline->ptr_remain, operation->trampoline->ptr_detour, operation->trampoline->restore_code_size)); DETOUR_TRACE(("detours: target=%p: " "%02x %02x %02x %02x " "%02x %02x %02x %02x " "%02x %02x %02x %02x [before]\n", operation->target, operation->target[0], operation->target[1], operation->target[2], operation->target[3], operation->target[4], operation->target[5], operation->target[6], operation->target[7], operation->target[8], operation->target[9], operation->target[10], operation->target[11])); #ifdef DETOURS_ARM64 uint8_t* code = internal_detour_gen_jmp_indirect(operation->target, (uint64_t*)&(operation->trampoline->ptr_detour)); code = internal_detour_gen_brk(code, operation->trampoline->ptr_remain); UNUSED_VARIABLE(code); *operation->pointer = operation->trampoline->code; #endif // DETOURS_ARM64 DETOUR_TRACE(("detours: target=%p: " "%02x %02x %02x %02x " "%02x %02x %02x %02x " "%02x %02x %02x %02x [after]\n", operation->target, operation->target[0], operation->target[1], operation->target[2], operation->target[3], operation->target[4], operation->target[5], operation->target[6], operation->target[7], operation->target[8], operation->target[9], operation->target[10], operation->target[11])); DETOUR_TRACE(("detours: trampoline=%p: " "%02x %02x %02x %02x " "%02x %02x %02x %02x " "%02x %02x %02x %02x\n", operation->trampoline, operation->trampoline->code[0], operation->trampoline->code[1], operation->trampoline->code[2], operation->trampoline->code[3], operation->trampoline->code[4], operation->trampoline->code[5], operation->trampoline->code[6], operation->trampoline->code[7], operation->trampoline->code[8], operation->trampoline->code[9], operation->trampoline->code[10], operation->trampoline->code[11])); } } // Update any suspended threads. for (detour_pending_thread* thread = s_pending_threads_head; thread != nullptr; thread = thread->next) { arm_thread_state64_t threadState; mach_msg_type_number_t threadStateCnt = ARM_THREAD_STATE64_COUNT; const kern_return_t error = thread_get_state(thread->thread, ARM_THREAD_STATE64, (thread_state_t)&threadState, &threadStateCnt); if (error != err_none) { DETOUR_BREAK(); continue; } const uintptr_t pc = arm_thread_state64_get_pc(threadState); for (detour_operation* op = s_pending_operations_head; op != nullptr; op = op->next) { switch (op->kind) { case detour_operation_kind_attach: { const uintptr_t targetAddr = (uintptr_t)op->target; if (pc >= targetAddr && pc < targetAddr + op->trampoline->restore_code_size) { uintptr_t new_pc = (uintptr_t)op->trampoline; new_pc += internal_detour_align_from_target(op->trampoline, pc - targetAddr); printf("detours: thread %u was at 0x%" PRIXPTR ", moved to 0x%" PRIXPTR "\n", thread->thread, pc, new_pc); arm_thread_state64_set_pc_fptr(threadState, new_pc); thread_set_state(thread->thread, ARM_THREAD_STATE64, (thread_state_t)&threadState, ARM_THREAD_STATE64_COUNT); } break; } case detour_operation_kind_detach: { const uintptr_t trampAddr = (uintptr_t)op->trampoline; if (pc >= trampAddr && pc < trampAddr + sizeof(*op->trampoline)) { uintptr_t new_pc = (uintptr_t)op->target; new_pc += internal_detour_align_from_trampoline(op->trampoline, pc - trampAddr); printf("detours: thread %u was at 0x%" PRIXPTR ", moved to 0x%" PRIXPTR "\n", thread->thread, pc, new_pc); arm_thread_state64_set_pc_fptr(threadState, new_pc); thread_set_state(thread->thread, ARM_THREAD_STATE64, (thread_state_t)&threadState, ARM_THREAD_STATE64_COUNT); } break; } } } } // Restore all the page permissions bool freed_trampoline = false; const mach_port_t port = mach_task_self(); for (detour_operation* operation = s_pending_operations_head; operation != nullptr;) { DETOUR_CHECK( mach_vm_protect(port, (mach_vm_address_t)operation->target, operation->trampoline->restore_code_size, false, operation->perm)); if (operation->kind == detour_operation_kind_detach && operation->trampoline) { internal_detour_free_trampoline(operation->trampoline); operation->trampoline = nullptr; freed_trampoline = true; } detour_operation* next = operation->next; free(operation); operation = next; } s_pending_operations_head = nullptr; // Free any trampoline regions that are now unused. if (freed_trampoline && !s_retain_regions) { internal_detour_free_unused_trampoline_regions(); } // Make sure the trampoline pages are no longer writable. internal_detour_runnable_trampoline_regions(); // Resume any suspended threads. for (detour_pending_thread* thread = s_pending_threads_head; thread != nullptr;) { // There is nothing we can do if this fails. DETOUR_CHECK(thread_resume(thread->thread)); detour_pending_thread* next = thread->next; free(thread); thread = next; } s_pending_threads_head = nullptr; s_transaction_thread = THREAD_NULL; if (out_failed_target) { *out_failed_target = s_pending_error_pointer; } return s_pending_error; } mach_error_t detour_manage_thread(thread_t thread) { mach_error_t error; // If any of the pending operations failed, then we don't need to do this. if (s_pending_error != err_none) { return s_pending_error; } // must be the transaction thread if (s_transaction_thread != mach_thread_self()) { return detour_err_wrong_thread; } // Silently (and safely) drop any attempt to suspend our own thread. if (thread == mach_thread_self()) { return err_none; } detour_pending_thread* new_pending_thread = malloc(sizeof(detour_pending_thread)); if (!new_pending_thread) { error = KERN_RESOURCE_SHORTAGE; fail: free(new_pending_thread); s_pending_error = error; s_pending_error_pointer = nullptr; DETOUR_BREAK(); return error; } error = thread_suspend(thread); if (error != err_none) { DETOUR_BREAK(); goto fail; } new_pending_thread->thread = thread; new_pending_thread->next = s_pending_threads_head; s_pending_threads_head = new_pending_thread; return err_none; }