1. 程式人生 > >趣探 Mach-O:FishHook 解析

趣探 Mach-O:FishHook 解析

  • 對作業系統、編譯原理的理解:深入解析Mac OS X & iOS作業系統 、 程式設計師的自我修養

本文的闡述順序按照函式呼叫過程來進行

Fishhook 可以做什麼

在此借用阿里百川的一張分析圖,可以比較清晰的瞭解FishHook發揮了哪些作用

852671-57cff46c8aabbe36 阿里百川

FishHook在這裡是對動態連結庫起作用,修改對應的函式實現

對於動態連結庫裡面的C函式,第一次呼叫的時候,我們會得到函式和實現地址的對應關係,函式的實現地址存放在一個叫 __la_symbol_ptr 的地方;第二次呼叫的時候,直接通過 __la_symbol_ptr 找到函式地址就可以,不再需要繁瑣的獲取函式地址的過程。(具體通過哪些過程,可以參考剛才的連結:Mach-O 的動態連結過程)

那麼,上圖的含義就很明瞭了

在程式執行時,動態連結的 C 函式 dynamic(...) 的地址記錄在 __DATA segment 下的 __la_symbol_ptr 中;初始時,程式只知道 dynamic 函式的符號名而不知道函式的實現地址;首次呼叫時,程式通過 __TEXT segment 中的 __stub_helper 取得繫結資訊,通過 dyld_stub_binder 來更新 __la_symbol_ptr 中的符號實現地址;這樣,再次呼叫時,就可以通過 __la_symbol_ptr 直接找到 dynamic 函式的實現;如果我們需要替換 dynamic 函式的實現,只需要修改 __la_symbol_ptr 即可,這也就是我們要談的 Fishhook

Fishhook 的實現

通過fishhook的官方文件可以知道,Fishhook的使用方法大致如下:

static int (*original_open)(const char *, int, ...);

int new_open(const char *path, int oflag, ...) {
    va_list ap = {0};
    mode_t mode = 0;

    if ((oflag & O_CREAT) != 0) {
        // mode only applies to O_CREAT
        va_start(ap, oflag);
        mode = va_arg(ap, int);
        va_end(ap);
        printf("Calling real open('%s', %d, %d)\n", path, oflag, mode);
        return original_open(path, oflag, mode);
    } else {
        printf("Calling real open('%s', %d)\n", path, oflag);
        return original_open(path, oflag, mode);
    }
}

int main(int argc, const char *argv[]) {
    @autoreleasepool {
        struct rebinding open_rebinding = {"open", new_open, (void *)&original_open};
        rebind_symbols((struct rebinding[1]){open_rebinding}, 1);
        __unused int fd = open(argv[0], O_RDONLY);
    }
    return 0;
}

先從函式的入口,rebind_symbols開始談起吧,rebind_symbols主要是使用_dyld_register_func_for_add_image來註冊回撥函式,在載入動態庫的時候執行一些操作

1234567891011 intrebind_symbols(structrebinding rebindings[],size_t rebindings_nel){// 呼叫 prepend_rebindings 的函式,將整個 rebindings 陣列新增到 _rebindings_head 這個私有連結串列的頭部intretval=prepend_rebindings(&_rebindings_head,rebindings,rebindings_nel);if(retval next的值來判斷是否為第一次呼叫// If this was the first call, register callback for image additions (which is also invoked for// existing images, otherwise, just run on existing imagesif(!_rebindings_head->next){_dyld_register_func_for_add_image(_rebind_symbols_for_image);}else{uint32_tc=_dyld_image_count();for(uint32_ti=0;i

對於prepend_rebindings的程式碼如下

// 連結串列的陣列結構
struct rebindings_entry {
    struct rebinding *rebindings;
    size_t rebindings_nel;
    struct rebindings_entry *next;
};

static struct rebindings_entry *_rebindings_head;

static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
    struct rebindings_entry *new_entry = malloc(sizeof(struct rebindings_entry));
    if (!new_entry) {
        return -1;
    }
    new_entry->rebindings = malloc(sizeof(struct rebinding) * nel);
    if (!new_entry->rebindings) {
        free(new_entry);
        return -1;
    }
    // 將 rebindings 插入到連結串列頭部
    memcpy(new_entry->rebindings, rebindings, sizeof(struct rebinding) * nel);
    new_entry->rebindings_nel = nel;
    new_entry->next = *rebindings_head;
    *rebindings_head = new_entry;
    return 0;
}

基礎結構解釋

Dl_info

/*
 * Structure filled in by dladdr().
 */
typedef struct dl_info {
    const char *dli_fname; /* Pathname of shared object */
    void *dli_fbase;       /* Base address of shared object */
    const char *dli_sname; /* Name of nearest symbol */
    void *dli_saddr;       /* Address of nearest symbol */
} Dl_info;

稍後經過 dladdr() 處理後得到的有效資訊,都會放進這個結構體中

  • dli_fname:路徑名,例如
/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation
  • dli_fbase:共享物件的起始地址(Base address of shared object,比如上面的 CoreFoundation)
  • dli_saddr :符號的地址
  • dli_sname:符號的名字,即下面的第四列的函式資訊
Thread 0:
0   libsystem_kernel.dylib   0x11135810a   __semwait_signal + 94474
1   libsystem_c.dylib        0x1110dab0b   sleep + 518923
2   QYPerformanceMonitor     0x10dda4f1b   -[ViewController tableView:cellForRowAtIndexPath:] + 7963
3   UIKit                    0x10ed4d4f4   -[UITableView _createPreparedCellForGlobalRow:withIndexPath:willDisplay:] + 1586420

LC_SYMTAB

struct symtab_command {
    uint32_t cmd;     /* LC_SYMTAB */
    uint32_t cmdsize; /* sizeof(struct symtab_command) */
    uint32_t symoff;  /* symbol table offset */
    uint32_t nsyms;   /* number of symbol table entries */
    uint32_t stroff;  /* string table offset */
    uint32_t strsize; /* string table size in bytes */
};

主要是提供符號表的偏移量,以及元素個數,還有字串表的偏移和其長度。符號表在 Mach-O目標檔案中的地址可以通過LC_SYMTAB載入命令指定的 symoff找到,對應的符號名稱在stroff,總共有nsyms條符號資訊

LC_DYSYMTAB

這個結構體有些複雜,有興趣的可以閱讀 loader.h 檔案;其內部標示了動態符號表的偏移量和符號個數

struct dysymtab_command {
    uint32_t cmd;            /* LC_DYSYMTAB */
    uint32_t cmdsize;        /* sizeof(struct dysymtab_command) */
    uint32_t indirectsymoff; /* file offset to the indirect symbol table */
    uint32_t nindirectsyms;  /* number of indirect symbol table entries */
    .......

_rebind_symbols_for_image

對於關鍵的程式碼 _rebind_symbols_for_image 如下

123456789101112131415161718192021222324252627282930313233 staticvoidrebind_symbols_for_image(structrebindings_entry *rebindings,conststructmach_header *header,intptr_t slide){Dl_info info;if(dladdr(header,&info)==0){return;}// segment_command_64segment_command_t *cur_seg_cmd;segment_command_t *linkedit_segment=NULL;// LC_SYMTABstructsymtab_command*symtab_cmd=NULL;// LC_DYSYMTABstructdysymtab_command*dysymtab_cmd=NULL;// 下面是要尋找load_command,所以越過mach_header_tuintptr_t cur=(uintptr_t)header+sizeof(mach_header_t);for(uinti=0;incmds;i++,cur+=cur_seg_cmd->cmdsize){cur_seg_cmd=(segment_command_t *)cur;if(cur_seg_cmd->cmd==LC_SEGMENT_ARCH_DEPENDENT){if(strcmp(cur_seg_cmd->segname,SEG_LINKEDIT)==0){//遍歷尋找__LINKEDITlinkedit_segment=cur_seg_cmd;}}elseif(cur_seg_cmd->cmd==LC_SYMTAB){//遍歷尋找lc_symtabsymtab_cmd=(structsymtab_command*)cur_seg_cmd;}elseif(cur_seg_cmd->cmd==LC_DYSYMTAB){//遍歷尋找