Capstone反汇编引擎数据类型及API分析及示例(三)

2019-07-29 约 1538 字 预计阅读 8 分钟

声明:本文 【Capstone反汇编引擎数据类型及API分析及示例(三)】 由作者 kabeor 于 2019-07-29 09:08:00 首发 先知社区 曾经 浏览数 91 次

感谢 kabeor 的辛苦付出!

Capstone反汇编引擎数据类型及API分析及示例(三)

Capstone反汇编引擎数据类型及API分析及示例(一)
Capstone反汇编引擎数据类型及API分析及示例(二)

API分析

cs_open

cs_err CAPSTONE_API cs_open(cs_arch arch, cs_mode mode, csh *handle);

初始化cs句柄

参数
arch: 架构类型 (CS_ARCH_*)
mode: 硬件模式 (CS_MODE_*),
在cs_mode数据类型中可查
handle: 指向句柄, 返回时更新
return: 创建成功返回CS_ERR_OK,否则返回cs_err枚举中对应的错误信息

实现代码

// Create a new engine handle for the given architecture and mode.
//
// arch:   architecture to disassemble; must be below CS_ARCH_MAX and have an
//         initializer registered in cs_arch_init[].
// mode:   hardware mode bits; rejected when any bit is set in
//         cs_arch_disallowed_mode_mask[arch].
// handle: out parameter; receives the new handle on success and is set to 0
//         on every failure path, so a failed open never leaves a stale value.
//
// Returns CS_ERR_OK on success, otherwise CS_ERR_MEMSETUP, CS_ERR_ARCH,
// CS_ERR_MODE, CS_ERR_MEM, or the error returned by the architecture initializer.
cs_err CAPSTONE_API cs_open(cs_arch arch, cs_mode mode, csh *handle)
{
    cs_err err;
    struct cs_struct *ud;

    if (!cs_mem_malloc || !cs_mem_calloc || !cs_mem_realloc || !cs_mem_free || !cs_vsnprintf) {
        // Error: the dynamic memory hooks must be installed with
        // cs_option(CS_OPT_MEM) before cs_open() is used.
        *handle = 0;
        return CS_ERR_MEMSETUP;
    }

    // The architecture is usable only if it is inside the enum range and has
    // an initializer registered.
    if (arch >= CS_ARCH_MAX || !cs_arch_init[arch]) {
        *handle = 0;
        return CS_ERR_ARCH;
    }

    if (mode & cs_arch_disallowed_mode_mask[arch]) {
        *handle = 0;
        return CS_ERR_MODE;
    }

    ud = cs_mem_calloc(1, sizeof(*ud));
    if (!ud) {
        // out of memory
        *handle = 0;
        return CS_ERR_MEM;
    }

    ud->errnum = CS_ERR_OK;
    ud->arch = arch;
    ud->mode = mode;
    // by default, instructions are decoded without detail information
    ud->detail = CS_OPT_OFF;

    // default skipdata setup
    ud->skipdata_setup.mnemonic = SKIPDATA_MNEM;

    err = cs_arch_init[ud->arch](ud);
    if (err) {
        cs_mem_free(ud);
        *handle = 0;
        return err;
    }

    *handle = (uintptr_t)ud;

    return CS_ERR_OK;
}

其中,cs_struct结构体包含更多细节设定,如下

// Internal engine state behind a csh handle: cs_open() allocates one of these
// and stores its address in the handle.
struct cs_struct {
    cs_arch arch;
    cs_mode mode;
    Printer_t printer;  // asm printer
    void *printer_info; // printer information
    Disasm_t disasm;    // disassembler
    void *getinsn_info; // auxiliary information for the printer
    GetName_t reg_name;
    GetName_t insn_name;
    GetName_t group_name;
    GetID_t insn_id;
    PostPrinter_t post_printer;
    cs_err errnum;
    ARM_ITStatus ITBlock;   // ARM-specific option
    cs_opt_value detail, imm_unsigned;
    int syntax; // asm syntax for the basic printer of ARM, Mips, PPC and similar architectures
    bool doing_mem; // handling a memory operand in the InstPrinter code
    unsigned short *insn_cache; // cached index built for mapping.c
    GetRegisterName_t get_regname;
    bool skipdata;  // set to true when data should be skipped while disassembling
    uint8_t skipdata_size;  // number of bytes to skip
    cs_opt_skipdata skipdata_setup; // user-defined skipdata setup
    const uint8_t *regsize_map; // register size map (currently x86 only)
    GetRegisterAccess_t reg_access;
    struct insn_mnem *mnem_list;    // linked list of customized instruction mnemonics
};

示例(创建一个x86_64类型的cs句柄):
cs_open(CS_ARCH_X86, CS_MODE_64, &handle)

cs_close

cs_err CAPSTONE_API cs_close(csh *handle);

释放句柄
参数
handle: 指向一个cs_open()打开的句柄
return: 释放成功返回CS_ERR_OK,否则返回cs_err枚举的错误信息

实现代码,可以看出释放句柄时会先释放句柄占用的各项内存,最后将句柄值设置为0

// Release every resource owned by an engine handle and invalidate the handle.
//
// handle: pointer to a handle obtained from cs_open().
//
// Returns CS_ERR_CSH when *handle is 0, otherwise CS_ERR_OK after the engine
// state has been freed and *handle has been reset to 0.
cs_err CAPSTONE_API cs_close(csh *handle)
{
    struct cs_struct *engine;
    struct insn_mnem *node;

    // a zero handle is not usable
    if (*handle == 0)
        return CS_ERR_CSH;

    engine = (struct cs_struct *)(*handle);

    if (engine->printer_info != NULL)
        cs_mem_free(engine->printer_info);

    // walk the linked list of customized mnemonics, freeing each node
    for (node = engine->mnem_list; node != NULL; ) {
        struct insn_mnem *following = node->next;

        cs_mem_free(node);
        node = following;
    }

    cs_mem_free(engine->insn_cache);

    // wipe the state before handing the memory back
    memset(engine, 0, sizeof(*engine));
    cs_mem_free(engine);

    // zero the caller's handle so it cannot be used again after cs_close()
    *handle = 0;

    return CS_ERR_OK;
}

示例:
cs_close(&handle);

cs_option

cs_err CAPSTONE_API cs_option(csh handle, cs_opt_type type, size_t value);

设置反汇编引擎的运行时选项

handle: cs_open()打开的句柄
type: 设置选项的类型
value: 与type对应的选项值
return: 设置成功返回CS_ERR_OK,否则返回cs_err枚举的错误信息

注意: 在CS_OPT_MEM的情况下,handle可以是任何值,因此cs_option(handle, CS_OPT_MEM, value)必须在cs_open()之前被调用

实现代码

// Set a run-time option on the disassembly engine.
//
// ud:    handle returned by cs_open(); not used when type is CS_OPT_MEM.
// type:  which option to set.
// value: option value. For CS_OPT_MEM, CS_OPT_SKIPDATA_SETUP and
//        CS_OPT_MNEMONIC it is a pointer (cs_opt_mem *, cs_opt_skipdata *,
//        cs_opt_mnem *) passed as an integer.
//
// Returns CS_ERR_OK on success, CS_ERR_CSH for a null handle, CS_ERR_OPTION
// for a disallowed mode, and CS_ERR_MEM when a customized mnemonic entry
// cannot be allocated. Any option not fully handled here is forwarded to the
// architecture's own handler in cs_arch_option[].
cs_err CAPSTONE_API cs_option(csh ud, cs_opt_type type, size_t value)
{
    struct cs_struct *handle;
    cs_opt_mnem *opt;

    // CS_OPT_MEM is supported before every other API (even cs_open())
    if (type == CS_OPT_MEM) {
        cs_opt_mem *mem = (cs_opt_mem *)value;

        cs_mem_malloc = mem->malloc;
        cs_mem_calloc = mem->calloc;
        cs_mem_realloc = mem->realloc;
        cs_mem_free = mem->free;
        cs_vsnprintf = mem->vsnprintf;

        return CS_ERR_OK;
    }

    handle = (struct cs_struct *)(uintptr_t)ud;
    if (!handle)
        return CS_ERR_CSH;

    switch(type) {
        default:
            break;

        case CS_OPT_UNSIGNED:
            handle->imm_unsigned = (cs_opt_value)value;
            return CS_ERR_OK;

        case CS_OPT_DETAIL:
            handle->detail = (cs_opt_value)value;
            return CS_ERR_OK;

        case CS_OPT_SKIPDATA:
            handle->skipdata = (value == CS_OPT_ON);
            if (handle->skipdata) {
                // compute the skip size only once
                if (handle->skipdata_size == 0) {
                    handle->skipdata_size = skipdata_size(handle);
                }
            }
            return CS_ERR_OK;

        case CS_OPT_SKIPDATA_SETUP:
            if (value)
                handle->skipdata_setup = *((cs_opt_skipdata *)value);
            return CS_ERR_OK;

        case CS_OPT_MNEMONIC:
            opt = (cs_opt_mnem *)value;
            if (opt->id) {
                if (opt->mnemonic) {
                    struct insn_mnem *tmp;

                    // add a new instruction, or replace an existing one
                    // 1. check whether this insn is already in the list
                    tmp = handle->mnem_list;
                    while(tmp) {
                        if (tmp->insn.id == opt->id) {
                            // found the instruction: replace its mnemonic
                            (void)strncpy(tmp->insn.mnemonic, opt->mnemonic, sizeof(tmp->insn.mnemonic) - 1);
                            tmp->insn.mnemonic[sizeof(tmp->insn.mnemonic) - 1] = '\0';
                            break;
                        }
                        tmp = tmp->next;
                    }

                    // 2. not found: add this instruction
                    if (!tmp) {
                        tmp = cs_mem_malloc(sizeof(*tmp));
                        if (!tmp) {
                            // out of memory: the list is left unchanged
                            return CS_ERR_MEM;
                        }
                        tmp->insn.id = opt->id;
                        (void)strncpy(tmp->insn.mnemonic, opt->mnemonic, sizeof(tmp->insn.mnemonic) - 1);
                        tmp->insn.mnemonic[sizeof(tmp->insn.mnemonic) - 1] = '\0';
                        // the new instruction goes to the front of the list
                        tmp->next = handle->mnem_list;
                        handle->mnem_list = tmp;
                    }
                    return CS_ERR_OK;
                } else {
                    struct insn_mnem *prev, *tmp;

                    // a null mnemonic removes the customization for this id
                    tmp = handle->mnem_list;
                    prev = tmp;
                    while(tmp) {
                        if (tmp->insn.id == opt->id) {
                            // unlink the entry; tmp == prev only at the list head
                            if (tmp == prev) {
                                handle->mnem_list = tmp->next;
                            } else {
                                prev->next = tmp->next;
                            }
                            cs_mem_free(tmp);
                            break;
                        }
                        prev = tmp;
                        tmp = tmp->next;
                    }
                }
            }
            return CS_ERR_OK;

        case CS_OPT_MODE:
            // verify that the requested mode is valid for this architecture
            if (value & cs_arch_disallowed_mode_mask[handle->arch]) {
                return CS_ERR_OPTION;
            }
            break;
    }

    return cs_arch_option[handle->arch](handle, type, value);
}

示例,更改反汇编后显示的语法:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00"

// Example: disassemble CODE as x86-64 and print every instruction in AT&T syntax.
// Returns 0 on success and -1 if the engine cannot be opened.
int main(void)
{
    csh handle = 0;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }
    cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);  // print in AT&T syntax
    count = cs_disasm(handle, (unsigned char*)CODE, sizeof(CODE) - 1, 0x1000, 0, &insn);
    if (count) {
        size_t j;

        for (j = 0; j < count; j++) {
            // address is a 64-bit value: cast it and use %llx so the format
            // matches the argument on every compiler
            printf("0x%llx:\t%s\t\t%s\n", (unsigned long long)insn[j].address, insn[j].mnemonic, insn[j].op_str);
        }

        cs_free(insn, count);
    }
    else
        printf("ERROR: Failed to disassemble given code!\n");

    cs_close(&handle);

    return 0;
}

输出

cs_errno

cs_err CAPSTONE_API cs_errno(csh handle);

当某个API调用出错时,返回对应的错误码
参数
handle: cs_open()打开的句柄
return: 无错误返回CS_ERR_OK,否则返回cs_err枚举的错误信息

实现很简单,判断到句柄不存在直接返回CS_ERR_CSH

示例:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00"

int main(void)
{
    csh handle = 0;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }

    cs_close(&handle);
    std::cout << cs_errno(handle);    //关闭句柄后检查将报错
    return 0;
}

输出,错误码4即CS_ERR_CSH

cs_strerror

const char * CAPSTONE_API cs_strerror(cs_err code);

将上个API输出的错误码转换为详细错误信息

// Translate a cs_err code into a human-readable, statically allocated message.
//
// code: error code, e.g. the value returned by cs_errno().
//
// Returns the message for a known code, or "Unknown error code" otherwise.
const char * CAPSTONE_API cs_strerror(cs_err code)
{
    const char *text;

    switch(code) {
        case CS_ERR_OK:
            text = "OK (CS_ERR_OK)";
            break;
        case CS_ERR_MEM:
            text = "Out of memory (CS_ERR_MEM)";
            break;
        case CS_ERR_ARCH:
            text = "Invalid/unsupported architecture(CS_ERR_ARCH)";
            break;
        case CS_ERR_HANDLE:
            text = "Invalid handle (CS_ERR_HANDLE)";
            break;
        case CS_ERR_CSH:
            text = "Invalid csh (CS_ERR_CSH)";
            break;
        case CS_ERR_MODE:
            text = "Invalid mode (CS_ERR_MODE)";
            break;
        case CS_ERR_OPTION:
            text = "Invalid option (CS_ERR_OPTION)";
            break;
        case CS_ERR_DETAIL:
            text = "Details are unavailable (CS_ERR_DETAIL)";
            break;
        case CS_ERR_MEMSETUP:
            text = "Dynamic memory management uninitialized (CS_ERR_MEMSETUP)";
            break;
        case CS_ERR_VERSION:
            text = "Different API version between core & binding (CS_ERR_VERSION)";
            break;
        case CS_ERR_DIET:
            text = "Information irrelevant in diet engine (CS_ERR_DIET)";
            break;
        case CS_ERR_SKIPDATA:
            text = "Information irrelevant for 'data' instruction in SKIPDATA mode (CS_ERR_SKIPDATA)";
            break;
        case CS_ERR_X86_ATT:
            text = "AT&T syntax is unavailable (CS_ERR_X86_ATT)";
            break;
        case CS_ERR_X86_INTEL:
            text = "INTEL syntax is unavailable (CS_ERR_X86_INTEL)";
            break;
        case CS_ERR_X86_MASM:
            text = "MASM syntax is unavailable (CS_ERR_X86_MASM)";
            break;
        default:
            // any code without a dedicated message
            text = "Unknown error code";
            break;
    }

    return text;
}

示例,结合cs_errno使用:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00"

int main(void)
{
    csh handle = 0;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }

    cs_close(&handle);
    std::cout << cs_strerror(cs_errno(handle));  //直接输出报错信息
    return 0;
}

输出

cs_disasm

size_t CAPSTONE_API cs_disasm(csh handle,
        const uint8_t *code, size_t code_size,
        uint64_t address,
        size_t count,
        cs_insn **insn);

给定缓冲区、大小、地址和指令数量,反汇编机器码
API动态地分配内存来存放反汇编出的指令,生成的指令将放在*insn中

注意: 必须释放分配的内存,以避免内存泄漏。对于需要动态分配稀缺内存的系统(如OS内核或固件),API cs_disasm_iter()可能是比cs_disasm()更好的选择。原因是,使用cs_disasm()时,基于有限的可用内存,必须预先计算要反汇编多少条指令。

handle: cs_open()返回的句柄
code: 包含要反汇编的机器码的缓冲区。
code_size:上面代码缓冲区的大小。
address:给定原始代码缓冲区中的第一条指令的地址。
insn: 由这个API填写的指令数组。注意: insn将由这个函数分配,应该用cs_free () API释放
count: 需要反汇编的指令数量,输入0则反汇编所有指令
return:成功反汇编指令的数量,如果该函数未能反汇编给定的代码,则为0,失败时,调用cs_errno()获取错误代码。

源码分析

// Disassemble machine code from a buffer into a dynamically allocated array
// of cs_insn records.
//
// ud:     handle returned by cs_open().
// buffer: machine code to disassemble.
// size:   number of bytes available in buffer.
// offset: address assigned to the first instruction in buffer.
// count:  maximum number of instructions to decode, or 0 to decode until the
//         buffer is exhausted or an undecodable instruction stops the loop.
// insn:   out parameter; receives the instruction array (NULL when nothing
//         was decoded or when memory ran out while decoding). It is not
//         written when the handle is null or the initial allocation fails.
//
// Returns the number of instructions stored in *insn. Returns 0 on failure;
// when memory runs out, handle->errnum is set to CS_ERR_MEM.
size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
{
    struct cs_struct *handle;
    MCInst mci;
    uint16_t insn_size;
    size_t c = 0, i;
    unsigned int f = 0; // index of the next instruction in the current cache chunk
    cs_insn *insn_cache;    // slot that receives the instruction being decoded
    void *total = NULL;
    size_t total_size = 0;  // total size in bytes of the output buffer holding all insns
    bool r;
    void *tmp;
    size_t skipdata_bytes;
    uint64_t offset_org; // original offset, size and buffer, kept for the SKIPDATA callback
    size_t size_org;
    const uint8_t *buffer_org;
    unsigned int cache_size = INSN_CACHE_SIZE;
    size_t next_offset;

    handle = (struct cs_struct *)(uintptr_t)ud;
    if (!handle) {
        // With no handle there is nowhere to record an error code
        // (handle->errnum cannot be written), so just report zero instructions.
        return 0;
    }

    handle->errnum = CS_ERR_OK;

    // reset the IT block of the ARM architecture
    if (handle->arch == CS_ARCH_ARM)
        handle->ITBlock.size = 0;

#ifdef CAPSTONE_USE_SYS_DYN_MEM
    if (count > 0 && count <= INSN_CACHE_SIZE)
        cache_size = (unsigned int) count;
#endif

    // save the original offset for SKIPDATA
    buffer_org = buffer;
    offset_org = offset;
    size_org = size;

    total_size = sizeof(cs_insn) * cache_size;
    total = cs_mem_malloc(total_size);
    if (total == NULL) {
        // out of memory
        handle->errnum = CS_ERR_MEM;
        return 0;
    }

    insn_cache = total;

    while (size > 0) {
        MCInst_Init(&mci);
        mci.csh = handle;

        mci.address = offset;

        if (handle->detail) {
            // allocate memory for the detail pointer
            insn_cache->detail = cs_mem_malloc(sizeof(cs_detail));
            if (insn_cache->detail == NULL) {
                // Detail mode was requested but the record cannot be
                // allocated; the c instructions decoded so far are released.
                goto out_of_memory;
            }
        } else {
            insn_cache->detail = NULL;
        }

        // save all the information for non-detailed mode
        mci.flat_insn = insn_cache;
        mci.flat_insn->address = offset;
#ifdef CAPSTONE_DIET
        // zero-terminate mnemonic & op_str
        mci.flat_insn->mnemonic[0] = '\0';
        mci.flat_insn->op_str[0] = '\0';
#endif

        r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
        if (r) {
            SStream ss;
            SStream_Init(&ss);

            mci.flat_insn->size = insn_size;

            // map the internal instruction opcode to the public insn ID
            handle->insn_id(handle, insn_cache, mci.Opcode);

            handle->printer(&mci, &ss, handle->printer_info);
            fill_insn(handle, insn_cache, ss.buffer, &mci, handle->post_printer, buffer);

            // adjust the opcode (X86)
            if (handle->arch == CS_ARCH_X86)
                insn_cache->id += mci.popcode_adjust;

            next_offset = insn_size;
        } else  {
            // encountered a broken (undecodable) instruction

            // free the memory allocated for the detail pointer
            if (handle->detail) {
                cs_mem_free(insn_cache->detail);
            }

            // stop unless SKIPDATA is on and enough bytes remain to skip
            if (!handle->skipdata || handle->skipdata_size > size)
                break;

            if (handle->skipdata_setup.callback) {
                skipdata_bytes = handle->skipdata_setup.callback(buffer_org, size_org,
                        (size_t)(offset - offset_org), handle->skipdata_setup.user_data);
                if (skipdata_bytes > size)
                    break;

                if (!skipdata_bytes)
                    break;
            } else
                skipdata_bytes = handle->skipdata_size;

            // emit the skipped bytes as a pseudo "data" instruction
            insn_cache->id = 0;
            insn_cache->address = offset;
            insn_cache->size = (uint16_t)skipdata_bytes;
            memcpy(insn_cache->bytes, buffer, skipdata_bytes);
#ifdef CAPSTONE_DIET
            insn_cache->mnemonic[0] = '\0';
            insn_cache->op_str[0] = '\0';
#else
            strncpy(insn_cache->mnemonic, handle->skipdata_setup.mnemonic,
                    sizeof(insn_cache->mnemonic) - 1);
            // the slot comes from malloc and strncpy does not terminate a
            // truncated copy, so terminate explicitly
            insn_cache->mnemonic[sizeof(insn_cache->mnemonic) - 1] = '\0';
            skipdata_opstr(insn_cache->op_str, buffer, skipdata_bytes);
#endif
            insn_cache->detail = NULL;

            next_offset = skipdata_bytes;
        }

        // one more instruction entered the cache
        f++;

        // one more instruction disassembled
        c++;
        if (count > 0 && c == count)
            break;

        if (f == cache_size) {
            // the current chunk is full: make the next chunk 8/5 times larger
            cache_size = cache_size * 8 / 5;
            total_size += (sizeof(cs_insn) * cache_size);
            tmp = cs_mem_realloc(total, total_size);
            if (tmp == NULL)    // out of memory
                goto out_of_memory;

            total = tmp;
            // continue filling the cache right after the last instruction
            insn_cache = (cs_insn *)((char *)total + sizeof(cs_insn) * c);

            // reset f to 0 so the new chunk is filled from its start
            f = 0;
        } else
            insn_cache++;

        buffer += next_offset;
        size -= next_offset;
        offset += next_offset;
    }

    if (!c) {
        // no instruction was disassembled
        cs_mem_free(total);
        total = NULL;
    } else if (f != cache_size) {
        // the last chunk was not fully used: shrink the buffer
        tmp = cs_mem_realloc(total, total_size - (cache_size - f) * sizeof(*insn_cache));
        if (tmp == NULL)    // out of memory
            goto out_of_memory;

        total = tmp;
    }

    *insn = total;

    return c;

out_of_memory:
    // free the detail record of every instruction decoded so far, then the
    // instruction array itself
    if (handle->detail) {
        insn_cache = (cs_insn *)total;
        for (i = 0; i < c; i++, insn_cache++)
            cs_mem_free(insn_cache->detail);
    }

    cs_mem_free(total);
    *insn = NULL;
    handle->errnum = CS_ERR_MEM;
    return 0;
}

示例,x86_64:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00\xe9\xea\xbe\xad\xde\xff\x25\x23\x01\x00\x00\xe8\xdf\xbe\xad\xde\x74\xff"

// Example: disassemble CODE as x86-64 starting at address 0x1000 and print
// every decoded instruction.
// Returns 0 on success and -1 if the engine cannot be opened.
int main(void)
{
    csh handle = 0;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }

    count = cs_disasm(handle, (unsigned char*)CODE, sizeof(CODE) - 1, 0x1000, 0, &insn);   // all instructions, base address 0x1000, stored in insn
    if (count) {
        size_t j;

        for (j = 0; j < count; j++) {
            // address is a 64-bit value: cast it and use %llx so the format
            // matches the argument on every compiler
            printf("0x%llx:\t%s\t\t%s\n", (unsigned long long)insn[j].address, insn[j].mnemonic, insn[j].op_str);
        }

        cs_free(insn, count);
    }
    else
        printf("ERROR: Failed to disassemble given code!\n");

    cs_close(&handle);

    return 0;
}

输出

关键词:[‘安全技术’, ‘二进制安全’]


author

旭达网络

旭达网络技术博客,曾记录各种技术问题,一贴搞定.
本文采用知识共享署名 4.0 国际许可协议进行许可。

We notice you're using an adblocker. If you like our website please keep us running by whitelisting this site in your ad blocker. We’re serving quality, related ads only. Thank you!

I've whitelisted your website.

Not now