结构体 - 18.6 结构体中的位 - 《逆向工程权威指南》

18.6 结构体中的位
- 18.6.1 CPUID 的例子
- 18.6.2 将浮点数当作结构体看待

18.6 结构体中的位

18.6.1 CPUID 的例子

C/C++中允许给结构体的每一个成员都定义一个准确的位域。如果我们想要节省空间的话，这个对我们来说将是非常有用的。比如，对BOOL来说，1位就足矣了。但是当然，如果我们想要速度的话，必然会浪费点空间。让我们以CPUID指令为例，这个指令返回当前CPU的信息和特性。如果EAX在指令执行之前就设置为了1，CPUID将会返回这些内容到EAX中。

MSVC 2010有CPUID的宏，但是GCC 4.4.1没有，所以，我们就手动的利用它的内联汇编器为GCC写一个吧。

#include <stdio.h>
#ifdef __GNUC__
static inline void cpuid(int code, int *a, int *b, int *c, int *d) {
    asm volatile("cpuid":"=a"(*a),"=b"(*b),"=c"(*c),"=d"(*d):"a"(code));
}
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
struct CPUID_1_EAX
{
    unsigned int stepping:4;
    unsigned int model:4;
    unsigned int family_id:4;
    unsigned int processor_type:2;
    unsigned int reserved1:2;
    unsigned int extended_model_id:4;
    unsigned int extended_family_id:8;
    unsigned int reserved2:4;
};
int main()
{
    struct CPUID_1_EAX *tmp;
    int b[4];
    #ifdef _MSC_VER
    __cpuid(b,1);
    #endif
    #ifdef __GNUC__
    cpuid (1, &b[0], &b[1], &b[2], &b[3]);
    #endif
    tmp=(struct CPUID_1_EAX *)&b[0];
    printf ("stepping=%d", tmp->stepping);
    printf ("model=%d", tmp->model);
    printf ("family_id=%d", tmp->family_id);
    printf ("processor_type=%d", tmp->processor_type);
    printf ("extended_model_id=%d", tmp->extended_model_id);
    printf ("extended_family_id=%d", tmp->extended_family_id);
    return 0;
};

之后CPU会填充EAX,EBX,ECX,EDX，这些寄存器的值会通过b[]数组显现出来。接着我们用一个指向CPUID_1_EAX结构体的指针，把它指向b[]数组的EAX值。换句话说，我们将把32位的INT类型的值当作一个结构体来看。然后我们就能从结构体中读取数据。让我们在MSVC 2008用/Ox编译一下：

清单18.19: MSVC 2008

_b$ = -16 ; size = 16
_main PROC
    sub esp, 16 ; 00000010H
    push ebx
    xor ecx, ecx
    mov eax, 1
    cpuid
    push esi
    lea esi, DWORD PTR _b$[esp+24]
    mov DWORD PTR [esi], eax
    mov DWORD PTR [esi+4], ebx
    mov DWORD PTR [esi+8], ecx
    mov DWORD PTR [esi+12], edx
    mov esi, DWORD PTR _b$[esp+24]
    mov eax, esi
    and eax, 15 ; 0000000fH
    push eax
    push OFFSET $SG15435 ; ’stepping=%d’, 0aH, 00H
    call _printf
    mov ecx, esi
    shr ecx, 4
    and ecx, 15 ; 0000000fH
    push ecx
    push OFFSET $SG15436 ; ’model=%d’, 0aH, 00H
    call _printf
    mov edx, esi
    shr edx, 8
    and edx, 15 ; 0000000fH
    push edx
    push OFFSET $SG15437 ; ’family_id=%d’, 0aH, 00H
    call _printf
    mov eax, esi
    shr eax, 12 ; 0000000cH
    and eax, 3
    push eax
    push OFFSET $SG15438 ; ’processor_type=%d’, 0aH, 00H
    call _printf
    mov ecx, esi
    shr ecx, 16 ; 00000010H
    and ecx, 15 ; 0000000fH
    push ecx
    push OFFSET $SG15439 ; ’extended_model_id=%d’, 0aH, 00H
    call _printf
    shr esi, 20 ; 00000014H
    and esi, 255 ; 000000ffH
    push esi
    push OFFSET $SG15440 ; ’extended_family_id=%d’, 0aH, 00H
    call _printf
    add esp, 48 ; 00000030H
    pop esi
    xor eax, eax
    pop ebx
    add esp, 16 ; 00000010H
    ret 0
_main ENDP

SHR指令将EAX寄存器的值右移位，移出去的值必须被忽略，例如我们会忽略右边的位。 AND指令将清除左边不需要的位，换句话说，它处理过后EAX将只留下我们需要的值。让我们在GCC4.4.1下用-O3编译。

清单18.20: GCC 4.4.1

main proc near ; DATA XREF: _start+17
    push ebp
    mov ebp, esp
    and esp, 0FFFFFFF0h
    push esi
    mov esi, 1
    push ebx
    mov eax, esi
    sub esp, 18h
    cpuid
    mov esi, eax
    and eax, 0Fh
    mov [esp+8], eax
    mov dword ptr [esp+4], offset aSteppingD ; "stepping=%d"
    mov dword ptr [esp], 1
    call ___printf_chk
    mov eax, esi
    shr eax, 4
    and eax, 0Fh
    mov [esp+8], eax
    mov dword ptr [esp+4], offset aModelD ; "model=%d"
    mov dword ptr [esp], 1
    call ___printf_chk
    mov eax, esi
    shr eax, 8
    and eax, 0Fh
    mov [esp+8], eax
    mov dword ptr [esp+4], offset aFamily_idD ; "family_id=%d"
    mov dword ptr [esp], 1
    call ___printf_chk
    mov eax, esi
    shr eax, 0Ch
    and eax, 3
    mov [esp+8], eax
    mov dword ptr [esp+4], offset aProcessor_type ; "processor_type=%d"
    mov dword ptr [esp], 1
    call ___printf_chk
    mov eax, esi
    shr eax, 10h
    shr esi, 14h
    and eax, 0Fh
    and esi, 0FFh
    mov [esp+8], eax
    mov dword ptr [esp+4], offset aExtended_model ; "extended_model_id=%d"
    mov dword ptr [esp], 1
    call ___printf_chk
    mov [esp+8], esi
    mov dword ptr [esp+4], offset unk_80486D0
    mov dword ptr [esp], 1
    call ___printf_chk
    add esp, 18h
    xor eax, eax
    pop ebx
    pop esi
    mov esp, ebp
    pop ebp
    retn
main endp

几乎一样。只有一个需要注意的地方就是GCC在调用每个printf()之前会把extended_model_id和extended_family_id的计算联合到一块去，而不是把它们分开计算。

18.6.2 将浮点数当作结构体看待

我们已经在FPU（15章）中注意到了float和double两个类型都是有符号的，他们分为符号、有效数字和指数部分。但是我们能直接用上这些位嘛？让我们试一试float。

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <memory.h>
struct float_as_struct
{
    unsigned int fraction : 23; // fractional part
    unsigned int exponent : 8; // exponent + 0x3FF
    unsigned int sign : 1; // sign bit
};
float f(float _in)
{
    float f=_in;
    struct float_as_struct t;
    assert (sizeof (struct float_as_struct) == sizeof (float));
    memcpy (&t, &f, sizeof (float));
    t.sign=1; // set negative sign
    t.exponent=t.exponent+2; // multiple d by 2^n (n here is 2)
    memcpy (&f, &t, sizeof (float));
    return f;
};
int main()
{
    printf ("%f", f(1.234));
};

float_as_struct结构占用了和float一样多的内存空间，也就是4字节，或者说，32位。现在我们给输入值设置一个负值，然后指数加2，这样我们就能把整个数按照22的值来倍乘，也就是乘以4。让我们在MSVC2008无优化模式下编译它。

清单18.21: MSVC 2008

_t$ = -8 ; size = 4
_f$ = -4 ; size = 4
__in$ = 8 ; size = 4
?f@@YAMM@Z PROC ; f
    push ebp
    mov ebp, esp
    sub esp, 8
    fld DWORD PTR __in$[ebp]
    fstp DWORD PTR _f$[ebp]
    push 4
    lea eax, DWORD PTR _f$[ebp]
    push eax
    lea ecx, DWORD PTR _t$[ebp]
    push ecx
    call _memcpy
    add esp, 12 ; 0000000cH
    mov edx, DWORD PTR _t$[ebp]
    or edx, -2147483648 ; 80000000H - set minus sign
    mov DWORD PTR _t$[ebp], edx
    mov eax, DWORD PTR _t$[ebp]
    shr eax, 23 ; 00000017H - drop significand
    and eax, 255 ; 000000ffH - leave here only exponent
    add eax, 2 ; add 2 to it
    and eax, 255 ; 000000ffH
    shl eax, 23 ; 00000017H - shift result to place of bits 30:23
    mov ecx, DWORD PTR _t$[ebp]
    and ecx, -2139095041 ; 807fffffH - drop exponent
    or ecx, eax ; add original value without exponent with new calculated exponent
    mov DWORD PTR _t$[ebp], ecx
    push 4
    lea edx, DWORD PTR _t$[ebp]
    push edx
    lea eax, DWORD PTR _f$[ebp]
    push eax
    call _memcpy
    add esp, 12 ; 0000000cH
    fld DWORD PTR _f$[ebp]
    mov esp, ebp
    pop ebp
    ret 0
?f@@YAMM@Z ENDP ; f

有点多余。如果用/Ox编译的话，这里就没有memcpy调用了。f变量会被直接使用，但是没有优化的版本看起来会更容易理解一点。 GCC 4.4.1的-O3选项会怎么做？

清单18.22： Gcc 4.4.1

; f(float)
public _Z1ff
_Z1ff proc near
var_4 = dword ptr -4
arg_0 = dword ptr 8
    push ebp
    mov ebp, esp
    sub esp, 4
    mov eax, [ebp+arg_0]
    or eax, 80000000h ; set minus sign
    mov edx, eax
    and eax, 807FFFFFh ; leave only significand and exponent in EAX
    shr edx, 23 ; prepare exponent
    add edx, 2 ; add 2
    movzx edx, dl ; clear all bits except 7:0 in EAX
    shl edx, 23 ; shift new calculated exponent to its place
    or eax, edx ; add new exponent and original value without exponent
    mov [ebp+var_4], eax
    fld [ebp+var_4]
    leave
    retn
    _Z1ff endp
    public main
    main proc near
    push ebp
    mov ebp, esp
    and esp, 0FFFFFFF0h
    sub esp, 10h
    fld ds:dword_8048614 ; -4.936
    fstp qword ptr [esp+8]
    mov dword ptr [esp+4], offset asc_8048610 ; "%f
"
    mov dword ptr [esp], 1
    call ___printf_chk
    xor eax, eax
    leave
    retn
main endp

f()函数基本可以理解，但是有趣的是，GCC可以在编译阶段就通过我们这堆大杂烩一样的代码计算出f(1.234)的值，从而会把他当作参数直接给printf()。