逆向基础之switch语句反汇编分析

逆向基础之switch语句反汇编分析

0x01 背景介绍

最近一直在看逆向,闲话不多说,本篇将从反汇编的角度对C/C++中常见的switch语句进行简单分析,如有不对的地方,还望指正。

0x02 环境

Visual C++ 6.0

0x03 分析

首先,我们先编写一段简单的switch代码,如下所示:

#include "stdafx.h"

// Single-case switch sample: prints "1" when x equals 1, otherwise prints "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 1:
            printf("1");
            break;
        default:            // taken for every value other than 1
            printf("error");
            break;

    }
}            

// Entry point: calls Function(8). 8 matches no case label, so the default branch runs.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(8);

    return 0;        
}            

注意这里只有一个case,下面观察一下它的反汇编,我这里将用注释的方式对每步操作进行简单的静态分析,如下所示:

00401020   push        ebp                      //提升堆栈 
00401021   mov         ebp,esp 
00401023   sub         esp,44h                  
00401026   push        ebx                      //保留现场
00401027   push        esi
00401028   push        edi                     
00401029   lea         edi,[ebp-44h]            //填充缓冲区
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]    //将参数存放到eax中
0040103B   mov         dword ptr [ebp-4],eax    //将eax的值放到局部变量中
0040103E   cmp         dword ptr [ebp-4],1      //将1和局部变量的值进行比较
00401042   je          Function+26h (00401046)  //当值相等时跳转到00401046
00401044   jmp         Function+35h (00401055)  //否则跳转到00401055
00401046   push        offset string "1" (00422020) //将字符串"1"压入堆栈
0040104B   call        printf (00401160)            //调用printf函数
00401050   add         esp,4                        //降低堆栈
00401053   jmp         Function+42h (00401062)      //跳转到00401062
00401055   push        offset string "error" (00422fb4) //将字符串"error"压入堆栈
0040105A   call        printf (00401160)            //调用printf函数
0040105F   add         esp,4                        //降低堆栈
00401062   pop         edi                          //恢复现场
00401063   pop         esi
00401064   pop         ebx

通过上述静态分析可以知道,当case只有1个时,其反汇编代码与if…else相同,现在再通过动态分析单步执行来进行验证,当程序走到00401038时,看下ebp+8的值是8,也就是说传入的参数为8,如下图所示:

ebp+8

继续单步执行,当执行到0040103E时,此时的ebp-4也就是局部变量的值为8,如下图所示:

ebp-4

继续单步执行,比较1和ebp-4的值(也就是8)是否相等,显然是不相等的,于是执行之后来到00401044,jmp为无条件跳转,继续执行,跳转到00401055,也就是在“error”字符串这里,如下图所示:

jmp

再回头看一下源代码,我们传入的参数为8,显然程序应该执行default语句,打印出error,如下图所示:

error

这里我们可以先得出一个结论,当case为一个时,此编译器是将我们的switch语句当成了if…else语句来进行处理。下面我们继续往switch语句中增加一个case,源代码变成了如下所示:

#include "stdafx.h"

// Two-case switch sample: prints "1" or "2" for x == 1 or x == 2, otherwise prints "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 1:
            printf("1");
            break;
        case 2:
            printf("2");
            break;
        default:            // taken for every value other than 1 and 2
            printf("error");
            break;

    }
}            

// Entry point: calls Function(8). 8 matches no case label, so the default branch runs.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(8);

    return 0;        
}            

接着我们继续查看反汇编,观察一下与上述一个case时有什么区别,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]      //将参数放到eax中
0040103B   mov         dword ptr [ebp-4],eax      //将eax的值放到局部变量中
0040103E   cmp         dword ptr [ebp-4],1        //将1和局部变量的值进行比较
00401042   je          Function+2Ch (0040104c)    //如果相等就跳转到0040104c
00401044   cmp         dword ptr [ebp-4],2        //否则跟2进行比较
00401048   je          Function+3Bh (0040105b)    //如果相等就跳转到0040105b
0040104A   jmp         Function+4Ah (0040106a)    //否则就跳转到0040106a
0040104C   push        offset string "1" (00422028)
00401051   call        printf (00401100)
00401056   add         esp,4
00401059   jmp         Function+57h (00401077)
0040105B   push        offset string "2" (00422024)
00401060   call        printf (00401100)
00401065   add         esp,4
00401068   jmp         Function+57h (00401077)
0040106A   push        offset string "error" (0042201c)
0040106F   call        printf (00401100)
00401074   add         esp,4
00401077   pop         edi
00401078   pop         esi
00401079   pop         ebx

通过以上静态分析,可以得出当多了一个case时,switch语句的反汇编还是与if…else的反汇编相同,只是多判断了一个case,其动态分析与上述一样,这里不再赘述。
下面我们继续增加case,源代码变成如下所示:

#include "stdafx.h"

// Three-case switch sample: prints "1", "2" or "3" for the matching x, otherwise prints "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 1:
            printf("1");
            break;
        case 2:
            printf("2");
            break;
        case 3:
            printf("3");
            break;
        default:            // taken for every value outside 1..3
            printf("error");
            break;

    }
}            

// Entry point: calls Function(8). 8 matches no case label, so the default branch runs.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(8);

    return 0;        
}

然后继续查看其反汇编,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]     //将参数放到eax中
0040103B   mov         dword ptr [ebp-4],eax     //将eax的值放到局部变量中
0040103E   cmp         dword ptr [ebp-4],1       //将1和局部变量的值进行比较
00401042   je          Function+32h (00401052)   //如果相等就跳转到00401052
00401044   cmp         dword ptr [ebp-4],2       //否则跟2进行比较
00401048   je          Function+41h (00401061)   //如果相等就跳转到00401061
0040104A   cmp         dword ptr [ebp-4],3       //否则跟3进行比较
0040104E   je          Function+50h (00401070)   //如果相等就跳转到00401070
00401050   jmp         Function+5Fh (0040107f)   //否则跳转到0040107f
00401052   push        offset string "1" (00422f5c)
00401057   call        printf (00401100)
0040105C   add         esp,4
0040105F   jmp         Function+6Ch (0040108c)
00401061   push        offset string "2" (00422028)
00401066   call        printf (00401100)
0040106B   add         esp,4
0040106E   jmp         Function+6Ch (0040108c)
00401070   push        offset string "3" (00422024)
00401075   call        printf (00401100)
0040107A   add         esp,4
0040107D   jmp         Function+6Ch (0040108c)
0040107F   push        offset string "error" (0042201c)
00401084   call        printf (00401100)
00401089   add         esp,4
0040108C   pop         edi
0040108D   pop         esi
0040108E   pop         ebx

可以看到,当case为三个的时候编译器还是将其当做if…else来处理,其分析原理与上述相同。
然后我们继续增加case,源代码变成如下所示:

#include "stdafx.h"

// Four-case switch sample over the consecutive values 1..4: prints the matching
// digit, otherwise prints "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 1:
            printf("1");
            break;
        case 2:
            printf("2");
            break;
        case 3:
            printf("3");
            break;
        case 4:
            printf("4");
            break;
        default:            // taken for every value outside 1..4
            printf("error");
            break;

    }
}            

// Entry point: calls Function(4), which selects the last case label (case 4).
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(4);

    return 0;        
}

此时switch语句已经有了4个case了,继续查看它的反汇编,先通过静态分析看是否还是与上述一样,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]          //将参数放到eax中
0040103B   mov         dword ptr [ebp-4],eax          //将eax中的值放到局部变量中
0040103E   mov         ecx,dword ptr [ebp-4]          //将局部变量的值放到ecx中
00401041   sub         ecx,1                          //将ecx中的值减1
00401044   mov         dword ptr [ebp-4],ecx          //将ecx中的值放到局部变量中
00401047   cmp         dword ptr [ebp-4],3            //将局部变量的值与3进行比较
0040104B   ja          $L590+0Fh (00401093)           //如果大于3则跳转到00401093
0040104D   mov         edx,dword ptr [ebp-4]          //否则将局部变量的值放到edx中
00401050   jmp         dword ptr [edx*4+4010B1h]      //跳转到[edx*4+4010B1h]的值所对应的地址
$L584:
00401057   push        offset string "1" (00422030)
0040105C   call        printf (00401140)
00401061   add         esp,4
00401064   jmp         $L590+1Ch (004010a0)
$L586:
00401066   push        offset string "2" (0042202c)
0040106B   call        printf (00401140)
00401070   add         esp,4
00401073   jmp         $L590+1Ch (004010a0)
$L588:
00401075   push        offset string "3" (00422028)
0040107A   call        printf (00401140)
0040107F   add         esp,4
00401082   jmp         $L590+1Ch (004010a0)
$L590:
00401084   push        offset string "4" (00422024)
00401089   call        printf (00401140)
0040108E   add         esp,4
00401091   jmp         $L590+1Ch (004010a0)
00401093   push        offset string "error" (0042201c)
00401098   call        printf (00401140)
0040109D   add         esp,4
004010A0   pop         edi
004010A1   pop         esi
004010A2   pop         ebx

通过上述分析可以发现,当switch语句的case大于三个的时候(注意这里与default无关,default语句删除后结果仍相同),编译器的处理方式发生了变化,不再当做if…else处理,而是采取了上述的方式,这种方式称为“大表结构”。

这种结构的核心就是上述中的00401050中的jmp指令,下面接着用动态分析的方式来解释一下这种结构。

打开VC6反汇编,进行单步调试,这里我们传入的参数ebp+8的值是4,一路单步执行到0040104B,这里局部变量经过前面几步之后值变为3(之前有减1),显然是不大于3的,于是继续单步执行到00401050,此时可以看到寄存器窗口中edx的值为3,如下图所示:

edx

这时,我们观察00401050这行,先来手动计算一下[edx*4+4010B1h],计算结果为4010BD,将[edx*4+4010B1h]放到内存窗口中查看一下,发现是4010BD,跟我们计算的一样,它的值为00401084(这里为小端存储)也就是将要跳转的对应的case的执行的地址,如下图所示:

edx*4+4010B1h

接着,我们看下这个编译器生成的大表结构,通过分析可以看出编译器为每个case都生成一个对应的地址,如果传入的参数满足case的条件,就会直接通过jmp跳转到对应的计算后的地址(也就是大表中地址),大表与要跳转到的case地址的对应关系(default语句的跳转见图中绿色箭头)如下图所示:

dabiao

下面我们继续分析,现在我们将case后面的值打乱顺序,源代码如下所示:

#include "stdafx.h"

// Four-case switch sample using the same values 1..4, but with the labels
// written in the order 2, 4, 1, 3. Prints the matching digit, otherwise "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 2:
            printf("2");
            break;
        case 4:
            printf("4");
            break;
        case 1:
            printf("1");
            break;
        case 3:
            printf("3");
            break;
        default:            // taken for every value outside 1..4
            printf("error");
            break;

    }
}            

// Entry point: calls Function(4), which selects the case 4 label.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(4);

    return 0;        
}

接着观察反汇编,发现顺序打乱并不影响编译器生成大表,其生成的反汇编代码除case后面的打印的值的顺序不同外,并无任何区别,这里就不再赘述,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]
0040103B   mov         dword ptr [ebp-4],eax
0040103E   mov         ecx,dword ptr [ebp-4]
00401041   sub         ecx,1
00401044   mov         dword ptr [ebp-4],ecx
00401047   cmp         dword ptr [ebp-4],3
0040104B   ja          $L590+0Fh (00401093)
0040104D   mov         edx,dword ptr [ebp-4]
00401050   jmp         dword ptr [edx*4+4010B1h]
$L584:
00401057   push        offset string "2" (00422030)
0040105C   call        printf (00401140)
00401061   add         esp,4
00401064   jmp         $L590+1Ch (004010a0)
$L586:
00401066   push        offset string "4" (0042202c)
0040106B   call        printf (00401140)
00401070   add         esp,4
00401073   jmp         $L590+1Ch (004010a0)
$L588:
00401075   push        offset string "1" (00422028)
0040107A   call        printf (00401140)
0040107F   add         esp,4
00401082   jmp         $L590+1Ch (004010a0)
$L590:
00401084   push        offset string "3" (00422024)
00401089   call        printf (00401140)
0040108E   add         esp,4
00401091   jmp         $L590+1Ch (004010a0)
00401093   push        offset string "error" (0042201c)
00401098   call        printf (00401140)
0040109D   add         esp,4
004010A0   pop         edi
004010A1   pop         esi
004010A2   pop         ebx

这里继续分析,尝试将case的值修改的较大一些,数量多一些,源代码如下所示:

#include "stdafx.h"

// Ten-case switch sample over the consecutive values 100..109: prints the
// matching number, otherwise prints "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 100:
            printf("100");
            break;
        case 101:
            printf("101");
            break;
        case 102:
            printf("102");
            break;
        case 103:
            printf("103");
            break;
        case 104:
            printf("104");
            break;
        case 105:
            printf("105");
            break;
        case 106:
            printf("106");
            break;
        case 107:
            printf("107");
            break;
        case 108:
            printf("108");
            break;
        case 109:
            printf("109");
            break;
        default:            // taken for every value outside 100..109
            printf("error");
            break;

    }
}            

// Entry point: calls Function(108), which selects the case 108 label.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(108);

    return 0;        
}            

查看其反汇编,就算是case后面的值大一些,编译器还是生成了大表,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]
0040103B   mov         dword ptr [ebp-4],eax
0040103E   mov         ecx,dword ptr [ebp-4]
00401041   sub         ecx,64h
00401044   mov         dword ptr [ebp-4],ecx
00401047   cmp         dword ptr [ebp-4],9
0040104B   ja          $L602+0Fh (004010f7)
00401051   mov         edx,dword ptr [ebp-4]
00401054   jmp         dword ptr [edx*4+401115h]
$L584:
0040105B   push        offset string "100" (00422048)
00401060   call        printf (004011e0)
00401065   add         esp,4
00401068   jmp         $L602+1Ch (00401104)
$L586:
0040106D   push        offset string "101" (00422044)
00401072   call        printf (004011e0)
00401077   add         esp,4
0040107A   jmp         $L602+1Ch (00401104)
$L588:
0040107F   push        offset string "102" (00422040)
00401084   call        printf (004011e0)
00401089   add         esp,4
0040108C   jmp         $L602+1Ch (00401104)
$L590:
0040108E   push        offset string "103" (0042203c)
00401093   call        printf (004011e0)
00401098   add         esp,4
0040109B   jmp         $L602+1Ch (00401104)
$L592:
0040109D   push        offset string "104" (00422038)
004010A2   call        printf (004011e0)
004010A7   add         esp,4
004010AA   jmp         $L602+1Ch (00401104)
$L594:
004010AC   push        offset string "105" (00422034)
004010B1   call        printf (004011e0)
004010B6   add         esp,4
004010B9   jmp         $L602+1Ch (00401104)
$L596:
004010BB   push        offset string "106" (00422030)
004010C0   call        printf (004011e0)
004010C5   add         esp,4
004010C8   jmp         $L602+1Ch (00401104)
$L598:
004010CA   push        offset string "107" (0042202c)
004010CF   call        printf (004011e0)
004010D4   add         esp,4
004010D7   jmp         $L602+1Ch (00401104)
$L600:
004010D9   push        offset string "108" (00422028)
004010DE   call        printf (004011e0)
004010E3   add         esp,4
004010E6   jmp         $L602+1Ch (00401104)
$L602:
004010E8   push        offset string "109" (00422024)
004010ED   call        printf (004011e0)
004010F2   add         esp,4
004010F5   jmp         $L602+1Ch (00401104)
004010F7   push        offset string "error" (0042201c)
004010FC   call        printf (004011e0)
00401101   add         esp,4
00401104   pop         edi
00401105   pop         esi
00401106   pop         ebx

现在,我们将这10个case删除几个(不删除头和尾的case),得到源代码如下所示:

#include "stdafx.h"

// Switch sample over the range 100..109 with 101, 104 and 106 left out.
// Prints the matching number, otherwise prints "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 100:
            printf("100");
            break;
        case 102:
            printf("102");
            break;
        case 103:
            printf("103");
            break;
        case 105:
            printf("105");
            break;
        case 107:
            printf("107");
            break;
        case 108:
            printf("108");
            break;
        case 109:
            printf("109");
            break;
        default:            // also reached for the omitted values 101, 104 and 106
            printf("error");
            break;

    }
}            

// Entry point: calls Function(108), which selects the case 108 label.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(108);

    return 0;        
}

继续查看反汇编,发现删除其中几个case之后编译器还是生成了大表,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]
0040103B   mov         dword ptr [ebp-4],eax
0040103E   mov         ecx,dword ptr [ebp-4]
00401041   sub         ecx,64h
00401044   mov         dword ptr [ebp-4],ecx
00401047   cmp         dword ptr [ebp-4],9
0040104B   ja          $L596+0Fh (004010c0)
0040104D   mov         edx,dword ptr [ebp-4]
00401050   jmp         dword ptr [edx*4+4010DEh]
$L584:
00401057   push        offset string "100" (0042203c)
0040105C   call        printf (00401190)
00401061   add         esp,4
00401064   jmp         $L596+1Ch (004010cd)
$L586:
00401066   push        offset string "102" (00422038)
0040106B   call        printf (00401190)
00401070   add         esp,4
00401073   jmp         $L596+1Ch (004010cd)
$L588:
00401075   push        offset string "103" (00422034)
0040107A   call        printf (00401190)
0040107F   add         esp,4
00401082   jmp         $L596+1Ch (004010cd)
$L590:
00401084   push        offset string "105" (00422030)
00401089   call        printf (00401190)
0040108E   add         esp,4
00401091   jmp         $L596+1Ch (004010cd)
$L592:
00401093   push        offset string "107" (0042202c)
00401098   call        printf (00401190)
0040109D   add         esp,4
004010A0   jmp         $L596+1Ch (004010cd)
$L594:
004010A2   push        offset string "108" (00422028)
004010A7   call        printf (00401190)
004010AC   add         esp,4
004010AF   jmp         $L596+1Ch (004010cd)
$L596:
004010B1   push        offset string "109" (00422024)
004010B6   call        printf (00401190)
004010BB   add         esp,4
004010BE   jmp         $L596+1Ch (004010cd)
004010C0   push        offset string "error" (0042201c)
004010C5   call        printf (00401190)
004010CA   add         esp,4
004010CD   pop         edi
004010CE   pop         esi
004010CF   pop         ebx

此时查看一下大表的内容,可以发现对于被删除的case,编译器还是生成了其对应的大表,其中填充的地址为4010c0也就是执行default语句的地址,如下图所示:

4010c0

接着修改源代码,将原来的10个case中间连续删除(不要删除最大值和最小值),如下所示:

#include "stdafx.h"

// Switch sample that keeps only 100, 101, 102 and 109, leaving the contiguous
// run 103..108 without labels. Prints the matching number, otherwise "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 100:
            printf("100");
            break;
        case 101:
            printf("101");
            break;
        case 102:
            printf("102");
            break;
        case 109:
            printf("109");
            break;
        default:            // also reached for the omitted values 103..108
            printf("error");
            break;

    }
}            

// Entry point: calls Function(109), which selects the case 109 label.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(109);

    return 0;        
}    

查看反汇编,发现此次编译器生成的反汇编与之前不一样了,我们先进行静态分析一下,前面的提升堆栈等操作与之前相同不再叙述,这里只分析一下关键代码,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]        //将参数放到eax中
0040103B   mov         dword ptr [ebp-4],eax        //将eax中的值放到局部变量中
0040103E   mov         ecx,dword ptr [ebp-4]        //将局部变量中的值放到ecx中
00401041   sub         ecx,64h                      //将ecx中的值减去64h(100d)
00401044   mov         dword ptr [ebp-4],ecx        //将ecx中的值放到局部变量中
00401047   cmp         dword ptr [ebp-4],9          //将局部变量的值与9进行比较
0040104B   ja          $L590+0Fh (0040109b)         //如果大于9则跳转到0040109b
0040104D   mov         eax,dword ptr [ebp-4]        //否则将局部变量的值放到eax中
00401050   xor         edx,edx                      //将edx的值置为0
00401052   mov         dl,byte ptr  (004010cd)[eax] //将004010cd+eax的值放在dl中
00401058   jmp         dword ptr [edx*4+4010B9h]    //跳转到[edx*4+4010B9h]的值所对应的地址
$L584:
0040105F   push        offset string "100" (00422030)
00401064   call        printf (00401160)
00401069   add         esp,4
0040106C   jmp         $L590+1Ch (004010a8)
$L586:
0040106E   push        offset string "101" (0042202c)
00401073   call        printf (00401160)
00401078   add         esp,4
0040107B   jmp         $L590+1Ch (004010a8)
$L588:
0040107D   push        offset string "102" (00422028)
00401082   call        printf (00401160)
00401087   add         esp,4
0040108A   jmp         $L590+1Ch (004010a8)
$L590:
0040108C   push        offset string "109" (00422024)
00401091   call        printf (00401160)
00401096   add         esp,4
00401099   jmp         $L590+1Ch (004010a8)
0040109B   push        offset string "error" (0042201c)
004010A0   call        printf (00401160)
004010A5   add         esp,4
004010A8   pop         edi
004010A9   pop         esi
004010AA   pop         ebx

接着单步执行,这里传入的参数是ebp+8,值为6D(也就是十进制的109),当执行到00401041时,此时ecx的值也是6D,继续单步执行,减去64h(也就是十进制的100),可以看出编译器是先让参数减去了case后面值中最小的那个,减完之后ecx中的值变为9,然后放到局部变量中,继续单步执行,与9进行比较,不大于9于是继续执行,将局部变量的值(也就是9)放到eax中,接着将edx的值置为0,这里执行到00401052时,观察004010cd中的值,如下图所示:

xiaobiao

上图红框中的表就是传说中的“小表”,在小表的上方就是大表,如下图所示:

dabiao+xiaobiao

大表为4字节存储,小表为1字节存储,当执行到00401052时,eax的值为9,计算004010cd+eax的值为004010d6,通过查看上图可以看到dl的值为03h,所以edx的值为0x00000003,如下图所示:

dl_edx

然后计算[edx*4+4010B9h]的值为4010c5h,然后对照上述的大表,可以得到地址为0040108c。

现在,我们再修改源代码,将case后面的常量表达式修改为间隔较大的值,如下所示:

#include "stdafx.h"

// Ten-case switch sample whose labels are widely spaced (100 up to 8100).
// Prints the matching number, otherwise prints "error".
// Parameter: x - the value tested by the switch.
// Returns nothing; the only visible effect is the printf call.
void Function(int x)
{
    switch(x)
    {
        case 100:
            printf("100");
            break;
        case 1000:
            printf("1000");
            break;
        case 2000:
            printf("2000");
            break;
        case 2500:
            printf("2500");
            break;
        case 4000:
            printf("4000");
            break;
        case 4800:
            printf("4800");
            break;
        case 5900:
            printf("5900");
            break;
        case 7300:
            printf("7300");
            break;
        case 7700:
            printf("7700");
            break;
        case 8100:
            printf("8100");
            break;
        default:            // taken for every value not listed above
            printf("error");
            break;

    }
}            

// Entry point: calls Function(4800), which selects the case 4800 label.
// argc and argv are not used.
int main(int argc, char* argv[])            
{            
    Function(4800);

    return 0;        
}            

继续观察反汇编,如下所示:

00401020   push        ebp
00401021   mov         ebp,esp
00401023   sub         esp,44h
00401026   push        ebx
00401027   push        esi
00401028   push        edi
00401029   lea         edi,[ebp-44h]
0040102C   mov         ecx,11h
00401031   mov         eax,0CCCCCCCCh
00401036   rep stos    dword ptr [edi]
00401038   mov         eax,dword ptr [ebp+8]
0040103B   mov         dword ptr [ebp-4],eax
0040103E   cmp         dword ptr [ebp-4],12C0h
00401045   jg          Function+7Dh (0040109d)
00401047   cmp         dword ptr [ebp-4],12C0h
0040104E   je          Function+115h (00401135)
00401054   cmp         dword ptr [ebp-4],7D0h
0040105B   jg          Function+5Eh (0040107e)
0040105D   cmp         dword ptr [ebp-4],7D0h
00401064   je          Function+0E8h (00401108)
0040106A   cmp         dword ptr [ebp-4],64h
0040106E   je          Function+0C4h (004010e4)
00401070   cmp         dword ptr [ebp-4],3E8h
00401077   je          Function+0D6h (004010f6)
00401079   jmp         Function+160h (00401180)
0040107E   cmp         dword ptr [ebp-4],9C4h
00401085   je          Function+0F7h (00401117)
0040108B   cmp         dword ptr [ebp-4],0FA0h
00401092   je          Function+106h (00401126)
00401098   jmp         Function+160h (00401180)
0040109D   cmp         dword ptr [ebp-4],1E14h
004010A4   jg          Function+0B2h (004010d2)
004010A6   cmp         dword ptr [ebp-4],1E14h
004010AD   je          Function+142h (00401162)
004010B3   cmp         dword ptr [ebp-4],170Ch
004010BA   je          Function+124h (00401144)
004010C0   cmp         dword ptr [ebp-4],1C84h
004010C7   je          Function+133h (00401153)
004010CD   jmp         Function+160h (00401180)
004010D2   cmp         dword ptr [ebp-4],1FA4h
004010D9   je          Function+151h (00401171)
004010DF   jmp         Function+160h (00401180)
004010E4   push        offset string "100" (0042206c)
004010E9   call        printf (00401250)
004010EE   add         esp,4
004010F1   jmp         Function+16Dh (0040118d)
004010F6   push        offset string "1000" (00422064)
004010FB   call        printf (00401250)
00401100   add         esp,4
00401103   jmp         Function+16Dh (0040118d)
00401108   push        offset string "2000" (0042205c)
0040110D   call        printf (00401250)
00401112   add         esp,4
00401115   jmp         Function+16Dh (0040118d)
00401117   push        offset string "2500" (00422054)
0040111C   call        printf (00401250)
00401121   add         esp,4
00401124   jmp         Function+16Dh (0040118d)
00401126   push        offset string "4000" (0042204c)
0040112B   call        printf (00401250)
00401130   add         esp,4
00401133   jmp         Function+16Dh (0040118d)
00401135   push        offset string "4800" (00422044)
0040113A   call        printf (00401250)
0040113F   add         esp,4
00401142   jmp         Function+16Dh (0040118d)
00401144   push        offset string "5900" (0042203c)
00401149   call        printf (00401250)
0040114E   add         esp,4
00401151   jmp         Function+16Dh (0040118d)
00401153   push        offset string "7300" (00422034)
00401158   call        printf (00401250)
0040115D   add         esp,4
00401160   jmp         Function+16Dh (0040118d)
00401162   push        offset string "7700" (0042202c)
00401167   call        printf (00401250)
0040116C   add         esp,4
0040116F   jmp         Function+16Dh (0040118d)
00401171   push        offset string "8100" (00422024)
00401176   call        printf (00401250)
0040117B   add         esp,4
0040117E   jmp         Function+16Dh (0040118d)
00401180   push        offset string "error" (0042201c)
00401185   call        printf (00401250)
0040118A   add         esp,4
0040118D   pop         edi
0040118E   pop         esi
0040118F   pop         ebx

可以看到上述反汇编代码既不是大小表结构,也不是if…else结构,其实是一种树形结构。当case后面的常量间隔大于256时,小表已经无法表示,这时编译器会生成一种树形结构。因在实际中这种情况基本见不到,故不再分析。

0x04 总结

当switch语句中的case分支比较少时,例如在VC++ 6.0中是少于四条时,这里的编译器会将switch…case反汇编成与if…else相同的结构,在这种情况下,两种语句运行的效率一样。

当switch语句中的case分支比较多,case的常量是连续的或者一小部分是间断的,这时候编译器采用大表结构(即4字节存储case跳转的指令地址)来编译,其中间断的部分用default语句执行的指令地址来填充。

当switch语句中case的常量间断的部分比较多,大表中如果一直重复填充default语句执行的指令地址,那么它所占用的4字节空间就比较多,就会造成空间浪费,这时编译器会利用大表+小表的结构来进行编译。但是当case分支间隔大于256时,小表1个字节就无法表示了(此种情况基本不太可能出现)。