学习和研究中前行,并在分享中提升自己

欢迎订阅阿里内推邮件



关于linux下进程栈的研究

阅读次数: 400| 时间:2017年12月31日 00:29 | 标签:linux-task

关于linux下进程栈的研究

早就想写这样一篇文章来探讨一下linux用户态进程栈,今天有机会补上。大家都知道进程栈是往下(由高地址向低地址)增长的,栈上除了存放函数的局部变量(自动变量,注意static静态变量并不在栈上),还保存着函数的返回地址,即被调用函数返回后调用者要继续执行的指令地址。比如,你使用gdb的时候看到的那些调用关系,就是根据这些信息还原出来的。下面,我们先看一下linux用户态进程栈的一个整体图

rbp 即栈帧基址指针(指向当前函数栈帧的底部),rsp 即栈顶指针(指向当前栈顶),栈的增长方向是由高地址到低地址。我们通过一段实际的代码来看一下

#include <stdio.h>                                                                 
#include <stdint.h>                                                                
void bar(void){                                                                    
  int a = 1;                                                                       
}                                                                                  

int test_fun(int a, int b)                                                         
{                                                                                  
    int arry[10];                                                                  
    printf("a + b %d\n", a + b);                                                                                                                                                                            
    return 1;                                                                      
}                                                                                  

int main(){                                                                        
   test_fun(2 , 3);                                                                
   return 1;                                                                       
}                   

使用gcc编译一下,然后gdb看一下

gcc -g test.c -o test_stack
gdb -q ./test_stack
(gdb) disassemble test_fun
Dump of assembler code for function test_fun:
   0x000000000040053a <+0>: push   %rbp
   0x000000000040053b <+1>: mov    %rsp,%rbp
   0x000000000040053e <+4>: sub    $0x40,%rsp
   0x0000000000400542 <+8>: mov    %edi,-0x34(%rbp)
   0x0000000000400545 <+11>:    mov    %esi,-0x38(%rbp)
   0x0000000000400548 <+14>:    mov    -0x38(%rbp),%eax
   0x000000000040054b <+17>:    mov    -0x34(%rbp),%edx
   0x000000000040054e <+20>:    add    %edx,%eax
   0x0000000000400550 <+22>:    mov    %eax,%esi
   0x0000000000400552 <+24>:    mov    $0x400620,%edi
   0x0000000000400557 <+29>:    mov    $0x0,%eax
   0x000000000040055c <+34>:    callq  0x400410 <printf@plt>
   0x0000000000400561 <+39>:    mov    $0x1,%eax
   0x0000000000400566 <+44>:    leaveq 
   0x0000000000400567 <+45>:    retq   

在gdb中对test_fun进行反汇编,先看前两行

push %rbp       //先把父函数(调用者)的rbp压栈保存
mov %rsp,%rbp   //再把当前的rsp赋值给rbp,作为子函数栈帧的基址

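也就是说,在保留帧指针的情况下(例如本例这种未开启优化的编译),每个函数的前两条指令都会是这样的。下面我们再来验证一下栈是往下生长的。首先,我们使用gdb把main函数进行反汇编(注意:下面这份反汇编来自在main中加入了GET_BP/GET_SP宏并用printf打印rbp、rsp的版本,所以比上面列出的main多出了若干指令)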

 0x0000000000400568 <+0>:   push   %rbp
   0x0000000000400569 <+1>: mov    %rsp,%rbp
   0x000000000040056c <+4>: sub    $0x10,%rsp
   0x0000000000400570 <+8>: mov    %rbp,%rax
   0x0000000000400573 <+11>:    mov    %rax,-0x8(%rbp)
   0x0000000000400577 <+15>:    mov    %rsp,%rax
   0x000000000040057a <+18>:    mov    %rax,-0x10(%rbp)
   0x000000000040057e <+22>:    mov    -0x10(%rbp),%rdx
   0x0000000000400582 <+26>:    mov    -0x8(%rbp),%rax
   0x0000000000400586 <+30>:    mov    %rax,%rsi
   0x0000000000400589 <+33>:    mov    $0x40067a,%edi
   0x000000000040058e <+38>:    mov    $0x0,%eax
   0x0000000000400593 <+43>:    callq  0x400410 <printf@plt>
   0x0000000000400598 <+48>:    mov    $0x3,%esi
   0x000000000040059d <+53>:    mov    $0x2,%edi
   0x00000000004005a2 <+58>:    callq  0x40053a <test_fun>
   0x00000000004005a7 <+63>:    mov    %rbp,%rax
   0x00000000004005aa <+66>:    mov    %rax,-0x8(%rbp)
   0x00000000004005ae <+70>:    mov    %rsp,%rax
   0x00000000004005b1 <+73>:    mov    %rax,-0x10(%rbp)
   0x00000000004005b5 <+77>:    mov    -0x10(%rbp),%rdx
   0x00000000004005b9 <+81>:    mov    -0x8(%rbp),%rax
   0x00000000004005bd <+85>:    mov    %rax,%rsi
   0x00000000004005c0 <+88>:    mov    $0x400698,%edi
   0x00000000004005c5 <+93>:    mov    $0x0,%eax
   0x00000000004005ca <+98>:    callq  0x400410 <printf@plt>
   0x00000000004005cf <+103>:   mov    $0x1,%eax
   0x00000000004005d4 <+108>:   leaveq 
   0x00000000004005d5 <+109>:   retq   

然后,我们在 0x000000000040056c处加上断点,然后看一下当时的rbp和rsp

(gdb) p $rbp
$1 = (void *) 0x7fffffffe470
(gdb) p $rsp
$2 = (void *) 0x7fffffffe470

可以看到rbp 和rsp的值是一样的,再往下运行一步

(gdb) s

Breakpoint 1, main () at test.c:19
19     GET_BP(rbp);
(gdb) p $rbp
$3 = (void *) 0x7fffffffe470
(gdb) p $rsp
$4 = (void *) 0x7fffffffe460
(gdb) 

此时rsp已经由0x7fffffffe470变成了0x7fffffffe460,减小了0x10,说明栈是向低地址增长的,这就是因为执行了这句

sub    $0x10,%rsp

栈上变量

再来看看栈上存储的变量,我们先把代码修改一下

#include <stdio.h>                                                                 
#include <ctype.h>                                                                 
#include <stdint.h>                                                                
#define GET_BP(x)               asm("movq %%rbp, %0":"=r"(x));                     
#define GET_SP(x)               asm("movq %%rsp, %0":"=r"(x));                     
void bar(void){                                                                    
  int a = 1;                                                                       
}                                                                                  

int test_fun(int a, int b)                                                         
{                                                                                  
    int arry[10];                                                                  
    printf("a + b %d\n", a + b);                                                   
    return 1;                                                                      
}                                                                                  

int main(){                                                                        
   int c = 5;  //加个变量                                                                                                                                                                                             
   test_fun(2 , 3);                                                                
   return 1;                                                                       
}            

再次使用gdb,操作如下

(gdb) disassemble main
Dump of assembler code for function main:
   0x0000000000400568 <+0>: push   %rbp
   0x0000000000400569 <+1>: mov    %rsp,%rbp
   0x000000000040056c <+4>: sub    $0x10,%rsp
   0x0000000000400570 <+8>: movl   $0x5,-0x4(%rbp)
   0x0000000000400577 <+15>:    mov    $0x3,%esi
   0x000000000040057c <+20>:    mov    $0x2,%edi
   0x0000000000400581 <+25>:    callq  0x40053a <test_fun>
   0x0000000000400586 <+30>:    mov    $0x1,%eax
   0x000000000040058b <+35>:    leaveq 
   0x000000000040058c <+36>:    retq   
End of assembler dump.
(gdb) b *0x0000000000400570
Breakpoint 1 at 0x400570: file test.c, line 18.
(gdb) r
Starting program: /mnt/./test_task 

Breakpoint 1, main () at test.c:18
18     int c = 5;
Missing separate debuginfos, use: debuginfo-install glibc-2.17-157.el7_3.1.x86_64
(gdb) i reg
rax            0x400568 4195688
rbx            0x0  0
rcx            0x400590 4195728
rdx            0x7fffffffe568   140737488348520
rsi            0x7fffffffe558   140737488348504
rdi            0x1  1
rbp            0x7fffffffe470   0x7fffffffe470
rsp            0x7fffffffe460   0x7fffffffe460
r8             0x7ffff7dd7e80   140737351876224
r9             0x0  0
r10            0x7fffffffe2c0   140737488347840
r11            0x7ffff7a3da40   140737348098624
r12            0x400440 4195392
r13            0x7fffffffe550   140737488348496
r14            0x0  0
r15            0x0  0
rip            0x400570 0x400570 <main+8>
eflags         0x206    [ PF IF ]
cs             0x33 51
ss             0x2b 43
ds             0x0  0
es             0x0  0
fs             0x0  0
gs             0x0  0
(gdb) s
19     test_fun(2 , 3);
(gdb) x/ag ($rbp - 0x4)
0x7fffffffe46c: 0x5

可以看到我们从 $rbp - 0x4 这个位置读到了这个变量值为5(变量c是4字节的int,而x/ag是按8字节读取的,更严谨的做法是用 x/dw $rbp-4 按4字节读取)

栈上保存函数地址

文章一开始的图中,我们已经讲到父函数在调用子函数时,callq指令会先把子函数运行完之后下一步要运行的指令地址(返回地址)压栈;子函数结束时,leaveq负责恢复rsp和父函数的rbp,随后retq把这个返回地址从栈上弹出并跳转回去。接下来我们来验证一下,先来看一下汇编

Dump of assembler code for function main:
   0x0000000000400568 <+0>: push   %rbp
   0x0000000000400569 <+1>: mov    %rsp,%rbp
   0x000000000040056c <+4>: sub    $0x10,%rsp
   0x0000000000400570 <+8>: movl   $0x5,-0x4(%rbp)
   0x0000000000400577 <+15>:    mov    $0x3,%esi
   0x000000000040057c <+20>:    mov    $0x2,%edi
   0x0000000000400581 <+25>:    callq  0x40053a <test_fun>
   0x0000000000400586 <+30>:    mov    $0x1,%eax
   0x000000000040058b <+35>:    leaveq 
   0x000000000040058c <+36>:    retq   

从上面汇编来看,main函数调用完test_fun之后,下一步需要运行的是main+30处的这条指令,接下来我们在gdb环境下看看

[root@debug010000002015 mnt]# gdb -q ./test_task
Reading symbols from /mnt/test_task...done.
(gdb) b main
Breakpoint 1 at 0x400570: file test.c, line 18.
(gdb) b *0x0000000000400581
Breakpoint 2 at 0x400581: file test.c, line 19.
(gdb) r
Starting program: /mnt/./test_task 

Breakpoint 1, main () at test.c:18
18     int c = 5;
Missing separate debuginfos, use: debuginfo-install glibc-2.17-157.el7_3.1.x86_64
(gdb) p $rsp
$1 = (void *) 0x7fffffffe460
(gdb) p $rbp
$2 = (void *) 0x7fffffffe470
(gdb) s
test_fun (a=2, b=3) at test.c:13
13      printf("a + b %d\n", a + b);
(gdb) x/2ag $rbp
0x7fffffffe450: 0x7fffffffe470  0x400586 <main+30>

从上面可以看出,当test_fun被调用时一共保存了两样东西:一个是callq指令把函数返回后要运行的指令地址(返回地址)压栈保存下来,另一个是test_fun开头的push %rbp把父函数的rbp保存下来。而且从我们读取的结果来看,子函数 $rbp 处保存的是父函数的rbp,$rbp + 8 字节处保存的就是其要返回的地址。那么在多个函数嵌套调用的情况下,我们就可以沿着保存的rbp一层层往上回溯,也就是所谓的调用栈,即gdb bt打印的结果。下边我们手动也做一下,为此我们在test_fun中再加一层调用

#include <stdio.h>                                                                 
#include <ctype.h>                                                                 
#include <stdint.h>                                                                
#define GET_BP(x)               asm("movq %%rbp, %0":"=r"(x));                     
#define GET_SP(x)               asm("movq %%rsp, %0":"=r"(x));                     
void bar(void){                                                                    
  int a = 1;                                                                       
  int c = 8;                                                                       
  printf("a + c %d", a +c);                                                                                                                                                                                 
}                                                                                  

int test_fun(int a, int b)                                                         
{                                                                                  
    int arry[10];                                                                  
    bar();                                                                         
    printf("a + b %d\n", a + b);                                                   
    return 1;                                                                      
}                                                                                  

int main(){                                                                        
   int c = 5;                                                                      
   test_fun(2 , 3);                                                                
   return 1;                                                                       
}                   

我们在test_fun中增加了对bar函数的调用,gdb结果如下

(gdb) b bar
Breakpoint 1 at 0x400535: file test.c, line 7.
(gdb) r
Starting program: /mnt/./test_task 

Breakpoint 1, bar () at test.c:7
7     int a = 1;
Missing separate debuginfos, use: debuginfo-install glibc-2.17-157.el7_3.1.x86_64
(gdb) s
8     int c = 8;
(gdb) x/2ag $rbp
0x7fffffffe400: 0x7fffffffe450  0x400571 <test_fun+19>
(gdb) x/2ag 0x7fffffffe450
0x7fffffffe450: 0x7fffffffe470  0x4005af <main+30>
(gdb) bt
#0  bar () at test.c:8
#1  0x0000000000400571 in test_fun (a=2, b=3) at test.c:15
#2  0x00000000004005af in main () at test.c:22
(gdb) x/ag ($rbp + 8)
0x7fffffffe408: 0x400571 <test_fun+19>
(gdb) x/2ag $rbp
0x7fffffffe400: 0x7fffffffe450  0x400571 <test_fun+19>
(gdb) x/ag (0x7fffffffe450 + 8)
0x7fffffffe458: 0x4005af <main+30>
(gdb) bt
#0  bar () at test.c:8
#1  0x0000000000400571 in test_fun (a=2, b=3) at test.c:15
#2  0x00000000004005af in main () at test.c:22

可以看出我们手动算出来的结果和bt命令是一样的。这里需要注意的是,bt打印出的地址不是被调用函数的入口地址,而是被调用函数返回后调用者将要继续执行的指令地址(即返回地址)。