Low Level Programming: 8. x86 Assembly
Low Level Programming: 8. x86 Assembly
Outline
Assembly languages
Low Level Programming ¢
¢ Intel x86 family of CPUs
¢ Process memory layout
8. x86 assembly ¢ Intel x86 instruction sets
l Intel x86-32 registers
l Intel x86-64 registers
l x86 instruction set
¢ Interrupts and system calls
¢ Assembly in practice
¢ Stack management
¢ Application binary interfaces
1
11/8/23
¢ Examples (disassembled code, using objdump -d) ¢ MIPS (32- and 64-bit architectures), RISC-V
¢ Intel IA-32, Intel IA-64
c7 05 50 00 00 00 07 00 00 00 movl $7, 80(%rip)
¢ Motorola 68000
Memory transfer instruction ¢ Arm
Opcode (‘movl’ mnemonic)
¢ Motorola PowerPC
Operands (immediate operand,
memory operand)
¢ Sun Sparc
¢ Texas Instruments MSP430
83 c1 01 addl $1, %ecx
Addition instruction Operands (immediate operand, ¢ RISC (Reduced Instruction Set Computers) vs CISC
(‘addl’ mnemonic) register) (Complex Instruction Set Computers)
Note: in general no more than one memory operand per instruction, ¢ Instruction set vs. micro-architecture
registers as temporaries
Low Level Programming (LLP) 5 Low Level Programming (LLP) 6
2
11/8/23
¢ EAX (Accumulator)
Accumulator for operands
and results data (add, sub)
¢ EBX (Base address)
Usually used to store the
base address of a data
structure in memory
¢ ECX (Counter)
Usually used as loop counter
¢ EDX (Data store)
l Used to store operand and
For backward compatibility for old results for mult and div
8-bit and 16-bit registers
Low Level Programming (LLP) 11 Low Level Programming (LLP) 12
3
11/8/23
4
11/8/23
x86-32: Streaming SIMD extensions (SSE) x86-32 Streaming SIMD extensions (SSE)
5
11/8/23
6
11/8/23
.text
_start: size (optional)
movl $8,%ebx # ebx = 8
Labels cmpl %eax,%ebx # compare eax and ebx
(targets jle L0
of jumps) ret
L0: decl %ebx
Comments
ret
x86 assembly syntax: AT&T vs Intel x86 assembly syntax: AT&T vs Intel
syntax syntax
AT&T syntax Intel syntax AT&T syntax Intel syntax
Community UNIX Microsoft Data types • Registers: ‘%eax’ • Registers: ‘eax’
Left to right Right to left
• Concatenation: • Concatenation: ‘eax:ebx’
Direction of operands • ‘%eax:%ebx’ • Immediate values: ‘1’
(first operand is source, second is (first operand is destination, second
destination) is source) • Immediate values: ‘$1’ • Decimal: ‘10’ (or ‘10d’)
inst src, dst inst dst, src • Decimal: ‘10’ (or ‘0d10’) • Hexadecimal: ‘10h’
mov (%ecx), %eax mov eax, [ecx] • Hexadecimal: ‘0x10’ • Address of bytes: ‘byte ptr’
Memory addressing Addresses are enclosed in Addresses are enclosed in • Operand on bytes: ‘movb’ • Address of words: ‘word ptr’
parentheses brackets • Operand on words: ‘movw’ • Address of longs: ‘dword ptr’
(‘(’, ‘)’) and given by the formula: (‘[’, ‘]’) and given by the formula: • Operand on longs: ‘movl’
offset(base, index, scale) [base+index*scale+offset] movl $1, %eax mov eax, 1
movl (%ebx), %eax mov eax, [ebx] movl $0xff, %ebx mov ebx, 0ffh
movl 3(%ebx), %eax mov eax, [ebx+3] int $0x80 int 80h
movl 0x20(%ebx), %eax mov eax, [ebx+20h] movb %bl, %al mov al, bl
addl (%ebx,%ecx,0x2), %eax add eax, [ebx+ecx*2h] movw %bx, %ax mov ax, bx
leal (%ebx,%ecx), %eax lea eax, [ebx+ecx] movl %ebx, %eax mov eax, ebx
subl -0x20(%ebx,%ecx,0x4), %eax sub eax, [ebx+ecx*4h-20h] movl (%ebx), %eax mov eax, dword ptr [ebx]
7
11/8/23
¢ Direct
Mnemonic Operand Operand Operation Flags
.data add src dst src + dst → dst OF, SF, ZF, AF, CF, PF
var:
.long 42 # Reserve a 4-byte value, initialized to 42
sub src dst dst - src → dst OF, SF, ZF, AF, CF, PF
.text inc dst __ dst + 1→ dst OF, SF, ZF, AF, CF, PF
mov %ebx, var # Move the contents of ebx to memory address var
dec dst __ dst - 1→ dst OF, SF, ZF, AF, CF, PF
¢ Offset(Base, Index, Scale) (in any combination) neg dst reg -(dst) → dst OF, SF, ZF, AF, CF, PF
¢ Example
mov (%ebx), %eax # Load 4 bytes from the memory address in ebx into eax
mov -4(%esi), %eax # Move 4 bytes at memory address esi-4 into eax
mov %cl,(%esi,%eax,1) # Move the contents of CL into the byte at address esi+eax movl $15, %eax # %eax = 15
mov (%esi,%ebx,4),%edx # Move the 4 bytes of data at address esi+4*ebx into edx subl $7, %eax # %eax = %eax - 7
addl $30, %eax # %eax = %eax + 30
decl %eax # %eax = %eax - 1
8
11/8/23
mul src src × %eax → %edx:%eax (unsigned) OF, CF ja/jge addr Jump to ‘addr’ if above / greater or equal
# Decrement %ebx if 8 <= %eax, else increment it cmp op1 op2 op2 - op1 (result discarded) OF, SF, ZF, AF,
movl $8, %ebx CF, PF
cmpl %eax, %ebx # Compare %eax, %ebx
jle L0 # If %ebx <= %eax go to L0 ¢ Example
incl %ebx # Increment %ebx (then)
ret movl $8, %ebx
L0: andl $8, %ebx # And with myself, does not change
decl %ebx # Decrement %ebx (else) notl %ebx # Now is 0xfffffff7 (bitwise NOT of 0x8)
ret cmpl $0xfffffff7, %ebx # Compare with value $0xfffffff7
je L0 # Should branch!
9
11/8/23
sal cnt dst dst << cnt →dst (signed) CF, OF ¢ x86-64 new instructions (far from exhaustive)
sar cnt dst dst >> cnt →dst (signed) CF, OF
Mnemonic Operation
rol cnt dst Left rotate ‘dst’ of ‘cnt’ bits CF, OF lodsq (rsi) → rax
ror cnt dst Right rotate ‘dst’ of ‘cnt’ bits CF, OF stosq (rax) → (rdi)
syscall System call
shll $7, %eax # %eax = 2 ^ 7 sysret Return from syscall
10
11/8/23
11
11/8/23
12
11/8/23
¢ Stack pointer
l ESP (x86-32) Addresses
l RSP (x86-64) 0x00000000 eax 0x1a1b1c1d
l push <src>
1. Fetch operand from src Effect of push %eax
1. Fetch operand from <src>
2. esp = esp – x (x = 2, 4 or 8 depending on operand size) 2. esp = esp – 4
3. Write operand to (esp) Stack 3. Write operand to (esp)
0xffffffdc 0x33333333
l pop <dst> 0xfffffff0 0x00001111
0x1a1b1c1d esp (0xfffffff0)
• Fetch operand from esp 0xfffffff4 0x44444444 esp (0xfffffff4)
0xfffffff8 0x42424242
• Write operand to dst
0xfffffffc 0x44444444
• esp = esp + x (x = 2, 4 or 8 depending on operand size)
13
11/8/23
eip 0x00007f8a
0x00007f50 eip 0x00007f60
0x00007f92
14
11/8/23
Addresses Addresses
0x00000000 Effect of enter 0x00000000 Effect of leave
1. push %ebp 1. mov %ebp, %esp
2. mov %esp, %ebp 2. pop %ebp
15
11/8/23
l SystemV amd64 ABI supplement (64-bits) l ebx: non volatile (callee must preserve)
SystemV i386 ABI: calling conventions SystemV i386 ABI: Calling conventions
16
11/8/23
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
17
11/8/23
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
18
11/8/23
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
19
11/8/23
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller) 1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller) 2. Call instruction (caller)
3. Creation of new stack frame (callee) 3. Creation of new stack frame (callee)
4. Allocate locals on the stack (callee) 4. Allocate locals on the stack (callee)
5. Preserve registers if needed (callee) 5. Preserve registers if needed (callee)
6. <Function code> … 6. <Function code> …
Addresses Addresses
7. Deallocation of the stack frame (callee) 7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) 0x00000000 8. Return instruction (callee)
9. Deallocation of parameters (caller) 9. Deallocation of parameters (caller)
20
11/8/23
SystemV i386 ABI: Calling conventions SystemV i386 ABI: Calling conventions
1. Push parameters onto the stack in reverse order (caller)
2. Call instruction (caller)
3. Creation of new stack frame (callee) ¢ Return of structures
4. Allocate locals on the stack (callee) l Caller in charge of allocating space for the result
5. Preserve registers if needed (callee)
6. <Function code> … l First argument of the function = address of memory space
Addresses
7. Deallocation of the stack frame (callee)
0x00000000 8. Return instruction (callee) l Callee sets %eax to this area on return
9. Deallocation of parameters (caller)
¢ %esp may be rounded down to a 16-byte boundary for more
00007f10 <_max>:
7f10: 55 pushl %ebp efficiency
7f11: 89 e5 movl %esp, %ebp
7f13: 50
7f14: 8b 45 0c
pushl
movl
%eax
12(%ebp), %eax
¢ Arguments may be longer than 4 bytes
0xffffffd0 0x44444444
0x00000006
0xffffffd4 0x42424242
old bp
7f17: 8b 45 08
7f1a: 8b 45 08
movl
movl
8(%ebp), %eax
8(%ebp), %eax ¢ Examples
7f1d: 3b 45 0c cmpl 12(%ebp), %eax
0xffffffd8 0x44444444
return @ 7f20: 0f 8e 0b 00 00 00 jle 0x7f31 <_max+0x21> l int foo (int a, int b, int *c);
0xffffffdc 0x00000005 7f26: 8b 45 08 movl 8(%ebp), %eax
7f29: 89 45 fc movl %eax, -4(%ebp) parameters in 8(%ebp), 12(%ebp), 16(%ebp), result in %eax
0xfffffff0 0x00000006 7f2c: e9 06 00 00 00 jmp 0x7f37 <_max+0x27>
0xfffffff4 0x44444444 esp 7f31: 8b 45 0c movl 12(%ebp), %eax l float bar (float a, int b)
7f34: 89 45 fc movl %eax, -4(%ebp)
0xfffffff8 0x42424242 7f37: 8b 45 fc movl -4(%ebp), %eax parameters in 8(%ebp) and 12(%ebp), result in %st(0)
0xfffffffc 0x44444444 7f3a: 83 c4 04 addl $4, %esp
ebp 7f3d: 5d popl %ebp
7f3e: c3 retl
¢ Issue
¢ Calling conventions through registers
l Buffer overflow in local variable:
Addresses l 6 registers for integer arguments: rdi, rsi, rdx, rcx, r8, r9
erase return address 0x00000000
l 8 registers for floats/doubles: xmm0-xmm7
l Source: unsafe string
functions (among others) l First available register for parameter type is used
l Everything else goes to the stack
¢ Fixes
l Non executable stack
0xffffffd0 0x44444444
0x00000006 ¢ Volatile and non-volatile registers
0xffffffd4 0x42424242
old bp
l ASLR 0xffffffd8 0x44444444
return @
l Volatile (no need to protect)
0xffffffdc 0x00000005 • rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15
l More sophisticated attacks 0x00000006
0xfffffff0
l Better defenses (see SE course) 0xfffffff4 0x44444444 l Non-volatile (need to protect)
0xfffffff8 0x42424242 • rbx, rbp, rsp, r12-r15
0xfffffffc 0x44444444
¢ Example
l int f(int a, float b, int c); a in %rdi, b in %xmm0, c in %rsi, result in
%rax
Low Level Programming (LLP) 84 Low Level Programming (LLP) 85
21
11/8/23
22