Take this code for example
#include <assert.h>
#include <string.h>
#define NUM_NODES 10
#define MAX_STACK 10
int adjMatrix[NUM_NODES][NUM_NODES];
int visited[NUM_NODES];
int stack[MAX_STACK];
int stack_index;
void initAdjMatrix(void)
{
memset((void *)adjMatrix, 0, sizeof(adjMatrix));
memset((void *)visited, 0, sizeof(visited));
}
void makeEdge(int from, int to)
{
assert(from < NUM_NODES);
assert(to < NUM_NODES);
adjMatrix[from][to] = 1;
}
void initStack(void)
{
stack_index = 0;
}
void push(int elem)
{
assert(stack_index < MAX_STACK);
stack[stack_index++] = elem;
}
int pop(void)
{
int elem;
assert(stack_index > 0);
elem = stack[--stack_index];
return elem;
}
int empty(void)
{
if (stack_index == 0)
return 1;
return 0;
}
void dfs(int start, int goal)
{
int node, i;
push(start);
while (!empty())
{
node = pop();
if (node == goal)
{
printf("%d Goal\n", node);
return;
}
if (visited[node])
continue;
else
visited[node] = 1;
printf("%d ", node);
for (i = NUM_NODES - 1; i > -1; i--)
if (adjMatrix[node][i] == 1)
push(i);
}
printf("Goal not found!\n");
}
int main(int argc, char *argv[])
{
initAdjMatrix();
initStack();
//make our graph
makeEdge(1, 2);
makeEdge(1, 3);
makeEdge(1, 4);
makeEdge(2, 5);
makeEdge(3, 5);
makeEdge(3, 6);
makeEdge(4, 7);
makeEdge(5, 8);
makeEdge(5, 9);
dfs(1, 6);
return 0;
}
(Slightly modified version of the depth first search from "AI Application Programming" Second Edition by M. Tim Jones)
Lets run that through gcc:
6029 bytes!!!
[craig ~/Desktop/ai/dfs]$ gcc -o dfs dfs.c
[craig ~/Desktop/ai/dfs]$ ls -l dfs
-rwxr-xr-x 1 craig wheel 6029 Aug 18 22:53 dfs
Ok, lets tell gcc to optimize it:
Horray for sacraficing space for speed... I want both though!
[craig ~/Desktop/ai/dfs]$ gcc -O3 dfs.c -o dfs
[craig ~/Desktop/ai/dfs]$ ls -l dfs
-rwxr-xr-x 1 craig wheel 6274 Aug 18 22:54 dfs
Lets take a look at the code sections just out of curiosity
Sections:
Idx Name Size VMA LMA File off Algn
0 .interp 00000015 080480d4 080480d4 000000d4 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
1 .hash 00000098 080480ec 080480ec 000000ec 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
2 .dynsym 00000130 08048184 08048184 00000184 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
3 .dynstr 000000cb 080482b4 080482b4 000002b4 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
4 .rel.plt 00000030 08048380 08048380 00000380 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
5 .init 00000011 080483b0 080483b0 000003b0 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
6 .plt 00000070 080483c4 080483c4 000003c4 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
7 .text 00000428 08048434 08048434 00000434 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
8 .fini 0000000c 0804885c 0804885c 0000085c 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
9 .rodata 00000113 08048868 08048868 00000868 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
10 .data 0000000c 0804997c 0804997c 0000097c 2**2
CONTENTS, ALLOC, LOAD, DATA
11 .eh_frame 00000004 08049988 08049988 00000988 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
12 .dynamic 00000098 0804998c 0804998c 0000098c 2**2
CONTENTS, ALLOC, LOAD, DATA
13 .ctors 00000008 08049a24 08049a24 00000a24 2**2
CONTENTS, ALLOC, LOAD, DATA
14 .dtors 00000008 08049a2c 08049a2c 00000a2c 2**2
CONTENTS, ALLOC, LOAD, DATA
15 .jcr 00000004 08049a34 08049a34 00000a34 2**2
CONTENTS, ALLOC, LOAD, DATA
16 .got 00000024 08049a38 08049a38 00000a38 2**2
CONTENTS, ALLOC, LOAD, DATA
17 .bss 0000022c 08049a60 08049a60 00000a60 2**5
ALLOC
18 .comment 0000012d 00000000 00000000 00000a60 2**0
CONTENTS, READONLY
hmm...that seems unnecessary
Time for a rewrite!
bits 32
; Define our constants
%define stdout 1
%define SYS_exit 1
%define SYS_write 4
%define NUM_NODES_SIZE_1 10
%define NUM_NODES 100
%define MAX_STACK 10
; Define our messages
segment .data
goalMsg db 'Goal', 0x0A
goalMsgBytes equ $ - goalMsg
spaceMsg db ' '
spaceMsgBytes equ $ - spaceMsg
goalNotFoundMsg db 'Goal not found', 0x0A
goalNotFoundMsgBytes equ $ - goalNotFoundMsg
numbers db '0123456789'
output db 0, ' '
; Declare our arrays
segment .bss
adjMatrix resb NUM_NODES
visited resb MAX_STACK
segment .text
align 4
; ==============
; = kernelCall =
; ==============
kernelCall:
int 0x80
ret
; Define our macros
%macro system 1
mov eax, %1
call kernelCall
%endmacro
%macro sys.exit 0
system SYS_exit
%endmacro
%macro sys.write 0
system SYS_write
%endmacro
; ===========================================
; = initAdjMatrix =
; = Parameters: none =
; = Pre: none =
; = Post: adjMatrix and visited are cleared =
; ===========================================
align 4
initAdjMatrix:
push eax
; Clear adjMatrix
; Clear all of eax because we reference it later
xor eax, eax
.initAdjMatrixL1:
mov [adjMatrix + eax], byte 0
inc al
cmp al, NUM_NODES
jne .initAdjMatrixL1
; Clear visited
xor al, al
.initAdjMatrixL2
mov [visited + eax], byte 0
inc al
cmp al, MAX_STACK
jne .initAdjMatrixL2
pop eax
ret
; ==============================================
; = makeEdge =
; = Parameters: from node (ebx), to node (ecx) =
; = Pre: none =
; = Post: matrix[from][to] is set =
; ==============================================
align 4
makeEdge:
mov ebx, [esp + 8]
mov ecx, [esp + 4]
; Determine row (from * 10)
; Plus column (to)
imul ebx, ebx, NUM_NODES_SIZE_1
add ebx, ecx
mov [adjMatrix + ebx], byte 1
ret
; =============================================
; = dfs =
; = Parameters: start (eax), goal (edx) =
; = Pre: Stack and matrix must be initialized =
; = Post: Graph is traversed =
; =============================================
align 4
dfs:
push ebp
mov ebp, esp
; Set start
mov eax, [esp + 12]
; Set goal
mov edx, [esp + 8]
; Make room for stack_index
sub esp, 2
; Using ebp - 2 as stack_index
mov [ebp - 2], word 0
; Push start and increase stack index
push eax
inc word [ebp - 2]
.dfsL1:
; Loop until stack_index is 0
cmp [ebp - 2], word 0
jne .dfsL12
jmp .dfsOutNotFound
.dfsL12:
; Retrieve node
pop eax
dec word [ebp - 2]
; Check if node is our goal
cmp eax, edx
je .dfsOutFound
; If we've visited node, continue
cmp [visited + eax], byte 0
jne .dfsL1
; Set visited to 1
mov [visited + eax], byte 1
; Output eax
push eax
mov al, [numbers + eax]
mov [output], al
push dword 2
push dword output
push dword stdout
sys.write
add esp, byte 12
pop eax
; Loop from 9 to 0
xor ecx, ecx
mov cl, NUM_NODES_SIZE_1
dec cl
.dfsL2:
cmp cl, -1
je .dfsL1
; Calculate place in array
push eax
imul eax, eax, NUM_NODES_SIZE_1
add eax, ecx
; Compare our adjMatrix to 1
cmp [adjMatrix + eax], byte 1
; If not 1, loop
jne .dfsL3
pop eax
; Otherwise push our index
push ecx
inc word [ebp - 2]
dec cl
jmp .dfsL2
.dfsL3:
pop eax
dec cl
jmp .dfsL2
.dfsOutFound:
; Output eax
push eax
mov al, [numbers + eax]
mov [output], al
push dword 2
push dword output
push dword stdout
sys.write
add esp, byte 12
pop eax
; Output goal message
push dword goalMsgBytes
push dword goalMsg
push dword stdout
sys.write
add esp, byte 12
jmp .dfsLeave
.dfsOutNotFound:
; Output goal not found message
push dword goalNotFoundMsgBytes
push dword goalNotFoundMsg
push dword stdout
sys.write
add esp, byte 12
.dfsLeave:
mov esp, ebp
pop ebp
ret
; ========
; = main =
; ========
global _start
_start:
call initAdjMatrix
; Make our edges
push 1
push 2
call makeEdge
push 1
push 3
call makeEdge
push 1
push 4
call makeEdge
push 2
push 5
call makeEdge
push 3
push 5
call makeEdge
push 3
push 6
call makeEdge
push 4
push 7
call makeEdge
push 5
push 8
call makeEdge
push 5
push 9
call makeEdge
; Traverse
push 1
push 6
call dfs
push dword 0
sys.exit
I've already made a few optimizations (for example changing things like "mov eax, 0" to "xor eax, eax", or had I needed it "mov eax, 1" would become "xor eax, eax" "inc eax". Yes its 2 statements but it is actually 2 bytes smaller - longer code does not always mean longer machine code)
And lets build it:
[craig ~/Desktop/ai/dfs/asm]$ nasm -f elf -l dfs.lst dfs.asm
[craig ~/Desktop/ai/dfs/asm]$ ld -s -o dfs dfs.o
[craig ~/Desktop/ai/dfs/asm]$ ls -l dfs
-rwxr-xr-x 1 craig wheel 964 Aug 18 22:57 dfs
Ok ok we are MUCH better, but we can do a little bit better. Let's take a look at the sections:
[craig ~/Desktop/ai/dfs/asm]$ objdump -x dfs
...SNIP...
Sections:
Idx Name Size VMA LMA File off Algn
0 .text 000001eb 08048080 08048080 00000080 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
1 .data 00000021 0804926c 0804926c 0000026c 2**2
CONTENTS, ALLOC, LOAD, DATA
2 .bss 00000070 08049290 08049290 00000290 2**2
ALLOC
3 .comment 0000001f 00000000 00000000 00000290 2**0
CONTENTS, READONLY
...SNIP...
Hmm, my guess is we really don't need the .comment section.
[craig ~/Desktop/ai/dfs/asm]$ hexdump -C -s 0x290 -n 31 dfs
00000290 00 54 68 65 20 4e 65 74 77 69 64 65 20 41 73 73 |.The Netwide Ass|
000002a0 65 6d 62 6c 65 72 20 30 2e 39 38 2e 33 39 00 |embler 0.98.39.|
000002af
Looks like nasm is making us pay for room to setup the .comment section, and is making us pay for a little recognition -- lets fix that.
[craig ~/Desktop/ai/dfs/asm]$ strip -R .comment dfs
[craig ~/Desktop/ai/dfs/asm]$ ls -l dfs
-rwxr-xr-x 1 craig wheel 884 Aug 18 23:04 dfs
884 bytes! We could do better by creating a binary file and defining the elf headers ourselves, but we've already surpassed the goal of the optimizations - 6.82 times smaller!
And if anyones curious:
[craig ~/Desktop/ai/dfs]$ ls -l dfs.c
-rw-r--r-- 1 craig wheel 1373 Aug 18 21:11 dfs.c
[craig ~/Desktop/ai/dfs]$ ls -l asm/dfs.asm
-rw-r--r-- 1 craig wheel 4310 Aug 18 21:55 asm/dfs.asm
[craig ~/Desktop/ai/dfs]$ wc -l dfs.c
103 dfs.c
[craig ~/Desktop/ai/dfs]$ wc -l asm/dfs.asm
260 asm/dfs.asm
thks a lot
ReplyDelete