Commit cccd9656 authored by PoroCYon's avatar PoroCYon Committed by PoroCYon
Browse files

lots of stuff, see detailed desc.

* reorganize the project structure a bit (sorry not sorry :P)
  * src/ -> test/
  * ldr/ -> src/, ld/
  * mksyms -> smol.py (and smol*.py)
* clean up and make the python script a bit less hacky
* optimize the loader code (more can still be done, though)
* preserve the stack so argc, argv and envp can be read
* more stuff
parent f6b9a927
/bin /bin
/obj /obj
/__pycache__
LDRDIR = ldr OBJDIR := obj
OBJDIR = obj BINDIR := bin
BINDIR = bin SRCDIR := src
SRCDIR = src LDDIR := ld
DATADIR = data TESTDIR:= test
COPTFLAGS=-Os -fvisibility=hidden -mpreferred-stack-boundary=3 -fwhole-program \ # -mpreferred-stack-boundary=3 messes up the stack and kills SSE!
COPTFLAGS=-Os -fvisibility=hidden -fwhole-program \
-ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \ -ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \
-fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables
CXXOPTFLAGS=$(COPTFLAGS) \ CXXOPTFLAGS=$(COPTFLAGS) \
-fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr \ -fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr \
-fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit -fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit
ASFLAGS=-f elf -I $(LDRDIR)/ CFLAGS=-Wall -Wextra -Wpedantic -std=gnu11 -nostartfiles -fno-PIC $(COPTFLAGS)
CFLAGS=-Wall -Wextra -Wpedantic -std=c99 $(COPTFLAGS) -nostartfiles -fno-PIC
CXXFLAGS=-Wall -Wextra -Wpedantic -std=c++11 $(CXXOPTFLAGS) -nostartfiles -fno-PIC CXXFLAGS=-Wall -Wextra -Wpedantic -std=c++11 $(CXXOPTFLAGS) -nostartfiles -fno-PIC
ASFLAGS=-f elf -I $(SRCDIR)/
LDFLAGS=-m elf_i386
LDFLAGS_=$(LDFLAGS) -T $(LDDIR)/link.ld --oformat=binary
CFLAGS += -m32
CXXFLAGS += -m32
LIBS=-lc LIBS=-lc
LDFLAGS=--oformat=binary -T ldr/link.ld
ASFLAGS += -DUSE_INTERP
NASM ?= nasm
PYTHON3 ?= python3
.PHONY: all all: $(BINDIR)/sdl $(BINDIR)/hello
all: $(BINDIR)/test
LIBS += -lSDL2 -lGL
.PHONY: clean
clean: clean:
rm -rf $(OBJDIR)/* $(BINDIR)/* @$(RM) -vrf $(OBJDIR) $(BINDIR)
%/:
@mkdir -vp "$@"
.SECONDARY: .SECONDARY:
$(OBJDIR)/%.o: $(SRCDIR)/%.c $(OBJDIR)/%.o: $(SRCDIR)/%.c $(OBJDIR)/
$(CC) -m32 $(CFLAGS) -c $^ -o $@ $(CC) -m32 $(CFLAGS) -c "$<" -o "$@"
# Compile a test program's C source into a 32-bit object file.
# The output directory is an order-only prerequisite (after '|'): it has to
# exist before compiling, but its timestamp, which changes every time a file
# is created inside it, must not make the object look out of date.
$(OBJDIR)/%.o: $(TESTDIR)/%.c | $(OBJDIR)/
	$(CC) -m32 $(CFLAGS) -c "$<" -o "$@"
$(OBJDIR)/%.start.o: $(OBJDIR)/%.o $(OBJDIR)/crt1.o
$(LD) $(LDFLAGS) -r -o "$@" $^
# Compile the startup code that the %.start.o rule links into every program.
# The output directory is order-only (after '|') so that new files appearing
# in it do not force crt1.o, and everything depending on it, to be rebuilt.
$(OBJDIR)/crt1.o: $(SRCDIR)/crt1.c | $(OBJDIR)/
	$(CC) $(CFLAGS) -c "$<" -o "$@"
$(OBJDIR)/symbols.%.asm: $(OBJDIR)/%.start.o
$(PYTHON3) ./smol.py $(LIBS) "$<" "$@"
$(OBJDIR)/%.o.syms: $(OBJDIR)/%.o $(OBJDIR)/stub.%.o: $(OBJDIR)/symbols.%.asm $(SRCDIR)/header.asm \
readelf -s $^ | grep UND | sed 1d | awk '{ print $$8 }' > $@ $(SRCDIR)/loader.asm
$(NASM) $(ASFLAGS) $< -o $@
$(OBJDIR)/symbols.%.s: $(OBJDIR)/%.o.syms $(BINDIR)/%: $(OBJDIR)/%.start.o $(OBJDIR)/stub.%.o $(BINDIR)/
$(LDRDIR)/mksyms $(LIBS) $$(cat $^) > $@ $(LD) $(LDFLAGS_) $(OBJDIR)/$*.start.o $(OBJDIR)/stub.$*.o -o "$@"
$(OBJDIR)/header.%.o: $(OBJDIR)/symbols.%.s $(LDRDIR)/header.s $(LDRDIR)/loader.s .PHONY: all clean
nasm -DUSE_INTERP $(ASFLAGS) $< -o $@
$(BINDIR)/%: $(OBJDIR)/%.o $(OBJDIR)/header.%.o
$(LD) -m elf_i386 $(LDFLAGS) $^ -o $@
...@@ -6,9 +6,9 @@ SECTIONS { ...@@ -6,9 +6,9 @@ SECTIONS {
.header : { *(.header) } .header : { *(.header) }
.text : { .text : {
*(.text._smol_start) *(.text.startup.smol)
*(.text._start) *(.text.startup._start)
*(.text .rdata .rdata.* .rodata .rodata.*) *(.text .text.* .rdata .rdata.* .rodata .rodata.*)
} }
.data : { .data : {
...@@ -22,5 +22,5 @@ SECTIONS { ...@@ -22,5 +22,5 @@ SECTIONS {
*(.*) *(.*)
} }
_size = . - 0x400000; _smol_total_size = . - 0x400000;
} }
; vim: set ft=nasm:
%define ORIGIN 0x400000
extern _size
; Hand-assembled ELF32 file header, program headers and the start of the
; dynamic table.
; The program headers start at (.segments - header), which is where the
; e_phnum field of a full-size ELF header would be, so the first dwords of the
; first program header are also read as the last ELF header fields. The
; {...} notes on those dwords say what the overlaid header fields end up as.
; .dynamic.end and _symbols are referenced but not defined in this block; they
; have to be provided by the file that includes this one.
[section .header]
header:
; e_ident
db 0x7F, "ELF" ; EI_MAG0-EI_MAG3
db 1 ; EI_CLASS: 1 = 32-bit
db 1 ; EI_DATA: 1 = LSB
db 1 ; EI_VERSION
db 3 ; EI_OSABI: 3 = Linux
db 1 ; EI_OSABIVERSION
times 7 db 0 ; EI_PAD, ld.so is a busta and won't let us use our leet group tags for padding bytes :(
; e_type: 2 = executable
dw 2
; e_machine: 3 = x86
dw 3
; e_version
dd 1
; e_entry
dd _smol_start
; e_phoff: the program headers follow immediately after e_phentsize
dd (.segments - header)
; e_shoff: no section headers
dd 0
; e_flags
dd 0
; e_ehsize: size of the truncated header, i.e. everything up to .segments
dw (.segments - header)
; e_phentsize: size of one program header entry
dw (.segments.load - .segments.dynamic)
.segments:
%ifdef USE_INTERP
.segments.interp:
; {e_phnum: 3, e_shentsize: 0}, p_type: 3 = PT_INTERP
; (low word 3 = interp + dynamic + load entries, high word 0)
dd 3
; {e_shnum: <junk>, e_shstrndx: <junk>}, p_offset
dd (.interp - header)
; p_vaddr
dd .interp
; p_paddr
dd .interp
; p_filesz
dd (.interp.end-.interp)
; p_memsz
dd (.interp.end-.interp)
; p_flags, p_align
dd 0,0
%endif
.segments.dynamic:
; p_type: 2 = PT_DYNAMIC
; when USE_INTERP is not defined this is the first program header, and the
; dword is also read as {e_phnum: 2, e_shentsize: 0} (dynamic + load entries)
dd 2
; p_offset (also {e_shnum: <junk>, e_shstrndx: <junk>} when this entry is first)
dd (.dynamic - header)
; p_vaddr
dd .dynamic
; p_paddr
dd 0
; p_filesz
dd (.dynamic.end - .dynamic)
; p_memsz
dd (.dynamic.end - .dynamic)
; p_flags, p_align
dq 0
.segments.load:
; p_type: 1 = PT_LOAD
dd 1
; p_offset: the segment starts at the beginning of the file
dd 0
; p_vaddr
dd ORIGIN
; p_paddr
dd 0
; p_filesz
dd _size
; p_memsz
dd _size
; p_flags: 1 = execute, 2 = write, 4 = read (all three are set)
dd (1 | 2 | 4)
; p_align
dd 0x1000
.segments.end:
%ifdef USE_INTERP
; path of the dynamic linker named by the PT_INTERP entry above
.interp:
db "/lib/ld-linux.so.2",0
.interp.end:
%endif
; start of the dynamic table: pairs of (d_tag, d_un) dwords
.dynamic:
.dynamic.strtab:
; d_tag: 5 = DT_STRTAB
dd 5
; d_un.d_ptr
dd _symbols
.dynamic.symtab:
; this is required to be present or ld.so will crash, but it can be bogus
; d_tag: 6 = DT_SYMTAB
dd 6
; d_un.d_ptr
dd 0
; vim: set ft=nasm ts=8:
; Byte offsets into the dynamic linker's struct link_map, as used by the
; loader code below.
%define LM_NAME_OFFSET 0x4 ; entry->l_name
%define LM_NEXT_OFFSET 0xC ; pointer to the next link_map entry
%define LM_ADDR_OFFSET 0 ; entry->l_addr
%define LM_INFO_OFFSET 0x20 ; entry->l_info[], indexed by dynamic tag
; by default, use the offset 'correction' from glibc 2.28
; This is the expected distance from the link_map pointer to the dword holding
; the entry point; _smol_start measures the real distance and keeps the
; difference in lm_off_extra.
%define LM_ENTRY_OFFSET_BASE 340
; the next three offsets are always used with lm_off_extra added on top
%define LM_NBUCKETS_OFFSET 0x178 ; entry->l_nbuckets
%define LM_GNU_BUCKETS_OFFSET 0x188 ; entry->l_gnu_buckets
%define LM_GNU_CHAIN_ZERO_OFFSET 0x18C ; entry->l_gnu_chain_zero
%define DT_VALUE_OFFSET 0x4 ; offset of the value field inside a symbol table entry
%define DYN_PTR_OFFSET 0x4 ; offset of d_un.d_ptr inside a dynamic entry
%define DT_SYMTAB 0x6 ; index into l_info[] of the symbol table entry
%define DT_SYMSIZE_SHIFT 4 ; symbol index << 4 = byte offset (16-byte entries)
; Correction added to the LM_*BUCKET*/LM_GNU_* offsets above; written once by
; _smol_start and read by link before every link_symbol call.
lm_off_extra:
dd 0
[section .text._smol_start]
; strcmp: compare the NUL-terminated string at esi against the bytes at edi.
; The result is returned in the zero flag only:
;   ZF = 1  s1 ended without a mismatch (s2 may be longer: this is a prefix
;           match, because [edi] is never tested once s1's NUL is reached)
;   ZF = 0  a byte differed
; esi and edi are saved and restored; al is overwritten.
strcmp: ; (const char *s1 (esi), const char *s2 (edi))
push esi
push edi
.cmp: lodsb ; al = *s1++
or al, al
jz .done ; end of s1: leave with ZF set
sub al, [edi]
jnz .done ; mismatch: leave with ZF clear
inc edi
jmp .cmp
.done: pop edi ; the pops and ret below leave the flags untouched
pop esi
ret
; basename: return in eax a pointer to the character following the last '/'
; of the NUL-terminated string at esi, or esi itself when it contains no '/'.
; esi and edi are saved and restored.
basename: ; (const char *s (esi))
push esi
push edi
mov edi, esi ; edi = current result, initially the start of s
.cmp: lodsb ; al = *esi++
or al, al
jz .done ; terminating NUL reached
cmp al, 47 ; '/'
cmove edi, esi ; esi is already one past the '/', so it points at the next component
jmp .cmp
.done: mov eax, edi
pop edi
pop esi
ret
; link_symbol: look one symbol up by hash in a library's GNU hash data and
; patch the hash slot with a relative displacement to the symbol.
; Register inputs (there are no stack arguments):
;   ebp = struct link_map *entry
;   esi = uint32_t *h, address of the 4-byte slot holding the symbol's hash
;   edi = extra offset added to the LM_NBUCKETS/LM_GNU_* offsets
; On return the slot at esi holds (symbol address - slot address - 4), i.e. a
; displacement relative to the end of the slot.
; Overwrites eax, ebx, ecx and edx.
; NOTE(review): the chain walk only stops on a hash match; there is no
; end-of-chain check, so the symbol is assumed to exist in this library.
link_symbol: ; (struct link_map *entry, uint32_t *h)
mov ecx, esi
; edx = *h % entry->l_nbuckets (div leaves the remainder in edx)
mov eax, [ecx]
xor edx, edx
mov ebx, [ebp + edi + LM_NBUCKETS_OFFSET]
div ebx
; eax = entry->l_gnu_buckets[edx]
mov eax, [ebp + edi + LM_GNU_BUCKETS_OFFSET]
mov eax, [eax + edx * 4]
; *h |= 1, so the stored hash compares equal to chain entries that also get
; their low bit forced to 1 below
or word [ecx], 1
.check_bucket: ; edx = entry->l_gnu_chain_zero[eax] | 1
mov edx, [ebp + edi + LM_GNU_CHAIN_ZERO_OFFSET]
mov edx, [edx + eax * 4]
or edx, 1
; check if this is our symbol
cmp edx, [ecx]
je .found
inc eax ; no match: try the next symbol index
jmp .check_bucket
.found: ; it is! edx = entry->l_info[DT_SYMTAB]->d_un.d_ptr
mov edx, [ebp + LM_INFO_OFFSET + DT_SYMTAB * 4]
mov edx, [edx + DYN_PTR_OFFSET]
; edx = edx[eax].dt_value + entry->l_addr
shl eax, DT_SYMSIZE_SHIFT ; symbol index -> byte offset into the symbol table
mov edx, [edx + eax + DT_VALUE_OFFSET]
add edx, [ebp + LM_ADDR_OFFSET]
; make the address relative to the end of the 4-byte slot
sub edx, ecx
sub edx, 4
; finally, write it back!
mov [ecx], edx
ret
; link: resolve every symbol listed in the symbol table.
; Stack arguments: [esp+4] = struct link_map *root, [esp+8] = char *symtable.
; The table is walked as a sequence of libraries, ended by a 0 byte:
;   NUL-terminated library name, then zero or more 5-byte entries (one byte
;   that is skipped, followed by a 4-byte hash slot), then a 0 byte.
; For each library the link_map list is searched from root for an entry whose
; basename(l_name) matches the library name (prefix match, see strcmp), and
; link_symbol is called on every hash slot.
; Returns with a plain ret (the arguments stay on the stack); eax, ebx, ecx,
; edx, esi, edi and ebp are overwritten.
; NOTE(review): the list walk has no end-of-list check, so every library named
; in the table is assumed to be present in the link_map list.
link: ; (struct link_map *root, char *symtable)
mov eax, [esp+4]
mov esi, [esp+8]
.do_library: ; null library name means end of symbol table, we're done
cmp byte [esi], 0
jz .done
; setup start of map again
mov ebp, eax
push eax ; keep root across this library; restored in .next_library
.find_map_entry: ; compare basename(entry->l_name) to lib name, if so we got a match
push esi
mov esi, [ebp + LM_NAME_OFFSET]
call basename
mov edi, eax ; edi = basename of this entry's name
pop esi ; esi = library name from the symbol table again
call strcmp
jz .process_map_entry
; no match, next entry it is!
mov ebp, [ebp + LM_NEXT_OFFSET]
jmp .find_map_entry
.process_map_entry: ; skip past the name in the symbol table now to get to the symbols
lodsb
or al, al
jnz .process_map_entry
.do_symbols: ; null byte means end of symbols for this library!
cmp byte [esi], 0
jz .next_library
inc esi ; skip the byte in front of the hash slot
push edi
mov edi, [lm_off_extra] ; offset correction expected by link_symbol
call link_symbol
pop edi
add esi, 4 ; step over the slot that was just patched
jmp .do_symbols
.next_library: pop eax ; eax = root again
inc esi ; skip the 0 byte that ended this library's symbols
jmp .do_library
.done: ret
extern _start
; _smol_start: entry point of the loader. It measures how far the link_map
; layout differs from LM_ENTRY_OFFSET_BASE, stores that in lm_off_extra, then
; calls link(eax, _symbols) and falls through into _start.
; NOTE(review): eax is used as the struct link_map pointer on entry, but
; nothing in this file sets it - presumably ld.so leaves it there; confirm.
_smol_start:
; try to get the 'version-agnostic' offset of the stuff we're
; interested in
mov ebx, eax ; keep the original pointer
mov esi, eax
.looper:
; scan dword by dword for a value equal to the address of _smol_start;
; the scan has no upper bound
lodsd
cmp dword eax, _smol_start
jne short .looper
sub esi, ebx ; esi = distance scanned, including the matching dword
sub esi, LM_ENTRY_OFFSET_BASE+4 ; +4: take inc-after from lodsd into acct
mov [lm_off_extra], esi
mov eax, ebx
; arguments for link, pushed right to left: symtable, then root
push _symbols
push eax
call link
;jmp short _start
; by abusing the linker script, _start ends up right here :)
#!/bin/sh
set -e
# Size-oriented optimisation flags shared by the C and C++ compilers.
# The values span several lines; they are expanded unquoted later on, so the
# newlines simply act as word separators.
COPTFLAGS=$(cat <<'EOF'
-Os -fvisibility=hidden -mpreferred-stack-boundary=2 -fwhole-program
-ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer
-fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables
EOF
)
# C++ flags: the common flags above plus the C++-only ones.
# This has to start from $COPTFLAGS; expanding $CXXOPTFLAGS here (the variable
# being defined) would drop every common flag from C++ builds.
CXXOPTFLAGS=$(cat <<EOF
$COPTFLAGS
-fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr
-fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit
EOF
)
# Compilers can be overridden from the environment.
CC="${CC:-cc}"
CXX="${CXX:-c++}"
CFLAGS="-Wall -Wextra -Wpedantic -std=c99 $COPTFLAGS -nostartfiles -fno-PIC"
CXXFLAGS="-Wall -Wextra -Wpedantic -std=c++11 $CXXOPTFLAGS -nostartfiles -fno-PIC"
# nasm looks for its %include files in ldr/; ld emits a flat binary laid out
# by ldr/link.ld.
ASFLAGS="-f elf -I ldr/"
LDFLAGS="--oformat=binary -T ldr/link.ld"
# Output goes to bin/; obj/ is recreated empty on every run.
mkdir -p bin
rm -rf obj
mkdir -p obj
# State filled in while the command line is processed:
cleared=     # set once the positional parameters have been reset
mksym_args=  # -l options, handed to mksyms
files=       # object files produced so far
symbols=     # undefined symbols collected from those objects
output=s.out # name of the final binary inside bin/
i=1          # counter used to name the next object file
# Reserve the next numbered object file: its path is left in $out and
# appended to $files, and the counter $i is advanced.
add_out() {
    out=obj/${i}
    files="${files} ${out}"
    i=$(( i + 1 ))
}
# Append the undefined symbols of object file $1 to $symbols.
# readelf's symbol table always contains an UND entry for symbol 0; `sed 1d`
# drops it, and awk keeps the name column of the remaining lines.
add_syms() {
    # Declare and assign separately: an unquoted `local syms=$(...)` is
    # field-split by some /bin/sh implementations (only the first symbol would
    # be kept), and `local` would also hide the pipeline's exit status.
    local syms
    syms=$(readelf -s "$1" | grep -F UND | sed 1d | awk '{ print $8 }')
    symbols="$symbols $syms"
}
# Walk the command line. The positional parameters are cleared on the first
# iteration and reused to collect the compiler options that are passed
# through; the list being iterated was already expanded, so this is safe.
#   -o FILE        name of the final binary
#   -l*            library options, kept for mksyms
#   other -*       passed through to the compiler
#   *.c / C++ src  compiled right away with the options seen so far
expect_output=
for a; do
    if test -z "$cleared"; then
        set --; cleared=1
    fi
    # The argument following -o is the output name, whatever it looks like.
    if test -n "$expect_output"; then
        output="$a"
        expect_output=
        continue
    fi
    case "$a" in
        -o) expect_output=1;;
        -l*) mksym_args="$mksym_args $a";;
        -*) set -- "$@" "$a";;
        *.c)
            add_out
            $CC $CFLAGS "$@" -c "$a" -o "$out"
            add_syms "$out";;
        *.cxx|*.cc|*.cpp)
            add_out
            $CXX $CXXFLAGS "$@" -c "$a" -o "$out"
            add_syms "$out";;
        # >&2 sends the message to stderr
        *) printf "not sure what to do with input file: %s, bailing\n" "$a" >&2; exit 1;;
    esac
done
if test -n "$expect_output"; then
    printf "%s\n" "-o needs a file name, bailing" >&2
    exit 1
fi
# Generate the symbol/header assembly for all collected symbols, assemble it,
# and link it in front of the compiled objects. Paths built from $output are
# quoted so that an output name containing spaces or glob characters stays a
# single word; the flag and file lists are deliberately left unquoted so they
# split into separate arguments.
./ldr/mksyms $mksym_args $symbols > "obj/$output.syms.s"
nasm $ASFLAGS "obj/$output.syms.s" -o "obj/$output.header.o"
ld $LDFLAGS "obj/$output.header.o" $files -o "bin/$output"
#!/usr/bin/env python3
import argparse
import glob
import itertools
import os.path
import shutil
import subprocess
import sys
from smolshared import *
from smolparse import *
from smolemit import *
def main():
    """Command-line entry point.

    Parses the arguments, picks the target architecture (the ``-m`` value,
    lower-cased, or the result of ``decide_arch`` on the input files), collects
    the symbols reported by ``get_needed_syms``, maps each one to the library
    returned by ``find_symbol``, and hands the resulting
    ``{library: [symbols]}`` dict to ``output`` for writing.

    Exits with status 1 when the architecture is not in ``archmagic`` or when
    ``find_symbol`` returns nothing for a symbol.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--target', default='',
        help='architecture to generate asm code for (default: auto)')
    parser.add_argument('-l', '--library', metavar='LIB', action='append',
        help='libraries to link against')
    parser.add_argument('-L', '--libdir', metavar='DIR', action='append',
        help="directories to search libraries in")
    parser.add_argument('--nasm', default=shutil.which('nasm'),
        help="which nasm binary to use")
    parser.add_argument('--cc', default=shutil.which('cc'),
        help="which cc binary to use")
    parser.add_argument('--scanelf', default=shutil.which('scanelf'),
        help="which scanelf binary to use")
    parser.add_argument('--readelf', default=shutil.which('readelf'),
        help="which readelf binary to use")
    parser.add_argument('input', nargs='+', help="input object file")
    parser.add_argument('output', type=argparse.FileType('w'),
        help="output nasm file", default=sys.stdout)

    args = parser.parse_args()

    # argparse leaves 'append' options at None when the flag is never given;
    # normalise both to empty lists so they can be concatenated and iterated.
    if args.libdir is None: args.libdir = []
    if args.library is None: args.library = []

    # str has no tolower() method; lower() is the case-folding call.
    arch = args.target.lower() if len(args.target) != 0 \
        else decide_arch(args.input)
    if arch not in archmagic:
        eprintf("Unknown architecture '" + str(arch) + "'")
        sys.exit(1)

    syms = get_needed_syms(args.readelf, args.input)

    # directories given with -L are searched before the compiler's own
    paths = get_cc_paths(args.cc)
    spaths = args.libdir + paths['libraries']

    libnames = args.library
    libs = list(find_libs(spaths, libnames))

    # library -> list of symbols resolved to it
    symbols = {}
    for symbol in syms:
        library = find_symbol(args.scanelf, libs, libnames, symbol)
        if not library:
            eprintf("could not find symbol: {}".format(symbol))
            sys.exit(1)
        symbols.setdefault(library, [])
        symbols[library].append(symbol)

    output(arch, symbols, args.output)


if __name__ == '__main__':
    main()
import sys
from smolshared import *
def hash_djb2(s):
    """Return the DJB2 hash of the string ``s``, truncated to 32 bits.

    The hash starts at 5381; every character multiplies the running value by
    33 and adds the character's code point, keeping only the low 32 bits.
    """
    value = 5381
    for ch in s:
        # (value << 5) + value is value * 33
        value = ((value << 5) + value + ord(ch)) & 0xFFFFFFFF
    return value
def output_x86(libraries, outf):
    """Write the NASM source for the i386 symbol table to ``outf``.

    ``libraries`` maps a library file name to the list of symbol names taken
    from it; ``outf`` is a writable text stream.

    The output includes ``header.asm``, adds one pair of dwords per library
    to the dynamic table (tag 1 and the offset of the library's name inside
    ``_symbols``), then emits the ``_symbols`` table itself: for each library
    its name, one 5-byte stub per symbol (``0xE9`` followed by the symbol's
    ``hash_djb2`` value) and a terminating 0 byte, with one more 0 byte ending
    the whole table. ``loader.asm`` is included last.
    """
    emit = outf.write
    emit('; vim: set ft=nasm:\n') # be friendly

    # label suffix per library: the part of the file name before the first
    # '.', lower-cased, with '-' turned into '_'
    labels = {}
    for lib in libraries:
        labels[lib] = lib.split('.', 1)[0].lower().replace('-', '_')

    emit('%include "header.asm"\n')
    emit('.dynamic.needed:\n')
    for lib in libraries:
        emit('dd 1\n')
        emit('dd (_symbols.{} - _symbols)\n'.format(labels[lib]))
    emit('.dynamic.end:\n')

    emit('_symbols:\n')
    stub = '\t\tglobal {name}\n\t\t{name}: db 0xE9\n\t\t dd 0x{hash:x}\n'
    for lib, names in libraries.items():
        emit('\t_symbols.{}: db "{}",0\n'.format(labels[lib], lib))
        for sym in names:
            emit(stub.format(name=sym, hash=hash_djb2(sym)))
        emit('\tdb 0\n')
    emit('db 0\n')
    emit('%include "loader.asm"\n')
def output(arch, libraries, outf):
    """Emit the symbol-table assembly for ``arch`` to ``outf``.

    Only ``'i386'`` has an emitter (``output_x86``); for any other value an
    error is printed with ``eprintf`` and the process exits with status 1.
    """
    # no emitters yet for: arm, x86_64, aarch64
    if arch != 'i386':
        eprintf("E: cannot emit for arch '" + str(arch) + "'")
        sys.exit(1)

    output_x86(libraries, outf)
#!/usr/bin/env python3
import glob import glob
import sys
import os.path import os.path
import subprocess import subprocess
import itertools import struct
import argparse import sys
from smolshared import *
def decide_arch(inpfiles):
archs=set({})
for fp in inpfiles:
with open(fp, 'rb') as ff:
_ = ff.read(16) # ei_ident
_ = ff.read( 2) # ei_type
machine = ff.read(2) # ei_machine
def hash_djb2(s): machnum = struct.unpack('<H', machine)[0]
h = 5381 archs.add(machnum)
for c in s:
h = (h * 33 + ord(c)) & 0xFFFFFFFF
return h
if len(archs) != 1:
eprintf("Input files have multiple architectures, can't link this...")
sys.exit(1)
def output_x86(libraries): archn = list(archs)[0]
shorts = { l: l.split('.', 1)[0].lower() for l in libraries }
print('%include "header.s"') if archn not in archmagic:
print('.dynamic.needed:') eprintf("Unknown architecture number " + str(archn) + \
for library in libraries: ". Consult elf.h and rebuild your object files.")
print('dd 1')
print('dd (_symbols.{} - _symbols)'.format(shorts[library]))
print('.dynamic.end:')
print('_symbols:')
for library, symbols in libraries.items():