Commit 38ea8e95 authored by PoroCYon's avatar PoroCYon Committed by PoroCYon
Browse files

64-bit version.

parent cccd9656
......@@ -4,6 +4,8 @@ SRCDIR := src
LDDIR := ld
TESTDIR:= test
# Target word size (32 or 64); defaults to the host's native long size.
# Assigned with := so getconf runs once at parse time: a plain `?=` creates a
# recursively-expanded variable and would re-run the shell command on every
# expansion of $(BITS).  A value supplied on the command line or through the
# environment still takes precedence, exactly as with `?=`.
ifeq ($(origin BITS),undefined)
BITS := $(shell getconf LONG_BIT)
endif
# -mpreferred-stack-boundary=3 messes up the stack and kills SSE!
COPTFLAGS=-Os -fvisibility=hidden -fwhole-program \
-ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \
......@@ -15,12 +17,18 @@ CXXOPTFLAGS=$(COPTFLAGS) \
CFLAGS=-Wall -Wextra -Wpedantic -std=gnu11 -nostartfiles -fno-PIC $(COPTFLAGS)
CXXFLAGS=-Wall -Wextra -Wpedantic -std=c++11 $(CXXOPTFLAGS) -nostartfiles -fno-PIC
ASFLAGS=-f elf -I $(SRCDIR)/
ASFLAGS=-I $(SRCDIR)/
ifeq ($(BITS),32)
LDFLAGS=-m elf_i386
ASFLAGS += -f elf32
else
LDFLAGS=-m elf_x86_64
ASFLAGS += -f elf64
endif
LDFLAGS_=$(LDFLAGS) -T $(LDDIR)/link.ld --oformat=binary
CFLAGS += -m32
CXXFLAGS += -m32
CFLAGS += -m$(BITS)
CXXFLAGS += -m$(BITS)
LIBS=-lc
......@@ -29,7 +37,7 @@ ASFLAGS += -DUSE_INTERP
NASM ?= nasm
PYTHON3 ?= python3
all: $(BINDIR)/sdl $(BINDIR)/hello
all: $(BINDIR)/hello $(BINDIR)/sdl
LIBS += -lSDL2 -lGL
......@@ -42,25 +50,22 @@ clean:
.SECONDARY:
$(OBJDIR)/%.o: $(SRCDIR)/%.c $(OBJDIR)/
$(CC) -m32 $(CFLAGS) -c "$<" -o "$@"
$(CC) $(CFLAGS) -c "$<" -o "$@"
$(OBJDIR)/%.o: $(TESTDIR)/%.c $(OBJDIR)/
$(CC) -m32 $(CFLAGS) -c "$<" -o "$@"
$(CC) $(CFLAGS) -c "$<" -o "$@"
# Partially link (ld -r) a program object together with crt1.o into a single
# relocatable object, <name>.start.o.
$(OBJDIR)/%.start.o: $(OBJDIR)/%.o $(OBJDIR)/crt1.o
	$(LD) $(LDFLAGS) -r -o "$@" $^
# Compile the C runtime entry stub.  The object directory is an order-only
# prerequisite (after the `|`): it has to exist before compiling, but its
# timestamp -- which changes whenever any file is created inside it -- must
# not force crt1.o to be rebuilt.
$(OBJDIR)/crt1.o: $(SRCDIR)/crt1.c | $(OBJDIR)/
	$(CC) $(CFLAGS) -c "$<" -o "$@"
# Generate the NASM symbol-table source for a program by running smol.py on
# its .start.o; the $(LIBS) linker flags are passed through to the script.
$(OBJDIR)/symbols.%.asm: $(OBJDIR)/%.start.o
	$(PYTHON3) ./smol.py $(LIBS) "$<" "$@"
$(OBJDIR)/stub.%.o: $(OBJDIR)/symbols.%.asm $(SRCDIR)/header.asm \
$(SRCDIR)/loader.asm
# Assemble the generated symbol table.  That file %includes the ELF header and
# the loader matching the target word size (header32/loader32.asm or
# header64/loader64.asm), so the prerequisites follow $(BITS); hard-coding the
# 32-bit names meant that editing the 64-bit sources never triggered a
# re-assemble in a 64-bit build.
$(OBJDIR)/stub.%.o: $(OBJDIR)/symbols.%.asm $(SRCDIR)/header$(BITS).asm \
  $(SRCDIR)/loader$(BITS).asm
	$(NASM) $(ASFLAGS) $< -o $@
$(BINDIR)/%: $(OBJDIR)/%.start.o $(OBJDIR)/stub.%.o $(BINDIR)/
$(LD) $(LDFLAGS_) $(OBJDIR)/$*.start.o $(OBJDIR)/stub.$*.o -o "$@"
$(LD) -Map=$(BINDIR)/$*.map $(LDFLAGS_) $(OBJDIR)/$*.start.o $(OBJDIR)/stub.$*.o -o "$@"
.PHONY: all clean
......@@ -2,25 +2,50 @@ OUTPUT_FORMAT(binary)
SECTIONS {
. = 0x400000;
_smol_origin = .;
.header : { *(.header) }
_smol_text_start = .;
_smol_text_off = _smol_text_start - _smol_origin;
.text : {
*(.text.startup.smol)
*(.text.startup._start)
*(.text .text.* .rdata .rdata.* .rodata .rodata.*)
}
_smol_text_end = .;
_smol_text_size = _smol_text_end - _smol_text_start;
_smol_textandheader_size = _smol_text_end - _smol_origin;
_smol_data_start = .;
_smol_data_off = _smol_data_start - _smol_origin;
.data : {
*(.data .data.* .tdata .tdata.* .bss .bss.* .tbss .tbss.*)
*(.data.smolgot)
*(.data .data.* .tdata .tdata.*)
}
.dynamic : { *(.dynamic) } :all :dyn
.dynstuff : { *(.symtab .strtab .shstrtab .rel.text .got.plt .gnu.linkonce.* .plt .plt.got .interp) } :all
_smol_data_end = .;
_smol_data_size = _smol_data_end - _smol_data_start;
_smol_total_filesize = . - _smol_origin;
_smol_bss_start = .;
_smol_bss_off = _smol_bss_start - _smol_origin;
.bss : {
*(.bss .bss.* .tbss .tbss.* .sbss .sbss.*)
}
_smol_bss_end = .;
_smol_bss_size = _smol_bss_end - _smol_bss_start;
_smol_dataandbss_size = _smol_bss_end - _smol_data_start;
/DISCARD/ : {
*(.*)
}
_smol_total_size = . - 0x400000;
_smol_total_memsize = . - _smol_origin;
}
......@@ -11,14 +11,16 @@ def hash_djb2(s):
def output_x86(libraries, outf):
outf.write('; vim: set ft=nasm:\n') # be friendly
outf.write('bits 32\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
outf.write('%include "header.asm"\n')
outf.write('%include "header32.asm"\n')
outf.write('.dynamic.needed:\n')
for library in libraries:
outf.write('dd 1\n')
outf.write('dd 1;DT_NEEDED\n')
outf.write('dd (_symbols.{} - _symbols)\n'.format(shorts[library]))
outf.write('.dynamic.end:\n')
outf.write('_symbols:\n')
for library, symbols in libraries.items():
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
......@@ -31,15 +33,53 @@ def output_x86(libraries, outf):
\t\t dd 0x{hash:x}
""".format(name=sym, hash=hash).lstrip('\n'))
outf.write('\tdb 0\n')
outf.write('\tdb 0\n') # TODO: not a dd?
outf.write('db 0\n')
outf.write('%include "loader.asm"\n')
outf.write('_symbols.end:\n')
outf.write('%include "loader32.asm"\n')
def output_amd64(libraries, outf):
    """Write the NASM source of the x86_64 import stub to ``outf``.

    ``libraries`` maps each needed library file name to an iterable of the
    symbol names imported from it; ``outf`` is a writable text stream.

    The generated text consists of, in order:
      * ``bits 64`` and an include of header64.asm;
      * one DT_NEEDED entry per library (tag 1, value = offset of that
        library's name inside ``_symbols``);
      * in section .data.smolgot, for every library: its NUL-terminated
        name, one ``dq`` slot per imported symbol holding the symbol's djb2
        hash, and a zero ``dq`` ending that library's slots; a single zero
        byte ends the whole table;
      * one ``jmp [rel <slot>]`` trampoline per symbol, each placed in its own
        .text.smolplt.<name> section and exported under the symbol's name;
      * an include of loader64.asm.
    """
    emit = outf.write

    emit('; vim: set ft=nasm:\n')
    emit('bits 64\n')

    # Label-friendly short name per library: the part before the first '.',
    # lower-cased, with '-' turned into '_'.
    labels = {}
    for lib in libraries:
        labels[lib] = lib.split('.', 1)[0].lower().replace('-', '_')

    emit('%include "header64.asm"\n')

    # DT_NEEDED entries, one per library.
    emit('dynamic.needed:\n')
    for lib in libraries:
        emit('dq 1;DT_NEEDED\n')
        emit('dq (_symbols.{} - _symbols)\n'.format(labels[lib]))
    emit('dynamic.end:\n')

    # Library names followed by their per-symbol hash slots.
    emit('[section .data.smolgot]\n')
    emit('_symbols:\n')
    for lib, imported in libraries.items():
        emit('\t_symbols.{}: db "{}",0\n'.format(labels[lib], lib))
        for sym in imported:
            digest = hash_djb2(sym)
            emit('\t\t_symbols.{}.{}: dq 0x{:x}\n'.format(labels[lib], sym, digest))
        emit('\tdq 0\n')  # zero slot: no more symbols for this library
    emit('db 0\n')  # empty name: no more libraries
    emit('_symbols.end:\n')

    # One trampoline per imported symbol, jumping through its slot.
    trampoline = (
        '[section .text.smolplt.{name}]\n'
        'global {name}\n'
        '{name}:\n'
        'jmp [rel _symbols.{lib}.{name}]\n'
    )
    emit('_smolplt:\n')
    for lib, imported in libraries.items():
        for sym in imported:
            emit(trampoline.format(lib=labels[lib], name=sym))
    emit('_smolplt.end:\n')

    emit('%include "loader64.asm"\n')
def output(arch, libraries, outf):
    """Emit the stub assembly for ``arch`` by delegating to its generator.

    'i386' is handled by output_x86 and 'x86_64' by output_amd64; both receive
    ``libraries`` and ``outf`` unchanged.  For any other ``arch`` an error is
    printed through eprintf and the process exits with status 1.
    """
    generators = (
        ('i386', output_x86),
        ('x86_64', output_amd64),
    )
    for known_arch, generate in generators:
        if arch == known_arch:
            generate(libraries, outf)
            return

    eprintf("E: cannot emit for arch '" + str(arch) + "'")
    sys.exit(1)
......
......@@ -2,10 +2,8 @@
import sys
# Two-way lookup between architecture names and their numeric ELF machine
# identifiers: every supported architecture is stored both as name -> number
# and as number -> name.
archmagic = {
    'i386': 3, 3: 'i386',
    ##'arm': 40, 40: 'arm',
    #'x86_64': 62, 62: 'x86_64',
    ###'aarch64': 183, 183: 'aarch64'
    'i386': 3, 3: 'i386' ,
    'x86_64': 62, 62: 'x86_64',
}
def eprintf(*args, **kwargs):
    """Print to standard error.

    Takes the same positional and keyword arguments as the built-in print(),
    except ``file``: the destination is always sys.stderr.
    """
    print(*args, file=sys.stderr, **kwargs)
......
......@@ -8,10 +8,12 @@ extern int __libc_start_main(int (*main)(int, char**),
void (*rtld_fini)(void),
void* stack) __attribute__((__noreturn__));
__attribute__((__externally_visible__, __section__(".text.startup._start"), __noreturn__))
__attribute__((__externally_visible__, __section__(".text.startup._start"),
__noreturn__))
int _start(void* stack) {
int argc=*(int*)stack;
char** argv=(void*)(&((int*)stack)[1]);
// TODO: _dl_fini etc.
int argc=*(size_t*)stack;
char** argv=(void*)(&((size_t*)stack)[1]);
__libc_start_main(main, argc, argv, NULL, NULL, NULL, (void*)stack);
......
; vim: set ft=nasm:
%if __BITS__ == 32
%define EI_CLASS (1) ; 1 == 32-bit
%else
%define EI_CLASS (2) ; 2 == 64-bit
%endif
%define EI_DATA (1) ; 1 == little-endian
%define EI_VERSION (1) ; current
%define EI_OSABI (3) ; Linux
......@@ -9,7 +13,11 @@
%define ELF_TYPE (2) ; 2 == executable
%ifndef ELF_MACHINE
%define ELF_MACHINE (3) ; 3 == i386
%if __BITS__ == 32
%define ELF_MACHINE ( 3) ; 3 == i386
%else
%define ELF_MACHINE (62) ; 62 == x86_64
%endif
%endif
%define PT_LOAD (1)
......
; vim: set ft=nasm:
%define ORIGIN 0x400000
;org ORIGIN
bits 32
%include "linkscr.inc"
extern _smol_total_size
[section .header]
%include "elf.inc"
......@@ -25,7 +22,33 @@ header:
dd 0 ; e_flags
dw (.segments - header) ; e_ehsize
dw (.segments.load - .segments.dynamic) ; e_phentsize
%ifdef USE_NX
%error "USE_NX not supported yet on i386 ('GOT' still needs RWX, and alignment has to be fixed)"
;%ifdef USE_INTERP
; dw 4, 0 ; e_phnum, e_shentsize
;%else
; dw 3, 0
;%endif
;.segments:
;.segments.load.text:
; dd PT_LOAD
; dd _smol_origin
; dd _smol_text_start, 0
; dd _smol_textandheader_size
; dd _smol_textandheader_size
; dd (PHDR_R | PHDR_X)
; dd 0x1000
;.segments.load.data:
; dd PT_LOAD
; dd _smol_data_off
; dd _smol_data_start, 0
; dd _smol_data_size
; dd _smol_dataandbss_size
; dd (PHDR_R | PHDR_W)
; dd 0x1;000
%else
.segments:
%endif
%ifdef USE_INTERP
.segments.interp:
dd PT_INTERP ; {e_phnum: 2, e_shentsize: 0}, p_type
......@@ -42,14 +65,17 @@ header:
dd (.dynamic.end - .dynamic) ; p_filesz
dd (.dynamic.end - .dynamic) ; p_memsz
dd 0, 0 ; p_flags, p_align
%ifndef USE_NX
.segments.load:
dd PT_LOAD ; p_type: 1 = PT_LOAD
dd 0 ; p_offset
dd ORIGIN, 0 ; p_vaddr, p_paddr
dd _smol_total_size ; p_filesz
dd _smol_total_size ; p_memsz
dd _smol_origin, 0 ; p_vaddr, p_paddr
; use memsize twice here, linux doesn't care and it compresses better
dd _smol_total_memsize ; p_filesz
dd _smol_total_memsize ; p_memsz
dd (PHDR_R | PHDR_W | PHDR_X) ; p_flags
dd 0x1000 ; p_align
%endif
.segments.end:
%ifdef USE_INTERP
.interp:
......
; vim: set ft=nasm:
;
; ELF64 file header, program headers and the first entries of the dynamic
; section for the 64-bit build, all emitted into section .header.
;
; In the default (non-USE_NX) layout the first program header is laid over the
; tail of the ELF header: its p_type and p_flags dwords occupy the bytes of
; e_phnum, e_shentsize, e_shnum and e_shstrndx, and ehdr.end is placed right
; after them (see the double field comments on those lines).
; NOTE(review): this makes e_phnum equal to the numeric value of PT_INTERP
; (or PT_DYNAMIC without USE_INTERP); those constants come from elf.inc --
; confirm they match the number of program headers emitted.
%include "linkscr.inc"
[section .header]
%include "elf.inc"
ehdr:
; e_ident
db 0x7F, "ELF"
db EI_CLASS, EI_DATA, EI_VERSION, EI_OSABI
db EI_OSABIVERSION
times 7 db 0
dw ELF_TYPE ; e_type
dw ELF_MACHINE ; e_machine
dd EI_VERSION ; e_version
dq _smol_start ; e_entry
dq phdr - ehdr ; e_phoff
dq 0 ; e_shoff
dd 0 ; e_flags
dw ehdr.end - ehdr ; e_ehsize
; size of one program header, measured as the distance between two
; consecutive entries
dw phdr.load - phdr.dynamic ; e_phentsize
; NOTE(review): when USE_NX is defined only the four words below are emitted;
; phdr, ehdr.end, phdr.dynamic and phdr.load are all defined inside the %else
; branch, so the references to them above have no definition in that
; configuration -- the USE_NX path looks unfinished, confirm before enabling.
%ifdef USE_NX
%ifdef USE_INTERP
dw 4 ; e_phnum
%else
dw 3 ; e_phnum
%endif
dw 0, 0, 0 ; e_shentsize, e_shnum, e_shstrndx
%else
phdr:
%ifdef USE_INTERP
; PT_INTERP entry: location of the interpreter path string defined below
phdr.interp:
dd PT_INTERP ; p_type ; e_phnum, e_shentsize
dd 0 ; p_flags ; e_shnum, e_shstrndx
ehdr.end:
dq interp - ehdr ; p_offset
dq interp, interp ; p_vaddr, p_paddr
dq interp.end - interp ; p_filesz
dq interp.end - interp ; p_memsz
dq 0 ; p_align
%endif
; PT_DYNAMIC entry: covers dynamic .. dynamic.end (dynamic.end is defined in
; the generated symbols file, after its DT_NEEDED entries)
phdr.dynamic:
dd PT_DYNAMIC ; p_type ; e_phnum, e_shentsize
dd 0 ; p_flags ; e_shnum, e_shstrndx
; without USE_INTERP this is the first program header, so the ELF header ends
; here instead
%ifndef USE_INTERP
ehdr.end:
%endif
dq dynamic - ehdr ; p_offset
dq dynamic, 0 ; p_vaddr, p_paddr
dq dynamic.end - dynamic ; p_filesz
dq dynamic.end - dynamic ; p_memsz
dq 0 ; p_align
%ifndef USE_NX
; single read/write/execute PT_LOAD segment starting at ehdr; the file size
; and the memory size are both given as _smol_total_memsize
phdr.load:
dd PT_LOAD ; p_type
dd PHDR_R | PHDR_W | PHDR_X ; p_flags
dq 0 ; p_offset
dq ehdr, 0 ; p_vaddr, p_paddr
dq _smol_total_memsize ; p_filesz
dq _smol_total_memsize ; p_memsz
dq 0x1000 ; p_align
%else
; NOTE(review): this branch cannot be taken -- the surrounding code is already
; inside the %else of the outer `%ifdef USE_NX`, so USE_NX is never defined
; here and the %error below is unreachable.
%error "TODO" ; TODO
%endif
%endif
%ifdef USE_INTERP
; NUL-terminated path of the dynamic loader, referenced by phdr.interp
interp:
db "/lib64/ld-linux-x86-64.so.2", 0
interp.end:
%endif
; start of the dynamic section; the generated symbols file continues it with
; the DT_NEEDED entries and defines dynamic.end
dynamic:
dynamic.strtab:
dq DT_STRTAB ; d_tag
dq _symbols ; d_un.d_ptr
dynamic.symtab:
dq DT_SYMTAB ; d_tag
dq 0 ; d_un.d_ptr
; vim: set ft=nasm:
extern _smol_origin
extern _smol_total_size
extern _smol_text_start
extern _smol_text_off
extern _smol_text_end
extern _smol_text_size
extern _smol_textandheader_size
extern _smol_data_start
extern _smol_data_off
extern _smol_data_end
extern _smol_data_size
extern _smol_total_filesize
extern _smol_bss_start
extern _smol_bss_off
extern _smol_bss_end
extern _smol_bss_size
extern _smol_dataandbss_size
extern _smol_total_memsize
......@@ -6,6 +6,7 @@
_smol_start:
push edx ; _dl_fini
; try to get the 'version-agnostic' offset of the stuff we're
; interested in
mov ebx, eax
......@@ -15,7 +16,7 @@ _smol_start:
cmp dword eax, _smol_start
jne short .looper
sub esi, ebx
sub esi, LM_ENTRY_OFFSET_BASE+4 ; +4: take inc-after from lodsb into acct
sub esi, LM_ENTRY_OFFSET_BASE+4 ; +4: take inc-after from lodsd into acct
xchg ebp, ebx
xchg ebx, esi
......@@ -30,24 +31,17 @@ link: ; (struct link_map *root, char *symtable)
mov esi, [ebp + LM_NAME_OFFSET]
.basename: ; (const char *s (esi))
push esi
push edi
mov edi, esi
.basename.cmp:
lodsb
or al, al
jz short .basename.done
cmp al, 47 ; '/'
cmp al, '/'
cmove edi, esi
jmp short .basename.cmp
or al, al
jnz short .basename.cmp
.basename.done:
xchg eax, edi
pop edi
pop esi
.basename.end:
mov edi, eax
pop esi
.strcmp: ; (const char *s1 (esi), const char *s2 (edi))
push esi
push edi
......@@ -77,7 +71,7 @@ link: ; (struct link_map *root, char *symtable)
.do_symbols: ; null byte means end of symbols for this library!
lodsb
test al, al
jz short .next_library
jz short .do_library
push ebx
xchg ebx, edi
......@@ -89,7 +83,7 @@ link: ; (struct link_map *root, char *symtable)
xor edx, edx
mov ebx, [ebp + edi + LM_NBUCKETS_OFFSET]
div ebx
; eax = entry->l_gnu_buckets[eax]
; eax = entry->l_gnu_buckets[edx]
mov eax, [ebp + edi + LM_GNU_BUCKETS_OFFSET]
mov eax, [eax + edx * 4]
; *h |= 1
......@@ -119,14 +113,13 @@ link: ; (struct link_map *root, char *symtable)
add esi, 4
jmp short link.do_symbols
inc esi
link.next_library:
jmp link.do_library
link.done:
;xor ebp, ebp ; let's put that burden on the user code, so they can leave
; it out if they want to
sub esp, 20 ; put the stack where _stack (C code) expects it to be
pop edx ; _dl_fini
sub esp, 20 ; put the stack where _start (C code) expects it to be
; this can't be left out, because X needs the envvars
;.loopme: jmp short .loopme
......
; vim: set ft=nasm:
%include "rtld.inc"
%ifdef ELF_TYPE
[section .text.startup.smol]
%else
; not defined -> debugging!
[section .text]
%endif
; rax: special op reg
;!rbx: ptrdiff_t glibc_vercompat_extra_hi_field_off
; rcx: special op reg
; rdx: special op reg
; rsi: special op reg
; rdi: struct link_map* root / special op reg
; rbp:
; r8 :
; r9 :
;!r10: struct link_map* entry + far correction factor
; r11: temp storage var
;!r12: struct link_map* entry
;!r13: _dl_fini address (reqd by the ABI)
%ifndef ELF_TYPE
extern _symbols
global _start
_start:
%endif
_smol_start:
xchg r13, rdx ; _dl_fini
mov r12, [rsp - 8] ; return address of _dl_init
mov r11d, dword [r12 - 20] ; decode part of 'mov rdi, [rel _rtld_global]'
mov r12, [r12 + r11 - 16] ; ???
; struct link_map* root = r12
;mov r12, rdi
mov rsi, r12
; size_t* field = (size_t*)root;
; for (; *field != _smol_start; ++field) ;
.next_off:
lodsq
cmp rax, _smol_start
jne short .next_off
; // rbx = offsetof(struct link_map* rsi, l_entry) - DEFAULT_OFFSET
; rbx = field - root - offsetof(struct link_map, l_entry)
sub rsi, r12
sub rsi, LF_ENTRY_OFF+8
xchg rbx, rsi
mov rsi, _symbols
; for (rsi = (uint8_t*)_symbols; *rsi; ++rsi) {
.next_needed:
cmp byte [rsi], 0
je .needed_end
; do { // iter over the link_map
.next_link:
; entry = entry->l_next;
mov r12, [r12 + L_NEXT_OFF] ; skip the first one (this is our main
; binary, it has no symbols)
; keep the current symbol in a backup reg
mov rdx, rsi
; r11 = basename(rsi = entry->l_name)
mov rsi, [r12 + L_NAME_OFF]
.basename:
mov r11, rsi
.basename.next:
lodsb
cmp al, '/'
cmove r11, rsi
or al, al
jnz short .basename.next
.basename.done:
; and place it back
mov rsi, rdx ; rsi == _symbol
; strcmp(rsi, r11) -> flags; rsi == first hash if matches
.strcmp:
lodsb
or al, al
jz short .strcmp.done
sub al, byte [r11]
cmovnz rsi, rdx
jnz short .next_link;.strcmp.done
inc r11
jmp short .strcmp
.strcmp.done:
;mov rsi, rdx
; if (strcmp(...)) goto next_link;
;cmovnz r12, [r12 + L_NEXT_OFF] ; this is guaranteed to be nonzero
;jnz short .next_link ; because otherwise ld.so would have complained
; now we have the right link_map of the library, so all we have
; to do now is to find the right symbol addresses corresponding
; to the hashes.
; do {
.next_hash:
; if (!*phash) break;
lodsq
or eax, eax
jz short .next_needed ; done the last hash, so move to the next lib
;link_symbol(struct link_map* entry = r12, size_t* phash = rsi, uint32_t hash = eax)
lea r10, [r12 + rbx]
mov r11, rax
; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
xor edx, edx
mov ecx, dword [r10 + LF_NBUCKETS_OFF]
div ecx
; shift right because we don't want to compare the lowest bit
shr r11, 1
; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind]
mov r8, [r10 + LF_GNU_BUCKETS_OFF]
mov edx, dword [r8 + rdx * 4]
; do {
.next_chain: