Commit 237ba991, authored by PoroCYon, committed by PoroCYon
Browse files

more size crunching, but not completely perfect

parent 29655608
...@@ -7,7 +7,8 @@ TESTDIR:= test ...@@ -7,7 +7,8 @@ TESTDIR:= test
BITS ?= $(shell getconf LONG_BIT) BITS ?= $(shell getconf LONG_BIT)
# -mpreferred-stack-boundary=3 messes up the stack and kills SSE! # -mpreferred-stack-boundary=3 messes up the stack and kills SSE!
COPTFLAGS=-Os -fvisibility=hidden -fwhole-program \ # -fno-plt
COPTFLAGS=-Os -fvisibility=hidden -fwhole-program -fno-plt \
-ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \ -ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \
-fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables
CXXOPTFLAGS=$(COPTFLAGS) \ CXXOPTFLAGS=$(COPTFLAGS) \
...@@ -27,19 +28,17 @@ ASFLAGS += -f elf64 ...@@ -27,19 +28,17 @@ ASFLAGS += -f elf64
endif endif
LDFLAGS_=$(LDFLAGS) -T $(LDDIR)/link.ld --oformat=binary LDFLAGS_=$(LDFLAGS) -T $(LDDIR)/link.ld --oformat=binary
SMOLFLAGS ?= #--libsep
CFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2) CFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2) CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
LIBS=-lc LIBS=-lc
ASFLAGS += -DUSE_INTERP -DALIGN_STACK ASFLAGS += -DUSE_INTERP -DALIGN_STACK -DUSE_DT_DEBUG -DNO_START_ARG
NASM ?= nasm NASM ?= nasm
PYTHON3 ?= python3 PYTHON3 ?= python3
all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag-crt $(BINDIR)/hello-_start all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag $(BINDIR)/hello-_start
LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL
......
OUTPUT_FORMAT(binary) OUTPUT_FORMAT(binary)
SECTIONS { SECTIONS {
. = 0x400000; . = 0x10000;
_smol_origin = .; _smol_origin = .;
.header : { KEEP(*(.header)) } .header : { KEEP(*(.header)) }
......
...@@ -30,8 +30,8 @@ def main(): ...@@ -30,8 +30,8 @@ def main():
parser.add_argument('--readelf', default=shutil.which('readelf'), \ parser.add_argument('--readelf', default=shutil.which('readelf'), \
help="which readelf binary to use") help="which readelf binary to use")
parser.add_argument('--libsep', default=False, action='store_true', \ # parser.add_argument('--libsep', default=False, action='store_true', \
help="Separete import symbols per library, instead of looking at every library when resolving a symbol.") # help="Separete import symbols per library, instead of looking at every library when resolving a symbol.")
parser.add_argument('input', nargs='+', help="input object file") parser.add_argument('input', nargs='+', help="input object file")
parser.add_argument('output', type=argparse.FileType('w'), \ parser.add_argument('output', type=argparse.FileType('w'), \
...@@ -64,7 +64,7 @@ def main(): ...@@ -64,7 +64,7 @@ def main():
symbols.setdefault(library, []) symbols.setdefault(library, [])
symbols[library].append((symbol, reloc)) symbols[library].append((symbol, reloc))
output(arch, symbols, args.libsep, args.output) output(arch, symbols, args.output)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
......
...@@ -3,11 +3,9 @@ import sys ...@@ -3,11 +3,9 @@ import sys
from smolshared import * from smolshared import *
def output_x86(libraries, libsep, outf): def output_x86(libraries, outf):
outf.write('; vim: set ft=nasm:\n') # be friendly outf.write('; vim: set ft=nasm:\n') # be friendly
outf.write('bits 32\n') outf.write('bits 32\n')
if libsep:
outf.write('%define LIBSEP\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries } shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
...@@ -23,14 +21,14 @@ def output_x86(libraries, libsep, outf): ...@@ -23,14 +21,14 @@ def output_x86(libraries, libsep, outf):
# outf.write('_GLOBAL_OFFSET_TABLE_:\n') # outf.write('_GLOBAL_OFFSET_TABLE_:\n')
# outf.write('dd dynamic\n') # outf.write('dd dynamic\n')
outf.write('_strtab:\n') outf.write('_strtab:\n')
if not libsep: # if not libsep:
for library, symrels in libraries.items(): # for library, symrels in libraries.items():
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) # outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
outf.write('_symbols:\n') outf.write('_symbols:\n')
for library, symrels in libraries.items(): for library, symrels in libraries.items():
if libsep: # if libsep:
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for sym, reloc in symrels: for sym, reloc in symrels:
# meh # meh
...@@ -53,11 +51,9 @@ def output_x86(libraries, libsep, outf): ...@@ -53,11 +51,9 @@ def output_x86(libraries, libsep, outf):
# end output_x86 # end output_x86
def output_amd64(libraries, libsep, outf): def output_amd64(libraries, outf):
outf.write('; vim: set ft=nasm:\n') outf.write('; vim: set ft=nasm:\n')
outf.write('bits 64\n') outf.write('bits 64\n')
if libsep:
outf.write('%define LIBSEP\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries } shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
...@@ -68,28 +64,16 @@ def output_amd64(libraries, libsep, outf): ...@@ -68,28 +64,16 @@ def output_amd64(libraries, libsep, outf):
outf.write('dq (_symbols.{} - _strtab)\n'.format(shorts[library])) outf.write('dq (_symbols.{} - _strtab)\n'.format(shorts[library]))
outf.write('dynamic.end:\n') outf.write('dynamic.end:\n')
if libsep: outf.write('[section .rodata.neededlibs]\n')
outf.write('[section .data.smolgot]\n')
else:
outf.write('[section .rodata.neededlibs]\n')
# if needgot:
# outf.write('global _GLOBAL_OFFSET_TABLE_\n')
# outf.write('_GLOBAL_OFFSET_TABLE_:\n')
# outf.write('dq dynamic\n')
outf.write('_strtab:\n') outf.write('_strtab:\n')
if not libsep: for library, symrels in libraries.items():
for library, symrels in libraries.items(): outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
if not libsep: outf.write('[section .data.smolgot]\n')
outf.write('[section .data.smolgot]\n')
outf.write('_symbols:\n') outf.write('_symbols:\n')
for library, symrels in libraries.items(): for library, symrels in libraries.items():
if libsep:
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for sym, reloc in symrels: for sym, reloc in symrels:
if reloc != 'R_X86_64_PLT32' and reloc != 'R_X86_64_GOTPCRELX': if reloc != 'R_X86_64_PLT32' and reloc != 'R_X86_64_GOTPCRELX':
eprintf('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!') eprintf('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!')
...@@ -105,9 +89,6 @@ global {name} ...@@ -105,9 +89,6 @@ global {name}
outf.write('\t\t_symbols.{lib}.{name}: dq 0x{hash:x}\n'\ outf.write('\t\t_symbols.{lib}.{name}: dq 0x{hash:x}\n'\
.format(lib=shorts[library],name=sym,hash=hash)) .format(lib=shorts[library],name=sym,hash=hash))
if libsep:
outf.write('\tdq 0\n')
outf.write('db 0\n') outf.write('db 0\n')
outf.write('_symbols.end:\n') outf.write('_symbols.end:\n')
...@@ -127,9 +108,9 @@ global {name} ...@@ -127,9 +108,9 @@ global {name}
# end output_amd64 # end output_amd64
def output(arch, libraries, libsep, outf): def output(arch, libraries, outf):
if arch == 'i386': output_x86(libraries, libsep, outf) if arch == 'i386': output_x86(libraries, outf)
elif arch == 'x86_64': output_amd64(libraries, libsep, outf) elif arch == 'x86_64': output_amd64(libraries, outf)
else: else:
eprintf("E: cannot emit for arch '" + str(arch) + "'") eprintf("E: cannot emit for arch '" + str(arch) + "'")
sys.exit(1) sys.exit(1)
......
...@@ -15,6 +15,7 @@ int _start(void* stack) { ...@@ -15,6 +15,7 @@ int _start(void* stack) {
int argc=*(size_t*)stack; int argc=*(size_t*)stack;
char** argv=(void*)(&((size_t*)stack)[1]); char** argv=(void*)(&((size_t*)stack)[1]);
// TODO: fix this. it borks with -fno-plt
__libc_start_main(main, argc, argv, NULL, NULL, NULL, (void*)stack); __libc_start_main(main, argc, argv, NULL, NULL, NULL, (void*)stack);
__builtin_unreachable(); __builtin_unreachable();
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
ehdr: ehdr:
; e_ident ; e_ident
db 0x7F, "ELF" db 0x7F, "ELF"
db EI_CLASS, EI_DATA, EI_VERSION, EI_OSABI db EI_CLASS, EI_DATA, EI_VERSION, 0;EI_OSABI
db EI_OSABIVERSION db 0;EI_OSABIVERSION
times 7 db 0 times 7 db 0
dw ELF_TYPE ; e_type dw ELF_TYPE ; e_type
dw ELF_MACHINE ; e_machine dw ELF_MACHINE ; e_machine
......
; vim: set ft=nasm: ; vim: set ft=nasm:
%define R10_BIAS (0x2B8) ;%define R10_BIAS (0x2B4)
%define R10_BIAS (0x2B4+0x40)
%include "rtld.inc" %include "rtld.inc"
...@@ -42,132 +43,41 @@ _smol_start: ...@@ -42,132 +43,41 @@ _smol_start:
; the second one isn't needed anymore, see code below (.next_link) ; the second one isn't needed anymore, see code below (.next_link)
%endif %endif
mov rdi, r12 push _smol_start
push r12
push -1 push -1
pop rcx pop rcx
lea rax, [rel _smol_start] ; TODO: make offset positive! pop rdi
repne scasq pop rax
repne scasd ; technically, scasq should be used, but ehhhh
sub rdi, r12 sub rdi, r12
sub rdi, LF_ENTRY_OFF+8 sub rdi, LF_ENTRY_OFF+4
xchg r9 , rdi xchg r9, rdi
;mov edi, _symbols
lea edi, [rel _symbols]
%ifdef LIBSEP
; for (rdi = (uint8_t*)_symbols; *rdi; ++rdi) {
.next_needed:
cmp byte [rdi], 0
je .needed_end
; do { // iter over the link_map
.next_link:
; entry = entry->l_next;
mov r12, [r12 + L_NEXT_OFF] ; skip the first one (this is our main
; binary, it has no symbols)
lea r10, [r12 + r9 + R10_BIAS]
; keep the current symbol in a backup reg
push rdi
pop rdx
; r11 = basename(rsi = entry->l_name)
mov rsi, [r12 + L_NAME_OFF]
.basename:
push rsi
pop r11
.basename.next:
lodsb
cmp al, '/'
cmove r11, rsi
or al, al
jnz short .basename.next
.basename.done:
; and place it back
push rdx
push rdx
pop rdi ; rdi == _symbol
pop rsi
; strcmp(rsi, r11) -> flags; rsi == first hash if matches
.strcmp:
lodsb
or al, al
jz short .strcmp.done
sub al, byte [r11]
cmovnz rsi, rdx
jnz short .next_link;.strcmp.done
inc r11
jmp short .strcmp
.strcmp.done:
xchg rsi, rdi
; if (strcmp(...)) goto next_link;
;cmovnz r12, [r12 + L_NEXT_OFF] ; this is guaranteed to be nonzero
;jnz short .next_link ; because otherwise ld.so would have complained
; now we have the right link_map of the library, so all we have
; to do now is to find the right symbol addresses corresponding
; to the hashes.
; do {
.next_hash:
; if (!*phash) break;
mov eax, dword [rdi]
or eax, eax
jz short .next_needed ; done the last hash, so move to the next lib
;link_symbol(struct link_map* entry = r12, size_t* phash = rsi, uint32_t hash = eax)
push rax
pop r11
; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
xor edx, edx
mov ecx, dword [r10 + LF_NBUCKETS_OFF - R10_BIAS]
div ecx
; shift right because we don't want to compare the lowest bit
shr r11, 1
; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind]
mov r8, [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS]
mov edx, dword [r8 + rdx * 4]
; do {
.next_chain:
; uint32_t luhash(ecx) = entry->l_gnu_chain_zero[bucket] >> 1
mov rcx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
mov ecx, dword [rcx + rdx * 4]
shr ecx, 1
; if (luhash == hash) break; push _symbols
cmp ecx, r11d ; back up link_map root
je short .chain_break
; ++bucket; } while (LIBSEP || (luhash & 1))
inc edx
jne short .next_chain
%else
; !LIBSEP
push r12 push r12
pop r11 ; back up link_map root pop r11
pop rdi
;.loopme: jmp short .loopme ; debugging
.next_hash: .next_hash:
mov eax, dword [rdi] mov r14d, dword [rdi]
or al, al ; assume we need at least one function
jz short .needed_end ; or al, al
push r11 ; jz short .needed_end
push rax mov r12, r11
push rax ; push r11
push r14
pop rbx pop rbx
pop r14 ; pop r12
pop r12
; shift right because we don't want to compare the lowest bit ; shift right because we don't want to compare the lowest bit
shr ebx, 1 shr ebx, 1
.next_link: .next_link:
mov r12, [r12 + L_NEXT_OFF] mov r12, [r12 + L_NEXT_OFF]
lea r10, [r12 + r9 + R10_BIAS] lea r10, [r12 + r9 + R10_BIAS]
; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets ; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
xor edx, edx xor edx, edx
push r14 push r14
...@@ -177,31 +87,30 @@ repne scasq ...@@ -177,31 +87,30 @@ repne scasq
; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind] ; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind]
mov r8 , [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS] mov r8 , [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS]
mov edx, dword [r8 + rdx * 4] mov ecx, dword [r8 + rdx * 4]
or edx, edx ; can be ignored apparently?
jz short .next_link ; jecxz .next_link
.next_chain: .next_chain:
; uint32_t luhash(ecx) = entry->l_gnu_chain_zero[bucket] >> 1 ; uint32_t luhash(ecx) = entry->l_gnu_chain_zero[bucket] >> 1
mov rcx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS] mov rdx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
mov ecx, dword [rcx + rdx * 4] mov edx, dword [rdx + rcx * 4]
; if (!(luhash & 1)) goto next_link; // nothing to be found in this lib.
mov al, cl
shr ecx, 1 ; TODO: make this not suck. (maybe using bt*?)
mov al, dl
shr edx, 1
; if (luhash == hash) break; ; if (luhash == hash) break;
cmp ecx, ebx cmp edx, ebx
je short .chain_break je short .chain_break
; ++bucket; } while (luhash & 1); ; ++bucket; } while (luhash & 1);
and al, 1 and al, 1
jnz short .next_link jnz short .next_link
inc edx
inc ecx
jmp short .next_chain jmp short .next_chain
%endif
.chain_break: .chain_break:
; ElfW(Sym)* symtab = entry->l_info[DT_SYMTAB]->d_un.d_ptr ; ElfW(Sym)* symtab = entry->l_info[DT_SYMTAB]->d_un.d_ptr
...@@ -213,21 +122,25 @@ repne scasq ...@@ -213,21 +122,25 @@ repne scasq
; ElfW(Sym)* symtab(rax) = dyn->d_un.d_ptr ; ElfW(Sym)* symtab(rax) = dyn->d_un.d_ptr
mov rax, [rax + D_UN_PTR_OFF] mov rax, [rax + D_UN_PTR_OFF]
; ElfW(Addr) symoff(rax) = symtab[bucket].st_value ; ElfW(Addr) symoff(rax) = symtab[bucket].st_value
lea rdx, [rdx + rdx * 2] lea rdx, [rcx + rcx * 2]
mov rax, [rax + rdx * 8 + ST_VALUE_OFF] mov rax, [rax + rdx * 8 + ST_VALUE_OFF]
; void* finaladdr(rax) = symoff + entry->l_addr ; void* finaladdr(rax) = symoff + entry->l_addr
mov rcx, [r12 + L_ADDR_OFF] add rax, [r12 + L_ADDR_OFF]
add rax, rcx
; *phash = finaladdr ; *phash = finaladdr
stosq stosq
cmp byte [rdi], 0
jne short .next_hash
; } while (1) ; } while (1)
jmp short .next_hash ; jmp short .next_hash
.needed_end: .needed_end:
;xor rbp, rbp ; still 0 from _dl_start_user ; int3 ; debugging
; xor rbp, rbp ; still 0 from _dl_start_user
%ifndef NO_START_ARG
; arg for _start
mov rdi, rsp mov rdi, rsp
%endif
%ifdef ALIGN_STACK %ifdef ALIGN_STACK
push rax push rax
%endif %endif
......
...@@ -10,9 +10,9 @@ ...@@ -10,9 +10,9 @@
#define COLOR(r, g, b) ((r << 16) + (g << 8) + b) #define COLOR(r, g, b) ((r << 16) + (g << 8) + b)
/*__attribute__((__used__)) __attribute__((__used__,__externally_visible__))
void _start() {*/ void _start() {
int main() { /*int main() {*/
#ifdef MAKE_ESC_WORK #ifdef MAKE_ESC_WORK
Atom wmDeleteMessage; Atom wmDeleteMessage;
#endif #endif
......
...@@ -3,9 +3,15 @@ ...@@ -3,9 +3,15 @@
const char *f = "foo"; const char *f = "foo";
__attribute__((__externally_visible__, __section__(".text.startup._start"), __noreturn__)) __attribute__((__externally_visible__, __section__(".text.startup._start"),
__noreturn__
#ifndef __clang__
, __naked__
#endif
))
int _start(void) { int _start(void) {
puts("Hello World!");//printf("hello world %s\n", f); puts("Hello World!");//printf("hello world %s\n", f);
asm volatile("int3");//exit(42); asm volatile("int3");//exit(42);
__builtin_unreachable(); __builtin_unreachable();
} }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment