Commit 237ba991 authored by PoroCYon's avatar PoroCYon Committed by PoroCYon
Browse files

more size crunching, but not completely perfect

parent 29655608
......@@ -7,7 +7,8 @@ TESTDIR:= test
BITS ?= $(shell getconf LONG_BIT)
# -mpreferred-stack-boundary=3 messes up the stack and kills SSE!
COPTFLAGS=-Os -fvisibility=hidden -fwhole-program \
# -fno-plt
COPTFLAGS=-Os -fvisibility=hidden -fwhole-program -fno-plt \
-ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \
-fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables
CXXOPTFLAGS=$(COPTFLAGS) \
......@@ -27,19 +28,17 @@ ASFLAGS += -f elf64
endif
LDFLAGS_=$(LDFLAGS) -T $(LDDIR)/link.ld --oformat=binary
SMOLFLAGS ?= #--libsep
CFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
LIBS=-lc
ASFLAGS += -DUSE_INTERP -DALIGN_STACK
ASFLAGS += -DUSE_INTERP -DALIGN_STACK -DUSE_DT_DEBUG -DNO_START_ARG
NASM ?= nasm
PYTHON3 ?= python3
all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag-crt $(BINDIR)/hello-_start
all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag $(BINDIR)/hello-_start
LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL
......
OUTPUT_FORMAT(binary)
SECTIONS {
. = 0x400000;
. = 0x10000;
_smol_origin = .;
.header : { KEEP(*(.header)) }
......
......@@ -30,8 +30,8 @@ def main():
parser.add_argument('--readelf', default=shutil.which('readelf'), \
help="which readelf binary to use")
parser.add_argument('--libsep', default=False, action='store_true', \
help="Separete import symbols per library, instead of looking at every library when resolving a symbol.")
# parser.add_argument('--libsep', default=False, action='store_true', \
# help="Separete import symbols per library, instead of looking at every library when resolving a symbol.")
parser.add_argument('input', nargs='+', help="input object file")
parser.add_argument('output', type=argparse.FileType('w'), \
......@@ -64,7 +64,7 @@ def main():
symbols.setdefault(library, [])
symbols[library].append((symbol, reloc))
output(arch, symbols, args.libsep, args.output)
output(arch, symbols, args.output)
if __name__ == '__main__':
main()
......
......@@ -3,11 +3,9 @@ import sys
from smolshared import *
def output_x86(libraries, libsep, outf):
def output_x86(libraries, outf):
outf.write('; vim: set ft=nasm:\n') # be friendly
outf.write('bits 32\n')
if libsep:
outf.write('%define LIBSEP\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
......@@ -23,14 +21,14 @@ def output_x86(libraries, libsep, outf):
# outf.write('_GLOBAL_OFFSET_TABLE_:\n')
# outf.write('dd dynamic\n')
outf.write('_strtab:\n')
if not libsep:
for library, symrels in libraries.items():
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
# if not libsep:
# for library, symrels in libraries.items():
# outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
outf.write('_symbols:\n')
for library, symrels in libraries.items():
if libsep:
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
# if libsep:
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for sym, reloc in symrels:
# meh
......@@ -53,11 +51,9 @@ def output_x86(libraries, libsep, outf):
# end output_x86
def output_amd64(libraries, libsep, outf):
def output_amd64(libraries, outf):
outf.write('; vim: set ft=nasm:\n')
outf.write('bits 64\n')
if libsep:
outf.write('%define LIBSEP\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
......@@ -68,28 +64,16 @@ def output_amd64(libraries, libsep, outf):
outf.write('dq (_symbols.{} - _strtab)\n'.format(shorts[library]))
outf.write('dynamic.end:\n')
if libsep:
outf.write('[section .data.smolgot]\n')
else:
outf.write('[section .rodata.neededlibs]\n')
# if needgot:
# outf.write('global _GLOBAL_OFFSET_TABLE_\n')
# outf.write('_GLOBAL_OFFSET_TABLE_:\n')
# outf.write('dq dynamic\n')
outf.write('[section .rodata.neededlibs]\n')
outf.write('_strtab:\n')
if not libsep:
for library, symrels in libraries.items():
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for library, symrels in libraries.items():
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
if not libsep:
outf.write('[section .data.smolgot]\n')
outf.write('[section .data.smolgot]\n')
outf.write('_symbols:\n')
for library, symrels in libraries.items():
if libsep:
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for sym, reloc in symrels:
if reloc != 'R_X86_64_PLT32' and reloc != 'R_X86_64_GOTPCRELX':
eprintf('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!')
......@@ -105,9 +89,6 @@ global {name}
outf.write('\t\t_symbols.{lib}.{name}: dq 0x{hash:x}\n'\
.format(lib=shorts[library],name=sym,hash=hash))
if libsep:
outf.write('\tdq 0\n')
outf.write('db 0\n')
outf.write('_symbols.end:\n')
......@@ -127,9 +108,9 @@ global {name}
# end output_amd64
def output(arch, libraries, libsep, outf):
if arch == 'i386': output_x86(libraries, libsep, outf)
elif arch == 'x86_64': output_amd64(libraries, libsep, outf)
def output(arch, libraries, outf):
if arch == 'i386': output_x86(libraries, outf)
elif arch == 'x86_64': output_amd64(libraries, outf)
else:
eprintf("E: cannot emit for arch '" + str(arch) + "'")
sys.exit(1)
......
......@@ -15,6 +15,7 @@ int _start(void* stack) {
int argc=*(size_t*)stack;
char** argv=(void*)(&((size_t*)stack)[1]);
// TODO: fix this. it borks with -fno-plt
__libc_start_main(main, argc, argv, NULL, NULL, NULL, (void*)stack);
__builtin_unreachable();
......
......@@ -9,8 +9,8 @@
ehdr:
; e_ident
db 0x7F, "ELF"
db EI_CLASS, EI_DATA, EI_VERSION, EI_OSABI
db EI_OSABIVERSION
db EI_CLASS, EI_DATA, EI_VERSION, 0;EI_OSABI
db 0;EI_OSABIVERSION
times 7 db 0
dw ELF_TYPE ; e_type
dw ELF_MACHINE ; e_machine
......
; vim: set ft=nasm:
%define R10_BIAS (0x2B8)
;%define R10_BIAS (0x2B4)
%define R10_BIAS (0x2B4+0x40)
%include "rtld.inc"
......@@ -42,132 +43,41 @@ _smol_start:
; the second one isn't needed anymore, see code below (.next_link)
%endif
mov rdi, r12
push _smol_start
push r12
push -1
pop rcx
lea rax, [rel _smol_start] ; TODO: make offset positive!
repne scasq
pop rdi
pop rax
repne scasd ; technically, scasq should be used, but ehhhh
sub rdi, r12
sub rdi, LF_ENTRY_OFF+8
xchg r9 , rdi
;mov edi, _symbols
lea edi, [rel _symbols]
%ifdef LIBSEP
; for (rdi = (uint8_t*)_symbols; *rdi; ++rdi) {
.next_needed:
cmp byte [rdi], 0
je .needed_end
; do { // iter over the link_map
.next_link:
; entry = entry->l_next;
mov r12, [r12 + L_NEXT_OFF] ; skip the first one (this is our main
; binary, it has no symbols)
lea r10, [r12 + r9 + R10_BIAS]
; keep the current symbol in a backup reg
push rdi
pop rdx
; r11 = basename(rsi = entry->l_name)
mov rsi, [r12 + L_NAME_OFF]
.basename:
push rsi
pop r11
.basename.next:
lodsb
cmp al, '/'
cmove r11, rsi
or al, al
jnz short .basename.next
.basename.done:
; and place it back
push rdx
push rdx
pop rdi ; rdi == _symbol
pop rsi
; strcmp(rsi, r11) -> flags; rsi == first hash if matches
.strcmp:
lodsb
or al, al
jz short .strcmp.done
sub al, byte [r11]
cmovnz rsi, rdx
jnz short .next_link;.strcmp.done
inc r11
jmp short .strcmp
.strcmp.done:
xchg rsi, rdi
; if (strcmp(...)) goto next_link;
;cmovnz r12, [r12 + L_NEXT_OFF] ; this is guaranteed to be nonzero
;jnz short .next_link ; because otherwise ld.so would have complained
; now we have the right link_map of the library, so all we have
; to do now is to find the right symbol addresses corresponding
; to the hashes.
; do {
.next_hash:
; if (!*phash) break;
mov eax, dword [rdi]
or eax, eax
jz short .next_needed ; done the last hash, so move to the next lib
;link_symbol(struct link_map* entry = r12, size_t* phash = rsi, uint32_t hash = eax)
push rax
pop r11
; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
xor edx, edx
mov ecx, dword [r10 + LF_NBUCKETS_OFF - R10_BIAS]
div ecx
; shift right because we don't want to compare the lowest bit
shr r11, 1
; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind]
mov r8, [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS]
mov edx, dword [r8 + rdx * 4]
; do {
.next_chain:
; uint32_t luhash(ecx) = entry->l_gnu_chain_zero[bucket] >> 1
mov rcx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
mov ecx, dword [rcx + rdx * 4]
shr ecx, 1
sub rdi, LF_ENTRY_OFF+4
xchg r9, rdi
; if (luhash == hash) break;
cmp ecx, r11d
je short .chain_break
; ++bucket; } while (LIBSEP || (luhash & 1))
inc edx
jne short .next_chain
%else
; !LIBSEP
push _symbols
; back up link_map root
push r12
pop r11 ; back up link_map root
pop r11
pop rdi
;.loopme: jmp short .loopme ; debugging
.next_hash:
mov eax, dword [rdi]
or al, al
jz short .needed_end
push r11
push rax
push rax
mov r14d, dword [rdi]
; assume we need at least one function
; or al, al
; jz short .needed_end
mov r12, r11
; push r11
push r14
pop rbx
pop r14
pop r12
; pop r12
; shift right because we don't want to compare the lowest bit
shr ebx, 1
.next_link:
mov r12, [r12 + L_NEXT_OFF]
lea r10, [r12 + r9 + R10_BIAS]
lea r10, [r12 + r9 + R10_BIAS]
; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
xor edx, edx
push r14
......@@ -177,31 +87,30 @@ repne scasq
; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind]
mov r8 , [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS]
mov edx, dword [r8 + rdx * 4]
mov ecx, dword [r8 + rdx * 4]
or edx, edx
jz short .next_link
; can be ignored apparently?
; jecxz .next_link
.next_chain:
; uint32_t luhash(ecx) = entry->l_gnu_chain_zero[bucket] >> 1
mov rcx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
mov ecx, dword [rcx + rdx * 4]
; keep luhash's low bit in al; if it is set and the hash does not match below: goto next_link; // nothing to be found in this lib.
mov al, cl
mov rdx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
mov edx, dword [rdx + rcx * 4]
shr ecx, 1
; TODO: make this not suck. (maybe using bt*?)
mov al, dl
shr edx, 1
; if (luhash == hash) break;
cmp ecx, ebx
cmp edx, ebx
je short .chain_break
; ++bucket; } while (!(luhash & 1));
and al, 1
jnz short .next_link
inc edx
inc ecx
jmp short .next_chain
%endif
.chain_break:
; ElfW(Sym)* symtab = entry->l_info[DT_SYMTAB]->d_un.d_ptr
......@@ -213,21 +122,25 @@ repne scasq
; ElfW(Sym)* symtab(rax) = dyn->d_un.d_ptr
mov rax, [rax + D_UN_PTR_OFF]
; ElfW(Addr) symoff(rax) = symtab[bucket].st_value
lea rdx, [rdx + rdx * 2]
lea rdx, [rcx + rcx * 2]
mov rax, [rax + rdx * 8 + ST_VALUE_OFF]
; void* finaladdr(rax) = symoff + entry->l_addr
mov rcx, [r12 + L_ADDR_OFF]
add rax, rcx
add rax, [r12 + L_ADDR_OFF]
; *phash = finaladdr
stosq
cmp byte [rdi], 0
jne short .next_hash
; } while (1)
jmp short .next_hash
; jmp short .next_hash
.needed_end:
;xor rbp, rbp ; still 0 from _dl_start_user
; int3 ; debugging
; xor rbp, rbp ; still 0 from _dl_start_user
%ifndef NO_START_ARG
; arg for _start
mov rdi, rsp
%endif
%ifdef ALIGN_STACK
push rax
%endif
......
......@@ -10,9 +10,9 @@
#define COLOR(r, g, b) ((r << 16) + (g << 8) + b)
/*__attribute__((__used__))
void _start() {*/
int main() {
__attribute__((__used__,__externally_visible__))
void _start() {
/*int main() {*/
#ifdef MAKE_ESC_WORK
Atom wmDeleteMessage;
#endif
......
......@@ -3,9 +3,15 @@
const char *f = "foo";
__attribute__((__externally_visible__, __section__(".text.startup._start"), __noreturn__))
__attribute__((__externally_visible__, __section__(".text.startup._start"),
__noreturn__
#ifndef __clang__
, __naked__
#endif
))
int _start(void) {
puts("Hello World!");//printf("hello world %s\n", f);
asm volatile("int3");//exit(42);
__builtin_unreachable();
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment