Commit d9dbaae2 authored by PoroCYon's avatar PoroCYon Committed by PoroCYon
Browse files

fixes. dnload mode should now be usableish

parent 26d96fc2
...@@ -18,7 +18,7 @@ CXXOPTFLAGS=$(COPTFLAGS) -fno-exceptions \ ...@@ -18,7 +18,7 @@ CXXOPTFLAGS=$(COPTFLAGS) -fno-exceptions \
-fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr \ -fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr \
-fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit -fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit
CFLAGS=-Wall -Wextra -Wpedantic -std=gnu11 -nostartfiles -fno-PIC $(COPTFLAGS) CFLAGS=-Wall -Wextra -Wpedantic -std=gnu11 -nostartfiles -fno-PIC $(COPTFLAGS) #-DUSE_DL_FINI
CXXFLAGS=-Wall -Wextra -Wpedantic -std=c++11 $(CXXOPTFLAGS) -nostartfiles -fno-PIC CXXFLAGS=-Wall -Wextra -Wpedantic -std=c++11 $(CXXOPTFLAGS) -nostartfiles -fno-PIC
ASFLAGS=-I $(SRCDIR)/ ASFLAGS=-I $(SRCDIR)/
...@@ -40,13 +40,14 @@ CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2) ...@@ -40,13 +40,14 @@ CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
LIBS=-lc LIBS=-lc
SMOLFLAGS += -d SMOLFLAGS +=
ASFLAGS += -DUSE_INTERP -DALIGN_STACK -DNO_START_ARG #-DUSE_DT_DEBUG ASFLAGS += -DUSE_INTERP -DALIGN_STACK
#-DUSE_DNLOAD_LOADER #-DUSE_DT_DEBUG #-DUSE_DL_FINI #-DNO_START_ARG #-DUNSAFE_DYNAMIC
NASM ?= nasm NASM ?= nasm
PYTHON3 ?= python3 PYTHON3 ?= python3
all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag-crt $(BINDIR)/hello-_start all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag $(BINDIR)/hello-_start
LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL
......
...@@ -8,24 +8,32 @@ PoC by Shiz, bugfixing and 64-bit version by PoroCYon. ...@@ -8,24 +8,32 @@ PoC by Shiz, bugfixing and 64-bit version by PoroCYon.
```sh ```sh
./smol.py -lfoo -lbar input.o... smol-output.asm ./smol.py -lfoo -lbar input.o... smol-output.asm
nasm -I src/ [-DUSE_INTERP] [-DALIGN_STACK] [-DUSE_NX] [-DUSE_DL_FINI] \ nasm -I src/ [-Doption ...] -o nasm-output.o smol-output.asm
[-DUSE_DT_DEBUG] [-DSKIP_ENTRIES] -o nasm-output.o smol-output.asm ld -T ld/link.ld --oformat=binary -o output.elf nasm-output.o input.o...
ld -T ld/link.ld -o binary nasm-output.o input.o... # or cc -T ld/link.ld -Wl,--oformat=binary -o output.elf nasm-output.o input.o...
``` ```
* `USE_INTERP`: Include an interp segment in the output ELF file. If not, the * `USE_INTERP`: Include an interp segment in the output ELF file. If not, the
dynamic linker **must** be invoked *explicitly*! (You probably want to dynamic linker **must** be invoked *explicitly*! (You probably want to
enable this.) enable this.) Costs the size of a phdr plus the size of the interp string.
* `ALIGN_STACK`: *64-bit only*: realign the stack so that SSE instructions * `ALIGN_STACK`: *64-bit only*: realign the stack so that SSE instructions
won't segfault. won't segfault. Costs 1 byte.
* `USE_NX`: Don't use `RWE` segments at all. Not very well tested. * `USE_NX`: Don't use `RWE` segments at all. Not very well tested. Costs the
* `USE_DL_FINI`: keep track of the `_dl_fini` function and pass it to `_start`. size of 1 phdr.
* `USE_DL_FINI`: keep track of the `_dl_fini` function and pass it to your
`_start`. Costs 2 bytes, plus maybe a few more depending on how it's passed
to `__libc_start_main`.
* `USE_DT_DEBUG`: retrieve the `struct link_map` from the `r_debug` linker * `USE_DT_DEBUG`: retrieve the `struct link_map` from the `r_debug` linker
data (which is placed at `DT_DEBUG` at startup) instead of exploiting data data (which is placed at `DT_DEBUG` at startup) instead of exploiting data
leakage from `_dt_start_user`. Might be more compatible, but strictly worse leakage from `_dt_start_user`. Might be more compatible and compressible, but
size-wise on i386, and probably on x86_64 as well. strictly worse size-wise by 10 (i386) or 3 (x86_64) bytes.
* `SKIP_ENTRIES`: skip the first two entries of the `struct link_map`, which * `SKIP_ENTRIES`: skip the first two entries of the `struct link_map`, which
represent the main binary and the vDSO. represent the main binary and the vDSO. Costs around 5 bytes.
* `USE_DNLOAD_LOADER`: *64-bit only*: use the symbol loading mechanism as used
in dnload (i.e. traverse the symtab of the imported libraries). Slightly
larger, but probably better compressible.
* `NO_START_ARG`: *don't* pass the stack pointer to `_start` as the first arg.
Will make it unable to read argc/argv/environ, but gives you 3 bytes.
``` ```
usage: smol.py [-h] [-m TARGET] [-l LIB] [-L DIR] [--nasm NASM] [--cc CC] usage: smol.py [-h] [-m TARGET] [-l LIB] [-L DIR] [--nasm NASM] [--cc CC]
...@@ -58,6 +66,9 @@ imported by a `smol`-ified binary. This can thus be used to detect user mistakes ...@@ -58,6 +66,9 @@ imported by a `smol`-ified binary. This can thus be used to detect user mistakes
during dynamic linking. (Think of it as an equivalent of `ldd`, except that it during dynamic linking. (Think of it as an equivalent of `ldd`, except that it
also checks whether the imported functions are present as well.) also checks whether the imported functions are present as well.)
***NOTE***: `smoldd.py` no longer supports 64-bit binaries, as
there is currently no (good) way of retrieving the symbol hash table.
## Internal workings ## Internal workings
`smol.py` inspects the input object files for needed library files and symbols. `smol.py` inspects the input object files for needed library files and symbols.
......
...@@ -30,8 +30,8 @@ def main(): ...@@ -30,8 +30,8 @@ def main():
parser.add_argument('--readelf', default=shutil.which('readelf'), \ parser.add_argument('--readelf', default=shutil.which('readelf'), \
help="which readelf binary to use") help="which readelf binary to use")
parser.add_argument('-d', '--dnload', default=False, action='store_true', \ # parser.add_argument('-d', '--dnload', default=False, action='store_true', \
help="Use dnload's mechanism of importing functions. Slightly larger, but usually better compressable.") # help="Use dnload's mechanism of importing functions. Slightly larger, but usually better compressable.")
# parser.add_argument('--libsep', default=False, action='store_true', \ # parser.add_argument('--libsep', default=False, action='store_true', \
# help="Separete import symbols per library, instead of looking at every library when resolving a symbol.") # help="Separete import symbols per library, instead of looking at every library when resolving a symbol.")
...@@ -66,7 +66,7 @@ def main(): ...@@ -66,7 +66,7 @@ def main():
symbols.setdefault(library, []) symbols.setdefault(library, [])
symbols[library].append((symbol, reloc)) symbols[library].append((symbol, reloc))
output(arch, symbols, args.dnload, args.output) output(arch, symbols, args.output)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
......
...@@ -51,20 +51,26 @@ def output_x86(libraries, outf): ...@@ -51,20 +51,26 @@ def output_x86(libraries, outf):
# end output_x86 # end output_x86
def output_amd64(libraries, dnload, outf): def output_amd64(libraries, outf):
outf.write('; vim: set ft=nasm:\n') outf.write('; vim: set ft=nasm:\n')
outf.write('bits 64\n') outf.write('bits 64\n')
if dnload:
outf.write('%define USE_DNLOAD_LOADER\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries } shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
outf.write('%include "header64.asm"\n') outf.write('%include "header64.asm"\n')
outf.write('dynamic.needed:\n') outf.write('dynamic.needed:\n')
for library in libraries: for library in libraries:
outf.write('dq 1;DT_NEEDED\n') outf.write(' dq 1;DT_NEEDED\n')
outf.write('dq (_symbols.{} - _strtab)\n'.format(shorts[library])) outf.write(' dq (_symbols.{} - _strtab)\n'.format(shorts[library]))
outf.write('dynamic.end:\n') outf.write("""\
dynamic.symtab:
dq DT_SYMTAB ; d_tag
dq 0 ; d_un.d_ptr
dynamic.end:
%ifndef UNSAFE_DYNAMIC
dq DT_NULL
%endif
""")
outf.write('[section .rodata.neededlibs]\n') outf.write('[section .rodata.neededlibs]\n')
...@@ -110,9 +116,9 @@ global {name} ...@@ -110,9 +116,9 @@ global {name}
# end output_amd64 # end output_amd64
def output(arch, libraries, dnload, outf): def output(arch, libraries, outf):
if arch == 'i386': output_x86(libraries, outf) if arch == 'i386': output_x86(libraries, outf)
elif arch == 'x86_64': output_amd64(libraries, dnload, outf) elif arch == 'x86_64': output_amd64(libraries, outf)
else: else:
eprintf("E: cannot emit for arch '" + str(arch) + "'") eprintf("E: cannot emit for arch '" + str(arch) + "'")
sys.exit(1) sys.exit(1)
......
...@@ -15,8 +15,11 @@ __attribute__((__externally_visible__, __section__(".text.startup._start"), ...@@ -15,8 +15,11 @@ __attribute__((__externally_visible__, __section__(".text.startup._start"),
, __naked__ , __naked__
#endif #endif
)) ))
int _start(void* stack) { int _start(void* stack
// TODO: _dl_fini etc. #ifdef USE_DL_FINI
, void (*dl_fini)(void)
#endif
) {
int argc=*(size_t*)stack; int argc=*(size_t*)stack;
char** argv=(void*)(&((size_t*)stack)[1]); char** argv=(void*)(&((size_t*)stack)[1]);
...@@ -32,7 +35,13 @@ int _start(void* stack) { ...@@ -32,7 +35,13 @@ int _start(void* stack) {
:"S"(argc), "D" (main), "d" (argv) :"S"(argc), "D" (main), "d" (argv)
:); :);
#else #else
__libc_start_main(main, argc, argv, NULL, NULL, NULL, (void*)stack); __libc_start_main(main, argc, argv, NULL, NULL,
#ifdef USE_DL_FINI
dl_fini
#else
NULL
#endif
, (void*)stack);
#endif #endif
__builtin_unreachable(); __builtin_unreachable();
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
%define PHDR_W (2) %define PHDR_W (2)
%define PHDR_X (1) %define PHDR_X (1)
%define DT_NULL ( 0)
%define DT_STRTAB ( 5) %define DT_STRTAB ( 5)
%define DT_SYMTAB ( 6) %define DT_SYMTAB ( 6)
%define DT_DEBUG (21) %define DT_DEBUG (21)
......
...@@ -100,9 +100,6 @@ dynamic: ...@@ -100,9 +100,6 @@ dynamic:
dynamic.strtab: dynamic.strtab:
dq DT_STRTAB ; d_tag dq DT_STRTAB ; d_tag
dq _strtab ; d_un.d_ptr dq _strtab ; d_un.d_ptr
dynamic.symtab:
dq DT_SYMTAB ; d_tag
dq 0 ; d_un.d_ptr
%ifdef USE_DT_DEBUG %ifdef USE_DT_DEBUG
dynamic.debug: dynamic.debug:
dq DT_DEBUG ; d_tag dq DT_DEBUG ; d_tag
......
...@@ -236,14 +236,11 @@ repne scasd ; technically, scasq should be used, but meh. this is 1 byte smaller ...@@ -236,14 +236,11 @@ repne scasd ; technically, scasq should be used, but meh. this is 1 byte smaller
mov rdi, rsp mov rdi, rsp
%endif %endif
%ifdef ALIGN_STACK %ifdef ALIGN_STACK
%ifdef USE_DNLOAD_LOADER
push rax push rax
%else
; apparently not needed?
%endif
%endif %endif
%ifdef USE_DL_FINI %ifdef USE_DL_FINI
xchg rsi, r13 ; _dl_fini xchg rsi, r13 ; _dl_fini
%endif %endif
; fallthru to _start ; fallthru to _start
;.loopme: jmp short .loopme
...@@ -10,9 +10,9 @@ ...@@ -10,9 +10,9 @@
#define COLOR(r, g, b) ((r << 16) + (g << 8) + b) #define COLOR(r, g, b) ((r << 16) + (g << 8) + b)
/*__attribute__((__used__,__externally_visible__)) __attribute__((__used__,__externally_visible__))
void _start() {*/ void _start() {
int main() { /*int main() {*/
#ifdef MAKE_ESC_WORK #ifdef MAKE_ESC_WORK
Atom wmDeleteMessage; Atom wmDeleteMessage;
#endif #endif
......
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
__attribute__((__section__(".rodata.hello-_start$f"))) /*__attribute__((__section__(".rodata.hello-_start$f")))
const static char *f = "foo"; static const char *f = "foo";*/
__attribute__((__externally_visible__, __section__(".text.startup._start"), __attribute__((__externally_visible__, __section__(".text.startup._start"),
__noreturn__ __noreturn__
...@@ -11,7 +11,8 @@ __attribute__((__externally_visible__, __section__(".text.startup._start"), ...@@ -11,7 +11,8 @@ __attribute__((__externally_visible__, __section__(".text.startup._start"),
#endif #endif
)) ))
int _start(void) { int _start(void) {
puts("Hello World!");//printf("hello world %s\n", f); //printf("hello world %s\n", f);
puts("Hello World!");
asm volatile("int3");//exit(42); asm volatile("int3");//exit(42);
__builtin_unreachable(); __builtin_unreachable();
} }
......
...@@ -5,7 +5,7 @@ int main(void) { ...@@ -5,7 +5,7 @@ int main(void) {
SDL_Init(SDL_INIT_VIDEO); SDL_Init(SDL_INIT_VIDEO);
SDL_Window *w = SDL_CreateWindow("nice", SDL_WINDOWPOS_UNDEFINED, SDL_Window *w = SDL_CreateWindow("nice", SDL_WINDOWPOS_UNDEFINED,
SDL_WINDOWPOS_UNDEFINED, 1280, 720, SDL_WINDOW_OPENGL); SDL_WINDOWPOS_UNDEFINED, 1280, 720, SDL_WINDOW_OPENGL);
printf("%s\n", "ohai"); puts("ohai");//printf("%s\n", "ohai");
SDL_Delay(3000); SDL_Delay(3000);
SDL_DestroyWindow(w); SDL_DestroyWindow(w);
return 0; return 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment