Index: uspace/lib/c/arch/abs32le/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/abs32le/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/abs32le/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -36,4 +36,31 @@
 #define _LIBC_abs32le_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * The argument and the entry function are written to the two words just
+ * below the returned stack pointer (arg at sp[-1], main at sp[-2]).
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to record on the stack.
+ * @param arg        Argument to record for @a main.
+ *
+ * @return End of the stack area aligned down to 16 bytes.
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t *sp = (uintptr_t *) ALIGN_DOWN((uintptr_t) stack + stack_size, 16);
+
+	sp[-1] = (uintptr_t) arg;
+	sp[-2] = (uintptr_t) main;
+
+	return (uintptr_t) sp;
+}
+
 #endif
 
Index: uspace/lib/c/arch/abs32le/src/thread_entry.c
===================================================================
--- uspace/lib/c/arch/abs32le/src/thread_entry.c	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/abs32le/src/thread_entry.c	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -30,4 +30,5 @@
  */
 
+#include <stdbool.h>
 #include <stddef.h>
 #include "../../../generic/private/thread.h"
@@ -35,5 +36,7 @@
 void __thread_entry(void)
 {
-	__thread_main(NULL);
+	/* No thread function is started here; the thread just spins forever. */
+	while (true) {
+	}
 }
 
Index: uspace/lib/c/arch/amd64/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/amd64/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/amd64/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -36,4 +36,19 @@
 #define _LIBC_amd64_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Build the initial stack: entry function at SP, its argument one word above. */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t top = ALIGN_DOWN((uintptr_t) stack + stack_size, 16);
+	uintptr_t *frame = ((uintptr_t *) top) - 2;
+	frame[0] = (uintptr_t) main;
+	frame[1] = (uintptr_t) arg;
+	return (uintptr_t) frame;
+}
+
 #endif
 
Index: uspace/lib/c/arch/amd64/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/amd64/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/amd64/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -35,4 +35,8 @@
 #
 SYMBOL_BEGIN(__thread_entry)
+	# Pop libc thread entry function and argument
+	popq %rax
+	popq %rdi
+
 	#
 	# Create the first stack frame.
@@ -42,8 +46,4 @@
 	movq %rsp, %rbp
 
-	#
-	# RAX contains address of uarg
-	#
-	movq %rax, %rdi
-	call FUNCTION_REF(__thread_main)
+	call *%rax
 SYMBOL_END(__thread_entry)
Index: uspace/lib/c/arch/arm32/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/arm32/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/arm32/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -37,4 +37,31 @@
 #define _LIBC_arm32_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * The entry function and then its argument are pushed below the aligned
+ * end of the stack area, so the result points at arg with main above it.
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to push on the stack.
+ * @param arg        Argument to push for @a main.
+ *
+ * @return Initial stack pointer (address of the pushed argument).
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t *sp = (uintptr_t *) ALIGN_DOWN((uintptr_t) stack + stack_size, 16);
+
+	*--sp = (uintptr_t) main;
+	*--sp = (uintptr_t) arg;
+
+	return (uintptr_t) sp;
+}
+
 #endif
 
Index: uspace/lib/c/arch/arm32/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/arm32/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/arm32/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -35,4 +35,7 @@
 #
 SYMBOL(__thread_entry)
+	# Pop libc entry function and its argument.
+	pop { r0, r4 }
+
 	#
 	# Create the first stack frame.
@@ -43,3 +46,3 @@
 	sub fp, ip, #4
 
-	b __thread_main
+	bx r4
Index: uspace/lib/c/arch/arm64/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/arm64/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/arm64/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -36,4 +36,31 @@
 #define _LIBC_arm64_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * The argument and then the entry function are pushed below the aligned
+ * end of the stack area, so the result points at main with arg above it.
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to push on the stack.
+ * @param arg        Argument to push for @a main.
+ *
+ * @return Initial stack pointer (address of the pushed entry function).
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t *sp = (uintptr_t *) ALIGN_DOWN((uintptr_t) stack + stack_size, 16);
+
+	*--sp = (uintptr_t) arg;
+	*--sp = (uintptr_t) main;
+
+	return (uintptr_t) sp;
+}
+
 #endif
 
Index: uspace/lib/c/arch/arm64/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/arm64/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/arm64/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -35,10 +35,11 @@
 #
 SYMBOL(__thread_entry)
-	#
+	# Pop the entry function (x1) and its argument (x0) from the stack.
+	ldp x1, x0, [sp], #16
+
 	# Create the first stack frame.
-	#
 	mov x29, #0
 	stp x29, x30, [sp, #-16]!
 	mov x29, sp
 
-	b __thread_main
+	br x1
Index: uspace/lib/c/arch/ia32/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/ia32/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/ia32/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -36,4 +36,19 @@
 #define _LIBC_ia32_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Lay out the initial stack: entry function at SP, argument one word above. */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t stack_end = ALIGN_DOWN((uintptr_t) stack + stack_size, 16);
+	uintptr_t *words = ((uintptr_t *) stack_end) - 2;
+	words[1] = (uintptr_t) arg;
+	words[0] = (uintptr_t) main;
+	return (uintptr_t) words;
+}
+
 #endif
 
Index: uspace/lib/c/arch/ia32/src/syscall.S
===================================================================
--- uspace/lib/c/arch/ia32/src/syscall.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/ia32/src/syscall.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -77,5 +77,4 @@
 	pushl %ebp
 	mov %esp, %ebp
-	lea ra, %edi
 	movl 20(%esp), %edx	# First argument.
 	movl 24(%esp), %ecx	# Second argument.
@@ -83,6 +82,5 @@
 	movl 32(%esp), %esi	# Fourth argument.
 	movl 44(%esp), %eax	# Syscall number.
-	sysenter
-ra:
+	call 1f
 	movw %cs, %cx
 	addw $8, %cx
@@ -95,3 +93,8 @@
 	popl %ebx
 	ret
+
+	/* Trampoline for entering kernel */
+1:
+	pop %edi
+	sysenter
 FUNCTION_END(__syscall_fast)
Index: uspace/lib/c/arch/ia32/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/ia32/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/ia32/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -41,4 +41,9 @@
 	# Do not set %gs, it contains descriptor that can see TLS
 
+	# Pop libc thread main function.
+	popl %eax
+	# Pop argument.
+	popl %ebx
+
 	#
 	# Create the first stack frame.
@@ -48,9 +53,6 @@
 	mov %esp, %ebp
 
-	#
-	# EAX contains address of uarg.
-	#
-	pushl %eax
-	call __thread_main
+	pushl %ebx
+	call *%eax
 
 	#
Index: uspace/lib/c/arch/ia64/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/ia64/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/ia64/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -36,4 +36,34 @@
 #define _LIBC_ia64_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * The stack pointer is placed in the middle of the stack area
+ * (stack + stack_size / 2), aligned down to 16 bytes; the upper half is
+ * presumably left for the register stack -- confirm against the kernel.
+ * The argument and the entry function go to sp[-1] and sp[-2].
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to record on the stack.
+ * @param arg        Argument to record for @a main.
+ *
+ * @return Initial stack pointer.
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t *sp = (uintptr_t *) (ALIGN_DOWN((uintptr_t) stack + stack_size / 2, 16));
+
+	/* Store data under stack pointer */
+	sp[-1] = (uintptr_t) arg;
+	sp[-2] = (uintptr_t) main;
+
+	return (uintptr_t) sp;
+}
+
 #endif
 
Index: uspace/lib/c/arch/ia64/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/ia64/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/ia64/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -37,16 +37,18 @@
 	alloc loc0 = ar.pfs, 0, 1, 1, 0
 
-#ifndef CONFIG_RTLD
-	# XXX This does not work in a shared library
-	movl gp = __gp
-#endif
+	add r8 = -8, sp ;;
+	# Entry function argument
+	ld8 out0 = [r8], -8 ;;
 
-	#
-	# r8 contains address of uarg structure.
-	#
+	# Entry function descriptor
+	ld8 r8 = [r8] ;;
+	# Entry function address
+	ld8 r9 = [r8], 8 ;;
+	# Entry function global pointer
+	ld8 gp = [r8] ;;
 
-	mov out0 = r8 ;;
-	# XXX br.call.sptk.many b0 = FUNCTION_REF(__thread_main)
-	br.call.sptk.many b0 = __thread_main
+	mov b1 = r9 ;;
+
+	br.call.sptk.many b0 = b1 ;;
 
 	#
Index: uspace/lib/c/arch/mips32/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/mips32/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/mips32/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -37,4 +37,31 @@
 #define _LIBC_mips32_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * The argument and the entry function are written to the two words just
+ * below the returned stack pointer (arg at sp[-1], main at sp[-2]).
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to record on the stack.
+ * @param arg        Argument to record for @a main.
+ *
+ * @return End of the stack area aligned down to 16 bytes.
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t *sp = (uintptr_t *) ALIGN_DOWN((uintptr_t) stack + stack_size, 16);
+
+	sp[-1] = (uintptr_t) arg;
+	sp[-2] = (uintptr_t) main;
+
+	return (uintptr_t) sp;
+}
+
 #endif
 
Index: uspace/lib/c/arch/mips32/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/mips32/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/mips32/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -40,25 +40,12 @@
 #
 SYMBOL(__thread_entry)
-	.ent __thread_entry
-	.frame $sp, ABI_STACK_FRAME, $ra
-	.cpload $t9
+	# All registers should be zero, including $fp and $ra.
+	# Instead of setting up a stack frame here, we leave it for the entry function.
 
-	#
-	# v0 contains address of uarg.
-	#
-	add $a0, $v0, 0
+	# Function argument, stored at sp[-1] by arch_thread_prepare().
+	lw $a0, -4($sp)
+	# Function pointer (sp[-2]). NOTE(review): PIC callees expect it in $t9 -- confirm.
+	lw $t0, -8($sp)
 
-	# Allocate the stack frame.
-	addiu $sp, -ABI_STACK_FRAME
-
-	# Allow PIC code
-	.cprestore 16
-
-	jal __thread_main
+	j $t0
 	nop
-
-	#
-	# Not reached.
-	#
-	addiu $sp, ABI_STACK_FRAME
-	.end __thread_entry
Index: uspace/lib/c/arch/ppc32/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/ppc32/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/ppc32/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -36,4 +36,34 @@
 #define _LIBC_ppc32_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * One pointer-sized word is reserved at the end of the stack area before
+ * aligning down to 16 bytes. The word at the stack pointer is zeroed
+ * (presumably the terminating back chain -- confirm), and the argument
+ * and the entry function go to sp[-1] and sp[-2].
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to record on the stack.
+ * @param arg        Argument to record for @a main.
+ *
+ * @return Initial stack pointer.
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t *sp = (uintptr_t *) ALIGN_DOWN((uintptr_t) stack + stack_size - sizeof(void *), 16);
+
+	sp[0] = 0;
+	sp[-1] = (uintptr_t) arg;
+	sp[-2] = (uintptr_t) main;
+
+	return (uintptr_t) sp;
+}
+
 #endif
 
Index: uspace/lib/c/arch/ppc32/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/ppc32/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/ppc32/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -35,11 +35,12 @@
 #
 SYMBOL(__thread_entry)
-	#
-	# Create the first stack frame.
-	#
-	li %r4, 0
-	stw %r4, 0(%r1)
-	stwu %r1, -16(%r1)
+	# Load the argument (r3) and the function address (r4) stored below r1.
+	lwz %r3, -4(%r1)
+	lwz %r4, -8(%r1)
 
-	b __thread_main
+	# Clear LR, then jump to the function through CTR.
+	li %r0, 0
+	mtlr %r0
 
+	mtctr %r4
+	bctr
Index: uspace/lib/c/arch/riscv64/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/riscv64/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/riscv64/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -36,4 +36,31 @@
 #define _LIBC_riscv64_THREAD_H_
 
+#include <align.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * The argument and the entry function are written to the two words just
+ * below the returned stack pointer (arg at sp[-1], main at sp[-2]).
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to record on the stack.
+ * @param arg        Argument to record for @a main.
+ *
+ * @return End of the stack area aligned down to 16 bytes.
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	uintptr_t *sp = (uintptr_t *) ALIGN_DOWN((uintptr_t) stack + stack_size, 16);
+
+	sp[-1] = (uintptr_t) arg;
+	sp[-2] = (uintptr_t) main;
+
+	return (uintptr_t) sp;
+}
+
 #endif
 
Index: uspace/lib/c/arch/riscv64/src/thread_entry.c
===================================================================
--- uspace/lib/c/arch/riscv64/src/thread_entry.c	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/riscv64/src/thread_entry.c	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -35,5 +35,7 @@
 void __thread_entry(void)
 {
-	__thread_main((void *) 0);
+	// TODO: load and call the function stored by arch_thread_prepare()
+	for (;;)
+		;
 }
 
Index: uspace/lib/c/arch/sparc64/include/libarch/thread.h
===================================================================
--- uspace/lib/c/arch/sparc64/include/libarch/thread.h	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/sparc64/include/libarch/thread.h	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -35,4 +35,35 @@
 #define _LIBC_sparc64_THREAD_H_
 
+#include <assert.h>
+#include <align.h>
+#include <libarch/stack.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/** Prepare the initial stack of a new thread.
+ *
+ * Space of STACK_WINDOW_SAVE_AREA_SIZE + STACK_ARG_SAVE_AREA_SIZE bytes is
+ * left above the stack pointer, which is then aligned down to 16 bytes.
+ * The argument and the entry function go to sp[-1] and sp[-2].
+ *
+ * @param stack      Lowest address of the stack area.
+ * @param stack_size Size of the stack area in bytes.
+ * @param main       Entry function to record on the stack.
+ * @param arg        Argument to record for @a main.
+ *
+ * @return Initial stack pointer with STACK_BIAS subtracted.
+ */
+static inline uintptr_t arch_thread_prepare(void *stack, size_t stack_size,
+    void (*main)(void *), void *arg)
+{
+	/* We must leave space above the stack pointer for initial register spill area. */
+	uintptr_t *sp = (uintptr_t *) ALIGN_DOWN((uintptr_t) stack + stack_size - STACK_WINDOW_SAVE_AREA_SIZE - STACK_ARG_SAVE_AREA_SIZE, 16);
+
+	sp[-1] = (uintptr_t) arg;
+	sp[-2] = (uintptr_t) main;
+
+	return ((uintptr_t) sp) - STACK_BIAS;
+}
+
 #endif
 
Index: uspace/lib/c/arch/sparc64/src/thread_entry.S
===================================================================
--- uspace/lib/c/arch/sparc64/src/thread_entry.S	(revision 5907aa427217ea23e813270401aac4d23e87b165)
+++ uspace/lib/c/arch/sparc64/src/thread_entry.S	(revision d3109ffbe9773fed7630d7d9dae993d02f8967ac)
@@ -35,4 +35,6 @@
 #
 SYMBOL(__thread_entry)
+	add %sp, 0x7ff, %g1
+
 	#
 	# Create the first stack frame.
@@ -43,9 +45,12 @@
 
 	#
-	# Propagate the input arguments to the new window.
+	# Load libc entry point address and argument from stack
 	#
+	ldn [%g1 - 8], %o0
+	ldn [%g1 - 16], %g1
 
-	call __thread_main		! %o0 contains address of uarg
-	mov %i0, %o0
+	jmpl %g1, %r0
+	# Wipe link register
+	xor %o7, %o7, %o7
 
 	! not reached
