Index: uspace/lib/c/generic/dlfcn.c
===================================================================
--- uspace/lib/c/generic/dlfcn.c	(revision af2254ece47fffd627e909e8547dca085773c990)
+++ uspace/lib/c/generic/dlfcn.c	(revision b272c67a36699ea68cd8634a6e11d904c307d1bc)
@@ -49,23 +49,10 @@
 	module_t *m;
 
-	if (runtime_env == NULL) {
-		printf("Dynamic linker not set up -- initializing.\n");
-		rtld_init_static();
-	}
-
-	printf("dlopen(\"%s\", %d)\n", path, flag);
-
-	printf("module_find('%s')\n", path);
 	m = module_find(runtime_env, path);
 	if (m == NULL) {
-		printf("NULL. module_load('%s')\n", path);
 		m = module_load(runtime_env, path, mlf_local);
-		printf("module_load_deps(m)\n");
 		module_load_deps(m, mlf_local);
 		/* Now relocate. */
-		printf("module_process_relocs(m)\n");
 		module_process_relocs(m);
-	} else {
-		printf("not NULL\n");
 	}
 
@@ -81,8 +68,7 @@
 	module_t *sm;
 
-	printf("dlsym(0x%lx, \"%s\")\n", (long)mod, sym_name);
 	sd = symbol_bfs_find(sym_name, (module_t *) mod, &sm);
 	if (sd != NULL) {
-		return symbol_get_addr(sd, sm);
+		return symbol_get_addr(sd, sm, __tcb_get());
 	}
 
Index: uspace/lib/c/generic/elf/elf_mod.c
===================================================================
--- uspace/lib/c/generic/elf/elf_mod.c	(revision af2254ece47fffd627e909e8547dca085773c990)
+++ uspace/lib/c/generic/elf/elf_mod.c	(revision b272c67a36699ea68cd8634a6e11d904c307d1bc)
@@ -248,6 +248,22 @@
 }
 
+/** Record TLS segment parameters from a PT_TLS program header.
+ *
+ * @param elf  Loader state; its bias is added to the segment address.
+ * @param hdr  TLS program header (p_vaddr, p_filesz, p_memsz, p_align).
+ * @param info Output: tdata address/size, tbss size (memsz - filesz), align.
+ */
+static void tls_program_header(elf_ld_t *elf, elf_segment_header_t *hdr,
+    elf_tls_info_t *info)
+{
+	info->tdata = (void *)((uint8_t *)hdr->p_vaddr + elf->bias);
+	info->tdata_size = hdr->p_filesz;
+	info->tbss_size = hdr->p_memsz - hdr->p_filesz;
+	info->tls_align = hdr->p_align;
+}
+
 /** Process segment header.
  *
+ * @param elf   Pointer to loader state buffer.
  * @param entry	Segment header.
  *
@@ -277,4 +293,10 @@
 	case 0x70000000:
 		/* FIXME: MIPS reginfo */
+		break;
+	case PT_TLS:
+		/* Parse TLS program header */
+		tls_program_header(elf, entry, &elf->info->tls);
+		DPRINTF("TLS header found at %p\n",
+		    (void *)((uint8_t *)entry->p_vaddr + elf->bias));
 		break;
 	case PT_SHLIB:
Index: uspace/lib/c/generic/libc.c
===================================================================
--- uspace/lib/c/generic/libc.c	(revision af2254ece47fffd627e909e8547dca085773c990)
+++ uspace/lib/c/generic/libc.c	(revision b272c67a36699ea68cd8634a6e11d904c307d1bc)
@@ -41,4 +41,5 @@
  */
 
+#include <errno.h>
 #include <libc.h>
 #include <stdlib.h>
@@ -68,4 +69,16 @@
 	__malloc_init();
 	
+	/* Save the PCB pointer */
+	__pcb = (pcb_t *) pcb_ptr;
+	
+#ifdef CONFIG_RTLD
+	if (__pcb != NULL && __pcb->rtld_runtime != NULL) {
+		runtime_env = (rtld_t *) __pcb->rtld_runtime;
+	} else {
+		if (rtld_init_static() != EOK)
+			abort();
+	}
+#endif
+	
 	fibril_t *fibril = fibril_setup();
 	if (fibril == NULL)
@@ -74,6 +87,4 @@
 	__tcb_set(fibril->tcb);
 	
-	/* Save the PCB pointer */
-	__pcb = (pcb_t *) pcb_ptr;
 	
 #ifdef FUTEX_UPGRADABLE
@@ -89,9 +100,4 @@
 	char **argv;
 	
-#ifdef CONFIG_RTLD
-	if (__pcb != NULL && __pcb->rtld_runtime != NULL) {
-		runtime_env = (rtld_t *) __pcb->rtld_runtime;
-	}
-#endif
 	/*
 	 * Get command line arguments and initialize
Index: uspace/lib/c/generic/rtld/module.c
===================================================================
--- uspace/lib/c/generic/rtld/module.c	(revision af2254ece47fffd627e909e8547dca085773c990)
+++ uspace/lib/c/generic/rtld/module.c	(revision b272c67a36699ea68cd8634a6e11d904c307d1bc)
@@ -37,4 +37,5 @@
 #include <adt/list.h>
 #include <elf/elf_load.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <loader/pcb.h>
@@ -48,4 +49,37 @@
 #include <rtld/rtld_arch.h>
 #include <rtld/module.h>
+
+/** Create the module record for a static executable.
+ *
+ * @param rtld Run-time dynamic linker; the module is appended to its list
+ * @param rmodule Place to store pointer to new module or @c NULL
+ * @return EOK on success, ENOMEM if the module cannot be allocated
+ */
+int module_create_static_exec(rtld_t *rtld, module_t **rmodule)
+{
+	module_t *module;
+
+	module = calloc(1, sizeof(module_t));	/* zero-filled record */
+	if (module == NULL)
+		return ENOMEM;
+
+	module->id = rtld_get_next_id(rtld);
+	module->dyn.soname = "[program]";
+
+	module->rtld = rtld;
+	module->exec = true;
+	module->local = true;
+
+	module->tdata = &_tdata_start;
+	module->tdata_size = &_tdata_end - &_tdata_start;
+	module->tbss_size = &_tbss_end - &_tbss_start;
+	module->tls_align = (uintptr_t)&_tls_alignment;	/* address is the value */
+
+	list_append(&module->modules_link, &rtld->modules);
+
+	if (rmodule != NULL)
+		*rmodule = module;
+	return EOK;
+}
 
 /** (Eagerly) process all relocation tables in a module.
@@ -135,5 +169,5 @@
 
 	m = calloc(1, sizeof(module_t));
-	if (!m) {
+	if (m == NULL) {
 		printf("malloc failed\n");
 		exit(1);
@@ -141,4 +175,6 @@
 
 	m->rtld = rtld;
+	m->id = rtld_get_next_id(rtld);
+
 	if ((flags & mlf_local) != 0)
 		m->local = true;
@@ -181,4 +217,13 @@
 	/* Insert into the list of loaded modules */
 	list_append(&m->modules_link, &rtld->modules);
+
+	/* Copy TLS info */
+	m->tdata = info.tls.tdata;
+	m->tdata_size = info.tls.tdata_size;
+	m->tbss_size = info.tls.tbss_size;
+	m->tls_align = info.tls.tls_align;
+
+	DPRINTF("tdata at %p size %zu, tbss size %zu\n",
+	    m->tdata, m->tdata_size, m->tbss_size);
 
 	return m;
@@ -243,4 +288,15 @@
 }
 
+/** Find a module in rtld->modules by ID; returns NULL if none matches. */
+module_t *module_by_id(rtld_t *rtld, unsigned long id)
+{
+	list_foreach(rtld->modules, modules_link, module_t, m) {
+		if (m->id == id)
+			return m;
+	}
+
+	return NULL;
+}
+
 /** Process relocations in modules.
  *
@@ -260,4 +316,28 @@
 }
 
+/** Assign static TLS offsets (ioffs) to all modules and total up tls_size. */
+void modules_process_tls(rtld_t *rtld)
+{
+#ifdef CONFIG_TLS_VARIANT_1
+	list_foreach(rtld->modules, modules_link, module_t, m) {
+		m->ioffs = rtld->tls_size;	/* block starts at running total */
+		list_append(&m->imodules_link, &rtld->imodules);
+		rtld->tls_size += m->tdata_size + m->tbss_size;
+	}
+#else /* CONFIG_TLS_VARIANT_2 */
+	size_t offs = 0;
+
+	list_foreach(rtld->modules, modules_link, module_t, m) {
+		rtld->tls_size += m->tdata_size + m->tbss_size;
+	}
+
+	list_foreach(rtld->modules, modules_link, module_t, m) {
+		offs += m->tdata_size + m->tbss_size;
+		m->ioffs = rtld->tls_size - offs;	/* measured from the end */
+		list_append(&m->imodules_link, &rtld->imodules);
+	}
+#endif
+}
+
 /** Clear BFS tags of all modules.
  */
Index: uspace/lib/c/generic/rtld/rtld.c
===================================================================
--- uspace/lib/c/generic/rtld/rtld.c	(revision af2254ece47fffd627e909e8547dca085773c990)
+++ uspace/lib/c/generic/rtld/rtld.c	(revision b272c67a36699ea68cd8634a6e11d904c307d1bc)
@@ -43,13 +43,24 @@
 rtld_t *runtime_env;
 static rtld_t rt_env_static;
-static module_t prog_mod;
 
 /** Initialize the runtime linker for use in a statically-linked executable. */
-void rtld_init_static(void)
-{
+int rtld_init_static(void)
+{
+	int rc;
+
 	runtime_env = &rt_env_static;
 	list_initialize(&runtime_env->modules);
+	list_initialize(&runtime_env->imodules);
 	runtime_env->next_bias = 0x2000000;
 	runtime_env->program = NULL;
+	runtime_env->next_id = 1;
+
+	rc = module_create_static_exec(runtime_env, NULL);
+	if (rc != EOK)
+		return rc;
+
+	modules_process_tls(runtime_env);
+
+	return EOK;
 }
 
@@ -62,4 +73,5 @@
 {
 	rtld_t *env;
+	module_t *prog;
 
 	DPRINTF("Load dynamically linked program.\n");
@@ -70,4 +82,12 @@
 		return ENOMEM;
 
+	env->next_id = 1;
+
+	prog = calloc(1, sizeof(module_t));
+	if (prog == NULL) {
+		free(env);
+		return ENOMEM;
+	}
+
 	/*
 	 * First we need to process dynamic sections of the executable
@@ -76,17 +96,27 @@
 
 	DPRINTF("Parse program .dynamic section at %p\n", p_info->dynamic);
-	dynamic_parse(p_info->dynamic, 0, &prog_mod.dyn);
-	prog_mod.bias = 0;
-	prog_mod.dyn.soname = "[program]";
-	prog_mod.rtld = env;
-	prog_mod.exec = true;
-	prog_mod.local = false;
+	dynamic_parse(p_info->dynamic, 0, &prog->dyn);
+	prog->bias = 0;
+	prog->dyn.soname = "[program]";
+	prog->rtld = env;
+	prog->id = rtld_get_next_id(env);
+	prog->exec = true;
+	prog->local = false;
+
+	prog->tdata = p_info->tls.tdata;
+	prog->tdata_size = p_info->tls.tdata_size;
+	prog->tbss_size = p_info->tls.tbss_size;
+	prog->tls_align = p_info->tls.tls_align;
+
+	DPRINTF("prog tdata at %p size %zu, tbss size %zu\n",
+	    prog->tdata, prog->tdata_size, prog->tbss_size);
 
 	/* Initialize list of loaded modules */
 	list_initialize(&env->modules);
-	list_append(&prog_mod.modules_link, &env->modules);
+	list_initialize(&env->imodules);
+	list_append(&prog->modules_link, &env->modules);
 
 	/* Pointer to program module. Used as root of the module graph. */
-	env->program = &prog_mod;
+	env->program = prog;
 
 	/* Work around non-existent memory space allocation. */
@@ -98,5 +128,8 @@
 
 	DPRINTF("Load all program dependencies\n");
-	module_load_deps(&prog_mod, 0);
+	module_load_deps(prog, 0);
+
+	/* Compute static TLS size */
+	modules_process_tls(env);
 
 	/*
@@ -106,5 +139,5 @@
 	/* Process relocations in all modules */
 	DPRINTF("Relocate all modules\n");
-	modules_process_relocs(env, &prog_mod);
+	modules_process_relocs(env, prog);
 
 	*rre = env;
@@ -112,4 +145,130 @@
 }
 
+/** Create TLS block and dynamic thread vector for the initial modules.
+ * @param rtld Run-time linker supplying tls_size and the imodules list
+ * @return Pointer to TCB, or NULL if the TLS block or DTV allocation fails
+ */
+tcb_t *rtld_tls_make(rtld_t *rtld)
+{
+	void *data;
+	tcb_t *tcb;
+	size_t offset;
+	void **dtv;
+	size_t nmods;
+	size_t i;
+
+	tcb = tls_alloc_arch(&data, rtld->tls_size);
+	if (tcb == NULL)
+		return NULL;
+
+	/* Allocate dynamic thread vector: one slot per module plus one extra */
+	nmods = list_count(&rtld->imodules);
+	dtv = malloc((nmods + 1) * sizeof(void *));
+	if (dtv == NULL) {
+		tls_free(tcb);
+		return NULL;
+	}
+
+	/*
+	 * We define generation number to be equal to vector length.
+	 * We start with a vector covering the initially loaded modules.
+	 */
+	DTV_GN(dtv) = nmods;
+
+	/*
+	 * Copy thread local data from the initialization images of initial
+	 * modules. Zero out thread-local uninitialized data.
+	 */
+
+#ifdef CONFIG_TLS_VARIANT_1
+	/*
+	 * Ascending addresses: tdata first, then tbss, module after module
+	 */
+	offset = 0; i = 1;
+	list_foreach(rtld->imodules, imodules_link, module_t, m) {
+		assert(i == m->id);
+		assert(offset + m->tdata_size + m->tbss_size <= rtld->tls_size);
+		dtv[i++] = data + offset;
+		memcpy(data + offset, m->tdata, m->tdata_size);
+		offset += m->tdata_size;
+		memset(data + offset, 0, m->tbss_size);
+		offset += m->tbss_size;
+	}
+#else /* CONFIG_TLS_VARIANT_2 */
+	/*
+	 * Descending addresses: blocks are placed back from the end of data
+	 */
+	offset = 0; i = 1;
+	list_foreach(rtld->imodules, imodules_link, module_t, m) {
+		assert(i == m->id);
+		assert(offset + m->tdata_size + m->tbss_size <= rtld->tls_size);
+		offset += m->tbss_size;
+		memset(data + rtld->tls_size - offset, 0, m->tbss_size);
+		offset += m->tdata_size;
+		memcpy(data + rtld->tls_size - offset, m->tdata, m->tdata_size);
+		dtv[i++] = data + rtld->tls_size - offset;
+	}
+#endif
+
+	tcb->dtv = dtv;
+	return tcb;
+}
+
+unsigned long rtld_get_next_id(rtld_t *rtld)
+{
+	return rtld->next_id++;	/* hand out current ID, then advance counter */
+}
+
+/** Get address of thread-local variable.
+ *
+ * Grows the thread's DTV and allocates the module's TLS block on first use.
+ * @param rtld RTLD instance
+ * @param tcb TCB of the thread whose instance to return
+ * @param mod_id Module ID
+ * @param offset Offset within TLS block of the module
+ * @return Address of thread-local variable
+ */
+void *rtld_tls_get_addr(rtld_t *rtld, tcb_t *tcb, unsigned long mod_id,
+    unsigned long offset)
+{
+	module_t *m;
+	size_t dtv_len;
+	void *tls_block;
+
+	dtv_len = DTV_GN(tcb->dtv);
+	if (dtv_len < mod_id) {
+		/* Vector is short */
+		tcb->dtv = realloc(tcb->dtv, (1 + mod_id) * sizeof(void *));
+		/* XXX This can fail if OOM */
+		assert(tcb->dtv != NULL);
+		/* Zero out new part of vector */
+		memset(tcb->dtv + (1 + dtv_len), 0, (mod_id - dtv_len) *
+		    sizeof(void *));
+		/* Record new length so later calls do not re-zero the slots */
+		DTV_GN(tcb->dtv) = mod_id;
+	}
+
+	if (tcb->dtv[mod_id] == NULL) {
+		/* TLS block is not allocated */
+		m = module_by_id(rtld, mod_id);
+		assert(m != NULL);
+		/* Should not be initial module, those have TLS pre-allocated */
+		assert(!link_used(&m->imodules_link));
+
+		tls_block = malloc(m->tdata_size + m->tbss_size);
+		/* XXX This can fail if OOM */
+		assert(tls_block != NULL);
+
+		/* Copy tdata */
+		memcpy(tls_block, m->tdata, m->tdata_size);
+		/* Zero out tbss */
+		memset((uint8_t *)tls_block + m->tdata_size, 0, m->tbss_size);
+
+		tcb->dtv[mod_id] = tls_block;
+	}
+
+	return (uint8_t *)(tcb->dtv[mod_id]) + offset;
+}
+
 /** @}
  */
Index: uspace/lib/c/generic/rtld/symbol.c
===================================================================
--- uspace/lib/c/generic/rtld/symbol.c	(revision af2254ece47fffd627e909e8547dca085773c990)
+++ uspace/lib/c/generic/rtld/symbol.c	(revision b272c67a36699ea68cd8634a6e11d904c307d1bc)
@@ -249,7 +249,21 @@
 }
 
-void *symbol_get_addr(elf_symbol_t *sym, module_t *m)
-{
-	if (sym->st_shndx == SHN_ABS) {
+/** Get symbol address.
+ *
+ * @param sym Symbol
+ * @param m Module containing the symbol
+ * @param tcb TCB of the thread whose thread-local variable instance should
+ *            be returned. If @a tcb is @c NULL then @c NULL is returned for
+ *            thread-local variables.
+ *
+ * @return Symbol address
+ */
+void *symbol_get_addr(elf_symbol_t *sym, module_t *m, tcb_t *tcb)
+{
+	if (ELF_ST_TYPE(sym->st_info) == STT_TLS) {
+		if (tcb == NULL)
+			return NULL;
+		return rtld_tls_get_addr(m->rtld, tcb, m->id, sym->st_value);
+	} else if (sym->st_shndx == SHN_ABS) {
 		/* Do not add bias to absolute symbols */
 		return (void *) sym->st_value;
Index: uspace/lib/c/generic/tls.c
===================================================================
--- uspace/lib/c/generic/tls.c	(revision af2254ece47fffd627e909e8547dca085773c990)
+++ uspace/lib/c/generic/tls.c	(revision b272c67a36699ea68cd8634a6e11d904c307d1bc)
@@ -34,18 +34,35 @@
  * Support for thread-local storage, as described in:
  * 	Drepper U.: ELF Handling For Thread-Local Storage, 2005
- *
- * Only static model is supported.
- */ 
+ */
 
 #include <tls.h>
 #include <malloc.h>
 #include <str.h>
-#include <align.h>
 #include <unistd.h>
 
+#ifdef CONFIG_RTLD
+#include <rtld/rtld.h>
+#endif
+
+size_t tls_get_size(void)
+{
+#ifdef CONFIG_RTLD
+	if (runtime_env != NULL)
+		return runtime_env->tls_size;	/* size kept by the linker */
+#endif
+	return &_tbss_end - &_tdata_start;	/* span between linker symbols */
+}
+
+/** Get address of the static TLS block relative to the TCB from __tcb_get() */
+void *tls_get(void)
+{
+#ifdef CONFIG_TLS_VARIANT_1
+	return (uint8_t *)__tcb_get() + sizeof(tcb_t);
+#else /* CONFIG_TLS_VARIANT_2 */
+	return (uint8_t *)__tcb_get() - tls_get_size();
+#endif
+}
+
 /** Create TLS (Thread Local Storage) data structures.
- *
- * The code requires, that sections .tdata and .tbss are adjacent. It may be
- * changed in the future.
  *
  * @return Pointer to TCB.
@@ -56,9 +73,13 @@
 	tcb_t *tcb;
 	size_t tls_size = &_tbss_end - &_tdata_start;
-	
+
+#ifdef CONFIG_RTLD
+	if (runtime_env != NULL)
+		return rtld_tls_make(runtime_env);
+#endif
 	tcb = tls_alloc_arch(&data, tls_size);
 	if (!tcb)
 		return NULL;
-	
+
 	/*
 	 * Copy thread local data from the initialization image.
@@ -76,6 +97,6 @@
 void tls_free(tcb_t *tcb)
 {
-	size_t tls_size = &_tbss_end - &_tdata_start;
-	tls_free_arch(tcb, tls_size);
+	free(tcb->dtv);
+	tls_free_arch(tcb, tls_get_size());
 }
 
@@ -89,12 +110,13 @@
 tcb_t *tls_alloc_variant_1(void **data, size_t size)
 {
-	tcb_t *result;
+	tcb_t *tcb;
 
-	result = malloc(sizeof(tcb_t) + size);
-	if (!result)
+	tcb = malloc(sizeof(tcb_t) + size);
+	if (!tcb)
 		return NULL;
-	*data = ((void *)result) + sizeof(tcb_t);
+	*data = ((void *)tcb) + sizeof(tcb_t);
+	tcb->dtv = NULL;
 
-	return result;
+	return tcb;
 }
 
@@ -121,11 +143,11 @@
 {
 	tcb_t *tcb;
-	
-	size = ALIGN_UP(size, &_tls_alignment);
-	*data = memalign((uintptr_t) &_tls_alignment, sizeof(tcb_t) + size);
-	if (!*data)
+
+	*data = malloc(sizeof(tcb_t) + size);
+	if (*data == NULL)
 		return NULL;
 	tcb = (tcb_t *) (*data + size);
 	tcb->self = tcb;
+	tcb->dtv = NULL;
 
 	return tcb;
@@ -139,5 +161,4 @@
 void tls_free_variant_2(tcb_t *tcb, size_t size)
 {
-	size = ALIGN_UP(size, &_tls_alignment);
 	void *start = ((void *) tcb) - size;
 	free(start);
