diff --git a/boards/native/Makefile.include b/boards/native/Makefile.include
index 18e234accad99145463fe33eb21475d1f53822fd..d88c4afcc775146cd1ee7aaba10d7a75f9b93901 100644
--- a/boards/native/Makefile.include
+++ b/boards/native/Makefile.include
@@ -20,15 +20,23 @@ export DEBUGGER = gdb
 export TERMPROG = $(ELF)
 export FLASHER = true
 export VALGRIND ?= valgrind
+export CGANNOTATE ?= cg_annotate
+export GPROF ?= gprof
 
 # flags:
 export CFLAGS += -Wall -Wextra -pedantic -m32
 export LINKFLAGS += -m32 -gc -ldl
 export ASFLAGS =
 export DEBUGGER_FLAGS = $(ELF)
-export VALGRIND_FLAGS ?= --track-origins=yes
+term-valgrind: export VALGRIND_FLAGS ?= --track-origins=yes
+term-cachegrind: export CACHEGRIND_FLAGS += --tool=cachegrind
+term-gprof: export TERMPROG = GMON_OUT_PREFIX=gmon.out $(ELF)
 all-valgrind: export CFLAGS += -DHAVE_VALGRIND_H -g
 all-valgrind: export NATIVEINCLUDES += $(shell pkg-config valgrind --cflags)
+all-debug: export CFLAGS += -g
+all-cachegrind: export CFLAGS += -g
+all-gprof: export CFLAGS += -pg
+all-gprof: export LINKFLAGS += -pg
 
 export INCLUDES += $(NATIVEINCLUDES)
 
@@ -58,10 +66,33 @@ endif
 
 all: # do not override first target
 
+all-gprof: all
+
 all-valgrind: all
 
-valgrind:
+all-cachegrind: all
+
+term-valgrind:
 # use this if you want to attach gdb from valgrind:
 #	echo 0 > /proc/sys/kernel/yama/ptrace_scope
 #	VALGRIND_FLAGS += --db-attach=yes
 	$(VALGRIND) $(VALGRIND_FLAGS) $(ELF) $(PORT)
+
+# run the ELF under valgrind with CACHEGRIND_FLAGS
+term-cachegrind:
+	$(VALGRIND) $(CACHEGRIND_FLAGS) $(ELF) $(PORT)
+
+# run the regular `term` target (TERMPROG is overridden for this target above)
+term-gprof: term
+
+# analyse the most recently modified gmon.out* file with gprof
+eval-gprof:
+	$(GPROF) $(ELF) $(shell ls -rt gmon.out* | tail -1)
+
+# annotate the most recently modified cachegrind.out* file
+eval-cachegrind:
+	$(CGANNOTATE) $(shell ls -rt cachegrind.out* | tail -1)
+
+# none of these targets produce a file of the same name
+.PHONY: all-gprof all-valgrind all-cachegrind
+.PHONY: term-valgrind term-cachegrind term-gprof eval-gprof eval-cachegrind
diff --git a/cpu/native/startup.c b/cpu/native/startup.c
index f9a893ab3a8bdce41153add3be0041a3f05aeeb9..8620f8329f7b0090462a555a1c411657ae9976e7 100644
--- a/cpu/native/startup.c
+++ b/cpu/native/startup.c
@@ -186,7 +186,6 @@ __attribute__((constructor)) static void startup(int argc, char **argv)
     *(void **)(&real_read) = dlsym(RTLD_NEXT, "read");
     *(void **)(&real_write) = dlsym(RTLD_NEXT, "write");
     *(void **)(&real_malloc) = dlsym(RTLD_NEXT, "malloc");
-    *(void **)(&real_calloc) = dlsym(RTLD_NEXT, "calloc");
     *(void **)(&real_realloc) = dlsym(RTLD_NEXT, "realloc");
     *(void **)(&real_free) = dlsym(RTLD_NEXT, "free");
 
diff --git a/cpu/native/syscalls.c b/cpu/native/syscalls.c
index 032396a0d29579e50386edf57699feadf588f8ae..3fbc979678d44ef877594e8e97906ac701fb4382 100644
--- a/cpu/native/syscalls.c
+++ b/cpu/native/syscalls.c
@@ -107,12 +107,22 @@ void free(void *ptr)
     _native_syscall_leave();
 }
 
+int _native_in_calloc;
 void *calloc(size_t nmemb, size_t size)
 {
-    /* XXX: This is a dirty hack to enable old dlsym versions to run.
-     * Throw it out when Ubuntu 12.04 support runs out (in 2017-04)! */
+    /* dynamically load calloc when it's needed - this is necessary to
+     * support profiling as it uses calloc before startup runs */
     if (!real_calloc) {
-        return NULL;
+        if (_native_in_calloc) {
+            /* XXX: This is a dirty hack to enable old dlsym versions to run.
+             * Throw it out when Ubuntu 12.04 support runs out (in 2017-04)! */
+            return NULL;
+        }
+        else {
+            _native_in_calloc = 1;
+            *(void **)(&real_calloc) = dlsym(RTLD_NEXT, "calloc");
+            _native_in_calloc = 0;
+        }
     }
 
     void *r;