From 3cc2c305decde52edf1caee0ce43ea369cfe4958 Mon Sep 17 00:00:00 2001
From: Guy Zana <guy@cloudius-systems.com>
Date: Thu, 12 Sep 2013 12:19:22 +0300
Subject: [PATCH] post-processing: (scheduler) visually view a histogram of
 threads' time-slices

---
 scripts/post-processing/scheduler/README.txt  | 23 +++++++++++
 scripts/post-processing/scheduler/histo.py    | 17 ++++++++
 .../scheduler/trace_sched_timings.py          | 39 +++++++++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 scripts/post-processing/scheduler/README.txt
 create mode 100644 scripts/post-processing/scheduler/histo.py
 create mode 100644 scripts/post-processing/scheduler/trace_sched_timings.py

diff --git a/scripts/post-processing/scheduler/README.txt b/scripts/post-processing/scheduler/README.txt
new file mode 100644
index 000000000..e1836c36b
--- /dev/null
+++ b/scripts/post-processing/scheduler/README.txt
@@ -0,0 +1,23 @@
+Thread time-slice histogram
+===========================
+
+Use this tool to investigate the time-slices of threads in OSv.
+
+HOW-TO:
+
+1. Start OSv with --trace=sched_switch
+
+2. Run your test
+
+3. Connect with gdb and perform:
+    (gdb) connect
+    (gdb) logtrace  // this will dump the sched_switch tracepoints to gdb.txt
+
+4. Run trace_sched_timings.py. It analyzes gdb.txt and writes one text file
+   (thread_<id>_timings.txt) for each thread that completed a time-slice in the trace;
+   each file lists that thread's time-slices in microseconds (the histogram input).
+
+5. Run histo.py on one of those files (passed as its first argument) to graph the results visually
+
+
+
diff --git a/scripts/post-processing/scheduler/histo.py b/scripts/post-processing/scheduler/histo.py
new file mode 100644
index 000000000..03d8a1b53
--- /dev/null
+++ b/scripts/post-processing/scheduler/histo.py
@@ -0,0 +1,17 @@
+# Plot a histogram of the time-slice durations (us, one per line) in argv[1].
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy
+from collections import Counter
+
+# 'with' closes the input promptly; blank lines are skipped, not fatal.
+with open(sys.argv[1], "rt") as f:
+    s = [int(x) for x in f if x.strip()]
+if not s:  # np.mean([]) is NaN, which '%d' below cannot format
+    sys.exit("%s: no time-slice samples found" % sys.argv[1])
+
+# Parenthesized so this runs under both Python 2 and Python 3.
+print("%s avg=%d(us) cnt=%d" % (sys.argv[1], np.mean(s), len(s)))
+plt.hist(s, bins=60)
+plt.show()
diff --git a/scripts/post-processing/scheduler/trace_sched_timings.py b/scripts/post-processing/scheduler/trace_sched_timings.py
new file mode 100644
index 000000000..c153c8a36
--- /dev/null
+++ b/scripts/post-processing/scheduler/trace_sched_timings.py
@@ -0,0 +1,39 @@
+# Split the sched_switch trace in gdb.txt into per-thread time-slice files.
+import re
+import os
+import sys
+
+# Raw string keeps the regex escapes intact; '$' also accepts a final line
+# that lacks a trailing newline.
+TRACE_RE = re.compile(r"(0x.*?)\s+([0-9]+)\s+([0-9]+)\.([0-9]+)\s+(\S+)\s+(.*)$")
+
+# thread -> [(seconds delta, microseconds delta), ...], one per time-slice
+threads = {}
+
+# thread -> (sec, microsec) of schedule-in
+sched = {}
+
+with open("gdb.txt", "rt") as trace:
+    for tr in trace:
+        m = TRACE_RE.match(tr)
+        if m is None:
+            # not a trace record (e.g. other gdb output): skip, don't crash
+            continue
+        (threadp, cpu, sec, microsec, event, info) = m.groups()
+        if event != "sched_switch":
+            continue
+        now = (int(sec), int(microsec))
+
+        # the record's own thread is treated as the one being switched out
+        if threadp in sched:
+            start = sched.pop(threadp)
+            threads.setdefault(threadp, []).append((now[0] - start[0], now[1] - start[1]))
+
+        to = re.match("to (.*)", info).group(1)
+        assert to not in sched
+        sched[to] = now
+
+# one duration in microseconds per line; the two deltas are folded together here
+for tr in threads:
+    with open("thread_%s_timings.txt" % tr, "wt") as f:
+        f.writelines("%d\n" % (d[0] * 1000000 + d[1]) for d in threads[tr])
-- 
GitLab