From fff28fad2e18f7d8ef1d20ec50e6db0465f2da61 Mon Sep 17 00:00:00 2001
From: Tomasz Grabiec <tgrabiec@cloudius-systems.com>
Date: Mon, 5 May 2014 13:43:57 +0200
Subject: [PATCH] trace: include inlined-by functions in the backtrace

Fixes #255.

The default OSv build has plenty of inlined functions. The tracing
tool is using 'addr2line -Cfp' to resolve symbols. That mode of
operation of addr2line yields one line per symbol and that line will
only show the inlined function, even though the information about
"logical" callers is also available.

Example:

  $ addr2line -Cfp -e build/release/loader.elf
  0x32963d
  lock_guard at (...)

There is the '-i' switch we could use to also see code locations into
which this function was inlined:

  $ addr2line -Cfpi -e build/release/loader.elf
  0x32963d
  lock_guard at (...)
   (inlined by) lock_guard_for_with_lock at (...)
   (inlined by) mmu::vm_fault(unsigned long, exception_frame*) at (...)

In this case, 'mmu::vm_fault' is much more interesting than
lock_guard and thus we would like to show it in the backtrace. This change
allows symbol resolvers to return multiple source addresses per raw
address and changes the default implementation to return inlined-by
functions by default.

The new behavior can be suppressed with --no-inlined-by.

Signed-off-by: Tomasz Grabiec <tgrabiec@cloudius-systems.com>
Signed-off-by: Pekka Enberg <penberg@cloudius-systems.com>
---
 scripts/loader.py    | 59 ++++++++++++++++-----------
 scripts/osv/debug.py | 94 ++++++++++++++++++++++++++++----------------
 scripts/osv/prof.py  |  3 +-
 scripts/osv/trace.py | 18 +++++----
 scripts/trace.py     | 20 +++++-----
 5 files changed, 120 insertions(+), 74 deletions(-)

diff --git a/scripts/loader.py b/scripts/loader.py
index 9b25a2cf2..e56656a48 100644
--- a/scripts/loader.py
+++ b/scripts/loader.py
@@ -21,7 +21,7 @@ external = os.path.join(osv_dir, 'external', arch)
 sys.path.append(os.path.join(osv_dir, 'scripts'))
 
 from osv.trace import Trace,TracePoint,BacktraceFormatter,format_time,format_duration
-from osv import trace
+from osv import trace, debug
 
 virtio_driver_type = gdb.lookup_type('virtio::virtio_driver')
 
@@ -73,38 +73,49 @@ def load_elf(path, base):
 
     gdb.execute('add-symbol-file %s %s %s' % (path, text_addr, args))
 
-class syminfo(object):
+class syminfo_resolver(object):
     cache = dict()
-    def __init__(self, addr):
-        if addr in syminfo.cache:
-            cobj = syminfo.cache[addr]
-            self.func = cobj.func
-            self.source = cobj.source
-            return
+
+    def __call__(self, addr):
+        if addr in syminfo_resolver.cache:
+            return self.cache[addr]
         infosym = gdb.execute('info symbol 0x%x' % addr, False, True)
-        self.func = infosym[:infosym.find(" + ")]
+        func = infosym[:infosym.find(" + ")]
         sal = gdb.find_pc_line(addr)
         try :
             # prefer (filename:line),
-            self.source = '%s:%s' % (sal.symtab.filename, sal.line)
+            filename = sal.symtab.filename
+            line = sal.line
         except :
             # but if can't get it, at least give the name of the object
-            if infosym.startswith("No symbol matches") :
-                self.source = None
-            else:
-                self.source = infosym[infosym.rfind("/")+1:].rstrip()
-        if self.source and self.source.startswith('../../'):
-            self.source = self.source[6:]
-        syminfo.cache[addr] = self
-    def __str__(self):
-        ret = self.func
-        if self.source:
-            ret += ' (%s)' % (self.source,)
-        return ret
+            if not infosym.startswith("No symbol matches") :
+                filename = infosym[infosym.rfind("/")+1:].rstrip()
+
+        if filename and filename.startswith('../../'):
+            filename = filename[6:]
+        result = [debug.SourceAddress(addr, name=func, filename=filename, line=line)]
+        syminfo_resolver.cache[addr] = result
+        return result
+
     @classmethod
     def clear_cache(clazz):
         clazz.cache.clear()
 
+symbol_resolver = syminfo_resolver()
+
+def symbol_formatter(src_addr):
+    ret = src_addr.name
+    if src_addr.filename or src_addr.line:
+        ret += ' ('
+        ret += src_addr.filename
+        if src_addr.line:
+            ret += ':' + str(src_addr.line)
+        ret += ')'
+    return ret
+
+def syminfo(addr):
+    return symbol_formatter(syminfo_resolver(addr))
+
 def translate(path):
     '''given a path, try to find it on the host OS'''
     name = os.path.basename(path)
@@ -449,7 +460,7 @@ class osv_syms(gdb.Command):
         gdb.Command.__init__(self, 'osv syms',
                              gdb.COMMAND_USER, gdb.COMPLETE_NONE)
     def invoke(self, arg, from_tty):
-        syminfo.clear_cache()
+        syminfo_resolver.clear_cache()
         p = gdb.lookup_global_symbol('elf::program::s_objs').value()
         p = p.dereference().address
         while p.dereference():
@@ -939,7 +950,7 @@ def make_symbolic(addr):
 
 def dump_trace(out_func):
     indents = defaultdict(int)
-    bt_formatter = BacktraceFormatter(make_symbolic)
+    bt_formatter = BacktraceFormatter(symbol_resolver, symbol_formatter)
 
     def lookup_tp(name):
         return gdb.lookup_global_symbol(name).value().dereference()
diff --git a/scripts/osv/debug.py b/scripts/osv/debug.py
index 536c68bb6..006dc4fe0 100644
--- a/scripts/osv/debug.py
+++ b/scripts/osv/debug.py
@@ -1,6 +1,9 @@
 import os
 import re
 import subprocess
+import threading
+import itertools
+import select
 
 class SourceAddress:
     def __init__(self, addr, name=None, filename=None, line=None):
@@ -19,56 +22,81 @@ class DummyResolver(object):
         self.cache = {}
 
     def __call__(self, addr):
-        src_addr = self.cache.get(addr, None)
-        if not src_addr:
-            src_addr = SourceAddress(addr)
-            self.cache[addr] = src_addr
-        return src_addr
+        result = self.cache.get(addr, None)
+        if not result:
+            result = [SourceAddress(addr)]
+            self.cache[addr] = result
+        return result
 
 class SymbolResolver(object):
-    def __init__(self, object_path):
+    inline_prefix = ' (inlined by) '
+
+    def __init__(self, object_path, show_inline=True):
         if not os.path.exists(object_path):
             raise Exception('File not found: ' + object_path)
-        self.addr2line = subprocess.Popen(['addr2line', '-e', object_path, '-Cfp'],
+        self.show_inline = show_inline
+        flags = '-Cfp'
+        if show_inline:
+            flags += 'i'
+        self.addr2line = subprocess.Popen(['addr2line', '-e', object_path, flags],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
         self.cache = {}
 
+    def next_line(self):
+        return self.addr2line.stdout.readline().rstrip('\n')
+
+    def parse_line(self, addr, line):
+        # addr2line ver. 2.23.2 (Ubuntu)
+        m = re.match(r'^\?\?$', line)
+        if m:
+            line = self.next_line()
+            if not re.match(r'^\?\?:0$', line):
+                raise Exception('Unexpected response: ' + line)
+            return SourceAddress(addr)
+
+        # addr2line ver. 2.23.52.0.1-9.fc19
+        m = re.match(r'^\?\? \?\?:0$', line)
+        if m:
+            return SourceAddress(addr)
+
+        m = re.match(r'(?P<name>.*) at ((?P<file>.*?)|\?+):((?P<line>\d+)|\?+)', line)
+        if not m:
+            raise Exception('addr2line response not matched: ' + line)
+        return SourceAddress(addr, m.group('name'), m.group('file'), m.group('line'))
+
     def __call__(self, addr):
         """
-        Returns SourceAddress for given addr.
+        Returns an iterable of SourceAddress objects for given addr.
 
         """
-        src_addr = self.cache.get(addr, None)
-        if src_addr:
-            return src_addr
+        result = self.cache.get(addr, None)
+        if result:
+            return result
 
         self.addr2line.stdin.write('0x%x\n' % addr)
-        response = self.addr2line.stdout.readline().rstrip('\n')
 
-        # addr2line ver. 2.23.2 (Ubuntu)
-        m = re.match(r'^\?\?$', response)
-        if m:
-            response = self.addr2line.stdout.readline().rstrip('\n')
-            if not re.match(r'^\?\?:0$', response):
-                raise Exception('Unexpected response: ' + response)
-            src_addr = SourceAddress(addr)
-            self.cache[addr] = src_addr
-            return src_addr
+        if self.show_inline:
+            self.addr2line.stdin.write('0\n')
 
-        # addr2line ver. 2.23.52.0.1-9.fc19
-        m = re.match(r'^\?\? \?\?:0$', response)
-        if m:
-            src_addr = SourceAddress(addr)
-            self.cache[addr] = src_addr
-            return src_addr
+        result = [self.parse_line(addr, self.next_line())]
 
-        m = re.match(r'(?P<name>.*) at ((?P<file>.*?)|\?+):((?P<line>\d+)|\?+)', response)
-        if not m:
-            raise Exception('addr2line response not matched: ' + response)
-        src_addr = SourceAddress(addr, m.group('name'), m.group('file'), m.group('line'))
-        self.cache[addr] = src_addr
-        return src_addr
+        if self.show_inline:
+            line = self.next_line()
+            while line.startswith(self.inline_prefix):
+                result.append(self.parse_line(addr, line[len(self.inline_prefix):]))
+                line = self.next_line()
+
+        self.cache[addr] = result
+        return result
 
     def close():
         self.addr2line.stdin.close()
         self.addr2line.wait()
+
+def resolve_all(resolver, raw_addresses):
+    """
+    Returns iterable of SourceAddress objects for given list of raw addresses
+    using supplied resolver.
+
+    """
+    return itertools.chain.from_iterable(map(resolver, raw_addresses))
diff --git a/scripts/osv/prof.py b/scripts/osv/prof.py
index 73cfe2a98..d8b02a0d7 100644
--- a/scripts/osv/prof.py
+++ b/scripts/osv/prof.py
@@ -1,6 +1,7 @@
 import sys
 from operator import attrgetter
 from osv import trace, tree, debug
+import itertools
 
 class ProfNode(tree.TreeNode):
     def __init__(self, key):
@@ -171,7 +172,7 @@ def print_profile(samples, symbol_resolver, caller_oriented=False,
             if not sample:
                 continue
 
-        frames = [symbol_resolver(addr - 1) for addr in sample.backtrace]
+        frames = list(debug.resolve_all(symbol_resolver, (addr - 1 for addr in sample.backtrace)))
         frames = strip_garbage(frames)
         if caller_oriented:
             frames.reverse()
diff --git a/scripts/osv/trace.py b/scripts/osv/trace.py
index 2d11f66ab..64869d28b 100644
--- a/scripts/osv/trace.py
+++ b/scripts/osv/trace.py
@@ -2,6 +2,7 @@ import os
 import mmap
 import struct
 import sys
+from osv import debug
 
 # version 2 introduced thread_name
 # version 3 introduced variable-length arguments (blob)
@@ -20,22 +21,25 @@ def format_time(time):
     return "%12.9f" % nanos_to_seconds(time)
 
 class BacktraceFormatter:
-    def __init__(self, resolver):
+    def __init__(self, resolver, formatter):
         self.resolver = resolver
+        self.formatter = formatter
 
     def __call__(self, backtrace):
         if not backtrace:
             return ''
 
-        while self.resolver(backtrace[0] - 1).startswith("tracepoint"):
-            backtrace.pop(0)
+        frames = list(debug.resolve_all(self.resolver, (x - 1 for x in backtrace if x)))
 
-        return '   [' + ', '.join((str(self.resolver(x - 1)) for x in backtrace if x)) + ']'
+        while frames[0].name and frames[0].name.startswith("tracepoint"):
+            frames.pop(0)
 
-def simple_symbol_formatter(addr):
-    return '0x%x' % addr
+        return '   [' + ', '.join(map(self.formatter, frames)) + ']'
 
-default_backtrace_formatter = BacktraceFormatter(simple_symbol_formatter)
+def simple_symbol_formatter(src_addr):
+    return '0x%x' % src_addr.addr
+
+default_backtrace_formatter = BacktraceFormatter(debug.DummyResolver, simple_symbol_formatter)
 
 class TimeRange(object):
     """
diff --git a/scripts/trace.py b/scripts/trace.py
index e82e44765..99e919823 100755
--- a/scripts/trace.py
+++ b/scripts/trace.py
@@ -56,6 +56,7 @@ def add_symbol_resolution_options(parser):
     group.add_argument("-d", "--debug", action="store_true", help="use loader.elf from debug build")
     group.add_argument("-e", "--exe", action="store", help="path to the object file used for symbol resolution")
     group.add_argument("-x", "--no-resolve", action='store_true', help="do not resolve symbols")
+    group.add_argument("--no-inlined-by", action='store_true', help="do not show inlined-by functions")
     group.add_argument("-L", "--show-line-number", action='store_true', help="show line numbers")
     group.add_argument("-A", "--show-address", action='store_true', help="show raw addresses")
     group.add_argument("-F", "--show-file-name", action='store_true', help="show file names")
@@ -65,13 +66,14 @@ class BeautifyingResolver(object):
         self.delegate = delegate
 
     def __call__(self, addr):
-        src_addr = self.delegate(addr)
-        if src_addr.name:
-            if src_addr.name.startswith("void sched::thread::do_wait_until<"):
-                src_addr.name = "sched::thread::do_wait_until"
-            elif src_addr.name.startswith("do_wait_until<"):
-                src_addr.name = "do_wait_until"
-        return src_addr
+        resolution = self.delegate(addr)
+        for src_addr in resolution:
+            if src_addr.name:
+                if src_addr.name.startswith("void sched::thread::do_wait_until<"):
+                    src_addr.name = "sched::thread::do_wait_until"
+                elif src_addr.name.startswith("do_wait_until<"):
+                    src_addr.name = "do_wait_until"
+        return resolution
 
 def symbol_resolver(args):
     if args.no_resolve:
@@ -84,14 +86,14 @@ def symbol_resolver(args):
     else:
         elf_path = 'build/release/loader.elf'
 
-    return BeautifyingResolver(debug.SymbolResolver(elf_path))
+    return BeautifyingResolver(debug.SymbolResolver(elf_path, show_inline=not args.no_inlined_by))
 
 def get_backtrace_formatter(args):
     if not args.backtrace:
         return lambda backtrace: ''
 
     return trace.BacktraceFormatter(
-        symbol_printer(symbol_resolver(args), src_addr_formatter(args)))
+        symbol_resolver(args), src_addr_formatter(args))
 
 def list_trace(args):
     def data_formatter(sample):
-- 
GitLab