使用Python进行GDB漂亮打印递归结构

14

我对Python并不是很熟悉,最近才开始了解GDB Python脚本的功能;我的问题动机是要增强MELT monitor中值的打印,这将稍后连接到GCC MELT。但这里有一个更简单的变体。

我的系统是Linux/Debian/Sid/x86-64。GCC编译器是4.8.2;GDB调试器是7.6.2;它的Python版本是3.3

我想要调试一个使用“discriminated union”类型的C程序:

// file tiny.c in the public domain by Basile Starynkevitch
// compile with gcc -g3 -Wall -std=c99 tiny.c -o tiny
// debug with gdb tiny
// under gdb: python tiny-gdb.py
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

typedef union my_un myval_t;
enum tag_en {
  tag_none,
  tag_int,
  tag_string,
  tag_sequence
};
struct boxint_st;
struct boxstring_st;
struct boxsequence_st;
union my_un {
  void* ptr;
  enum tag_en *ptag;
  struct boxint_st *pint;
  struct boxstring_st *pstr;
  struct boxsequence_st *pseq;
};

struct boxint_st {
  enum tag_en tag;      // for tag_int
  int ival;
};
struct boxstring_st {
  enum tag_en tag;      // for tag_string
  char strval[];        // a zero-terminated C string 
};
struct boxsequence_st {
  enum tag_en tag;      // for tag_sequence
  unsigned slen;
  myval_t valtab[];     // of length slen
};


int main (int argc, char **argv) {
  printf ("start %s, argc=%d", argv[0], argc);
  struct boxint_st *iv42 = malloc (sizeof (struct boxint_st));
  iv42->tag = tag_int;
  iv42->ival = 42;
  struct boxstring_st *istrhello =
    malloc (sizeof (struct boxstring_st) + sizeof ("hello") + 1);
  istrhello->tag = tag_string;
  strcpy (istrhello->strval, "hello");
  struct boxsequence_st *iseq3 =
    malloc (sizeof (struct boxsequence_st) + 3 * sizeof (myval_t));
  iseq3->tag = tag_sequence;
  iseq3->slen = 3;
  iseq3->valtab[0] = (myval_t)iv42;
  iseq3->valtab[1] = (myval_t)istrhello;
  iseq3->valtab[2] = (myval_t)NULL;
  printf ("before %s:%d gdb print iseq3\n", __FILE__, __LINE__);
}

这是我的Python文件,可以在gdb下读取

 # file tiny-gdb.py in the public domain by Basile Starynkevitch
 ## see also tiny.c file
class my_val_Printer:
    """pretty prints a my_val"""
    def __init__ (self, val):
        self.val = val
    def to_string (self):
        outs = "my_val@" + self.val['ptr']
        mytag = self.val['ptag'].dereference();
        if (mytag):
            outs = outs + mytag.to_string()
    def display_hint (self):
        return 'my_val'

def my_val_lookup(val):
    lookup = val.type.tag
    if (lookup == None):
        return None
    if lookup == "my_val":
        return my_val_Printer(val)
    return None

我卡在以下基本问题中:

  1. 如何在GDB下安装python的漂亮打印机?(文档中有几种方式,我无法选择合适的方式)。
  2. 如何确保GDB漂亮打印union my_un和其typedef-ed同义词myval_t的方式相同。
  3. 漂亮打印程序应如何检测空指针?
  4. 我的漂亮打印机如何递归struct boxsequence_st ? 这意味着检测指针是否非空,然后解引用它的ptag,将该标记与tag_sequence进行比较,漂亮打印valtab可变数组成员。
  5. 如何避免漂亮打印递归过深?
1个回答

13

我对gdb Python api的经验不足以称之为答案;我认为这只是一位开发者的研究笔记。我附在下面的代码相当粗糙且难看。然而,这确实可以在gdb-7.4和python-2.7.3上运行。以下是一个调试运行示例:

$ gcc -Wall -g3 tiny.c -o tiny
$ gdb tiny
(gdb) b 58
(gdb) run
(gdb) print iseq3
$1 = (struct boxsequence_st *) 0x602050
(gdb) print iv42
$2 = (struct boxint_st *) 0x602010
(gdb) print istrhello
$3 = (struct boxstring_st *) 0x602030

上述所有内容都是标准的漂亮输出 - 我的理解是我经常想看到指针是什么,所以不想覆盖它们。然而,解引用指针使用下面显示的漂亮打印程序:
(gdb) print *iseq3
$4 = (struct boxsequence_st)(3) = {(struct boxint_st)42, (struct boxstring_st)"hello"(5), NULL}
(gdb) print *iv42
$5 = (struct boxint_st)42
(gdb) print *istrhello
$6 = (struct boxstring_st)"hello"(5)
(gdb) set print array
(gdb) print *iseq3
$7 = (struct boxsequence_st)(3) = {
  (struct boxint_st)42,
  (struct boxstring_st)"hello"(5),
  NULL
}
(gdb) info auto-load
Loaded  Script                                                                 
Yes     /home/.../tiny-gdb.py

最后一行显示在调试tiny时,tiny-gdb.py文件会自动加载(虽然您可以禁用此功能,但我相信这是默认行为)。
上面使用的tiny-gdb.py文件:
def deref(reference):
    target = reference.dereference()
    if str(target.address) == '0x0':
        return 'NULL'
    else:
        return target

class cstringprinter:
    def __init__(self, value, maxlen=4096):
        try:
            ends = gdb.selected_inferior().search_memory(value.address, maxlen, b'\0')
            if ends is not None:
                maxlen = ends - int(str(value.address), 16)
                self.size = str(maxlen)
            else:
                self.size = '%s+' % str(maxlen)
            self.data = bytearray(gdb.selected_inferior().read_memory(value.address, maxlen))
        except:
            self.data = None
    def to_string(self):
        if self.data is None:
            return 'NULL'
        else:
            return '\"%s\"(%s)' % (str(self.data).encode('string_escape').replace('"', '\\"').replace("'", "\\\\'"), self.size)

class boxintprinter:
    def __init__(self, value):
        self.value = value.cast(gdb.lookup_type('struct boxint_st'))
    def to_string(self):
        return '(struct boxint_st)%s' % str(self.value['ival'])

class boxstringprinter:
    def __init__(self, value):
        self.value = value.cast(gdb.lookup_type('struct boxstring_st'))
    def to_string(self):
        return '(struct boxstring_st)%s' % (self.value['strval'])

class boxsequenceprinter:
    def __init__(self, value):
        self.value = value.cast(gdb.lookup_type('struct boxsequence_st'))
    def display_hint(self):
        return 'array'
    def to_string(self):
        return '(struct boxsequence_st)(%s)' % str(self.value['slen'])
    def children(self):
        value = self.value
        tag = str(value['tag'])
        count = int(str(value['slen']))
        result = []
        if tag == 'tag_none':
            for i in xrange(0, count):
                result.append( ( '#%d' % i, deref(value['valtab'][i]['ptag']) ))
        elif tag == 'tag_int':
            for i in xrange(0, count):
                result.append( ( '#%d' % i, deref(value['valtab'][i]['pint']) ))
        elif tag == 'tag_string':
            for i in xrange(0, count):
                result.append( ( '#%d' % i, deref(value['valtab'][i]['pstr']) ))
        elif tag == 'tag_sequence':
            for i in xrange(0, count):
                result.append( ( '#%d' % i, deref(value['valtab'][i]['pseq']) ))
        return result

def typefilter(value):
    "Pick a pretty-printer for 'value'."
    typename = str(value.type.strip_typedefs().unqualified())

    if typename == 'char []':
        return cstringprinter(value)

    if (typename == 'struct boxint_st' or
        typename == 'struct boxstring_st' or
        typename == 'struct boxsequence_st'):
        tag = str(value['tag'])
        if tag == 'tag_int':
            return boxintprinter(value)
        if tag == 'tag_string':
            return boxstringprinter(value)
        if tag == 'tag_sequence':
            return boxsequenceprinter(value)

    return None

gdb.pretty_printers.append(typefilter)

我做出这些选择的原因如下:
  1. How to install pretty-printers to gdb?

    There are two parts to this question: where to install the Python files, and how to hook the pretty-printers to gdb.

    Because the pretty-printer selection cannot rely on the inferred type alone, but has to peek into the actual data fields, you cannot use the regular expression matching functions. Instead, I chose to add my own pretty-printer selector function, typefilter(), to the global pretty-printers list, as described in the documentation. I did not implement the enable/disable functionality, because I believe it is easier to just load/not load the relevant Python script instead.

    (typefilter() gets called once per every variable reference, unless some other pretty-printer has already accepted it.)

    The file location issue is a more complicated one. For application-specific pretty-printers, putting them into a single Python script file sounds sensible, but for a library, some splitting seems to be in order. The documentation recommends packaging the functions into a Python module, so that a simple python import module enables the pretty-printer. Fortunately, Python packaging is quite straightforward. If you were to import gdb to the top and save it to /usr/lib/pythonX.Y/tiny.py, where X.Y is the python version used, you only need to run python import tiny in gdb to enable the pretty-printer.

    Of course, properly packaging the pretty-printer is a very good idea, especially if you intend to distribute it, but it does pretty much boil down to adding some variables et cetera to the beginning of the script, assuming you keep it as a single file. For more complex pretty-printers, using a directory layout might be a good idea.

  2. If you have a value val, then val.type is the gdb.Type object describing its type; converting it to string yields a human-readable type name.

    val.type.strip_typedefs() yields the actual type with all typedefs stripped. I even added .unqualified(), so that all const/volatile/etc. type qualifiers are removed.

  3. NULL pointer detection is a bit tricky.

    The best way I found, was to examine the stringified .address member of the target gdb.Value object, and see if it is "0x0".

    To make life easier, I was able to write a simple deref() function, which tries to dereference a pointer. If the target points to (void *)0, it returns the string "NULL", otherwise it returns the target gdb.Value object.

    The way I use deref() is based on the fact that "array" type pretty-printers yield a list of 2-tuples, where the first item is the name string, and the second item is either a gdb.Value object, or a string. This list is returned by the children() method of the pretty-printer object.

  4. Handling "discriminated union" types would be much easier, if you had a separate type for the generic entity. That is, if you had

    struct box_st {
        enum tag_en tag;
    };
    

    and it was used everywhere when the tag value is still uncertain; and the specific structure types only used where their tag value is fixed. This would allow a much simpler type inference.

    As it is, in tiny.c the struct box*_st types can be used interchangeably. (Or, more specifically, we cannot rely on a specific tag value based on the type alone.)

    The sequence case is actually quite simple, because valtab[] can be treated as simply as an array of void pointers. The sequence tag is used to pick the correct union member. In fact, if valtab[] was simply a void pointer array, then gdb.Value.cast(gdb.lookup_type()) or gdb.Value.reinterpret_cast(gdb.lookup_type()) can be used to change each pointer type as necessary, just like I do for the boxed structure types.

  5. Recursion limits?

    You can use the @ operator in print command to specify how many elements are printed, but that does not help with nesting.

    If you add iseq3->valtab[2] = (myval_t)iseq3; to tiny.c, you get an infinitely recursive sequence. gdb does print it nicely, especially with set print array, but it does not notice or care about the recursion.

在我看来,除了为深度嵌套或递归数据结构编写一个漂亮的打印机之外,您可能还希望编写一个。在我的测试中,我编写了一个命令,它使用Graphviz直接从gdb中绘制二叉树结构; 我绝对相信它比纯文本输出更好。

添加:如果将以下内容保存为/usr/lib/pythonX.Y/tree.py

import subprocess
import gdb

def pretty(value, field, otherwise=''):
    try:
        if str(value[field].type) == 'char []':
            data = str(gdb.selected_inferior().read_memory(value[field].address, 64))
            try:
                size = data.index("\0")
                return '\\"%s\\"' % data[0:size].encode('string_escape').replace('"', '\\"').replace("'", "\\'")
            except:
                return '\\"%s\\"..' % data.encode('string_escape').replace('"', '\\"').replace("'", "\\'")
        else:
            return str(value[field])
    except:
        return otherwise

class tee:
    def __init__(self, cmd, filename):
        self.file = open(filename, 'wb')
        gdb.write("Saving DOT to '%s'.\n" % filename)
        self.cmd = cmd
    def __del__(self):
        if self.file is not None:
            self.file.flush()
            self.file.close()
            self.file = None
    def __call__(self, arg):
        self.cmd(arg)
        if self.file is not None:
            self.file.write(arg)

def do_dot(value, output, visited, source, leg, label, left, right):
    if value.type.code != gdb.TYPE_CODE_PTR:
        return
    target = value.dereference()

    target_addr = int(str(target.address), 16)
    if target_addr == 0:
        return

    if target_addr in visited:
        if source is not None:
            path='%s.%s' % (source, target_addr)
            if path not in visited:
                visited.add(path)
                output('\t"%s" -> "%s" [ taillabel="%s" ];\n' % (source, target_addr, leg))
        return

    visited.add(target_addr)

    if source is not None:
        path='%s.%s' % (source, target_addr)
        if path not in visited:
            visited.add(path)
            output('\t"%s" -> "%s" [ taillabel="%s" ];\n' % (source, target_addr, leg))

    if label is None:
        output('\t"%s" [ label="%s" ];\n' % (target_addr, target_addr))
    elif "," in label:
        lab = ''
        for one in label.split(","):
            cur = pretty(target, one, '')
            if len(cur) > 0:
                if len(lab) > 0:
                    lab = '|'.join((lab,cur))
                else:
                    lab = cur
        output('\t"%s" [ shape=record, label="{%s}" ];\n' % (target_addr, lab))
    else:
        output('\t"%s" [ label="%s" ];\n' % (target_addr, pretty(target, label, target_addr)))

    if left is not None:
        try:
            target_left = target[left]
            do_dot(target_left, output, visited, target_addr, left, label, left, right)
        except:
            pass

    if right is not None:
        try:
            target_right = target[right]
            do_dot(target_right, output, visited, target_addr, right, label, left, right)
        except:
            pass

class Tree(gdb.Command):

    def __init__(self):
        super(Tree, self).__init__('tree', gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)

    def do_invoke(self, name, filename, left, right, label, cmd, arg):
        try:
            node = gdb.selected_frame().read_var(name)
        except:
            gdb.write('No symbol "%s" in current context.\n' % str(name))
            return
        if len(arg) < 1:
            cmdlist = [ cmd ]
        else:
            cmdlist = [ cmd, arg ]
        sub = subprocess.Popen(cmdlist, bufsize=16384, stdin=subprocess.PIPE, stdout=None, stderr=None)
        if filename is None:
            output = sub.stdin.write
        else:
            output = tee(sub.stdin.write, filename)
        output('digraph {\n')
        output('\ttitle = "%s";\n' % name)
        if len(label) < 1: label = None
        if len(left)  < 1: left  = None
        if len(right) < 1: right = None
        visited = set((0,))
        do_dot(node, output, visited, None, None, label, left, right)
        output('}\n')
        sub.communicate()
        sub.wait()

    def help(self):
        gdb.write('Usage: tree [OPTIONS] variable\n')
        gdb.write('Options:\n')
        gdb.write('   left=name          Name member pointing to left child\n')
        gdb.write('   right=name         Name right child pointer\n')
        gdb.write('   label=name[,name]  Define node fields\n')
        gdb.write('   cmd=dot arg=-Tx11  Specify the command (and one option)\n')
        gdb.write('   dot=filename.dot   Save .dot to a file\n')
        gdb.write('Suggestions:\n')
        gdb.write('   tree cmd=neato variable\n')

    def invoke(self, argument, from_tty):
        args = argument.split()
        if len(args) < 1:
            self.help()
            return
        num = 0
        cfg = { 'left':'left', 'right':'right', 'label':'value', 'cmd':'dot', 'arg':'-Tx11', 'dot':None }
        for arg in args[0:]:
            if '=' in arg:
                key, val = arg.split('=', 1)
                cfg[key] = val
            else:
                num += 1
                self.do_invoke(arg, cfg['dot'], cfg['left'], cfg['right'], cfg['label'], cfg['cmd'], cfg['arg'])
        if num < 1:
            self.help()

Tree()

您可以在gdb中使用它:

(gdb) python import tree
(gdb) tree
Usage: tree [OPTIONS] variable
Options:
   left=name          Name member pointing to left child
   right=name         Name right child pointer
   label=name[,name]  Define node fields
   cmd=dot arg=-Tx11  Specify the command (and one option)
   dot=filename.dot   Save .dot to a file
Suggestions:
   tree cmd=neato variable

如果你拥有例如以下这样的东西。
struct node {
    struct node *le;
    struct node *gt;
    long         key;
    char         val[];
}

struct node *sometree;

如果您已经有本地或远程的X11连接以及安装了Graphviz,那么您可以使用:
(gdb) tree left=le right=gt label=key,val sometree

查看树形结构。由于它保留了已访问节点的列表(作为Python集合),因此不会对递归结构感到困惑。

在发布之前,我可能应该清理一下我的Python片段,但无论如何,请将这些视为初步测试版本;使用时自负。 :)


非常感谢您的回复。我还没有测试它(因为目前我不需要使用gdb进行调试),但是这非常有帮助! - Basile Starynkevitch

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接