批评我的非侵入式堆调试器

Question

批评我的非侵入式堆调试器

c++debuggingheap-memory

14

这是Critique my heap debugger的跟进，昨天bitc建议我现在用手写的哈希表单独存储已分配的块的元数据。

堆调试器现在可以检测以下类型的错误：

内存泄漏（现在具有更详细的调试输出）
非法指针传递到delete（还处理了双重删除）
错误的删除形式（数组与非数组）
缓冲区溢出
缓冲区下溢

欢迎讨论，提前致谢！

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <new>

namespace
{
    // I don't want to #include <algorithm> for a single function template :)
    template <typename T>
    void my_swap(T& x, T& y)
    {
        T z(x);
        x = y;
        y = z;
    }

    typedef unsigned char byte;

    const byte CANARY[] = {0x5A, 0xFE, 0x6A, 0x8D,
                           0x5A, 0xFE, 0x6A, 0x8D,
                           0x5A, 0xFE, 0x6A, 0x8D,
                           0x5A, 0xFE, 0x6A, 0x8D};

    bool canary_dead(const byte* cage)
    {
        bool dead = memcmp(cage, CANARY, sizeof CANARY);
        if (dead)
        {
            for (size_t i = 0; i < sizeof CANARY; ++i)
            {
                byte b = cage[i];
                printf(b == CANARY[i] ? "__ " : "%2X ", b);
            }
            putchar('\n');
        }
        return dead;
    }

    enum kind_of_memory {AVAILABLE, TOMBSTONE, NON_ARRAY_MEMORY, ARRAY_MEMORY};

    const char* kind_string[] = {0, 0, "non-array memory", "    array memory"};

    struct metadata
    {
        byte* address;
        size_t size;
        kind_of_memory kind;

        bool in_use() const
        {
            return kind & 2;
        }

        void print() const
        {
            printf("%s at %p (%d bytes)\n", kind_string[kind], address, size);
        }

        bool must_keep_searching_for(void* address)
        {
            return kind == TOMBSTONE || (in_use() && address != this->address);
        }

        bool canaries_alive() const
        {
            bool alive = true;
            if (canary_dead(address - sizeof CANARY))
            {
                printf("ERROR:    buffer underflow at %p\n", address);
                alive = false;
            }
            if (canary_dead(address + size))
            {
                printf("ERROR:     buffer overflow at %p\n", address);
                alive = false;
            }
            return alive;
        }
    };

    const size_t MINIMUM_CAPACITY = 11;

    class hashtable
    {
        metadata* data;
        size_t used;
        size_t capacity;
        size_t tombstones;

    public:

        size_t size() const
        {
            return used - tombstones;
        }

        void print() const
        {
            for (size_t i = 0; i < capacity; ++i)
            {
                if (data[i].in_use())
                {
                    printf(":( leaked ");
                    data[i].print();
                }
            }
        }

        hashtable()
        {
            used = 0;
            capacity = MINIMUM_CAPACITY;
            data = static_cast<metadata*>(calloc(capacity, sizeof(metadata)));
            tombstones = 0;
        }

        ~hashtable()
        {
            free(data);
        }

        hashtable(const hashtable& that)
        {
            used = 0;
            capacity = 3 * that.size() | 1;
            if (capacity < MINIMUM_CAPACITY) capacity = MINIMUM_CAPACITY;
            data = static_cast<metadata*>(calloc(capacity, sizeof(metadata)));
            tombstones = 0;

            for (size_t i = 0; i < that.capacity; ++i)
            {
                if (that.data[i].in_use())
                {
                    insert_unsafe(that.data[i]);
                }
            }
        }

        hashtable& operator=(hashtable copy)
        {
            swap(copy);
            return *this;
        }

        void swap(hashtable& that)
        {
            my_swap(data, that.data);
            my_swap(used, that.used);
            my_swap(capacity, that.capacity);
            my_swap(tombstones, that.tombstones);
        }

        void insert_unsafe(const metadata& x)
        {
            *find(x.address) = x;
            ++used;
        }

        void insert(const metadata& x)
        {
            if (2 * used >= capacity)
            {
                hashtable copy(*this);
                swap(copy);
            }
            insert_unsafe(x);
        }

        metadata* find(void* address)
        {
            size_t index = reinterpret_cast<size_t>(address) % capacity;
            while (data[index].must_keep_searching_for(address))
            {
                ++index;
                if (index == capacity) index = 0;
            }
            return &data[index];
        }

        void erase(metadata* it)
        {
            it->kind = TOMBSTONE;
            ++tombstones;
        }
    } the_hashset;

    struct heap_debugger
    {
        heap_debugger()
        {
            puts("heap debugger started");
        }

        ~heap_debugger()
        {
            the_hashset.print();
            puts("heap debugger shutting down");
        }
    } the_heap_debugger;

    void* allocate(size_t size, kind_of_memory kind) throw (std::bad_alloc)
    {
        byte* raw = static_cast<byte*>(malloc(size + 2 * sizeof CANARY));
        if (raw == 0) throw std::bad_alloc();

        memcpy(raw, CANARY, sizeof CANARY);
        byte* payload = raw + sizeof CANARY;
        memcpy(payload + size, CANARY, sizeof CANARY);

        metadata md = {payload, size, kind};
        the_hashset.insert(md);
        printf("allocated ");
        md.print();
        return payload;
    }

    void release(void* payload, kind_of_memory kind) throw ()
    {
        if (payload == 0) return;

        metadata* p = the_hashset.find(payload);

        if (!p->in_use())
        {
            printf("ERROR:   no dynamic memory at %p\n", payload);
        }
        else if (p->kind != kind)
        {
            printf("ERROR:wrong form of delete at %p\n", payload);
        }
        else if (p->canaries_alive())
        {
            printf("releasing ");
            p->print();
            free(static_cast<byte*>(payload) - sizeof CANARY);
            the_hashset.erase(p);
        }
    }
}

void* operator new(size_t size) throw (std::bad_alloc)
{
    return allocate(size, NON_ARRAY_MEMORY);
}

void* operator new[](size_t size) throw (std::bad_alloc)
{
    return allocate(size, ARRAY_MEMORY);
}

void operator delete(void* payload) throw ()
{
    release(payload, NON_ARRAY_MEMORY);
}

void operator delete[](void* payload) throw ()
{
    release(payload, ARRAY_MEMORY);
}

int main()
{
    int* p = new int[1];
    delete p;   // wrong form of delete
    delete[] p; // ok
    delete p;   // no dynamic memory (double delete)

    p = new int[1];
    p[-1] = 0xcafebabe;
    p[+1] = 0x12345678;
    delete[] p; // underflow and overflow prevent release
                // p is not released, hence leak
}

- fredoverflow

难道你不能仅仅编辑昨天发布的这个问题而不是再次发起一个关于同样主题的新问题吗？ - Paul R

1

@Paul R：这个完全不同，因为它可能会起作用。 - UncleBens

不要使用异常规范，如果您想让人们知道，请将其放在注释中 :) - Matthieu M.

@Matt 如果我从operator new，operator new[]，operator delete和operator delete[]中删除异常规范，编译器会报错。 - fredoverflow

我尝试使用这个解决方案，我可以看到一些泄漏，但是我只能看到地址。如果我尝试使用https://dev59.com/3EnSa4cB1Zd3GeqPMUKv#1395287扩展此示例，我的编译器会给出错误，如“从'const void *'到'void *'的无效转换”。有没有办法让__FILE__和/或__LINE__，__FUNCTION__与此解决方案配合使用？（编译器i386-wrs-vxworks-g ++，GnuTools 4.1.2） - bart s

3个回答

2

你是否在使用一个非常弱的malloc，它没有这种内置功能？如果有的话，你会增加不必要的开销。此外，当进行小对象分配时，这种系统会受到很大的影响或者对它们无效，因为人们只分配了1个并管理自己的内存。

就代码而言，它看起来会做你所说的事情，并且设计良好，易于阅读。但是，如果你要费力去做这个，为什么不在源头上通过使用受控容器/指针/operator[]来捕获你的缓冲区溢出/下溢呢？这样，你可以在失败的现场进行调试，而不是在释放时发现有恶意事件发生。

我相信其他人会发现一些效率上的优势，但这些只是我在几分钟内查看你的代码后想到的一些想法。

- Michael Dorgan

2

我想询问如何检测下溢和上溢。

我的意思是，如果我有一个包含10个元素的数组，似乎您会检测到我在-1和10处写入，但如果我在20处写入呢？下溢或上溢不一定是作为缓冲区越界（连续）的一部分完成的。

此外，防止释放块的目的是什么？这个块（相对来说）还好，你（不幸地）破坏了它的邻居。

总之，这对我来说似乎很好，不过每个函数可能会有多个返回，因为单一出口没有意义。您似乎更像是C程序员而不是C++程序员 :)

- Matthieu M.

下溢和上溢在两个方向上最多检测到16个字节。如果这对您来说不够，请简单地增加CANARY数组的大小;-) - fredoverflow

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- bitc · Accepted Answer

非常好，的确如此。你的canaries实际上可以揭示一些溢出/下溢的真实情况（尽管像Matthieu指出的那样，并非全部情况）。

还有，你可能会遇到多线程应用程序的一些问题。也许需要保护散列表以防止并发访问？

现在，每次分配和释放都被记录下来了，如果您愿意，可以提供有关正在测试的程序的更多信息。了解任何时候分配的总数和平均数可能是很有趣的。已分配的最大、最小和平均字节数，以及分配的平均寿命。

如果您想比较不同的线程，至少使用Pthreads可以通过pthread_self()标识它们。这个堆调试器可以成为一个非常有用的分析工具。