我一直在监控运行多线程程序的服务器。
- 操作系统 : CentOS release 6.4 x86_64
- boost 版本 : BOOST_LIB_VERSION "1_41"
- ACE 版本 : 6.2.3
- apr 版本 : 2.0
- apr-util 版本 : 2.0
- log4cxx 版本 : 2.0
- 主程序为多线程应用
通过 GDB 分析核心转储(core)文件,发现程序存在间歇性崩溃(段错误)的问题,如下所示。
情况 1
Using host libthread_db library "/lib64/libthread_db.so.1".
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x00007f709cda7c8c in boost::detail::atomic_increment (pw=0x2d) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:66
#1 0x00007f709cda7d68 in boost::detail::sp_counted_base::add_ref_copy (this=0x25)
at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:133
#2 0x00007f709cda7e77 in boost::detail::shared_count::shared_count (this=0x7f708fffe8d8, r=...)
at /usr/include/boost/smart_ptr/detail/shared_count.hpp:228
#3 0x00007f709cda848d in boost::shared_ptr<AudioChunk>::shared_ptr (this=0x7f708fffe8d0) at /usr/include/boost/smart_ptr/shared_ptr.hpp:169
#4 0x00007f709cdb6ae4 in MediaData::GetPacketDescription (this=0x7f705cd1edd0) at MediaData.cpp:955
以下是第 #0 帧和第 #4 帧对应的源代码:
#0 0x00007f709cda7c8c in boost::detail::atomic_increment (pw=0x2d) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:66
56 inline void atomic_increment( int * pw )
57 {
58 //atomic_exchange_and_add( pw, 1 );
59 __asm__
60 (
61 "lock\n\t" //
62 "incl %0":
63 "=m"( *pw ): // output (%0)
64 "m"( *pw ): // input (%1)
65 "cc" // clobbers
66 );
67 }
#4 0x00007f709cdb6ae4 in MediaData::GetPacketDescription (this=0x7f705cd1edd0) at MediaData.cpp:955
953 MediaPacketDescriptionRef MediaData::GetPacketDescription()
954 {
955 MediaPacketDescriptionRef packetDescriptionRef = m_mediaPacketDescriptionQueue.front();
956 m_mediaPacketDescriptionQueue.pop();
957 return packetDescriptionRef;
958 }
"m_mediaPacketDescriptionQueue" 的定义如下:
typedef boost::shared_ptr<MediaPacketDescription> MediaPacketDescriptionRef
std :: queue <MediaPacketDescriptionRef> m_mediaPacketDescriptionQueue;
情况 2
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x0000000000415e60 in boost::detail::atomic_exchange_and_add (pw=0x5d, dv=-1) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:50
#1 0x0000000000415f39 in boost::detail::sp_counted_base::release (this=0x55) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:143
#2 0x0000000000415fe9 in boost::detail::shared_count::~shared_count (this=0x7fe8042a09f8, __in_chrg=<optimized out>) at /usr/include/boost/smart_ptr/detail/shared_count.hpp:217
#3 0x0000000000416408 in boost::shared_ptr<MediaPacketDescription>::~shared_ptr (this=0x7fe8042a09f0, __in_chrg=<optimized out>) at /usr/include/boost/smart_ptr/shared_ptr.hpp:169
#4 0x000000000041bf3c in std::_Destroy<boost::shared_ptr<MediaPacketDescription> > (__pointer=0x7fe8042a09f0) at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:90
#5 0x000000000041bed0 in std::_Destroy_aux<false>::__destroy<boost::shared_ptr<MediaPacketDescription>*> (__first=0x7fe8042a09f0, __last=0x7fe804244f30)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:100
#6 0x000000000041be27 in std::_Destroy<boost::shared_ptr<MediaPacketDescription>*> (__first=0x7fe8042a07f0, __last=0x7fe804244f30)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:123
#7 0x000000000041bcbb in std::_Destroy<boost::shared_ptr<MediaPacketDescription>*, boost::shared_ptr<MediaPacketDescription> > (__first=0x7fe8042a07f0, __last=0x7fe804244f30)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:149
#8 0x000000000041b9af in std::deque<boost::shared_ptr<MediaPacketDescription>, std::allocator<boost::shared_ptr<MediaPacketDescription> > >::_M_destroy_data_aux (this=0x7fe804a29a38, __first=..., __last=...)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/deque.tcc:733
#9 0x000000000041b661 in std::deque<boost::shared_ptr<MediaPacketDescription>, std::allocator<boost::shared_ptr<MediaPacketDescription> > >::_M_destroy_data (this=0x7fe804a29a38, __first=..., __last=...)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_deque.h:1653
#10 0x000000000041a3fe in std::deque<boost::shared_ptr<MediaPacketDescription>, std::allocator<boost::shared_ptr<MediaPacketDescription> > >::~deque (this=0x7fe804a29a38, __in_chrg=<optimized out>)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_deque.h:790
#11 0x000000000041930c in std::queue<boost::shared_ptr<MediaPacketDescription>, std::deque<boost::shared_ptr<MediaPacketDescription>, std::allocator<boost::shared_ptr<MediaPacketDescription> > > >::~queue (this=0x7fe804a29a38,
__in_chrg=<optimized out>) at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_queue.h:90
#12 0x0000000000419433 in MediaData::~MediaData (this=0x7fe804a29860, __in_chrg=<optimized out>) at ../orkbasecxx/MediaData.h:55
#13 0x0000000000419863 in boost::checked_delete<MediaData> (x=0x7fe804a29860) at /usr/include/boost/checked_delete.hpp:34
#14 0x000000000041ca96 in boost::detail::sp_counted_impl_p<MediaData>::dispose (this=0x7fe804263ac0) at /usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:78
#15 0x0000000000415f5a in boost::detail::sp_counted_base::release (this=0x7fe804263ac0) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:145
#16 0x0000000000415fe9 in boost::detail::shared_count::~shared_count (this=0x7fe804391f10, __in_chrg=<optimized out>) at /usr/include/boost/smart_ptr/detail/shared_count.hpp:217
#17 0x00000000004164ce in boost::shared_ptr<MediaData>::~shared_ptr (this=0x7fe804391f08, __in_chrg=<optimized out>) at /usr/include/boost/smart_ptr/shared_ptr.hpp:169
#18 0x00007fe857208d8d in ReportData::~ReportData (this=0x7fe804391f00, __in_chrg=<optimized out>) at ReportData.h:34
#19 0x00007fe857208de1 in boost::checked_delete<ReportData> (x=0x7fe804391f00) at /usr/include/boost/checked_delete.hpp:34
#20 0x00007fe85720a0b6 in boost::detail::sp_counted_impl_p<ReportData>::dispose (this=0x7fe8044cf900) at /usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:78
#21 0x0000000000415f5a in boost::detail::sp_counted_base::release (this=0x7fe8044cf900) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:145
#22 0x0000000000415fe9 in boost::detail::shared_count::~shared_count (this=0x7fe804b25838, __in_chrg=<optimized out>) at /usr/include/boost/smart_ptr/detail/shared_count.hpp:217
#23 0x00007fe8572036ae in boost::shared_ptr<ReportData>::~shared_ptr (this=0x7fe804b25830, __in_chrg=<optimized out>) at /usr/include/boost/smart_ptr/shared_ptr.hpp:169
#24 0x00007fe857209038 in __gnu_cxx::new_allocator<boost::shared_ptr<ReportData> >::destroy (this=0x7fe8261fab1f, __p=0x7fe804b25830)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/ext/new_allocator.h:115
#25 0x00007fe857208699 in std::_List_base<boost::shared_ptr<ReportData>, std::allocator<boost::shared_ptr<ReportData> > >::_M_clear (this=0x7fe8261fac20)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/list.tcc:76
#26 0x00007fe857207b63 in std::_List_base<boost::shared_ptr<ReportData>, std::allocator<boost::shared_ptr<ReportData> > >::~_List_base (this=0x7fe8261fac20, __in_chrg=<optimized out>)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_list.h:360
#27 0x00007fe8572074e0 in std::list<boost::shared_ptr<ReportData>, std::allocator<boost::shared_ptr<ReportData> > >::~list (this=0x7fe8261fac20, __in_chrg=<optimized out>)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_list.h:418
#28 0x00007fe857207365 in ReportDatas::Refresh (this=0x7fe8040291e8) at ReportData.cpp:479
以下是第 #0 帧和第 #28 帧对应的源代码:
#0 0x0000000000415e60 in boost::detail::atomic_exchange_and_add (pw=0x5d, dv=-1) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:50
35 inline int atomic_exchange_and_add( int * pw, int dv )
36 {
37 // int r = *pw;
38 // *pw += dv;
39 // return r;
40
41 int r;
42
43 __asm__ __volatile__
44 (
45 "lock\n\t"
46 "xadd %1, %0":
47 "=m"( *pw ), "=r"( r ): // outputs (%0, %1)
48 "m"( *pw ), "1"( dv ): // inputs (%2, %3 == %1)
49 "memory", "cc" // clobbers
50 );
51
52 return r;
53 }
#28 0x00007fe857207365 in ReportDatas::Refresh (this=0x7fe8040291e8) at ReportData.cpp:479
471 for (std::list<ReportDataRef>::iterator it = toEmpty.begin(); it != toEmpty.end() ; it++)
472 {
473 ReportDataRef data = *it;
474 data->Empty();
475 m_datas.erase(data->GetId());
476 LOG4CXX_DEBUG(s_log, data->GetId() + ": Become empty");
477 }
478 logMsg.Format("Refreshed %d datas. New data size:%d", (data - m_datas.size()), m_datas.size());
479 LOG4CXX_DEBUG(s_log, logMsg);
对于 CASE 1,我假设 queue.front() 本身没有问题:我认为 front() 一定已经正常返回,程序才会接着执行 shared_ptr 的引用计数递增操作。
问题在于我无法理解为什么在boost库原子操作中会出现间歇性SIGSEGV错误。
========================================================
抱歉,在评论中描述细节很困难。
感谢您的回答,erikzenker。
我运行了您的代码,在 GDB 中得到了如下结果。
Program received signal SIGSEGV, Segmentation fault.
0x000000000040092b in boost::detail::atomic_exchange_and_add (pw=0x20b49, dv=-1) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:50
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:50:1141:beg:0x40092b
Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.192.el6.x86_64 libgcc-4.4.7-16.el6.x86_64 libstdc++-4.4.7-16.el6.x86_64
(gdb) bt
#0 0x000000000040092b in boost::detail::atomic_exchange_and_add (pw=0x20b49, dv=-1) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:50
#1 0x000000000040098b in boost::detail::sp_counted_base::release (this=0x20b41) at /usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:143
#2 0x0000000000400a25 in boost::detail::shared_count::~shared_count (this=0x6044c8, __in_chrg=<value optimized out>) at /usr/include/boost/smart_ptr/detail/shared_count.hpp:217
#3 0x0000000000400ad6 in boost::shared_ptr<int>::~shared_ptr (this=0x6044c0, __in_chrg=<value optimized out>) at /usr/include/boost/smart_ptr/shared_ptr.hpp:169
#4 0x0000000000401e86 in std::_Destroy<boost::shared_ptr<int> > (__pointer=0x6044c0) at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:90
#5 0x0000000000401cb4 in std::_Destroy_aux<false>::__destroy<boost::shared_ptr<int>*> (__first=0x6044c0, __last=0x6042c0)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:100
#6 0x0000000000401b25 in std::_Destroy<boost::shared_ptr<int>*> (__first=0x6042d0, __last=0x6042c0) at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:123
#7 0x000000000040185b in std::_Destroy<boost::shared_ptr<int>*, boost::shared_ptr<int> > (__first=0x6042d0, __last=0x6042c0)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_construct.h:149
#8 0x0000000000401366 in std::deque<boost::shared_ptr<int>, std::allocator<boost::shared_ptr<int> > >::_M_destroy_data_aux (this=0x7fffffffe350, __first=..., __last=...)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/deque.tcc:739
#9 0x0000000000400df9 in std::deque<boost::shared_ptr<int>, std::allocator<boost::shared_ptr<int> > >::_M_destroy_data (this=0x7fffffffe350, __first=..., __last=...)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_deque.h:1653
#10 0x0000000000400b50 in std::deque<boost::shared_ptr<int>, std::allocator<boost::shared_ptr<int> > >::~deque (this=0x7fffffffe350, __in_chrg=<value optimized out>)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_deque.h:790
#11 0x0000000000400a7c in std::queue<boost::shared_ptr<int>, std::deque<boost::shared_ptr<int>, std::allocator<boost::shared_ptr<int> > > >::~queue (this=0x7fffffffe350,
__in_chrg=<value optimized out>) at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/bits/stl_queue.h:90
#12 0x00000000004008be in main () at testc2.cpp:12
仔细想来,我认为这个结果与 CASE 2 非常相似。
但是我还没有理解清楚。
我认为在 CASE 1 中,第 #0 帧执行的是 shared_ptr 的引用计数递增操作。
我的理解是:正因为 front() 在非空队列上成功取到了元素,程序才会继续执行第 #0 帧的原子递增(atomic_increment)。