Hi everyone,
At a customer site, we are facing recurring fatal errors in SAP IQ.
To see if upgrading IQ helps, we tried SP04 patch 6, SP04 patch 9, SP10 patch 01, and currently, on the test environment, we have SP10 patch 10.
In all these versions, the same error occurs every now and then. Unfortunately, it is not reproducible, which makes it hard to file an SAP incident.
All stack traces are similar to this one:
** Error from IQ connection: ** Time of error: 2016-08-25 10:07:07 ** IQ Version: SAP IQ/16.0.101/20034/P/sp10.01 ** OS info: IQ built on: MS/Windows 2003, Executed on: Windows/IQP01/WinNT/6.1/Build 7601/Service Pack 1/x86 Family/level 6/Model 23/Stepping 6/32 CPU(s) ** Command status when error occured: CURSOR ACTIVE ** Command text: insert into <masked> Dump all thread stacks at stcxtlib\st_server.cxx:993 for PID: 24140 ***************** This is the STACKTRACE *************** ******************************** * Generating all thread stacks * ******************************** ===== Thread Number 25164 (IQ connID: 0000646904) ===== pc: 0x00000000D7F76C57: void __cdecl pcpstack(unsigned long,unsigned long,class db_log * __ptr64,class hos_fd * __ptr64,int) 00000001805f6b40 f hos_stacktrace.obj +0x117 pc: 0x00000000D7F78967: void __cdecl DumpAllThreads(char const * __ptr64,unsigned int,int,unsigned long,unsigned long) 00000001805f8750 f hos_stacktrace.obj +0x217 pc: 0x00000000D7D419F1: void __cdecl hos_ABORT(char const * __ptr64,unsigned int,char const * __ptr64,char * __ptr64,char * __ptr64) 00000001803c1830 f hos_abrt.obj +0x1c1 pc: 0x00000000D85D23FA: long __cdecl ExcpFilter(struct _EXCEPTION_POINTERS * __ptr64) 0000000180c523c0 f st_server.obj +0x3a pc: 0x000000007709B940: UnhandledExceptionFilter + 352 pc: 0x00000000771B3398: MD5Final + 7656 pc: 0x00000000771385C8: _C_specific_handler + 156 pc: 0x0000000077149D2D: RtlDecodePointer + 173 pc: 0x00000000771391CF: RtlUnwindEx + 3007 pc: 0x0000000077171248: KiUserExceptionDispatcher + 46 pc: 0x00000000D8071079: public: void __cdecl s_bufmanCallerStats::Merge(class s_bufmanCallerStats const & __ptr64) __ptr64 00000001806f0db0 f s_bufman.obj +0x2c9 pc: 0x00000000D8770A47: protected: void __cdecl vpp::MergeStats(void) __ptr64 0000000180df09c0 f vpp.obj +0x87 pc: 0x00000000D87743BB: public: virtual void __cdecl vpp::CompleteParallelFind(void) __ptr64 0000000180df4390 f vpp.obj +0x2b pc: 0x00000000D8780573: public: virtual 
void __cdecl vpp_Boolean::CompleteParallelFind(void) __ptr64 0000000180e00540 f vpp_Boolean.obj +0x33 pc: 0x00000000D8767865: public: virtual void __cdecl vpp_Combiner::CompleteParallelFind(void) __ptr64 0000000180de7800 f vpp.obj +0x65 pc: 0x00000000D87742F4: public: virtual void __cdecl vpp_Combiner::ExecuteOpus(void) __ptr64 0000000180df3db0 f vpp.obj +0x544 pc: 0x00000000D8770D25: public: virtual void __cdecl vpp::Execute(class s_bm * __ptr64 * __ptr64,class s_bm const * __ptr64,class s_bm const * __ptr64,enum hos_boolop) __ptr64 0000000180df0aa0 f vpp.obj +0x285 pc: 0x00000000D875F499: public: void __cdecl vp_Predicates::PreExecute(void) __ptr64 0000000180ddf260 f vp_Predicates.obj +0x239 pc: 0x00000000D87536E3: public: void __cdecl vp_Cursor::PreExecute(unsigned int) __ptr64 0000000180dd3660 f vp_Cursor.obj +0x83 pc: 0x00000000D7CDA8EC: public: virtual int __cdecl opt_QueryExpExecTopIter::ExecWork(int & __ptr64,unsigned int) __ptr64 000000018035a890 f i opt_QueryExp.obj +0x5c pc: 0x00000000D7F5AE56: public: virtual void __cdecl workAllocator::DoWork(unsigned int) __ptr64 00000001805dad40 f hos_team.obj +0x116 pc: 0x00000000D7F761EB: protected: virtual int __cdecl hos_thread::Main(void) __ptr64 00000001805f5f00 f hos_threadn.obj +0x2eb pc: 0x00000000D7F13E17: private: static int __cdecl hos_lwtask::Start(class hos_lwtask * __ptr64) 0000000180593dc0 f hos_lwtask.obj +0x57 pc: 0x00000000686641FC: void __cdecl IQWorkerStarter(void * __ptr64) 00000000200a41a0 f dbsupprt.obj +0x5c pc: 0x0000000068BA290C: private: static void __cdecl W32Task::pre_body(void * __ptr64) 00000000205e2870 f ntkernel.obj +0x9c pc: 0x00000000770159ED: BaseThreadInitThunk + 13 pc: 0x000000007714C541: RtlUserThreadStart + 33 pid = 24140 tEntry.th32OwnerProcessID 24140 tEntry.th32ThreadID 25712 ctid 25164 ===== Thread Number 25712 ===== pc: 0x00000000771712FA: NtWaitForSingleObject + 10 pc: 0x00000000FD1210DC: void __cdecl `dynamic atexit destructor for 'vtMissing''(void) 00000001815e8d50 f 
comsuppw:comutil.obj +0x241b838c pc: 0x00000000FD44AFFB: void __cdecl `dynamic atexit destructor for 'vtMissing''(void) 00000001815e8d50 f comsuppw:comutil.obj +0x244e22ab pc: 0x00000000FD449D61: void __cdecl `dynamic atexit destructor for 'vtMissing''(void) 00000001815e8d50 f comsuppw:comutil.obj +0x244e1011 pc: 0x00000000FD449A51: void __cdecl `dynamic atexit destructor for 'vtMissing''(void) 00000001815e8d50 f comsuppw:comutil.obj +0x244e0d01 pc: 0x0000000068DE2AED: int __cdecl SvcWinMain(void * __ptr64,void * __ptr64,char * __ptr64,int,class UTLangStringBase * __ptr64,enum ls_init_return,unsigned int) 0000000020822960 f utils:ntsvc.obj +0x18d pc: 0x0000000068B81BB9: WinMainGuts 00000000205c1a90 f winmain.obj +0x129 pc: 0x000000003F3C103A: <unknown> pc: 0x000000003F3C13B0: <unknown> pc: 0x00000000770159ED: BaseThreadInitThunk + 13 pc: 0x000000007714C541: RtlUserThreadStart + 33 pid = 24140 tEntry.th32OwnerProcessID 24140 tEntry.th32ThreadID 15756 ctid 25164 ===== Thread Number 15756 ===== pc: 0x000000007717186A: NtWaitForMultipleObjects + 10 pc: 0x000000007713B037: TpIsTimerSet + 2311 pc: 0x00000000770159ED: BaseThreadInitThunk + 13 pc: 0x000000007714C541: RtlUserThreadStart + 33 pid = 24140 tEntry.th32OwnerProcessID 24140 tEntry.th32ThreadID 27312 ctid 25164 ===== Thread Number 27312 ===== pc: 0x00000000771712FA: NtWaitForSingleObject + 10 pc: 0x00000000FD1210DC: void __cdecl `dynamic atexit destructor for 'vtMissing''(void) 00000001815e8d50 f comsuppw:comutil.obj +0x241b838c pc: 0x0000000068DE2943: void __cdecl SvcMain(unsigned long,char * __ptr64 * __ptr64) 0000000020822590 f utils:ntsvc.obj +0x3b3 pc: 0x00000000FD44A82D: void __cdecl `dynamic atexit destructor for 'vtMissing''(void) 00000001815e8d50 f comsuppw:comutil.obj +0x244e1add pc: 0x00000000770159ED: BaseThreadInitThunk + 13 pc: 0x000000007714C541: RtlUserThreadStart + 33 pid = 24140 tEntry.th32OwnerProcessID 24140 tEntry.th32ThreadID 964 ctid 25164 ===== Thread Number 964 ===== pc: 
0x0000000076F19E6A: SfmDxSetSwapChainStats + 26 pc: 0x0000000076F1615E: GetMessageA + 78 pc: 0x0000000068B7F9B6: unsigned long __cdecl message_loop(void) 00000000205bf920 f loceng.obj +0x96 pc: 0x0000000068B7FB44: void __cdecl main_engine_task(void * __ptr64) 00000000205bfae0 f loceng.obj +0x64 pc: 0x000000006884320C: void __cdecl KInit(void (__cdecl*)(void * __ptr64),void * __ptr64,unsigned int) 00000000202831e0 f ksynch.obj +0x2c pc: 0x0000000068B7FCC9: short __cdecl wineng_start(int,char * __ptr64 * __ptr64) 00000000205bfb80 f loceng.obj +0x149 pc: 0x0000000068B83107: int __cdecl sv_start_service(void * __ptr64,void * __ptr64,char * __ptr64,int) 00000000205c2d80 f winmain.obj +0x387 pc: 0x0000000068DE1EAC: unsigned short __cdecl main_thread(void * __ptr64) 0000000020821e10 f utils:ntsvc.obj +0x9c pc: 0x0000000068E2381F: _callthreadstart 0000000020863808 f LIBCMT:thread.obj +0x17 pc: 0x0000000068E238A9: _threadstart 0000000020863834 f LIBCMT:thread.obj +0x75 pc: 0x00000000770159ED: BaseThreadInitThunk + 13 pc: 0x000000007714C541: RtlUserThreadStart + 33 pid = 24140 tEntry.th32OwnerProcessID 24140 tEntry.th32ThreadID 14544 ctid 25164 ===== Thread Number 14544 ===== pc: 0x000000007717134A: ZwRemoveIoCompletion + 10 pc: 0x00000000FD1216AD: void __cdecl `dynamic atexit destructor for 'vtMissing''(void) 00000001815e8d50 f comsuppw:comutil.obj +0x241b895d pc: 0x00000000770099A1: GetQueuedCompletionStatus + 17 pc: 0x0000000068BA27E8: private: static void __cdecl W32Task::io_thread(void * __ptr64) 00000000205e2790 f ntkernel.obj +0x58 pc: 0x0000000068BA290C: private: static void __cdecl W32Task::pre_body(void * __ptr64) 00000000205e2870 f ntkernel.obj +0x9c pc: 0x00000000770159ED: BaseThreadInitThunk + 13 pc: 0x000000007714C541: RtlUserThreadStart + 33 pid = 24140 tEntry.th32OwnerProcessID 24140 tEntry.th32ThreadID 2096 ctid 25164 ===== Thread Number 2096 ===== pc: 0x00000000771712FA: NtWaitForSingleObject + 10 pc: 0x00000000FD1210DC: void __cdecl `dynamic atexit 
destructor for 'vtMissing''(void) 00000001815e8d50 f comsuppw:comutil.obj +0x241b838c pc: 0x0000000068BA29DC: private: unsigned short __cdecl W32Task::do_wait(unsigned short,int) __ptr64 00000000205e2920 f ntkernel.obj +0xbc pc: 0x000000006884226A: public: unsigned int __cdecl CondVar::wait(class Mutex & __ptr64,long) __ptr64 0000000020282200 f kernel.obj +0x6a pc: 0x0000000068842952: public: static void __cdecl ATimer::timer_body(void * __ptr64) 00000000202828b0 f ksynch.obj +0xa2 pc: 0x0000000068BA290C: private: static void __cdecl W32Task::pre_body(void * __ptr64) 00000000205e2870 f ntkernel.obj +0x9c pc: 0x00000000770159ED: BaseThreadInitThunk + 13 pc: 0x000000007714C541: RtlUserThreadStart + 33 pid = 24140 tEntry.th32OwnerProcessID 24140 tEntry.th32ThreadID 26536 ctid 25164 <etc etc etc>
See the attached XLS file for an overview of all crashes in the past years. The actual load tables are masked, but they are different every time, so the crashes are not tied to a single table.
All environments run Windows Server 2008 R2 Enterprise, with multiple CPU/mem configurations.
Test: 8 cores, 64 GB RAM
Acc: 8 cores, 64 GB RAM
Prod: 32 cores, 192 GB RAM
The DB files are on a SAN, using mounted NFS partitions. We use files, not raw devices.
Any ideas?