Issues

Select view

Select search mode

 

Assertion `owning_thread_id_ == wsrep::this_thread::get_id()' failed.

Cannot Reproduce

Description

GDB Info

#0 __pthread_kill (threadid=<optimized out>, signo=6) at ../sysdeps/unix/sysv/linux/pthread_kill.c:62 #1 0x0000000004dafdb4 in my_write_core (sig=6) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/mysys/stacktrace.cc:305 #2 0x0000000003b6d55f in handle_fatal_signal (sig=6) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/signal_handler.cc:182 #3 <signal handler called> #4 0x00007fee76a3d428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54 #5 0x00007fee76a3f02a in __GI_abort () at abort.c:89 #6 0x00007fee76a35bd7 in __assert_fail_base (fmt=<optimized out>, assertion=assertion@entry=0x59dbf48 "owning_thread_id_ == wsrep::this_thread::get_id()", file=file@entry=0x59dbeb0 "/mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/include/wsrep/client_state.hpp", line=line@entry=434, function=function@entry=0x59e0460 <wsrep::client_state::before_commit()::__PRETTY_FUNCTION__> "int wsrep::client_state::before_commit()") at assert.c:92 #7 0x00007fee76a35c82 in __GI___assert_fail (assertion=0x59dbf48 "owning_thread_id_ == wsrep::this_thread::get_id()", file=0x59dbeb0 "/mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/include/wsrep/client_state.hpp", line=434, function=0x59e0460 <wsrep::client_state::before_commit()::__PRETTY_FUNCTION__> "int wsrep::client_state::before_commit()") at assert.c:101 #8 0x00000000035151f6 in wsrep::client_state::before_commit (this=0x7fee21c2b8b8) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/include/wsrep/client_state.hpp:434 #9 0x00000000034f2560 in wsrep_before_commit (thd=0x7fee21c29000, all=true) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/wsrep_trans_observer.h:292 #10 0x00000000034f7113 in ha_commit_low (thd=0x7fee21c29000, all=true, run_after_commit=false) at 
/mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/handler.cc:1938 #11 0x000000000499c4cf in MYSQL_BIN_LOG::process_commit_stage_queue (this=0x83636c0 <mysql_bin_log>, thd=0x7fee3802a000, first=0x7fee3802a000) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/binlog.cc:8858 #12 0x000000000499e4bf in MYSQL_BIN_LOG::ordered_commit (this=0x83636c0 <mysql_bin_log>, thd=0x7fee3802a000, all=false, skip_commit=false) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/binlog.cc:9533 #13 0x000000000499bcfe in MYSQL_BIN_LOG::commit (this=0x83636c0 <mysql_bin_log>, thd=0x7fee3802a000, all=false) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/binlog.cc:8661 #14 0x00000000034f6c42 in ha_commit_trans (thd=0x7fee3802a000, all=false, ignore_global_read_lock=false) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/handler.cc:1818 #15 0x0000000003af9219 in trans_commit_stmt (thd=0x7fee3802a000, ignore_global_read_lock=false) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/transaction.cc:556 #16 0x0000000003963eb1 in mysql_execute_command (thd=0x7fee3802a000, first_level=true) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/sql_parse.cc:5560 #17 0x00000000039663df in mysql_parse (thd=0x7fee3802a000, parser_state=0x7fee570c0b60, update_userstat=false) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/sql_parse.cc:6260 #18 0x000000000396a32b in wsrep_mysql_parse (thd=0x7fee3802a000, rawbuf=0x7fee381f7028 "INSERT INTO tt_9_p ( ipkey ,v1 ,d2 ,d3 ,b4 ,i5 ,f6 ,iN77 ) VALUES( NULL, 'cBGGAPS', 0.00051, 0.00018, 
'ZAdPZ4A9YylXDqQn5xHgOsnCVNGhVYRE6XTxZyDVC4PmCJZBtNlxWxhtVOjpgXF67YBlyEyCKHhxi1FvQgk45qfe9ybv7s5PgQCIBg8G9684WggxXptyd36F8cmD6eSB0VPYgUKqSqFflPXpL0gmk8xwCEHS7mXBL8iXD0wKBnUhndyqcNZpmUmExe5yArMPOYiNvnzTmAE3myU5nLPlw3wo4jqBXYsrZStSMCaAM9AlEMOinq6SabuOKvBkk5nLauJghNgDXzg1mVBT79j6ac4hBsf4Wvpo3IVDKbZKl2ypm3BEyySDjleE8G5Ha3QS4MJyrTmGwnYyb2PbMIlSFlVuEq1xTBsc3eQ8MnP1uEYxgblQAJxw5AoKvCCUIIvq4mKZh9tmK93KwVuwP53rG3pOnY1la3ptJiyZoUoBesUOgq6Npqt8rdfufuvw9zB6mDeZnFnfrMOqKOkZyZ5idO1gl8P0V12rts6eQiEfBgLlSVF7Ipbdq2hX9x6r888XattZ1pdY8mDbHr0xhZq20ytdX96cCtp2P88Bt3Bi7jYYrur1uTwlIG3D5moEhz1lDkPr5DJTjhQp0B99Qxm6WK7fXUDWu28ZtFVqDnsiZpz61jO13nTj0cpVbP9c84xiCkEpApUyGGHYAUpODEu5Rxx9igDnTbiTDqi6DeWuhW2A6VGX3HQgx5B6Ua6Ti2CWuJ0iPsi9iGyy4S8SOb5E1HgX3W0gUIfyXLprusIPxduB0bb11nY8iJhE7yyN46KD6fhQQ3zwwizgE2GImhBh6IarXN0xxNF5jXvGmjlLFr5p7r4FV8NWGvGuw9bZkJBfbbOFVBxPa1QymDRiOodtH0QabD0ixod10tvRNNk0Qph0p8E9zFAajRnScWCqlq82qxc2xR2KzGv28ZP9Atma7JAFzRuFPJTG3OrGpsD6aY1x9MiQtc4Nk27XNy0CQKMAQFDX7uY2IATA', 6017, 0.93, 8634 )", length=1097, parser_state=0x7fee570c0b60, update_userstat=false) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/sql_parse.cc:7529 #19 0x0000000003957622 in dispatch_command (thd=0x7fee3802a000, com_data=0x7fee570c1c30, command=COM_QUERY) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/sql_parse.cc:2065 #20 0x00000000039551eb in do_command (thd=0x7fee3802a000) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/sql_parse.cc:1430 #21 0x0000000003b54e71 in handle_connection (arg=0x7fee61bfe480) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/conn_handler/connection_handler_per_thread.cc:312 #22 0x00000000055a8b63 in pfs_spawn_thread (arg=0x7fee760645e0) at /mnt/jenkins/workspace/pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/storage/perfschema/pfs.cc:2855 #23 0x00007fee78a276ba in start_thread (arg=0x7fee570c2700) at pthread_create.c:333 #24 0x00007fee76b0f41d in clone 
() at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109

 

Environment

None

Smart Checklist

Details

Assignee

Reporter

Time tracking

6h 30m logged

Affects versions

Priority

Smart Checklist

Created July 15, 2020 at 1:45 PM
Updated March 6, 2024 at 9:27 PM
Resolved June 2, 2022 at 6:00 AM

Activity

Show:

Venkatesh PrasadJune 2, 2022 at 6:00 AM

Thanks for checking.

mohit.joshiJune 1, 2022 at 2:43 AM

Hi. The bug seems to be fixed in PXC 8.0.27; it is no longer reproducible. You may close this ticket. If the crash is seen again, I'll reopen the ticket.

mohit.joshiJanuary 13, 2022 at 4:56 PM

Hi, I can still see this crash occurring on PXC 8.0.26. Any updates on this bug?

Venkatesh PrasadJuly 20, 2020 at 3:50 PM

Ramesh: Crash info: 10.30.6.204:/qa/pxc_bugs/PXC-2934/


Kamil:

It looks like this is caused by group commit feature.

The assertion is triggered when the transaction of a thread queued in MYSQL_BIN_LOG::state_manager is committed.

In MYSQL_BIN_LOG::ordered_commit(), the first thread becomes the leader; subsequent threads are added to the list and wait for their commit to be processed by the leader (finish_commit()). The leader then processes the queue in process_commit_stage_queue(). There, wsrep::client_state::before_commit() is called from the leader's context, and the assertion is triggered because the client_state is owned by a thread other than the current one.


Venkatesh:

Here are my observations after examining both core files uploaded to the QA server.

In the second crash (found in `342/node1/core` from the above link), it looks like 4 threads are involved in the BGC, and the thread that hit the assertion was executing ALTER TABLE.

 

 

In MYSQL_BIN_LOG::process_commit_stage_queue:

(gdb) p first->m_query_string
(gdb) first->m_query_string
$2 = { str = 0x7f0c7f633028 "ALTER TABLE tt_1_t MODIFY COLUMN i1 INT(23) AUTO_INCREMENT, LOCK=EXCLUSIVE, ALGORITHM=COPY", length = 91 }
(gdb) p first->next_to_commit->m_query_string
$3 = { str = 0x7f0c7e18a028 "REPLACE INTO tt_23_p ( ipkey ,mt1 ,d2 ,i3 ,i4 ,v5 ,v6 ) VALUES( 5687, 'OpCnneNEOHOI9Jd1yszrzFkw3F45qHUFcZeAvYvHQ1eJEyDtAqivHvdmxrGkfoPAq9SCEE1apeub3xVBlrWS1LSd<snipped>', 0.00104, 5626, 775, 'i1e1uouJpnS5PQI', '86XtxR7zof3RnPZxdk' )", length = 1003 }
(gdb) p first->next_to_commit->next_to_commit->m_query_string
$4 = { str = 0x7f0c87644028 "COMMIT", length = 6 }
(gdb) p first->next_to_commit->next_to_commit->next_to_commit->m_query_string
$5 = { str = 0x7f0c747b7028 "REPLACE INTO tt_14 ( ipkey ) VALUES( NULL )", length = 44 }
(gdb) p first->next_to_commit->next_to_commit->next_to_commit->next_to_commit->m_query_string
Cannot access memory at address 0x248
(gdb) p first->next_to_commit->next_to_commit->next_to_commit->next_to_commit
$6 = (THD *) 0x0

 

The scenario is the same in the first crash (`17/node1/core`) as well.

In both failures, the failing thread seems to have executed ALTER queries and entered BGC together with other DML threads. This in itself may be a hint: the bug may be reproducible when BGC happens with threads performing both DDLs and DMLs on PXC-8.0. I'm not really sure; this is just a guess.


Venkatesh:

In the context of the current bug, the assertion "owning_thread_id_ == wsrep::this_thread::get_id()" was hit while the transaction was committing inside the Binlog Group Commit. There are many other code paths that can hit the same assertion failure. One such path was found by Ramesh.

#9 0x00007f8dce33ac82 in __GI___assert_fail (assertion=0x59e3638 "owning_thread_id_ == wsrep::this_thread::get_id()", file=0x59e35a0 "/mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/include/wsrep/client_state.hpp", line=455, function=0x59e7b00 <wsrep::client_state::before_rollback()::__PRETTY_FUNCTION__> "int wsrep::client_state::before_rollback()") at assert.c:101 #10 0x0000000003515c78 in wsrep::client_state::before_rollback (this=0x7f8d835368b8) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/include/wsrep/client_state.hpp:455 #11 0x00000000034f30ce in wsrep_before_rollback (thd=0x7f8d83534000, all=true) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/wsrep_trans_observer.h:417 #12 0x00000000034f7d91 in ha_rollback_low (thd=0x7f8d83534000, all=true) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/handler.cc:2016 #13 0x000000000498cce4 in MYSQL_BIN_LOG::rollback (this=0x836aac0 <mysql_bin_log>, thd=0x7f8d83534000, all=true) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/binlog.cc:2769 #14 0x00000000034f8631 in ha_rollback_trans (thd=0x7f8d83534000, all=true) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/handler.cc:2137 #15 0x0000000003afde6d in trans_rollback (thd=0x7f8d83534000) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/transaction.cc:431 #16 0x0000000003b89778 in Wsrep_high_priority_service::rollback (this=0x7f8d8376a920, ws_handle=..., ws_meta=...) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/wsrep_high_priority_service.cc:362 #17 0x00000000058d983e in apply_fragment (server_state=..., high_priority_service=..., streaming_applier=0x7f8d8376a920, ws_handle=..., ws_meta=..., data=...) 
at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/src/server_state.cpp:107 #18 0x00000000058da8e4 in apply_write_set (server_state=..., high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/src/server_state.cpp:391 #19 0x00000000058ddfee in wsrep::server_state::on_apply (this=0x7f8dc2ff4300, high_priority_service=..., ws_handle=..., ws_meta=..., data=...) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/src/server_state.cpp:1121 #20 0x00000000058f45e7 in wsrep::high_priority_service::apply (this=0x7f8dd0996c40, ws_handle=..., ws_meta=..., data=...) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/include/wsrep/high_priority_service.hpp:46 #21 0x00000000058f1866 in (anonymous namespace)::apply_cb (ctx=0x7f8dd0996c40, wsh=0x7f8dd0995770, flags=0, buf=0x7f8dd0995790, meta=0x7f8dd0995b20, exit_loop=0x7f8dd09959dd) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/src/wsrep_provider_v26.cpp:500 #22 0x00007f8db94704c3 in galera::TrxHandleSlave::apply (this=0x7f8d9a08ec00, recv_ctx=0x7f8dd0996c40, apply_cb=0x58f1635 <(anonymous namespace)::apply_cb(void*, wsrep_ws_handle_t const*, uint32_t, wsrep_buf_t const*, wsrep_trx_meta_t const*, wsrep_bool_t*)>, meta=..., exit_loop=@0x7f8dd09959dd: false) at galera/src/trx_handle.cpp:418 #23 0x00007f8db94bec2d in galera::ReplicatorSMM::apply_trx (this=0x7f8dc13fd000, recv_ctx=0x7f8dd0996c40, ts=...) at galera/src/replicator_smm.cpp:591 #24 0x00007f8db94c92d9 in galera::ReplicatorSMM::process_trx (this=0x7f8dc13fd000, recv_ctx=0x7f8dd0996c40, ts_ptr=...) 
at galera/src/replicator_smm.cpp:2277 #25 0x00007f8db949d775 in galera::GcsActionSource::process_writeset (this=0x7f8dbcff8640, recv_ctx=0x7f8dd0996c40, act=..., exit_loop=@0x7f8dd099693a: false) at galera/src/gcs_action_source.cpp:62 #26 0x00007f8db949de24 in galera::GcsActionSource::dispatch (this=0x7f8dbcff8640, recv_ctx=0x7f8dd0996c40, act=..., exit_loop=@0x7f8dd099693a: false) at galera/src/gcs_action_source.cpp:109 #27 0x00007f8db949e588 in galera::GcsActionSource::process (this=0x7f8dbcff8640, recv_ctx=0x7f8dd0996c40, exit_loop=@0x7f8dd099693a: false) at galera/src/gcs_action_source.cpp:182 #28 0x00007f8db94be002 in galera::ReplicatorSMM::async_recv (this=0x7f8dc13fd000, recv_ctx=0x7f8dd0996c40) at galera/src/replicator_smm.cpp:466 #29 0x00007f8db94f14d2 in galera_recv (gh=0x7f8dcd859600, recv_ctx=0x7f8dd0996c40) at galera/src/wsrep_provider.cpp:236 #30 0x00000000058f2618 in wsrep::wsrep_provider_v26::run_applier (this=0x7f8dbabf10c0, applier_ctx=0x7f8dd0996c40) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/wsrep-lib/src/wsrep_provider_v26.cpp:715 #31 0x0000000003bb7e91 in wsrep_replication_process (thd=0x7f8dab80f000, arg=0x0) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/wsrep_thd.cc:58 #32 0x0000000003721a7c in start_wsrep_THD (arg=0x7f8dcd83a6f0) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/sql/mysqld.cc:9298 #33 0x00000000055b02eb in pfs_spawn_thread (arg=0x7f8dcd85f720) at /mnt/jenkins/workspace/qa-pxc80-build/BUILD_TYPE/debug/Host/min-xenial-x64/storage/perfschema/pfs.cc:2855 #34 0x00007f8dd03886ba in start_thread (arg=0x7f8dd0997700) at pthread_create.c:333 #35 0x00007f8dce41441d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109

The crash pasted above seems to happen while a transaction is being rolled back in streaming replication mode. This has been fixed by Codership (not merged yet) in https://github.com/codership/mariadb-server/pull/139. We should evaluate it and, if it fixes the crash, port the changes to PXC.