本文基于MySQL Community 8.0.23 Version

通过之前 InnoDB之UNDO LOG介绍这篇文章的介绍,我们已经基本明白了undo log的整个生命周期是怎样的,但是其中对于具体undo log是如何purge的没有进行分析更深入的介绍。

具体的,只介绍到purge undo log record时主要分为两种情况:清理TRX_UNDO_DEL_MARK_REC记录或者清理TRX_UNDO_UPD_EXIST_REC记录;但是没有介绍如何对这两种记录进行清理。今天这篇文章我们就主要介绍一下innodb是如何对这两种记录进行清理的。

清理undo log record的入口函数是row_purge_record(),在这个函数中分别对 undo log record type为TRX_UNDO_DEL_MARK_RECTRX_UNDO_UPD_EXIST_REC 完成清理,即可完成对旧索引记录的清理。

那么下面分别介绍一下这两种undo log record是如何进行清理的。

TRX_UNDO_DEL_MARK_REC

TRX_UNDO_DEL_MARK_REC类型的undo log record的入口函数为row_purge_del_mark()将所有的聚集索引与二级索引记录都清理掉,其具体流程如下:

  1. static MY_ATTRIBUTE((warn_unused_result)) bool row_purge_del_mark(
  2. purge_node_t *node) /*!< in/out: row purge node */
  3. {
  4. ...
  5. /** 依次遍历二级索引,挨个删除其记录 */
  6. while (node->index != NULL) {
  7. /* 跳过损坏的二级索引 */
  8. dict_table_skip_corrupt_index(node->index);
  9. row_purge_skip_uncommitted_virtual_index(node->index);
  10. if (!node->index) {
  11. break;
  12. }
  13. if (node->index->type != DICT_FTS) {
  14. /** 构造二级索引的entry */
  15. dtuple_t *entry = row_build_index_entry_low(node->row, NULL, node->index,
  16. heap, ROW_BUILD_FOR_PURGE);
  17. /** 从此二级索引中删除此entry */
  18. row_purge_remove_sec_if_poss(node, node->index, entry);
  19. mem_heap_empty(heap);
  20. }
  21. node->index = node->index->next();
  22. }
  23. ...
  24. /** 清理聚集索引 */
  25. return (row_purge_remove_clust_if_poss(node));
  26. }

从上面的row_purge_del_mark()具体实现不难看出,主要清理逻辑都在row_purge_remove_sec_if_poss()row_purge_remove_clust_if_poss()中,所以依次介绍这两个函数的实现

row_purge_remove_sec_if_poss()

  1. void row_purge_remove_sec_if_poss(purge_node_t *node, /*!< in: row purge node */
  2. dict_index_t *index, /*!< in: index */
  3. const dtuple_t *entry) /*!< in: index entry */
  4. {
  5. ...
  6. if (!entry) {
  7. /* 如果在这个索引创建之前就写入了这个undo log,那么就可能由于undo log中
  8. 缺少某些 field 而导致获取不到 entry。 */
  9. return;
  10. }
  11. /** 从二级索引的 leaf page 上删除此 entry */
  12. if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
  13. return;
  14. }
  15. retry:
  16. /** 通过修改索引树来删除 entry */
  17. success = row_purge_remove_sec_if_poss_tree(node, index, entry);
  18. /* The delete operation may fail if we have little
  19. file space left: TODO: easiest to crash the database
  20. and restart with more file space */
  21. if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
  22. n_tries++;
  23. os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
  24. goto retry;
  25. }
  26. ut_a(success);
  27. }
  28. /**====================row_purge_remove_sec_if_poss_leaf========================*/
  29. static MY_ATTRIBUTE((warn_unused_result)) bool row_purge_remove_sec_if_poss_leaf(
  30. purge_node_t *node, /*!< in: row purge node */
  31. dict_index_t *index, /*!< in: index */
  32. const dtuple_t *entry) /*!< in: index entry */
  33. {
  34. ...
  35. /** 从此二级索引树上查找此 entry 的位置 */
  36. search_result = row_search_index_entry(index, entry, mode, &pcur, &mtr);
  37. ...
  38. switch (search_result) {
  39. case ROW_FOUND:
  40. /* 在尝试purge之前此entry满足下列条件其一就可删除:
  41. 1. 从具体索引上回查主键,查不到主键。
  42. 2. 没有更老的版本的数据*/
  43. if (row_purge_poss_sec(node, index, entry)) {
  44. btr_cur_t *btr_cur = btr_pcur_get_btr_cur(&pcur);
  45. ...
  46. goto func_exit_no_pcur;
  47. }
  48. ...
  49. /** 从此二级索引乐观删除此数据 */
  50. if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
  51. /* The index entry could not be deleted. */
  52. success = false;
  53. }
  54. }
  55. /* fall through (the index entry is still needed,
  56. or the deletion succeeded) */
  57. case ROW_NOT_DELETED_REF:
  58. /* The index entry is still needed. */
  59. case ROW_BUFFERED:
  60. /* The deletion was buffered. */
  61. case ROW_NOT_FOUND:
  62. /* The index entry does not exist, nothing to do. */
  63. btr_pcur_close(&pcur);
  64. func_exit_no_pcur:
  65. mtr_commit(&mtr);
  66. return (success);
  67. }
  68. ut_error;
  69. return (false);
  70. }
  71. /**===========btr_cur_optimistic_delete=============*/
  72. ibool btr_cur_optimistic_delete_func(
  73. btr_cur_t *cursor, /*!< in: cursor on leaf page, on the record to
  74. delete; cursor stays valid: if deletion
  75. succeeds, on function exit it points to the
  76. successor of the deleted record */
  77. #ifdef UNIV_DEBUG
  78. ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
  79. #endif /* UNIV_DEBUG */
  80. mtr_t *mtr) /*!< in: mtr; if this function returns
  81. TRUE on a leaf page of a secondary
  82. index, the mtr must be committed
  83. before latching any further pages */
  84. {
  85. ...
  86. /* This is intended only for leaf page deletions */
  87. /** 获取 pcursor 所在的 block */
  88. block = btr_cur_get_block(cursor);
  89. ...
  90. /** 获取 record 及其 offset */
  91. rec = btr_cur_get_rec(cursor);
  92. offsets =
  93. rec_get_offsets(rec, cursor->index, offsets, ULINT_UNDEFINED, &heap);
  94. ...
  95. /** 如果为 true 说明 record 都在此page中,乐观直接删除即可;
  96. 如果为false,则可能涉及到索引树的变化以及 blob 等数据,需要悲观删除。*/
  97. if (no_compress_needed) {
  98. ...
  99. /** 从 AHI hash 中删除此 record */
  100. btr_search_update_hash_on_delete(cursor);
  101. /** 从索引页上删除此 record */
  102. if (page_zip) {
  103. ...
  104. page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index, offsets,
  105. mtr);
  106. ...
  107. } else {
  108. ...
  109. page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index, offsets,
  110. mtr);
  111. ...
  112. }
  113. } else {
  114. /* 预取siblings block,为悲观删除做准备 */
  115. btr_cur_prefetch_siblings(block);
  116. }
  117. if (UNIV_LIKELY_NULL(heap)) {
  118. mem_heap_free(heap);
  119. }
  120. return (no_compress_needed);
  121. }
  122. /**===========row_purge_remove_sec_if_poss_tree=============*/
  123. static MY_ATTRIBUTE((warn_unused_result)) ibool
  124. row_purge_remove_sec_if_poss_tree(
  125. purge_node_t *node, /*!< in: row purge node */
  126. dict_index_t *index, /*!< in: index */
  127. const dtuple_t *entry) /*!< in: index entry */
  128. {
  129. ...
  130. /** 从此二级索引树上查找此 entry 的位置 */
  131. search_result = row_search_index_entry(
  132. index, entry, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, &pcur, &mtr);
  133. ...
  134. btr_cur = btr_pcur_get_btr_cur(&pcur);
  135. /* 在尝试purge之前此entry满足下列条件其一就可删除:
  136. 1. 从具体索引上回查主键,查不到主键。
  137. 2. 没有更老的版本的数据*/
  138. if (row_purge_poss_sec(node, index, entry)) {
  139. ...
  140. goto func_exit;
  141. }
  142. /** 悲观删除此 entry。 */
  143. btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, false, 0, node->undo_no,
  144. node->rec_type, &mtr);
  145. ...
  146. }
  147. }
  148. ...
  149. return (success);
  150. }
  151. /**===========btr_cur_pessimistic_delete=============*/
  152. ibool btr_cur_pessimistic_delete(
  153. dberr_t *err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
  154. the latter may occur because we may have
  155. to update node pointers on upper levels,
  156. and in the case of variable length keys
  157. these may actually grow in size */
  158. ibool has_reserved_extents, /*!< in: TRUE if the
  159. caller has already reserved enough free
  160. extents so that he knows that the operation
  161. will succeed */
  162. btr_cur_t *cursor, /*!< in: cursor on the record to delete;
  163. if compression does not occur, the cursor
  164. stays valid: it points to successor of
  165. deleted record on function exit */
  166. ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
  167. bool rollback, /*!< in: performing rollback? */
  168. trx_id_t trx_id, /*!< in: the current transaction id. */
  169. undo_no_t undo_no,
  170. /*!< in: the undo number within the
  171. current trx, used for rollback to savepoint
  172. for an LOB. */
  173. ulint rec_type,
  174. /*!< in: undo record type. */
  175. mtr_t *mtr) /*!< in: mtr */
  176. {
  177. ...
  178. /** 获取此 entry 相关信息 */
  179. block = btr_cur_get_block(cursor);
  180. page = buf_block_get_frame(block);
  181. index = btr_cur_get_index(cursor);
  182. ...
  183. /** 获取 将要删除的 record 及 offset 等信息 */
  184. rec = btr_cur_get_rec(cursor);
  185. ...
  186. offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
  187. /** 释放 extern 数据,包括 blob 等。 */
  188. if (rec_offs_any_extern(offsets)) {
  189. lob::BtrContext btr_ctx(mtr, NULL, index, rec, offsets, block);
  190. btr_ctx.free_externally_stored_fields(trx_id, undo_no, rollback, rec_type);
  191. ...
  192. }
  193. /** 如果此 page 中只有这一条 record,并且此 page 不是 root page;
  194. 那么需要将此 page 删除掉*/
  195. if (UNIV_UNLIKELY(page_get_n_recs(page) < 2) &&
  196. UNIV_UNLIKELY(dict_index_get_page(index) != block->page.id.page_no())) {
  197. /* If there is only one record, drop the whole page in
  198. btr_discard_page, if this is not the root page */
  199. btr_discard_page(cursor, mtr);
  200. ret = TRUE;
  201. goto return_after_reservations;
  202. }
  203. ...
  204. /** 获取此 page 在 B 树中的层高。 */
  205. level = btr_page_get_level(page, mtr);
  206. /** 如果层高大于0,并且此 rec 是 page 上最小的 rec
  207. 需要处理下面三种情况:*/
  208. if (level > 0 &&
  209. UNIV_UNLIKELY(rec == page_rec_get_next(page_get_infimum_rec(page)))) {
  210. rec_t *next_rec = page_rec_get_next(rec);
  211. if (btr_page_get_prev(page, mtr) == FIL_NULL) {
  212. /** 此page为最左边的page,那么需要更新 B 树的最小rec */
  213. /* If we delete the leftmost node pointer on a
  214. non-leaf level, we must mark the new leftmost node
  215. pointer as the predefined minimum record */
  216. ...
  217. btr_set_min_rec_mark(next_rec, mtr);
  218. } else if (dict_index_is_spatial(index)) {
  219. /** 如果是R树,则需要更新parent page */
  220. /* For rtree, if delete the leftmost node pointer,
  221. we need to update parent page. */
  222. ...
  223. rtr_page_get_father_block(NULL, heap, index, block, mtr, NULL,
  224. &father_cursor);
  225. offsets = rec_get_offsets(btr_cur_get_rec(&father_cursor), index, NULL,
  226. ULINT_UNDEFINED, &heap);
  227. father_rec = btr_cur_get_rec(&father_cursor);
  228. rtr_read_mbr(rec_get_nth_field(father_rec, offsets, 0, &len),
  229. &father_mbr);
  230. upd_ret = rtr_update_mbr_field(&father_cursor, offsets, NULL, page,
  231. &father_mbr, next_rec, mtr);
  232. ...
  233. } else {
  234. /* Otherwise, if we delete the leftmost node pointer
  235. on a page, we have to change the parent node pointer
  236. so that it is equal to the new leftmost node pointer
  237. on the page */
  238. btr_node_ptr_delete(index, block, mtr);
  239. dtuple_t *node_ptr = dict_index_build_node_ptr(
  240. index, next_rec, block->page.id.page_no(), heap, level);
  241. btr_insert_on_non_leaf_level(flags, index, level + 1, node_ptr, mtr);
  242. ut_d(parent_latched = true);
  243. }
  244. }
  245. /** 从 AHI hash 中删除此 rec */
  246. btr_search_update_hash_on_delete(cursor);
  247. /** 从此 page 上删除此 rec*/
  248. page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
  249. return_after_reservations:
  250. *err = DB_SUCCESS;
  251. ...
  252. DBUG_RETURN(ret);
  253. }

综上,我们就知道从二级索引上清理需要进行那些操作了。

row_purge_remove_clust_if_poss()

  1. static MY_ATTRIBUTE((warn_unused_result)) bool row_purge_remove_clust_if_poss(
  2. purge_node_t *node) /*!< in/out: row purge node */
  3. {
  4. /** 尝试仅从叶子节点上删除此 rec */
  5. if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
  6. return (true);
  7. }
  8. /** 从整个B树中删除此 rec */
  9. for (ulint n_tries = 0; n_tries < BTR_CUR_RETRY_DELETE_N_TIMES; n_tries++) {
  10. if (row_purge_remove_clust_if_poss_low(
  11. node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) {
  12. return (true);
  13. }
  14. os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
  15. }
  16. return (false);
  17. }
  18. /**===========row_purge_remove_clust_if_poss_low=============*/
  19. static MY_ATTRIBUTE((warn_unused_result)) bool row_purge_remove_clust_if_poss_low(
  20. purge_node_t *node, /*!< in/out: row purge node */
  21. ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
  22. {
  23. ...
  24. /** 获取聚集索引 */
  25. index = node->table->first_index();
  26. ...
  27. /** 获取此 rec 在B树中的位置,如果获取不到说明此 rec 可能已被删除,直接退出 */
  28. if (!row_purge_reposition_pcur(mode, node, &mtr)) {
  29. /* The record was already removed. */
  30. goto func_exit;
  31. }
  32. /** 获取 rec 及 offsets 等信息 */
  33. rec = btr_pcur_get_rec(&node->pcur);
  34. offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap);
  35. /** 校验我们需要 purge 的 rec 中的 roll_ptr 与 B 树中 rec 的 roll_ptr 是否一致。
  36. 如果不一致,说明此 record 又被更新了,故退出。*/
  37. if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
  38. /* Someone else has modified the record later: do not remove */
  39. goto func_exit;
  40. }
  41. ...
  42. /** 先尝试只从叶子节点上删除此 rec,不行则从整个B树上删除此 rec */
  43. if (mode == BTR_MODIFY_LEAF) {
  44. success =
  45. btr_cur_optimistic_delete(btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
  46. } else {
  47. dberr_t err;
  48. ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
  49. btr_cur_pessimistic_delete(&err, FALSE, btr_pcur_get_btr_cur(&node->pcur),
  50. 0, false, node->trx_id, node->undo_no,
  51. node->rec_type, &mtr);
  52. ...
  53. }
  54. }
  55. func_exit:
  56. ...
  57. return (success);
  58. }

因为btr_cur_optimistic_delete()btr_cur_pessimistic_delete()在之前介绍row_purge_remove_sec_if_poss()删除二级索引是就已经介绍,所以这里就不在赘述。

至此我们已经介绍完清理一条TRX_UNDO_DEL_MARK_RECundo log需要进行那些操作。

TRX_UNDO_UPD_EXIST_REC

TRX_UNDO_UPD_EXIST_REC类型的undo log record的入口函数为row_purge_upd_exist_or_extern()将旧的二级索引记录清理掉,其具体流程如下:

  1. static void row_purge_upd_exist_or_extern_func(
  2. #ifdef UNIV_DEBUG
  3. const que_thr_t *thr, /*!< in: query thread */
  4. #endif /* UNIV_DEBUG */
  5. purge_node_t *node, /*!< in: row purge node */
  6. trx_undo_rec_t *undo_rec) /*!< in: record to purge */
  7. {
  8. ...
  9. /** 如果 rec_type 为 TRX_UNDO_UPD_DEL_REC,那么说明此 rec 之前被删除,二级索引也被删除。
  10. 如果cmpl_info 为 UPD_NODE_NO_ORD_CHANGE,那么说明二级索引没有被修改。*/
  11. if (node->rec_type == TRX_UNDO_UPD_DEL_REC ||
  12. (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
  13. goto skip_secondaries;
  14. }
  15. ...
  16. /** 遍历改 rec 的所有二级索引 */
  17. while (node->index != NULL) {
  18. /** 跳过可能已经损坏的二级索引 */
  19. dict_table_skip_corrupt_index(node->index);
  20. row_purge_skip_uncommitted_virtual_index(node->index);
  21. if (!node->index) {
  22. break;
  23. }
  24. /** 构造出此 rec 更新之前的 rec */
  25. if (row_upd_changes_ord_field_binary(node->index, node->update, thr, NULL,
  26. NULL)) {
  27. /* Build the older version of the index entry */
  28. dtuple_t *entry = row_build_index_entry_low(node->row, NULL, node->index,
  29. heap, ROW_BUILD_FOR_PURGE);
  30. /** 将 old rec 从二级索引上删除 */
  31. row_purge_remove_sec_if_poss(node, node->index, entry);
  32. ...
  33. }
  34. node->index = node->index->next();
  35. }
  36. mem_heap_free(heap);
  37. skip_secondaries:
  38. /* Free possible externally stored fields */
  39. for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
  40. const upd_field_t *ufield = upd_get_nth_field(node->update, i);
  41. if (dfield_is_ext(&ufield->new_val)) {
  42. buf_block_t *block;
  43. ...
  44. /* We use the fact that new_val points to
  45. undo_rec and get thus the offset of
  46. dfield data inside the undo record. Then we
  47. can calculate from node->roll_ptr the file
  48. address of the new_val data */
  49. internal_offset =
  50. ((const byte *)dfield_get_data(&ufield->new_val)) - undo_rec;
  51. ut_a(internal_offset < UNIV_PAGE_SIZE);
  52. trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no,
  53. &offset);
  54. /* If table is temp then it can't have its undo log
  55. residing in rollback segment with REDO log enabled. */
  56. bool is_temp = node->table->is_temporary();
  57. undo_space_id = trx_rseg_id_to_space_id(rseg_id, is_temp);
  58. mtr_start(&mtr);
  59. /* We have to acquire an SX-latch to the clustered
  60. index tree (exclude other tree changes) */
  61. index = node->table->first_index();
  62. mtr_sx_lock(dict_index_get_lock(index), &mtr);
  63. /* NOTE: we must also acquire an X-latch to the
  64. root page of the tree. We will need it when we
  65. free pages from the tree. If the tree is of height 1,
  66. the tree X-latch does NOT protect the root page,
  67. because it is also a leaf page. Since we will have a
  68. latch on an undo log page, we would break the
  69. latching order if we would only later latch the
  70. root page of such a tree! */
  71. btr_root_get(index, &mtr);
  72. block = buf_page_get(page_id_t(undo_space_id, page_no), univ_page_size,
  73. RW_X_LATCH, &mtr);
  74. buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
  75. data_field = buf_block_get_frame(block) + offset + internal_offset;
  76. ut_a(dfield_get_len(&ufield->new_val) >= BTR_EXTERN_FIELD_REF_SIZE);
  77. byte *field_ref = data_field + dfield_get_len(&ufield->new_val) -
  78. BTR_EXTERN_FIELD_REF_SIZE;
  79. lob::BtrContext btr_ctx(&mtr, NULL, index, NULL, NULL, block);
  80. lob::DeleteContext ctx(btr_ctx, field_ref, 0, false);
  81. lob::ref_t lobref(field_ref);
  82. /** 将 blob 数据清理掉 */
  83. lob::purge(&ctx, index, node->modifier_trx_id,
  84. trx_undo_rec_get_undo_no(undo_rec), lobref, node->rec_type,
  85. ufield);
  86. mtr_commit(&mtr);
  87. }
  88. }
  89. }

因为row_purge_remove_sec_if_poss()在之前介绍row_purge_del_mark()清理TRX_UNDO_DEL_MARK_REC类型的undo log时就已经介绍过,所以这里就不在赘述。

总结

至此,我们就已经把undo log是如何进行purge的已经全部介绍完;关于blob数据如何清理,后面有机会继续介绍。

参考内容

MySQL 8.0.23’s source code

MySQL 8.0 Reference Manual

InnoDB之UNDO LOG介绍