commit f41d1ddd5faf95483f66e3dfeb31ea51b4c7a997
from: Serge Petrenko
via: Kirill Yukhin
date: Mon Jul 06 07:19:18 2020 UTC

wal: fix tx boundaries

In order to preserve transaction boundaries in replication protocol, wal
assigns each tx row a transaction sequence number (tsn). Tsn is equal to
the lsn of the first transaction row.

Starting with commit 7eb4650eecf1ac382119d0038076c19b2708f4a1, local
space requests are assigned a special replica id, 0, and have their own
lsns. These operations are not replicated.

If a transaction starting with a local space operation ends up in the
WAL, it gets a tsn equal to the lsn of the local space request. Then,
during replication, when such a transaction is replicated, the local
space request is omitted, and replica receives a global part of the
transaction with a seemingly random tsn, yielding an ER_PROTOCOL error:
"Transaction id must be equal to LSN of the first row in the
transaction".

Assign tsn as equal to the lsn of the first global row in the
transaction to fix the problem, and assign tsn as before for fully local
transactions.

Follow-up #4114
Part-of #4928

Reviewed-by: Cyrill Gorcunov

commit - 9fcbbb3e7d5e9f5a876ee27a7bf93303321e26b2
commit + f41d1ddd5faf95483f66e3dfeb31ea51b4c7a997
blob - 74cc74684ee63167683b4317f549d0141ece4d39
blob + ef89733ede3ff5dc695197b5527115260cd9a60e
--- src/box/wal.c
+++ src/box/wal.c
@@ -955,12 +955,14 @@ wal_assign_lsn(struct vclock *vclock_diff, struct vclo
 	       struct xrow_header **end)
 {
 	int64_t tsn = 0;
+	struct xrow_header **start = row;
+	struct xrow_header **first_glob_row = row;
 	/** Assign LSN to all local rows. */
 	for ( ; row < end; row++) {
 		if ((*row)->replica_id == 0) {
 			/*
 			 * All rows representing local space data
-			 * manipulations are signed wth a zero
+			 * manipulations are signed with a zero
 			 * instance id. This is also true for
 			 * anonymous replicas, since they are
 			 * only capable of writing to local and
@@ -971,9 +973,18 @@ wal_assign_lsn(struct vclock *vclock_diff, struct vclo
 			(*row)->lsn = vclock_inc(vclock_diff, (*row)->replica_id) +
 				      vclock_get(base, (*row)->replica_id);
-			/* Use lsn of the first local row as transaction id. */
-			tsn = tsn == 0 ? (*row)->lsn : tsn;
-			(*row)->tsn = tsn;
+			/*
+			 * Use lsn of the first global row as
+			 * transaction id.
+			 */
+			if ((*row)->group_id != GROUP_LOCAL && tsn == 0) {
+				tsn = (*row)->lsn;
+				/*
+				 * Remember the tail being processed.
+				 */
+				first_glob_row = row;
+			}
+			(*row)->tsn = tsn == 0 ? (*start)->lsn : tsn;
 			(*row)->is_commit = row == end - 1;
 		} else {
 			int64_t diff = (*row)->lsn -
 				       vclock_get(base, (*row)->replica_id);
@@ -992,6 +1003,14 @@ wal_assign_lsn(struct vclock *vclock_diff, struct vclo
 				}
 			}
 		}
 	}
+
+	/*
+	 * Fill transaction id for all the local rows preceding
+	 * the first global row. tsn was yet unknown when those
+	 * rows were processed.
+	 */
+	for (row = start; row < first_glob_row; row++)
+		(*row)->tsn = tsn;
 }
 
 static void