From 7212272149f2ff43f6057c43cf70b17b70d60796 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 30 Oct 2014 13:32:14 -0500 Subject: [PATCH] sanlock: fix checksum endian handling checksums must be computed while the data is in the ondisk format (little endian). When writing a structure: - byte swap the structure data from host format to ondisk format - compute the host format checksum value from the ondisk data - byte swap the checksum value from host format to ondisk format - write the ondisk format checksum into the structure When reading a structure: - compute the host format checksum from the ondisk data - byte swap the structure data from ondisk format to host format - verify the computed host format checksum matches the checksum in the host format structure Signed-off-by: David Teigland --- src/delta_lease.c | 76 +++++++++++++++++++++++++----- src/direct.c | 17 +++++++ src/ondisk.c | 28 ++--------- src/ondisk.h | 25 ++++++++++ src/paxos_lease.c | 135 ++++++++++++++++++++++++++++++++++++++---------------- src/paxos_lease.h | 1 + src/resource.c | 5 +- 7 files changed, 210 insertions(+), 77 deletions(-) diff --git a/src/delta_lease.c b/src/delta_lease.c index 5b6dc9f873f0..ef1cb3c8332a 100644 --- a/src/delta_lease.c +++ b/src/delta_lease.c @@ -81,9 +81,9 @@ static int verify_leader(struct sync_disk *disk, char *space_name, uint64_t host_id, struct leader_record *lr, + uint32_t checksum, const char *caller) { - uint32_t sum; int result; if (lr->magic != DELTA_DISK_MAGIC) { @@ -118,12 +118,10 @@ static int verify_leader(struct sync_disk *disk, goto fail; } - sum = leader_checksum(lr); - - if (lr->checksum != sum) { + if (lr->checksum != checksum) { log_error("verify_leader %llu wrong checksum %x %x %s", (unsigned long long)host_id, - lr->checksum, sum, disk->path); + lr->checksum, checksum, disk->path); result = SANLK_LEADER_CHECKSUM; goto fail; } @@ -164,6 +162,7 @@ int delta_read_lockspace(struct task *task, { struct leader_record leader_end; struct 
leader_record leader; + uint32_t checksum; char *space_name; int rv, error; @@ -176,6 +175,9 @@ int delta_read_lockspace(struct task *task, if (rv < 0) return rv; + /* N.B. compute checksum before byte swapping */ + checksum = leader_checksum(&leader_end); + leader_record_in(&leader_end, &leader); if (!ls->name[0]) @@ -183,7 +185,7 @@ int delta_read_lockspace(struct task *task, else space_name = ls->name; - error = verify_leader(disk, space_name, host_id, &leader, "read_lockspace"); + error = verify_leader(disk, space_name, host_id, &leader, checksum, "read_lockspace"); if (error == SANLK_OK) { memcpy(ls->name, leader.space_name, SANLK_NAME_LEN); @@ -203,6 +205,7 @@ int delta_lease_leader_read(struct task *task, int io_timeout, { struct leader_record leader_end; struct leader_record leader; + uint32_t checksum; int rv, error; /* host_id N is block offset N-1 */ @@ -215,9 +218,12 @@ int delta_lease_leader_read(struct task *task, int io_timeout, if (rv < 0) return rv; + /* N.B. compute checksum before byte swapping */ + checksum = leader_checksum(&leader_end); + leader_record_in(&leader_end, &leader); - error = verify_leader(disk, space_name, host_id, &leader, caller); + error = verify_leader(disk, space_name, host_id, &leader, checksum, caller); memcpy(leader_ret, &leader, sizeof(struct leader_record)); return error; @@ -274,6 +280,7 @@ int delta_lease_acquire(struct task *task, struct leader_record leader1; struct leader_record leader_end; uint64_t new_ts; + uint32_t checksum; int other_io_timeout, other_host_dead_seconds, other_id_renewal_seconds; int i, error, rv, delay, delta_large_delay; @@ -385,7 +392,7 @@ int delta_lease_acquire(struct task *task, leader.owner_id = host_id; leader.owner_generation++; snprintf(leader.resource_name, NAME_ID_SIZE, "%s", our_host_name); - leader.checksum = leader_checksum(&leader); + leader.checksum = 0; /* set below */ log_space(sp, "delta_acquire write %llu %llu %llu %.48s", (unsigned long long)leader.owner_id, @@ -395,6 
+402,13 @@ int delta_lease_acquire(struct task *task, leader_record_out(&leader, &leader_end); + /* + * N.B. must compute checksum after the data has been byte swapped. + */ + checksum = leader_checksum(&leader_end); + leader.checksum = checksum; + leader_end.checksum = cpu_to_le32(checksum); + rv = write_sector(disk, host_id - 1, (char *)&leader_end, sizeof(struct leader_record), task, sp->io_timeout, "delta_leader"); if (rv < 0) { @@ -458,6 +472,7 @@ int delta_lease_renew(struct task *task, char **p_iobuf; char **p_wbuf; char *wbuf; + uint32_t checksum; uint64_t host_id, id_offset, new_ts; int rv, iobuf_len, sector_size; @@ -568,9 +583,13 @@ int delta_lease_renew(struct task *task, read_done: *read_result = SANLK_OK; memcpy(&leader_end, task->iobuf+id_offset, sizeof(struct leader_record)); + + /* N.B. compute checksum before byte swapping */ + checksum = leader_checksum(&leader_end); + leader_record_in(&leader_end, &leader); - rv = verify_leader(disk, space_name, host_id, &leader, "delta_renew"); + rv = verify_leader(disk, space_name, host_id, &leader, checksum, "delta_renew"); if (rv < 0) { log_erros(sp, "delta_renew verify_leader error %d", rv); return rv; @@ -611,7 +630,7 @@ int delta_lease_renew(struct task *task, } leader.timestamp = new_ts; - leader.checksum = leader_checksum(&leader); + leader.checksum = 0; /* set below */ /* TODO: rename the leader fields */ if (extra) { @@ -630,6 +649,13 @@ int delta_lease_renew(struct task *task, leader_record_out(&leader, &leader_end); + /* + * N.B. must compute checksum after the data has been byte swapped. 
+ */ + checksum = leader_checksum(&leader_end); + leader.checksum = checksum; + leader_end.checksum = cpu_to_le32(checksum); + memcpy(wbuf, &leader_end, sizeof(struct leader_record)); memcpy(wbuf+LEADER_RECORD_MAX, bitmap, HOSTID_BITMAP_SIZE); @@ -666,6 +692,7 @@ int delta_lease_release(struct task *task, struct leader_record leader; struct leader_record leader_end; uint64_t host_id; + uint32_t checksum; int rv; if (!leader_last) @@ -678,10 +705,17 @@ int delta_lease_release(struct task *task, memcpy(&leader, leader_last, sizeof(struct leader_record)); leader.timestamp = LEASE_FREE; - leader.checksum = leader_checksum(&leader); + leader.checksum = 0; /* set below */ leader_record_out(&leader, &leader_end); + /* + * N.B. must compute checksum after the data has been byte swapped. + */ + checksum = leader_checksum(&leader_end); + leader.checksum = checksum; + leader_end.checksum = cpu_to_le32(checksum); + rv = write_sector(disk, host_id - 1, (char *)&leader_end, sizeof(struct leader_record), task, sp->io_timeout, "delta_leader"); if (rv < 0) { @@ -714,6 +748,7 @@ int delta_lease_init(struct task *task, int iobuf_len; int align_size; int i, rv; + uint32_t checksum; if (!max_hosts) max_hosts = DEFAULT_MAX_HOSTS; @@ -752,7 +787,7 @@ int delta_lease_init(struct task *task, leader.timestamp = LEASE_FREE; leader.io_timeout = io_timeout; strncpy(leader.space_name, space_name, NAME_ID_SIZE); - leader.checksum = leader_checksum(&leader); + leader.checksum = 0; /* set below */ /* make the first record invalid so we can do a single atomic write below to commit the whole thing */ @@ -763,6 +798,13 @@ int delta_lease_init(struct task *task, leader_record_out(&leader, &leader_end); + /* + * N.B. must compute checksum after the data has been byte swapped. 
+ */ + checksum = leader_checksum(&leader_end); + leader.checksum = checksum; + leader_end.checksum = cpu_to_le32(checksum); + memcpy(iobuf + (i * disk->sector_size), &leader_end, sizeof(struct leader_record)); } @@ -773,7 +815,17 @@ int delta_lease_init(struct task *task, /* commit the whole lockspace by making the first record valid */ leader_first.magic = DELTA_DISK_MAGIC; + leader_first.checksum = 0; /* set below */ + leader_record_out(&leader_first, &leader_end); + + /* + * N.B. must compute checksum after the data has been byte swapped. + */ + checksum = leader_checksum(&leader_end); + leader_first.checksum = checksum; + leader_end.checksum = cpu_to_le32(checksum); + memcpy(iobuf, &leader_end, sizeof(struct leader_record)); rv = write_iobuf(disk->fd, disk->offset, iobuf, disk->sector_size, task, io_timeout); diff --git a/src/direct.c b/src/direct.c index 8813b2f14716..2dbb098d064b 100644 --- a/src/direct.c +++ b/src/direct.c @@ -400,6 +400,7 @@ int direct_dump(struct task *task, char *dump_path, int force_mode) struct request_record rr; struct mode_block mb; struct sync_disk sd; + struct paxos_dblock dblock; char sname[NAME_ID_SIZE+1]; char rname[NAME_ID_SIZE+1]; uint64_t sector_nr; @@ -522,6 +523,22 @@ int direct_dump(struct task *task, char *dump_path, int force_mode) char *pd_end = data + ((2 + i) * sd.sector_size); struct mode_block *mb_end = (struct mode_block *)(pd_end + MBLOCK_OFFSET); + if (force_mode > 1) { + paxos_dblock_in((struct paxos_dblock *)pd_end, &dblock); + + if (dblock.mbal || dblock.inp || dblock.lver) { + printf("dblock[%04d] mbal %llu bal %llu inp %llu inp2 %llu inp3 %llu lver %llu sum %x\n", + i, + (unsigned long long)dblock.mbal, + (unsigned long long)dblock.bal, + (unsigned long long)dblock.inp, + (unsigned long long)dblock.inp2, + (unsigned long long)dblock.inp3, + (unsigned long long)dblock.lver, + dblock.checksum); + } + } + mode_block_in(mb_end, &mb); if (!(mb.flags & MBLOCK_SHARED)) diff --git a/src/ondisk.c b/src/ondisk.c index 
6af8effbcf67..c161f8b23478 100644 --- a/src/ondisk.c +++ b/src/ondisk.c @@ -16,28 +16,6 @@ #include "ondisk.h" /* - * sanlock ondisk format is little endian. - */ - -#if __BYTE_ORDER == __BIG_ENDIAN -#define le16_to_cpu(x) (bswap_16((x))) -#define le32_to_cpu(x) (bswap_32((x))) -#define le64_to_cpu(x) (bswap_64((x))) -#define cpu_to_le16(x) (bswap_16((x))) -#define cpu_to_le32(x) (bswap_32((x))) -#define cpu_to_le64(x) (bswap_64((x))) -#endif - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define le16_to_cpu(x) (x) -#define le32_to_cpu(x) (x) -#define le64_to_cpu(x) (x) -#define cpu_to_le16(x) (x) -#define cpu_to_le32(x) (x) -#define cpu_to_le64(x) (x) -#endif - -/* * "end" variables point to ondisk format (endian converted) structures. */ @@ -79,7 +57,8 @@ void leader_record_out(struct leader_record *lr, struct leader_record *end) memcpy(end->resource_name, lr->resource_name, NAME_ID_SIZE); end->timestamp = cpu_to_le64(lr->timestamp); end->unused1 = cpu_to_le64(lr->unused1); - end->checksum = cpu_to_le32(lr->checksum); + /* N.B. the checksum must be computed after the byte swapping */ + /* leader_record_out(lr, end); checksum = compute(end); end->checksum = cpu_to_le32(checksum); */ end->unused2 = cpu_to_le16(lr->unused2); end->io_timeout = cpu_to_le16(lr->io_timeout); end->write_id = cpu_to_le64(lr->write_id); @@ -122,7 +101,8 @@ void paxos_dblock_out(struct paxos_dblock *pd, struct paxos_dblock *end) end->inp2 = cpu_to_le64(pd->inp2); end->inp3 = cpu_to_le64(pd->inp3); end->lver = cpu_to_le64(pd->lver); - end->checksum = cpu_to_le32(pd->checksum); + /* N.B. 
the checksum must be computed after the byte swapping */ + /* paxos_dblock_out(pd, end); checksum = compute(end), end->checksum = cpu_to_le32(checksum); */ } void mode_block_in(struct mode_block *end, struct mode_block *mb) diff --git a/src/ondisk.h b/src/ondisk.h index 2c62c8836f98..3ae48330d529 100644 --- a/src/ondisk.h +++ b/src/ondisk.h @@ -10,6 +10,31 @@ #ifndef __ONDISK_H__ #define __ONDISK_H__ +#include <byteswap.h> +#include <endian.h> + +/* + * sanlock ondisk format is little endian. + */ + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le16_to_cpu(x) (bswap_16((x))) +#define le32_to_cpu(x) (bswap_32((x))) +#define le64_to_cpu(x) (bswap_64((x))) +#define cpu_to_le16(x) (bswap_16((x))) +#define cpu_to_le32(x) (bswap_32((x))) +#define cpu_to_le64(x) (bswap_64((x))) +#endif + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define le16_to_cpu(x) (x) +#define le32_to_cpu(x) (x) +#define le64_to_cpu(x) (x) +#define cpu_to_le16(x) (x) +#define cpu_to_le32(x) (x) +#define cpu_to_le64(x) (x) +#endif + void leader_record_in(struct leader_record *end, struct leader_record *lr); void leader_record_out(struct leader_record *lr, struct leader_record *end); void request_record_in(struct request_record *end, struct request_record *rr); diff --git a/src/paxos_lease.c b/src/paxos_lease.c index 7b8e95a5c0b8..3e9c306ac0d4 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -47,6 +47,16 @@ static uint32_t roundup_power_of_two(uint32_t val) return val; } +uint32_t leader_checksum(struct leader_record *lr) +{ + return crc32c((uint32_t)~1, (uint8_t *)lr, LEADER_CHECKSUM_LEN); +} + +static uint32_t dblock_checksum(struct paxos_dblock *pd) +{ + return crc32c((uint32_t)~1, (uint8_t *)pd, DBLOCK_CHECKSUM_LEN); +} + int paxos_lease_request_read(struct task *task, struct token *token, struct request_record *rr) { @@ -138,6 +148,7 @@ static int write_dblock_mblock_sh(struct task *task, struct mode_block mb_end; char *iobuf, **p_iobuf; uint64_t offset; + uint32_t checksum; int iobuf_len, rv; memset(&mb, 0, 
sizeof(mb)); @@ -157,6 +168,14 @@ static int write_dblock_mblock_sh(struct task *task, offset = disk->offset + ((2 + host_id - 1) * disk->sector_size); paxos_dblock_out(pd, &pd_end); + + /* + * N.B. must compute checksum after the data has been byte swapped. + */ + checksum = dblock_checksum(&pd_end); + pd->checksum = checksum; + pd_end.checksum = cpu_to_le32(checksum); + mode_block_out(&mb, &mb_end); memcpy(iobuf, (char *)&pd_end, sizeof(struct paxos_dblock)); @@ -183,6 +202,7 @@ static int write_dblock(struct task *task, struct paxos_dblock *pd) { struct paxos_dblock pd_end; + uint32_t checksum; int rv; if (token->flags & T_WRITE_DBLOCK_MBLOCK_SH) { @@ -195,6 +215,13 @@ static int write_dblock(struct task *task, paxos_dblock_out(pd, &pd_end); + /* + * N.B. must compute checksum after the data has been byte swapped. + */ + checksum = dblock_checksum(&pd_end); + pd->checksum = checksum; + pd_end.checksum = cpu_to_le32(checksum); + rv = write_sector(disk, 2 + host_id - 1, (char *)&pd_end, sizeof(struct paxos_dblock), task, token->io_timeout, "dblock"); return rv; @@ -206,10 +233,18 @@ static int write_leader(struct task *task, struct leader_record *lr) { struct leader_record lr_end; + uint32_t checksum; int rv; leader_record_out(lr, &lr_end); + /* + * N.B. must compute checksum after the data has been byte swapped. + */ + checksum = leader_checksum(&lr_end); + lr->checksum = checksum; + lr_end.checksum = cpu_to_le32(checksum); + rv = write_sector(disk, 0, (char *)&lr_end, sizeof(struct leader_record), task, token->io_timeout, "leader"); return rv; @@ -227,10 +262,18 @@ int paxos_lease_leader_clobber(struct task *task, const char *caller) { struct leader_record lr_end; + uint32_t checksum; int rv; leader_record_out(leader, &lr_end); + /* + * N.B. must compute checksum after the data has been byte swapped. 
+ */ + checksum = leader_checksum(&lr_end); + leader->checksum = checksum; + lr_end.checksum = cpu_to_le32(checksum); + rv = write_sector(&token->disks[0], 0, (char *)&lr_end, sizeof(struct leader_record), task, token->io_timeout, caller); return rv; @@ -303,7 +346,8 @@ static int read_dblocks(struct task *task, static int read_leader(struct task *task, struct token *token, struct sync_disk *disk, - struct leader_record *lr) + struct leader_record *lr, + uint32_t *checksum) { struct leader_record lr_end; int rv; @@ -313,28 +357,22 @@ static int read_leader(struct task *task, rv = read_sectors(disk, 0, 1, (char *)&lr_end, sizeof(struct leader_record), task, token->io_timeout, "leader"); + /* N.B. checksum is computed while the data is in ondisk format. */ + *checksum = leader_checksum(&lr_end); + leader_record_in(&lr_end, lr); return rv; } -static uint32_t dblock_checksum(struct paxos_dblock *pd) -{ - return crc32c((uint32_t)~1, (uint8_t *)pd, DBLOCK_CHECKSUM_LEN); -} - -static int verify_dblock(struct token *token, struct paxos_dblock *pd) +static int verify_dblock(struct token *token, struct paxos_dblock *pd, uint32_t checksum) { - uint32_t sum; - if (!pd->checksum && !pd->mbal && !pd->bal && !pd->inp && !pd->lver) return SANLK_OK; - sum = dblock_checksum(pd); - - if (pd->checksum != sum) { + if (pd->checksum != checksum) { log_errot(token, "verify_dblock wrong checksum %x %x", - pd->checksum, sum); + pd->checksum, checksum); return SANLK_DBLOCK_CHECKSUM; } @@ -400,6 +438,7 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts, struct sync_disk *disk; char *iobuf[SANLK_MAX_DISKS]; char **p_iobuf[SANLK_MAX_DISKS]; + uint32_t checksum; int num_disks = token->r.num_disks; int num_writes, num_reads; int sector_size = token->disks[0].sector_size; @@ -444,7 +483,7 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts, memset(&dblock, 0, sizeof(struct paxos_dblock)); dblock.mbal = our_mbal; dblock.lver = next_lver; - 
dblock.checksum = dblock_checksum(&dblock); + dblock.checksum = 0; /* set after paxos_dblock_out */ memset(&bk_max, 0, sizeof(struct paxos_dblock)); @@ -483,10 +522,12 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts, for (q = 0; q < num_hosts; q++) { bk_end = (struct paxos_dblock *)(iobuf[d] + ((2 + q)*sector_size)); + checksum = dblock_checksum(bk_end); + paxos_dblock_in(bk_end, &bk_in); bk = &bk_in; - rv = verify_dblock(token, bk); + rv = verify_dblock(token, bk, checksum); if (rv < 0) continue; @@ -569,7 +610,7 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts, dblock.inp3 = monotime(); } dblock.bal = dblock.mbal; - dblock.checksum = dblock_checksum(&dblock); + dblock.checksum = 0; /* set after paxos_dblock_out */ if (bk_max.inp) { /* not a problem, but interesting to see, so use log_error */ @@ -635,10 +676,12 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts, for (q = 0; q < num_hosts; q++) { bk_end = (struct paxos_dblock *)(iobuf[d] + ((2 + q)*sector_size)); + checksum = dblock_checksum(bk_end); + paxos_dblock_in(bk_end, &bk_in); bk = &bk_in; - rv = verify_dblock(token, bk); + rv = verify_dblock(token, bk, checksum); if (rv < 0) continue; @@ -724,11 +767,6 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts, return error; } -uint32_t leader_checksum(struct leader_record *lr) -{ - return crc32c((uint32_t)~1, (uint8_t *)lr, LEADER_CHECKSUM_LEN); -} - static void log_leader_error(int result, struct token *token, struct sync_disk *disk, @@ -771,11 +809,11 @@ static void log_leader_error(int result, static int verify_leader(struct token *token, struct sync_disk *disk, struct leader_record *lr, + uint32_t checksum, const char *caller) { struct leader_record leader_end; struct leader_record leader_rr; - uint32_t sum; int result, rv; if (lr->magic != PAXOS_DISK_MAGIC) { @@ -821,11 +859,9 @@ static int verify_leader(struct token *token, goto fail; } - 
sum = leader_checksum(lr); - - if (lr->checksum != sum) { + if (lr->checksum != checksum) { log_errot(token, "verify_leader wrong checksum %x %x %s", - lr->checksum, sum, disk->path); + lr->checksum, checksum, disk->path); result = SANLK_LEADER_CHECKSUM; goto fail; } @@ -851,9 +887,10 @@ static int verify_leader(struct token *token, int paxos_verify_leader(struct token *token, struct sync_disk *disk, struct leader_record *lr, + uint32_t checksum, const char *caller) { - return verify_leader(token, disk, lr, caller); + return verify_leader(token, disk, lr, checksum, caller); } static int leaders_match(struct leader_record *a, struct leader_record *b) @@ -870,11 +907,12 @@ int paxos_read_resource(struct task *task, struct sanlk_resource *res) { struct leader_record leader; + uint32_t checksum; int rv; memset(&leader, 0, sizeof(struct leader_record)); - rv = read_leader(task, token, &token->disks[0], &leader); + rv = read_leader(task, token, &token->disks[0], &leader, &checksum); if (rv < 0) return rv; @@ -884,7 +922,7 @@ int paxos_read_resource(struct task *task, if (!res->name[0]) memcpy(token->r.name, leader.resource_name, NAME_ID_SIZE); - rv = verify_leader(token, &token->disks[0], &leader, "read_resource"); + rv = verify_leader(token, &token->disks[0], &leader, checksum, "read_resource"); if (rv == SANLK_OK) { memcpy(res->lockspace_name, leader.space_name, NAME_ID_SIZE); @@ -928,15 +966,16 @@ static int _leader_read_one(struct task *task, const char *caller) { struct leader_record leader; + uint32_t checksum; int rv; memset(&leader, 0, sizeof(struct leader_record)); - rv = read_leader(task, token, &token->disks[0], &leader); + rv = read_leader(task, token, &token->disks[0], &leader, &checksum); if (rv < 0) return rv; - rv = verify_leader(token, &token->disks[0], &leader, caller); + rv = verify_leader(token, &token->disks[0], &leader, checksum, caller); /* copy what we read even if verify finds a problem */ @@ -953,6 +992,7 @@ static int _leader_read_num(struct 
task *task, { struct leader_record leader; struct leader_record *leaders; + uint32_t checksum; int *leader_reps; int leaders_len, leader_reps_len; int num_reads; @@ -985,11 +1025,11 @@ static int _leader_read_num(struct task *task, num_reads = 0; for (d = 0; d < num_disks; d++) { - rv = read_leader(task, token, &token->disks[d], &leaders[d]); + rv = read_leader(task, token, &token->disks[d], &leaders[d], &checksum); if (rv < 0) continue; - rv = verify_leader(token, &token->disks[d], &leaders[d], caller); + rv = verify_leader(token, &token->disks[d], &leaders[d], checksum, caller); if (rv < 0) continue; @@ -1083,6 +1123,7 @@ static int _lease_read_one(struct task *task, char *iobuf, **p_iobuf; uint32_t host_id = token->host_id; uint32_t sector_size = disk->sector_size; + uint32_t checksum; struct paxos_dblock *bk_end; uint64_t tmp_mbal = 0; int q, tmp_q = -1, rv, iobuf_len; @@ -1104,21 +1145,26 @@ static int _lease_read_one(struct task *task, goto out; memcpy(&leader_end, iobuf, sizeof(struct leader_record)); + + checksum = leader_checksum(&leader_end); + leader_record_in(&leader_end, leader_ret); memcpy(&our_dblock_end, iobuf + ((host_id + 1) * sector_size), sizeof(struct paxos_dblock)); paxos_dblock_in(&our_dblock_end, our_dblock); - rv = verify_leader(token, disk, leader_ret, caller); + rv = verify_leader(token, disk, leader_ret, checksum, caller); if (rv < 0) goto out; for (q = 0; q < leader_ret->num_hosts; q++) { bk_end = (struct paxos_dblock *)(iobuf + ((2 + q) * sector_size)); + checksum = dblock_checksum(bk_end); + paxos_dblock_in(bk_end, &bk); - rv = verify_dblock(token, &bk); + rv = verify_dblock(token, &bk, checksum); if (rv < 0) goto out; @@ -1773,7 +1819,7 @@ int paxos_lease_acquire(struct task *task, new_leader.flags &= ~LFL_SHORT_HOLD; } - new_leader.checksum = leader_checksum(&new_leader); + new_leader.checksum = 0; /* set after leader_record_out */ error = write_new_leader(task, token, &new_leader, "paxos_acquire"); if (error < 0) { @@ -1843,7 
+1889,7 @@ int paxos_lease_renew(struct task *task, } new_leader.timestamp = monotime(); - new_leader.checksum = leader_checksum(&new_leader); + new_leader.checksum = 0; /* set after leader_record_out */ error = write_new_leader(task, token, &new_leader); if (error < 0) @@ -1941,7 +1987,7 @@ int paxos_lease_release(struct task *task, leader.write_generation = token->host_generation; leader.write_timestamp = monotime(); leader.flags &= ~LFL_SHORT_HOLD; - leader.checksum = leader_checksum(&leader); + leader.checksum = 0; /* set after leader_record_out */ error = write_new_leader(task, token, &leader, "paxos_release"); if (error < 0) @@ -1961,6 +2007,7 @@ int paxos_lease_init(struct task *task, struct leader_record leader_end; struct request_record rr; struct request_record rr_end; + uint32_t checksum; int iobuf_len; int sector_size; int align_size; @@ -2009,13 +2056,21 @@ int paxos_lease_init(struct task *task, leader.timestamp = LEASE_FREE; strncpy(leader.space_name, token->r.lockspace_name, NAME_ID_SIZE); strncpy(leader.resource_name, token->r.name, NAME_ID_SIZE); - leader.checksum = leader_checksum(&leader); + leader.checksum = 0; /* set after leader_record_out */ memset(&rr, 0, sizeof(rr)); rr.magic = REQ_DISK_MAGIC; rr.version = REQ_DISK_VERSION_MAJOR | REQ_DISK_VERSION_MINOR; leader_record_out(&leader, &leader_end); + + /* + * N.B. must compute checksum after the data has been byte swapped. 
+ */ + checksum = leader_checksum(&leader_end); + leader.checksum = checksum; + leader_end.checksum = cpu_to_le32(checksum); + request_record_out(&rr, &rr_end); memcpy(iobuf, &leader_end, sizeof(struct leader_record)); diff --git a/src/paxos_lease.h b/src/paxos_lease.h index 84082f49f4a8..de5e095ad358 100644 --- a/src/paxos_lease.h +++ b/src/paxos_lease.h @@ -54,6 +54,7 @@ int paxos_read_buf(struct task *task, int paxos_verify_leader(struct token *token, struct sync_disk *disk, struct leader_record *lr, + uint32_t checksum, const char *caller); int paxos_erase_dblock(struct task *task, diff --git a/src/resource.c b/src/resource.c index 9834a8448b1e..8e94a6388187 100644 --- a/src/resource.c +++ b/src/resource.c @@ -99,6 +99,7 @@ int read_resource_owners(struct task *task, struct token *token, struct sanlk_host *host; struct mode_block *mb_end; uint64_t host_id; + uint32_t checksum; char *lease_buf_dblock; char *lease_buf = NULL; char *hosts_buf = NULL; @@ -120,9 +121,11 @@ int read_resource_owners(struct task *task, struct token *token, memcpy(&leader_end, lease_buf, sizeof(struct leader_record)); + checksum = leader_checksum(&leader_end); + leader_record_in(&leader_end, &leader); - rv = paxos_verify_leader(token, disk, &leader, "read_resource_owners"); + rv = paxos_verify_leader(token, disk, &leader, checksum, "read_resource_owners"); if (rv < 0) goto out; -- 1.8.3.1