From 7212272149f2ff43f6057c43cf70b17b70d60796 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 30 Oct 2014 13:32:14 -0500
Subject: [PATCH] sanlock: fix checksum endian handling

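Checksums must be computed while the data is in the
ondisk format (little endian).  Computing them over the
host format structure gives a different result on big
endian hosts, because the multi-byte fields are hashed
in a different byte order.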

When writing a structure:
- byte swap the structure data from host format to ondisk format
- compute the host format checksum value from the ondisk data
- byte swap the checksum value from host format to ondisk format
- write the ondisk format checksum into the structure

When reading a structure:
- compute the host format checksum from the ondisk data
- byte swap the structure data from ondisk format to host format
- verify the computed host format checksum matches the
  checksum in the host format structure

Signed-off-by: David Teigland <teigland@redhat.com>
---
 src/delta_lease.c |  76 +++++++++++++++++++++++++-----
 src/direct.c      |  17 +++++++
 src/ondisk.c      |  28 ++---------
 src/ondisk.h      |  25 ++++++++++
 src/paxos_lease.c | 135 ++++++++++++++++++++++++++++++++++++++----------------
 src/paxos_lease.h |   1 +
 src/resource.c    |   5 +-
 7 files changed, 210 insertions(+), 77 deletions(-)

diff --git a/src/delta_lease.c b/src/delta_lease.c
index 5b6dc9f873f0..ef1cb3c8332a 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -81,9 +81,9 @@ static int verify_leader(struct sync_disk *disk,
 			 char *space_name,
 			 uint64_t host_id,
 			 struct leader_record *lr,
+			 uint32_t checksum,
 			 const char *caller)
 {
-	uint32_t sum;
 	int result;
 
 	if (lr->magic != DELTA_DISK_MAGIC) {
@@ -118,12 +118,10 @@ static int verify_leader(struct sync_disk *disk,
 		goto fail;
 	}
 
-	sum = leader_checksum(lr);
-
-	if (lr->checksum != sum) {
+	if (lr->checksum != checksum) {
 		log_error("verify_leader %llu wrong checksum %x %x %s",
 			  (unsigned long long)host_id,
-			  lr->checksum, sum, disk->path);
+			  lr->checksum, checksum, disk->path);
 		result = SANLK_LEADER_CHECKSUM;
 		goto fail;
 	}
@@ -164,6 +162,7 @@ int delta_read_lockspace(struct task *task,
 {
 	struct leader_record leader_end;
 	struct leader_record leader;
+	uint32_t checksum;
 	char *space_name;
 	int rv, error;
 
@@ -176,6 +175,9 @@ int delta_read_lockspace(struct task *task,
 	if (rv < 0)
 		return rv;
 
+	/* N.B. compute checksum before byte swapping */
+	checksum = leader_checksum(&leader_end);
+
 	leader_record_in(&leader_end, &leader);
 
 	if (!ls->name[0])
@@ -183,7 +185,7 @@ int delta_read_lockspace(struct task *task,
 	else
 		space_name = ls->name;
 
-	error = verify_leader(disk, space_name, host_id, &leader, "read_lockspace");
+	error = verify_leader(disk, space_name, host_id, &leader, checksum, "read_lockspace");
 
 	if (error == SANLK_OK) {
 		memcpy(ls->name, leader.space_name, SANLK_NAME_LEN);
@@ -203,6 +205,7 @@ int delta_lease_leader_read(struct task *task, int io_timeout,
 {
 	struct leader_record leader_end;
 	struct leader_record leader;
+	uint32_t checksum;
 	int rv, error;
 
 	/* host_id N is block offset N-1 */
@@ -215,9 +218,12 @@ int delta_lease_leader_read(struct task *task, int io_timeout,
 	if (rv < 0)
 		return rv;
 
+	/* N.B. compute checksum before byte swapping */
+	checksum = leader_checksum(&leader_end);
+
 	leader_record_in(&leader_end, &leader);
 
-	error = verify_leader(disk, space_name, host_id, &leader, caller);
+	error = verify_leader(disk, space_name, host_id, &leader, checksum, caller);
 
 	memcpy(leader_ret, &leader, sizeof(struct leader_record));
 	return error;
@@ -274,6 +280,7 @@ int delta_lease_acquire(struct task *task,
 	struct leader_record leader1;
 	struct leader_record leader_end;
 	uint64_t new_ts;
+	uint32_t checksum;
 	int other_io_timeout, other_host_dead_seconds, other_id_renewal_seconds;
 	int i, error, rv, delay, delta_large_delay;
 
@@ -385,7 +392,7 @@ int delta_lease_acquire(struct task *task,
 	leader.owner_id = host_id;
 	leader.owner_generation++;
 	snprintf(leader.resource_name, NAME_ID_SIZE, "%s", our_host_name);
-	leader.checksum = leader_checksum(&leader);
+	leader.checksum = 0; /* set below */
 
 	log_space(sp, "delta_acquire write %llu %llu %llu %.48s",
 		  (unsigned long long)leader.owner_id,
@@ -395,6 +402,13 @@ int delta_lease_acquire(struct task *task,
 
 	leader_record_out(&leader, &leader_end);
 
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = leader_checksum(&leader_end);
+	leader.checksum = checksum;
+	leader_end.checksum = cpu_to_le32(checksum);
+
 	rv = write_sector(disk, host_id - 1, (char *)&leader_end, sizeof(struct leader_record),
 			  task, sp->io_timeout, "delta_leader");
 	if (rv < 0) {
@@ -458,6 +472,7 @@ int delta_lease_renew(struct task *task,
 	char **p_iobuf;
 	char **p_wbuf;
 	char *wbuf;
+	uint32_t checksum;
 	uint64_t host_id, id_offset, new_ts;
 	int rv, iobuf_len, sector_size;
 
@@ -568,9 +583,13 @@ int delta_lease_renew(struct task *task,
  read_done:
 	*read_result = SANLK_OK;
 	memcpy(&leader_end, task->iobuf+id_offset, sizeof(struct leader_record));
+
+	/* N.B. compute checksum before byte swapping */
+	checksum = leader_checksum(&leader_end);
+
 	leader_record_in(&leader_end, &leader);
 
-	rv = verify_leader(disk, space_name, host_id, &leader, "delta_renew");
+	rv = verify_leader(disk, space_name, host_id, &leader, checksum, "delta_renew");
 	if (rv < 0) {
 		log_erros(sp, "delta_renew verify_leader error %d", rv);
 		return rv;
@@ -611,7 +630,7 @@ int delta_lease_renew(struct task *task,
 	}
 
 	leader.timestamp = new_ts;
-	leader.checksum = leader_checksum(&leader);
+	leader.checksum = 0; /* set below */
 
 	/* TODO: rename the leader fields */
 	if (extra) {
@@ -630,6 +649,13 @@ int delta_lease_renew(struct task *task,
 
 	leader_record_out(&leader, &leader_end);
 
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = leader_checksum(&leader_end);
+	leader.checksum = checksum;
+	leader_end.checksum = cpu_to_le32(checksum);
+
 	memcpy(wbuf, &leader_end, sizeof(struct leader_record));
 	memcpy(wbuf+LEADER_RECORD_MAX, bitmap, HOSTID_BITMAP_SIZE);
 
@@ -666,6 +692,7 @@ int delta_lease_release(struct task *task,
 	struct leader_record leader;
 	struct leader_record leader_end;
 	uint64_t host_id;
+	uint32_t checksum;
 	int rv;
 
 	if (!leader_last)
@@ -678,10 +705,17 @@ int delta_lease_release(struct task *task,
 
 	memcpy(&leader, leader_last, sizeof(struct leader_record));
 	leader.timestamp = LEASE_FREE;
-	leader.checksum = leader_checksum(&leader);
+	leader.checksum = 0; /* set below */
 
 	leader_record_out(&leader, &leader_end);
 
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = leader_checksum(&leader_end);
+	leader.checksum = checksum;
+	leader_end.checksum = cpu_to_le32(checksum);
+
 	rv = write_sector(disk, host_id - 1, (char *)&leader_end, sizeof(struct leader_record),
 			  task, sp->io_timeout, "delta_leader");
 	if (rv < 0) {
@@ -714,6 +748,7 @@ int delta_lease_init(struct task *task,
 	int iobuf_len;
 	int align_size;
 	int i, rv;
+	uint32_t checksum;
 
 	if (!max_hosts)
 		max_hosts = DEFAULT_MAX_HOSTS;
@@ -752,7 +787,7 @@ int delta_lease_init(struct task *task,
 		leader.timestamp = LEASE_FREE;
 		leader.io_timeout = io_timeout;
 		strncpy(leader.space_name, space_name, NAME_ID_SIZE);
-		leader.checksum = leader_checksum(&leader);
+		leader.checksum = 0; /* set below */
 
 		/* make the first record invalid so we can do a single atomic
 		   write below to commit the whole thing */
@@ -763,6 +798,13 @@ int delta_lease_init(struct task *task,
 
 		leader_record_out(&leader, &leader_end);
 
+		/*
+		 * N.B. must compute checksum after the data has been byte swapped.
+		 */
+		checksum = leader_checksum(&leader_end);
+		leader.checksum = checksum;
+		leader_end.checksum = cpu_to_le32(checksum);
+
 		memcpy(iobuf + (i * disk->sector_size), &leader_end, sizeof(struct leader_record));
 	}
 
@@ -773,7 +815,17 @@ int delta_lease_init(struct task *task,
 	/* commit the whole lockspace by making the first record valid */
 
 	leader_first.magic = DELTA_DISK_MAGIC;
+	leader_first.checksum = 0; /* set below */
+
 	leader_record_out(&leader_first, &leader_end);
+
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = leader_checksum(&leader_end);
+	leader_first.checksum = checksum;
+	leader_end.checksum = cpu_to_le32(checksum);
+
 	memcpy(iobuf, &leader_end, sizeof(struct leader_record));
 
 	rv = write_iobuf(disk->fd, disk->offset, iobuf, disk->sector_size, task, io_timeout);
diff --git a/src/direct.c b/src/direct.c
index 8813b2f14716..2dbb098d064b 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -400,6 +400,7 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
 	struct request_record rr;
 	struct mode_block mb;
 	struct sync_disk sd;
+	struct paxos_dblock dblock;
 	char sname[NAME_ID_SIZE+1];
 	char rname[NAME_ID_SIZE+1];
 	uint64_t sector_nr;
@@ -522,6 +523,22 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
 				char *pd_end = data + ((2 + i) * sd.sector_size);
 				struct mode_block *mb_end = (struct mode_block *)(pd_end + MBLOCK_OFFSET);
 
+				if (force_mode > 1) {
+					paxos_dblock_in((struct paxos_dblock *)pd_end, &dblock);
+
+					if (dblock.mbal || dblock.inp || dblock.lver) {
+						printf("dblock[%04d] mbal %llu bal %llu inp %llu inp2 %llu inp3 %llu lver %llu sum %x\n",
+						       i,
+						       (unsigned long long)dblock.mbal,
+					               (unsigned long long)dblock.bal,
+					               (unsigned long long)dblock.inp,
+					               (unsigned long long)dblock.inp2,
+					               (unsigned long long)dblock.inp3,
+					               (unsigned long long)dblock.lver,
+					               dblock.checksum);
+					}
+				}
+
 				mode_block_in(mb_end, &mb);
 
 				if (!(mb.flags & MBLOCK_SHARED))
diff --git a/src/ondisk.c b/src/ondisk.c
index 6af8effbcf67..c161f8b23478 100644
--- a/src/ondisk.c
+++ b/src/ondisk.c
@@ -16,28 +16,6 @@
 #include "ondisk.h"
 
 /*
- * sanlock ondisk format is little endian.
- */
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-#define le16_to_cpu(x) (bswap_16((x)))
-#define le32_to_cpu(x) (bswap_32((x)))
-#define le64_to_cpu(x) (bswap_64((x)))
-#define cpu_to_le16(x) (bswap_16((x)))
-#define cpu_to_le32(x) (bswap_32((x)))
-#define cpu_to_le64(x) (bswap_64((x)))
-#endif
-
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define le16_to_cpu(x) (x)
-#define le32_to_cpu(x) (x)
-#define le64_to_cpu(x) (x)
-#define cpu_to_le16(x) (x)
-#define cpu_to_le32(x) (x)
-#define cpu_to_le64(x) (x)
-#endif
-
-/*
  * "end" variables point to ondisk format (endian converted) structures.
  */
 
@@ -79,7 +57,8 @@ void leader_record_out(struct leader_record *lr, struct leader_record *end)
 	memcpy(end->resource_name, lr->resource_name, NAME_ID_SIZE);
 	end->timestamp        = cpu_to_le64(lr->timestamp);
 	end->unused1          = cpu_to_le64(lr->unused1);
-	end->checksum         = cpu_to_le32(lr->checksum);
+	/* N.B. the checksum must be computed after the byte swapping */
+	/* leader_record_out(lr, end); checksum = compute(end); end->checksum = cpu_to_le32(checksum); */
 	end->unused2          = cpu_to_le16(lr->unused2);
 	end->io_timeout       = cpu_to_le16(lr->io_timeout);
 	end->write_id         = cpu_to_le64(lr->write_id);
@@ -122,7 +101,8 @@ void paxos_dblock_out(struct paxos_dblock *pd, struct paxos_dblock *end)
 	end->inp2     = cpu_to_le64(pd->inp2);
 	end->inp3     = cpu_to_le64(pd->inp3);
 	end->lver     = cpu_to_le64(pd->lver);
-	end->checksum = cpu_to_le32(pd->checksum);
+	/* N.B. the checksum must be computed after the byte swapping */
+	/* paxos_dblock_out(pd, end); checksum = compute(end); end->checksum = cpu_to_le32(checksum); */
 }
 
 void mode_block_in(struct mode_block *end, struct mode_block *mb)
diff --git a/src/ondisk.h b/src/ondisk.h
index 2c62c8836f98..3ae48330d529 100644
--- a/src/ondisk.h
+++ b/src/ondisk.h
@@ -10,6 +10,31 @@
 #ifndef __ONDISK_H__
 #define __ONDISK_H__
 
+#include <endian.h>
+#include <byteswap.h>
+
+/*
+ * sanlock ondisk format is little endian.
+ */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu(x) (bswap_16((x)))
+#define le32_to_cpu(x) (bswap_32((x)))
+#define le64_to_cpu(x) (bswap_64((x)))
+#define cpu_to_le16(x) (bswap_16((x)))
+#define cpu_to_le32(x) (bswap_32((x)))
+#define cpu_to_le64(x) (bswap_64((x)))
+#endif
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define le16_to_cpu(x) (x)
+#define le32_to_cpu(x) (x)
+#define le64_to_cpu(x) (x)
+#define cpu_to_le16(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le64(x) (x)
+#endif
+
 void leader_record_in(struct leader_record *end, struct leader_record *lr);
 void leader_record_out(struct leader_record *lr, struct leader_record *end);
 void request_record_in(struct request_record *end, struct request_record *rr);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 7b8e95a5c0b8..3e9c306ac0d4 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -47,6 +47,16 @@ static uint32_t roundup_power_of_two(uint32_t val)
 	return val;
 }
 
+uint32_t leader_checksum(struct leader_record *lr)
+{
+	return crc32c((uint32_t)~1, (uint8_t *)lr, LEADER_CHECKSUM_LEN);
+}
+
+static uint32_t dblock_checksum(struct paxos_dblock *pd)
+{
+	return crc32c((uint32_t)~1, (uint8_t *)pd, DBLOCK_CHECKSUM_LEN);
+}
+
 int paxos_lease_request_read(struct task *task, struct token *token,
 			     struct request_record *rr)
 {
@@ -138,6 +148,7 @@ static int write_dblock_mblock_sh(struct task *task,
 	struct mode_block mb_end;
 	char *iobuf, **p_iobuf;
 	uint64_t offset;
+	uint32_t checksum;
 	int iobuf_len, rv;
 
 	memset(&mb, 0, sizeof(mb));
@@ -157,6 +168,14 @@ static int write_dblock_mblock_sh(struct task *task,
 	offset = disk->offset + ((2 + host_id - 1) * disk->sector_size);
 
 	paxos_dblock_out(pd, &pd_end);
+
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = dblock_checksum(&pd_end);
+	pd->checksum = checksum;
+	pd_end.checksum = cpu_to_le32(checksum);
+
 	mode_block_out(&mb, &mb_end);
 
 	memcpy(iobuf, (char *)&pd_end, sizeof(struct paxos_dblock));
@@ -183,6 +202,7 @@ static int write_dblock(struct task *task,
 			struct paxos_dblock *pd)
 {
 	struct paxos_dblock pd_end;
+	uint32_t checksum;
 	int rv;
 
 	if (token->flags & T_WRITE_DBLOCK_MBLOCK_SH) {
@@ -195,6 +215,13 @@ static int write_dblock(struct task *task,
 
 	paxos_dblock_out(pd, &pd_end);
 
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = dblock_checksum(&pd_end);
+	pd->checksum = checksum;
+	pd_end.checksum = cpu_to_le32(checksum);
+
 	rv = write_sector(disk, 2 + host_id - 1, (char *)&pd_end, sizeof(struct paxos_dblock),
 			  task, token->io_timeout, "dblock");
 	return rv;
@@ -206,10 +233,18 @@ static int write_leader(struct task *task,
 			struct leader_record *lr)
 {
 	struct leader_record lr_end;
+	uint32_t checksum;
 	int rv;
 
 	leader_record_out(lr, &lr_end);
 
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = leader_checksum(&lr_end);
+	lr->checksum = checksum;
+	lr_end.checksum = cpu_to_le32(checksum);
+
 	rv = write_sector(disk, 0, (char *)&lr_end, sizeof(struct leader_record),
 			  task, token->io_timeout, "leader");
 	return rv;
@@ -227,10 +262,18 @@ int paxos_lease_leader_clobber(struct task *task,
 			       const char *caller)
 {
 	struct leader_record lr_end;
+	uint32_t checksum;
 	int rv;
 
 	leader_record_out(leader, &lr_end);
 
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = leader_checksum(&lr_end);
+	leader->checksum = checksum;
+	lr_end.checksum = cpu_to_le32(checksum);
+
 	rv = write_sector(&token->disks[0], 0, (char *)&lr_end, sizeof(struct leader_record),
 			  task, token->io_timeout, caller);
 	return rv;
@@ -303,7 +346,8 @@ static int read_dblocks(struct task *task,
 static int read_leader(struct task *task,
 		       struct token *token,
 		       struct sync_disk *disk,
-		       struct leader_record *lr)
+		       struct leader_record *lr,
+		       uint32_t *checksum)
 {
 	struct leader_record lr_end;
 	int rv;
@@ -313,28 +357,22 @@ static int read_leader(struct task *task,
 	rv = read_sectors(disk, 0, 1, (char *)&lr_end, sizeof(struct leader_record),
 			  task, token->io_timeout, "leader");
 
+	/* N.B. checksum is computed while the data is in ondisk format. */
+	*checksum = leader_checksum(&lr_end);
+
 	leader_record_in(&lr_end, lr);
 
 	return rv;
 }
 
-static uint32_t dblock_checksum(struct paxos_dblock *pd)
-{
-	return crc32c((uint32_t)~1, (uint8_t *)pd, DBLOCK_CHECKSUM_LEN);
-}
-
-static int verify_dblock(struct token *token, struct paxos_dblock *pd)
+static int verify_dblock(struct token *token, struct paxos_dblock *pd, uint32_t checksum)
 {
-	uint32_t sum;
-
 	if (!pd->checksum && !pd->mbal && !pd->bal && !pd->inp && !pd->lver)
 		return SANLK_OK;
 
-	sum = dblock_checksum(pd);
-
-	if (pd->checksum != sum) {
+	if (pd->checksum != checksum) {
 		log_errot(token, "verify_dblock wrong checksum %x %x",
-			  pd->checksum, sum);
+			  pd->checksum, checksum);
 		return SANLK_DBLOCK_CHECKSUM;
 	}
 
@@ -400,6 +438,7 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
 	struct sync_disk *disk;
 	char *iobuf[SANLK_MAX_DISKS];
 	char **p_iobuf[SANLK_MAX_DISKS];
+	uint32_t checksum;
 	int num_disks = token->r.num_disks;
 	int num_writes, num_reads;
 	int sector_size = token->disks[0].sector_size;
@@ -444,7 +483,7 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
 	memset(&dblock, 0, sizeof(struct paxos_dblock));
 	dblock.mbal = our_mbal;
 	dblock.lver = next_lver;
-	dblock.checksum = dblock_checksum(&dblock);
+	dblock.checksum = 0; /* set after paxos_dblock_out */
 
 	memset(&bk_max, 0, sizeof(struct paxos_dblock));
 
@@ -483,10 +522,12 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
 		for (q = 0; q < num_hosts; q++) {
 			bk_end = (struct paxos_dblock *)(iobuf[d] + ((2 + q)*sector_size));
 
+			checksum = dblock_checksum(bk_end);
+
 			paxos_dblock_in(bk_end, &bk_in);
 			bk = &bk_in;
 
-			rv = verify_dblock(token, bk);
+			rv = verify_dblock(token, bk, checksum);
 			if (rv < 0)
 				continue;
 
@@ -569,7 +610,7 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
 		dblock.inp3 = monotime();
 	}
 	dblock.bal = dblock.mbal;
-	dblock.checksum = dblock_checksum(&dblock);
+	dblock.checksum = 0; /* set after paxos_dblock_out */
 
 	if (bk_max.inp) {
 		/* not a problem, but interesting to see, so use log_error */
@@ -635,10 +676,12 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
 		for (q = 0; q < num_hosts; q++) {
 			bk_end = (struct paxos_dblock *)(iobuf[d] + ((2 + q)*sector_size));
 
+			checksum = dblock_checksum(bk_end);
+
 			paxos_dblock_in(bk_end, &bk_in);
 			bk = &bk_in;
 
-			rv = verify_dblock(token, bk);
+			rv = verify_dblock(token, bk, checksum);
 			if (rv < 0)
 				continue;
 
@@ -724,11 +767,6 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
 	return error;
 }
 
-uint32_t leader_checksum(struct leader_record *lr)
-{
-	return crc32c((uint32_t)~1, (uint8_t *)lr, LEADER_CHECKSUM_LEN);
-}
-
 static void log_leader_error(int result,
 			     struct token *token,
 			     struct sync_disk *disk,
@@ -771,11 +809,11 @@ static void log_leader_error(int result,
 static int verify_leader(struct token *token,
 			 struct sync_disk *disk,
 			 struct leader_record *lr,
+			 uint32_t checksum,
 			 const char *caller)
 {
 	struct leader_record leader_end;
 	struct leader_record leader_rr;
-	uint32_t sum;
 	int result, rv;
 
 	if (lr->magic != PAXOS_DISK_MAGIC) {
@@ -821,11 +859,9 @@ static int verify_leader(struct token *token,
 		goto fail;
 	}
 
-	sum = leader_checksum(lr);
-
-	if (lr->checksum != sum) {
+	if (lr->checksum != checksum) {
 		log_errot(token, "verify_leader wrong checksum %x %x %s",
-			  lr->checksum, sum, disk->path);
+			  lr->checksum, checksum, disk->path);
 		result = SANLK_LEADER_CHECKSUM;
 		goto fail;
 	}
@@ -851,9 +887,10 @@ static int verify_leader(struct token *token,
 int paxos_verify_leader(struct token *token,
 			 struct sync_disk *disk,
 			 struct leader_record *lr,
+			 uint32_t checksum,
 			 const char *caller)
 {
-	return verify_leader(token, disk, lr, caller);
+	return verify_leader(token, disk, lr, checksum, caller);
 }
 
 static int leaders_match(struct leader_record *a, struct leader_record *b)
@@ -870,11 +907,12 @@ int paxos_read_resource(struct task *task,
 			struct sanlk_resource *res)
 {
 	struct leader_record leader;
+	uint32_t checksum;
 	int rv;
 
 	memset(&leader, 0, sizeof(struct leader_record));
 
-	rv = read_leader(task, token, &token->disks[0], &leader);
+	rv = read_leader(task, token, &token->disks[0], &leader, &checksum);
 	if (rv < 0)
 		return rv;
 
@@ -884,7 +922,7 @@ int paxos_read_resource(struct task *task,
 	if (!res->name[0])
 		memcpy(token->r.name, leader.resource_name, NAME_ID_SIZE);
 
-	rv = verify_leader(token, &token->disks[0], &leader, "read_resource");
+	rv = verify_leader(token, &token->disks[0], &leader, checksum, "read_resource");
 
 	if (rv == SANLK_OK) {
 		memcpy(res->lockspace_name, leader.space_name, NAME_ID_SIZE);
@@ -928,15 +966,16 @@ static int _leader_read_one(struct task *task,
 			    const char *caller)
 {
 	struct leader_record leader;
+	uint32_t checksum;
 	int rv;
 
 	memset(&leader, 0, sizeof(struct leader_record));
 
-	rv = read_leader(task, token, &token->disks[0], &leader);
+	rv = read_leader(task, token, &token->disks[0], &leader, &checksum);
 	if (rv < 0)
 		return rv;
 
-	rv = verify_leader(token, &token->disks[0], &leader, caller);
+	rv = verify_leader(token, &token->disks[0], &leader, checksum, caller);
 
 	/* copy what we read even if verify finds a problem */
 
@@ -953,6 +992,7 @@ static int _leader_read_num(struct task *task,
 {
 	struct leader_record leader;
 	struct leader_record *leaders;
+	uint32_t checksum;
 	int *leader_reps;
 	int leaders_len, leader_reps_len;
 	int num_reads;
@@ -985,11 +1025,11 @@ static int _leader_read_num(struct task *task,
 	num_reads = 0;
 
 	for (d = 0; d < num_disks; d++) {
-		rv = read_leader(task, token, &token->disks[d], &leaders[d]);
+		rv = read_leader(task, token, &token->disks[d], &leaders[d], &checksum);
 		if (rv < 0)
 			continue;
 
-		rv = verify_leader(token, &token->disks[d], &leaders[d], caller);
+		rv = verify_leader(token, &token->disks[d], &leaders[d], checksum, caller);
 		if (rv < 0)
 			continue;
 
@@ -1083,6 +1123,7 @@ static int _lease_read_one(struct task *task,
 	char *iobuf, **p_iobuf;
 	uint32_t host_id = token->host_id;
 	uint32_t sector_size = disk->sector_size;
+	uint32_t checksum;
 	struct paxos_dblock *bk_end;
 	uint64_t tmp_mbal = 0;
 	int q, tmp_q = -1, rv, iobuf_len;
@@ -1104,21 +1145,26 @@ static int _lease_read_one(struct task *task,
 		goto out;
 
 	memcpy(&leader_end, iobuf, sizeof(struct leader_record));
+
+	checksum = leader_checksum(&leader_end);
+
 	leader_record_in(&leader_end, leader_ret);
 
 	memcpy(&our_dblock_end, iobuf + ((host_id + 1) * sector_size), sizeof(struct paxos_dblock));
 	paxos_dblock_in(&our_dblock_end, our_dblock);
 
-	rv = verify_leader(token, disk, leader_ret, caller);
+	rv = verify_leader(token, disk, leader_ret, checksum, caller);
 	if (rv < 0)
 		goto out;
 
 	for (q = 0; q < leader_ret->num_hosts; q++) {
 		bk_end = (struct paxos_dblock *)(iobuf + ((2 + q) * sector_size));
 
+		checksum = dblock_checksum(bk_end);
+
 		paxos_dblock_in(bk_end, &bk);
 
-		rv = verify_dblock(token, &bk);
+		rv = verify_dblock(token, &bk, checksum);
 		if (rv < 0)
 			goto out;
 
@@ -1773,7 +1819,7 @@ int paxos_lease_acquire(struct task *task,
 			new_leader.flags &= ~LFL_SHORT_HOLD;
 	}
 
-	new_leader.checksum = leader_checksum(&new_leader);
+	new_leader.checksum = 0; /* set after leader_record_out */
 
 	error = write_new_leader(task, token, &new_leader, "paxos_acquire");
 	if (error < 0) {
@@ -1843,7 +1889,7 @@ int paxos_lease_renew(struct task *task,
 	}
 
 	new_leader.timestamp = monotime();
-	new_leader.checksum = leader_checksum(&new_leader);
+	new_leader.checksum = 0; /* set after leader_record_out */
 
 	error = write_new_leader(task, token, &new_leader);
 	if (error < 0)
@@ -1941,7 +1987,7 @@ int paxos_lease_release(struct task *task,
 	leader.write_generation = token->host_generation;
 	leader.write_timestamp = monotime();
 	leader.flags &= ~LFL_SHORT_HOLD;
-	leader.checksum = leader_checksum(&leader);
+	leader.checksum = 0; /* set after leader_record_out */
 
 	error = write_new_leader(task, token, &leader, "paxos_release");
 	if (error < 0)
@@ -1961,6 +2007,7 @@ int paxos_lease_init(struct task *task,
 	struct leader_record leader_end;
 	struct request_record rr;
 	struct request_record rr_end;
+	uint32_t checksum;
 	int iobuf_len;
 	int sector_size;
 	int align_size;
@@ -2009,13 +2056,21 @@ int paxos_lease_init(struct task *task,
 	leader.timestamp = LEASE_FREE;
 	strncpy(leader.space_name, token->r.lockspace_name, NAME_ID_SIZE);
 	strncpy(leader.resource_name, token->r.name, NAME_ID_SIZE);
-	leader.checksum = leader_checksum(&leader);
+	leader.checksum = 0; /* set after leader_record_out */
 
 	memset(&rr, 0, sizeof(rr));
 	rr.magic = REQ_DISK_MAGIC;
 	rr.version = REQ_DISK_VERSION_MAJOR | REQ_DISK_VERSION_MINOR;
 
 	leader_record_out(&leader, &leader_end);
+
+	/*
+	 * N.B. must compute checksum after the data has been byte swapped.
+	 */
+	checksum = leader_checksum(&leader_end);
+	leader.checksum = checksum;
+	leader_end.checksum = cpu_to_le32(checksum);
+
 	request_record_out(&rr, &rr_end);
 
 	memcpy(iobuf, &leader_end, sizeof(struct leader_record));
diff --git a/src/paxos_lease.h b/src/paxos_lease.h
index 84082f49f4a8..de5e095ad358 100644
--- a/src/paxos_lease.h
+++ b/src/paxos_lease.h
@@ -54,6 +54,7 @@ int paxos_read_buf(struct task *task,
 int paxos_verify_leader(struct token *token,
                          struct sync_disk *disk,
                          struct leader_record *lr,
+			 uint32_t checksum,
                          const char *caller);
 
 int paxos_erase_dblock(struct task *task,
diff --git a/src/resource.c b/src/resource.c
index 9834a8448b1e..8e94a6388187 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -99,6 +99,7 @@ int read_resource_owners(struct task *task, struct token *token,
 	struct sanlk_host *host;
 	struct mode_block *mb_end;
 	uint64_t host_id;
+	uint32_t checksum;
 	char *lease_buf_dblock;
 	char *lease_buf = NULL;
 	char *hosts_buf = NULL;
@@ -120,9 +121,11 @@ int read_resource_owners(struct task *task, struct token *token,
 
 	memcpy(&leader_end, lease_buf, sizeof(struct leader_record));
 
+	checksum = leader_checksum(&leader_end);
+
 	leader_record_in(&leader_end, &leader);
 
-	rv = paxos_verify_leader(token, disk, &leader, "read_resource_owners");
+	rv = paxos_verify_leader(token, disk, &leader, checksum, "read_resource_owners");
 	if (rv < 0)
 		goto out;
 
-- 
1.8.3.1