--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/add.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/add.h 2023-03-08 09:35:23.059759304 -0700
@@ -413,15 +413,14 @@ template <class t>
ubv::zero(alignedSum.getWidth()),
(shiftedStickyBit | ITE(!overflow, ubv::zero(1), sum.extract(0,0)).extend(alignedSum.getWidth() - 1))));
-
- // Put it back together
- unpackedFloat<t> sumResult(resultSign, correctedExponent, (alignedSum | stickyBit).contract(1));
-
// We return something in an extended format
// *. One extra exponent bit to deal with the 'overflow' case
// *. Two extra significand bits for the guard and sticky bits
fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() + 2);
+ // Put it back together
+ unpackedFloat<t> sumResult(extendedFormat, resultSign, correctedExponent, (alignedSum | stickyBit).contract(1));
+
// Deal with the major cancellation case
// It would be nice to use normaliseUpDetectZero but the sign
// of the zero depends on the rounding mode.
@@ -541,7 +540,7 @@ template <class t>
extendedLargerExponent.decrement())));
// Far path : Construct result
- unpackedFloat<t> farPathResult(resultSign, correctedExponent, (alignedSum | shiftedStickyBit).contract(1));
+ unpackedFloat<t> farPathResult(extendedFormat, resultSign, correctedExponent, (alignedSum | shiftedStickyBit).contract(1));
@@ -559,13 +558,14 @@ template <class t>
prop nearNoCancel(nearSum.extract(sumWidth - 2, sumWidth - 2).isAllOnes());
ubv choppedNearSum(nearSum.extract(sumWidth - 3,1)); // In the case this is used, cut bits are all 0
- unpackedFloat<t> cancellation(resultSign,
+ unpackedFloat<t> cancellation(extendedFormat,
+ resultSign,
larger.getExponent().decrement(),
choppedNearSum);
// Near path : Construct result
- unpackedFloat<t> nearPathResult(resultSign, extendedLargerExponent, nearSum.contract(1));
+ unpackedFloat<t> nearPathResult(extendedFormat, resultSign, extendedLargerExponent, nearSum.contract(1));
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/convert.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/convert.h 2023-03-08 09:35:23.059759304 -0700
@@ -111,14 +111,20 @@ unpackedFloat<t> roundToIntegral (const
// Otherwise, compute rounding location
sbv initialRoundingPoint(expandingSubtract<t>(packedSigWidth,exponent)); // Expansion only needed in obscure formats
- sbv roundingPoint(collar<t>(initialRoundingPoint,
- sbv::zero(exponentWidth + 1),
- unpackedSigWidth.extend(1).increment()));
+ sbv collaredRoundingPoint(collar<t>(initialRoundingPoint,
+ sbv::zero(exponentWidth + 1),
+ unpackedSigWidth.extend(1).increment()));
// Round
ubv significand(input.getSignificand());
+ bwt significandWidth(significand.getWidth());
+ ubv roundingPoint((significandWidth >= exponentWidth) ?
+ collaredRoundingPoint.toUnsigned().matchWidth(significand) :
+ collaredRoundingPoint.toUnsigned().extract(significandWidth - 1, 0));
+ // Extract is safe because of the collar
+
significandRounderResult<t> roundedResult(variablePositionRound<t>(roundingMode, input.getSign(), significand,
- roundingPoint.toUnsigned().matchWidth(significand),
+ roundingPoint,
prop(false), // TODO : Could actually be exponent >= 0
isID)); // The fast-path case so just deactives some code
@@ -185,14 +191,18 @@ unpackedFloat<t> roundToIntegral (const
template <class t>
unpackedFloat<t> convertUBVToFloat (const typename t::fpt &targetFormat,
const typename t::rm &roundingMode,
- const typename t::ubv &input,
+ const typename t::ubv &preInput,
const typename t::bwt &decimalPointPosition = 0) {
typedef typename t::bwt bwt;
typedef typename t::prop prop;
+ typedef typename t::ubv ubv;
typedef typename t::sbv sbv;
typedef typename t::fpt fpt;
+ // In the case of a 1 bit input(?) extend to 2 bits so that the intermediate float is a sensible format
+ ubv input((preInput.getWidth() == 1) ? preInput.extend(1) : preInput);
+
bwt inputWidth(input.getWidth());
PRECONDITION(decimalPointPosition <= inputWidth);
@@ -225,6 +235,7 @@ template <class t>
bwt inputWidth(input.getWidth());
+ PRECONDITION(inputWidth > 1); // A 1 bit signed-number is ???
PRECONDITION(decimalPointPosition <= inputWidth);
// Devise an appropriate format
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/divide.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/divide.h 2023-03-08 09:35:23.060759290 -0700
@@ -122,7 +122,8 @@ template <class t>
ubv finishedSignificand(alignedSignificand | ubv(divided.remainderBit).extend(resWidth - 1));
// Put back together
- unpackedFloat<t> divideResult(divideSign, alignedExponent.extend(1), finishedSignificand);
+ fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() + 2);
+ unpackedFloat<t> divideResult(extendedFormat, divideSign, alignedExponent, finishedSignificand);
// A brief word about formats.
// You might think that the extend above is unnecessary : it is from a overflow point of view.
@@ -132,7 +133,6 @@ template <class t>
// can have an exponent greater than very large normal * 2 ( + 1)
// because the exponent range is asymmetric with more subnormal than normal.
- fpt extendedFormat(format.exponentWidth() + 2, format.significandWidth() + 2);
POSTCONDITION(divideResult.valid(extendedFormat));
return divideResult;
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/multiply.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/multiply.h 2023-03-08 09:35:23.060759290 -0700
@@ -114,10 +114,9 @@ template <class t>
// Put back together
- unpackedFloat<t> multiplyResult(multiplySign, alignedExponent, alignedSignificand);
-
-
fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() * 2);
+ unpackedFloat<t> multiplyResult(extendedFormat, multiplySign, alignedExponent, alignedSignificand);
+
POSTCONDITION(multiplyResult.valid(extendedFormat));
return multiplyResult;
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/operations.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/operations.h 2023-03-08 09:35:23.060759290 -0700
@@ -41,6 +41,24 @@
namespace symfpu {
+ /*** Size matching ***/
+ template <class t, class bv>
+ std::pair<bv, bv> sameWidth(const bv &op1, const bv &op2) {
+ typename t::bwt w1(op1.getWidth());
+ typename t::bwt w2(op2.getWidth());
+
+ if (w1 == w2) {
+ return std::make_pair(op1, op2);
+ } else if (w1 < w2) {
+ return std::make_pair(op1.matchWidth(op2), op2);
+ } else {
+ INVARIANT(w1 > w2);
+ return std::make_pair(op1.matchWidth(op2), op2);
+ }
+ }
+
+
+
/*** Expanding operations ***/
template <class t, class bv>
bv expandingAdd (const bv &op1, const bv &op2) {
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/packing.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/packing.h 2023-03-08 09:35:23.060759290 -0700
@@ -119,7 +119,7 @@ namespace symfpu {
ubv packedSign(uf.getSign());
// Exponent
- bwt packedExWidth = format.packedExponentWidth();
+ bwt packedExWidth(format.packedExponentWidth());
prop inNormalRange(uf.inNormalRange(format, prop(true)));
INVARIANT(inNormalRange || uf.inSubnormalRange(format, prop(true))); // Default values ensure this.
@@ -148,10 +148,18 @@ namespace symfpu {
// Significand
bwt packedSigWidth = format.packedSignificandWidth();
ubv unpackedSignificand(uf.getSignificand());
+ bwt unpackedSignificandWidth = unpackedSignificand.getWidth();
- INVARIANT(packedSigWidth == unpackedSignificand.getWidth() - 1);
+ INVARIANT(packedSigWidth == unpackedSignificandWidth - 1);
ubv dropLeadingOne(unpackedSignificand.extract(packedSigWidth - 1,0));
- ubv correctedSubnormal((unpackedSignificand >> (uf.getSubnormalAmount(format).toUnsigned().matchWidth(unpackedSignificand))).extract(packedSigWidth - 1,0));
+
+ ubv subnormalShiftAmount(uf.getSubnormalAmount(format).toUnsigned());
+ ubv shiftAmount((subnormalShiftAmount.getWidth() <= unpackedSignificandWidth) ?
+ subnormalShiftAmount.matchWidth(unpackedSignificand) :
+ subnormalShiftAmount.extract(unpackedSignificandWidth - 1, 0));
+ // The extraction could loose data if exponent is much larger than the significand
+ // However getSubnormalAmount is between 0 and packedSigWidth so it should be safe
+ ubv correctedSubnormal((unpackedSignificand >> shiftAmount).extract(packedSigWidth - 1,0));
prop hasFixedSignificand(uf.getNaN() || uf.getInf() || uf.getZero());
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/rounder.h.orig 2023-03-08 09:34:54.844158414 -0700
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/rounder.h 2023-03-08 09:35:23.060759290 -0700
@@ -403,7 +403,14 @@ template <class t>
// Note that this is negative if normal, giving a full subnormal mask
// but the result will be ignored (see the next invariant)
- ubv subnormalShiftPrepared(subnormalAmount.toUnsigned().matchWidth(extractedSignificand));
+
+ // Care is needed if the exponents are longer than the significands
+ // In the case when data is lost it is negative and not used
+ bwt extractedSignificandWidth(extractedSignificand.getWidth());
+ ubv subnormalShiftPrepared((extractedSignificandWidth >= expWidth + 1) ?
+ subnormalAmount.toUnsigned().matchWidth(extractedSignificand) :
+ subnormalAmount.toUnsigned().extract(extractedSignificandWidth - 1, 0));
+
// Compute masks
ubv subnormalMask(orderEncode<t>(subnormalShiftPrepared)); // Invariant implies this if all ones, it will not be used
@@ -591,8 +598,11 @@ unpackedFloat<t> originalRounder (const
prop aboveLimit(subnormalAmount >= sbv(expWidth, targetSignificandWidth)); // Will underflow
sbv subnormalShift(ITE((belowLimit || aboveLimit), sbv::zero(expWidth), subnormalAmount));
// Optimisation : collar
-
- ubv subnormalShiftPrepared(subnormalShift.toUnsigned().extend(targetSignificandWidth - expWidth));
+
+ bwt extractedSignificandWidth(extractedSignificand.getWidth());
+ ubv subnormalShiftPrepared((extractedSignificandWidth >= expWidth + 1) ?
+ subnormalAmount.toUnsigned().extend(targetSignificandWidth - expWidth) :
+ subnormalAmount.toUnsigned().extract(extractedSignificandWidth - 1, 0));
ubv guardLocation(ubv::one(targetSignificandWidth) << subnormalShiftPrepared);
ubv stickyMask(guardLocation.decrement());
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/sqrt.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/sqrt.h 2023-03-08 09:35:23.060759290 -0700
@@ -108,12 +108,11 @@ template <class t>
ubv finishedSignificand(sqrtd.result.append(ubv(sqrtd.remainderBit)));
- unpackedFloat<t> sqrtResult(sqrtSign, exponentHalved, finishedSignificand);
-
-
fpt extendedFormat(format.exponentWidth(), format.significandWidth() + 2);
// format.exponentWidth() - 1 should also be true but requires shrinking the exponent and
// then increasing it in the rounder
+ unpackedFloat<t> sqrtResult(extendedFormat, sqrtSign, exponentHalved, finishedSignificand);
+
POSTCONDITION(sqrtResult.valid(extendedFormat));
return sqrtResult;
--- symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/unpackedFloat.h.orig 2019-05-17 16:03:14.000000000 -0600
+++ symfpu-c3acaf62b137c36aae5eb380f1d883bfa9095f60/core/unpackedFloat.h 2023-03-08 09:35:23.060759290 -0700
@@ -119,6 +119,17 @@ namespace symfpu {
sign(s), exponent(exp), significand(signif)
{}
+ // An intermediate point in some operations is producing a value in an extended
+ // format (for example, multiply produces an intermediate value with one extra exponent
+ // bit and double the number of significand bits). However the unpacked size of the
+ // significand is larger than the bits given by the format. This constructor does
+ // the appropriate extension so that what is constructed is a valid unpacked float
+ // in the given format.
+ unpackedFloat (const fpt &fmt, const prop &s, const sbv &exp, const ubv &signif) :
+ nan(false), inf(false), zero(false),
+ sign(s), exponent(exp.matchWidth(defaultExponent(fmt))), significand(signif)
+ {}
+
unpackedFloat (const unpackedFloat<t> &old) :
nan(old.nan), inf(old.inf), zero(old.zero),
sign(old.sign), exponent(old.exponent), significand(old.significand)
@@ -172,6 +183,8 @@ namespace symfpu {
static bwt exponentWidth(const fpt &format) {
+ // This calculation is a little more complex than you might think...
+
// Note that there is one more exponent above 0 than there is
// below. This is the opposite of 2's compliment but this is not
// a problem because the highest packed exponent corresponds to
@@ -180,17 +193,33 @@ namespace symfpu {
// However we do need to increase it to allow subnormals (packed)
// to be normalised.
- bwt width = format.exponentWidth();
+ // The smallest exponent is:
+ // -2^(format.exponentWidth() - 1) - 2 - (format.significandWidth() - 1)
+ //
+ // We need an unpacked exponent width u such that
+ // -2^(u-1) <= -2^(format.exponentWidth() - 1) - 2 - (format.significandWidth() - 1)
+ // i.e.
+ // 2^(u-1) >= 2^(format.exponentWidth() - 1) + (format.significandWidth() - 3)
- // Could be improved to remove overflow concerns
- uint64_t minimumExponent = ((1 << (width - 1)) - 2) + (format.significandWidth() - 1);
+ bwt formatExponentWidth = format.exponentWidth();
+ bwt formatSignificandWidth = format.significandWidth();
- // Increase width until even the smallest subnormal can be normalised
- while ((1UL << (width - 1)) < minimumExponent) {
- ++width;
+ if (formatSignificandWidth <= 3) {
+ // Subnormals fit into the gap between minimum normal exponent and what is represenatble
+ // using a signed number
+ return formatExponentWidth;
}
- return width;
+ bwt bitsNeededForSubnormals = bitsToRepresent(format.significandWidth() - 3);
+ if (bitsNeededForSubnormals < formatExponentWidth - 1) {
+ // Significand is short compared to exponent range,
+ // one extra bit should be sufficient
+ return formatExponentWidth + 1;
+
+ } else {
+ // Significand is long compared to exponent range
+ return bitsToRepresent((bwt(1) << (formatExponentWidth - 1)) + formatSignificandWidth - 3) + 1;
+ }
}
static bwt significandWidth(const fpt &format) {
@@ -426,7 +455,10 @@ namespace symfpu {
(subnormalAmount <= sbv(exWidth,sigWidth)));
// Invariant implies this following steps do not loose data
- ubv mask(orderEncode<t>(subnormalAmount.toUnsigned().matchWidth(significand)));
+ ubv mask(orderEncode<t>((sigWidth >= exWidth) ?
+ subnormalAmount.toUnsigned().matchWidth(significand) :
+ subnormalAmount.toUnsigned().extract(sigWidth - 1, 0)
+ ));
prop correctlyAbbreviated((mask & significand).isAllZeros());