From a24b8d91b20b25951c4f72fd881806a690b2fcc5 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Jan 09 2020 07:53:10 +0000 Subject: Fix test issues on ppc64le. --- diff --git a/0001-Fix-test-1729-on-ppc64le.patch b/0001-Fix-test-1729-on-ppc64le.patch new file mode 100644 index 0000000..e69322a --- /dev/null +++ b/0001-Fix-test-1729-on-ppc64le.patch @@ -0,0 +1,41 @@ +From f02279985b388aeb5d9c58f6fb82fea423fd477b Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Wed, 13 Nov 2019 20:49:24 -0500 +Subject: [PATCH 1/2] Fix test 1729 on ppc64le. + +Signed-off-by: Elliott Sales de Andrade +--- + inst/tests/tests.Rraw | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw +index d287a398..de883b37 100644 +--- a/inst/tests/tests.Rraw ++++ b/inst/tests/tests.Rraw +@@ -10342,7 +10342,7 @@ test(1729.05, nrow(DT), 507L) + + options(datatable.verbose = FALSE) # capture.output() exact tests must not be polluted with verbosity + x = capture.output(fwrite(DT,na="NA"))[-1] # -1 to remove the column name V1 +-y = capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))[-1] ++y = gsub("\\.?0+e", "e", capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))[-1]) + # One mismatch that seems to be accuracy in base R's write.csv + # tmp = cbind(row=1:length(x), `fwrite`=x, `write.csv`=y) + # tmp[x!=y,] +@@ -10388,12 +10388,12 @@ if (isTRUE(LD<-capabilities()["long.double"])) { #3258 + cat('Skipped test 1729.9 due to capabilities()["long.double"] ==', LD, '\n') + } + test(1729.10, fwrite(DT,na=""), output=ans) +-test(1729.11, write.csv(DT,row.names=FALSE,quote=FALSE), output=ans) ++test(1729.11, gsub("\\.?0+e", "e", capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))), ans) + DT = data.table(unlist(.Machine[c("double.eps","double.neg.eps","double.xmin","double.xmax")])) + # double.eps double.neg.eps double.xmin double.xmax + # 2.220446e-16 1.110223e-16 2.225074e-308 1.797693e+308 + test(1729.12, typeof(DT[[1L]]), "double") +-test(1729.13, capture.output(fwrite(DT)), capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))) ++test(1729.13, capture.output(fwrite(DT)), gsub("\\.?0+e", "e", capture.output(write.csv(DT,row.names=FALSE,quote=FALSE)))) + + if (test_bit64) { + test(1730.1, typeof(-2147483647L), "integer") +-- +2.21.0 + diff --git a/0002-Improve-fread-for-very-small-or-very-large-fp-number.patch b/0002-Improve-fread-for-very-small-or-very-large-fp-number.patch new file mode 100644 index 0000000..5430667 --- /dev/null +++ b/0002-Improve-fread-for-very-small-or-very-large-fp-number.patch @@ -0,0 +1,237 @@ +From f4334c4ac845f811b4be531c921cb50dc77007d1 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Thu, 9 Jan 2020 01:06:25 -0500 +Subject: [PATCH 2/2] Improve `fread` for very small or very large fp numbers. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +On non-x86 architectures (armv7hl and ppc64le), test 1018 fails with a +slightly differently parsed number. In base R, `R_strtod` handles small +numbers by pre-dividing numerator and divsor before applying the +exponent part (instead of dividing all together.) However, it does not +use a lookup table. + +For `fread`, trim the exponent lookup table from ±350 to ±300, and if +anything is in that removed range, do two multiplications instead. This +results in approximately the same effect as in base R. + +Removing some of the range from the lookup table also fixes several +warnings such as: + +``` +freadLookups.h:57:1: warning: floating constant truncated to zero [-Woverflow] + 57 | 1.0E-324L, + | ^~~~~~~~~ +freadLookups.h:690:1: warning: floating constant exceeds range of 'long double' [-Woverflow] + 690 | 1.0E309L, + | ^~~~~~~~ +``` + +See #3492 and #4032. + +Signed-off-by: Elliott Sales de Andrade +--- + inst/tests/tests.Rraw | 3 +- + src/fread.c | 26 ++++++++--- + src/fread.h | 2 +- + src/freadLookups.h | 104 +----------------------------------------- + 5 files changed, 26 insertions(+), 111 deletions(-) + +diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw +index de883b37..06b6dc2c 100644 +--- a/inst/tests/tests.Rraw ++++ b/inst/tests/tests.Rraw +@@ -2929,7 +2929,8 @@ test(1017.2, fread(f, integer64="character"), DT) + unlink(f) + + # ERANGE errno handled, #4879 +-test(1018, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313))) ++test(1018.1, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313))) ++test(1018.2, identical(fread("1.46761e+313\n"), data.table(V1=1.46761e+313))) + test(1019, fread("A\n1.23456789123456789123456999\n"), data.table(A=1.234567891234568)) + + # crash assigning to row 0, #2754 +diff --git a/src/fread.c b/src/fread.c +index 7d6e4f05..57cdfc83 100644 +--- a/src/fread.c ++++ b/src/fread.c +@@ -644,9 +644,9 @@ static void StrtoI64(FieldParseContext *ctx) + // TODO: review ERANGE checks and tests; that range outside [1.7e-308,1.7e+308] coerces to [0.0,Inf] + /* + f = "~/data.table/src/freadLookups.h" +-cat("const long double pow10lookup[701] = {\n", file=f, append=FALSE) +-for (i in (-350):(349)) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) +-cat("1.0E350L\n};\n", file=f, append=TRUE) ++cat("const long double pow10lookup[601] = {\n", file=f, append=FALSE) ++for (i in (-300):(299)) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) ++cat("1.0E300L\n};\n", file=f, append=TRUE) + */ + + +@@ -763,11 +763,23 @@ static void parse_double_regular(FieldParseContext *ctx) + } + e += Eneg? -E : E; + } +- e += 350; // lookup table is arranged from -350 (0) to +350 (700) +- if (e<0 || e>700) goto fail; ++ if (e<-350 || e>350) goto fail; + +- double r = (double)((long double)acc * pow10lookup[e]); +- *target = neg? -r : r; ++ long double r = (long double)acc; ++ if (e < -300 || e > 300) { ++ // Handle extra precision by pre-multiplying the result by pow(10, extra), ++ // and then remove extra from e. ++ // This avoids having to store very small or very large constants that may ++ // fail to be encoded by the compiler, even though the values can actually ++ // be stored correctly. ++ int_fast8_t extra = e < 0 ? e + 300 : e - 300; ++ r *= pow10lookup[extra + 300]; ++ e -= extra; ++ } ++ e += 300; // lookup table is arranged from -300 (0) to +300 (600) ++ ++ r *= pow10lookup[e]; ++ *target = (double)(neg? -r : r); + *(ctx->ch) = ch; + return; + +diff --git a/src/fread.h b/src/fread.h +index ec230c69..64150d66 100644 +--- a/src/fread.h ++++ b/src/fread.h +@@ -30,7 +30,7 @@ typedef enum { + + extern int8_t typeSize[NUMTYPE]; + extern const char typeName[NUMTYPE][10]; +-extern const long double pow10lookup[701]; ++extern const long double pow10lookup[601]; + extern const uint8_t hexdigits[256]; + + +diff --git a/src/freadLookups.h b/src/freadLookups.h +index faeb4ade..bb736a60 100644 +--- a/src/freadLookups.h ++++ b/src/freadLookups.h +@@ -27,57 +27,7 @@ const uint8_t hexdigits[256] = { + }; + + +-const long double pow10lookup[701] = { +-1.0E-350L, +-1.0E-349L, +-1.0E-348L, +-1.0E-347L, +-1.0E-346L, +-1.0E-345L, +-1.0E-344L, +-1.0E-343L, +-1.0E-342L, +-1.0E-341L, +-1.0E-340L, +-1.0E-339L, +-1.0E-338L, +-1.0E-337L, +-1.0E-336L, +-1.0E-335L, +-1.0E-334L, +-1.0E-333L, +-1.0E-332L, +-1.0E-331L, +-1.0E-330L, +-1.0E-329L, +-1.0E-328L, +-1.0E-327L, +-1.0E-326L, +-1.0E-325L, +-1.0E-324L, +-1.0E-323L, +-1.0E-322L, +-1.0E-321L, +-1.0E-320L, +-1.0E-319L, +-1.0E-318L, +-1.0E-317L, +-1.0E-316L, +-1.0E-315L, +-1.0E-314L, +-1.0E-313L, +-1.0E-312L, +-1.0E-311L, +-1.0E-310L, +-1.0E-309L, +-1.0E-308L, +-1.0E-307L, +-1.0E-306L, +-1.0E-305L, +-1.0E-304L, +-1.0E-303L, +-1.0E-302L, +-1.0E-301L, ++const long double pow10lookup[601] = { + 1.0E-300L, + 1.0E-299L, + 1.0E-298L, +@@ -678,57 +628,7 @@ const long double pow10lookup[701] = { + 1.0E297L, + 1.0E298L, + 1.0E299L, +-1.0E300L, +-1.0E301L, +-1.0E302L, +-1.0E303L, +-1.0E304L, +-1.0E305L, +-1.0E306L, +-1.0E307L, +-1.0E308L, +-1.0E309L, +-1.0E310L, +-1.0E311L, +-1.0E312L, +-1.0E313L, +-1.0E314L, +-1.0E315L, +-1.0E316L, +-1.0E317L, +-1.0E318L, +-1.0E319L, +-1.0E320L, +-1.0E321L, +-1.0E322L, +-1.0E323L, +-1.0E324L, +-1.0E325L, +-1.0E326L, +-1.0E327L, +-1.0E328L, +-1.0E329L, +-1.0E330L, +-1.0E331L, +-1.0E332L, +-1.0E333L, +-1.0E334L, +-1.0E335L, +-1.0E336L, +-1.0E337L, +-1.0E338L, +-1.0E339L, +-1.0E340L, +-1.0E341L, +-1.0E342L, +-1.0E343L, +-1.0E344L, +-1.0E345L, +-1.0E346L, +-1.0E347L, +-1.0E348L, +-1.0E349L, +-1.0E350L ++1.0E300L + }; + + #endif +-- +2.21.0 + diff --git a/R-data.table.spec b/R-data.table.spec index d99072d..dee2895 100644 --- a/R-data.table.spec +++ b/R-data.table.spec @@ -12,6 +12,10 @@ Summary: Extension of `data.frame` License: MPLv2.0 URL: https://CRAN.R-project.org/package=%{packname} Source0: https://cran.r-project.org/src/contrib/%{packname}_%{version}.tar.gz +# https://github.com/Rdatatable/data.table/issues/4032 +Patch0001: 0001-Fix-test-1729-on-ppc64le.patch +# https://github.com/Rdatatable/data.table/pull/4165 +Patch0002: 0002-Improve-fread-for-very-small-or-very-large-fp-number.patch # Here's the R view of the dependencies world: # Depends: @@ -53,6 +57,13 @@ natural and flexible syntax, for faster development. %prep %setup -q -c -n %{packname} +pushd %{packname} +bunzip2 inst/tests/tests.Rraw.bz2 +%patch0001 -p1 +%patch0002 -p1 +bzip2 inst/tests/tests.Rraw +popd + %build