From 4792fe2ea2a4984ed37a99c99c6534a1bfc248d1 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Jan 30 2020 08:12:52 +0000 Subject: Merge branch 'f30' --- diff --git a/.gitignore b/.gitignore index 0cece03..c77077c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /data.table_1.12.2.tar.gz /data.table_1.12.4.tar.gz /data.table_1.12.6.tar.gz +/data.table_1.12.8.tar.gz diff --git a/0001-Fix-test-1729-on-ppc64le.patch b/0001-Fix-test-1729-on-ppc64le.patch new file mode 100644 index 0000000..20813a4 --- /dev/null +++ b/0001-Fix-test-1729-on-ppc64le.patch @@ -0,0 +1,41 @@ +From f02279985b388aeb5d9c58f6fb82fea423fd477b Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Wed, 13 Nov 2019 20:49:24 -0500 +Subject: [PATCH 1/3] Fix test 1729 on ppc64le. + +Signed-off-by: Elliott Sales de Andrade +--- + inst/tests/tests.Rraw | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw +index d287a398..de883b37 100644 +--- a/inst/tests/tests.Rraw ++++ b/inst/tests/tests.Rraw +@@ -10342,7 +10342,7 @@ test(1729.05, nrow(DT), 507L) + + options(datatable.verbose = FALSE) # capture.output() exact tests must not be polluted with verbosity + x = capture.output(fwrite(DT,na="NA"))[-1] # -1 to remove the column name V1 +-y = capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))[-1] ++y = gsub("\\.?0+e", "e", capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))[-1]) + # One mismatch that seems to be accuracy in base R's write.csv + # tmp = cbind(row=1:length(x), `fwrite`=x, `write.csv`=y) + # tmp[x!=y,] +@@ -10388,12 +10388,12 @@ if (isTRUE(LD<-capabilities()["long.double"])) { #3258 + cat('Skipped test 1729.9 due to capabilities()["long.double"] ==', LD, '\n') + } + test(1729.10, fwrite(DT,na=""), output=ans) +-test(1729.11, write.csv(DT,row.names=FALSE,quote=FALSE), output=ans) ++test(1729.11, gsub("\\.?0+e", "e", capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))), ans) + DT = 
data.table(unlist(.Machine[c("double.eps","double.neg.eps","double.xmin","double.xmax")])) + # double.eps double.neg.eps double.xmin double.xmax + # 2.220446e-16 1.110223e-16 2.225074e-308 1.797693e+308 + test(1729.12, typeof(DT[[1L]]), "double") +-test(1729.13, capture.output(fwrite(DT)), capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))) ++test(1729.13, capture.output(fwrite(DT)), gsub("\\.?0+e", "e", capture.output(write.csv(DT,row.names=FALSE,quote=FALSE)))) + + if (test_bit64) { + test(1730.1, typeof(-2147483647L), "integer") +-- +2.21.0 + diff --git a/0002-Improve-fread-for-very-small-or-very-large-fp-number.patch b/0002-Improve-fread-for-very-small-or-very-large-fp-number.patch new file mode 100644 index 0000000..12254ad --- /dev/null +++ b/0002-Improve-fread-for-very-small-or-very-large-fp-number.patch @@ -0,0 +1,237 @@ +From f4334c4ac845f811b4be531c921cb50dc77007d1 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Thu, 9 Jan 2020 01:06:25 -0500 +Subject: [PATCH 2/3] Improve `fread` for very small or very large fp numbers. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +On non-x86 architectures (armv7hl and ppc64le), test 1018 fails with a +slightly differently parsed number. In base R, `R_strtod` handles small +numbers by pre-dividing numerator and divisor before applying the +exponent part (instead of dividing all together.) However, it does not +use a lookup table. + +For `fread`, trim the exponent lookup table from ±350 to ±300, and if +anything is in that removed range, do two multiplications instead. This +results in approximately the same effect as in base R. 
+ +Removing some of the range from the lookup table also fixes several +warnings such as: + +``` +freadLookups.h:57:1: warning: floating constant truncated to zero [-Woverflow] + 57 | 1.0E-324L, + | ^~~~~~~~~ +freadLookups.h:690:1: warning: floating constant exceeds range of 'long double' [-Woverflow] + 690 | 1.0E309L, + | ^~~~~~~~ +``` + +See #3492 and #4032. + +Signed-off-by: Elliott Sales de Andrade +--- + inst/tests/tests.Rraw | 3 +- + src/fread.c | 26 ++++++++--- + src/fread.h | 2 +- + src/freadLookups.h | 104 +----------------------------------------- + 5 files changed, 26 insertions(+), 111 deletions(-) + +diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw +index de883b37..06b6dc2c 100644 +--- a/inst/tests/tests.Rraw ++++ b/inst/tests/tests.Rraw +@@ -2929,7 +2929,8 @@ test(1017.2, fread(f, integer64="character"), DT) + unlink(f) + + # ERANGE errno handled, #4879 +-test(1018, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313))) ++test(1018.1, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313))) ++test(1018.2, identical(fread("1.46761e+313\n"), data.table(V1=1.46761e+313))) + test(1019, fread("A\n1.23456789123456789123456999\n"), data.table(A=1.234567891234568)) + + # crash assigning to row 0, #2754 +diff --git a/src/fread.c b/src/fread.c +index 7d6e4f05..57cdfc83 100644 +--- a/src/fread.c ++++ b/src/fread.c +@@ -644,9 +644,9 @@ static void StrtoI64(FieldParseContext *ctx) + // TODO: review ERANGE checks and tests; that range outside [1.7e-308,1.7e+308] coerces to [0.0,Inf] + /* + f = "~/data.table/src/freadLookups.h" +-cat("const long double pow10lookup[701] = {\n", file=f, append=FALSE) +-for (i in (-350):(349)) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) +-cat("1.0E350L\n};\n", file=f, append=TRUE) ++cat("const long double pow10lookup[601] = {\n", file=f, append=FALSE) ++for (i in (-300):(299)) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) ++cat("1.0E300L\n};\n", file=f, append=TRUE) + */ + + +@@ -763,11 +763,23 @@ 
static void parse_double_regular(FieldParseContext *ctx) + } + e += Eneg? -E : E; + } +- e += 350; // lookup table is arranged from -350 (0) to +350 (700) +- if (e<0 || e>700) goto fail; ++ if (e<-350 || e>350) goto fail; + +- double r = (double)((long double)acc * pow10lookup[e]); +- *target = neg? -r : r; ++ long double r = (long double)acc; ++ if (e < -300 || e > 300) { ++ // Handle extra precision by pre-multiplying the result by pow(10, extra), ++ // and then remove extra from e. ++ // This avoids having to store very small or very large constants that may ++ // fail to be encoded by the compiler, even though the values can actually ++ // be stored correctly. ++ int_fast8_t extra = e < 0 ? e + 300 : e - 300; ++ r *= pow10lookup[extra + 300]; ++ e -= extra; ++ } ++ e += 300; // lookup table is arranged from -300 (0) to +300 (600) ++ ++ r *= pow10lookup[e]; ++ *target = (double)(neg? -r : r); + *(ctx->ch) = ch; + return; + +diff --git a/src/fread.h b/src/fread.h +index ec230c69..64150d66 100644 +--- a/src/fread.h ++++ b/src/fread.h +@@ -30,7 +30,7 @@ typedef enum { + + extern int8_t typeSize[NUMTYPE]; + extern const char typeName[NUMTYPE][10]; +-extern const long double pow10lookup[701]; ++extern const long double pow10lookup[601]; + extern const uint8_t hexdigits[256]; + + +diff --git a/src/freadLookups.h b/src/freadLookups.h +index faeb4ade..bb736a60 100644 +--- a/src/freadLookups.h ++++ b/src/freadLookups.h +@@ -27,57 +27,7 @@ const uint8_t hexdigits[256] = { + }; + + +-const long double pow10lookup[701] = { +-1.0E-350L, +-1.0E-349L, +-1.0E-348L, +-1.0E-347L, +-1.0E-346L, +-1.0E-345L, +-1.0E-344L, +-1.0E-343L, +-1.0E-342L, +-1.0E-341L, +-1.0E-340L, +-1.0E-339L, +-1.0E-338L, +-1.0E-337L, +-1.0E-336L, +-1.0E-335L, +-1.0E-334L, +-1.0E-333L, +-1.0E-332L, +-1.0E-331L, +-1.0E-330L, +-1.0E-329L, +-1.0E-328L, +-1.0E-327L, +-1.0E-326L, +-1.0E-325L, +-1.0E-324L, +-1.0E-323L, +-1.0E-322L, +-1.0E-321L, +-1.0E-320L, +-1.0E-319L, +-1.0E-318L, +-1.0E-317L, +-1.0E-316L, 
+-1.0E-315L, +-1.0E-314L, +-1.0E-313L, +-1.0E-312L, +-1.0E-311L, +-1.0E-310L, +-1.0E-309L, +-1.0E-308L, +-1.0E-307L, +-1.0E-306L, +-1.0E-305L, +-1.0E-304L, +-1.0E-303L, +-1.0E-302L, +-1.0E-301L, ++const long double pow10lookup[601] = { + 1.0E-300L, + 1.0E-299L, + 1.0E-298L, +@@ -678,57 +628,7 @@ const long double pow10lookup[701] = { + 1.0E297L, + 1.0E298L, + 1.0E299L, +-1.0E300L, +-1.0E301L, +-1.0E302L, +-1.0E303L, +-1.0E304L, +-1.0E305L, +-1.0E306L, +-1.0E307L, +-1.0E308L, +-1.0E309L, +-1.0E310L, +-1.0E311L, +-1.0E312L, +-1.0E313L, +-1.0E314L, +-1.0E315L, +-1.0E316L, +-1.0E317L, +-1.0E318L, +-1.0E319L, +-1.0E320L, +-1.0E321L, +-1.0E322L, +-1.0E323L, +-1.0E324L, +-1.0E325L, +-1.0E326L, +-1.0E327L, +-1.0E328L, +-1.0E329L, +-1.0E330L, +-1.0E331L, +-1.0E332L, +-1.0E333L, +-1.0E334L, +-1.0E335L, +-1.0E336L, +-1.0E337L, +-1.0E338L, +-1.0E339L, +-1.0E340L, +-1.0E341L, +-1.0E342L, +-1.0E343L, +-1.0E344L, +-1.0E345L, +-1.0E346L, +-1.0E347L, +-1.0E348L, +-1.0E349L, +-1.0E350L ++1.0E300L + }; + + #endif +-- +2.21.0 + diff --git a/0003-Use-consistent-types-with-fwriteMainArgs.nrow.patch b/0003-Use-consistent-types-with-fwriteMainArgs.nrow.patch new file mode 100644 index 0000000..08bd403 --- /dev/null +++ b/0003-Use-consistent-types-with-fwriteMainArgs.nrow.patch @@ -0,0 +1,99 @@ +From 79911aaf7cd877c0d25fccd5aaecdecd9a5d0204 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Thu, 30 Jan 2020 01:26:50 -0500 +Subject: [PATCH 3/3] Use consistent types with fwriteMainArgs.nrow. + +On armv7hl, test 1737.5 fails due to garbage in the column length +mismatch error message. This is because the message tries to format +`args.nrow` (an `int64_t`) using `%d` (i.e., `int`). Strangely, this +does not fail on any other architectures, but this is likely a fluke. + +In all the other formatting calls, remove the unnecessary `(int64_t)` +type cast, since `fwriteMainArgs.nrow` already is one. 
+ +Signed-off-by: Elliott Sales de Andrade +--- + src/fwrite.c | 6 +++--- + src/fwriteR.c | 13 +++++++------ + 2 files changed, 10 insertions(+), 9 deletions(-) + +diff --git a/src/fwrite.c b/src/fwrite.c +index b5ff08cd..93cbaf62 100644 +--- a/src/fwrite.c ++++ b/src/fwrite.c +@@ -620,7 +620,7 @@ void fwriteMain(fwriteMainArgs args) + for (int j=args.ncol-10; j - 1.12.8-1 +- Update to latest version + * Mon Oct 21 2019 Elliott Sales de Andrade - 1.12.6-1 - Update to latest version diff --git a/sources b/sources index 47d8a15..98658a7 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (data.table_1.12.6.tar.gz) = 468a2728d9f2ae8f14ed7675f04c7526ef3b55123f84a9693bfdbec00355f4ebc75a6b3a45e0c451ef590ed6b8db716f61a973e8240c38908c4ac62edbce9cba +SHA512 (data.table_1.12.8.tar.gz) = 04ffe5e2b5c7ad221dc8d48fb51f2b6717cece14926b71186177f7a3aba31fcb47441dbded0a6f298e434a18baa96d372b30ee16bfb8bd6f6e59c49edeea2b39