commit 799c8d6905433ad56f26ccab4855b36f1d1ddbfc Author: Mike FABIAN Date: Thu Sep 7 15:28:28 2017 +0200 Add new codepage charmaps/IBM858 [BZ #21084] This code page is identical to code page 850 except that X'D5' has been changed from LI61 (dotless i) to SC20 (euro symbol). The code points from /x01 to /x1f in the localedata/charmaps/IBM858 file have the same mapping as those in localedata/charmaps/ANSI_X3.4-1968. That means they disagree with ftp://ftp.software.ibm.com/software/globalization/gcoc/attachments/CP00858.txt in that range. For example, localedata/charmaps/IBM858 and localedata/charmaps/ANSI_X3.4-1968 have: “ /x01 START OF HEADING (SOH)” whereas CP00858.txt has: “01 SS000000 Smiling Face” That means that CP00858.txt is not really ASCII-compatible and to make it ASCII-compatible we deviate from CP00858.txt in the code points from /x01 to /x1f. [BZ #21084] * benchtests/strcoll-inputs/filelist#en_US.UTF-8: Add IBM858 and ibm858.c. * iconvdata/Makefile: Add IBM858. * iconvdata/gconv-modules: Add IBM858. * iconvdata/ibm858.c: New file. * iconvdata/tst-tables.sh: Add IBM858. * localedata/charmaps/IBM858: New file. 
diff --git a/benchtests/strcoll-inputs/filelist#en_US.UTF-8 b/benchtests/strcoll-inputs/filelist#en_US.UTF-8 index b7b38017d836aee8..4fd74821feb0f22b 100644 --- a/benchtests/strcoll-inputs/filelist#en_US.UTF-8 +++ b/benchtests/strcoll-inputs/filelist#en_US.UTF-8 @@ -11233,6 +11233,7 @@ ISO-8859-9E UTF-8 ISO-8859-2 IBM850 +IBM858 EUC-TW KOI8-U IBM903 @@ -13922,6 +13923,7 @@ ibm12712.c ibm1145.h ibm932.c ibm850.c +ibm858.c ibm437.c ibm1399.c stdio-common diff --git a/iconvdata/Makefile b/iconvdata/Makefile index e4845871f559b406..6975b46fbba422bd 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -36,9 +36,9 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \ IBM874 CP737 CP775 ISO-2022-KR HP-TURKISH8 HP-THAI8 HP-GREEK8 \ KOI8-R LATIN-GREEK LATIN-GREEK-1 IBM256 IBM273 IBM277 IBM278 \ IBM280 IBM281 IBM284 IBM285 IBM290 IBM297 IBM420 IBM424 \ - IBM437 IBM850 IBM851 IBM852 IBM855 IBM857 IBM860 IBM861 \ - IBM862 IBM863 IBM864 IBM865 IBM868 IBM869 IBM875 IBM880 \ - IBM866 CP1258 IBM922 IBM1124 IBM1129 IBM932 IBM943 \ + IBM437 IBM850 IBM851 IBM852 IBM855 IBM857 IBM858 IBM860 \ + IBM861 IBM862 IBM863 IBM864 IBM865 IBM868 IBM869 IBM875 \ + IBM880 IBM866 CP1258 IBM922 IBM1124 IBM1129 IBM932 IBM943 \ IBM856 IBM930 IBM933 IBM935 IBM937 IBM939 IBM1046 \ IBM1132 IBM1133 IBM1160 IBM1161 IBM1162 IBM1163 IBM1164 \ IBM918 IBM1004 IBM1026 CP1125 CP1250 CP1251 CP1252 CP1253 \ @@ -153,11 +153,11 @@ gen-8bit-modules := iso8859-2 iso8859-3 iso8859-4 iso8859-6 iso8859-9 koi-8 \ gen-8bit-gap-modules := koi8-r latin-greek latin-greek-1 ibm256 ibm273 \ ibm277 ibm278 ibm280 ibm281 ibm284 ibm285 ibm290 \ ibm297 ibm420 ibm424 ibm437 ibm850 ibm851 ibm852 \ - ibm855 ibm857 ibm860 ibm861 ibm862 ibm863 ibm864 \ - ibm865 ibm868 ibm869 ibm875 ibm880 ibm918 ibm1004 \ - ibm1026 cp1125 cp1250 cp1251 cp1252 cp1253 cp1254 \ - cp1256 cp1257 ibm866 iso8859-5 iso8859-7 iso8859-8 \ - iso8859-10 macintosh iec_p27-1 asmo_449 \ + ibm855 ibm857 ibm858 ibm860 ibm861 ibm862 ibm863 \ + ibm864 
ibm865 ibm868 ibm869 ibm875 ibm880 ibm918 \ + ibm1004 ibm1026 cp1125 cp1250 cp1251 cp1252 cp1253 \ + cp1254 cp1256 cp1257 ibm866 iso8859-5 iso8859-7 \ + iso8859-8 iso8859-10 macintosh iec_p27-1 asmo_449 \ csn_369103 cwi dec-mcs ecma-cyrillic gost_19768-74 \ greek-ccitt greek7 greek7-old inis inis-8 \ inis-cyrillic iso_2033 iso_5427 iso_5427-ext \ diff --git a/iconvdata/gconv-modules b/iconvdata/gconv-modules index e959f16ad9b6dd3c..7d988c8ad9972858 100644 --- a/iconvdata/gconv-modules +++ b/iconvdata/gconv-modules @@ -744,6 +744,13 @@ module IBM850// INTERNAL IBM850 1 module INTERNAL IBM850// IBM850 1 # from to module cost +alias CP858// IBM858// +alias 858// IBM858// +alias CSPC858MULTILINGUAL// IBM858// +module IBM858// INTERNAL IBM858 1 +module INTERNAL IBM858// IBM858 1 + +# from to module cost alias CP851// IBM851// alias 851// IBM851// alias CSIBM851// IBM851// diff --git a/iconvdata/ibm858.c b/iconvdata/ibm858.c new file mode 100644 index 0000000000000000..ed2a48e3cf79e2b9 --- /dev/null +++ b/iconvdata/ibm858.c @@ -0,0 +1,27 @@ +/* Conversion from and to IBM858. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* Get the conversion table. */ +#define TABLES + +#define CHARSET_NAME "IBM858//" +#define HAS_HOLES 1 /* Not all 256 character are defined. 
*/ + +#include <8bit-gap.c> diff --git a/iconvdata/tst-tables.sh b/iconvdata/tst-tables.sh index a027f5df5b27b904..77338f05149ccb98 100755 --- a/iconvdata/tst-tables.sh +++ b/iconvdata/tst-tables.sh @@ -125,6 +125,7 @@ cat < IBM858 + % + / +% version: 1.0 +% source: ftp://ftp.software.ibm.com/software/globalization/gcoc/attachments/CP00858.txt, 1998 + +% source: UNICODE 1.0 + +% This code page is identical to code page 850 except that X'D5' +% has been changed from LI61 (dotless i) to SC20 (euro symbol). + +% The code points from /x01 to /x1f in this file have the same mapping +% as those in ANSI_X3.4-1968. That means they disagree with CP00858.txt +% in that range. For example, this file and ANSI_X3.4-1968 have: +% “ /x01 START OF HEADING (SOH)” +% whereas CP00858.txt has: +% “01 SS000000 Smiling Face” +% That means that CP00858.txt is not really ASCII-compatible and to make +% it ASCII-compatible we deviate from CP00858.txt in the code points from /x01 +% to /x1f. + +% alias CP858 +% alias 858 +CHARMAP + /x00 NULL (NUL) + /x01 START OF HEADING (SOH) + /x02 START OF TEXT (STX) + /x03 END OF TEXT (ETX) + /x04 END OF TRANSMISSION (EOT) + /x05 ENQUIRY (ENQ) + /x06 ACKNOWLEDGE (ACK) + /x07 BELL (BEL) + /x08 BACKSPACE (BS) + /x09 CHARACTER TABULATION (HT) + /x0a LINE FEED (LF) + /x0b LINE TABULATION (VT) + /x0c FORM FEED (FF) + /x0d CARRIAGE RETURN (CR) + /x0e SHIFT OUT (SO) + /x0f SHIFT IN (SI) + /x10 DATALINK ESCAPE (DLE) + /x11 DEVICE CONTROL ONE (DC1) + /x12 DEVICE CONTROL TWO (DC2) + /x13 DEVICE CONTROL THREE (DC3) + /x14 DEVICE CONTROL FOUR (DC4) + /x15 NEGATIVE ACKNOWLEDGE (NAK) + /x16 SYNCHRONOUS IDLE (SYN) + /x17 END OF TRANSMISSION BLOCK (ETB) + /x18 CANCEL (CAN) + /x19 END OF MEDIUM (EM) + /x1a SUBSTITUTE (SUB) + /x1b ESCAPE (ESC) + /x1c FILE SEPARATOR (IS4) + /x1d GROUP SEPARATOR (IS3) + /x1e RECORD SEPARATOR (IS2) + /x1f UNIT SEPARATOR (IS1) + /x20 SPACE + /x21 EXCLAMATION MARK + /x22 QUOTATION MARK + /x23 NUMBER SIGN + /x24 DOLLAR SIGN + /x25 PERCENT 
SIGN + /x26 AMPERSAND + /x27 APOSTROPHE + /x28 LEFT PARENTHESIS + /x29 RIGHT PARENTHESIS + /x2a ASTERISK + /x2b PLUS SIGN + /x2c COMMA + /x2d HYPHEN-MINUS + /x2e FULL STOP + /x2f SOLIDUS + /x30 DIGIT ZERO + /x31 DIGIT ONE + /x32 DIGIT TWO + /x33 DIGIT THREE + /x34 DIGIT FOUR + /x35 DIGIT FIVE + /x36 DIGIT SIX + /x37 DIGIT SEVEN + /x38 DIGIT EIGHT + /x39 DIGIT NINE + /x3a COLON + /x3b SEMICOLON + /x3c LESS-THAN SIGN + /x3d EQUALS SIGN + /x3e GREATER-THAN SIGN + /x3f QUESTION MARK + /x40 COMMERCIAL AT + /x41 LATIN CAPITAL LETTER A + /x42 LATIN CAPITAL LETTER B + /x43 LATIN CAPITAL LETTER C + /x44 LATIN CAPITAL LETTER D + /x45 LATIN CAPITAL LETTER E + /x46 LATIN CAPITAL LETTER F + /x47 LATIN CAPITAL LETTER G + /x48 LATIN CAPITAL LETTER H + /x49 LATIN CAPITAL LETTER I + /x4a LATIN CAPITAL LETTER J + /x4b LATIN CAPITAL LETTER K + /x4c LATIN CAPITAL LETTER L + /x4d LATIN CAPITAL LETTER M + /x4e LATIN CAPITAL LETTER N + /x4f LATIN CAPITAL LETTER O + /x50 LATIN CAPITAL LETTER P + /x51 LATIN CAPITAL LETTER Q + /x52 LATIN CAPITAL LETTER R + /x53 LATIN CAPITAL LETTER S + /x54 LATIN CAPITAL LETTER T + /x55 LATIN CAPITAL LETTER U + /x56 LATIN CAPITAL LETTER V + /x57 LATIN CAPITAL LETTER W + /x58 LATIN CAPITAL LETTER X + /x59 LATIN CAPITAL LETTER Y + /x5a LATIN CAPITAL LETTER Z + /x5b LEFT SQUARE BRACKET + /x5c REVERSE SOLIDUS + /x5d RIGHT SQUARE BRACKET + /x5e CIRCUMFLEX ACCENT + /x5f LOW LINE + /x60 GRAVE ACCENT + /x61 LATIN SMALL LETTER A + /x62 LATIN SMALL LETTER B + /x63 LATIN SMALL LETTER C + /x64 LATIN SMALL LETTER D + /x65 LATIN SMALL LETTER E + /x66 LATIN SMALL LETTER F + /x67 LATIN SMALL LETTER G + /x68 LATIN SMALL LETTER H + /x69 LATIN SMALL LETTER I + /x6a LATIN SMALL LETTER J + /x6b LATIN SMALL LETTER K + /x6c LATIN SMALL LETTER L + /x6d LATIN SMALL LETTER M + /x6e LATIN SMALL LETTER N + /x6f LATIN SMALL LETTER O + /x70 LATIN SMALL LETTER P + /x71 LATIN SMALL LETTER Q + /x72 LATIN SMALL LETTER R + /x73 LATIN SMALL LETTER S + /x74 LATIN SMALL LETTER T + /x75 LATIN 
SMALL LETTER U + /x76 LATIN SMALL LETTER V + /x77 LATIN SMALL LETTER W + /x78 LATIN SMALL LETTER X + /x79 LATIN SMALL LETTER Y + /x7a LATIN SMALL LETTER Z + /x7b LEFT CURLY BRACKET + /x7c VERTICAL LINE + /x7d RIGHT CURLY BRACKET + /x7e TILDE + /x7f DELETE (DEL) + /x80 LATIN CAPITAL LETTER C WITH CEDILLA + /x81 LATIN SMALL LETTER U WITH DIAERESIS + /x82 LATIN SMALL LETTER E WITH ACUTE + /x83 LATIN SMALL LETTER A WITH CIRCUMFLEX + /x84 LATIN SMALL LETTER A WITH DIAERESIS + /x85 LATIN SMALL LETTER A WITH GRAVE + /x86 LATIN SMALL LETTER A WITH RING ABOVE + /x87 LATIN SMALL LETTER C WITH CEDILLA + /x88 LATIN SMALL LETTER E WITH CIRCUMFLEX + /x89 LATIN SMALL LETTER E WITH DIAERESIS + /x8a LATIN SMALL LETTER E WITH GRAVE + /x8b LATIN SMALL LETTER I WITH DIAERESIS + /x8c LATIN SMALL LETTER I WITH CIRCUMFLEX + /x8d LATIN SMALL LETTER I WITH GRAVE + /x8e LATIN CAPITAL LETTER A WITH DIAERESIS + /x8f LATIN CAPITAL LETTER A WITH RING ABOVE + /x90 LATIN CAPITAL LETTER E WITH ACUTE + /x91 LATIN SMALL LETTER AE + /x92 LATIN CAPITAL LETTER AE + /x93 LATIN SMALL LETTER O WITH CIRCUMFLEX + /x94 LATIN SMALL LETTER O WITH DIAERESIS + /x95 LATIN SMALL LETTER O WITH GRAVE + /x96 LATIN SMALL LETTER U WITH CIRCUMFLEX + /x97 LATIN SMALL LETTER U WITH GRAVE + /x98 LATIN SMALL LETTER Y WITH DIAERESIS + /x99 LATIN CAPITAL LETTER O WITH DIAERESIS + /x9a LATIN CAPITAL LETTER U WITH DIAERESIS + /x9b LATIN SMALL LETTER O WITH STROKE + /x9c POUND SIGN + /x9d LATIN CAPITAL LETTER O WITH STROKE + /x9e MULTIPLICATION SIGN + /x9f LATIN SMALL LETTER F WITH HOOK + /xa0 LATIN SMALL LETTER A WITH ACUTE + /xa1 LATIN SMALL LETTER I WITH ACUTE + /xa2 LATIN SMALL LETTER O WITH ACUTE + /xa3 LATIN SMALL LETTER U WITH ACUTE + /xa4 LATIN SMALL LETTER N WITH TILDE + /xa5 LATIN CAPITAL LETTER N WITH TILDE + /xa6 FEMININE ORDINAL INDICATOR + /xa7 MASCULINE ORDINAL INDICATOR + /xa8 INVERTED QUESTION MARK + /xa9 REGISTERED SIGN + /xaa NOT SIGN + /xab VULGAR FRACTION ONE HALF + /xac VULGAR FRACTION ONE QUARTER + /xad 
INVERTED EXCLAMATION MARK + /xae LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + /xaf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + /xb0 LIGHT SHADE + /xb1 MEDIUM SHADE + /xb2 DARK SHADE + /xb3 BOX DRAWINGS LIGHT VERTICAL + /xb4 BOX DRAWINGS LIGHT VERTICAL AND LEFT + /xb5 LATIN CAPITAL LETTER A WITH ACUTE + /xb6 LATIN CAPITAL LETTER A WITH CIRCUMFLEX + /xb7 LATIN CAPITAL LETTER A WITH GRAVE + /xb8 COPYRIGHT SIGN + /xb9 BOX DRAWINGS DOUBLE VERTICAL AND LEFT + /xba BOX DRAWINGS DOUBLE VERTICAL + /xbb BOX DRAWINGS DOUBLE DOWN AND LEFT + /xbc BOX DRAWINGS DOUBLE UP AND LEFT + /xbd CENT SIGN + /xbe YEN SIGN + /xbf BOX DRAWINGS LIGHT DOWN AND LEFT + /xc0 BOX DRAWINGS LIGHT UP AND RIGHT + /xc1 BOX DRAWINGS LIGHT UP AND HORIZONTAL + /xc2 BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + /xc3 BOX DRAWINGS LIGHT VERTICAL AND RIGHT + /xc4 BOX DRAWINGS LIGHT HORIZONTAL + /xc5 BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + /xc6 LATIN SMALL LETTER A WITH TILDE + /xc7 LATIN CAPITAL LETTER A WITH TILDE + /xc8 BOX DRAWINGS DOUBLE UP AND RIGHT + /xc9 BOX DRAWINGS DOUBLE DOWN AND RIGHT + /xca BOX DRAWINGS DOUBLE UP AND HORIZONTAL + /xcb BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + /xcc BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + /xcd BOX DRAWINGS DOUBLE HORIZONTAL + /xce BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + /xcf CURRENCY SIGN + /xd0 LATIN SMALL LETTER ETH (Icelandic) + /xd1 LATIN CAPITAL LETTER ETH (Icelandic) + /xd2 LATIN CAPITAL LETTER E WITH CIRCUMFLEX + /xd3 LATIN CAPITAL LETTER E WITH DIAERESIS + /xd4 LATIN CAPITAL LETTER E WITH GRAVE + /xd5 EURO SIGN + /xd6 LATIN CAPITAL LETTER I WITH ACUTE + /xd7 LATIN CAPITAL LETTER I WITH CIRCUMFLEX + /xd8 LATIN CAPITAL LETTER I WITH DIAERESIS + /xd9 BOX DRAWINGS LIGHT UP AND LEFT + /xda BOX DRAWINGS LIGHT DOWN AND RIGHT + /xdb FULL BLOCK + /xdc LOWER HALF BLOCK + /xdd BROKEN BAR + /xde LATIN CAPITAL LETTER I WITH GRAVE + /xdf UPPER HALF BLOCK + /xe0 LATIN CAPITAL LETTER O WITH ACUTE + /xe1 LATIN SMALL LETTER SHARP S (German) + /xe2 LATIN CAPITAL LETTER 
O WITH CIRCUMFLEX + /xe3 LATIN CAPITAL LETTER O WITH GRAVE + /xe4 LATIN SMALL LETTER O WITH TILDE + /xe5 LATIN CAPITAL LETTER O WITH TILDE + /xe6 MICRO SIGN + /xe7 LATIN SMALL LETTER THORN (Icelandic) + /xe8 LATIN CAPITAL LETTER THORN (Icelandic) + /xe9 LATIN CAPITAL LETTER U WITH ACUTE + /xea LATIN CAPITAL LETTER U WITH CIRCUMFLEX + /xeb LATIN CAPITAL LETTER U WITH GRAVE + /xec LATIN SMALL LETTER Y WITH ACUTE + /xed LATIN CAPITAL LETTER Y WITH ACUTE + /xee MACRON + /xef ACUTE ACCENT + /xf0 SOFT HYPHEN + /xf1 PLUS-MINUS SIGN + /xf2 DOUBLE LOW LINE + /xf3 VULGAR FRACTION THREE QUARTERS + /xf4 PILCROW SIGN + /xf5 SECTION SIGN + /xf6 DIVISION SIGN + /xf7 CEDILLA + /xf8 DEGREE SIGN + /xf9 DIAERESIS + /xfa MIDDLE DOT + /xfb SUPERSCRIPT ONE + /xfc SUPERSCRIPT THREE + /xfd SUPERSCRIPT TWO + /xfe BLACK SQUARE + /xff NO-BREAK SPACE +END CHARMAP