|
Robert Kuska |
833dc39 |
diff -r 847a0e74c4cc Lib/test/test_unicode.py
|
|
Robert Kuska |
833dc39 |
--- a/Lib/test/test_unicode.py Sun Jul 20 21:26:04 2014 -0700
|
|
Robert Kuska |
833dc39 |
+++ b/Lib/test/test_unicode.py Tue Jul 22 00:13:24 2014 +0200
|
|
Robert Kuska |
833dc39 |
@@ -1659,6 +1659,122 @@ class UnicodeTest(
|
|
Robert Kuska |
833dc39 |
self.assertEqual("%s" % u, u'__unicode__ overridden')
|
|
Robert Kuska |
833dc39 |
self.assertEqual("{}".format(u), '__unicode__ overridden')
|
|
Robert Kuska |
833dc39 |
|
|
Robert Kuska |
833dc39 |
+ # Test PyUnicode_FromFormat()
|
|
Robert Kuska |
833dc39 |
+ def test_from_format(self):
|
|
Robert Kuska |
833dc39 |
+ test_support.import_module('ctypes')
|
|
Robert Kuska |
833dc39 |
+ from ctypes import (
|
|
Robert Kuska |
833dc39 |
+ pythonapi, py_object, sizeof,
|
|
Robert Kuska |
833dc39 |
+ c_int, c_long, c_longlong, c_ssize_t,
|
|
Robert Kuska |
833dc39 |
+ c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
|
|
Robert Kuska |
833dc39 |
+ if sys.maxunicode == 0xffff:
|
|
Robert Kuska |
833dc39 |
+ name = "PyUnicodeUCS2_FromFormat"
|
|
Robert Kuska |
833dc39 |
+ else:
|
|
Robert Kuska |
833dc39 |
+ name = "PyUnicodeUCS4_FromFormat"
|
|
Robert Kuska |
833dc39 |
+ _PyUnicode_FromFormat = getattr(pythonapi, name)
|
|
Robert Kuska |
833dc39 |
+ _PyUnicode_FromFormat.restype = py_object
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ def PyUnicode_FromFormat(format, *args):
|
|
Robert Kuska |
833dc39 |
+ cargs = tuple(
|
|
Robert Kuska |
833dc39 |
+ py_object(arg) if isinstance(arg, unicode) else arg
|
|
Robert Kuska |
833dc39 |
+ for arg in args)
|
|
Robert Kuska |
833dc39 |
+ return _PyUnicode_FromFormat(format, *cargs)
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ def check_format(expected, format, *args):
|
|
Robert Kuska |
833dc39 |
+ text = PyUnicode_FromFormat(format, *args)
|
|
Robert Kuska |
833dc39 |
+ self.assertEqual(expected, text)
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # ascii format, non-ascii argument
|
|
Robert Kuska |
833dc39 |
+ check_format(u'ascii\x7f=unicode\xe9',
|
|
Robert Kuska |
833dc39 |
+ b'ascii\x7f=%U', u'unicode\xe9')
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
|
|
Robert Kuska |
833dc39 |
+ # raises an error
|
|
Robert Kuska |
833dc39 |
+ #self.assertRaisesRegex(ValueError,
|
|
Robert Kuska |
833dc39 |
+ # '^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
|
|
Robert Kuska |
833dc39 |
+ # 'string, got a non-ASCII byte: 0xe9$',
|
|
Robert Kuska |
833dc39 |
+ # PyUnicode_FromFormat, b'unicode\xe9=%s', u'ascii')
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # test "%c"
|
|
Robert Kuska |
833dc39 |
+ check_format(u'\uabcd',
|
|
Robert Kuska |
833dc39 |
+ b'%c', c_int(0xabcd))
|
|
Robert Kuska |
833dc39 |
+ if sys.maxunicode > 0xffff:
|
|
Robert Kuska |
833dc39 |
+ check_format(u'\U0010ffff',
|
|
Robert Kuska |
833dc39 |
+ b'%c', c_int(0x10ffff))
|
|
Robert Kuska |
833dc39 |
+ with self.assertRaises(OverflowError):
|
|
Robert Kuska |
833dc39 |
+ PyUnicode_FromFormat(b'%c', c_int(0x110000))
|
|
Robert Kuska |
833dc39 |
+ # Issue #18183
|
|
Robert Kuska |
833dc39 |
+ if sys.maxunicode > 0xffff:
|
|
Robert Kuska |
833dc39 |
+ check_format(u'\U00010000\U00100000',
|
|
Robert Kuska |
833dc39 |
+ b'%c%c', c_int(0x10000), c_int(0x100000))
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # test "%"
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%',
|
|
Robert Kuska |
833dc39 |
+ b'%')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%',
|
|
Robert Kuska |
833dc39 |
+ b'%%')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%s',
|
|
Robert Kuska |
833dc39 |
+ b'%%s')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'[%]',
|
|
Robert Kuska |
833dc39 |
+ b'[%%]')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%abc',
|
|
Robert Kuska |
833dc39 |
+ b'%%%s', b'abc')
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # test %S
|
|
Robert Kuska |
833dc39 |
+ check_format(u"repr=abc",
|
|
Robert Kuska |
833dc39 |
+ b'repr=%S', u'abc')
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # test %R
|
|
Robert Kuska |
833dc39 |
+ check_format(u"repr=u'abc'",
|
|
Robert Kuska |
833dc39 |
+ b'repr=%R', u'abc')
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # test integer formats (%i, %d, %u)
|
|
Robert Kuska |
833dc39 |
+ check_format(u'010',
|
|
Robert Kuska |
833dc39 |
+ b'%03i', c_int(10))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'0010',
|
|
Robert Kuska |
833dc39 |
+ b'%0.4i', c_int(10))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'-123',
|
|
Robert Kuska |
833dc39 |
+ b'%i', c_int(-123))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'-123',
|
|
Robert Kuska |
833dc39 |
+ b'%li', c_long(-123))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'-123',
|
|
Robert Kuska |
833dc39 |
+ b'%zi', c_ssize_t(-123))
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ check_format(u'-123',
|
|
Robert Kuska |
833dc39 |
+ b'%d', c_int(-123))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'-123',
|
|
Robert Kuska |
833dc39 |
+ b'%ld', c_long(-123))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'-123',
|
|
Robert Kuska |
833dc39 |
+ b'%zd', c_ssize_t(-123))
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ check_format(u'123',
|
|
Robert Kuska |
833dc39 |
+ b'%u', c_uint(123))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'123',
|
|
Robert Kuska |
833dc39 |
+ b'%lu', c_ulong(123))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'123',
|
|
Robert Kuska |
833dc39 |
+ b'%zu', c_size_t(123))
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # test long output
|
|
Robert Kuska |
833dc39 |
+ PyUnicode_FromFormat(b'%p', c_void_p(-1))
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # test %V
|
|
Robert Kuska |
833dc39 |
+ check_format(u'repr=abc',
|
|
Robert Kuska |
833dc39 |
+ b'repr=%V', u'abc', b'xyz')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'repr=\xe4\xba\xba\xe6\xb0\x91',
|
|
Robert Kuska |
833dc39 |
+ b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'repr=abc\xff',
|
|
Robert Kuska |
833dc39 |
+ b'repr=%V', None, b'abc\xff')
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
+ # not supported: copy the raw format string. these tests are just here
|
|
Robert Kuska |
833dc39 |
+ # to check for crashes and should not be considered as specifications
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%s',
|
|
Robert Kuska |
833dc39 |
+ b'%1%s', b'abc')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%1abc',
|
|
Robert Kuska |
833dc39 |
+ b'%1abc')
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%+i',
|
|
Robert Kuska |
833dc39 |
+ b'%+i', c_int(10))
|
|
Robert Kuska |
833dc39 |
+ check_format(u'%s',
|
|
Robert Kuska |
833dc39 |
+ b'%.%s', b'abc')
|
|
Robert Kuska |
833dc39 |
+
|
|
Robert Kuska |
833dc39 |
@test_support.cpython_only
|
|
Robert Kuska |
833dc39 |
def test_encode_decimal(self):
|
|
Robert Kuska |
833dc39 |
from _testcapi import unicode_encodedecimal
|
|
Robert Kuska |
833dc39 |
diff -r 847a0e74c4cc Objects/unicodeobject.c
|
|
Robert Kuska |
833dc39 |
--- a/Objects/unicodeobject.c Sun Jul 20 21:26:04 2014 -0700
|
|
Robert Kuska |
833dc39 |
+++ b/Objects/unicodeobject.c Tue Jul 22 00:13:24 2014 +0200
|
|
Robert Kuska |
833dc39 |
@@ -690,7 +690,12 @@ makefmt(char *fmt, int longflag, int siz
|
|
Robert Kuska |
833dc39 |
*fmt = '\0';
|
|
Robert Kuska |
833dc39 |
}
|
|
Robert Kuska |
833dc39 |
|
|
Robert Kuska |
833dc39 |
-#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
|
|
Robert Kuska |
833dc39 |
+#define appendstring(string) \
|
|
Robert Kuska |
833dc39 |
+ do { \
|
|
Robert Kuska |
833dc39 |
+ for (copy = string;*copy; copy++) { \
|
|
Robert Kuska |
833dc39 |
+ *s++ = (unsigned char)*copy; \
|
|
Robert Kuska |
833dc39 |
+ } \
|
|
Robert Kuska |
833dc39 |
+ } while (0)
|
|
Robert Kuska |
833dc39 |
|
|
Robert Kuska |
833dc39 |
PyObject *
|
|
Robert Kuska |
833dc39 |
PyUnicode_FromFormatV(const char *format, va_list vargs)
|
|
Robert Kuska |
833dc39 |
@@ -845,7 +850,7 @@ PyUnicode_FromFormatV(const char *format
|
|
Robert Kuska |
833dc39 |
str = PyObject_Str(obj);
|
|
Robert Kuska |
833dc39 |
if (!str)
|
|
Robert Kuska |
833dc39 |
goto fail;
|
|
Robert Kuska |
833dc39 |
- n += PyUnicode_GET_SIZE(str);
|
|
Robert Kuska |
833dc39 |
+ n += PyString_GET_SIZE(str);
|
|
Robert Kuska |
833dc39 |
/* Remember the str and switch to the next slot */
|
|
Robert Kuska |
833dc39 |
*callresult++ = str;
|
|
Robert Kuska |
833dc39 |
break;
|
|
Robert Kuska |
833dc39 |
@@ -925,12 +930,12 @@ PyUnicode_FromFormatV(const char *format
|
|
Robert Kuska |
833dc39 |
}
|
|
Robert Kuska |
833dc39 |
/* handle the long flag, but only for %ld and %lu.
|
|
Robert Kuska |
833dc39 |
others can be added when necessary. */
|
|
Robert Kuska |
833dc39 |
- if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
|
|
Robert Kuska |
833dc39 |
+ if (*f == 'l' && (f[1] == 'd' || f[1] == 'i' || f[1] == 'u')) {
|
|
Robert Kuska |
833dc39 |
longflag = 1;
|
|
Robert Kuska |
833dc39 |
++f;
|
|
Robert Kuska |
833dc39 |
}
|
|
Robert Kuska |
833dc39 |
/* handle the size_t flag. */
|
|
Robert Kuska |
833dc39 |
- if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
|
|
Robert Kuska |
833dc39 |
+ if (*f == 'z' && (f[1] == 'd' || f[1] == 'i' || f[1] == 'u')) {
|
|
Robert Kuska |
833dc39 |
size_tflag = 1;
|
|
Robert Kuska |
833dc39 |
++f;
|
|
Robert Kuska |
833dc39 |
}
|
|
Robert Kuska |
833dc39 |
@@ -939,8 +944,9 @@ PyUnicode_FromFormatV(const char *format
|
|
Robert Kuska |
833dc39 |
case 'c':
|
|
Robert Kuska |
833dc39 |
*s++ = va_arg(vargs, int);
|
|
Robert Kuska |
833dc39 |
break;
|
|
Robert Kuska |
833dc39 |
+ case 'i':
|
|
Robert Kuska |
833dc39 |
case 'd':
|
|
Robert Kuska |
833dc39 |
- makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd');
|
|
Robert Kuska |
833dc39 |
+ makefmt(fmt, longflag, size_tflag, zeropad, width, precision, *f);
|
|
Robert Kuska |
833dc39 |
if (longflag)
|
|
Robert Kuska |
833dc39 |
sprintf(realbuffer, fmt, va_arg(vargs, long));
|
|
Robert Kuska |
833dc39 |
else if (size_tflag)
|
|
Robert Kuska |
833dc39 |
@@ -959,11 +965,6 @@ PyUnicode_FromFormatV(const char *format
|
|
Robert Kuska |
833dc39 |
sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
|
|
Robert Kuska |
833dc39 |
appendstring(realbuffer);
|
|
Robert Kuska |
833dc39 |
break;
|
|
Robert Kuska |
833dc39 |
- case 'i':
|
|
Robert Kuska |
833dc39 |
- makefmt(fmt, 0, 0, zeropad, width, precision, 'i');
|
|
Robert Kuska |
833dc39 |
- sprintf(realbuffer, fmt, va_arg(vargs, int));
|
|
Robert Kuska |
833dc39 |
- appendstring(realbuffer);
|
|
Robert Kuska |
833dc39 |
- break;
|
|
Robert Kuska |
833dc39 |
case 'x':
|
|
Robert Kuska |
833dc39 |
makefmt(fmt, 0, 0, zeropad, width, precision, 'x');
|
|
Robert Kuska |
833dc39 |
sprintf(realbuffer, fmt, va_arg(vargs, int));
|
|
Robert Kuska |
833dc39 |
@@ -1006,15 +1007,10 @@ PyUnicode_FromFormatV(const char *format
|
|
Robert Kuska |
833dc39 |
case 'S':
|
|
Robert Kuska |
833dc39 |
case 'R':
|
|
Robert Kuska |
833dc39 |
{
|
|
Robert Kuska |
833dc39 |
- Py_UNICODE *ucopy;
|
|
Robert Kuska |
833dc39 |
- Py_ssize_t usize;
|
|
Robert Kuska |
833dc39 |
- Py_ssize_t upos;
|
|
Robert Kuska |
833dc39 |
+ const char *str = PyString_AS_STRING(*callresult);
|
|
Robert Kuska |
833dc39 |
/* unused, since we already have the result */
|
|
Robert Kuska |
833dc39 |
(void) va_arg(vargs, PyObject *);
|
|
Robert Kuska |
833dc39 |
- ucopy = PyUnicode_AS_UNICODE(*callresult);
|
|
Robert Kuska |
833dc39 |
- usize = PyUnicode_GET_SIZE(*callresult);
|
|
Robert Kuska |
833dc39 |
- for (upos = 0; upos<usize;)
|
|
Robert Kuska |
833dc39 |
- *s++ = ucopy[upos++];
|
|
Robert Kuska |
833dc39 |
+ appendstring(str);
|
|
Robert Kuska |
833dc39 |
/* We're done with the unicode()/repr() => forget it */
|
|
Robert Kuska |
833dc39 |
Py_DECREF(*callresult);
|
|
Robert Kuska |
833dc39 |
/* switch to next unicode()/repr() result */
|