diff -r f3b365199366 docs/conf.py --- a/docs/conf.py Thu Jul 14 14:34:23 2011 +0200 +++ b/docs/conf.py Tue Nov 01 13:31:57 2011 +1000 @@ -13,16 +13,18 @@ import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('..')) +import polib + # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] @@ -43,19 +45,19 @@ master_doc = 'index' project = u'polib' copyright = u'2011, David Jean Louis ' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.6.0' +version = polib.__version__ # The full version, including alpha/beta/rc tags. -release = '0.6.0' +release = polib.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' diff -r f3b365199366 polib.py --- a/polib.py Thu Jul 14 14:34:23 2011 +0200 +++ b/polib.py Tue Nov 01 13:31:57 2011 +1000 @@ -1181,78 +1181,91 @@ class _POFileParser(object): self.entry_obsolete = 1 else: self.entry_obsolete = 0 # Take care of keywords like # msgid, msgid_plural, msgctxt & msgstr. 
if tokens[0] in keywords and nb_tokens > 1: line = line[len(tokens[0]):].lstrip() + if re.search(r'([^\\]|^)"', line[1:-1]): + raise IOError('Syntax error in po file %s (line %s): '\ + 'unescaped double quote found' % \ + (self.instance.fpath, i)) self.current_token = line self.process(keywords[tokens[0]], i) continue self.current_token = line if tokens[0] == '#:' and nb_tokens > 1: # we are on a occurrences line self.process('OC', i) elif line[:1] == '"': # we are on a continuation line + if re.search(r'([^\\]|^)"', line[1:-1]): + raise IOError('Syntax error in po file %s (line %s): '\ + 'unescaped double quote found' % \ + (self.instance.fpath, i)) self.process('MC', i) elif line[:7] == 'msgstr[': # we are on a msgstr plural self.process('MX', i) - elif tokens[0] == '#,' and nb_tokens > 1: + elif tokens[0] == '#,' and nb_tokens >= 1: # we are on a flags line self.process('FL', i) elif tokens[0] == '#': if line == '#': line += ' ' # we are on a translator comment line self.process('TC', i) - elif tokens[0] == '#.' and nb_tokens > 1: + elif tokens[0] == '#.' and nb_tokens >= 1: # we are on a generated comment line self.process('GC', i) elif tokens[0] == '#|': if nb_tokens < 2: - self.process('??', i) - continue + raise IOError('Syntax error in po file %s (line %s)' % \ + (self.instance.fpath, i)) # Remove the marker and any whitespace right after that. line = line[2:].lstrip() self.current_token = line if tokens[1].startswith('"'): # Continuation of previous metadata. self.process('MC', i) continue if nb_tokens == 2: # Invalid continuation line. - self.process('??', i) + raise IOError('Syntax error in po file %s (line %s): '\ + 'invalid continuation line' % \ + (self.instance.fpath, i)) # we are on a "previous translation" comment line, if tokens[1] not in prev_keywords: # Unknown keyword in previous translation comment. 
- self.process('??', i) + raise IOError('Syntax error in po file %s (line %s): '\ + 'unknown keyword %s' % \ + (self.instance.fpath, i, tokens[1])) # Remove the keyword and any whitespace # between it and the starting quote. line = line[len(tokens[1]):].lstrip() self.current_token = line self.process(prev_keywords[tokens[1]], i) else: - self.process('??', i) + raise IOError('Syntax error in po file %s (line %s)' % \ + (self.instance.fpath, i)) if self.current_entry: # since entries are added when another entry is found, we must add # the last entry here (only if there are lines) self.instance.append(self.current_entry) # before returning the instance, check if there's metadata and if # so extract it in a dict firstentry = self.instance[0] diff -r f3b365199366 tests/test_weird_occurrences.po --- a/tests/test_weird_occurrences.po Thu Jul 14 14:34:23 2011 +0200 +++ b/tests/test_weird_occurrences.po Tue Nov 01 13:31:57 2011 +1000 @@ -9,16 +9,23 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "X-Generator: Pootle 1.1.0rc2\n" "X-Launchpad-Export-Date: 2007-12-03 23:40+0000\n" "X-Etoys-Domain: etoys\n" "X-Etoys-SystemVersion: etoys3.0 of 24 February 2008 update 2029\n" +#. Test for empty comment lines +#. +#, +#: main.c:117 +msgid "Override the default prgname" +msgstr "Override the default prgname" + #: Balloon-Fills,BitmapFillStyle>>addFillStyleMenuItems:hand:from: msgid "choose new graphic" msgstr "escolher novo gráfico" #: Balloon-Fills,BitmapFillStyle>>addFillStyleMenuItems:hand:from: msgid "grab new graphic" msgstr "agarrar novo gráfico" diff -r f3b365199366 tests/tests.py --- a/tests/tests.py Thu Jul 14 14:34:23 2011 +0200 +++ b/tests/tests.py Tue Nov 01 13:31:57 2011 +1000 @@ -78,16 +78,85 @@ msgstr "bar" def test_pofile_and_mofile7(self): """ Test that encoding is ok when encoding is explicitely given.
""" po = polib.pofile('tests/test_iso-8859-15.po', encoding='iso-8859-15') self.assertTrue(po.encoding == 'iso-8859-15') + def test_pofile_and_mofile8(self): + """ + Test that weird occurrences are correctly parsed. + """ + po = polib.pofile('tests/test_weird_occurrences.po') + self.assertEqual(len(po), 46) + + def test_unescaped_double_quote1(self): + """ + Test that polib reports an error when unescaped double quote is found. + """ + data = r''' +msgid "Some msgid with \"double\" quotes" +msgstr "Some msgstr with "double\" quotes" +''' + try: + po = polib.pofile(data) + self.fail("Unescaped quote not detected") + except IOError, exc: + msg = 'Syntax error in po file None (line 3): unescaped double quote found' + self.assertEqual(str(exc), msg) + + def test_unescaped_double_quote2(self): + """ + Test that polib reports an error when unescaped double quote is found. + """ + data = r''' +msgid "Some msgid with \"double\" quotes" +msgstr "" +"Some msgstr with "double\" quotes" +''' + try: + po = polib.pofile(data) + self.fail("Unescaped quote not detected") + except IOError, exc: + msg = 'Syntax error in po file None (line 4): unescaped double quote found' + self.assertEqual(str(exc), msg) + + def test_unescaped_double_quote3(self): + """ + Test that polib reports an error when unescaped double quote is found at the beginning of the string. + """ + data = r''' +msgid "Some msgid with \"double\" quotes" +msgstr ""Some msgstr with double\" quotes" +''' + try: + po = polib.pofile(data) + self.fail("Unescaped quote not detected") + except IOError, exc: + msg = 'Syntax error in po file None (line 3): unescaped double quote found' + self.assertEqual(str(exc), msg) + + def test_unescaped_double_quote4(self): + """ + Test that polib reports an error when unescaped double quote is found at the beginning of the string.
+ """ + data = r''' +msgid "Some msgid with \"double\" quotes" +msgstr "" +""Some msgstr with double\" quotes" +''' + try: + po = polib.pofile(data) + self.fail("Unescaped quote not detected") + except IOError, exc: + msg = 'Syntax error in po file None (line 4): unescaped double quote found' + self.assertEqual(str(exc), msg) + def test_detect_encoding1(self): """ Test that given enconding is returned when file has no encoding defined. """ self.assertEqual(polib.detect_encoding('tests/test_noencoding.po'), 'utf-8') def test_detect_encoding2(self): """