Blob Blame History Raw
diff -r f3b365199366 docs/conf.py
--- a/docs/conf.py	Thu Jul 14 14:34:23 2011 +0200
+++ b/docs/conf.py	Tue Nov 01 13:31:57 2011 +1000
@@ -13,16 +13,18 @@
 
 import sys, os
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, os.path.abspath('..'))
 
+import polib
+
 # -- General configuration -----------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
 #needs_sphinx = '1.0'
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode']
@@ -43,19 +45,19 @@ master_doc = 'index'
 project = u'polib'
 copyright = u'2011, David Jean Louis <izimobil@gmail.com>'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = '0.6.0'
+version = polib.__version__
 # The full version, including alpha/beta/rc tags.
-release = '0.6.0'
+release = polib.__version__
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #language = None
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
 #today = ''
diff -r f3b365199366 polib.py
--- a/polib.py	Thu Jul 14 14:34:23 2011 +0200
+++ b/polib.py	Tue Nov 01 13:31:57 2011 +1000
@@ -1181,78 +1181,91 @@ class _POFileParser(object):
                 self.entry_obsolete = 1
             else:
                 self.entry_obsolete = 0
 
             # Take care of keywords like
             # msgid, msgid_plural, msgctxt & msgstr.
             if tokens[0] in keywords and nb_tokens > 1:
                 line = line[len(tokens[0]):].lstrip()
+                if re.search(r'([^\\]|^)"', line[1:-1]):
+                    raise IOError('Syntax error in po file %s (line %s): '\
+                                  'unescaped double quote found' % \
+                                  (self.instance.fpath, i))
                 self.current_token = line
                 self.process(keywords[tokens[0]], i)
                 continue
 
             self.current_token = line
 
             if tokens[0] == '#:' and nb_tokens > 1:
                 # we are on a occurrences line
                 self.process('OC', i)
 
             elif line[:1] == '"':
                 # we are on a continuation line
+                if re.search(r'([^\\]|^)"', line[1:-1]):
+                    raise IOError('Syntax error in po file %s (line %s): '\
+                                  'unescaped double quote found' % \
+                                  (self.instance.fpath, i))
                 self.process('MC', i)
 
             elif line[:7] == 'msgstr[':
                 # we are on a msgstr plural
                 self.process('MX', i)
 
-            elif tokens[0] == '#,' and nb_tokens > 1:
+            elif tokens[0] == '#,' and nb_tokens >= 1:
                 # we are on a flags line
                 self.process('FL', i)
 
             elif tokens[0] == '#':
                 if line == '#': line += ' '
                 # we are on a translator comment line
                 self.process('TC', i)
 
-            elif tokens[0] == '#.' and nb_tokens > 1:
+            elif tokens[0] == '#.' and nb_tokens >= 1:
                 # we are on a generated comment line
                 self.process('GC', i)
 
             elif tokens[0] == '#|':
                 if nb_tokens < 2:
-                    self.process('??', i)
-                    continue
+                    raise IOError('Syntax error in po file %s (line %s)' % \
+                                  (self.instance.fpath, i))
 
                 # Remove the marker and any whitespace right after that.
                 line = line[2:].lstrip()
                 self.current_token = line
 
                 if tokens[1].startswith('"'):
                     # Continuation of previous metadata.
                     self.process('MC', i)
                     continue
 
                 if nb_tokens == 2:
                     # Invalid continuation line.
-                    self.process('??', i)
+                    raise IOError('Syntax error in po file %s (line %s): '\
+                                  'invalid continuation line' % \
+                                  (self.instance.fpath, i))
 
                 # we are on a "previous translation" comment line,
                 if tokens[1] not in prev_keywords:
                     # Unknown keyword in previous translation comment.
-                    self.process('??', i)
+                    raise IOError('Syntax error in po file %s (line %s): '\
+                                  'unknown keyword %s' % \
+                                  (self.instance.fpath, i, tokens[1]))
 
                 # Remove the keyword and any whitespace
                 # between it and the starting quote.
                 line = line[len(tokens[1]):].lstrip()
                 self.current_token = line
                 self.process(prev_keywords[tokens[1]], i)
 
             else:
-                self.process('??', i)
+                raise IOError('Syntax error in po file %s (line %s)' % \
+                              (self.instance.fpath, i))
 
         if self.current_entry:
             # since entries are added when another entry is found, we must add
             # the last entry here (only if there are lines)
             self.instance.append(self.current_entry)
         # before returning the instance, check if there's metadata and if 
         # so extract it in a dict
         firstentry = self.instance[0]
diff -r f3b365199366 tests/test_weird_occurrences.po
--- a/tests/test_weird_occurrences.po	Thu Jul 14 14:34:23 2011 +0200
+++ b/tests/test_weird_occurrences.po	Tue Nov 01 13:31:57 2011 +1000
@@ -9,16 +9,23 @@ msgstr ""
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
 "X-Generator: Pootle 1.1.0rc2\n"
 "X-Launchpad-Export-Date: 2007-12-03 23:40+0000\n"
 "X-Etoys-Domain: etoys\n"
 "X-Etoys-SystemVersion: etoys3.0 of 24 February 2008 update 2029\n"
 
+#. Test for empty comment lines
+#. 
+#,
+#: main.c:117
+msgid "Override the default prgname"
+msgstr "Override the default prgname"
+
 #: Balloon-Fills,BitmapFillStyle>>addFillStyleMenuItems:hand:from:
 msgid "choose new graphic"
 msgstr "escolher novo gráfico"
 
 #: Balloon-Fills,BitmapFillStyle>>addFillStyleMenuItems:hand:from:
 msgid "grab new graphic"
 msgstr "agarrar novo gráfico"
 
diff -r f3b365199366 tests/tests.py
--- a/tests/tests.py	Thu Jul 14 14:34:23 2011 +0200
+++ b/tests/tests.py	Tue Nov 01 13:31:57 2011 +1000
@@ -78,16 +78,84 @@ msgstr "bar"
 
     def test_pofile_and_mofile7(self):
         """
         Test that encoding is ok when encoding is explicitely given.
         """
         po = polib.pofile('tests/test_iso-8859-15.po', encoding='iso-8859-15')
         self.assertTrue(po.encoding == 'iso-8859-15')
 
+    def test_pofile_and_mofile8(self):
+        """
+        Test that test_weird_occurrences.po, including its empty '#.' and bare '#,' lines, loads with length 46.
+        """
+        po = polib.pofile('tests/test_weird_occurrences.po')
+        self.assertEqual(len(po), 46)
+
+    def test_unescaped_double_quote1(self):
+        """
+        Test that an unescaped double quote inside a keyword line's string raises IOError naming line 3.
+        """
+        data = r'''
+msgid "Some msgid with \"double\" quotes"
+msgid "Some msgstr with "double\" quotes"
+'''
+        try:
+            po = polib.pofile(data)
+            self.fail("Unescaped quote not detected")
+        except IOError, exc:
+            msg = 'Syntax error in po file None (line 3): unescaped double quote found'
+            self.assertEqual(str(exc), msg)
+
+    def test_unescaped_double_quote2(self):
+        """
+        Test that an unescaped double quote inside a continuation line raises IOError naming line 4.
+        """
+        data = r'''
+msgid "Some msgid with \"double\" quotes"
+msgstr ""
+"Some msgstr with "double\" quotes"
+'''
+        try:
+            po = polib.pofile(data)
+            self.fail("Unescaped quote not detected")
+        except IOError, exc:
+            msg = 'Syntax error in po file None (line 4): unescaped double quote found'
+            self.assertEqual(str(exc), msg)
+
+    def test_unescaped_double_quote3(self):
+        """Test that an unescaped double quote at the start of a keyword line's string raises IOError."""
+        data = r'''
+msgid "Some msgid with \"double\" quotes"
+msgid ""Some msgstr with double\" quotes"
+'''
+        try:
+            po = polib.pofile(data)
+            self.fail("Unescaped quote not detected")
+        except IOError, exc:
+            # The message must name line 3, the line holding the stray quote.
+            msg = 'Syntax error in po file None (line 3): unescaped double quote found'
+            self.assertEqual(str(exc), msg)
+
+    def test_unescaped_double_quote4(self):
+        """
+        Test that an unescaped double quote at the start of a continuation line's string raises IOError naming line 4.
+        """
+        data = r'''
+msgid "Some msgid with \"double\" quotes"
+msgstr ""
+""Some msgstr with double\" quotes"
+'''
+        try:
+            po = polib.pofile(data)
+            self.fail("Unescaped quote not detected")
+        except IOError, exc:
+            msg = 'Syntax error in po file None (line 4): unescaped double quote found'
+            self.assertEqual(str(exc), msg)
+    
     def test_detect_encoding1(self):
         """
         Test that given enconding is returned when file has no encoding defined.
         """
         self.assertEqual(polib.detect_encoding('tests/test_noencoding.po'), 'utf-8')
 
     def test_detect_encoding2(self):
         """