2af1362
diff -r f3b365199366 docs/conf.py
2af1362
--- a/docs/conf.py	Thu Jul 14 14:34:23 2011 +0200
2af1362
+++ b/docs/conf.py	Tue Nov 01 13:31:57 2011 +1000
2af1362
@@ -13,16 +13,18 @@
2af1362
 
2af1362
 import sys, os
2af1362
 
2af1362
 # If extensions (or modules to document with autodoc) are in another directory,
2af1362
 # add these directories to sys.path here. If the directory is relative to the
2af1362
 # documentation root, use os.path.abspath to make it absolute, like shown here.
2af1362
 sys.path.insert(0, os.path.abspath('..'))
2af1362
 
2af1362
+import polib
2af1362
+
2af1362
 # -- General configuration -----------------------------------------------------
2af1362
 
2af1362
 # If your documentation needs a minimal Sphinx version, state it here.
2af1362
 #needs_sphinx = '1.0'
2af1362
 
2af1362
 # Add any Sphinx extension module names here, as strings. They can be extensions
2af1362
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
2af1362
 extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode']
2af1362
@@ -43,19 +45,19 @@ master_doc = 'index'
2af1362
 project = u'polib'
2af1362
 copyright = u'2011, David Jean Louis <izimobil@gmail.com>'
2af1362
 
2af1362
 # The version info for the project you're documenting, acts as replacement for
2af1362
 # |version| and |release|, also used in various other places throughout the
2af1362
 # built documents.
2af1362
 #
2af1362
 # The short X.Y version.
2af1362
-version = '0.6.0'
2af1362
+version = polib.__version__
2af1362
 # The full version, including alpha/beta/rc tags.
2af1362
-release = '0.6.0'
2af1362
+release = polib.__version__
2af1362
 
2af1362
 # The language for content autogenerated by Sphinx. Refer to documentation
2af1362
 # for a list of supported languages.
2af1362
 #language = None
2af1362
 
2af1362
 # There are two options for replacing |today|: either, you set today to some
2af1362
 # non-false value, then it is used:
2af1362
 #today = ''
2af1362
diff -r f3b365199366 polib.py
2af1362
--- a/polib.py	Thu Jul 14 14:34:23 2011 +0200
2af1362
+++ b/polib.py	Tue Nov 01 13:31:57 2011 +1000
2af1362
@@ -1181,78 +1181,91 @@ class _POFileParser(object):
2af1362
                 self.entry_obsolete = 1
2af1362
             else:
2af1362
                 self.entry_obsolete = 0
2af1362
 
2af1362
             # Take care of keywords like
2af1362
             # msgid, msgid_plural, msgctxt & msgstr.
2af1362
             if tokens[0] in keywords and nb_tokens > 1:
2af1362
                 line = line[len(tokens[0]):].lstrip()
2af1362
+                if re.search(r'([^\\]|^)"', line[1:-1]):
2af1362
+                    raise IOError('Syntax error in po file %s (line %s): '\
2af1362
+                                  'unescaped double quote found' % \
2af1362
+                                  (self.instance.fpath, i))
2af1362
                 self.current_token = line
2af1362
                 self.process(keywords[tokens[0]], i)
2af1362
                 continue
2af1362
 
2af1362
             self.current_token = line
2af1362
 
2af1362
             if tokens[0] == '#:' and nb_tokens > 1:
2af1362
                 # we are on a occurrences line
2af1362
                 self.process('OC', i)
2af1362
 
2af1362
             elif line[:1] == '"':
2af1362
                 # we are on a continuation line
2af1362
+                if re.search(r'([^\\]|^)"', line[1:-1]):
2af1362
+                    raise IOError('Syntax error in po file %s (line %s): '\
2af1362
+                                  'unescaped double quote found' % \
2af1362
+                                  (self.instance.fpath, i))
2af1362
                 self.process('MC', i)
2af1362
 
2af1362
             elif line[:7] == 'msgstr[':
2af1362
                 # we are on a msgstr plural
2af1362
                 self.process('MX', i)
2af1362
 
2af1362
-            elif tokens[0] == '#,' and nb_tokens > 1:
2af1362
+            elif tokens[0] == '#,' and nb_tokens >= 1:
2af1362
                 # we are on a flags line
2af1362
                 self.process('FL', i)
2af1362
 
2af1362
             elif tokens[0] == '#':
2af1362
                 if line == '#': line += ' '
2af1362
                 # we are on a translator comment line
2af1362
                 self.process('TC', i)
2af1362
 
2af1362
-            elif tokens[0] == '#.' and nb_tokens > 1:
2af1362
+            elif tokens[0] == '#.' and nb_tokens >= 1:
2af1362
                 # we are on a generated comment line
2af1362
                 self.process('GC', i)
2af1362
 
2af1362
             elif tokens[0] == '#|':
2af1362
                 if nb_tokens < 2:
2af1362
-                    self.process('??', i)
2af1362
-                    continue
2af1362
+                    raise IOError('Syntax error in po file %s (line %s)' % \
2af1362
+                                  (self.instance.fpath, i))
2af1362
 
2af1362
                 # Remove the marker and any whitespace right after that.
2af1362
                 line = line[2:].lstrip()
2af1362
                 self.current_token = line
2af1362
 
2af1362
                 if tokens[1].startswith('"'):
2af1362
                     # Continuation of previous metadata.
2af1362
                     self.process('MC', i)
2af1362
                     continue
2af1362
 
2af1362
                 if nb_tokens == 2:
2af1362
                     # Invalid continuation line.
2af1362
-                    self.process('??', i)
2af1362
+                    raise IOError('Syntax error in po file %s (line %s): '\
2af1362
+                                  'invalid continuation line' % \
2af1362
+                                  (self.instance.fpath, i))
2af1362
 
2af1362
                 # we are on a "previous translation" comment line,
2af1362
                 if tokens[1] not in prev_keywords:
2af1362
                     # Unknown keyword in previous translation comment.
2af1362
-                    self.process('??', i)
2af1362
+                    raise IOError('Syntax error in po file %s (line %s): '\
2af1362
+                                  'unknown keyword %s' % \
2af1362
+                                  (self.instance.fpath, i, tokens[1]))
2af1362
 
2af1362
                 # Remove the keyword and any whitespace
2af1362
                 # between it and the starting quote.
2af1362
                 line = line[len(tokens[1]):].lstrip()
2af1362
                 self.current_token = line
2af1362
                 self.process(prev_keywords[tokens[1]], i)
2af1362
 
2af1362
             else:
2af1362
-                self.process('??', i)
2af1362
+                raise IOError('Syntax error in po file %s (line %s)' % \
2af1362
+                              (self.instance.fpath, i))
2af1362
 
2af1362
         if self.current_entry:
2af1362
             # since entries are added when another entry is found, we must add
2af1362
             # the last entry here (only if there are lines)
2af1362
             self.instance.append(self.current_entry)
2af1362
         # before returning the instance, check if there's metadata and if 
2af1362
         # so extract it in a dict
2af1362
         firstentry = self.instance[0]
2af1362
diff -r f3b365199366 tests/test_weird_occurrences.po
2af1362
--- a/tests/test_weird_occurrences.po	Thu Jul 14 14:34:23 2011 +0200
2af1362
+++ b/tests/test_weird_occurrences.po	Tue Nov 01 13:31:57 2011 +1000
2af1362
@@ -9,16 +9,23 @@ msgstr ""
2af1362
 "MIME-Version: 1.0\n"
2af1362
 "Content-Type: text/plain; charset=UTF-8\n"
2af1362
 "Content-Transfer-Encoding: 8bit\n"
2af1362
 "X-Generator: Pootle 1.1.0rc2\n"
2af1362
 "X-Launchpad-Export-Date: 2007-12-03 23:40+0000\n"
2af1362
 "X-Etoys-Domain: etoys\n"
2af1362
 "X-Etoys-SystemVersion: etoys3.0 of 24 February 2008 update 2029\n"
2af1362
 
2af1362
+#. Test for empty comment lines
2af1362
+#. 
2af1362
+#,
2af1362
+#: main.c:117
2af1362
+msgid "Override the default prgname"
2af1362
+msgstr "Override the default prgname"
2af1362
+
2af1362
 #: Balloon-Fills,BitmapFillStyle>>addFillStyleMenuItems:hand:from:
2af1362
 msgid "choose new graphic"
2af1362
 msgstr "escolher novo gráfico"
2af1362
 
2af1362
 #: Balloon-Fills,BitmapFillStyle>>addFillStyleMenuItems:hand:from:
2af1362
 msgid "grab new graphic"
2af1362
 msgstr "agarrar novo gráfico"
2af1362
 
2af1362
diff -r f3b365199366 tests/tests.py
2af1362
--- a/tests/tests.py	Thu Jul 14 14:34:23 2011 +0200
2af1362
+++ b/tests/tests.py	Tue Nov 01 13:31:57 2011 +1000
2af1362
@@ -78,16 +78,84 @@ msgstr "bar"
2af1362
 
2af1362
     def test_pofile_and_mofile7(self):
2af1362
         """
2af1362
         Test that encoding is ok when encoding is explicitely given.
2af1362
         """
2af1362
         po = polib.pofile('tests/test_iso-8859-15.po', encoding='iso-8859-15')
2af1362
         self.assertTrue(po.encoding == 'iso-8859-15')
2af1362
 
2af1362
+    def test_pofile_and_mofile8(self):
2af1362
+        """
2af1362
+        Test that weird occurrences are correctly parsed.
2af1362
+        """
2af1362
+        po = polib.pofile('tests/test_weird_occurrences.po')
2af1362
+        self.assertEqual(len(po), 46)
2af1362
+
2af1362
+    def test_unescaped_double_quote1(self):
2af1362
+        """
2af1362
+        Test that polib reports an error when unescaped double quote is found.
2af1362
+        """
2af1362
+        data = r'''
2af1362
+msgid "Some msgid with \"double\" quotes"
2af1362
+msgid "Some msgstr with "double\" quotes"
2af1362
+'''
2af1362
+        try:
2af1362
+            po = polib.pofile(data)
2af1362
+            self.fail("Unescaped quote not detected")
2af1362
+        except IOError, exc:
2af1362
+            msg = 'Syntax error in po file None (line 3): unescaped double quote found'
2af1362
+            self.assertEqual(str(exc), msg)
2af1362
+
2af1362
+    def test_unescaped_double_quote2(self):
2af1362
+        """
2af1362
+        Test that polib reports an error when unescaped double quote is found.
2af1362
+        """
2af1362
+        data = r'''
2af1362
+msgid "Some msgid with \"double\" quotes"
2af1362
+msgstr ""
2af1362
+"Some msgstr with "double\" quotes"
2af1362
+'''
2af1362
+        try:
2af1362
+            po = polib.pofile(data)
2af1362
+            self.fail("Unescaped quote not detected")
2af1362
+        except IOError, exc:
2af1362
+            msg = 'Syntax error in po file None (line 4): unescaped double quote found'
2af1362
+            self.assertEqual(str(exc), msg)
2af1362
+
2af1362
+    def test_unescaped_double_quote3(self):
2af1362
+        """
2af1362
+        Test that polib reports an error when unescaped double quote is found at the beginning of the string.
2af1362
+        """
2af1362
+        data = r'''
2af1362
+msgid "Some msgid with \"double\" quotes"
2af1362
+msgid ""Some msgstr with double\" quotes"
2af1362
+'''
2af1362
+        try:
2af1362
+            po = polib.pofile(data)
2af1362
+            self.fail("Unescaped quote not detected")
2af1362
+        except IOError, exc:
2af1362
+            msg = 'Syntax error in po file None (line 3): unescaped double quote found'
2af1362
+
2af1362
+    def test_unescaped_double_quote4(self):
2af1362
+        """
2af1362
+        Test that polib reports an error when unescaped double quote is found at the beginning of the string.
2af1362
+        """
2af1362
+        data = r'''
2af1362
+msgid "Some msgid with \"double\" quotes"
2af1362
+msgstr ""
2af1362
+""Some msgstr with double\" quotes"
2af1362
+'''
2af1362
+        try:
2af1362
+            po = polib.pofile(data)
2af1362
+            self.fail("Unescaped quote not detected")
2af1362
+        except IOError, exc:
2af1362
+            msg = 'Syntax error in po file None (line 4): unescaped double quote found'
2af1362
+            self.assertEqual(str(exc), msg)
2af1362
+    
2af1362
     def test_detect_encoding1(self):
2af1362
         """
2af1362
         Test that given enconding is returned when file has no encoding defined.
2af1362
         """
2af1362
         self.assertEqual(polib.detect_encoding('tests/test_noencoding.po'), 'utf-8')
2af1362
 
2af1362
     def test_detect_encoding2(self):
2af1362
         """