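Patch for libvoikko/python/libvoikko.py (revision 3900 -> 3903): guard the Python API against input text that contains NUL characters.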
Index: libvoikko/python/libvoikko.py
===================================================================
--- libvoikko/python/libvoikko.py	(revision 3900)
+++ libvoikko/python/libvoikko.py	(revision 3903)
@@ -20,7 +20,7 @@
 
 """
 
-# Copyright 2009 - 2010 Harri Pitkänen (hatapitk@iki.fi)
+# Copyright 2009 - 2011 Harri Pitkänen (hatapitk@iki.fi)
 # This library requires Python version 2.5 or newer.
 
 # This program is free software; you can redistribute it and/or modify
@@ -259,6 +259,9 @@
 		if result == 0:
 			raise VoikkoException(u"Could not set boolean option " + str(option) + u" to value " + str(value) + u".")
 	
+	def __isValidInput(self, text):  # Return True when text contains no NUL character (u"\0").
+		return u"\0" not in text  # NOTE(review): presumably the C library treats NUL as end of string -- confirm
+	
 	def terminate(self):
 		"""Releases the resources allocated by libvoikko for this instance. The instance cannot be used anymore
 		after this method has been called. The resources are released automatically when the Python object is
@@ -342,6 +345,9 @@
 		"""Check the spelling of given word. Return true if the word is correct,
 		false if it is incorrect.
 		"""
+		if not self.__isValidInput(word):
+			return False
+		
 		result = self.__lib.voikkoSpellUcs4(self.__handle, word)
 		if result == 0:
 			return False
@@ -354,6 +360,8 @@
 		"""Generate a list of suggested spellings for given (misspelled) word.
 		If the given word is correct, the list contains only the word itself.
 		"""
+		if not self.__isValidInput(word):
+			return []
 		
 		cSuggestions = self.__lib.voikkoSuggestUcs4(self.__handle, word)
 		pSuggestions = []
@@ -404,6 +412,9 @@
 		Unlike the C based API this method accepts multiple paragraphs
 		separated by newline characters.
 		"""
+		if not self.__isValidInput(text):
+			return []
+		
 		textUnicode = unicode(text)
 		errorList = []
 		offset = 0
@@ -424,6 +435,9 @@
 		analysis results. The results are represented as maps having property
 		names as keys and property values as values.
 		"""
+		if not self.__isValidInput(word):
+			return []
+		
 		cAnalysisList = self.__lib.voikkoAnalyzeWordUcs4(self.__handle, word)
 		pAnalysisList = []
 		
@@ -449,6 +463,19 @@
 	
 	def tokens(self, text):
 		"""Split the given natural language text into a list of Token objects."""
+		startIndex = 0  # start of the part of text that has not been tokenized yet
+		tokens = []
+		while True:
+			i = text.find(u"\0", startIndex)  # index of the next NUL character, -1 when none is left
+			if i == -1:
+				break
+			tokens.extend(self.__splitTokens(text[startIndex:i]))  # in place: no re-copy of the list per NUL
+			tokens.append(Token(u"\0", Token.UNKNOWN))  # every NUL becomes its own UNKNOWN token
+			startIndex = i + 1
+		tokens.extend(self.__splitTokens(text[startIndex:]))  # rest after the last NUL (or the whole text)
+		return tokens
+	
+	def __splitTokens(self, text):
 		uniText = unicode(text)
 		result = []
 		textLen = len(uniText)
@@ -467,6 +494,9 @@
 	
 	def sentences(self, text):
 		"""Split the given natural language text into a list of Sentence objects."""
+		if not self.__isValidInput(text):
+			return [Sentence(text, Sentence.NONE)]
+		
 		uniText = unicode(text)
 		result = []
 		textLen = len(uniText)
@@ -491,6 +521,9 @@
 		  '=' = hyphentation point (character at this position
 		        is replaced by the hyphen.)
 		"""
+		if not self.__isValidInput(word):
+			return "".ljust(len(word))
+		
 		cHyphenationPattern = self.__lib.voikkoHyphenateUcs4(self.__handle, word)
 		hyphenationPattern = string_at(cHyphenationPattern)
 		self.__lib.voikkoFreeCstr(cHyphenationPattern)