Blob Blame History Raw
From 1f512da81a71287b13eb0a1f9b31830b16db2107 Mon Sep 17 00:00:00 2001
From: Daiki Ueno <ueno@gnu.org>
Date: Tue, 17 Feb 2015 14:47:17 +0900
Subject: [PATCH] template: Try all possible okuri-gana combinations

Reported by ricky@burg.in.
---
 libkkc/state.vala                      | 29 +++++++++++++++++++++--------
 libkkc/template.vala                   | 22 +++++++++-------------
 tests/conversions-segment.json         | 14 +++++++-------
 tests/conversions-user-dictionary.json |  6 +++---
 tests/template.vala                    |  2 +-
 5 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/libkkc/state.vala b/libkkc/state.vala
index 4ba4c50..7bbf079 100644
--- a/libkkc/state.vala
+++ b/libkkc/state.vala
@@ -324,11 +324,14 @@ namespace Kkc {
                                                 out _candidates)) {
                 return template.expand (_candidates[0].text);
             }
-            template = new OkuriganaTemplate (input);
-            if (segment_dict.lookup_candidates (template.source,
-                                                template.okuri,
-                                                out _candidates)) {
-                return template.expand (_candidates[0].text);
+            var count = input.char_count ();
+            if (count > 1) {
+                template = new OkuriganaTemplate (input, count - 1);
+                if (segment_dict.lookup_candidates (template.source,
+                                                    template.okuri,
+                                                    out _candidates)) {
+                    return template.expand (_candidates[0].text);
+                }
             }
             return null;
         }
@@ -385,7 +388,10 @@ namespace Kkc {
             // 1. Look up candidates from user segment dictionaries.
             lookup_template (new NumericTemplate (normalized_input), true);
             lookup_template (new SimpleTemplate (normalized_input), true);
-            lookup_template (new OkuriganaTemplate (normalized_input), true);
+            for (var i = normalized_input.char_count (); i > 1; i--) {
+                lookup_template (
+                    new OkuriganaTemplate (normalized_input, i - 1), true);
+            }
 
             // 2. Look up the most frequently used unigram from language model.
             if (normalized_input.char_count () > 1) {
@@ -405,7 +411,6 @@ namespace Kkc {
             // 3. Look up candidates from system segment dictionaries.
             lookup_template (new NumericTemplate (normalized_input), false);
             lookup_template (new SimpleTemplate (normalized_input), false);
-            lookup_template (new OkuriganaTemplate (normalized_input), false);
 
             // 4. Do sentence conversion with N-best search.
 
@@ -445,9 +450,17 @@ namespace Kkc {
                     builder.str);
                 if (!kana_candidates.contains (sentence))
                     candidates.add (sentence);
+
+            }
+
+            // 4.3. Look up okuri-ari candidates from system segment
+            // dictionaries, for each possible okurigana combination.
+            for (var i = normalized_input.char_count (); i > 1; i--) {
+                lookup_template (
+                    new OkuriganaTemplate (normalized_input, i - 1), false);
             }
 
-            // 4.3. Add Kana candidates at the end.
+            // 4.4. Add Kana candidates at the end.
             candidates.add_all (kana_candidates);
 
             candidates.populated ();
diff --git a/libkkc/template.vala b/libkkc/template.vala
index 7768f80..92c9995 100644
--- a/libkkc/template.vala
+++ b/libkkc/template.vala
@@ -42,19 +42,15 @@ namespace Kkc {
 
         string? okurigana = null;
 
-        public OkuriganaTemplate (string source) {
-            var count = source.char_count ();
-            if (count > 1) {
-                var last_char_index = source.index_of_nth_char (count - 1);
-                this.okurigana = source[last_char_index:source.length];
-                string? prefix = RomKanaUtils.get_okurigana_prefix (
-                    this.okurigana);
-                this.source = source[0:last_char_index] + prefix;
-                this.okuri = true;
-            } else {
-                this.source = source;
-                this.okuri = false;
-            }
+        public OkuriganaTemplate (string source, int pos) {
+            assert (source.char_count () > 1);
+            assert (0 < pos && pos < source.char_count ());
+            // Byte offset of character pos, where the okurigana starts.
+            var last_char_index = source.index_of_nth_char (pos);
+            this.okurigana = source[last_char_index:source.length];
+            string? prefix = RomKanaUtils.get_okurigana_prefix (this.okurigana);
+            this.source = source[0:last_char_index] + prefix;
+            this.okuri = true;
+        }
 
         public string expand (string text) {
diff --git a/tests/conversions-segment.json b/tests/conversions-segment.json
index 63d0b9b..33baadf 100644
--- a/tests/conversions-segment.json
+++ b/tests/conversions-segment.json
@@ -122,11 +122,11 @@
     {
         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC",
         "input": "わたしのなまえはなかのです",
-        "segments": "わたしの名前は中野です",
+        "segments": "渡しの名前は中野です",
         "segments_size": 3,
         "segments_cursor_pos": 0,
         "output": "",
-        "candidates_size": 4,
+        "candidates_size": 5,
         "input_cursor_pos": -1
     },
     {
@@ -136,7 +136,7 @@
         "segments_size": 3,
         "segments_cursor_pos": 0,
         "output": "",
-        "candidates_size": 4,
+        "candidates_size": 5,
         "input_cursor_pos": -1
     },
     {
@@ -152,17 +152,17 @@
     {
         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC Right",
         "input": "わたしのなまえはなかのです",
-        "segments": "わたしの名前は中野です",
+        "segments": "渡しの名前は中野です",
         "segments_size": 3,
         "segments_cursor_pos": 1,
         "output": "",
-        "candidates_size": 4,
+        "candidates_size": 5,
         "input_cursor_pos": -1
     },
     {
         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC Right SPC",
         "input": "わたしのなまえはなかのです",
-        "segments": "わたしのなまえは中野です",
+        "segments": "渡しのなまえは中野です",
         "segments_size": 3,
         "segments_cursor_pos": 1,
         "output": "",
@@ -172,7 +172,7 @@
     {
         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC Right SPC SPC",
         "input": "わたしのなまえはなかのです",
-        "segments": "わたしのナマエハ中野です",
+        "segments": "渡しのナマエハ中野です",
         "segments_size": 3,
         "segments_cursor_pos": 1,
         "output": "",
diff --git a/tests/conversions-user-dictionary.json b/tests/conversions-user-dictionary.json
index 6c52df5..c5ddace 100644
--- a/tests/conversions-user-dictionary.json
+++ b/tests/conversions-user-dictionary.json
@@ -29,12 +29,12 @@
         "segments": "",
         "segments_size": 0,
         "segments_cursor_pos": -1,
-        "output": "わたしの名前はなかのです"
+        "output": "渡しの名前はなかのです"
     },
     {
         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC",
         "input": "わたしのなまえはなかのです",
-        "segments": "わたしの名前はなかのです",
+        "segments": "渡しの名前はなかのです",
         "segments_size": 2,
         "segments_cursor_pos": 0,
         "output": ""
@@ -42,7 +42,7 @@
     {
         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC Right SPC Right Right SPC",
         "input": "わたしのなまえはなかのです",
-        "segments": "わたしのなまえはなかのです",
+        "segments": "渡しのなまえはなかのです",
         "segments_size": 2,
         "segments_cursor_pos": 1,
         "output": ""
diff --git a/tests/template.vala b/tests/template.vala
index 1f8fb5e..5900cd1 100644
--- a/tests/template.vala
+++ b/tests/template.vala
@@ -16,7 +16,7 @@ class TemplateTests : Kkc.TestCase {
         assert (source == "source");
         assert (!okuri);
 
-        template = new Kkc.OkuriganaTemplate ("かう");
+        template = new Kkc.OkuriganaTemplate ("かう", 1);
         template.get ("source", out source,
                       "okuri", out okuri);
 
-- 
2.1.0