9c3a28b
From 463ae6186be38c6c4290b260f86b1e2044e233f0 Mon Sep 17 00:00:00 2001
a6d78d3
From: Stephan Bergmann <sbergman@redhat.com>
a6d78d3
Date: Fri, 7 Oct 2016 15:02:47 +0200
a6d78d3
Subject: [PATCH] rhbz#1382401: Support surrogate pairs in HTMLOutFuncs
a6d78d3
a6d78d3
(cherry picked from commit 375b99cad4a79d26a6cbcd0f71bc12b312d95818)
a6d78d3
Conflicts:
a6d78d3
	svtools/source/svhtml/htmlout.cxx
a6d78d3
plus cherry-pick of 6131bf9c96fb2ae37decf13e453f27304707271b "Minor performance
a6d78d3
improvement of previous patch"
a6d78d3
a6d78d3
Change-Id: Ib578f758e4f5f355a79a014c2ad4660924dd34a4
a6d78d3
---
a6d78d3
 include/svtools/htmlout.hxx       |  2 +-
9c3a28b
 svtools/source/svhtml/htmlout.cxx | 44 ++++++++++++++++++++++++++-------------
9c3a28b
 2 files changed, 31 insertions(+), 15 deletions(-)
a6d78d3
a6d78d3
diff --git a/include/svtools/htmlout.hxx b/include/svtools/htmlout.hxx
a6d78d3
index 715f0ba..e8a5d50 100644
a6d78d3
--- a/include/svtools/htmlout.hxx
a6d78d3
+++ b/include/svtools/htmlout.hxx
a6d78d3
@@ -60,7 +60,7 @@ struct HTMLOutFuncs
a6d78d3
     SVT_DLLPUBLIC static SvStream& Out_AsciiTag( SvStream&, const sal_Char* pStr,
a6d78d3
                                    bool bOn = true,
a6d78d3
                         rtl_TextEncoding eDestEnc = RTL_TEXTENCODING_MS_1252);
a6d78d3
-    SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_Unicode cChar,
a6d78d3
+    SVT_DLLPUBLIC static SvStream& Out_Char( SvStream&, sal_uInt32 cChar,
a6d78d3
                         HTMLOutContext& rContext,
a6d78d3
                         OUString *pNonConvertableChars = nullptr );
a6d78d3
     SVT_DLLPUBLIC static SvStream& Out_String( SvStream&, const OUString&,
a6d78d3
diff --git a/svtools/source/svhtml/htmlout.cxx b/svtools/source/svhtml/htmlout.cxx
9c3a28b
index 99d9e38..13ff6d8 100644
a6d78d3
--- a/svtools/source/svhtml/htmlout.cxx
a6d78d3
+++ b/svtools/source/svhtml/htmlout.cxx
a6d78d3
@@ -55,7 +55,7 @@ HTMLOutContext::~HTMLOutContext()
a6d78d3
     rtl_destroyUnicodeToTextConverter( m_hConv );
a6d78d3
 }
a6d78d3
 
a6d78d3
-static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c,
a6d78d3
+static const sal_Char *lcl_svhtml_GetEntityForChar( sal_uInt32 c,
a6d78d3
                                              rtl_TextEncoding eDestEnc )
a6d78d3
 {
a6d78d3
     const sal_Char* pStr = nullptr;
a6d78d3
@@ -388,10 +388,12 @@ static const sal_Char *lcl_svhtml_GetEntityForChar( sal_Unicode c,
a6d78d3
     return pStr;
a6d78d3
 }
a6d78d3
 
a6d78d3
-static OString lcl_ConvertCharToHTML( sal_Unicode c,
a6d78d3
+static OString lcl_ConvertCharToHTML( sal_uInt32 c,
a6d78d3
                             HTMLOutContext& rContext,
a6d78d3
                             OUString *pNonConvertableChars )
a6d78d3
 {
a6d78d3
+    assert(rtl::isUnicodeCodePoint(c));
a6d78d3
+
a6d78d3
     OStringBuffer aDest;
a6d78d3
     DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW != rContext.m_eDestEnc,
a6d78d3
                     "wrong destination encoding" );
9c3a28b
@@ -427,7 +429,7 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
9c3a28b
     if( pStr )
9c3a28b
     {
9c3a28b
         sal_Size nLen = rtl_convertUnicodeToText(
9c3a28b
-                            rContext.m_hConv, rContext.m_hContext, &c, 0,
9c3a28b
+                            rContext.m_hConv, rContext.m_hContext, nullptr, 0,
9c3a28b
                             cBuffer, TXTCONV_BUFFER_SIZE,
9c3a28b
                             nFlags|RTL_UNICODETOTEXT_FLAGS_FLUSH,
9c3a28b
                             &nInfo, &nSrcChars );
a6d78d3
@@ -439,8 +441,18 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
a6d78d3
     }
a6d78d3
     else
a6d78d3
     {
a6d78d3
+        sal_Unicode utf16[2];
a6d78d3
+        sal_Size n;
a6d78d3
+        if (c < 0x10000) {
a6d78d3
+            utf16[0] = c;
a6d78d3
+            n = 1;
a6d78d3
+        } else {
a6d78d3
+            utf16[0] = rtl::getHighSurrogate(c);
a6d78d3
+            utf16[1] = rtl::getLowSurrogate(c);
a6d78d3
+            n = 2;
a6d78d3
+        }
a6d78d3
         sal_Size nLen = rtl_convertUnicodeToText( rContext.m_hConv,
a6d78d3
-                                                  rContext.m_hContext, &c, 1,
a6d78d3
+                                                  rContext.m_hContext, utf16, n,
a6d78d3
                                                      cBuffer, TXTCONV_BUFFER_SIZE,
a6d78d3
                                                   nFlags,
a6d78d3
                                                   &nInfo, &nSrcChars );
9c3a28b
@@ -457,7 +469,7 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
9c3a28b
             // entity.
9c3a28b
             // coverity[callee_ptr_arith]
9c3a28b
             nLen = rtl_convertUnicodeToText(
9c3a28b
-                                rContext.m_hConv, rContext.m_hContext, &c, 0,
9c3a28b
+                                rContext.m_hConv, rContext.m_hContext, nullptr, 0,
9c3a28b
                                 cBuffer, TXTCONV_BUFFER_SIZE,
9c3a28b
                                 nFlags|RTL_UNICODETOTEXT_FLAGS_FLUSH,
9c3a28b
                                 &nInfo, &nSrcChars );
a6d78d3
@@ -466,11 +478,15 @@ static OString lcl_ConvertCharToHTML( sal_Unicode c,
a6d78d3
             while( nLen-- )
a6d78d3
                 aDest.append(*pBuffer++);
a6d78d3
 
a6d78d3
-            aDest.append('&').append('#').append(static_cast<sal_Int64>(c))
a6d78d3
+            aDest.append('&').append('#').append(static_cast<sal_Int32>(c))
a6d78d3
+                    // Unicode code points guaranteed to fit into sal_Int32
a6d78d3
                  .append(';');
a6d78d3
-            if( pNonConvertableChars &&
a6d78d3
-                -1 == pNonConvertableChars->indexOf( c ) )
a6d78d3
-                (*pNonConvertableChars) += OUString(c);
a6d78d3
+            if( pNonConvertableChars )
a6d78d3
+            {
a6d78d3
+                OUString cs(&c, 1);
a6d78d3
+                if( -1 == pNonConvertableChars->indexOf( cs ) )
a6d78d3
+                    (*pNonConvertableChars) += cs;
a6d78d3
+            }
a6d78d3
         }
a6d78d3
     }
a6d78d3
     return aDest.makeStringAndClear();
a6d78d3
@@ -505,9 +521,9 @@ OString HTMLOutFuncs::ConvertStringToHTML( const OUString& rSrc,
a6d78d3
 {
a6d78d3
     HTMLOutContext aContext( eDestEnc );
a6d78d3
     OStringBuffer aDest;
a6d78d3
-    for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; i++ )
a6d78d3
+    for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; )
a6d78d3
         aDest.append(lcl_ConvertCharToHTML(
a6d78d3
-            rSrc[i], aContext, pNonConvertableChars));
a6d78d3
+            rSrc.iterateCodePoints(&i), aContext, pNonConvertableChars));
a6d78d3
     aDest.append(lcl_FlushToAscii(aContext));
a6d78d3
     return aDest.makeStringAndClear();
a6d78d3
 }
a6d78d3
@@ -525,7 +541,7 @@ SvStream& HTMLOutFuncs::Out_AsciiTag( SvStream& rStream, const sal_Char *pStr,
a6d78d3
     return rStream;
a6d78d3
 }
a6d78d3
 
a6d78d3
-SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_Unicode c,
a6d78d3
+SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_uInt32 c,
a6d78d3
                                   HTMLOutContext& rContext,
a6d78d3
                                   OUString *pNonConvertableChars )
a6d78d3
 {
a6d78d3
@@ -540,8 +556,8 @@ SvStream& HTMLOutFuncs::Out_String( SvStream& rStream, const OUString& rOUStr,
a6d78d3
 {
a6d78d3
     HTMLOutContext aContext( eDestEnc );
a6d78d3
     sal_Int32 nLen = rOUStr.getLength();
a6d78d3
-    for( sal_Int32 n = 0; n < nLen; n++ )
a6d78d3
-        HTMLOutFuncs::Out_Char( rStream, rOUStr[n],
a6d78d3
+    for( sal_Int32 n = 0; n < nLen; )
a6d78d3
+        HTMLOutFuncs::Out_Char( rStream, rOUStr.iterateCodePoints(&n),
a6d78d3
                                 aContext, pNonConvertableChars );
a6d78d3
     HTMLOutFuncs::FlushToAscii( rStream, aContext );
a6d78d3
     return rStream;
a6d78d3
-- 
a6d78d3
2.7.4
a6d78d3