1 files changed, 53 insertions, 42 deletions
diff --git a/comphelper/source/misc/syntaxhighlight.cxx b/comphelper/source/misc/syntaxhighlight.cxx
index f6eccc7b4fc3..89dcb73752e4 100644
--- a/comphelper/source/misc/syntaxhighlight.cxx
+++ b/comphelper/source/misc/syntaxhighlight.cxx
@@ -22,6 +22,7 @@
 #include <cassert>
 
 #include <rtl/character.hxx>
+#include <rtl/ustring.hxx>
 #include <unicode/uchar.h>
 #include <comphelper/syntaxhighlight.hxx>
 #include <o3tl/typed_flags_set.hxx>
@@ -279,8 +280,8 @@ class SyntaxHighlighter::Tokenizer
     bool testCharFlags(sal_Unicode c, CharFlags nTestFlags) const;
 
     // Get new token, EmptyString == nothing more over there
-    bool getNextToken(const sal_Unicode*& pos, /*out*/TokenType& reType,
-        /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos) const;
+    bool getNextToken(std::u16string_view::const_iterator& pos, std::u16string_view::const_iterator end, /*out*/TokenType& reType,
+        /*out*/std::u16string_view::const_iterator& rpStartPos, /*out*/std::u16string_view::const_iterator& rpEndPos) const;
 
     const char** ppListKeyWords;
     sal_uInt16 nKeyWordCount;
@@ -290,7 +291,7 @@ public:
 
     explicit Tokenizer( HighlighterLanguage aLang );
 
-    void getHighlightPortions(const OUString& rLine,
+    void getHighlightPortions(std::u16string_view rLine,
                                /*out*/std::vector<HighlightPortion>& portions) const;
     void setKeyWords( const char** ppKeyWords, sal_uInt16 nCount );
 };
@@ -317,24 +318,25 @@ void SyntaxHighlighter::Tokenizer::setKeyWords( const char** ppKeyWords, sal_uIn
     nKeyWordCount = nCount;
 }
 
-bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/TokenType& reType,
-    /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos) const
+bool SyntaxHighlighter::Tokenizer::getNextToken(std::u16string_view::const_iterator& pos, std::u16string_view::const_iterator end,
+    /*out*/TokenType& reType,
+    /*out*/std::u16string_view::const_iterator& rpStartPos, /*out*/std::u16string_view::const_iterator& rpEndPos) const
 {
     reType = TokenType::Unknown;
 
     rpStartPos = pos;
 
-    sal_Unicode c = *pos;
-    if( c == 0 )
+    if( pos == end )
         return false;
 
+    sal_Unicode c = *pos;
     ++pos;
 
     //*** Go through all possibilities ***
     // Space?
     if ( testCharFlags( c, CharFlags::Space ) )
     {
-        while( testCharFlags( *pos, CharFlags::Space ) )
+        while( pos != end && testCharFlags( *pos, CharFlags::Space ) )
             ++pos;
 
         reType = TokenType::Whitespace;
@@ -346,6 +348,8 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
         bool bIdentifierChar;
         do
         {
+            if (pos == end)
+                break;
             // Fetch next character
             c = *pos;
             bIdentifierChar = testCharFlags( c, CharFlags::InIdentifier );
@@ -374,7 +378,7 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
 
             if( bCanBeKeyword )
             {
-                OUString aKWString(rpStartPos, nCount);
+                std::u16string_view aKWString(&*rpStartPos, nCount);
                 OString aByteStr = OUStringToOString(aKWString,
                     RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase();
                 if ( bsearch( aByteStr.getStr(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
@@ -385,10 +389,14 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
                     if( aByteStr == "rem" )
                     {
                         // Remove all characters until end of line or EOF
-                        sal_Unicode cPeek = *pos;
-                        while( cPeek != 0 && !testCharFlags( cPeek, CharFlags::EOL ) )
+                        for (;;)
                         {
-                            cPeek = *++pos;
+                            if (pos == end)
+                                break;
+                            sal_Unicode cPeek = *pos;
+                            if ( testCharFlags( cPeek, CharFlags::EOL ) )
+                                break;
+                            ++pos;
                         }
 
                         reType = TokenType::Comment;
@@ -411,6 +419,8 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
                 do
                 {
                     // Get next character
+                    if (pos == end)
+                        break;
                     c = *pos;
                     bIdentifierChar = isAlpha(c);
                     if( bIdentifierChar )
@@ -422,14 +432,12 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
         }
         else if ((c=='-') && (aLanguage == HighlighterLanguage::SQL))
         {
-            sal_Unicode cPeekNext = *pos;
-            if (cPeekNext=='-')
+            if (pos != end && *pos=='-')
             {
                 // Remove all characters until end of line or EOF
-                while( cPeekNext != 0 && !testCharFlags( cPeekNext, CharFlags::EOL ) )
+                while( pos != end && !testCharFlags( *pos, CharFlags::EOL ) )
                 {
                     ++pos;
-                    cPeekNext = *pos;
                 }
                 reType = TokenType::Comment;
             }
@@ -438,14 +446,12 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
         }
         else if ((c=='/') && (aLanguage == HighlighterLanguage::SQL))
         {
-            sal_Unicode cPeekNext = *pos;
-            if (cPeekNext=='/')
+            if (pos != end && *pos=='/')
             {
                 // Remove all characters until end of line or EOF
-                while( cPeekNext != 0 && !testCharFlags( cPeekNext, CharFlags::EOL ) )
+                while( pos != end && !testCharFlags( *pos, CharFlags::EOL ) )
                 {
                     ++pos;
-                    cPeekNext = *pos;
                 }
                 reType = TokenType::Comment;
             }
@@ -459,8 +465,10 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
             {
                 // Skip all characters until end of input or end of line:
                 for (;;) {
+                    if (pos == end)
+                        break;
                     c = *pos;
-                    if (c == 0 || testCharFlags(c, CharFlags::EOL)) {
+                    if (testCharFlags(c, CharFlags::EOL)) {
                         break;
                     }
                     ++pos;
@@ -480,7 +488,7 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
     }
 
     // Object separator? Must be handled before Number
-    else if( c == '.' && ( *pos < '0' || *pos > '9' ) )
+    else if( c == '.' && ( pos == end || *pos < '0' || *pos > '9' ) )
     {
         reType = TokenType::Operator;
     }
@@ -497,25 +505,25 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
         if( c == '&' )
         {
             // Octal?
-            if( *pos == 'o' || *pos == 'O' )
+            if( pos != end && (*pos == 'o' || *pos == 'O' ))
             {
                 // remove o
                 ++pos;
                 nRadix = 8;     // Octal base
 
                 // Read all numbers
-                while( testCharFlags( *pos, CharFlags::InOctNumber ) )
+                while( pos != end && testCharFlags( *pos, CharFlags::InOctNumber ) )
                     ++pos;
             }
             // Hexadecimal?
-            else if( *pos == 'h' || *pos == 'H' )
+            else if( pos != end && (*pos == 'h' || *pos == 'H' ))
             {
                 // remove x
                 ++pos;
                 nRadix = 16;     // Hexadecimal base
 
                 // Read all numbers
-                while( testCharFlags( *pos, CharFlags::InHexNumber ) )
+                while( pos != end && testCharFlags( *pos, CharFlags::InHexNumber ) )
                     ++pos;
             }
             else
@@ -531,9 +539,9 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
             bool bAfterExpChar = false;
 
             // Read all numbers
-            while( testCharFlags( *pos, CharFlags::InNumber ) ||
+            while( pos != end && (testCharFlags( *pos, CharFlags::InNumber ) ||
                     (bAfterExpChar && *pos == '+' ) ||
-                    (bAfterExpChar && *pos == '-' ) )
+                    (bAfterExpChar && *pos == '-' ) ))
                     // After exponent +/- are OK, too
             {
                 c = *pos++;
@@ -551,10 +559,10 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
             cEndString = ']';
 
         // Read all characters
-        while( *pos != cEndString )
+        while( pos == end || *pos != cEndString )
         {
             // Detect EOF before reading next char, so we do not lose EOF
-            if( *pos == 0 )
+            if( pos == end )
             {
                 // ERROR: unterminated string literal
                 reType = TokenType::Error;
@@ -583,9 +591,12 @@ bool SyntaxHighlighter::Tokenizer::getNextToken(const sal_Unicode*& pos, /*out*/
     else if( testCharFlags( c, CharFlags::EOL ) )
     {
         // If another EOL character comes, read it
-        sal_Unicode cNext = *pos;
-        if( cNext != c && testCharFlags( cNext, CharFlags::EOL ) )
-            ++pos;
+        if (pos != end)
+        {
+            sal_Unicode cNext = *pos;
+            if( cNext != c && testCharFlags( cNext, CharFlags::EOL ) )
+                ++pos;
+        }
 
         reType = TokenType::EOL;
     }
@@ -676,22 +687,22 @@ SyntaxHighlighter::Tokenizer::Tokenizer( HighlighterLanguage aLang ): aLanguage(
     nKeyWordCount = 0;
 }
 
-void SyntaxHighlighter::Tokenizer::getHighlightPortions(const OUString& rLine,
+void SyntaxHighlighter::Tokenizer::getHighlightPortions(std::u16string_view rLine,
                                                  /*out*/std::vector<HighlightPortion>& portions) const
 {
     // Set the position to the beginning of the source string
-    const sal_Unicode* pos = rLine.getStr();
+    auto pos = rLine.begin(); // advanced by getNextToken past each token it consumes
 
     // Variables for the out parameter
     TokenType eType;
-    const sal_Unicode* pStartPos;
-    const sal_Unicode* pEndPos;
+    std::u16string_view::const_iterator pStartPos;
+    std::u16string_view::const_iterator pEndPos;
 
     // Loop over all the tokens
-    while( getNextToken( pos, eType, pStartPos, pEndPos ) )
+    while( getNextToken( pos, rLine.end(), eType, pStartPos, pEndPos ) ) // returns false once pos == rLine.end(); no NUL terminator is consulted
     {
         portions.emplace_back(
-                pStartPos - rLine.getStr(), pEndPos - rLine.getStr(), eType);
+                pStartPos - rLine.begin(), pEndPos - rLine.begin(), eType); // iterator differences: positions measured from the start of rLine
     }
 }
 
@@ -703,11 +714,11 @@ SyntaxHighlighter::SyntaxHighlighter(HighlighterLanguage language):
     {
         case HighlighterLanguage::Basic:
             m_tokenizer->setKeyWords( strListBasicKeyWords,
-                                      SAL_N_ELEMENTS( strListBasicKeyWords ));
+                                      std::size( strListBasicKeyWords ));
             break;
         case HighlighterLanguage::SQL:
             m_tokenizer->setKeyWords( strListSqlKeyWords,
-                                      SAL_N_ELEMENTS( strListSqlKeyWords ));
+                                      std::size( strListSqlKeyWords ));
             break;
         default:
             assert(false); // this cannot happen
@@ -716,7 +727,7 @@ SyntaxHighlighter::SyntaxHighlighter(HighlighterLanguage language):
 
 SyntaxHighlighter::~SyntaxHighlighter() {}
 
-void SyntaxHighlighter::getHighlightPortions(const OUString& rLine,
+void SyntaxHighlighter::getHighlightPortions(std::u16string_view rLine,
                                               /*out*/std::vector<HighlightPortion>& portions) const
 {
     m_tokenizer->getHighlightPortions( rLine, portions );