diff options
Diffstat (limited to 'compilerplugins/clang/stringliteralvar.cxx')
-rw-r--r-- | compilerplugins/clang/stringliteralvar.cxx | 66 |
1 files changed, 65 insertions, 1 deletions
diff --git a/compilerplugins/clang/stringliteralvar.cxx b/compilerplugins/clang/stringliteralvar.cxx index f8dfe916cc5b..fcd3690669e7 100644 --- a/compilerplugins/clang/stringliteralvar.cxx +++ b/compilerplugins/clang/stringliteralvar.cxx @@ -28,6 +28,7 @@ #include <cassert> #include "check.hxx" +#include "compat.hxx" #include "plugin.hxx" namespace @@ -137,6 +138,10 @@ public: return true; } auto const d = e1->getDecl(); + if (isPotentiallyInitializedWithMalformedUtf16(d)) + { + return true; + } if (!reportedArray_.insert(d).second) { return true; @@ -188,6 +193,10 @@ public: return true; } auto const d = e->getDecl(); + if (isPotentiallyInitializedWithMalformedUtf16(d)) + { + return true; + } if (!reportedArray_.insert(d).second) { return true; @@ -198,7 +207,7 @@ public: d->getLocation()) << d << d->getType() << (tc.Class("OString").Namespace("rtl").GlobalNamespace() ? 0 : 1) << isAutomaticVariable(cast<VarDecl>(d)) << d->getSourceRange(); - report(DiagnosticsEngine::Note, "first assigned here", compat::getBeginLoc(expr)) + report(DiagnosticsEngine::Note, "first assigned here", expr->getBeginLoc()) << expr->getSourceRange(); return true; } @@ -246,6 +255,61 @@ private: } } + // There is some confusion on the semantics of numeric-escape-sequences in string literals, see + // <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2029r4.html> "Proposed resolution + // for core issues 411, 1656, and 2333; numeric and universal character escapes in character and + // string literals", so suppress warnings about arrays that are deliberately not written as + // UTF-16 string literals because they contain lone surrogates (for an array with char16_t elements, this returns true when the initializer is missing, when an element cannot be evaluated as an integer, or when a surrogate is unpaired; otherwise it returns false): + bool isPotentiallyInitializedWithMalformedUtf16(ValueDecl const* decl) const + { + if (!decl->getType()->getArrayElementTypeNoTypeQual()->isChar16Type()) + { + return false; + } + auto const init = cast<VarDecl>(decl)->getAnyInitializer(); + if (init == nullptr) + { + return true; + } + auto const list = dyn_cast<InitListExpr>(init); + if (list 
== nullptr) + { + // The initializer is not an initializer list; assuming that it already is a string literal, assume that this string + // literal has no issues with malformed UTF-16 (the assumption is only checked in debug mode): + if (isDebugMode()) + { + assert(isa<clang::StringLiteral>(init)); + } + return false; + } + auto highSurrogate = false; + for (auto const e : list->inits()) + { + llvm::APSInt v; + if (!compat::EvaluateAsInt(e, v, compiler.getASTContext())) + { + return true; + } + if (highSurrogate) + { + if (v < 0xDC00 || v > 0xDFFF) + { + return true; + } + highSurrogate = false; + } + else if (v >= 0xD800 && v <= 0xDBFF) + { + highSurrogate = true; + } + else if (v >= 0xDC00 && v <= 0xDFFF) + { + return true; + } + } + return highSurrogate; + } + std::set<Decl const*> reportedAutomatic_; std::set<Decl const*> reportedArray_; }; |