Commit 217dc082 by Jim Stichnoth

Subzero: Deal with substitutions in the primitive remangler.

https://refspecs.linuxbase.org/cxxabi-1.75.html#mangling-compression describes the mechanism for compressing mangled strings by using substitutions of the form S[0-9A-Z]*_ to represent repeated components. When the prefix is handled as wrapping inside a namespace, the base-36 substitution numbers all have to be incremented. This is implemented in a very simple way by scanning the string only for instances of the substitution pattern. Unfortunately, false matches are possible because the S[0-9A-Z]*_ pattern can be a substring of the type name, or can span other components of the mangled name. Getting this completely right would essentially require a full demangling parser - see the ~4000 lines of code in cxa_demangle.cpp and ItaniumMangle.cpp. Since this is just for testing, any false matches will likely cause a linking error and the test can be rewritten to avoid false matches. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/385273002
parent b164d208
......@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include <ctype.h> // isdigit()
#include <ctype.h> // isdigit(), isupper()
#include "IceDefs.h"
#include "IceTypes.h"
......@@ -120,6 +120,92 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump,
ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), HasEmittedFirstMethod(false) {}
// Scan a NUL-terminated mangled name for S[0-9A-Z]*_ patterns and
// replace each one with S<num>_ where <num> is the next base-36 value
// (digits 0-9 then A-Z).  The bare form "S_" becomes "S0_", and a run
// of N 'Z' digits becomes N+1 '0' digits.  The rewritten string
// replaces the contents of OldName.
//
// This is a purely textual scan, not a demangler.  If a type name
// legitimately contains that pattern, then the substitution will be
// made in error and most likely the link will fail.  In this case,
// the test classes can be rewritten not to use that pattern, which is
// much simpler and more reliable than implementing a full demangling
// parser.  Another substitution-in-error may occur if a type
// identifier ends with the pattern S[0-9A-Z]*, because an immediately
// following substitution string like "S1_" or "PS1_" may be combined
// with the previous type.
void GlobalContext::incrementSubstitutions(ManglerVector &OldName) const {
  const size_t OldLen = OldName.size();
  // Nothing to rewrite.  Returning early also keeps the terminator
  // check at the end from indexing into an empty NewName.
  if (OldLen == 0)
    return;
  // Provide extra space in case the length of <num> increases.  The
  // worst case is "S_" (2 chars) growing to "S0_" (3 chars), so twice
  // the input size always leaves room for the result and a terminator.
  ManglerVector NewName(OldLen * 2);
  size_t OldPos = 0;
  size_t NewPos = 0;
  for (; OldPos < OldLen; ++OldPos, ++NewPos) {
    if (OldName[OldPos] == '\0')
      break;
    if (OldName[OldPos] == 'S') {
      // Search forward until we find _ or invalid character (including \0).
      bool AllZs = true;
      bool Found = false;
      size_t Last;
      for (Last = OldPos + 1; Last < OldLen; ++Last) {
        const char Ch = OldName[Last];
        if (Ch == '_') {
          Found = true;
          break;
        }
        // Classify with explicit range checks rather than the <ctype.h>
        // functions: passing a negative plain char to isdigit() is
        // undefined, and the result must not depend on any locale.
        const bool IsBase36Digit =
            (Ch >= '0' && Ch <= '9') || (Ch >= 'A' && Ch <= 'Z');
        if (!IsBase36Digit) {
          // Invalid character, stop searching.
          break;
        }
        if (Ch != 'Z')
          AllZs = false;
      }
      if (Found) {
        NewName[NewPos++] = OldName[OldPos++]; // 'S'
        size_t Length = Last - OldPos;
        // NewPos and OldPos point just past the 'S'.
        assert(NewName[NewPos - 1] == 'S');
        assert(OldName[OldPos - 1] == 'S');
        assert(OldName[OldPos + Length] == '_');
        if (AllZs) {
          // Replace N 'Z' characters with N+1 '0' characters.  (This
          // is also true for N=0, i.e. S_ ==> S0_ .)
          for (size_t i = 0; i < Length + 1; ++i) {
            NewName[NewPos++] = '0';
          }
        } else {
          // Iterate right-to-left and increment the base-36 number.
          // At least one digit is not 'Z', so the carry is always
          // absorbed and the digit count stays the same.
          bool Carry = true;
          for (size_t i = 0; i < Length; ++i) {
            size_t Offset = Length - 1 - i;
            char Ch = OldName[OldPos + Offset];
            if (Carry) {
              Carry = false;
              switch (Ch) {
              case '9':
                Ch = 'A';
                break;
              case 'Z':
                Ch = '0';
                Carry = true;
                break;
              default:
                ++Ch;
                break;
              }
            }
            NewName[NewPos + Offset] = Ch;
          }
          NewPos += Length;
        }
        OldPos = Last;
        // Fall through and let the '_' be copied across.
      }
    }
    NewName[NewPos] = OldName[OldPos];
  }
  // NOTE(review): relies on ManglerVector's sized constructor filling
  // the buffer with '\0' so the output is terminated -- confirm.
  assert(NewName[NewPos] == '\0');
  OldName = NewName;
}
// In this context, name mangling means to rewrite a symbol using a
// given prefix. For a C++ symbol, nest the original symbol inside
// the "prefix" namespace. For other symbols, just prepend the
......@@ -137,9 +223,9 @@ IceString GlobalContext::mangleName(const IceString &Name) const {
return Name;
unsigned PrefixLength = getTestPrefix().length();
llvm::SmallVector<char, 32> NameBase(1 + Name.length());
ManglerVector NameBase(1 + Name.length());
const size_t BufLen = 30 + Name.length() + PrefixLength;
llvm::SmallVector<char, 32> NewName(BufLen);
ManglerVector NewName(BufLen);
uint32_t BaseLength = 0; // using uint32_t due to sscanf format string
int ItemsParsed = sscanf(Name.c_str(), "_ZN%s", NameBase.data());
......@@ -152,6 +238,7 @@ IceString GlobalContext::mangleName(const IceString &Name) const {
// somehow miscalculated the output buffer length, the output will
// be truncated, but it will be truncated consistently for all
// mangleName() calls on the same input string.
incrementSubstitutions(NewName);
return NewName.data();
}
......@@ -172,8 +259,8 @@ IceString GlobalContext::mangleName(const IceString &Name) const {
// Transform _Z3barIabcExyz ==> _ZN6Prefix3barIabcEExyz
// ^^^^^^^^ ^
// (splice in "N6Prefix", and insert "E" after "3barIabcE")
llvm::SmallVector<char, 32> OrigName(Name.length());
llvm::SmallVector<char, 32> OrigSuffix(Name.length());
ManglerVector OrigName(Name.length());
ManglerVector OrigSuffix(Name.length());
uint32_t ActualBaseLength = BaseLength;
if (NameBase[ActualBaseLength] == 'I') {
++ActualBaseLength;
......@@ -187,6 +274,7 @@ IceString GlobalContext::mangleName(const IceString &Name) const {
snprintf(NewName.data(), BufLen, "_ZN%u%s%u%sE%s", PrefixLength,
getTestPrefix().c_str(), BaseLength, OrigName.data(),
OrigSuffix.data());
incrementSubstitutions(NewName);
return NewName.data();
}
......
......@@ -105,6 +105,10 @@ private:
bool HasEmittedFirstMethod;
GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION;
GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION;
// Private helpers for mangleName()
typedef llvm::SmallVector<char, 32> ManglerVector;
void incrementSubstitutions(ManglerVector &OldName) const;
};
} // end of namespace Ice
......
......@@ -101,5 +101,28 @@ entry:
}
; MANGLE: Subzero_Z-1FuncCPlusPlusi:
; Test for substitution incrementing. This single test captures:
; S<num>_ ==> S<num+1>_ for single-digit <num>
; S_ ==> S0_
; String length increase, e.g. SZZZ_ ==> S0000_
; At least one digit wrapping without length increase, e.g. SZ9ZZ_ ==> SZA00_
; Unrelated identifiers containing S[0-9A-Z]* , e.g. MyClassS1x
; A proper substring of S<num>_ at the end of the string
; (to test parser edge cases)
;
; Component-by-component, the expected output below differs from the
; input symbol as follows:
;   MyClassS1x, MyClassS2x  are unchanged (the 'x' ends the candidate match)
;   RS_  ==> RS0_     RS1_ ==> RS2_
;   S_   ==> S0_      S1_  ==> S2_
;   SZZZ_ ==> S0000_  SZ9ZZ_ ==> SZA00_
;   S12345 is unchanged (no closing '_' before the end of the name)
; The function body is intentionally empty; only the symbol name matters.
define internal void @_Z3fooP10MyClassS1xP10MyClassS2xRS_RS1_S_S1_SZZZ_SZ9ZZ_S12345() {
; MANGLE: _ZN7Subzero3fooEP10MyClassS1xP10MyClassS2xRS0_RS2_S0_S2_S0000_SZA00_S12345:
entry:
ret void
}
; Test that unmangled (non-C++) strings don't have substitutions updated.
; The symbol does not start with _Z, so the expected output below is just
; the test prefix followed by the original name, with the S_, S0_ and SZ_
; sequences left exactly as written.
define internal void @foo_S_S0_SZ_S() {
; MANGLE: Subzerofoo_S_S0_SZ_S:
entry:
ret void
}
; ERRORS-NOT: ICE translation error
; DUMP-NOT: SZ
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment