Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
G
googletest
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chen Yisong
googletest
Commits
0c5a6624
Commit
0c5a6624
authored
Aug 25, 2008
by
vladlosev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement wide->UTF-8 string conversion more correctly
parent
c6e674db
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
301 additions
and
66 deletions
+301
-66
gtest-port.h
include/gtest/internal/gtest-port.h
+6
-0
gtest-internal-inl.h
src/gtest-internal-inl.h
+24
-2
gtest.cc
src/gtest.cc
+125
-38
gtest_unittest.cc
test/gtest_unittest.cc
+146
-26
No files found.
include/gtest/internal/gtest-port.h
View file @
0c5a6624
...
...
@@ -225,6 +225,12 @@
#include <sys/mman.h>
#endif // GTEST_HAS_STD_STRING && defined(GTEST_OS_LINUX)
// Determines whether the system compiler uses UTF-16 for encoding wide strings.
#if defined(GTEST_OS_WINDOWS) || defined(GTEST_OS_CYGWIN) || \
defined(__SYMBIAN32__)
#define GTEST_WIDE_STRING_USES_UTF16_ 1
#endif
// Defines some utility macros.
// The GNU compiler emits a warning if nested "if" statements are followed by
...
...
src/gtest-internal-inl.h
View file @
0c5a6624
...
...
@@ -133,8 +133,30 @@ class GTestFlagSaver {
internal
::
Int32
repeat_
;
}
GTEST_ATTRIBUTE_UNUSED
;
// Converts a Unicode code-point to its UTF-8 encoding.
String
ToUtf8String
(
wchar_t
wchar
);
// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// The output buffer str must contain at least 32 characters.
// The function returns the address of the output buffer.
// If the code_point is not a valid Unicode code point
// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output
// as '(Invalid Unicode 0xXXXXXXXX)'.
char
*
CodePointToUtf8
(
UInt32
code_point
,
char
*
str
);
// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from Basic Multilingual Plane.
String
WideStringToUtf8
(
const
wchar_t
*
str
,
int
num_chars
);
// Returns the number of active threads, or 0 when there is an error.
size_t
GetThreadCount
();
...
...
src/gtest.cc
View file @
0c5a6624
...
...
@@ -784,16 +784,19 @@ bool String::CStringEquals(const char * lhs, const char * rhs) {
// encoding, and streams the result to the given Message object.
static
void
StreamWideCharsToMessage
(
const
wchar_t
*
wstr
,
size_t
len
,
Message
*
msg
)
{
for
(
size_t
i
=
0
;
i
!=
len
;
i
++
)
{
// TODO(wan): consider allowing a testing::String object to
// contain '\0'. This will make it behave more like std::string,
// and will allow ToUtf8String() to return the correct encoding
// for '\0' s.t. we can get rid of the conditional here (and in
// several other places).
if
(
wstr
[
i
])
{
*
msg
<<
internal
::
ToUtf8String
(
wstr
[
i
]);
// TODO(wan): consider allowing a testing::String object to
// contain '\0'. This will make it behave more like std::string,
// and will allow ToUtf8String() to return the correct encoding
// for '\0' s.t. we can get rid of the conditional here (and in
// several other places).
for
(
size_t
i
=
0
;
i
!=
len
;
)
{
// NOLINT
if
(
wstr
[
i
]
!=
L'\0'
)
{
*
msg
<<
WideStringToUtf8
(
wstr
+
i
,
len
-
i
);
while
(
i
!=
len
&&
wstr
[
i
]
!=
L'\0'
)
i
++
;
}
else
{
*
msg
<<
'\0'
;
i
++
;
}
}
}
...
...
@@ -852,8 +855,10 @@ String FormatForFailureMessage(wchar_t wchar) {
Message
msg
;
// A String object cannot contain '\0', so we print "\\0" when wchar is
// L'\0'.
msg
<<
"L'"
<<
(
wchar
?
ToUtf8String
(
wchar
).
c_str
()
:
"
\\
0"
)
<<
"' ("
<<
wchar_as_uint64
<<
", 0x"
<<
::
std
::
setbase
(
16
)
char
buffer
[
32
];
// CodePointToUtf8 requires a buffer that big.
msg
<<
"L'"
<<
(
wchar
?
CodePointToUtf8
(
static_cast
<
UInt32
>
(
wchar
),
buffer
)
:
"
\\
0"
)
<<
"' ("
<<
wchar_as_uint64
<<
", 0x"
<<
::
std
::
setbase
(
16
)
<<
wchar_as_uint64
<<
")"
;
return
msg
.
GetString
();
}
...
...
@@ -1317,31 +1322,118 @@ inline UInt32 ChopLowBits(UInt32* bits, int n) {
return
low_bits
;
}
// Converts a Unicode code-point to its UTF-8 encoding.
String
ToUtf8String
(
wchar_t
wchar
)
{
char
str
[
5
]
=
{};
// Initializes str to all '\0' characters.
UInt32
code
=
static_cast
<
UInt32
>
(
wchar
);
if
(
code
<=
kMaxCodePoint1
)
{
str
[
0
]
=
static_cast
<
char
>
(
code
);
// 0xxxxxxx
}
else
if
(
code
<=
kMaxCodePoint2
)
{
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xC0
|
code
);
// 110xxxxx
}
else
if
(
code
<=
kMaxCodePoint3
)
{
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xE0
|
code
);
// 1110xxxx
}
else
if
(
code
<=
kMaxCodePoint4
)
{
str
[
3
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xF0
|
code
);
// 11110xxx
// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// The output buffer str must contain at least 32 characters.
// The function returns the address of the output buffer.
// If the code_point is not a valid Unicode code point
// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output
// as '(Invalid Unicode 0xXXXXXXXX)'.
char
*
CodePointToUtf8
(
UInt32
code_point
,
char
*
str
)
{
if
(
code_point
<=
kMaxCodePoint1
)
{
str
[
1
]
=
'\0'
;
str
[
0
]
=
static_cast
<
char
>
(
code_point
);
// 0xxxxxxx
}
else
if
(
code_point
<=
kMaxCodePoint2
)
{
str
[
2
]
=
'\0'
;
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xC0
|
code_point
);
// 110xxxxx
}
else
if
(
code_point
<=
kMaxCodePoint3
)
{
str
[
3
]
=
'\0'
;
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xE0
|
code_point
);
// 1110xxxx
}
else
if
(
code_point
<=
kMaxCodePoint4
)
{
str
[
4
]
=
'\0'
;
str
[
3
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xF0
|
code_point
);
// 11110xxx
}
else
{
return
String
::
Format
(
"(Invalid Unicode 0x%llX)"
,
static_cast
<
UInt64
>
(
wchar
));
// The longest string String::Format can produce when invoked
// with these parameters is 28 character long (not including
// the terminating nul character). We are asking for 32 character
// buffer just in case. This is also enough for strncpy to
// null-terminate the destination string.
// MSVC 8 deprecates strncpy(), so we want to suppress warning
// 4996 (deprecated function) there.
#ifdef GTEST_OS_WINDOWS // We are on Windows.
#pragma warning(push) // Saves the current warning state.
#pragma warning(disable:4996) // Temporarily disables warning 4996.
#endif
strncpy
(
str
,
String
::
Format
(
"(Invalid Unicode 0x%X)"
,
code_point
).
c_str
(),
32
);
#ifdef GTEST_OS_WINDOWS // We are on Windows.
#pragma warning(pop) // Restores the warning state.
#endif
str
[
31
]
=
'\0'
;
// Makes sure no change in the format to strncpy leaves
// the result unterminated.
}
return
str
;
}
// The following two functions only make sense if the system
// uses UTF-16 for wide string encoding. All supported systems
// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.
return
String
(
str
);
// Reports whether (first, second) form a UTF-16 surrogate pair, i.e. whether
// the two values should be merged into one Unicode code point with
// CreateCodePointFromUtf16SurrogatePair.
//
// A pair is recognized only when wchar_t is 2 bytes wide: `first` must lie in
// the leading-surrogate range 0xD800-0xDBFF and `second` in the
// trailing-surrogate range 0xDC00-0xDFFF. For any other wchar_t size the
// function always returns false.
inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
  // Wide strings are not treated as UTF-16 unless wchar_t is 16 bits wide.
  if (sizeof(wchar_t) != 2) return false;

  // Masking with 0xFC00 keeps the top six bits, which identify the
  // surrogate kind; the low ten bits carry the payload.
  const bool is_leading = (first & 0xFC00) == 0xD800;
  const bool is_trailing = (second & 0xFC00) == 0xDC00;
  return is_leading && is_trailing;
}
// Builds a Unicode code point out of a UTF-16 surrogate pair.
//
// When wchar_t is 2 bytes wide, the low 10 bits of `first` become the upper
// half and the low 10 bits of `second` the lower half of a 20-bit value, to
// which 0x10000 is added. For any other wchar_t size `second` is ignored and
// `first` is returned converted to UInt32.
inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
                                                    wchar_t second) {
  if (sizeof(wchar_t) != 2) {
    // This should not be called, but we provide a sensible default
    // in case it is.
    return static_cast<UInt32>(first);
  }

  const UInt32 mask = (1 << 10) - 1;  // Selects the 10 payload bits.
  const UInt32 upper_half = (first & mask) << 10;
  const UInt32 lower_half = second & mask;
  return (upper_half | lower_half) + 0x10000;
}
// Converts a wide string to a narrow string in UTF-8 encoding.
//
// The input is assumed to be encoded as:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
//
// Parameters:
//   str       - points to a null-terminated wide string.
//   num_chars - upper bound on the number of wchar_t elements to process;
//               pass -1 to process the whole string (its length is then
//               taken from wcslen). Conversion also stops at the first
//               L'\0' even if num_chars has not been reached.
//
// Code points outside the Unicode range U+0 to U+10FFFF are output as
// '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF-16 encoding and
// contains invalid surrogate pairs, the values in those pairs are encoded
// as individual Unicode characters from the Basic Multilingual Plane.
String WideStringToUtf8(const wchar_t* str, int num_chars) {
  if (num_chars == -1)
    num_chars = static_cast<int>(wcslen(str));

  StrStream stream;
  int pos = 0;
  while (pos < num_chars && str[pos] != L'\0') {
    UInt32 unicode_code_point;

    // A surrogate pair needs a second element inside the requested range.
    const bool has_following = pos + 1 < num_chars;
    if (has_following && IsUtf16SurrogatePair(str[pos], str[pos + 1])) {
      unicode_code_point =
          CreateCodePointFromUtf16SurrogatePair(str[pos], str[pos + 1]);
      pos += 2;  // Both halves of the pair have been consumed.
    } else {
      unicode_code_point = static_cast<UInt32>(str[pos]);
      pos += 1;
    }

    char buffer[32];  // CodePointToUtf8 requires a buffer this big.
    stream << CodePointToUtf8(unicode_code_point, buffer);
  }
  return StrStreamToString(&stream);
}
// Converts a wide C string to a String using the UTF-8 encoding.
...
...
@@ -1349,12 +1441,7 @@ String ToUtf8String(wchar_t wchar) {
String
String
::
ShowWideCString
(
const
wchar_t
*
wide_c_str
)
{
if
(
wide_c_str
==
NULL
)
return
String
(
"(null)"
);
StrStream
ss
;
while
(
*
wide_c_str
)
{
ss
<<
internal
::
ToUtf8String
(
*
wide_c_str
++
);
}
return
internal
::
StrStreamToString
(
&
ss
);
return
String
(
internal
::
WideStringToUtf8
(
wide_c_str
,
-
1
).
c_str
());
}
// Similar to ShowWideCString(), except that this function encloses
...
...
test/gtest_unittest.cc
View file @
0c5a6624
...
...
@@ -101,6 +101,7 @@ using testing::TPRT_NONFATAL_FAILURE;
using
testing
::
TPRT_SUCCESS
;
using
testing
::
UnitTest
;
using
testing
::
internal
::
AppendUserMessage
;
using
testing
::
internal
::
CodePointToUtf8
;
using
testing
::
internal
::
EqFailure
;
using
testing
::
internal
::
FloatingPoint
;
using
testing
::
internal
::
GTestFlagSaver
;
...
...
@@ -111,8 +112,8 @@ using testing::internal::StreamableToString;
using
testing
::
internal
::
String
;
using
testing
::
internal
::
TestProperty
;
using
testing
::
internal
::
TestResult
;
using
testing
::
internal
::
ToUtf8String
;
using
testing
::
internal
::
UnitTestImpl
;
using
testing
::
internal
::
WideStringToUtf8
;
// This line tests that we can define tests in an unnamed namespace.
namespace
{
...
...
@@ -142,65 +143,184 @@ TEST(NullLiteralTest, IsFalseForNonNullLiterals) {
}
#endif // __SYMBIAN32__
// Tests ToUtf8String().
//
// Tests CodePointToUtf8().
// Tests that the NUL character L'\0' is encoded correctly.
TEST
(
ToUtf8StringTest
,
CanEncodeNul
)
{
EXPECT_STREQ
(
""
,
ToUtf8String
(
L'\0'
).
c_str
());
TEST
(
CodePointToUtf8Test
,
CanEncodeNul
)
{
char
buffer
[
32
];
EXPECT_STREQ
(
""
,
CodePointToUtf8
(
L'\0'
,
buffer
));
}
// Tests that ASCII characters are encoded correctly.
TEST
(
ToUtf8StringTest
,
CanEncodeAscii
)
{
EXPECT_STREQ
(
"a"
,
ToUtf8String
(
L'a'
).
c_str
());
EXPECT_STREQ
(
"Z"
,
ToUtf8String
(
L'Z'
).
c_str
());
EXPECT_STREQ
(
"&"
,
ToUtf8String
(
L'&'
).
c_str
());
EXPECT_STREQ
(
"
\x7F
"
,
ToUtf8String
(
L'\x7F'
).
c_str
());
TEST
(
CodePointToUtf8Test
,
CanEncodeAscii
)
{
char
buffer
[
32
];
EXPECT_STREQ
(
"a"
,
CodePointToUtf8
(
L'a'
,
buffer
));
EXPECT_STREQ
(
"Z"
,
CodePointToUtf8
(
L'Z'
,
buffer
));
EXPECT_STREQ
(
"&"
,
CodePointToUtf8
(
L'&'
,
buffer
));
EXPECT_STREQ
(
"
\x7F
"
,
CodePointToUtf8
(
L'\x7F'
,
buffer
));
}
// Tests that Unicode code-points that have 8 to 11 bits are encoded
// as 110xxxxx 10xxxxxx.
TEST
(
ToUtf8StringTest
,
CanEncode8To11Bits
)
{
TEST
(
CodePointToUtf8Test
,
CanEncode8To11Bits
)
{
char
buffer
[
32
];
// 000 1101 0011 => 110-00011 10-010011
EXPECT_STREQ
(
"
\xC3\x93
"
,
ToUtf8String
(
L'\xD3'
).
c_str
(
));
EXPECT_STREQ
(
"
\xC3\x93
"
,
CodePointToUtf8
(
L'\xD3'
,
buffer
));
// 101 0111 0110 => 110-10101 10-110110
EXPECT_STREQ
(
"
\xD5\xB6
"
,
ToUtf8String
(
L
'\
x576
'
).
c_str
(
));
EXPECT_STREQ
(
"
\xD5\xB6
"
,
CodePointToUtf8
(
L
'\
x576
'
,
buffer
));
}
// Tests that Unicode code-points that have 12 to 16 bits are encoded
// as 1110xxxx 10xxxxxx 10xxxxxx.
TEST
(
ToUtf8StringTest
,
CanEncode12To16Bits
)
{
TEST
(
CodePointToUtf8Test
,
CanEncode12To16Bits
)
{
char
buffer
[
32
];
// 0000 1000 1101 0011 => 1110-0000 10-100011 10-010011
EXPECT_STREQ
(
"
\xE0\xA3\x93
"
,
ToUtf8String
(
L
'\
x8D3
'
).
c_str
(
));
EXPECT_STREQ
(
"
\xE0\xA3\x93
"
,
CodePointToUtf8
(
L
'\
x8D3
'
,
buffer
));
// 1100 0111 0100 1101 => 1110-1100 10-011101 10-001101
EXPECT_STREQ
(
"
\xEC\x9D\x8D
"
,
ToUtf8String
(
L
'\
xC74D
'
).
c_str
(
));
EXPECT_STREQ
(
"
\xEC\x9D\x8D
"
,
CodePointToUtf8
(
L
'\
xC74D
'
,
buffer
));
}
#if !defined(GTEST_OS_WINDOWS) && !defined(GTEST_OS_CYGWIN) && \
!defined(__SYMBIAN32__)
#ifndef GTEST_WIDE_STRING_USES_UTF16_
// Tests in this group require a wchar_t to hold > 16 bits, and thus
// are skipped on Windows, Cygwin, and Symbian, where a wchar_t is
// 16-bit wide.
// 16-bit wide. This code may not compile on those systems.
// Tests that Unicode code-points that have 17 to 21 bits are encoded
// as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
TEST
(
ToUtf8StringTest
,
CanEncode17To21Bits
)
{
TEST
(
CodePointToUtf8Test
,
CanEncode17To21Bits
)
{
char
buffer
[
32
];
// 0 0001 0000 1000 1101 0011 => 11110-000 10-010000 10-100011 10-010011
EXPECT_STREQ
(
"
\xF0\x90\xA3\x93
"
,
ToUtf8String
(
L
'\
x108D3
'
).
c_str
());
EXPECT_STREQ
(
"
\xF0\x90\xA3\x93
"
,
CodePointToUtf8
(
L
'\
x108D3
'
,
buffer
));
// 0 0001 0000 0100 0000 0000 => 11110-000 10-010000 10-010000 10-000000
EXPECT_STREQ
(
"
\xF0\x90\x90\x80
"
,
CodePointToUtf8
(
L
'\
x10400
'
,
buffer
));
// 1 0111 1000 0110 0011 0100 => 11110-101 10-111000 10-011000 10-110100
EXPECT_STREQ
(
"
\xF
5\xB8\x98\xB4
"
,
ToUtf8String
(
L
'\
x178634
'
).
c_str
(
));
// 1 0000 1000 0110 0011 0100 => 11110-100 10-001000 10-011000 10-110100
EXPECT_STREQ
(
"
\xF
4\x88\x98\xB4
"
,
CodePointToUtf8
(
L
'\
x108634
'
,
buffer
));
}
// Tests that encoding an invalid code-point generates the expected result.
TEST
(
ToUtf8StringTest
,
CanEncodeInvalidCodePoint
)
{
TEST
(
CodePointToUtf8Test
,
CanEncodeInvalidCodePoint
)
{
char
buffer
[
32
];
EXPECT_STREQ
(
"(Invalid Unicode 0x1234ABCD)"
,
ToUtf8String
(
L
'\
x1234ABCD
'
).
c_str
());
CodePointToUtf8
(
L
'\
x1234ABCD
'
,
buffer
));
}
#endif // GTEST_WIDE_STRING_USES_UTF16_
// Tests WideStringToUtf8().
// Tests that the NUL character L'\0' is encoded correctly.
TEST
(
WideStringToUtf8Test
,
CanEncodeNul
)
{
EXPECT_STREQ
(
""
,
WideStringToUtf8
(
L""
,
0
).
c_str
());
EXPECT_STREQ
(
""
,
WideStringToUtf8
(
L""
,
-
1
).
c_str
());
}
// Tests that ASCII strings are encoded correctly.
TEST
(
WideStringToUtf8Test
,
CanEncodeAscii
)
{
EXPECT_STREQ
(
"a"
,
WideStringToUtf8
(
L"a"
,
1
).
c_str
());
EXPECT_STREQ
(
"ab"
,
WideStringToUtf8
(
L"ab"
,
2
).
c_str
());
EXPECT_STREQ
(
"a"
,
WideStringToUtf8
(
L"a"
,
-
1
).
c_str
());
EXPECT_STREQ
(
"ab"
,
WideStringToUtf8
(
L"ab"
,
-
1
).
c_str
());
}
// Tests that Unicode code-points that have 8 to 11 bits are encoded
// as 110xxxxx 10xxxxxx.
TEST
(
WideStringToUtf8Test
,
CanEncode8To11Bits
)
{
// 000 1101 0011 => 110-00011 10-010011
EXPECT_STREQ
(
"
\xC3\x93
"
,
WideStringToUtf8
(
L"
\xD3
"
,
1
).
c_str
());
EXPECT_STREQ
(
"
\xC3\x93
"
,
WideStringToUtf8
(
L"
\xD3
"
,
-
1
).
c_str
());
// 101 0111 0110 => 110-10101 10-110110
EXPECT_STREQ
(
"
\xD5\xB6
"
,
WideStringToUtf8
(
L"
\x576
"
,
1
).
c_str
());
EXPECT_STREQ
(
"
\xD5\xB6
"
,
WideStringToUtf8
(
L"
\x576
"
,
-
1
).
c_str
());
}
// Tests that Unicode code-points that have 12 to 16 bits are encoded
// as 1110xxxx 10xxxxxx 10xxxxxx.
TEST
(
WideStringToUtf8Test
,
CanEncode12To16Bits
)
{
// 0000 1000 1101 0011 => 1110-0000 10-100011 10-010011
EXPECT_STREQ
(
"
\xE0\xA3\x93
"
,
WideStringToUtf8
(
L"
\x8D3
"
,
1
).
c_str
());
EXPECT_STREQ
(
"
\xE0\xA3\x93
"
,
WideStringToUtf8
(
L"
\x8D3
"
,
-
1
).
c_str
());
// 1100 0111 0100 1101 => 1110-1100 10-011101 10-001101
EXPECT_STREQ
(
"
\xEC\x9D\x8D
"
,
WideStringToUtf8
(
L"
\xC74D
"
,
1
).
c_str
());
EXPECT_STREQ
(
"
\xEC\x9D\x8D
"
,
WideStringToUtf8
(
L"
\xC74D
"
,
-
1
).
c_str
());
}
#endif // Windows, Cygwin, or Symbian
// Tests that the conversion stops when the function encounters \0 character.
TEST
(
WideStringToUtf8Test
,
StopsOnNulCharacter
)
{
EXPECT_STREQ
(
"ABC"
,
WideStringToUtf8
(
L"ABC
\0
XYZ"
,
100
).
c_str
());
}
// Tests that the conversion stops when the function reaches the limit
// specified by the 'length' parameter.
TEST
(
WideStringToUtf8Test
,
StopsWhenLengthLimitReached
)
{
EXPECT_STREQ
(
"ABC"
,
WideStringToUtf8
(
L"ABCDEF"
,
3
).
c_str
());
}
#ifndef GTEST_WIDE_STRING_USES_UTF16_
// Tests that Unicode code-points that have 17 to 21 bits are encoded
// as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. This code may not compile
// on the systems using UTF-16 encoding.
TEST
(
WideStringToUtf8Test
,
CanEncode17To21Bits
)
{
// 0 0001 0000 1000 1101 0011 => 11110-000 10-010000 10-100011 10-010011
EXPECT_STREQ
(
"
\xF0\x90\xA3\x93
"
,
WideStringToUtf8
(
L"
\x108D
3"
,
1
).
c_str
());
EXPECT_STREQ
(
"
\xF0\x90\xA3\x93
"
,
WideStringToUtf8
(
L"
\x108D
3"
,
-
1
).
c_str
());
// 1 0000 1000 0110 0011 0100 => 11110-100 10-001000 10-011000 10-110100
EXPECT_STREQ
(
"
\xF4\x88\x98\xB4
"
,
WideStringToUtf8
(
L"
\x1086
34"
,
1
).
c_str
());
EXPECT_STREQ
(
"
\xF4\x88\x98\xB4
"
,
WideStringToUtf8
(
L"
\x1086
34"
,
-
1
).
c_str
());
}
// Tests that encoding an invalid code-point generates the expected result.
TEST
(
WideStringToUtf8Test
,
CanEncodeInvalidCodePoint
)
{
EXPECT_STREQ
(
"(Invalid Unicode 0xABCDFF)"
,
WideStringToUtf8
(
L"
\xABCD
FF"
,
-
1
).
c_str
());
}
#else
// Tests that surrogate pairs are encoded correctly on the systems using
// UTF-16 encoding in the wide strings.
TEST
(
WideStringToUtf8Test
,
CanEncodeValidUtf16SUrrogatePairs
)
{
EXPECT_STREQ
(
"
\xF0\x90\x90\x80
"
,
WideStringToUtf8
(
L"
\xD801\xDC00
"
,
-
1
).
c_str
());
}
// Tests that encoding an invalid UTF-16 surrogate pair
// generates the expected result.
TEST
(
WideStringToUtf8Test
,
CanEncodeInvalidUtf16SurrogatePair
)
{
// Leading surrogate is at the end of the string.
EXPECT_STREQ
(
"
\xED\xA0\x80
"
,
WideStringToUtf8
(
L"
\xD800
"
,
-
1
).
c_str
());
// Leading surrogate is not followed by the trailing surrogate.
EXPECT_STREQ
(
"
\xED\xA0\x80
$"
,
WideStringToUtf8
(
L"
\xD800
$"
,
-
1
).
c_str
());
// Trailing surrogate appears without a leading surrogate.
EXPECT_STREQ
(
"
\xED\xB0\x80
PQR"
,
WideStringToUtf8
(
L"
\xDC00
PQR"
,
-
1
).
c_str
());
}
#endif // GTEST_WIDE_STRING_USES_UTF16_
// Tests that codepoint concatenation works correctly.
#ifndef GTEST_WIDE_STRING_USES_UTF16_
TEST
(
WideStringToUtf8Test
,
ConcatenatesCodepointsCorrectly
)
{
EXPECT_STREQ
(
"
\xF4\x88\x98\xB4
"
"
\xEC\x9D\x8D
"
"
\n
"
"
\xD5\xB6
"
"
\xE0\xA3\x93
"
"
\xF4\x88\x98\xB4
"
,
WideStringToUtf8
(
L"
\x1086
34
\xC74D\n\x576\x8D3\x1086
34"
,
-
1
).
c_str
());
}
#else
TEST
(
WideStringToUtf8Test
,
ConcatenatesCodepointsCorrectly
)
{
EXPECT_STREQ
(
"
\xEC\x9D\x8D
"
"
\n
"
"
\xD5\xB6
"
"
\xE0\xA3\x93
"
,
WideStringToUtf8
(
L"
\xC74D\n\x576\x8D3
"
,
-
1
).
c_str
());
}
#endif // GTEST_WIDE_STRING_USES_UTF16_
// Tests the List template class.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment