Java源码示例:org.apache.commons.lang3.text.translate.CharSequenceTranslator
示例1
/**
 * Verifies that characters outside the XML 1.0 {@code Char} production are written as decimal
 * numeric entities, while tab, line feed, carriage return and ordinary text are handled by the
 * base XML escaper only.
 */
@Test
public void testEscapeXmlAllCharacters() {
    // http://www.w3.org/TR/xml/#charsets says:
    // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
    // excluding the surrogate blocks, FFFE, and FFFF. */
    final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
            .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
                    NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));

    // Code points 0x0-0x8 are covered by below(9) and must come out as decimal entities.
    assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
    assertEquals("\t", escapeXml.translate("\t")); // 0x9
    assertEquals("\n", escapeXml.translate("\n")); // 0xA
    assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
    assertEquals("\r", escapeXml.translate("\r")); // 0xD
    // The apostrophe is escaped by the base XML escaper, independent of the numeric escapers added above.
    assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
    assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
}
示例2
/**
 * Writes {@code str} to {@code out}, replacing selected characters with backslash escapes.
 * <ul>
 * <li>Characters above 0xFFFD and characters in the surrogate range 0xD800-0xDFFF are written as a
 * backslash-u escape followed by the result of {@code CharSequenceTranslator.hex}.</li>
 * <li>Characters 0x7F-0x9F, except 0x85, are written as a backslash-u escape with a {@code 00} prefix.</li>
 * <li>Characters below 0x20 are written as {@code \t}, {@code \n} or {@code \r} where applicable,
 * otherwise as a backslash-u escape padded with {@code 00} or {@code 000}.</li>
 * <li>The double quote and the backslash are always prefixed with a backslash; the single quote
 * and the forward slash are prefixed only when the corresponding flag is set.</li>
 * <li>Every other character is written unchanged.</li>
 * </ul>
 *
 * @param out writer to receive the escaped string, must not be null
 * @param str String to escape values in, may be null (nothing is written in that case)
 * @param escapeSingleQuote escapes single quotes if <code>true</code>
 * @param escapeForwardSlash escapes forward slashes if <code>true</code>
 * @throws IllegalArgumentException if {@code out} is null
 * @throws IOException if an IOException occurs
 */
private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
        boolean escapeForwardSlash) throws IOException {
    if (out == null) {
        throw new IllegalArgumentException("The Writer must not be null");
    }
    if (str == null) {
        return;
    }

    final int length = str.length();
    int index = 0;
    while (index < length) {
        final char current = str.charAt(index++);

        // Values past 0xFFFD and lone surrogate code units share the same four-digit form.
        final boolean pastLastAllowed = current > 0xFFFD;
        final boolean surrogate = current >= 0xD800 && current <= 0xDFFF;
        if (pastLastAllowed || surrogate) {
            out.write("\\u" + CharSequenceTranslator.hex(current));
            continue;
        }

        // 0x7F-0x9F, with 0x85 deliberately passed through untouched further down.
        if (current >= 0x7F && current <= 0x9F && current != 0x85) {
            out.write("\\u00" + CharSequenceTranslator.hex(current));
            continue;
        }

        if (current < 0x20) {
            if (current == '\t') {
                out.write('\\');
                out.write('t');
            } else if (current == '\n') {
                out.write('\\');
                out.write('n');
            } else if (current == '\r') {
                out.write('\\');
                out.write('r');
            } else {
                // Pad to four digits; presumably hex() returns the digits without leading zeros - confirm.
                final String prefix = current > 0xf ? "\\u00" : "\\u000";
                out.write(prefix + CharSequenceTranslator.hex(current));
            }
            continue;
        }

        // Remaining characters are emitted as-is, optionally preceded by a single backslash.
        final boolean needsBackslash = current == '"'
                || current == '\\'
                || (current == '\'' && escapeSingleQuote)
                || (current == '/' && escapeForwardSlash);
        if (needsBackslash) {
            out.write('\\');
        }
        out.write(current);
    }
}
示例3
/**
 * Creates an evaluator that stores the given subject and translator for later use.
 *
 * @param subject the evaluator assigned to the {@code subject} field
 * @param method the translator assigned to the {@code method} field
 */
public CharSequenceTranslatorEvaluator(final Evaluator<String> subject, CharSequenceTranslator method) {
    // Plain field capture; neither argument is validated here.
    this.method = method;
    this.subject = subject;
}
示例4
/**
 * Creates a {@code CharTranslator} that holds the given translator.
 *
 * @param translator the translator to store; passed to {@code notNullOf} before being assigned
 */
private CharTranslator(CharSequenceTranslator translator) {
// NOTE(review): notNullOf is defined outside this snippet; presumably it rejects a null argument - confirm.
notNullOf(translator, "translator");
this.translator = translator;
}
示例5
/**
 * Returns the translator held by this instance.
 *
 * @return the value of the {@code translator} field
 */
public CharSequenceTranslator getTranslator() {
    return this.translator;
}
示例6
/**
 * Tests Supplementary characters.
 * <p>
 * From http://www.w3.org/International/questions/qa-escapes
 * </p>
 * <blockquote>
 * Supplementary characters are those Unicode characters that have code points higher than the characters in
 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
 * - you must use the single, code point value for that character. For example, use &amp;#x233B4; rather than &amp;#xD84C;&amp;#xDFB4;.
 * </blockquote>
 * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
 */
@Test
public void testEscapeXmlSupplementaryCharacters() {
    CharSequenceTranslator escapeXml =
            StringEscapeUtils.ESCAPE_XML.with(NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));

    // The input is the UTF-16 surrogate pair D84C DFB4, i.e. code point U+233B4 (decimal 144308).
    // The expected value is one decimal entity for the whole code point, not one per surrogate.
    assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
            escapeXml.translate("\uD84C\uDFB4"));
}
示例7
/**
 * Tests Supplementary characters.
 * <p>
 * From http://www.w3.org/International/questions/qa-escapes
 * </p>
 * <blockquote>
 * Supplementary characters are those Unicode characters that have code points higher than the characters in
 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
 * - you must use the single, code point value for that character. For example, use &amp;#x233B4; rather than &amp;#xD84C;&amp;#xDFB4;.
 * </blockquote>
 * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
 */
@Test
public void testEscapeXmlSupplementaryCharacters() {
    final CharSequenceTranslator escapeXml =
            StringEscapeUtils.ESCAPE_XML.with(NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));

    // The input is the UTF-16 surrogate pair D84C DFB4, i.e. code point U+233B4 (decimal 144308).
    // The expected value is one decimal entity for the whole code point, not one per surrogate.
    assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
            escapeXml.translate("\uD84C\uDFB4"));
}
示例8
/**
 * Tests Supplementary characters.
 * <p>
 * From http://www.w3.org/International/questions/qa-escapes
 * </p>
 * <blockquote>
 * Supplementary characters are those Unicode characters that have code points higher than the characters in
 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
 * - you must use the single, code point value for that character. For example, use &amp;#x233B4; rather than &amp;#xD84C;&amp;#xDFB4;.
 * </blockquote>
 * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
 */
@Test
public void testEscapeXmlSupplementaryCharacters() {
    CharSequenceTranslator escapeXml =
            StringEscapeUtils.ESCAPE_XML.with(NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));

    // The input is the UTF-16 surrogate pair D84C DFB4, i.e. code point U+233B4 (decimal 144308).
    // The expected value is one decimal entity for the whole code point, not one per surrogate.
    assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
            escapeXml.translate("\uD84C\uDFB4"));
}