Java源码示例:com.ibm.icu.text.RuleBasedCollator
示例1
/**
 * Configures strength and case level on {@code collator} from the case and
 * accent sensitivity flags of {@code specifier}.
 *
 * <ul>
 *   <li>case + accent sensitive: TERTIARY, case level off</li>
 *   <li>case sensitive only: case level on, PRIMARY</li>
 *   <li>accent sensitive only: SECONDARY, case level off</li>
 *   <li>neither: PRIMARY, case level off</li>
 * </ul>
 *
 * @param collator  the collator to configure (mutated in place)
 * @param specifier source of the case/accent sensitivity flags
 */
private static void setCollatorStrength(RuleBasedCollator collator, CollationSpecifier specifier) {
    final boolean caseMatters = specifier.caseSensitive();
    final boolean accentMatters = specifier.accentSensitive();

    // The only combination that needs the case level switched on.
    if (caseMatters && !accentMatters) {
        collator.setCaseLevel(true);
        collator.setStrength(Collator.PRIMARY);
        return;
    }

    final int strength;
    if (accentMatters) {
        strength = caseMatters ? Collator.TERTIARY : Collator.SECONDARY;
    } else {
        strength = Collator.PRIMARY;
    }
    collator.setStrength(strength);
    collator.setCaseLevel(false);
}
示例2
/**
 * Loads a custom rule set through {@code loader} and builds a
 * {@link RuleBasedCollator} from it.
 * <p>
 * The whole resource is read as UTF-8 and handed to the collator verbatim;
 * the file cannot carry comments because {@code #} may legitimately occur
 * inside the rules.
 *
 * @param fileName name of the resource that holds the rules
 * @param loader   loader used to open the resource
 * @return a collator built from the rules
 * @throws RuntimeException wrapping any failure to read the resource or to
 *         parse the rules
 */
static Collator createFromRules(String fileName, ResourceLoader loader) {
    InputStream rulesStream = null;
    try {
        rulesStream = loader.openResource(fileName);
        final String ruleText = IOUtils.toString(rulesStream, "UTF-8");
        final Collator fromRules = new RuleBasedCollator(ruleText);
        return fromRules;
    } catch (Exception cause) {
        // I/O error or invalid rules: surface both as an unchecked failure.
        throw new RuntimeException(cause);
    } finally {
        IOUtils.closeQuietly(rulesStream);
    }
}
示例3
/**
 * Builds a temporary Solr home for the custom-collation tests and returns its
 * absolute path.
 * <p>
 * Ugly, but the custom-sort test reads its rules in as a resource. Those rule
 * files are largish and JVM-specific (collation differs between JVMs), so the
 * file is generated on the fly instead of being checked in.
 * <p>
 * Steps: create {@code collection1/data} and {@code collection1/conf}, copy
 * the solrconfig/schema files, write the tailored German (DIN 5007-2) rules to
 * {@code customrules.dat}, then verify that reading the rules back through a
 * randomly chosen resource loader yields a collator equal to the tailored one.
 *
 * @return absolute path of the generated Solr home directory
 * @throws Exception if any directory, copy, or write step fails
 */
public static String setupSolrHome() throws Exception {
    String tmpFile = createTempDir().toFile().getAbsolutePath();
    // make data and conf dirs
    new File(tmpFile + "/collection1", "data").mkdirs();
    File confDir = new File(tmpFile + "/collection1", "conf");
    confDir.mkdirs();

    // copy over configuration files
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));

    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

    // Each umlaut is tailored against its two-letter expansion, in both cases.
    // The last pair must use the capital U with diaeresis; previously the
    // lowercase form was repeated, leaving the capital umlaut untailored.
    String DIN5007_2_tailorings =
        "& ae , a\u0308 & AE , A\u0308"+
        "& oe , o\u0308 & OE , O\u0308"+
        "& ue , u\u0308 & UE , U\u0308";

    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();

    final String osFileName = "customrules.dat";
    // try-with-resources so the stream is closed even when the write fails
    try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
        IOUtils.write(tailoredRules, os, "UTF-8");
    }

    // Exercise both loader flavours across runs: in-memory and filesystem.
    final ResourceLoader loader;
    if (random().nextBoolean()) {
        loader = new StringMockResourceLoader(tailoredRules);
    } else {
        loader = new FilesystemResourceLoader(confDir.toPath());
    }
    final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
    assertEquals(tailoredCollator, readCollator);

    return tmpFile;
}
示例4
/**
 * Builds a temporary Solr home for the docValues custom-collation tests and
 * returns its absolute path.
 * <p>
 * Ugly, but the custom-sort test reads its rules in as a resource. Those rule
 * files are largish and JVM-specific (collation differs between JVMs), so the
 * file is generated on the fly instead of being checked in.
 * <p>
 * Steps: create {@code collection1/data} and {@code collection1/conf}, copy
 * the solrconfig/schema files, and write the tailored German (DIN 5007-2)
 * rules to {@code customrules.dat}.
 *
 * @return absolute path of the generated Solr home directory
 * @throws Exception if any directory, copy, or write step fails
 */
public static String setupSolrHome() throws Exception {
    File tmpFile = createTempDir().toFile();
    // make data and conf dirs
    new File(tmpFile + "/collection1", "data").mkdirs();
    File confDir = new File(tmpFile + "/collection1", "conf");
    confDir.mkdirs();

    // copy over configuration files
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml"));

    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

    // Each umlaut is tailored against its two-letter expansion, in both cases.
    // The last pair must use the capital U with diaeresis; previously the
    // lowercase form was repeated, leaving the capital umlaut untailored.
    String DIN5007_2_tailorings =
        "& ae , a\u0308 & AE , A\u0308"+
        "& oe , o\u0308 & OE , O\u0308"+
        "& ue , u\u0308 & UE , U\u0308";

    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();

    // try-with-resources so the stream is closed even when the write fails
    try (FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat"))) {
        IOUtils.write(tailoredRules, os, "UTF-8");
    }

    return tmpFile.getAbsolutePath();
}
示例5
/**
 * Asserts that {@code col} is the expected Arabic collator: pinned UCA and
 * collator versions, Arabic script reordering, Latin/Arabic relative order,
 * and the exact tailoring rule string.
 *
 * @param col collator under test; must be a {@link RuleBasedCollator} for the
 *            rules check
 */
private void requireThatArabicHasCorrectRules(Collator col) {
    final int[] expectedReorder = {UScript.ARABIC};

    assertEquals("6.2.0.0", col.getUCAVersion().toString());
    assertEquals("58.0.0.6", col.getVersion().toString());

    final String actualReorder = Arrays.toString(col.getReorderCodes());
    assertEquals(Arrays.toString(expectedReorder), actualReorder);

    // Latin keeps its internal order; the Arabic letter does not sort before "a".
    assertTrue(col.compare("a", "b") < 0);
    assertTrue(col.compare("a", "aس") < 0);
    assertFalse(col.compare("س", "a") < 0);

    final String rules = ((RuleBasedCollator) col).getRules();
    assertEquals(" [reorder Arab]&ت<<ة<<<ﺔ<<<ﺓ&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ", rules);

    // Same ordering check as above, repeated after the rules have been read.
    assertFalse(col.compare("س", "a") < 0);
}
示例6
/**
 * Asserts that {@code col} is the expected Chinese collator: pinned UCA and
 * collator versions, Han script reordering, and a non-empty rule string.
 *
 * @param col collator under test; must be a {@link RuleBasedCollator} for the
 *            rules check
 */
private void requireThatChineseHasCorrectRules(Collator col) {
    final int[] expectedReorder = {UScript.HAN};

    assertEquals("8.0.0.0", col.getUCAVersion().toString());
    assertEquals("153.64.29.0", col.getVersion().toString());

    final String actualReorder = Arrays.toString(col.getReorderCodes());
    assertEquals(Arrays.toString(expectedReorder), actualReorder);

    final String rules = ((RuleBasedCollator) col).getRules();
    assertNotEquals("", rules);
}
示例7
/**
 * Creates a collator for this specifier's locale and keywords.
 * <p>
 * The locale is checked against the scheme, the keywords are applied to it,
 * and the keywords are checked again against the locale the collator reports
 * as valid. Strength is applied only when {@code shouldSetStrength()} says so.
 *
 * @return the newly created collator
 */
public RuleBasedCollator createCollator() {
    final ULocale requested = new ULocale(locale);
    checkLocale(requested, scheme);

    final ULocale withKeywords = setKeywords(requested, keywordsToValues);
    final RuleBasedCollator result =
        (RuleBasedCollator) RuleBasedCollator.getInstance(withKeywords);

    final ULocale validLocale = result.getLocale(ULocale.VALID_LOCALE);
    checkKeywords(validLocale, keywordsToValues, scheme);

    if (shouldSetStrength()) {
        setCollatorStrength(result, this);
    }
    return result;
}
示例8
/**
 * Returns an ICU collator for the given collation specifier.
 * <p>
 * One collator per specifier string is kept in {@code sourceMap}; every call
 * hands back a thawed clone of that instance. The returned collator must be
 * used in a thread-private manner.
 *
 * @param specifier the collation specifier to build a collator for
 * @return a thawed clone of the collator kept for {@code specifier}
 */
static synchronized Collator forScheme(final CollationSpecifier specifier) {
    final String cacheKey = specifier.toString();

    RuleBasedCollator template = (RuleBasedCollator) sourceMap.get(cacheKey);
    if (template == null) {
        template = specifier.createCollator();
        sourceMap.put(cacheKey, template);
    }
    return template.cloneAsThawed();
}
示例9
/**
 * Verifies that an {@code icu_collation} analyzer configured with custom
 * rules (German base rules plus DIN 5007-2 tailorings) at primary strength
 * produces identical key bytes for an umlaut and its two-letter expansion.
 *
 * @throws Exception if building the analysis chain or reading the token
 *         streams fails
 */
public void testCustomRules() throws Exception {
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));

    // Each umlaut is tailored against its two-letter expansion, in both cases.
    // The last pair must use the capital U with diaeresis; previously the
    // lowercase form was repeated, leaving the capital umlaut untailored.
    String DIN5007_2_tailorings =
        "& ae , a\u0308 & AE , A\u0308& oe , o\u0308 & OE , O\u0308& ue , u\u0308 & UE , U\u0308";

    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();

    Settings settings = Settings.builder()
        .put("index.analysis.analyzer.myAnalyzer.type", "icu_collation")
        .put("index.analysis.analyzer.myAnalyzer.rules", tailoredRules)
        .put("index.analysis.analyzer.myAnalyzer.strength", "primary")
        .build();
    ESTestCase.TestAnalysis analysis = ESTestCase.createTestAnalysis(new Index("test", "_na_"),
        settings,
        new BundlePlugin(Settings.EMPTY));
    Analyzer analyzer = analysis.indexAnalyzers.get("myAnalyzer");

    // "Töne" and "Toene" must collate identically under the tailored rules.
    String germanUmlaut = "Töne";
    TokenStream tsUmlaut = analyzer.tokenStream(null, germanUmlaut);
    BytesRef b1 = bytesFromTokenStream(tsUmlaut);

    String germanExpandedUmlaut = "Toene";
    TokenStream tsExpanded = analyzer.tokenStream(null, germanExpandedUmlaut);
    BytesRef b2 = bytesFromTokenStream(tsExpanded);

    assertTrue(compare(b1.bytes, b2.bytes) == 0);
}
示例10
/**
 * Private constructor: stores the supplied collator in the {@code collator}
 * field.
 *
 * @param rbc the rule-based collator this instance keeps a reference to
 */
private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
this.collator = rbc;
}
示例11
/**
 * Configures this field from the supplied arguments and installs the
 * resulting collation analyzer.
 * <p>
 * All recognised options are removed from {@code args}. Exactly one of
 * {@code custom} (a rules resource) or {@code locale} must be present. The
 * remaining options ({@code strength}, {@code decomposition},
 * {@code alternate}, {@code caseLevel}, {@code caseFirst}, {@code numeric},
 * {@code variableTop}) are applied only when given; otherwise the collator
 * keeps its defaults.
 *
 * @param loader resource loader used to read a custom rule set
 * @param args   field arguments; recognised keys are removed from this map
 * @throws SolrException if neither or both of custom/locale are given, or an
 *         option has an unrecognised value
 */
private void setup(ResourceLoader loader, Map<String,String> args) {
    final String custom = args.remove("custom");
    final String localeID = args.remove("locale");
    final String strength = args.remove("strength");
    final String decomposition = args.remove("decomposition");
    final String alternate = args.remove("alternate");
    final String caseLevel = args.remove("caseLevel");
    final String caseFirst = args.remove("caseFirst");
    final String numeric = args.remove("numeric");
    final String variableTop = args.remove("variableTop");

    final boolean hasCustom = custom != null;
    final boolean hasLocale = localeID != null;

    if (!hasCustom && !hasLocale) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Either custom or locale is required.");
    }
    if (hasCustom && hasLocale) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot specify both locale and custom. "
            + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. "
            + "Then save the entire customized ruleset to a file, and use with the custom parameter");
    }

    // Either a system collator for the locale, or one built from a custom ruleset.
    final Collator collator = hasLocale
        ? createFromLocale(localeID)
        : createFromRules(custom, loader);

    // Strength: resolve the constant first, then apply it once.
    if (strength != null) {
        final int level;
        if (strength.equalsIgnoreCase("primary")) {
            level = Collator.PRIMARY;
        } else if (strength.equalsIgnoreCase("secondary")) {
            level = Collator.SECONDARY;
        } else if (strength.equalsIgnoreCase("tertiary")) {
            level = Collator.TERTIARY;
        } else if (strength.equalsIgnoreCase("quaternary")) {
            level = Collator.QUATERNARY;
        } else if (strength.equalsIgnoreCase("identical")) {
            level = Collator.IDENTICAL;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid strength: " + strength);
        }
        collator.setStrength(level);
    }

    // Decomposition mode, same approach.
    if (decomposition != null) {
        final int mode;
        if (decomposition.equalsIgnoreCase("no")) {
            mode = Collator.NO_DECOMPOSITION;
        } else if (decomposition.equalsIgnoreCase("canonical")) {
            mode = Collator.CANONICAL_DECOMPOSITION;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid decomposition: " + decomposition);
        }
        collator.setDecomposition(mode);
    }

    // Expert options: concrete subclasses are always a RuleBasedCollator.
    final RuleBasedCollator ruleBased = (RuleBasedCollator) collator;

    if (alternate != null) {
        final boolean shifted = alternate.equalsIgnoreCase("shifted");
        if (!shifted && !alternate.equalsIgnoreCase("non-ignorable")) {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid alternate: " + alternate);
        }
        ruleBased.setAlternateHandlingShifted(shifted);
    }

    if (caseLevel != null) {
        ruleBased.setCaseLevel(Boolean.parseBoolean(caseLevel));
    }

    if (caseFirst != null) {
        if (caseFirst.equalsIgnoreCase("lower")) {
            ruleBased.setLowerCaseFirst(true);
        } else if (caseFirst.equalsIgnoreCase("upper")) {
            ruleBased.setUpperCaseFirst(true);
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid caseFirst: " + caseFirst);
        }
    }

    if (numeric != null) {
        ruleBased.setNumericCollation(Boolean.parseBoolean(numeric));
    }

    if (variableTop != null) {
        ruleBased.setVariableTop(variableTop);
    }

    analyzer = new ICUCollationKeyAnalyzer(collator);
}
示例12
/**
 * Checks the Unicode sort key that ICU generates for a fixed string against
 * the bytes recorded for the ICU versions this project has bundled.
 * <pre>
 * Input   : "__globalRowStore"
 *
 * Expected (ICU 3.6): [7, -124, 7, -124, 53, 63, 69, 43, 41, 63, 75, 69, 85, 77, 79, 69, 75, 49, 1, 20, 1, 126, -113, -124, -113, 8]
 * </pre>
 * The collator uses the en_US locale, tertiary strength and no decomposition.
 * Any ICU version other than 3.6 or 4.8 fails the test outright because no
 * expected key is recorded for it.
 */
public void test_ICU_Unicode_SortKey() {

    final String input = "__globalRowStore";

    // Reusable buffer that receives the raw collation key.
    final RawCollationKey keyBuffer = new RawCollationKey(128);

    // Locale, strength and decomposition mode fully determine the collator.
    final RuleBasedCollator collator =
        (RuleBasedCollator) Collator.getInstance(new Locale("en", "US"));
    collator.setStrength(Collator.TERTIARY);
    collator.setDecomposition(Collator.NO_DECOMPOSITION);

    collator.getRawCollationKey(input, keyBuffer);

    // Copy out of the buffer, dropping the trailing nul byte.
    final byte[] actual = Arrays.copyOf(keyBuffer.bytes, keyBuffer.size - 1);

    if (log.isInfoEnabled()) {
        log.info("Actual : " + Arrays.toString(actual));
    }

    // The expected key depends on the ICU version found at runtime.
    final int icuMajor = VersionInfo.ICU_VERSION.getMajor();
    final int icuMinor = VersionInfo.ICU_VERSION.getMinor();

    final byte[] expected;
    if (icuMajor == 3 && icuMinor == 6) {
        // bigdata was initially deployed against v3.6.
        expected = new byte[] {
            7, -124, 7, -124, 53, 63, 69, 43, 41, 63, 75, 69, 85,
            77, 79, 69, 75, 49, 1, 20, 1, 126, -113, -124, -113, 8 };
    } else if (icuMajor == 4 && icuMinor == 8) {
        // The next bundled version was 4.8.
        expected = new byte[] {
            6, 12, 6, 12, 51, 61, 67, 41, 39, 61, 73, 67, 83,
            75, 77, 67, 73, 47, 1, 20, 1, 126, -113, -124, -113, 8 };
    } else {
        throw new AssertionFailedError("Not an expected ICU version: "
                + VersionInfo.ICU_VERSION);
    }

    if (log.isInfoEnabled()) {
        log.info("Expected: " + Arrays.toString(expected));
    }

    final boolean keysMatch = Arrays.equals(expected, actual);
    if (!keysMatch) {
        fail("Expected: " + Arrays.toString(expected) + ", " + //
                "Actual: " + Arrays.toString(actual));
    }
}
示例13
/**
 * Creates an empty descriptor list backed by {@code store}.
 * <p>
 * Sets up the name, creation-time and last-access comparators, selects
 * time-descending as the initial sort, and prepares a primary-strength,
 * alternate-shifted root-locale collator for filtering. The handler is bound
 * to the main looper.
 *
 * @param app     owning application
 * @param store   descriptor store used by this list
 * @param maxSize value stored in {@code maxSize}
 */
BlobDescriptorList(Application app, DescriptorStore<BlobDescriptor> store, int maxSize) {
    this.app = app;
    this.store = store;
    this.maxSize = maxSize;
    this.list = new ArrayList<BlobDescriptor>();
    this.filteredList = new ArrayList<BlobDescriptor>();
    this.dataSetObservable = new DataSetObservable();
    this.filter = "";

    // Name ordering delegates to the quaternary-strength key comparator.
    keyComparator = Slob.Strength.QUATERNARY.comparator;
    nameComparatorAsc = new Comparator<BlobDescriptor>() {
        @Override
        public int compare(BlobDescriptor left, BlobDescriptor right) {
            return keyComparator.compare(left.key, right.key);
        }
    };
    nameComparatorDesc = Collections.reverseOrder(nameComparatorAsc);

    // Creation-time ordering.
    timeComparatorAsc = new Comparator<BlobDescriptor>() {
        @Override
        public int compare(BlobDescriptor left, BlobDescriptor right) {
            return Util.compare(left.createdAt, right.createdAt);
        }
    };
    timeComparatorDesc = Collections.reverseOrder(timeComparatorAsc);

    // Last-access ordering: arguments are swapped, so larger values come first.
    lastAccessComparator = new Comparator<BlobDescriptor>() {
        @Override
        public int compare(BlobDescriptor left, BlobDescriptor right) {
            return Util.compare(right.lastAccess, left.lastAccess);
        }
    };

    order = SortOrder.TIME;
    ascending = false;
    setSort(order, ascending);

    // Work on a private copy of the root collator before changing its settings.
    final Collator rootCollator = Collator.getInstance(Locale.ROOT);
    try {
        filterCollator = (RuleBasedCollator) rootCollator.clone();
    } catch (CloneNotSupportedException cloneFailure) {
        throw new RuntimeException(cloneFailure);
    }
    filterCollator.setStrength(Collator.PRIMARY);
    filterCollator.setAlternateHandlingShifted(true);

    handler = new Handler(Looper.getMainLooper());
}
示例14
/**
 * Builds the ICU collator described by this object's {@code locale},
 * {@code usage}, {@code numeric}, {@code caseFirst}, {@code sensitivity} and
 * {@code ignorePunctuation} settings.
 * <p>
 * Canonical decomposition is always enabled. {@code caseFirst} must be one of
 * "upper", "lower" or "false"; {@code sensitivity} must be one of "base",
 * "accent", "case" or "variant". Any other value is treated as an internal
 * error.
 *
 * @return the configured collator
 */
private Collator createCollator() {
    final ULocale tagged = ULocale.forLanguageTag(this.locale);
    // "search" usage cannot be set through unicode extensions (u-co-search),
    // so it is applied here as a collation keyword.
    final ULocale effective = "search".equals(usage)
        ? tagged.setKeywordValue("collation", "search")
        : tagged;

    final RuleBasedCollator result = (RuleBasedCollator) Collator.getInstance(effective);
    result.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    result.setNumericCollation(numeric);

    // Case ordering.
    if (caseFirst.equals("upper")) {
        result.setUpperCaseFirst(true);
    } else if (caseFirst.equals("lower")) {
        result.setLowerCaseFirst(true);
    } else if (caseFirst.equals("false")) {
        // Clear whichever preference the locale's collator came with.
        if (result.isLowerCaseFirst()) {
            result.setLowerCaseFirst(false);
        }
        if (result.isUpperCaseFirst()) {
            result.setUpperCaseFirst(false);
        }
    } else {
        throw new AssertionError();
    }

    // Sensitivity maps onto strength, plus the case level for "case".
    if (sensitivity.equals("base")) {
        result.setStrength(Collator.PRIMARY);
    } else if (sensitivity.equals("accent")) {
        result.setStrength(Collator.SECONDARY);
    } else if (sensitivity.equals("case")) {
        result.setStrength(Collator.PRIMARY);
        result.setCaseLevel(true);
    } else if (sensitivity.equals("variant")) {
        result.setStrength(Collator.TERTIARY);
    } else {
        throw new AssertionError();
    }

    result.setAlternateHandlingShifted(ignorePunctuation);
    return result;
}