Usage of com.ibm.icu.text.Collator in the Apache lucene-solr project: class ICUCollationField, method setup.
/**
 * Configures this field's collator from the factory arguments, consuming each
 * recognized parameter from {@code args} and installing the resulting analyzer.
 *
 * <p>Exactly one of {@code custom} (a rules file resource) or {@code locale}
 * must be supplied; all remaining parameters are optional tunings.
 *
 * @param loader resource loader used to read a custom ruleset file
 * @param args   factory parameters; recognized keys are removed as they are read
 */
private void setup(ResourceLoader loader, Map<String, String> args) {
    final String custom = args.remove("custom");
    final String localeID = args.remove("locale");
    final String strength = args.remove("strength");
    final String decomposition = args.remove("decomposition");
    final String alternate = args.remove("alternate");
    final String caseLevel = args.remove("caseLevel");
    final String caseFirst = args.remove("caseFirst");
    final String numeric = args.remove("numeric");
    final String variableTop = args.remove("variableTop");
    // Exactly one collator source must be configured.
    if (custom == null && localeID == null) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Either custom or locale is required.");
    }
    if (custom != null && localeID != null) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot specify both locale and custom. " + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. " + "Then save the entire customized ruleset to a file, and use with the custom parameter");
    }
    // Either a system collator for the locale, or one built from a custom ruleset.
    final Collator collator =
        (localeID != null) ? createFromLocale(localeID) : createFromRules(custom, loader);
    // Optional strength override; unset means the collator's default.
    if (strength != null) {
        final int strengthFlag;
        if (strength.equalsIgnoreCase("primary")) {
            strengthFlag = Collator.PRIMARY;
        } else if (strength.equalsIgnoreCase("secondary")) {
            strengthFlag = Collator.SECONDARY;
        } else if (strength.equalsIgnoreCase("tertiary")) {
            strengthFlag = Collator.TERTIARY;
        } else if (strength.equalsIgnoreCase("quaternary")) {
            strengthFlag = Collator.QUATERNARY;
        } else if (strength.equalsIgnoreCase("identical")) {
            strengthFlag = Collator.IDENTICAL;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid strength: " + strength);
        }
        collator.setStrength(strengthFlag);
    }
    // Optional decomposition override; unset means the collator's default.
    if (decomposition != null) {
        final int decompositionFlag;
        if (decomposition.equalsIgnoreCase("no")) {
            decompositionFlag = Collator.NO_DECOMPOSITION;
        } else if (decomposition.equalsIgnoreCase("canonical")) {
            decompositionFlag = Collator.CANONICAL_DECOMPOSITION;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid decomposition: " + decomposition);
        }
        collator.setDecomposition(decompositionFlag);
    }
    // Expert options below exist only on RuleBasedCollator; concrete ICU
    // collators produced above are always RuleBasedCollator instances.
    final RuleBasedCollator ruleBased = (RuleBasedCollator) collator;
    if (alternate != null) {
        final boolean shifted;
        if (alternate.equalsIgnoreCase("shifted")) {
            shifted = true;
        } else if (alternate.equalsIgnoreCase("non-ignorable")) {
            shifted = false;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid alternate: " + alternate);
        }
        ruleBased.setAlternateHandlingShifted(shifted);
    }
    if (caseLevel != null) {
        ruleBased.setCaseLevel(Boolean.parseBoolean(caseLevel));
    }
    if (caseFirst != null) {
        // "lower" and "upper" select different setters; anything else is rejected.
        if (caseFirst.equalsIgnoreCase("lower")) {
            ruleBased.setLowerCaseFirst(true);
        } else if (caseFirst.equalsIgnoreCase("upper")) {
            ruleBased.setUpperCaseFirst(true);
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid caseFirst: " + caseFirst);
        }
    }
    if (numeric != null) {
        ruleBased.setNumericCollation(Boolean.parseBoolean(numeric));
    }
    if (variableTop != null) {
        ruleBased.setVariableTop(variableTop);
    }
    analyzer = new ICUCollationKeyAnalyzer(collator);
}
Usage of com.ibm.icu.text.Collator in the Apache lucene-solr project: class TestICUCollationDocValuesField, method testRanges.
/**
 * Indexes random simple strings alongside their collation keys, then runs
 * random range checks comparing string ranges against collated-key ranges.
 */
public void testRanges() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    Document document = new Document();
    Field stored = newField("field", "", StringField.TYPE_STORED);
    // The default collator honors -Dtests.locale.
    Collator coll = Collator.getInstance();
    if (random().nextBoolean()) {
        coll.setStrength(Collator.PRIMARY);
    }
    ICUCollationDocValuesField collated = new ICUCollationDocValuesField("collated", coll);
    document.add(stored);
    document.add(collated);
    final int docCount = atLeast(500);
    for (int d = 0; d < docCount; d++) {
        String text = TestUtil.randomSimpleString(random());
        stored.setStringValue(text);
        collated.setStringValue(text);
        writer.addDocument(document);
    }
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    final int checkCount = atLeast(100);
    for (int c = 0; c < checkCount; c++) {
        String lo = TestUtil.randomSimpleString(random());
        String hi = TestUtil.randomSimpleString(random());
        BytesRef loKey = new BytesRef(coll.getCollationKey(lo).toByteArray());
        BytesRef hiKey = new BytesRef(coll.getCollationKey(hi).toByteArray());
        doTestRanges(searcher, lo, hi, loKey, hiKey, coll);
    }
    reader.close();
    directory.close();
}
Usage of com.ibm.icu.text.Collator in the Apache lucene-solr project: class TestICUCollationKeyAnalyzer, method testThreadSafe.
/**
 * Repeatedly builds a German IDENTICAL-strength collation analyzer and checks
 * that it is safe to use from multiple threads.
 */
public void testThreadSafe() throws Exception {
    final int rounds = 20 * RANDOM_MULTIPLIER;
    for (int round = 0; round < rounds; round++) {
        // Fresh collator per round; IDENTICAL is the strictest strength.
        Collator germanCollator = Collator.getInstance(Locale.GERMAN);
        germanCollator.setStrength(Collator.IDENTICAL);
        Analyzer analyzer = new ICUCollationKeyAnalyzer(germanCollator);
        assertThreadSafe(analyzer);
        analyzer.close();
    }
}
Usage of com.ibm.icu.text.Collator in the Apache lucene-solr project: class TestICUCollationField, method setupSolrHome.
/**
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * @return absolute path of the temporary Solr home directory that was created
 * @throws Exception if directory/file setup or rule generation fails
 */
public static String setupSolrHome() throws Exception {
    String tmpFile = createTempDir().toFile().getAbsolutePath();
    // make data and conf dirs
    new File(tmpFile + "/collection1", "data").mkdirs();
    File confDir = new File(tmpFile + "/collection1", "conf");
    confDir.mkdirs();
    // copy over configuration files
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
    // NOTE(review): the last tailoring maps both "ue" and "UE" to lowercase "ü";
    // by symmetry with the "ae"/"AE" and "oe"/"OE" pairs, "UE" would be expected
    // to tailor to "Ü". Left byte-identical because changing it alters the
    // generated ruleset the schema under test relies on — confirm upstream intent.
    String DIN5007_2_tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , ü";
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();
    final String osFileName = "customrules.dat";
    // FIX: use try-with-resources so the stream is closed even when the write
    // throws (the original called close() only on the success path, leaking the
    // descriptor on failure).
    try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
        IOUtils.write(tailoredRules, os, "UTF-8");
    }
    // Randomly exercise both the in-memory and on-disk resource-loading paths.
    final ResourceLoader loader;
    if (random().nextBoolean()) {
        loader = new StringMockResourceLoader(tailoredRules);
    } else {
        loader = new FilesystemResourceLoader(confDir.toPath());
    }
    final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
    // Round-trip check: rules written to disk must reconstruct an equal collator.
    assertEquals(tailoredCollator, readCollator);
    return tmpFile;
}
Usage of com.ibm.icu.text.Collator in the Eclipse eclipse.platform.text project: class ReplaceRefactoring, method checkFinalConditions.
// Validates the pending replace operation and assembles the composite change.
// Fails fast when no replace string was set; otherwise builds one TextChange per
// file (files visited in collator-sorted path order) and wraps them, plus a
// search-result update, in a single synthetic CompositeChange stored in fChange.
@Override
public RefactoringStatus checkFinalConditions(IProgressMonitor pm) throws CoreException, OperationCanceledException {
if (fReplaceString == null) {
return RefactoringStatus.createFatalErrorStatus(SearchMessages.ReplaceRefactoring_error_no_replace_string);
}
// Only regex searches need a compiled pattern; pattern stays null otherwise.
Pattern pattern = null;
FileSearchQuery query = getQuery();
if (query.isRegexSearch()) {
pattern = createSearchPattern(query);
}
RefactoringStatus resultingStatus = new RefactoringStatus();
Collection<IFile> allFilesSet = fMatches.keySet();
IFile[] allFiles = allFilesSet.toArray(new IFile[allFilesSet.size()]);
// Sort files by full path using a locale-aware collator so changes are
// presented in a stable, user-friendly order.
Arrays.sort(allFiles, new Comparator<IFile>() {
private Collator fCollator = Collator.getInstance();
@Override
public int compare(IFile o1, IFile o2) {
String p1 = o1.getFullPath().toString();
String p2 = o2.getFullPath().toString();
return fCollator.compare(p1, p2);
}
});
// May add fatal errors (e.g. files not writable); abort before building changes.
checkFilesToBeChanged(allFiles, resultingStatus);
if (resultingStatus.hasFatalError()) {
return resultingStatus;
}
CompositeChange compositeChange = new CompositeChange(SearchMessages.ReplaceRefactoring_composite_change_name);
// Synthetic: the composite is presentation-only grouping, not a user-level change.
compositeChange.markAsSynthetic();
ArrayList<MatchGroup> matchGroups = new ArrayList<>();
boolean hasChanges = false;
try {
for (IFile file : allFiles) {
Set<FileMatch> bucket = fMatches.get(file);
if (!bucket.isEmpty()) {
try {
// createFileChange may return null (e.g. all matches filtered out);
// only non-null changes count toward hasChanges.
TextChange change = createFileChange(file, pattern, bucket, resultingStatus, matchGroups);
if (change != null) {
compositeChange.add(change);
hasChanges = true;
}
} catch (CoreException e) {
// A file-access failure aborts the whole operation with context
// (file name + cause message) rather than skipping the file.
String message = Messages.format(SearchMessages.ReplaceRefactoring_error_access_file, new Object[] { file.getName(), e.getLocalizedMessage() });
return RefactoringStatus.createFatalErrorStatus(message);
}
}
}
} catch (PatternSyntaxException e) {
// Raised while applying the regex replacement expression, not while searching.
String message = Messages.format(SearchMessages.ReplaceRefactoring_error_replacement_expression, e.getLocalizedMessage());
return RefactoringStatus.createFatalErrorStatus(message);
}
// No changes and no recorded problems means there was nothing to replace.
if (!hasChanges && resultingStatus.isOK()) {
return RefactoringStatus.createFatalErrorStatus(SearchMessages.ReplaceRefactoring_error_no_changes);
}
// Also update the search-result view to drop matches consumed by this refactoring.
compositeChange.add(new SearchResultUpdateChange(fResult, matchGroups.toArray(new MatchGroup[matchGroups.size()]), fIgnoredMatches));
fChange = compositeChange;
return resultingStatus;
}
Aggregations