Search in sources :

Example 1 with Collator

use of com.ibm.icu.text.Collator in project lucene-solr by apache.

the class ICUCollationField method setup.

/**
 * Configures this field from its declared arguments and installs the resulting analyzer.
 * <p>
 * The keys {@code custom}, {@code locale}, {@code strength}, {@code decomposition},
 * {@code alternate}, {@code caseLevel}, {@code caseFirst}, {@code numeric} and
 * {@code variableTop} are removed from {@code args} as they are read. Exactly one of
 * {@code custom} / {@code locale} must be supplied.
 *
 * @param loader handed to {@code createFromRules} when a custom ruleset is requested
 * @param args   mutable argument map; the recognised keys listed above are removed from it
 * @throws SolrException with {@code SERVER_ERROR} when neither or both of custom/locale are
 *                       given, or when strength, decomposition, alternate or caseFirst carry
 *                       an unrecognised value
 */
private void setup(ResourceLoader loader, Map<String, String> args) {
    // Drain every recognised option from the map before doing any validation.
    final String custom = args.remove("custom");
    final String localeID = args.remove("locale");
    final String strength = args.remove("strength");
    final String decomposition = args.remove("decomposition");
    final String alternate = args.remove("alternate");
    final String caseLevel = args.remove("caseLevel");
    final String caseFirst = args.remove("caseFirst");
    final String numeric = args.remove("numeric");
    final String variableTop = args.remove("variableTop");

    final boolean hasCustom = custom != null;
    final boolean hasLocale = localeID != null;
    if (!hasCustom && !hasLocale) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Either custom or locale is required.");
    }
    if (hasCustom && hasLocale) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot specify both locale and custom. "
            + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. "
            + "Then save the entire customized ruleset to a file, and use with the custom parameter");
    }

    // A locale selects a system collator; otherwise the custom ruleset is loaded.
    final Collator collator = hasLocale ? createFromLocale(localeID) : createFromRules(custom, loader);

    // Strength: left at the collator's default unless explicitly requested.
    if (strength != null) {
        final int level;
        if ("primary".equalsIgnoreCase(strength)) {
            level = Collator.PRIMARY;
        } else if ("secondary".equalsIgnoreCase(strength)) {
            level = Collator.SECONDARY;
        } else if ("tertiary".equalsIgnoreCase(strength)) {
            level = Collator.TERTIARY;
        } else if ("quaternary".equalsIgnoreCase(strength)) {
            level = Collator.QUATERNARY;
        } else if ("identical".equalsIgnoreCase(strength)) {
            level = Collator.IDENTICAL;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid strength: " + strength);
        }
        collator.setStrength(level);
    }

    // Decomposition: left at the collator's default unless explicitly requested.
    if (decomposition != null) {
        final int mode;
        if ("no".equalsIgnoreCase(decomposition)) {
            mode = Collator.NO_DECOMPOSITION;
        } else if ("canonical".equalsIgnoreCase(decomposition)) {
            mode = Collator.CANONICAL_DECOMPOSITION;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid decomposition: " + decomposition);
        }
        collator.setDecomposition(mode);
    }

    // Expert options live on RuleBasedCollator; concrete subclasses are always a RuleBasedCollator.
    final RuleBasedCollator ruleBased = (RuleBasedCollator) collator;

    if (alternate != null) {
        final boolean shifted;
        if ("shifted".equalsIgnoreCase(alternate)) {
            shifted = true;
        } else if ("non-ignorable".equalsIgnoreCase(alternate)) {
            shifted = false;
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid alternate: " + alternate);
        }
        ruleBased.setAlternateHandlingShifted(shifted);
    }

    if (caseLevel != null) {
        ruleBased.setCaseLevel(Boolean.parseBoolean(caseLevel));
    }

    if (caseFirst != null) {
        if ("lower".equalsIgnoreCase(caseFirst)) {
            ruleBased.setLowerCaseFirst(true);
        } else if ("upper".equalsIgnoreCase(caseFirst)) {
            ruleBased.setUpperCaseFirst(true);
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid caseFirst: " + caseFirst);
        }
    }

    if (numeric != null) {
        ruleBased.setNumericCollation(Boolean.parseBoolean(numeric));
    }

    if (variableTop != null) {
        ruleBased.setVariableTop(variableTop);
    }

    analyzer = new ICUCollationKeyAnalyzer(collator);
}
Also used : RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) SolrException(org.apache.solr.common.SolrException) ICUCollationKeyAnalyzer(org.apache.lucene.collation.ICUCollationKeyAnalyzer) Collator(com.ibm.icu.text.Collator) RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator)

Example 2 with Collator

use of com.ibm.icu.text.Collator in project lucene-solr by apache.

the class TestICUCollationDocValuesField method testRanges.

/**
 * Indexes random simple strings together with their collated form, then hands random
 * start/end pairs (and their collation keys) to {@code doTestRanges}.
 */
public void testRanges() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    Document document = new Document();
    Field plainField = newField("field", "", StringField.TYPE_STORED);
    // uses -Dtests.locale
    Collator collator = Collator.getInstance();
    if (random().nextBoolean()) {
        collator.setStrength(Collator.PRIMARY);
    }
    ICUCollationDocValuesField collatedField = new ICUCollationDocValuesField("collated", collator);
    document.add(plainField);
    document.add(collatedField);

    // The same Document instance is re-used; only the two field values change per iteration.
    int docsLeft = atLeast(500);
    while (docsLeft-- > 0) {
        String text = TestUtil.randomSimpleString(random());
        plainField.setStringValue(text);
        collatedField.setStringValue(text);
        writer.addDocument(document);
    }

    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);

    int checksLeft = atLeast(100);
    while (checksLeft-- > 0) {
        String start = TestUtil.randomSimpleString(random());
        String end = TestUtil.randomSimpleString(random());
        byte[] lowerKey = collator.getCollationKey(start).toByteArray();
        byte[] upperKey = collator.getCollationKey(end).toByteArray();
        doTestRanges(searcher, start, end, new BytesRef(lowerKey), new BytesRef(upperKey), collator);
    }

    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) SortField(org.apache.lucene.search.SortField) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) Collator(com.ibm.icu.text.Collator)

Example 3 with Collator

use of com.ibm.icu.text.Collator in project lucene-solr by apache.

the class TestICUCollationKeyAnalyzer method testThreadSafe.

/**
 * Repeatedly builds an {@link ICUCollationKeyAnalyzer} over a German collator set to
 * {@code IDENTICAL} strength and passes it to {@code assertThreadSafe}.
 * The number of repetitions is {@code 20 * RANDOM_MULTIPLIER}.
 */
public void testThreadSafe() throws Exception {
    int iters = 20 * RANDOM_MULTIPLIER;
    for (int i = 0; i < iters; i++) {
        Collator collator = Collator.getInstance(Locale.GERMAN);
        collator.setStrength(Collator.IDENTICAL);
        // try-with-resources so the analyzer is closed even when assertThreadSafe throws;
        // previously a failing iteration skipped close().
        try (Analyzer a = new ICUCollationKeyAnalyzer(collator)) {
            assertThreadSafe(a);
        }
    }
}
Also used : Locale(java.util.Locale) Analyzer(org.apache.lucene.analysis.Analyzer) Collator(com.ibm.icu.text.Collator)

Example 4 with Collator

use of com.ibm.icu.text.Collator in project lucene-solr by apache.

the class TestICUCollationField method setupSolrHome.

/**
 * Builds a temporary Solr home containing {@code collection1/data} and {@code collection1/conf},
 * copies the ICU-collation solrconfig and schema into it, and writes a generated custom
 * ruleset to {@code customrules.dat}.
 * <p>
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 * <p>
 * Before returning, the rules are read back through {@code ICUCollationField.createFromRules}
 * (randomly via an in-memory or a filesystem resource loader) and compared to the collator
 * they were generated from.
 *
 * @return absolute path of the created temporary Solr home directory
 * @throws Exception if directory setup, file copying, rule generation or the read-back fails
 */
public static String setupSolrHome() throws Exception {
    String tmpFile = createTempDir().toFile().getAbsolutePath();
    // make data and conf dirs
    new File(tmpFile + "/collection1", "data").mkdirs();
    File confDir = new File(tmpFile + "/collection1", "conf");
    confDir.mkdirs();
    // copy over configuration files
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
    // Each umlaut is tailored next to its two-letter expansion; the upper-case expansion UE
    // must pair with upper-case Ü (it was previously paired with lower-case ü by mistake).
    String DIN5007_2_tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , Ü";
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();
    final String osFileName = "customrules.dat";
    // try-with-resources: the stream is closed even if the write fails.
    try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
        IOUtils.write(tailoredRules, os, "UTF-8");
    }
    final ResourceLoader loader;
    if (random().nextBoolean()) {
        loader = new StringMockResourceLoader(tailoredRules);
    } else {
        loader = new FilesystemResourceLoader(confDir.toPath());
    }
    // Sanity check: rules loaded through the resource loader must reproduce the same collator.
    final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
    assertEquals(tailoredCollator, readCollator);
    return tmpFile;
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader) FilesystemResourceLoader(org.apache.lucene.analysis.util.FilesystemResourceLoader) RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) FilesystemResourceLoader(org.apache.lucene.analysis.util.FilesystemResourceLoader) ULocale(com.ibm.icu.util.ULocale) FileOutputStream(java.io.FileOutputStream) File(java.io.File) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader) RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) Collator(com.ibm.icu.text.Collator)

Example 5 with Collator

use of com.ibm.icu.text.Collator in project eclipse.platform.text by eclipse.

the class ReplaceRefactoring method checkFinalConditions.

/**
 * Builds the composite change for this replace operation.
 * <p>
 * The files that have matches are sorted by full path using a {@link Collator}, passed to
 * {@code checkFilesToBeChanged}, and then a text change is created per file with a non-empty
 * match set. On success the composite change is stored in {@code fChange}.
 *
 * @param pm progress monitor (not used by this method body)
 * @return the accumulated status, or a fatal status when there is no replace string, a file
 *         cannot be accessed, the replacement expression is invalid, or nothing would change
 */
@Override
public RefactoringStatus checkFinalConditions(IProgressMonitor pm) throws CoreException, OperationCanceledException {
    if (fReplaceString == null) {
        return RefactoringStatus.createFatalErrorStatus(SearchMessages.ReplaceRefactoring_error_no_replace_string);
    }

    // A compiled pattern is only needed for regex searches; otherwise it stays null.
    FileSearchQuery searchQuery = getQuery();
    Pattern searchPattern = searchQuery.isRegexSearch() ? createSearchPattern(searchQuery) : null;

    RefactoringStatus status = new RefactoringStatus();

    Collection<IFile> matchedFiles = fMatches.keySet();
    IFile[] sortedFiles = matchedFiles.toArray(new IFile[matchedFiles.size()]);
    final Collator pathCollator = Collator.getInstance();
    Arrays.sort(sortedFiles, new Comparator<IFile>() {

        @Override
        public int compare(IFile left, IFile right) {
            String leftPath = left.getFullPath().toString();
            String rightPath = right.getFullPath().toString();
            return pathCollator.compare(leftPath, rightPath);
        }
    });

    checkFilesToBeChanged(sortedFiles, status);
    if (status.hasFatalError()) {
        return status;
    }

    CompositeChange allChanges = new CompositeChange(SearchMessages.ReplaceRefactoring_composite_change_name);
    allChanges.markAsSynthetic();

    ArrayList<MatchGroup> groups = new ArrayList<>();
    boolean anyChange = false;
    try {
        for (IFile file : sortedFiles) {
            Set<FileMatch> fileMatches = fMatches.get(file);
            if (fileMatches.isEmpty()) {
                continue;
            }
            try {
                TextChange fileChange = createFileChange(file, searchPattern, fileMatches, status, groups);
                if (fileChange != null) {
                    allChanges.add(fileChange);
                    anyChange = true;
                }
            } catch (CoreException e) {
                // A single inaccessible file aborts the whole operation with a fatal status.
                String message = Messages.format(SearchMessages.ReplaceRefactoring_error_access_file, new Object[] { file.getName(), e.getLocalizedMessage() });
                return RefactoringStatus.createFatalErrorStatus(message);
            }
        }
    } catch (PatternSyntaxException e) {
        String message = Messages.format(SearchMessages.ReplaceRefactoring_error_replacement_expression, e.getLocalizedMessage());
        return RefactoringStatus.createFatalErrorStatus(message);
    }

    if (!anyChange && status.isOK()) {
        return RefactoringStatus.createFatalErrorStatus(SearchMessages.ReplaceRefactoring_error_no_changes);
    }

    MatchGroup[] groupArray = groups.toArray(new MatchGroup[groups.size()]);
    allChanges.add(new SearchResultUpdateChange(fResult, groupArray, fIgnoredMatches));
    fChange = allChanges;
    return status;
}
Also used : Pattern(java.util.regex.Pattern) IFile(org.eclipse.core.resources.IFile) ArrayList(java.util.ArrayList) RefactoringStatus(org.eclipse.ltk.core.refactoring.RefactoringStatus) TextChange(org.eclipse.ltk.core.refactoring.TextChange) Collator(com.ibm.icu.text.Collator) CoreException(org.eclipse.core.runtime.CoreException) CompositeChange(org.eclipse.ltk.core.refactoring.CompositeChange) PatternSyntaxException(java.util.regex.PatternSyntaxException)

Aggregations

Collator (com.ibm.icu.text.Collator)6 RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator)3 ULocale (com.ibm.icu.util.ULocale)2 Pattern (java.util.regex.Pattern)2 TimeZone (com.ibm.icu.util.TimeZone)1 SystemTimeZoneType (com.ibm.icu.util.TimeZone.SystemTimeZoneType)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 Reader (java.io.Reader)1 UncheckedIOException (java.io.UncheckedIOException)1 StandardCharsets (java.nio.charset.StandardCharsets)1 DirectoryStream (java.nio.file.DirectoryStream)1 Files (java.nio.file.Files)1 Path (java.nio.file.Path)1 java.util (java.util)1 ArrayList (java.util.ArrayList)1 Locale (java.util.Locale)1 Function (java.util.function.Function)1 Matcher (java.util.regex.Matcher)1