Search in sources :

Example 1 with LevensteinDistance

use of org.apache.lucene.search.spell.LevensteinDistance in project elasticsearch by elastic.

the class AbstractScopedSettings method validate.

/**
 * Validates the value configured for {@code key} in the given settings.
 * <p>
 * When a setting is registered under {@code key}, its value is read from {@code settings}
 * (via the setting's own {@code get}), which is where any value-level validation happens.
 * When no setting is registered, an {@link IllegalArgumentException} is thrown whose message
 * lists registered keys that score above 0.7 against {@code key}, or a generic hint when
 * there are none.
 *
 * @param key      the setting key to validate
 * @param settings the settings to read the value from; its secure settings, if any, are
 *                 consulted only to word the error message
 * @throws IllegalArgumentException if no setting is registered under {@code key}
 */
public final void validate(String key, Settings settings) {
    Setting<?> setting = get(key);
    if (setting == null) {
        LevensteinDistance ld = new LevensteinDistance();
        List<Tuple<Float, String>> scoredKeys = new ArrayList<>();
        for (String k : this.keySettings.keySet()) {
            float distance = ld.getDistance(key, k);
            // only keys scoring above the threshold are offered as suggestions
            if (distance > 0.7f) {
                scoredKeys.add(new Tuple<>(distance, k));
            }
        }
        // Highest score first; equal scores are ordered by key name so the message does not
        // depend on the iteration order of the key set.
        CollectionUtil.timSort(scoredKeys, (a, b) -> {
            int byScore = b.v1().compareTo(a.v1());
            return byScore != 0 ? byScore : a.v2().compareTo(b.v2());
        });
        String msgPrefix = "unknown setting";
        SecureSettings secureSettings = settings.getSecureSettings();
        if (secureSettings != null && secureSettings.getSettingNames().contains(key)) {
            msgPrefix = "unknown secure setting";
        }
        String msg = msgPrefix + " [" + key + "]";
        List<String> keys = scoredKeys.stream().map(Tuple::v2).collect(Collectors.toList());
        if (keys.isEmpty() == false) {
            msg += " did you mean " + (keys.size() == 1 ? "[" + keys.get(0) + "]" : "any of " + keys.toString()) + "?";
        } else {
            msg += " please check that any required plugins are installed, or check the breaking changes documentation for removed settings";
        }
        throw new IllegalArgumentException(msg);
    }
    // the returned value is not needed; the call is made for the checks it performs
    setting.get(settings);
}
Also used : AbstractComponent(org.elasticsearch.common.component.AbstractComponent) Predicate(java.util.function.Predicate) Set(java.util.Set) HashMap(java.util.HashMap) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) CollectionUtil(org.apache.lucene.util.CollectionUtil) HashSet(java.util.HashSet) Consumer(java.util.function.Consumer) ExceptionsHelper(org.elasticsearch.ExceptionsHelper) List(java.util.List) TreeMap(java.util.TreeMap) Supplier(org.apache.logging.log4j.util.Supplier) Map(java.util.Map) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) BiConsumer(java.util.function.BiConsumer) Regex(org.elasticsearch.common.regex.Regex) Pattern(java.util.regex.Pattern) Tuple(org.elasticsearch.common.collect.Tuple) Collections(java.util.Collections) SortedMap(java.util.SortedMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) Tuple(org.elasticsearch.common.collect.Tuple)

Example 2 with LevensteinDistance

use of org.apache.lucene.search.spell.LevensteinDistance in project elasticsearch by elastic.

the class BaseRestHandler method unrecognized.

/**
 * Builds an error message listing the unrecognized names in a request, each followed by
 * "did you mean" suggestions drawn from {@code candidates} when any score above 0.5.
 *
 * @param request    the request whose path is reported in the message
 * @param invalids   the unrecognized names, reported in iteration order
 * @param candidates the names that may be suggested as replacements
 * @param detail     a noun describing what was unrecognized; an "s" is appended when there
 *                   is more than one invalid name
 * @return the formatted message
 */
protected final String unrecognized(final RestRequest request, final Set<String> invalids, final Set<String> candidates, final String detail) {
    final StringBuilder message = new StringBuilder(
        String.format(Locale.ROOT, "request [%s] contains unrecognized %s%s: ", request.path(), detail, invalids.size() > 1 ? "s" : ""));
    // one scorer is enough for every comparison; no need to rebuild it per invalid name
    final LevensteinDistance ld = new LevensteinDistance();
    boolean first = true;
    for (final String invalid : invalids) {
        final List<Tuple<Float, String>> scoredParams = new ArrayList<>();
        for (final String candidate : candidates) {
            final float distance = ld.getDistance(invalid, candidate);
            if (distance > 0.5f) {
                scoredParams.add(new Tuple<>(distance, candidate));
            }
        }
        CollectionUtil.timSort(scoredParams, (a, b) -> {
            // sort by distance in reverse order, then parameter name for equal distances
            int compare = a.v1().compareTo(b.v1());
            if (compare != 0) {
                return -compare;
            } else {
                return a.v2().compareTo(b.v2());
            }
        });
        if (first == false) {
            message.append(", ");
        }
        message.append("[").append(invalid).append("]");
        final List<String> keys = scoredParams.stream().map(Tuple::v2).collect(Collectors.toList());
        if (keys.isEmpty() == false) {
            message.append(" -> did you mean ");
            if (keys.size() == 1) {
                message.append("[").append(keys.get(0)).append("]");
            } else {
                message.append("any of ").append(keys.toString());
            }
            message.append("?");
        }
        first = false;
    }
    return message.toString();
}
Also used : ArrayList(java.util.ArrayList) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) Tuple(org.elasticsearch.common.collect.Tuple)

Example 3 with LevensteinDistance

use of org.apache.lucene.search.spell.LevensteinDistance in project lucene-solr by apache.

the class ConjunctionSolrSpellCheckerTest method test.

/**
 * Adds two checkers built on {@link LevensteinDistance} and then one built on
 * {@link NGramDistance}, expecting the last {@code addChecker} call to be rejected with an
 * {@link IllegalArgumentException}.
 */
@Test
public void test() throws Exception {
    ConjunctionSolrSpellChecker conjunction = new ConjunctionSolrSpellChecker();
    MockSolrSpellChecker firstLevenstein = new MockSolrSpellChecker(new LevensteinDistance());
    MockSolrSpellChecker secondLevenstein = new MockSolrSpellChecker(new LevensteinDistance());
    MockSolrSpellChecker ngramChecker = new MockSolrSpellChecker(new NGramDistance());
    conjunction.addChecker(firstLevenstein);
    conjunction.addChecker(secondLevenstein);

    boolean rejected = false;
    try {
        conjunction.addChecker(ngramChecker);
    } catch (IllegalArgumentException expected) {
        // correct behavior: the mismatching checker was refused
        rejected = true;
    }
    if (!rejected) {
        fail("ConjunctionSolrSpellChecker should have thrown an exception about non-identical StringDistances.");
    }
}
Also used : NGramDistance(org.apache.lucene.search.spell.NGramDistance) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) Test(org.junit.Test)

Example 4 with LevensteinDistance

use of org.apache.lucene.search.spell.LevensteinDistance in project lucene-solr by apache.

the class AbstractLuceneSpellChecker method init.

/**
 * Configures this spell checker from {@code config}: index directory, source location,
 * suggestion comparator, string distance and (optionally) accuracy, then builds the
 * underlying {@link SpellChecker}.
 *
 * @param config the configuration to read the options from
 * @param core   the core used to resolve a relative index directory and to load
 *               configured classes
 * @return the value of {@code name}
 * @throws RuntimeException if the index cannot be initialized or the accuracy value is not
 *                          a parseable float
 */
@Override
public String init(NamedList config, SolrCore core) {
    super.init(config, core);
    indexDir = (String) config.get(INDEX_DIR);
    final String accuracyText = (String) config.get(ACCURACY);
    // A relative indexDir is placed under core.getDataDir()
    if (indexDir != null && !new File(indexDir).isAbsolute()) {
        indexDir = core.getDataDir() + File.separator + indexDir;
    }
    sourceLocation = (String) config.get(LOCATION);

    // Pick the comparator: default when unset or SCORE_COMP, frequency-based for FREQ_COMP,
    // otherwise treat the value as a fully qualified class name.
    final String compClass = (String) config.get(COMPARATOR_CLASS);
    final Comparator<SuggestWord> comp;
    if (compClass == null || compClass.equalsIgnoreCase(SCORE_COMP)) {
        comp = SuggestWordQueue.DEFAULT_COMPARATOR;
    } else if (compClass.equalsIgnoreCase(FREQ_COMP)) {
        comp = new SuggestWordFrequencyComparator();
    } else {
        comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
    }

    final String strDistanceName = (String) config.get(STRING_DISTANCE);
    if (strDistanceName == null) {
        sd = new LevensteinDistance();
    } else {
        //TODO: Figure out how to configure options.  Where's Spring when you need it?  Or at least BeanUtils...
        sd = core.getResourceLoader().newInstance(strDistanceName, StringDistance.class);
    }

    try {
        initIndex();
        spellChecker = new SpellChecker(index, sd, comp);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    if (accuracyText != null) {
        try {
            this.accuracy = Float.parseFloat(accuracyText);
            spellChecker.setAccuracy(this.accuracy);
        } catch (NumberFormatException e) {
            throw new RuntimeException("Unparseable accuracy given for dictionary: " + name, e);
        }
    }
    return name;
}
Also used : SuggestWordFrequencyComparator(org.apache.lucene.search.spell.SuggestWordFrequencyComparator) StringDistance(org.apache.lucene.search.spell.StringDistance) SuggestWord(org.apache.lucene.search.spell.SuggestWord) SpellChecker(org.apache.lucene.search.spell.SpellChecker) IOException(java.io.IOException) File(java.io.File) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) SuggestWordFrequencyComparator(org.apache.lucene.search.spell.SuggestWordFrequencyComparator) Comparator(java.util.Comparator)

Example 5 with LevensteinDistance

use of org.apache.lucene.search.spell.LevensteinDistance in project elasticsearch by elastic.

the class InstallPluginCommand method checkMisspelledPlugin.

/**
 * Finds the entries of {@code OFFICIAL_PLUGINS} that closely resemble {@code pluginId}.
 *
 * @param pluginId the plugin id to compare against the official names
 * @return the official names scoring above 0.7, highest score first; empty if none do
 */
private List<String> checkMisspelledPlugin(String pluginId) {
    final LevensteinDistance scorer = new LevensteinDistance();
    final List<Tuple<Float, String>> candidates = new ArrayList<>();
    for (String name : OFFICIAL_PLUGINS) {
        final float score = scorer.getDistance(pluginId, name);
        if (score > 0.7f) {
            candidates.add(new Tuple<>(score, name));
        }
    }
    // descending by score
    CollectionUtil.timSort(candidates, (left, right) -> Float.compare(right.v1(), left.v1()));
    final List<String> suggestions = new ArrayList<>(candidates.size());
    for (Tuple<Float, String> candidate : candidates) {
        suggestions.add(candidate.v2());
    }
    return suggestions;
}
Also used : Arrays(java.util.Arrays) URLDecoder(java.net.URLDecoder) URL(java.net.URL) VERBOSE(org.elasticsearch.cli.Terminal.Verbosity.VERBOSE) Environment(org.elasticsearch.env.Environment) DirectoryStream(java.nio.file.DirectoryStream) Locale(java.util.Locale) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) OptionSet(joptsimple.OptionSet) SimpleFileVisitor(java.nio.file.SimpleFileVisitor) OptionSpec(joptsimple.OptionSpec) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) PosixFilePermission(java.nio.file.attribute.PosixFilePermission) Set(java.util.Set) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Objects(java.util.Objects) FileVisitResult(java.nio.file.FileVisitResult) List(java.util.List) Version(org.elasticsearch.Version) JarHell(org.elasticsearch.bootstrap.JarHell) ZipInputStream(java.util.zip.ZipInputStream) PosixFileAttributeView(java.nio.file.attribute.PosixFileAttributeView) TreeSet(java.util.TreeSet) StandardCopyOption(java.nio.file.StandardCopyOption) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) URLConnection(java.net.URLConnection) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) OutputStream(java.io.OutputStream) FileSystemUtils(org.elasticsearch.common.io.FileSystemUtils) Files(java.nio.file.Files) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) InputStreamReader(java.io.InputStreamReader) MessageDigests(org.elasticsearch.common.hash.MessageDigests) CollectionUtil(org.apache.lucene.util.CollectionUtil) SuppressForbidden(org.elasticsearch.common.SuppressForbidden) ExitCodes(org.elasticsearch.cli.ExitCodes) EnvironmentAwareCommand(org.elasticsearch.cli.EnvironmentAwareCommand) BufferedReader(java.io.BufferedReader) Tuple(org.elasticsearch.common.collect.Tuple) 
Collections(java.util.Collections) Terminal(org.elasticsearch.cli.Terminal) UserException(org.elasticsearch.cli.UserException) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) Tuple(org.elasticsearch.common.collect.Tuple)

Aggregations

LevensteinDistance (org.apache.lucene.search.spell.LevensteinDistance)6 ArrayList (java.util.ArrayList)4 HashSet (java.util.HashSet)3 IOException (java.io.IOException)2 Collections (java.util.Collections)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2 Collectors (java.util.stream.Collectors)2 StringDistance (org.apache.lucene.search.spell.StringDistance)2 SuggestWord (org.apache.lucene.search.spell.SuggestWord)2 Tuple (org.elasticsearch.common.collect.Tuple)2 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 OutputStream (java.io.OutputStream)1 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1 URLDecoder (java.net.URLDecoder)1