Search in sources :

Example 1 with LevenshteinDistance

Use of org.apache.lucene.search.spell.LevenshteinDistance in project crate by crate.

In class Schemas, method getSimilarSchemas.

/**
 * Suggests up to five known schema names that closely resemble {@code schema}.
 * <p>
 * Only schemas for which {@code user} holds any {@code SCHEMA} privilege are considered.
 * Names are compared case-insensitively (lower-cased with {@link Locale#ENGLISH}) and a
 * schema qualifies when its {@link LevenshteinDistance} similarity is above 0.7.
 *
 * @param user   the user whose privileges restrict which schemas may be suggested
 * @param schema the schema name to find look-alikes for
 * @return the qualifying schema names, most similar first, at most five entries
 */
private List<String> getSimilarSchemas(User user, String schema) {
    final LevenshteinDistance similarity = new LevenshteinDistance();
    final ArrayList<Candidate> matches = new ArrayList<>();
    for (String knownSchema : schemas.keySet()) {
        // Never suggest a schema the user holds no privilege on.
        if (!user.hasAnyPrivilege(Privilege.Clazz.SCHEMA, knownSchema)) {
            continue;
        }
        final String wanted = schema.toLowerCase(Locale.ENGLISH);
        final String existing = knownSchema.toLowerCase(Locale.ENGLISH);
        // getDistance yields a similarity value: larger means closer.
        final float closeness = similarity.getDistance(wanted, existing);
        if (closeness > 0.7f) {
            matches.add(new Candidate(closeness, knownSchema));
        }
    }
    // Best match first.
    final Comparator<Candidate> byScoreAscending = Comparator.comparing((Candidate c) -> c.score);
    matches.sort(byScoreAscending.reversed());
    return matches.stream()
        .limit(5)
        .map(c -> c.name)
        .collect(Collectors.toList());
}
Also used : Tuple(io.crate.common.collections.Tuple) Privilege(io.crate.user.Privilege) DocSchemaInfoFactory(io.crate.metadata.doc.DocSchemaInfoFactory) ClusterService(org.elasticsearch.cluster.service.ClusterService) UserDefinedFunctionsMetadata(io.crate.expression.udf.UserDefinedFunctionsMetadata) BlobSchemaInfo(io.crate.metadata.blob.BlobSchemaInfo) Operation(io.crate.metadata.table.Operation) ClusterStateListener(org.elasticsearch.cluster.ClusterStateListener) PgCatalogSchemaInfo(io.crate.metadata.pgcatalog.PgCatalogSchemaInfo) Inject(org.elasticsearch.common.inject.Inject) ArrayList(java.util.ArrayList) Sets(io.crate.common.collections.Sets) HashSet(java.util.HashSet) Metadata(org.elasticsearch.cluster.metadata.Metadata) Matcher(java.util.regex.Matcher) SchemaUnknownException(io.crate.exceptions.SchemaUnknownException) SysSchemaInfo(io.crate.metadata.sys.SysSchemaInfo) Locale(java.util.Locale) Map(java.util.Map) StreamSupport(java.util.stream.StreamSupport) QualifiedName(io.crate.sql.tree.QualifiedName) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) SchemaInfo(io.crate.metadata.table.SchemaInfo) InformationSchemaInfo(io.crate.metadata.information.InformationSchemaInfo) TableInfo(io.crate.metadata.table.TableInfo) ViewMetadata(io.crate.metadata.view.ViewMetadata) Iterator(java.util.Iterator) User(io.crate.user.User) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ViewsMetadata(io.crate.metadata.view.ViewsMetadata) Set(java.util.Set) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) ClusterChangedEvent(org.elasticsearch.cluster.ClusterChangedEvent) AbstractLifecycleComponent(org.elasticsearch.common.component.AbstractLifecycleComponent) Collectors(java.util.stream.Collectors) UserDefinedFunctionMetadata(io.crate.expression.udf.UserDefinedFunctionMetadata) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Singleton(org.elasticsearch.common.inject.Singleton) 
Pattern(java.util.regex.Pattern) VisibleForTesting(io.crate.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) RelationUnknown(io.crate.exceptions.RelationUnknown) LogManager(org.apache.logging.log4j.LogManager) LevenshteinDistance(org.apache.lucene.search.spell.LevenshteinDistance) ArrayList(java.util.ArrayList) LevenshteinDistance(org.apache.lucene.search.spell.LevenshteinDistance)

Example 2 with LevenshteinDistance

Use of org.apache.lucene.search.spell.LevenshteinDistance in project crate by crate.

In class AbstractScopedSettings, method validate.

/**
 * Validates the setting registered under {@code key} against the given settings.
 * <p>
 * When no setting is registered for {@code key}, the thrown exception lists registered keys whose
 * {@code LevenshteinDistance} similarity to {@code key} is above 0.7 (most similar first), or,
 * if there are none, hints at missing plugins and removed settings.
 *
 * @param key                            the key of the setting to validate
 * @param settings                       the settings holding the value to validate
 * @param validateDependencies           true if dependent settings should be validated
 * @param validateInternalOrPrivateIndex true if internal and private index settings should be rejected
 * @throws IllegalArgumentException if the key is unknown, a required dependent setting is missing,
 *                                  an internal or private index setting is being updated, or the
 *                                  setting is otherwise invalid
 */
void validate(final String key, final Settings settings, final boolean validateDependencies, final boolean validateInternalOrPrivateIndex) {
    Setting<?> setting = getRaw(key);
    if (setting == null) {
        // Unknown key: gather similar registered keys so the error message can suggest them.
        final LevenshteinDistance similarity = new LevenshteinDistance();
        final List<Tuple<Float, String>> suggestions = new ArrayList<>();
        for (String registeredKey : this.keySettings.keySet()) {
            final float score = similarity.getDistance(key, registeredKey);
            if (score > 0.7f) {
                suggestions.add(new Tuple<>(score, registeredKey));
            }
        }
        // Order by descending score so the closest key comes first.
        CollectionUtil.timSort(suggestions, (left, right) -> right.v1().compareTo(left.v1()));
        final List<String> similarKeys = suggestions.stream().map(Tuple::v2).collect(Collectors.toList());

        final StringBuilder message = new StringBuilder("unknown setting [").append(key).append("]");
        if (similarKeys.isEmpty()) {
            message.append(" please check that any required plugins are installed,")
                .append(" or check the breaking changes documentation for removed settings");
        } else {
            message.append(" did you mean ");
            if (similarKeys.size() == 1) {
                message.append("[").append(similarKeys.get(0)).append("]");
            } else {
                message.append("any of ").append(similarKeys.toString());
            }
            message.append("?");
        }
        throw new IllegalArgumentException(message.toString());
    }

    // Dependencies are read from the raw setting before it is narrowed to its concrete form.
    final Set<Setting.SettingDependency> requiredDependencies = setting.getSettingsDependencies(key);
    if (setting.hasComplexMatcher()) {
        setting = setting.getConcreteSetting(key);
    }

    if (validateDependencies && !requiredDependencies.isEmpty()) {
        for (final Setting.SettingDependency required : requiredDependencies) {
            final Setting<?> dependency = required.getSetting();
            // validate the dependent setting is set
            if (!dependency.existsOrFallbackExists(settings)) {
                throw new IllegalArgumentException(
                    String.format(Locale.ROOT, "missing required setting [%s] for setting [%s]", dependency.getKey(), setting.getKey()));
            }
            // validate the dependent setting value
            required.validate(setting.getKey(), setting.get(settings), dependency.get(settings));
        }
    }

    // the only time that validateInternalOrPrivateIndex should be true is if this call is coming via the update settings API
    if (validateInternalOrPrivateIndex) {
        if (setting.isInternalIndex()) {
            throw new IllegalArgumentException("can not update internal setting [" + setting.getKey() + "]; this setting is managed via a dedicated API");
        }
        if (setting.isPrivateIndex()) {
            throw new IllegalArgumentException("can not update private setting [" + setting.getKey() + "]; this setting is managed by CrateDB");
        }
    }

    final Iterator<? extends Setting<?>> pendingDependencies = setting.getValidationDependencies();
    if (!pendingDependencies.hasNext()) {
        // No validation dependencies: read the value straight from the supplied settings.
        setting.get(settings);
        return;
    }

    // Fill in validation dependencies that are absent from the supplied settings
    // but present in the last applied ones, then read the value from the merged view.
    final Settings lastApplied = this.lastSettingsApplied;
    final Settings.Builder merged = Settings.builder().put(settings);
    while (pendingDependencies.hasNext()) {
        final String dependencyKey = pendingDependencies.next().getKey();
        if (!settings.hasValue(dependencyKey) && lastApplied.hasValue(dependencyKey)) {
            merged.copy(dependencyKey, lastApplied);
        }
    }
    setting.get(merged.build());
}
Also used : Tuple(io.crate.common.collections.Tuple) Iterator(java.util.Iterator) Predicate(java.util.function.Predicate) Set(java.util.Set) HashMap(java.util.HashMap) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) CollectionUtil(org.apache.lucene.util.CollectionUtil) HashSet(java.util.HashSet) Consumer(java.util.function.Consumer) ExceptionsHelper(org.elasticsearch.ExceptionsHelper) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Locale(java.util.Locale) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) Regex(org.elasticsearch.common.regex.Regex) Pattern(java.util.regex.Pattern) LogManager(org.apache.logging.log4j.LogManager) Collections(java.util.Collections) LevenshteinDistance(org.apache.lucene.search.spell.LevenshteinDistance) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) LevenshteinDistance(org.apache.lucene.search.spell.LevenshteinDistance) Tuple(io.crate.common.collections.Tuple)

Example 3 with LevenshteinDistance

Use of org.apache.lucene.search.spell.LevenshteinDistance in project crate by crate.

In class Schemas, method getSimilarTables.

/**
 * Suggests up to five table names from {@code tables} that closely resemble {@code tableName}.
 * <p>
 * Only tables for which {@code user} holds any {@code TABLE} privilege on the fully qualified
 * name are considered. The unqualified names are compared case-insensitively (lower-cased with
 * {@link Locale#ENGLISH}) and a table qualifies when its {@link LevenshteinDistance} similarity
 * is above 0.7.
 *
 * @param user      the user whose privileges restrict which tables may be suggested
 * @param tableName the table name to find look-alikes for
 * @param tables    the tables to pick suggestions from
 * @return the qualifying table names, most similar first, at most five entries
 */
private static List<String> getSimilarTables(User user, String tableName, Iterable<TableInfo> tables) {
    final LevenshteinDistance similarity = new LevenshteinDistance();
    final ArrayList<Candidate> matches = new ArrayList<>();
    for (TableInfo info : tables) {
        // Never suggest a table the user holds no privilege on.
        if (!user.hasAnyPrivilege(Privilege.Clazz.TABLE, info.ident().fqn())) {
            continue;
        }
        final String existingName = info.ident().name();
        final String wanted = tableName.toLowerCase(Locale.ENGLISH);
        final String existing = existingName.toLowerCase(Locale.ENGLISH);
        // getDistance yields a similarity value: larger means closer.
        final float closeness = similarity.getDistance(wanted, existing);
        if (closeness > 0.7f) {
            matches.add(new Candidate(closeness, existingName));
        }
    }
    // Best match first.
    final Comparator<Candidate> byScoreAscending = Comparator.comparing((Candidate c) -> c.score);
    matches.sort(byScoreAscending.reversed());
    return matches.stream()
        .limit(5)
        .map(c -> c.name)
        .collect(Collectors.toList());
}
Also used : Tuple(io.crate.common.collections.Tuple) Privilege(io.crate.user.Privilege) DocSchemaInfoFactory(io.crate.metadata.doc.DocSchemaInfoFactory) ClusterService(org.elasticsearch.cluster.service.ClusterService) UserDefinedFunctionsMetadata(io.crate.expression.udf.UserDefinedFunctionsMetadata) BlobSchemaInfo(io.crate.metadata.blob.BlobSchemaInfo) Operation(io.crate.metadata.table.Operation) ClusterStateListener(org.elasticsearch.cluster.ClusterStateListener) PgCatalogSchemaInfo(io.crate.metadata.pgcatalog.PgCatalogSchemaInfo) Inject(org.elasticsearch.common.inject.Inject) ArrayList(java.util.ArrayList) Sets(io.crate.common.collections.Sets) HashSet(java.util.HashSet) Metadata(org.elasticsearch.cluster.metadata.Metadata) Matcher(java.util.regex.Matcher) SchemaUnknownException(io.crate.exceptions.SchemaUnknownException) SysSchemaInfo(io.crate.metadata.sys.SysSchemaInfo) Locale(java.util.Locale) Map(java.util.Map) StreamSupport(java.util.stream.StreamSupport) QualifiedName(io.crate.sql.tree.QualifiedName) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) SchemaInfo(io.crate.metadata.table.SchemaInfo) InformationSchemaInfo(io.crate.metadata.information.InformationSchemaInfo) TableInfo(io.crate.metadata.table.TableInfo) ViewMetadata(io.crate.metadata.view.ViewMetadata) Iterator(java.util.Iterator) User(io.crate.user.User) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ViewsMetadata(io.crate.metadata.view.ViewsMetadata) Set(java.util.Set) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) ClusterChangedEvent(org.elasticsearch.cluster.ClusterChangedEvent) AbstractLifecycleComponent(org.elasticsearch.common.component.AbstractLifecycleComponent) Collectors(java.util.stream.Collectors) UserDefinedFunctionMetadata(io.crate.expression.udf.UserDefinedFunctionMetadata) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Singleton(org.elasticsearch.common.inject.Singleton) 
Pattern(java.util.regex.Pattern) VisibleForTesting(io.crate.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) RelationUnknown(io.crate.exceptions.RelationUnknown) LogManager(org.apache.logging.log4j.LogManager) LevenshteinDistance(org.apache.lucene.search.spell.LevenshteinDistance) ArrayList(java.util.ArrayList) LevenshteinDistance(org.apache.lucene.search.spell.LevenshteinDistance) TableInfo(io.crate.metadata.table.TableInfo)

Aggregations

Tuple (io.crate.common.collections.Tuple)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 Iterator (java.util.Iterator)3 List (java.util.List)3 Locale (java.util.Locale)3 Map (java.util.Map)3 Set (java.util.Set)3 Pattern (java.util.regex.Pattern)3 Collectors (java.util.stream.Collectors)3 LogManager (org.apache.logging.log4j.LogManager)3 Logger (org.apache.logging.log4j.Logger)3 LevenshteinDistance (org.apache.lucene.search.spell.LevenshteinDistance)3 ObjectCursor (com.carrotsearch.hppc.cursors.ObjectCursor)2 VisibleForTesting (io.crate.common.annotations.VisibleForTesting)2 Sets (io.crate.common.collections.Sets)2 RelationUnknown (io.crate.exceptions.RelationUnknown)2 SchemaUnknownException (io.crate.exceptions.SchemaUnknownException)2 UserDefinedFunctionMetadata (io.crate.expression.udf.UserDefinedFunctionMetadata)2 UserDefinedFunctionsMetadata (io.crate.expression.udf.UserDefinedFunctionsMetadata)2