Search in sources :

Example 1 with DocumentEntityResponse

use of io.openk9.index.writer.entity.model.DocumentEntityResponse in project openk9 by smclab.

The method cleanCandidates of the class GetOrAddEntities.

/**
 * Reduces the candidate list to its first element when that element scores above
 * {@code _scoreThreshold}.
 *
 * <p>With more than one candidate, the first candidate's score is passed to {@code _softmax}
 * together with the scores of all candidates. With exactly one candidate, the cleaned candidate
 * name and the cleaned requested name are passed to {@code _levenshteinDistance}. When the
 * resulting value is strictly greater than {@code _scoreThreshold}, a singleton list holding the
 * first candidate is returned; in every other case the input list is returned as is.
 *
 * @param entityRequest the entity being looked up; its name and type are read
 * @param candidates candidate matches; only the first element can be kept by the filter
 * @return a singleton list with the first candidate, or {@code candidates} itself
 */
private List<DocumentEntityResponse> cleanCandidates(EntityRequest entityRequest, List<DocumentEntityResponse> candidates) {
    if (_log.isDebugEnabled()) {
        _log.debug("entity " + entityRequest.getName() + " candidates: " + candidates);
    }

    // Nothing to score: hand the (empty) list straight back.
    if (candidates.isEmpty()) {
        if (_log.isDebugEnabled()) {
            _log.debug("candidates empty");
        }
        return candidates;
    }

    DocumentEntityResponse topCandidate = candidates.get(0);
    double bestScore;

    if (candidates.size() == 1) {
        if (_log.isDebugEnabled()) {
            _log.debug("levenshtein");
        }
        bestScore = _levenshteinDistance(
            _entityNameCleanerProvider
                .getEntityNameCleaner(topCandidate.getType())
                .cleanEntityName(topCandidate.getName()),
            _entityNameCleanerProvider
                .getEntityNameCleaner(entityRequest.getType())
                .cleanEntityName(entityRequest.getName()));
    } else {
        if (_log.isDebugEnabled()) {
            _log.debug("softmax");
        }
        double[] allScores = candidates.stream()
            .mapToDouble(DocumentEntityResponse::getScore)
            .toArray();
        bestScore = _softmax(topCandidate.getScore(), allScores);
    }

    if (_log.isDebugEnabled()) {
        _log.debug("current score: " + bestScore + " score threshold: " + _scoreThreshold + " for entity " + entityRequest.getName());
    }

    // Kept as a "greater than" test so that a NaN score never passes the filter.
    boolean aboveThreshold = bestScore > _scoreThreshold;
    if (!aboveThreshold) {
        return candidates;
    }

    _log.debug("filtered with treshold");
    return Collections.singletonList(topCandidate);
}
Also used : DocumentEntityResponse(io.openk9.index.writer.entity.model.DocumentEntityResponse)

Example 2 with DocumentEntityResponse

use of io.openk9.index.writer.entity.model.DocumentEntityResponse in project openk9 by smclab.

The method cleanCandidates of the class CreateEntitiesRunnable.

/**
 * Reduces the candidate list to its first element when that element scores above
 * {@code scoreThreshold}.
 *
 * <p>With more than one candidate, the first candidate's score is passed to {@code _softmax}
 * together with the scores of all candidates. With exactly one candidate, the cleaned candidate
 * name and the cleaned requested name are passed to {@code _levenshteinDistance}.
 *
 * @param entityRequest the entity being looked up; its name and type are read
 * @param candidates candidate matches; only the first element can be kept by the filter
 * @param entityNameCleanerProvider supplies the name cleaner for a given entity type
 * @param scoreThreshold value the computed score must strictly exceed
 * @return a singleton list with the first candidate, or {@code candidates} itself
 */
private List<EntityIndex> cleanCandidates(Entity entityRequest, List<EntityIndex> candidates, EntityNameCleanerProvider entityNameCleanerProvider, float scoreThreshold) {
    // Nothing to score: hand the (empty) list straight back.
    if (candidates.isEmpty()) {
        return candidates;
    }

    EntityIndex topCandidate = candidates.get(0);
    double bestScore;

    if (candidates.size() == 1) {
        bestScore = _levenshteinDistance(
            entityNameCleanerProvider
                .get(topCandidate.getType())
                .cleanEntityName(topCandidate.getName()),
            entityNameCleanerProvider
                .get(entityRequest.getType())
                .cleanEntityName(entityRequest.getName()));
    } else {
        double[] allScores = candidates.stream()
            .mapToDouble(EntityIndex::getScore)
            .toArray();
        bestScore = _softmax(topCandidate.getScore(), allScores);
    }

    // Kept as a "greater than" test so that a NaN score never passes the filter.
    boolean aboveThreshold = bestScore > scoreThreshold;
    if (!aboveThreshold) {
        return candidates;
    }

    return Collections.singletonList(topCandidate);
}
Also used : EntityIndex(io.openk9.entity.manager.model.index.EntityIndex)

Example 3 with DocumentEntityResponse

use of io.openk9.index.writer.entity.model.DocumentEntityResponse in project openk9 by smclab.

The method _disambiguate of the class GetOrAddEntities.

/**
 * Resolves the requested entity against the candidates, adding a new entity when none matches.
 *
 * <p>Two lookups may replace the initially empty result stream:
 * <ul>
 *   <li>If {@code candidates} is not empty and {@code _containsValue} does not find the requested
 *       type in {@code _uniqueEntities}, one Cypher statement is built per element of
 *       {@code entityRequestList}. Each statement matches the node with that element's id, calls
 *       the {@code APOC_PATH_EXPAND} procedure and returns the last node of every path as
 *       {@code NODE} plus the path's node count minus one as {@code HOPS}. The statements are
 *       wrapped (as a union when there are several) in a query ordered by {@code HOPS}.</li>
 *   <li>If there is exactly one candidate and the requested type is found in
 *       {@code _uniqueEntities}, that candidate is fetched by id.</li>
 * </ul>
 *
 * <p>The first emitted entity whose id equals the id of some candidate is the result. If the
 * stream yields no such entity, a new entity is added through {@code _entityGraphRepository};
 * unless its type is found in {@code _notIndexEntities}, it is also passed to
 * {@code _insertEntity} before being returned.
 *
 * <p>NOTE(review): ids are compared with {@code ==}; that is only a value comparison if
 * {@code getId()} returns a primitive on both types - confirm.
 *
 * @param candidates candidate matches for the requested entity
 * @param entityRequestList entities used as starting nodes of the path expansion
 * @param tenantId tenant under which a new entity is added
 * @param currentEntityRequest the entity being resolved; its name and type are read
 * @return the matching entity, or the newly added one
 */
private Mono<Entity> _disambiguate(List<DocumentEntityResponse> candidates, List<DocumentEntityResponse> entityRequestList, long tenantId, EntityRequest currentEntityRequest) {
    String requestedType = currentEntityRequest.getType();
    Flux<Entity> found = Flux.empty();

    if (_log.isDebugEnabled()) {
        _log.debug(Arrays.toString(_uniqueEntities));
    }

    if (!(candidates.isEmpty() || _containsValue(_uniqueEntities, requestedType))) {
        if (_log.isDebugEnabled()) {
            _log.debug("disambiguating with search entity with type " + requestedType);
        }

        // One path-expansion statement per related entity, stored in list order.
        Statement[] expansions = new Statement[entityRequestList.size()];
        int slot = 0;
        for (DocumentEntityResponse related : entityRequestList) {
            Node startNode = Cypher.node(related.getType()).named(ENTITY);
            AliasedExpression startAlias = startNode.as(ENTITY);
            SymbolicName pathName = Cypher.name(PATH);
            expansions[slot++] = Cypher.match(startNode)
                .where(Functions.id(startNode).eq(literalOf(related.getId())))
                .call(APOC_PATH_EXPAND)
                .withArgs(startAlias.getDelegate(), literalOf(null), literalOf(_labelFilter), literalOf(_minHops), literalOf(_maxHops))
                .yield(pathName)
                .returning(
                    Functions.last(Functions.nodes(pathName)).as(NODE),
                    Functions.size(Functions.nodes(pathName)).subtract(literalOf(1)).as(HOPS))
                .build();
        }

        // With no statements at all the result stream stays empty.
        if (expansions.length > 1) {
            Statement unionQuery = Cypher.call(Cypher.unionAll(expansions))
                .returning(NODE, HOPS)
                .orderBy(Cypher.name(HOPS))
                .build();
            found = _entityGraphRepository.getEntities(unionQuery);
        } else if (expansions.length == 1) {
            Statement singleQuery = Cypher.call(expansions[0])
                .returning(NODE, HOPS)
                .orderBy(Cypher.name(HOPS))
                .build();
            found = _entityGraphRepository.getEntities(singleQuery);
        }
    }

    if (candidates.size() == 1 && _containsValue(_uniqueEntities, requestedType)) {
        if (_log.isDebugEnabled()) {
            _log.debug("disambiguating entity with type " + requestedType);
        }
        DocumentEntityResponse onlyCandidate = candidates.get(0);
        found = _entityGraphRepository.getEntity(onlyCandidate.getId()).flux();
    }

    // First emitted entity that is also one of the candidates.
    Mono<Entity> firstMatch = found
        .filter(entity -> candidates.stream().anyMatch(candidate -> candidate.getId() == entity.getId()))
        .next();

    // The fallback is deferred, so the entity is only added when nothing matched.
    return firstMatch.switchIfEmpty(Mono.defer(() -> _entityGraphRepository.addEntity(tenantId, currentEntityRequest.getName(), currentEntityRequest.getType()).flatMap(created -> {
        if (!_containsValue(_notIndexEntities, created.getType())) {
            return _insertEntity(DocumentEntityRequest.builder()
                .tenantId(created.getTenantId())
                .name(created.getName())
                .type(created.getType())
                .id(created.getId())
                .build()).thenReturn(created);
        }
        return Mono.just(created);
    })));
}
Also used : ReactorStopWatch(io.openk9.common.api.reactor.util.ReactorStopWatch) Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) GraphClient(io.openk9.relationship.graph.api.client.GraphClient) JsonFactory(io.openk9.json.api.JsonFactory) DocumentEntityRequest(io.openk9.index.writer.entity.model.DocumentEntityRequest) SymbolicName(org.neo4j.cypherdsl.core.SymbolicName) IndexRequest(org.elasticsearch.action.index.IndexRequest) SearchResponse(org.elasticsearch.action.search.SearchResponse) RequestOptions(org.elasticsearch.client.RequestOptions) Cypher(org.neo4j.cypherdsl.core.Cypher) IndexWriterEntityClient(io.openk9.index.writer.entity.client.api.IndexWriterEntityClient) ReactorActionListener(io.openk9.search.client.api.ReactorActionListener) EntityNameCleanerProvider(io.openk9.entity.manager.api.EntityNameCleanerProvider) Collectors(java.util.stream.Collectors) List(java.util.List) Builder(lombok.Builder) Cypher.literalOf(org.neo4j.cypherdsl.core.Cypher.literalOf) RestHighLevelClientProvider(io.openk9.search.client.api.RestHighLevelClientProvider) XContentType(org.elasticsearch.common.xcontent.XContentType) MonoSink(reactor.core.publisher.MonoSink) SearchRequest(org.elasticsearch.action.search.SearchRequest) EntityGraphRepository(io.openk9.entity.manager.api.EntityGraphRepository) EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) Function(java.util.function.Function) WriteRequest(org.elasticsearch.action.support.WriteRequest) Component(org.osgi.service.component.annotations.Component) EntityNameCleaner(io.openk9.entity.manager.api.EntityNameCleaner) Statement(org.neo4j.cypherdsl.core.Statement) DocumentEntityResponse(io.openk9.index.writer.entity.model.DocumentEntityResponse) Cancellable(org.elasticsearch.client.Cancellable) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) IndexResponse(org.elasticsearch.action.index.IndexResponse) Activate(org.osgi.service.component.annotations.Activate) 
Node(org.neo4j.cypherdsl.core.Node) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) Logger(org.slf4j.Logger) Functions(org.neo4j.cypherdsl.core.Functions) Mono(reactor.core.publisher.Mono) Entity(io.openk9.entity.manager.model.Entity) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) Flux(reactor.core.publisher.Flux) AliasedExpression(org.neo4j.cypherdsl.core.AliasedExpression) Data(lombok.Data) Modified(org.osgi.service.component.annotations.Modified) AllArgsConstructor(lombok.AllArgsConstructor) Reference(org.osgi.service.component.annotations.Reference) Collections(java.util.Collections) NoArgsConstructor(lombok.NoArgsConstructor) Entity(io.openk9.entity.manager.model.Entity) Statement(org.neo4j.cypherdsl.core.Statement) DocumentEntityResponse(io.openk9.index.writer.entity.model.DocumentEntityResponse) Node(org.neo4j.cypherdsl.core.Node) SymbolicName(org.neo4j.cypherdsl.core.SymbolicName) AliasedExpression(org.neo4j.cypherdsl.core.AliasedExpression)

Aggregations

DocumentEntityResponse (io.openk9.index.writer.entity.model.DocumentEntityResponse)2 ReactorStopWatch (io.openk9.common.api.reactor.util.ReactorStopWatch)1 EntityGraphRepository (io.openk9.entity.manager.api.EntityGraphRepository)1 EntityNameCleaner (io.openk9.entity.manager.api.EntityNameCleaner)1 EntityNameCleanerProvider (io.openk9.entity.manager.api.EntityNameCleanerProvider)1 Entity (io.openk9.entity.manager.model.Entity)1 EntityIndex (io.openk9.entity.manager.model.index.EntityIndex)1 EntityRequest (io.openk9.entity.manager.model.payload.EntityRequest)1 IndexWriterEntityClient (io.openk9.index.writer.entity.client.api.IndexWriterEntityClient)1 DocumentEntityRequest (io.openk9.index.writer.entity.model.DocumentEntityRequest)1 JsonFactory (io.openk9.json.api.JsonFactory)1 GraphClient (io.openk9.relationship.graph.api.client.GraphClient)1 ReactorActionListener (io.openk9.search.client.api.ReactorActionListener)1 RestHighLevelClientProvider (io.openk9.search.client.api.RestHighLevelClientProvider)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 AllArgsConstructor (lombok.AllArgsConstructor)1