Search in sources :

Example 1 with Entity

use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.

the class GetOrAddEntities method cleanCandidates.

/**
 * Decides whether the first candidate is good enough to stand alone.
 *
 * <p>The first element of {@code candidates} is scored: when more candidates are present
 * its score is passed to {@code _softmax} together with the scores of all candidates;
 * when it is the only candidate, {@code _levenshteinDistance} is applied to the cleaned
 * names of the candidate and of the request. If the resulting score is greater than
 * {@code _scoreThreshold}, a singleton list holding that first candidate is returned.
 *
 * @param entityRequest the request being matched; its name and type are read
 * @param candidates    the candidates to evaluate
 * @return a singleton list with the first candidate when the score exceeds the
 *         threshold, otherwise {@code candidates} itself (also when it is empty)
 */
private List<DocumentEntityResponse> cleanCandidates(EntityRequest entityRequest, List<DocumentEntityResponse> candidates) {
    if (_log.isDebugEnabled()) {
        _log.debug("entity " + entityRequest.getName() + " candidates: " + candidates);
    }

    // Nothing to score: report it and hand the (empty) list straight back.
    if (candidates.isEmpty()) {
        if (_log.isDebugEnabled()) {
            _log.debug("candidates empty");
        }
        return candidates;
    }

    DocumentEntityResponse firstCandidate = candidates.get(0);
    double score;

    if (candidates.size() == 1) {
        if (_log.isDebugEnabled()) {
            _log.debug("levenshtein");
        }
        // Single candidate: compare its cleaned name with the cleaned requested name.
        score = _levenshteinDistance(
            _entityNameCleanerProvider
                .getEntityNameCleaner(firstCandidate.getType())
                .cleanEntityName(firstCandidate.getName()),
            _entityNameCleanerProvider
                .getEntityNameCleaner(entityRequest.getType())
                .cleanEntityName(entityRequest.getName()));
    } else {
        if (_log.isDebugEnabled()) {
            _log.debug("softmax");
        }
        // Several candidates: weigh the first score against all scores.
        double[] allScores = candidates
            .stream()
            .mapToDouble(DocumentEntityResponse::getScore)
            .toArray();
        score = _softmax(firstCandidate.getScore(), allScores);
    }

    if (_log.isDebugEnabled()) {
        _log.debug("current score: " + score + " score threshold: " + _scoreThreshold + " for entity " + entityRequest.getName());
    }

    if (score > _scoreThreshold) {
        _log.debug("filtered with treshold");
        return Collections.singletonList(firstCandidate);
    }

    return candidates;
}
Also used : DocumentEntityResponse(io.openk9.index.writer.entity.model.DocumentEntityResponse)

Example 2 with Entity

use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.

the class GetOrAddEntitiesConsumer method apply.

/**
 * Resolves the {@code "entities"} field of an incoming message and stores the result
 * under {@code payload.entities}.
 *
 * <p>The {@code "entities"} node is removed from {@code objectNode}. When it is empty an
 * empty array is used. Otherwise a {@code Request} is built from the entities plus the
 * tenant and datasource ids read from {@code "datasourceContext"}, every entity is
 * disambiguated, relations are written (optionally through
 * {@code _graphClient.makeTransactional} when {@code _transactional} is set), and each
 * original entity node whose {@code tmpId} has a matching response is turned into an
 * object with {@code entityType}, {@code id} and {@code context}.
 *
 * @param objectNode the message to enrich; it is mutated and emitted by the returned Mono
 * @return a deferred Mono emitting {@code objectNode}, failing with a
 *         {@link TimeoutException} when nothing is emitted within {@code _timeout} seconds
 */
public Mono<ObjectNode> apply(ObjectNode objectNode) {
    return Mono.defer(() -> {
        ObjectNode contextJson = objectNode.get("datasourceContext").toObjectNode();
        long datasourceId = contextJson.get("datasource").get("datasourceId").asLong();
        long tenantId = contextJson.get("tenant").get("tenantId").asLong();
        JsonNode entities = objectNode.remove("entities");

        Mono<ArrayNode> entitiesField;

        if (entities.size() != 0) {
            // Re-wrap the raw entities with tenant/datasource ids so they can be
            // deserialized as a Request.
            ObjectNode requestJson = _jsonFactory.createObjectNode();
            requestJson.put("entities", entities);
            requestJson.put("tenantId", tenantId);
            requestJson.put("datasourceId", datasourceId);
            Request request = _jsonFactory.fromJson(requestJson.toString(), Request.class);

            // One context per entity: the entity itself plus every other entity of the request.
            List<RequestContext> requestContexts = request
                .getEntities()
                .stream()
                .map(current -> RequestContext
                    .builder()
                    .current(current)
                    .tenantId(request.getTenantId())
                    .datasourceId(request.getDatasourceId())
                    .rest(request
                        .getEntities()
                        .stream()
                        .filter(other -> other != current)
                        .collect(Collectors.toList()))
                    .build())
                .collect(Collectors.toList());

            Mono<List<EntityContext>> disambiguated = GetOrAddEntities.stopWatch(
                "disambiguate-all-entities",
                Flux
                    .fromIterable(requestContexts)
                    .flatMap(ctx -> GetOrAddEntities.stopWatch(
                        "disambiguate-" + ctx.getCurrent().getName(),
                        Mono.<EntityContext>create(sink -> _startDisambiguation.disambiguate(ctx, sink))))
                    .collectList());

            Mono<ResponseList> relationsWritten = disambiguated.flatMap(
                contexts -> GetOrAddEntities.stopWatch("write-relations", writeRelations(contexts)));

            Mono<ResponseList> responses;
            if (_transactional) {
                responses = _graphClient.makeTransactional(relationsWritten);
            } else {
                responses = relationsWritten;
            }

            entitiesField = responses.map(responseListDTO -> {
                List<Response> resolved = responseListDTO.getResponse();
                ArrayNode sourceNodes = entities.toArrayNode();
                ArrayNode output = _jsonFactory.createArrayNode();
                for (JsonNode sourceNode : sourceNodes) {
                    // Pair the original node with the first response carrying the same tmpId;
                    // nodes without a match are left out of the output.
                    resolved
                        .stream()
                        .filter(candidate -> sourceNode.get("tmpId").asLong() == candidate.getTmpId())
                        .findFirst()
                        .ifPresent(match -> {
                            Entity entity = match.getEntity();
                            ObjectNode item = _jsonFactory.createObjectNode();
                            item.put("entityType", entity.getType());
                            item.put("id", entity.getId());
                            item.put("context", sourceNode.get("context"));
                            output.add(item);
                        });
                }
                return output;
            });
        } else {
            entitiesField = Mono.just(_jsonFactory.createArrayNode());
        }

        Mono<ObjectNode> enriched = entitiesField.map(entitiesArray -> {
            ObjectNode payload = objectNode.get("payload").toObjectNode();
            payload.set("entities", entitiesArray);
            return objectNode;
        });

        return enriched.timeout(
            Duration.ofSeconds(_timeout),
            Mono.error(new TimeoutException("timeout on entities count: " + entities.size() + " (Did not observe any item or terminal signal within " + Duration.ofSeconds(_timeout).toMillis() + "ms)")));
    });
}
Also used : Response(io.openk9.entity.manager.model.payload.Response) SenderReactor(io.openk9.ingestion.api.SenderReactor) Disposable(reactor.core.Disposable) JsonNode(io.openk9.json.api.JsonNode) LoggerFactory(org.slf4j.LoggerFactory) Tuples(reactor.util.function.Tuples) Tuple2(reactor.util.function.Tuple2) TimeoutException(java.util.concurrent.TimeoutException) EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) GraphClient(io.openk9.relationship.graph.api.client.GraphClient) ArrayNode(io.openk9.json.api.ArrayNode) JsonFactory(io.openk9.json.api.JsonFactory) ArrayList(java.util.ArrayList) Component(org.osgi.service.component.annotations.Component) Statement(org.neo4j.cypherdsl.core.Statement) Duration(java.time.Duration) OutboundMessageFactory(io.openk9.ingestion.api.OutboundMessageFactory) Schedulers(reactor.core.scheduler.Schedulers) Request(io.openk9.entity.manager.model.payload.Request) Cypher(org.neo4j.cypherdsl.core.Cypher) Activate(org.osgi.service.component.annotations.Activate) Node(org.neo4j.cypherdsl.core.Node) Binding(io.openk9.ingestion.api.Binding) Logger(org.slf4j.Logger) Deactivate(org.osgi.service.component.annotations.Deactivate) Functions(org.neo4j.cypherdsl.core.Functions) RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) Mono(reactor.core.publisher.Mono) Entity(io.openk9.entity.manager.model.Entity) Collectors(java.util.stream.Collectors) Flux(reactor.core.publisher.Flux) List(java.util.List) Stream(java.util.stream.Stream) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ReceiverReactor(io.openk9.ingestion.api.ReceiverReactor) Modified(org.osgi.service.component.annotations.Modified) Optional(java.util.Optional) ObjectNode(io.openk9.json.api.ObjectNode) Reference(org.osgi.service.component.annotations.Reference) Cypher.literalOf(org.neo4j.cypherdsl.core.Cypher.literalOf) Entity(io.openk9.entity.manager.model.Entity) ObjectNode(io.openk9.json.api.ObjectNode) Optional(java.util.Optional) 
EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) Request(io.openk9.entity.manager.model.payload.Request) RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) JsonNode(io.openk9.json.api.JsonNode) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ArrayList(java.util.ArrayList) List(java.util.List) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ArrayNode(io.openk9.json.api.ArrayNode) TimeoutException(java.util.concurrent.TimeoutException)

Example 3 with Entity

use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.

the class GetOrAddEntitiesConsumer method writeRelations.

/**
 * Builds and writes the graph relations requested between the given entities.
 *
 * <p>For every context whose request declares relations, each context in the list is
 * checked against those relations: the first relation whose {@code to} equals that
 * context's request {@code tmpId} produces one {@code MATCH ... WHERE id(a) = .. AND
 * id(b) = .. MERGE (a)-[name]->(b)} statement. All statements are written through
 * {@code _graphClient} (combined with {@code UNION ALL} when there is more than one).
 *
 * @param entityContext the disambiguated entities together with their original requests
 * @return a deferred Mono emitting a {@code ResponseList} with one {@code Response} per
 *         context (a copy of its entity plus the request's {@code tmpId}), emitted after
 *         the write completes when there is something to write
 */
public Mono<ResponseList> writeRelations(List<EntityContext> entityContext) {
    return Mono.defer(() -> {
        List<Statement> statementList = new ArrayList<>();

        for (EntityContext source : entityContext) {
            List<RelationRequest> relations = source.getEntityRequest().getRelations();
            if (relations == null || relations.isEmpty()) {
                continue;
            }

            Entity sourceEntity = source.getEntity();

            // Collect (relation name, target entity) pairs; at most one pair per context,
            // taken from the first relation that points at it.
            List<Tuple2<String, Entity>> targets = new ArrayList<>();
            for (EntityContext candidate : entityContext) {
                for (RelationRequest relation : relations) {
                    if (candidate.getEntityRequest().getTmpId() == relation.getTo()) {
                        targets.add(Tuples.of(relation.getName(), candidate.getEntity()));
                        break;
                    }
                }
            }

            Node sourceNode = Cypher.node(sourceEntity.getType()).named("a");

            for (Tuple2<String, Entity> target : targets) {
                Entity targetEntity = target.getT2();
                Node targetNode = Cypher.node(targetEntity.getType()).named("b");
                statementList.add(
                    Cypher
                        .match(sourceNode, targetNode)
                        .where(
                            Functions.id(sourceNode).eq(literalOf(sourceEntity.getId()))
                                .and(Functions.id(targetNode).eq(literalOf(targetEntity.getId()))))
                        .merge(sourceNode.relationshipTo(targetNode, target.getT1()))
                        .build());
            }
        }

        List<Response> response = new ArrayList<>();
        for (EntityContext context : entityContext) {
            response.add(
                Response
                    .builder()
                    .entity(
                        Entity
                            .builder()
                            .name(context.getEntity().getName())
                            .id(context.getEntity().getId())
                            .tenantId(context.getEntity().getTenantId())
                            .type(context.getEntity().getType())
                            .build())
                    .tmpId(context.getEntityRequest().getTmpId())
                    .build());
        }

        int statementCount = statementList.size();

        // No relations at all: nothing to write, answer immediately.
        if (statementCount == 0) {
            return Mono.just(ResponseList.of("", response));
        }

        // A single statement is written as-is; several are combined into one UNION ALL.
        Statement toWrite = statementCount == 1
            ? statementList.get(0)
            : Cypher.unionAll(statementList.toArray(new Statement[0]));

        return _graphClient.write(toWrite).then(Mono.just(ResponseList.of("", response)));
    });
}
Also used : Response(io.openk9.entity.manager.model.payload.Response) SenderReactor(io.openk9.ingestion.api.SenderReactor) Disposable(reactor.core.Disposable) JsonNode(io.openk9.json.api.JsonNode) LoggerFactory(org.slf4j.LoggerFactory) Tuples(reactor.util.function.Tuples) Tuple2(reactor.util.function.Tuple2) TimeoutException(java.util.concurrent.TimeoutException) EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) GraphClient(io.openk9.relationship.graph.api.client.GraphClient) ArrayNode(io.openk9.json.api.ArrayNode) JsonFactory(io.openk9.json.api.JsonFactory) ArrayList(java.util.ArrayList) Component(org.osgi.service.component.annotations.Component) Statement(org.neo4j.cypherdsl.core.Statement) Duration(java.time.Duration) OutboundMessageFactory(io.openk9.ingestion.api.OutboundMessageFactory) Schedulers(reactor.core.scheduler.Schedulers) Request(io.openk9.entity.manager.model.payload.Request) Cypher(org.neo4j.cypherdsl.core.Cypher) Activate(org.osgi.service.component.annotations.Activate) Node(org.neo4j.cypherdsl.core.Node) Binding(io.openk9.ingestion.api.Binding) Logger(org.slf4j.Logger) Deactivate(org.osgi.service.component.annotations.Deactivate) Functions(org.neo4j.cypherdsl.core.Functions) RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) Mono(reactor.core.publisher.Mono) Entity(io.openk9.entity.manager.model.Entity) Collectors(java.util.stream.Collectors) Flux(reactor.core.publisher.Flux) List(java.util.List) Stream(java.util.stream.Stream) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ReceiverReactor(io.openk9.ingestion.api.ReceiverReactor) Modified(org.osgi.service.component.annotations.Modified) Optional(java.util.Optional) ObjectNode(io.openk9.json.api.ObjectNode) Reference(org.osgi.service.component.annotations.Reference) Cypher.literalOf(org.neo4j.cypherdsl.core.Cypher.literalOf) Entity(io.openk9.entity.manager.model.Entity) Statement(org.neo4j.cypherdsl.core.Statement) 
RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) JsonNode(io.openk9.json.api.JsonNode) ArrayNode(io.openk9.json.api.ArrayNode) Node(org.neo4j.cypherdsl.core.Node) ObjectNode(io.openk9.json.api.ObjectNode) ArrayList(java.util.ArrayList) Response(io.openk9.entity.manager.model.payload.Response) EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) Tuple2(reactor.util.function.Tuple2)

Example 4 with Entity

use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.

the class IndexWriterEndpoins method _insertEntity.

/**
 * Indexes the entities contained in the request body with a single bulk call.
 *
 * <p>The body is aggregated and parsed as a list of {@code DocumentEntityRequest}. Each
 * element becomes an {@code IndexRequest} on the index named {@code <tenantId>-entity};
 * all of them are added to one {@code BulkRequest} that is sent asynchronously with the
 * {@code WAIT_UNTIL} refresh policy. Cancelling the subscription cancels the pending
 * bulk call.
 *
 * @param httpRequest  the incoming request whose body carries the entities as JSON
 * @param httpResponse the response to write to
 * @return the publisher produced by {@code _httpResponseWriter}, which is given
 *         {@code "{}"} once the bulk response has arrived
 */
private Publisher<Void> _insertEntity(HttpServerRequest httpRequest, HttpServerResponse httpResponse) {
    RestHighLevelClient client = _restHighLevelClientProvider.get();

    Mono<List<DocumentEntityRequest>> parsedBody = Mono
        .from(ReactorNettyUtils.aggregateBodyAsByteArray(httpRequest))
        .map(bytes -> _jsonFactory.fromJsonList(bytes, DocumentEntityRequest.class));

    // Fold every entity into one BulkRequest.
    Mono<BulkRequest> bulkRequestMono = parsedBody
        .flatMapIterable(Function.identity())
        .map(document ->
            new IndexRequest(document.getTenantId() + "-entity")
                .source(_jsonFactory.toJson(document), XContentType.JSON))
        .reduce(new BulkRequest(), BulkRequest::add);

    Mono<BulkResponse> elasticResponse = bulkRequestMono.flatMap(bulk -> Mono.create(sink -> {
        bulk.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
        Cancellable pendingCall = client.bulkAsync(bulk, RequestOptions.DEFAULT, new ReactorActionListener<>(sink));
        // Propagate downstream cancellation to the in-flight Elasticsearch call.
        sink.onCancel(pendingCall::cancel);
    }));

    return _httpResponseWriter.write(httpResponse, elasticResponse.thenReturn("{}"));
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) JsonFactory(io.openk9.json.api.JsonFactory) DocumentEntityRequest(io.openk9.index.writer.entity.model.DocumentEntityRequest) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest) ReactorNettyUtils(io.openk9.reactor.netty.util.ReactorNettyUtils) IndexRequest(org.elasticsearch.action.index.IndexRequest) EventBus(io.vertx.core.eventbus.EventBus) Map(java.util.Map) SearchResponse(org.elasticsearch.action.search.SearchResponse) RequestOptions(org.elasticsearch.client.RequestOptions) PluginDriverManagerClient(io.openk9.plugin.driver.manager.client.api.PluginDriverManagerClient) DeleteByQueryRequest(org.elasticsearch.index.reindex.DeleteByQueryRequest) CountRequest(org.elasticsearch.client.core.CountRequest) TimeValue(org.elasticsearch.core.TimeValue) SearchHit(org.elasticsearch.search.SearchHit) ReactorActionListener(io.openk9.search.client.api.ReactorActionListener) Collection(java.util.Collection) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) Message(io.vertx.core.eventbus.Message) DatasourceClient(io.openk9.datasource.client.api.DatasourceClient) Scroll(org.elasticsearch.search.Scroll) Collectors(java.util.stream.Collectors) BundleContext(org.osgi.framework.BundleContext) HttpServerRequest(reactor.netty.http.server.HttpServerRequest) List(java.util.List) QueryBuilders.matchQuery(org.elasticsearch.index.query.QueryBuilders.matchQuery) Builder(lombok.Builder) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) RestHighLevelClientProvider(io.openk9.search.client.api.RestHighLevelClientProvider) HttpServerRoutes(reactor.netty.http.server.HttpServerRoutes) HttpServerResponse(reactor.netty.http.server.HttpServerResponse) MatchQueryBuilder(org.elasticsearch.index.query.MatchQueryBuilder) XContentType(org.elasticsearch.common.xcontent.XContentType) ClearScrollRequest(org.elasticsearch.action.search.ClearScrollRequest) 
HashMap(java.util.HashMap) SearchRequest(org.elasticsearch.action.search.SearchRequest) HttpResponseWriter(io.openk9.http.util.HttpResponseWriter) Function(java.util.function.Function) ClearScrollResponse(org.elasticsearch.action.search.ClearScrollResponse) CountResponse(org.elasticsearch.client.core.CountResponse) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) WriteRequest(org.elasticsearch.action.support.WriteRequest) Component(org.osgi.service.component.annotations.Component) Operator(org.elasticsearch.index.query.Operator) Cancellable(org.elasticsearch.client.Cancellable) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) Activate(org.osgi.service.component.annotations.Activate) BulkByScrollResponse(org.elasticsearch.index.reindex.BulkByScrollResponse) RouterHandler(io.openk9.http.web.RouterHandler) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) Logger(org.slf4j.Logger) Datasource(io.openk9.model.Datasource) Publisher(org.reactivestreams.Publisher) Mono(reactor.core.publisher.Mono) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) Data(lombok.Data) SearchScrollRequest(org.elasticsearch.action.search.SearchScrollRequest) AllArgsConstructor(lombok.AllArgsConstructor) Reference(org.osgi.service.component.annotations.Reference) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) NoArgsConstructor(lombok.NoArgsConstructor) ReactorActionListener(io.openk9.search.client.api.ReactorActionListener) DocumentEntityRequest(io.openk9.index.writer.entity.model.DocumentEntityRequest) Cancellable(org.elasticsearch.client.Cancellable) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) List(java.util.List) ArrayList(java.util.ArrayList) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) IndexRequest(org.elasticsearch.action.index.IndexRequest)

Example 5 with Entity

use of io.openk9.entity.manager.cache.model.Entity in project openk9 by smclab.

the class AssociateEntitiesRunnable method run_.

/**
 * Associates the locally owned associable entities with their ingestion and removes the
 * associated ones from the Hazelcast map.
 *
 * <p>Steps:
 * <ol>
 *   <li>Read the entries of the associable-entity map whose keys are local to this member.</li>
 *   <li>Group the entities by the ingestion id of their key.</li>
 *   <li>For each group call {@code DataService.associateEntities} with the tenant id of the
 *       group's first entity; a failure is logged (with stack trace) and the remaining
 *       groups are still processed.</li>
 *   <li>Remove every entity of the successfully associated groups from the map, pipelining
 *       the asynchronous removals; a failure here is logged as well.</li>
 * </ol>
 */
@Override
public void run_() {
    _log.info("start AssociateEntitiesRunnable");
    IMap<AssociableEntityKey, Entity> associableEntityMap = MapUtil.getAssociableEntityMap(_hazelcastInstance);
    Set<AssociableEntityKey> associableEntityKeys = associableEntityMap.localKeySet();
    Map<AssociableEntityKey, Entity> localEntityMap = associableEntityMap.getAll(associableEntityKeys);
    _log.info("ingestionKeys: " + localEntityMap.size());
    Map<String, List<Entity>> groupingByIngestionId = localEntityMap.entrySet().stream().collect(Collectors.groupingBy(e -> e.getKey().getIngestionId(), Collectors.mapping(Map.Entry::getValue, Collectors.toList())));
    List<AssociableEntityKey> entitiesToRemove = new ArrayList<>();
    List<String> ingestionIds = new ArrayList<>();
    for (Map.Entry<String, List<Entity>> entry : groupingByIngestionId.entrySet()) {
        String ingestionId = entry.getKey();
        List<Entity> v = entry.getValue();
        if (v.isEmpty()) {
            continue;
        }
        DataService dataService = CDI.current().select(DataService.class).get();
        Long tenantId = v.stream().map(Entity::getTenantId).findFirst().get();
        try {
            boolean associated = dataService.associateEntities(tenantId, ingestionId, v.stream().map(IngestionEntity::fromEntity).collect(Collectors.toList()));
            if (associated) {
                for (Entity entity : v) {
                    entitiesToRemove.add(AssociableEntityKey.of(entity.getCacheId(), entity.getIngestionId()));
                }
                // Record the ingestion id once per group rather than once per entity,
                // so the summary log below is not flooded with duplicates.
                ingestionIds.add(ingestionId);
            }
        } catch (Exception ioe) {
            // Pass the exception itself so the stack trace is not lost.
            _log.error(ioe.getMessage(), ioe);
        }
    }
    _log.info("entities associated: " + entitiesToRemove.size() + " ingestionIds: " + ingestionIds);
    try {
        // The constructor argument (10) is the pipelining depth passed to Hazelcast.
        Pipelining<Entity> pipelining = new Pipelining<>(10);
        for (AssociableEntityKey associateEntityKey : entitiesToRemove) {
            pipelining.add(associableEntityMap.removeAsync(associateEntityKey));
        }
        // Waits for the pipelined removals to complete.
        pipelining.results();
    } catch (Exception e) {
        _log.error(e.getMessage(), e);
    }
}
Also used : HazelcastInstance(com.hazelcast.core.HazelcastInstance) Entity(io.openk9.entity.manager.cache.model.Entity) Logger(org.jboss.logging.Logger) CDI(javax.enterprise.inject.spi.CDI) Set(java.util.Set) DataService(io.openk9.entity.manager.service.index.DataService) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) ArrayList(java.util.ArrayList) Pipelining(com.hazelcast.core.Pipelining) List(java.util.List) Map(java.util.Map) IngestionEntity(io.openk9.entity.manager.cache.model.IngestionEntity) HazelcastInstanceAware(com.hazelcast.core.HazelcastInstanceAware) AssociableEntityKey(io.openk9.entity.manager.cache.model.AssociableEntityKey) MapUtil(io.openk9.entity.manager.util.MapUtil) IMap(com.hazelcast.map.IMap) Entity(io.openk9.entity.manager.cache.model.Entity) IngestionEntity(io.openk9.entity.manager.cache.model.IngestionEntity) ArrayList(java.util.ArrayList) DataService(io.openk9.entity.manager.service.index.DataService) IngestionEntity(io.openk9.entity.manager.cache.model.IngestionEntity) AssociableEntityKey(io.openk9.entity.manager.cache.model.AssociableEntityKey) ArrayList(java.util.ArrayList) List(java.util.List) Pipelining(com.hazelcast.core.Pipelining) Map(java.util.Map) IMap(com.hazelcast.map.IMap)

Aggregations

ArrayList (java.util.ArrayList): 11, List (java.util.List): 11, Mono (reactor.core.publisher.Mono): 10, Collectors (java.util.stream.Collectors): 9, Node (org.neo4j.cypherdsl.core.Node): 8, Statement (org.neo4j.cypherdsl.core.Statement): 8, JsonFactory (io.openk9.json.api.JsonFactory): 7, Map (java.util.Map): 7, Cypher (org.neo4j.cypherdsl.core.Cypher): 7, Cypher.literalOf (org.neo4j.cypherdsl.core.Cypher.literalOf): 7, Functions (org.neo4j.cypherdsl.core.Functions): 7, Activate (org.osgi.service.component.annotations.Activate): 7, Component (org.osgi.service.component.annotations.Component): 7, Reference (org.osgi.service.component.annotations.Reference): 7, Entity (io.openk9.entity.manager.cache.model.Entity): 6, Entity (io.openk9.entity.manager.model.Entity): 6, EntityIndex (io.openk9.entity.manager.model.index.EntityIndex): 6, Logger (org.slf4j.Logger): 6, LoggerFactory (org.slf4j.LoggerFactory): 6, EntityKey (io.openk9.entity.manager.cache.model.EntityKey): 5