Search in sources :

Example 1 with EntityRequest

use of io.openk9.entity.manager.model.payload.EntityRequest in project openk9 by smclab.

the class GetOrAddEntities method cleanCandidates.

/**
 * Narrows a candidate list down to its first element when that element scores above
 * {@code _scoreThreshold}; otherwise returns the list unchanged.
 *
 * <p>The score of the first candidate is obtained from {@code _softmax} when there is more
 * than one candidate, and from {@code _levenshteinDistance} on the cleaned candidate and
 * request names when there is exactly one.
 *
 * @param entityRequest the entity being looked up; its name and type are read
 * @param candidates    candidate matches; the first element is the one that is scored
 * @return a singleton list holding the first candidate when its score exceeds the threshold,
 *         otherwise {@code candidates} itself (including when it is empty)
 */
private List<DocumentEntityResponse> cleanCandidates(EntityRequest entityRequest, List<DocumentEntityResponse> candidates) {
    if (_log.isDebugEnabled()) {
        _log.debug("entity " + entityRequest.getName() + " candidates: " + candidates);
    }

    // Nothing to score: hand the (empty) list straight back.
    if (candidates.isEmpty()) {
        if (_log.isDebugEnabled()) {
            _log.debug("candidates empty");
        }
        return candidates;
    }

    DocumentEntityResponse top = candidates.get(0);
    boolean severalCandidates = candidates.size() > 1;

    if (_log.isDebugEnabled()) {
        _log.debug(severalCandidates ? "softmax" : "levenshtein");
    }

    double topScore;
    if (severalCandidates) {
        double[] allScores = candidates.stream().mapToDouble(DocumentEntityResponse::getScore).toArray();
        topScore = _softmax(top.getScore(), allScores);
    } else {
        topScore = _levenshteinDistance(
            _entityNameCleanerProvider.getEntityNameCleaner(top.getType()).cleanEntityName(top.getName()),
            _entityNameCleanerProvider.getEntityNameCleaner(entityRequest.getType()).cleanEntityName(entityRequest.getName()));
    }

    if (_log.isDebugEnabled()) {
        _log.debug("current score: " + topScore + " score threshold: " + _scoreThreshold + " for entity " + entityRequest.getName());
    }

    if (topScore > _scoreThreshold) {
        _log.debug("filtered with treshold");
        return Collections.singletonList(top);
    }
    return candidates;
}
Also used : DocumentEntityResponse(io.openk9.index.writer.entity.model.DocumentEntityResponse)

Example 2 with EntityRequest

use of io.openk9.entity.manager.model.payload.EntityRequest in project openk9 by smclab.

the class GetOrAddEntitiesConsumer method apply.

/**
 * Resolves the "entities" carried by an incoming message and attaches the resolved entities
 * to the message's "payload".
 *
 * <p>All work is deferred until the returned Mono is subscribed. The "entities" field is removed
 * from {@code objectNode}; when it has no elements an empty array is used. Otherwise every entity
 * is disambiguated through {@code _startDisambiguation}, relations are written with
 * {@link #writeRelations}, and each response is matched back to its originating entity node by
 * "tmpId" to build the output array of {entityType, id, context} objects.
 *
 * @param objectNode incoming message; the fields "datasourceContext", "entities" and "payload" are read
 * @return a Mono that emits {@code objectNode} after "entities" has been set on the object obtained
 *         from its "payload" field, or fails with a {@link TimeoutException} when nothing is
 *         produced within {@code _timeout} seconds
 */
public Mono<ObjectNode> apply(ObjectNode objectNode) {
    return Mono.defer(() -> {
        ObjectNode datasourceContextJson = objectNode.get("datasourceContext").toObjectNode();
        long datasourceId = datasourceContextJson.get("datasource").get("datasourceId").asLong();
        long tenantId = datasourceContextJson.get("tenant").get("tenantId").asLong();
        // NOTE(review): the removed node is dereferenced below without a null check —
        // confirm "entities" is always present on incoming messages.
        JsonNode entities = objectNode.remove("entities");
        Mono<ArrayNode> entitiesField;
        if (entities.size() == 0) {
            // No entities to resolve: short-circuit with an empty array.
            entitiesField = Mono.just(_jsonFactory.createArrayNode());
        } else {
            // Assemble a JSON document with entities, tenantId and datasourceId and parse it into a Request.
            ObjectNode responseJson = _jsonFactory.createObjectNode();
            responseJson.put("entities", entities);
            responseJson.put("tenantId", tenantId);
            responseJson.put("datasourceId", datasourceId);
            Request request = _jsonFactory.fromJson(responseJson.toString(), Request.class);
            // One RequestContext per entity: "current" is the entity itself, "rest" is every other
            // entity of the request (filtered by reference identity, not equals).
            List<RequestContext> requestContextList = request.getEntities().stream().map(entityRequest -> RequestContext.builder().current(entityRequest).tenantId(request.getTenantId()).datasourceId(request.getDatasourceId()).rest(request.getEntities().stream().filter(er -> er != entityRequest).collect(Collectors.toList())).build()).collect(Collectors.toList());
            // Disambiguate every context via flatMap and collect the results; each step is wrapped
            // in GetOrAddEntities.stopWatch under a descriptive label.
            Mono<List<EntityContext>> disambiguateListMono = GetOrAddEntities.stopWatch("disambiguate-all-entities", Flux.fromIterable(requestContextList).flatMap(requestContext -> GetOrAddEntities.stopWatch("disambiguate-" + requestContext.getCurrent().getName(), Mono.<EntityContext>create(fluxSink -> _startDisambiguation.disambiguate(requestContext, fluxSink)))).collectList());
            Mono<ResponseList> writeRelations = disambiguateListMono.flatMap(entityContexts -> GetOrAddEntities.stopWatch("write-relations", writeRelations(entityContexts)));
            // Optionally wrap the write in a graph-client transaction, depending on the _transactional flag.
            Mono<ResponseList> responseListWrapper = _transactional ? _graphClient.makeTransactional(writeRelations) : writeRelations;
            entitiesField = responseListWrapper.map(responseListDTO -> {
                List<Response> responseList = responseListDTO.getResponse();
                ArrayNode entitiesArrayNode = entities.toArrayNode();
                ArrayNode arrayNode = _jsonFactory.createArrayNode();
                for (JsonNode node : entitiesArrayNode) {
                    // Match the response to the original entity node through its temporary id;
                    // entity nodes without a matching response are left out of the result.
                    Optional<Response> responseOptional = responseList.stream().filter(response -> node.get("tmpId").asLong() == response.getTmpId()).findFirst();
                    if (responseOptional.isPresent()) {
                        Entity entity = responseOptional.get().getEntity();
                        ObjectNode result = _jsonFactory.createObjectNode();
                        result.put("entityType", entity.getType());
                        result.put("id", entity.getId());
                        result.put("context", node.get("context"));
                        arrayNode.add(result);
                    }
                }
                return arrayNode;
            });
        }
        return entitiesField.map(entitiesArray -> {
            // NOTE(review): the array is set on the result of toObjectNode(); whether that is a view
            // of objectNode's "payload" or a copy is not visible here — confirm the update is
            // reflected in the returned objectNode.
            ObjectNode payload = objectNode.get("payload").toObjectNode();
            payload.set("entities", entitiesArray);
            return objectNode;
        }).timeout(Duration.ofSeconds(_timeout), Mono.error(new TimeoutException("timeout on entities count: " + entities.size() + " (Did not observe any item or terminal signal within " + Duration.ofSeconds(_timeout).toMillis() + "ms)")));
    });
}
Also used : Response(io.openk9.entity.manager.model.payload.Response) SenderReactor(io.openk9.ingestion.api.SenderReactor) Disposable(reactor.core.Disposable) JsonNode(io.openk9.json.api.JsonNode) LoggerFactory(org.slf4j.LoggerFactory) Tuples(reactor.util.function.Tuples) Tuple2(reactor.util.function.Tuple2) TimeoutException(java.util.concurrent.TimeoutException) EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) GraphClient(io.openk9.relationship.graph.api.client.GraphClient) ArrayNode(io.openk9.json.api.ArrayNode) JsonFactory(io.openk9.json.api.JsonFactory) ArrayList(java.util.ArrayList) Component(org.osgi.service.component.annotations.Component) Statement(org.neo4j.cypherdsl.core.Statement) Duration(java.time.Duration) OutboundMessageFactory(io.openk9.ingestion.api.OutboundMessageFactory) Schedulers(reactor.core.scheduler.Schedulers) Request(io.openk9.entity.manager.model.payload.Request) Cypher(org.neo4j.cypherdsl.core.Cypher) Activate(org.osgi.service.component.annotations.Activate) Node(org.neo4j.cypherdsl.core.Node) Binding(io.openk9.ingestion.api.Binding) Logger(org.slf4j.Logger) Deactivate(org.osgi.service.component.annotations.Deactivate) Functions(org.neo4j.cypherdsl.core.Functions) RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) Mono(reactor.core.publisher.Mono) Entity(io.openk9.entity.manager.model.Entity) Collectors(java.util.stream.Collectors) Flux(reactor.core.publisher.Flux) List(java.util.List) Stream(java.util.stream.Stream) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ReceiverReactor(io.openk9.ingestion.api.ReceiverReactor) Modified(org.osgi.service.component.annotations.Modified) Optional(java.util.Optional) ObjectNode(io.openk9.json.api.ObjectNode) Reference(org.osgi.service.component.annotations.Reference) Cypher.literalOf(org.neo4j.cypherdsl.core.Cypher.literalOf) Entity(io.openk9.entity.manager.model.Entity) ObjectNode(io.openk9.json.api.ObjectNode) Optional(java.util.Optional) 
EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) Request(io.openk9.entity.manager.model.payload.Request) RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) JsonNode(io.openk9.json.api.JsonNode) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ArrayList(java.util.ArrayList) List(java.util.List) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ArrayNode(io.openk9.json.api.ArrayNode) TimeoutException(java.util.concurrent.TimeoutException)

Example 3 with EntityRequest

use of io.openk9.entity.manager.model.payload.EntityRequest in project openk9 by smclab.

the class GetOrAddEntitiesConsumer method writeRelations.

/**
 * Builds one relationship-merge statement per resolved relation, writes them through
 * {@code _graphClient}, and reports one {@link Response} per input context.
 *
 * <p>For every context that declares relations, each context in the list is checked against those
 * relations; the first relation whose {@code to} matches the context's request {@code tmpId}
 * yields a statement matching both nodes by id and merging a relationship named after the
 * relation. Several statements are combined with {@code Cypher.unionAll}; with no statements
 * nothing is written.
 *
 * @param entityContext resolved contexts, each exposing its entity and originating request
 * @return a Mono emitting a {@link ResponseList} built from all contexts, after the write (if any) completes
 */
public Mono<ResponseList> writeRelations(List<EntityContext> entityContext) {
    return Mono.defer(() -> {
        List<Statement> merges = new ArrayList<>();

        for (EntityContext source : entityContext) {
            List<RelationRequest> relations = source.getEntityRequest().getRelations();
            if (relations == null || relations.isEmpty()) {
                continue;
            }

            Entity sourceEntity = source.getEntity();
            Node sourceNode = Cypher.node(sourceEntity.getType()).named("a");

            for (EntityContext candidate : entityContext) {
                for (RelationRequest relation : relations) {
                    if (candidate.getEntityRequest().getTmpId() == relation.getTo()) {
                        Entity targetEntity = candidate.getEntity();
                        Node targetNode = Cypher.node(targetEntity.getType()).named("b");
                        merges.add(
                            Cypher.match(sourceNode, targetNode)
                                .where(Functions.id(sourceNode).eq(literalOf(sourceEntity.getId()))
                                    .and(Functions.id(targetNode).eq(literalOf(targetEntity.getId()))))
                                .merge(sourceNode.relationshipTo(targetNode, relation.getName()))
                                .build());
                        // Only the first matching relation counts for a given candidate.
                        break;
                    }
                }
            }
        }

        List<Response> responses = new ArrayList<>();
        for (EntityContext resolved : entityContext) {
            Entity copy = Entity.builder()
                .name(resolved.getEntity().getName())
                .id(resolved.getEntity().getId())
                .tenantId(resolved.getEntity().getTenantId())
                .type(resolved.getEntity().getType())
                .build();
            responses.add(Response.builder().entity(copy).tmpId(resolved.getEntityRequest().getTmpId()).build());
        }

        if (merges.isEmpty()) {
            return Mono.just(ResponseList.of("", responses));
        }

        Statement toWrite = merges.size() == 1
            ? merges.get(0)
            : Cypher.unionAll(merges.toArray(new Statement[0]));
        return _graphClient.write(toWrite).then(Mono.just(ResponseList.of("", responses)));
    });
}
Also used : Response(io.openk9.entity.manager.model.payload.Response) SenderReactor(io.openk9.ingestion.api.SenderReactor) Disposable(reactor.core.Disposable) JsonNode(io.openk9.json.api.JsonNode) LoggerFactory(org.slf4j.LoggerFactory) Tuples(reactor.util.function.Tuples) Tuple2(reactor.util.function.Tuple2) TimeoutException(java.util.concurrent.TimeoutException) EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) GraphClient(io.openk9.relationship.graph.api.client.GraphClient) ArrayNode(io.openk9.json.api.ArrayNode) JsonFactory(io.openk9.json.api.JsonFactory) ArrayList(java.util.ArrayList) Component(org.osgi.service.component.annotations.Component) Statement(org.neo4j.cypherdsl.core.Statement) Duration(java.time.Duration) OutboundMessageFactory(io.openk9.ingestion.api.OutboundMessageFactory) Schedulers(reactor.core.scheduler.Schedulers) Request(io.openk9.entity.manager.model.payload.Request) Cypher(org.neo4j.cypherdsl.core.Cypher) Activate(org.osgi.service.component.annotations.Activate) Node(org.neo4j.cypherdsl.core.Node) Binding(io.openk9.ingestion.api.Binding) Logger(org.slf4j.Logger) Deactivate(org.osgi.service.component.annotations.Deactivate) Functions(org.neo4j.cypherdsl.core.Functions) RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) Mono(reactor.core.publisher.Mono) Entity(io.openk9.entity.manager.model.Entity) Collectors(java.util.stream.Collectors) Flux(reactor.core.publisher.Flux) List(java.util.List) Stream(java.util.stream.Stream) ResponseList(io.openk9.entity.manager.model.payload.ResponseList) ReceiverReactor(io.openk9.ingestion.api.ReceiverReactor) Modified(org.osgi.service.component.annotations.Modified) Optional(java.util.Optional) ObjectNode(io.openk9.json.api.ObjectNode) Reference(org.osgi.service.component.annotations.Reference) Cypher.literalOf(org.neo4j.cypherdsl.core.Cypher.literalOf) Entity(io.openk9.entity.manager.model.Entity) Statement(org.neo4j.cypherdsl.core.Statement) 
RelationRequest(io.openk9.entity.manager.model.payload.RelationRequest) JsonNode(io.openk9.json.api.JsonNode) ArrayNode(io.openk9.json.api.ArrayNode) Node(org.neo4j.cypherdsl.core.Node) ObjectNode(io.openk9.json.api.ObjectNode) ArrayList(java.util.ArrayList) Response(io.openk9.entity.manager.model.payload.Response) EntityRequest(io.openk9.entity.manager.model.payload.EntityRequest) Tuple2(reactor.util.function.Tuple2)

Example 4 with EntityRequest

use of io.openk9.entity.manager.model.payload.EntityRequest in project openk9 by smclab.

the class CreateEntitiesRunnable method _getEntityGraphs.

/**
 * Searches the graph around each given entity with {@code apoc.path.expand} and returns the
 * reached nodes ordered by hop count.
 *
 * <p>One statement is built per entity; a single statement is wrapped directly, several are
 * combined with {@code Cypher.unionAll}. When {@code entityRequestList} is empty no search is
 * run and {@code result} is returned untouched.
 *
 * @param entityGraphService       service used to execute the assembled statement
 * @param entityRequestList        entities whose neighbourhood is expanded; type and id are read
 * @param minHops                  passed to {@code apoc.path.expand} as the fourth argument
 * @param maxHops                  passed to {@code apoc.path.expand} as the fifth argument
 * @param currentEntityRequestType only used in the debug log message
 * @param result                   value returned when there is nothing to search
 * @return the search result, or {@code result} when {@code entityRequestList} is empty
 */
private List<EntityGraph> _getEntityGraphs(EntityGraphService entityGraphService, List<EntityIndex> entityRequestList, int minHops, int maxHops, String currentEntityRequestType, List<EntityGraph> result) {
    if (_log.isDebugEnabled()) {
        _log.debug("disambiguating with search entity with type " + currentEntityRequestType);
    }

    Statement[] expansions = new Statement[entityRequestList.size()];
    int slot = 0;
    for (EntityIndex entityIndex : entityRequestList) {
        Node entityNode = Cypher.node(entityIndex.getType()).named("entity");
        AliasedExpression aliasedEntity = entityNode.as("entity");
        SymbolicName path = Cypher.name("path");
        Property idProperty = aliasedEntity.getDelegate().property("id");

        expansions[slot++] = Cypher.match(entityNode)
            .where(idProperty.eq(literalOf(entityIndex.getId())))
            .call("apoc.path.expand")
            .withArgs(aliasedEntity.getDelegate(), literalOf(null), literalOf("-date"), literalOf(minHops), literalOf(maxHops))
            .yield(path)
            .returning(
                Functions.last(Functions.nodes(path)).as("node"),
                Functions.size(Functions.nodes(path)).subtract(literalOf(1)).as("hops"))
            .build();
    }

    if (expansions.length == 0) {
        return result;
    }

    Statement combined = expansions.length == 1 ? expansions[0] : Cypher.unionAll(expansions);
    Statement orderedByHops = Cypher.call(combined).returning("node", "hops").orderBy(Cypher.name("hops")).build();
    return entityGraphService.search(orderedByHops);
}
Also used : Statement(org.neo4j.cypherdsl.core.Statement) Node(org.neo4j.cypherdsl.core.Node) EntityIndex(io.openk9.entity.manager.model.index.EntityIndex) SymbolicName(org.neo4j.cypherdsl.core.SymbolicName) Property(org.neo4j.cypherdsl.core.Property) AliasedExpression(org.neo4j.cypherdsl.core.AliasedExpression)

Example 5 with EntityRequest

use of io.openk9.entity.manager.model.payload.EntityRequest in project openk9 by smclab.

the class EntityManagerBus method run.

/**
 * Consumes payloads from {@code _entityManagerQueue} forever and stores their entities and
 * relations inside one Hazelcast transaction per payload.
 *
 * <p>For each payload:
 * <ol>
 *   <li>every {@code EntityRequest} becomes an {@code Entity} with a freshly generated cache id,
 *       stored in the transactional "entityMap"; relations pointing at the request's tmpId are
 *       rewritten to point at that cache id;</li>
 *   <li>for every request with relations, an {@code EntityRelation} is stored in
 *       "entityRelationMap" for each relation whose target is one of the entities just created;</li>
 *   <li>every created cache id is added to "documentEntityMap" under the payload's document key.</li>
 * </ol>
 *
 * <p>The transaction is committed only when all steps succeed and rolled back on any exception.
 * Commit must not run after a rollback: committing a transaction that is no longer active throws,
 * which would terminate this loop.
 */
@SneakyThrows
public void run() {
    while (true) {
        Payload request = _entityManagerQueue.take();
        TransactionContext transactionContext = _hazelcastInstance.newTransactionContext();
        transactionContext.beginTransaction();
        try {
            TransactionalMap<EntityKey, Entity> entityTransactionalMap = transactionContext.getMap("entityMap");
            TransactionalMap<EntityRelationKey, EntityRelation> transactionalEntityRelationMap = transactionContext.getMap("entityRelationMap");
            TransactionalMultiMap<DocumentKey, String> documentEntityMap = transactionContext.getMultiMap("documentEntityMap");
            EntityManagerRequest payload = request.getPayload();
            _loggerAggregator.emitLog("process ingestionId", payload.getIngestionId());
            long tenantId = payload.getTenantId();
            String ingestionId = payload.getIngestionId();
            List<EntityRequest> entities = request.getEntities();
            Map<EntityKey, Entity> localEntityMap = new HashMap<>(entities.size());
            for (EntityRequest entityRequest : entities) {
                String name = entityRequest.getName();
                String type = entityRequest.getType();
                String cacheId = Long.toString(_entityFlakeId.newId());
                EntityKey key = EntityKey.of(tenantId, name, type, cacheId, ingestionId);
                Entity entity = new Entity(null, cacheId, tenantId, name, type, null, ingestionId, false, true, entityRequest.getContext());
                entityTransactionalMap.set(key, entity);
                localEntityMap.put(key, entity);
                // Re-target every relation that referenced this request's tmpId to the new cache id.
                for (EntityRequest entityRequest2 : entities) {
                    List<RelationRequest> candidateRelations = entityRequest2.getRelations();
                    // Relations may be absent (the loop below already treats null as "no relations").
                    if (candidateRelations == null) {
                        continue;
                    }
                    for (RelationRequest relation : candidateRelations) {
                        if (relation.getTo().equals(entityRequest.getTmpId())) {
                            relation.setTo(entity.getCacheId());
                        }
                    }
                }
            }
            for (EntityRequest entity : entities) {
                List<RelationRequest> relations = entity.getRelations();
                if (relations == null || relations.isEmpty()) {
                    continue;
                }
                Collection<Entity> values = localEntityMap.values();
                // Locate the cached entity created for this request by name and type.
                Entity current = values.stream().filter(e -> e.getName().equals(entity.getName()) && e.getType().equals(entity.getType())).findFirst().orElse(null);
                if (current == null) {
                    continue;
                }
                for (RelationRequest relation : relations) {
                    String to = relation.getTo();
                    String name = relation.getName();
                    for (Entity value : values) {
                        if (value.getCacheId().equals(to)) {
                            long entityRelationId = _entityRelationFlakeId.newId();
                            EntityRelation entityRelation = new EntityRelation(entityRelationId, current.getCacheId(), ingestionId, name, value.getCacheId());
                            transactionalEntityRelationMap.set(EntityRelationKey.of(entityRelationId, current.getCacheId(), ingestionId), entityRelation);
                        }
                    }
                }
            }
            if (!localEntityMap.isEmpty()) {
                DocumentKey key = DocumentKey.of(payload.getDatasourceId(), payload.getContentId(), tenantId);
                for (Entity value : localEntityMap.values()) {
                    documentEntityMap.put(key, value.getCacheId());
                }
            }
            // Commit only on the success path; a failure (including a failed commit) falls through to rollback.
            transactionContext.commitTransaction();
        } catch (Exception e) {
            _log.error(e.getMessage(), e);
            transactionContext.rollbackTransaction();
        }
    }
}
Also used : Entity(io.openk9.entity.manager.cache.model.Entity) HashMap(java.util.HashMap) RelationRequest(io.openk9.entity.manager.dto.RelationRequest) EntityRelationKey(io.openk9.entity.manager.cache.model.EntityRelationKey) EntityKey(io.openk9.entity.manager.cache.model.EntityKey) EntityRelation(io.openk9.entity.manager.cache.model.EntityRelation) EntityRequest(io.openk9.entity.manager.dto.EntityRequest) TransactionContext(com.hazelcast.transaction.TransactionContext) DocumentKey(io.openk9.entity.manager.cache.model.DocumentKey) Payload(io.openk9.entity.manager.dto.Payload) EntityManagerRequest(io.openk9.entity.manager.dto.EntityManagerRequest) SneakyThrows(lombok.SneakyThrows)

Aggregations

Node (org.neo4j.cypherdsl.core.Node)5 Statement (org.neo4j.cypherdsl.core.Statement)5 Entity (io.openk9.entity.manager.model.Entity)4 EntityRequest (io.openk9.entity.manager.model.payload.EntityRequest)4 JsonFactory (io.openk9.json.api.JsonFactory)4 GraphClient (io.openk9.relationship.graph.api.client.GraphClient)4 List (java.util.List)4 Collectors (java.util.stream.Collectors)4 Cypher (org.neo4j.cypherdsl.core.Cypher)4 Cypher.literalOf (org.neo4j.cypherdsl.core.Cypher.literalOf)4 Functions (org.neo4j.cypherdsl.core.Functions)4 Activate (org.osgi.service.component.annotations.Activate)4 Component (org.osgi.service.component.annotations.Component)4 Modified (org.osgi.service.component.annotations.Modified)4 Reference (org.osgi.service.component.annotations.Reference)4 Flux (reactor.core.publisher.Flux)4 Mono (reactor.core.publisher.Mono)4 RelationRequest (io.openk9.entity.manager.model.payload.RelationRequest)3 Request (io.openk9.entity.manager.model.payload.Request)3 Response (io.openk9.entity.manager.model.payload.Response)3