use of io.openk9.entity.manager.dto.EntityRequest in project openk9 by smclab.
the class GetOrAddEntities method cleanCandidates.
/**
 * Reduces the candidate list to its first element when that element scores above
 * {@code _scoreThreshold}; otherwise returns the list unchanged.
 *
 * <p>With more than one candidate the score is obtained from {@code _softmax}, fed with the
 * first candidate's score and the scores of all candidates. With exactly one candidate the
 * score is obtained from {@code _levenshteinDistance}, fed with the cleaned names of the
 * candidate and of the request.
 *
 * @param entityRequest the entity being looked up
 * @param candidates the candidate matches; the first element is treated as the best one
 * @return a singleton list holding the first candidate when its score exceeds the threshold,
 *     otherwise {@code candidates} itself (including when it is empty)
 */
private List<DocumentEntityResponse> cleanCandidates(EntityRequest entityRequest, List<DocumentEntityResponse> candidates) {
    if (_log.isDebugEnabled()) {
        _log.debug("entity " + entityRequest.getName() + " candidates: " + candidates);
    }

    // Nothing to rank: optionally report it and hand the empty list straight back.
    if (candidates.isEmpty()) {
        if (_log.isDebugEnabled()) {
            _log.debug("candidates empty");
        }
        return candidates;
    }

    DocumentEntityResponse top = candidates.get(0);
    double bestScore;

    if (candidates.size() <= 1) {
        // Single candidate: score it by comparing the cleaned names.
        if (_log.isDebugEnabled()) {
            _log.debug("levenshtein");
        }
        bestScore = _levenshteinDistance(
            _entityNameCleanerProvider
                .getEntityNameCleaner(top.getType())
                .cleanEntityName(top.getName()),
            _entityNameCleanerProvider
                .getEntityNameCleaner(entityRequest.getType())
                .cleanEntityName(entityRequest.getName()));
    } else {
        // Several candidates: score the first one relative to all candidate scores.
        if (_log.isDebugEnabled()) {
            _log.debug("softmax");
        }
        double[] allScores = candidates
            .stream()
            .mapToDouble(DocumentEntityResponse::getScore)
            .toArray();
        bestScore = _softmax(top.getScore(), allScores);
    }

    if (_log.isDebugEnabled()) {
        _log.debug("current score: " + bestScore + " score threshold: " + _scoreThreshold + " for entity " + entityRequest.getName());
    }

    if (bestScore > _scoreThreshold) {
        _log.debug("filtered with treshold");
        return Collections.singletonList(top);
    }

    return candidates;
}
use of io.openk9.entity.manager.dto.EntityRequest in project openk9 by smclab.
the class GetOrAddEntitiesConsumer method apply.
/**
 * Resolves the {@code "entities"} carried by the incoming node and stores the resolved
 * references under {@code payload.entities}.
 *
 * <p>The {@code "entities"} field is removed from {@code objectNode}. When it is empty, an empty
 * array is stored. Otherwise the entities are converted to a {@code Request}, each one is
 * disambiguated, relations are written (wrapped by {@code _graphClient.makeTransactional} when
 * {@code _transactional} is set), and every input entity whose {@code tmpId} has a matching
 * response is emitted as an object with {@code entityType}, {@code id} and {@code context}.
 *
 * @param objectNode the message; read for {@code datasourceContext} and {@code payload}, and
 *     mutated in place
 * @return a Mono emitting the same {@code objectNode}, or failing with a
 *     {@code TimeoutException} when nothing is observed within {@code _timeout} seconds
 */
public Mono<ObjectNode> apply(ObjectNode objectNode) {
    return Mono.defer(() -> {
        ObjectNode datasourceContext = objectNode.get("datasourceContext").toObjectNode();
        long datasourceId = datasourceContext.get("datasource").get("datasourceId").asLong();
        long tenantId = datasourceContext.get("tenant").get("tenantId").asLong();

        JsonNode entities = objectNode.remove("entities");

        Mono<ArrayNode> entitiesField;

        if (entities.size() != 0) {
            // Round-trip through JSON to obtain the typed request.
            ObjectNode requestJson = _jsonFactory.createObjectNode();
            requestJson.put("entities", entities);
            requestJson.put("tenantId", tenantId);
            requestJson.put("datasourceId", datasourceId);
            Request request = _jsonFactory.fromJson(requestJson.toString(), Request.class);

            // One context per entity: the entity itself plus every other entity of the request.
            List<RequestContext> requestContextList = request
                .getEntities()
                .stream()
                .map(candidate -> RequestContext
                    .builder()
                    .current(candidate)
                    .tenantId(request.getTenantId())
                    .datasourceId(request.getDatasourceId())
                    .rest(request
                        .getEntities()
                        .stream()
                        .filter(other -> other != candidate)
                        .collect(Collectors.toList()))
                    .build())
                .collect(Collectors.toList());

            Mono<List<EntityContext>> disambiguateListMono = GetOrAddEntities.stopWatch(
                "disambiguate-all-entities",
                Flux
                    .fromIterable(requestContextList)
                    .flatMap(ctx -> GetOrAddEntities.stopWatch(
                        "disambiguate-" + ctx.getCurrent().getName(),
                        Mono.<EntityContext>create(sink -> _startDisambiguation.disambiguate(ctx, sink))))
                    .collectList());

            Mono<ResponseList> writeRelations = disambiguateListMono.flatMap(
                contexts -> GetOrAddEntities.stopWatch("write-relations", writeRelations(contexts)));

            Mono<ResponseList> responseListWrapper;
            if (_transactional) {
                responseListWrapper = _graphClient.makeTransactional(writeRelations);
            } else {
                responseListWrapper = writeRelations;
            }

            entitiesField = responseListWrapper.map(responseListDTO -> {
                List<Response> responses = responseListDTO.getResponse();
                ArrayNode inputEntities = entities.toArrayNode();
                ArrayNode resolved = _jsonFactory.createArrayNode();

                // Keep input order; entities without a matching response are dropped.
                for (JsonNode node : inputEntities) {
                    responses
                        .stream()
                        .filter(response -> node.get("tmpId").asLong() == response.getTmpId())
                        .findFirst()
                        .ifPresent(response -> {
                            Entity entity = response.getEntity();
                            ObjectNode result = _jsonFactory.createObjectNode();
                            result.put("entityType", entity.getType());
                            result.put("id", entity.getId());
                            result.put("context", node.get("context"));
                            resolved.add(result);
                        });
                }

                return resolved;
            });
        } else {
            entitiesField = Mono.just(_jsonFactory.createArrayNode());
        }

        Mono<ObjectNode> enriched = entitiesField.map(entitiesArray -> {
            ObjectNode payload = objectNode.get("payload").toObjectNode();
            payload.set("entities", entitiesArray);
            return objectNode;
        });

        Duration limit = Duration.ofSeconds(_timeout);

        return enriched.timeout(
            limit,
            Mono.error(new TimeoutException("timeout on entities count: " + entities.size() + " (Did not observe any item or terminal signal within " + limit.toMillis() + "ms)")));
    });
}
use of io.openk9.entity.manager.dto.EntityRequest in project openk9 by smclab.
the class GetOrAddEntitiesConsumer method writeRelations.
/**
 * Builds and writes the relationship statements between the given disambiguated entities and
 * returns one {@code Response} per entity context.
 *
 * <p>For every context whose request declares relations, each other context is matched against
 * those relations by {@code tmpId}; only the first matching relation per context is used. Each
 * match yields a MATCH-by-id + MERGE-relationship statement. All statements are written in a
 * single call (combined with {@code UNION ALL} when there is more than one); nothing is written
 * when there are none.
 *
 * @param entityContext the disambiguated entities with their originating requests
 * @return a Mono emitting a {@code ResponseList} with an empty message and one response per
 *     entry of {@code entityContext}, after the write (if any) completes
 */
public Mono<ResponseList> writeRelations(List<EntityContext> entityContext) {
    return Mono.defer(() -> {
        List<Statement> statementList = new ArrayList<>();

        for (EntityContext source : entityContext) {
            List<RelationRequest> relations = source.getEntityRequest().getRelations();

            if (relations == null || relations.isEmpty()) {
                continue;
            }

            Entity currentEntity = source.getEntity();

            // Pair each related entity with the name of the first relation pointing at it.
            List<Tuple2<String, Entity>> targets = new ArrayList<>();
            for (EntityContext candidate : entityContext) {
                for (RelationRequest relation : relations) {
                    if (candidate.getEntityRequest().getTmpId() == relation.getTo()) {
                        targets.add(Tuples.of(relation.getName(), candidate.getEntity()));
                        break;
                    }
                }
            }

            Node currentEntityNode = Cypher.node(currentEntity.getType()).named("a");

            for (Tuple2<String, Entity> target : targets) {
                Entity relatedEntity = target.getT2();
                Node relatedNode = Cypher.node(relatedEntity.getType()).named("b");

                statementList.add(
                    Cypher
                        .match(currentEntityNode, relatedNode)
                        .where(
                            Functions.id(currentEntityNode).eq(literalOf(currentEntity.getId()))
                                .and(Functions.id(relatedNode).eq(literalOf(relatedEntity.getId()))))
                        .merge(currentEntityNode.relationshipTo(relatedNode, target.getT1()))
                        .build());
            }
        }

        List<Response> response = entityContext
            .stream()
            .map(ctx -> Response
                .builder()
                .entity(Entity
                    .builder()
                    .name(ctx.getEntity().getName())
                    .id(ctx.getEntity().getId())
                    .tenantId(ctx.getEntity().getTenantId())
                    .type(ctx.getEntity().getType())
                    .build())
                .tmpId(ctx.getEntityRequest().getTmpId())
                .build())
            .collect(Collectors.toList());

        if (statementList.isEmpty()) {
            // No relations to persist.
            return Mono.just(ResponseList.of("", response));
        }

        if (statementList.size() == 1) {
            return _graphClient
                .write(statementList.get(0))
                .then(Mono.just(ResponseList.of("", response)));
        }

        return _graphClient
            .write(Cypher.unionAll(statementList.toArray(new Statement[0])))
            .then(Mono.just(ResponseList.of("", response)));
    });
}
use of io.openk9.entity.manager.dto.EntityRequest in project openk9 by smclab.
the class CreateEntitiesRunnable method _getEntityGraphs.
/**
 * Searches the graph around the given entities and returns the nodes found, ordered by hop count.
 *
 * <p>One statement is built per entity: it matches the entity node by its {@code id} property,
 * calls {@code apoc.path.expand} with the {@code "-date"} filter and the hop bounds, and returns
 * the last node of each path as {@code node} together with {@code hops} (path node count minus
 * one). The statements are combined with {@code UNION ALL} when there is more than one, wrapped
 * in a subquery and ordered by {@code hops}.
 *
 * @param entityGraphService service used to execute the search statement
 * @param entityRequestList entities to expand from
 * @param minHops lower hop bound passed to {@code apoc.path.expand}
 * @param maxHops upper hop bound passed to {@code apoc.path.expand}
 * @param currentEntityRequestType entity type being disambiguated; used for logging only
 * @param result value returned unchanged when {@code entityRequestList} is empty
 * @return the search result, or {@code result} when there is nothing to search for
 */
private List<EntityGraph> _getEntityGraphs(EntityGraphService entityGraphService, List<EntityIndex> entityRequestList, int minHops, int maxHops, String currentEntityRequestType, List<EntityGraph> result) {
    if (_log.isDebugEnabled()) {
        _log.debug("disambiguating with search entity with type " + currentEntityRequestType);
    }

    Statement[] statements = new Statement[entityRequestList.size()];

    int index = 0;
    while (index < entityRequestList.size()) {
        EntityIndex entityIndex = entityRequestList.get(index);

        Node nodeEntity = Cypher.node(entityIndex.getType()).named("entity");
        AliasedExpression entityAliased = nodeEntity.as("entity");
        SymbolicName path = Cypher.name("path");
        Property idProperty = entityAliased.getDelegate().property("id");

        statements[index] = Cypher
            .match(nodeEntity)
            .where(idProperty.eq(literalOf(entityIndex.getId())))
            .call("apoc.path.expand")
            .withArgs(
                entityAliased.getDelegate(),
                literalOf(null),
                literalOf("-date"),
                literalOf(minHops),
                literalOf(maxHops))
            .yield(path)
            .returning(
                Functions.last(Functions.nodes(path)).as("node"),
                Functions.size(Functions.nodes(path)).subtract(literalOf(1)).as("hops"))
            .build();

        index++;
    }

    // No entities means no query: fall back to the caller-supplied result.
    if (statements.length == 0) {
        return result;
    }

    // A single statement is used as-is; several are merged with UNION ALL.
    Statement inner = statements.length == 1 ? statements[0] : Cypher.unionAll(statements);

    Statement entityRequestListStatement = Cypher
        .call(inner)
        .returning("node", "hops")
        .orderBy(Cypher.name("hops"))
        .build();

    return entityGraphService.search(entityRequestListStatement);
}
use of io.openk9.entity.manager.dto.EntityRequest in project openk9 by smclab.
the class EntityManagerBus method run.
/**
 * Consumes payloads from {@code _entityManagerQueue} forever and stores their entities,
 * entity relations and document-to-entity links in Hazelcast, one transaction per payload.
 *
 * <p>For each payload:
 * <ol>
 *   <li>every {@code EntityRequest} becomes an {@code Entity} with a freshly generated cache id,
 *       stored in {@code entityMap}; relations that pointed at the request's {@code tmpId} are
 *       rewritten to point at that cache id;</li>
 *   <li>for every request with relations, an {@code EntityRelation} is stored in
 *       {@code entityRelationMap} for each relation whose target is one of the entities created
 *       from this payload;</li>
 *   <li>every created entity's cache id is added to {@code documentEntityMap} under the
 *       payload's document key.</li>
 * </ol>
 *
 * <p>The transaction is committed only when all steps succeed. On any exception the error is
 * logged and the transaction is rolled back; the loop then continues with the next payload.
 */
@SneakyThrows
public void run() {
    while (true) {
        Payload request = _entityManagerQueue.take();
        TransactionContext transactionContext = _hazelcastInstance.newTransactionContext();
        transactionContext.beginTransaction();
        try {
            TransactionalMap<EntityKey, Entity> entityTransactionalMap = transactionContext.getMap("entityMap");
            TransactionalMap<EntityRelationKey, EntityRelation> transactionalEntityRelationMap = transactionContext.getMap("entityRelationMap");
            TransactionalMultiMap<DocumentKey, String> documentEntityMap = transactionContext.getMultiMap("documentEntityMap");

            EntityManagerRequest payload = request.getPayload();
            _loggerAggregator.emitLog("process ingestionId", payload.getIngestionId());

            long tenantId = payload.getTenantId();
            String ingestionId = payload.getIngestionId();
            List<EntityRequest> entities = request.getEntities();

            Map<EntityKey, Entity> localEntityMap = new HashMap<>(entities.size());

            for (EntityRequest entityRequest : entities) {
                String name = entityRequest.getName();
                String type = entityRequest.getType();
                String cacheId = Long.toString(_entityFlakeId.newId());

                EntityKey key = EntityKey.of(tenantId, name, type, cacheId, ingestionId);
                Entity entity = new Entity(null, cacheId, tenantId, name, type, null, ingestionId, false, true, entityRequest.getContext());

                entityTransactionalMap.set(key, entity);
                localEntityMap.put(key, entity);

                // Re-target every relation that referenced this request's temporary id so it
                // references the newly assigned cache id instead.
                for (EntityRequest entityRequest2 : entities) {
                    List<RelationRequest> candidateRelations = entityRequest2.getRelations();

                    // Requests without relations are legal (the second pass below also skips
                    // them); without this guard one such request would abort the whole payload.
                    if (candidateRelations == null) {
                        continue;
                    }

                    for (RelationRequest relation : candidateRelations) {
                        if (relation.getTo().equals(entityRequest.getTmpId())) {
                            relation.setTo(entity.getCacheId());
                        }
                    }
                }
            }

            for (EntityRequest entity : entities) {
                List<RelationRequest> relations = entity.getRelations();

                if (relations == null || relations.isEmpty()) {
                    continue;
                }

                Collection<Entity> values = localEntityMap.values();

                // NOTE(review): the source entity is looked up by name and type only; if a
                // payload can carry two requests with the same name and type this may pick the
                // other one. Confirm whether such duplicates can occur.
                Entity current = values
                    .stream()
                    .filter(e -> e.getName().equals(entity.getName()) && e.getType().equals(entity.getType()))
                    .findFirst()
                    .orElse(null);

                if (current == null) {
                    continue;
                }

                for (RelationRequest relation : relations) {
                    String to = relation.getTo();
                    String name = relation.getName();

                    for (Entity value : values) {
                        if (value.getCacheId().equals(to)) {
                            long entityRelationId = _entityRelationFlakeId.newId();
                            EntityRelation entityRelation = new EntityRelation(entityRelationId, current.getCacheId(), ingestionId, name, value.getCacheId());
                            transactionalEntityRelationMap.set(EntityRelationKey.of(entityRelationId, current.getCacheId(), ingestionId), entityRelation);
                        }
                    }
                }
            }

            if (!localEntityMap.isEmpty()) {
                DocumentKey key = DocumentKey.of(payload.getDatasourceId(), payload.getContentId(), tenantId);

                for (Entity value : localEntityMap.values()) {
                    documentEntityMap.put(key, value.getCacheId());
                }
            }

            // Commit only on the success path. Committing from a finally block would also run
            // after the rollback below and fail on the no-longer-active transaction, which
            // (because of @SneakyThrows) would terminate this consumer loop.
            transactionContext.commitTransaction();
        } catch (Exception e) {
            _log.error(e.getMessage(), e);
            transactionContext.rollbackTransaction();
        }
    }
}
Aggregations