use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.
the class GetOrAddEntities method cleanCandidates.
/**
 * Reduces a list of candidates to its first element when that element's score exceeds
 * {@code _scoreThreshold}.
 *
 * <p>The score of the first candidate is computed in one of two ways:
 * <ul>
 *   <li>more than one candidate: {@code _softmax} of the first candidate's score against the
 *       scores of all candidates;</li>
 *   <li>exactly one candidate: {@code _levenshteinDistance} between the cleaned name of the
 *       candidate and the cleaned name of the request.</li>
 * </ul>
 *
 * @param entityRequest the entity being looked up
 * @param candidates    the candidates found for the request; the first element is the one scored
 * @return a singleton list holding the first candidate when its score is above the threshold,
 *         otherwise the {@code candidates} list itself (also when it is empty)
 */
private List<DocumentEntityResponse> cleanCandidates(EntityRequest entityRequest, List<DocumentEntityResponse> candidates) {
    if (_log.isDebugEnabled()) {
        _log.debug("entity " + entityRequest.getName() + " candidates: " + candidates);
    }

    // Nothing to score: hand the (empty) list straight back.
    if (candidates.isEmpty()) {
        if (_log.isDebugEnabled()) {
            _log.debug("candidates empty");
        }
        return candidates;
    }

    DocumentEntityResponse topCandidate = candidates.get(0);
    boolean severalCandidates = candidates.size() > 1;

    if (_log.isDebugEnabled()) {
        _log.debug(severalCandidates ? "softmax" : "levenshtein");
    }

    double score;
    if (severalCandidates) {
        double[] allScores = candidates
            .stream()
            .mapToDouble(DocumentEntityResponse::getScore)
            .toArray();
        score = _softmax(topCandidate.getScore(), allScores);
    } else {
        score = _levenshteinDistance(
            _entityNameCleanerProvider
                .getEntityNameCleaner(topCandidate.getType())
                .cleanEntityName(topCandidate.getName()),
            _entityNameCleanerProvider
                .getEntityNameCleaner(entityRequest.getType())
                .cleanEntityName(entityRequest.getName()));
    }

    if (_log.isDebugEnabled()) {
        _log.debug("current score: " + score + " score threshold: " + _scoreThreshold + " for entity " + entityRequest.getName());
    }

    if (score > _scoreThreshold) {
        _log.debug("filtered with treshold");
        return Collections.singletonList(topCandidate);
    }

    return candidates;
}
use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.
the class GetOrAddEntitiesConsumer method apply.
/**
 * Resolves the entities carried by an ingestion message and stores the result under
 * {@code payload.entities}.
 *
 * <p>The {@code entities} field is removed from {@code objectNode}. When it is empty an empty
 * array is stored. Otherwise every entity is disambiguated, relations between the resulting
 * entities are written through {@link #writeRelations(List)} (wrapped by
 * {@code _graphClient.makeTransactional} when {@code _transactional} is set), and each original
 * entity node whose {@code tmpId} has a matching response is converted into an object with the
 * fields {@code entityType}, {@code id} and {@code context}.
 *
 * <p>The whole pipeline is bounded by {@code _timeout} seconds; on expiry the returned Mono
 * fails with a {@link TimeoutException}.
 *
 * @param objectNode the message; must contain {@code datasourceContext}, {@code entities} and
 *                   {@code payload}
 * @return a Mono emitting the same {@code objectNode} instance after {@code payload.entities}
 *         has been set
 */
public Mono<ObjectNode> apply(ObjectNode objectNode) {
    return Mono.defer(() -> {
        ObjectNode datasourceContextJson = objectNode.get("datasourceContext").toObjectNode();
        long datasourceId = datasourceContextJson.get("datasource").get("datasourceId").asLong();
        long tenantId = datasourceContextJson.get("tenant").get("tenantId").asLong();

        // The raw entities are detached from the message; the resolved ones go under "payload".
        JsonNode entities = objectNode.remove("entities");

        Mono<ArrayNode> entitiesField;
        if (entities.size() == 0) {
            entitiesField = Mono.just(_jsonFactory.createArrayNode());
        } else {
            // Build the typed request by round-tripping through JSON.
            ObjectNode responseJson = _jsonFactory.createObjectNode();
            responseJson.put("entities", entities);
            responseJson.put("tenantId", tenantId);
            responseJson.put("datasourceId", datasourceId);
            Request request = _jsonFactory.fromJson(responseJson.toString(), Request.class);

            // One context per entity: the entity itself plus all the other entities of the request.
            List<RequestContext> requestContextList = request
                .getEntities()
                .stream()
                .map(entityRequest -> RequestContext
                    .builder()
                    .current(entityRequest)
                    .tenantId(request.getTenantId())
                    .datasourceId(request.getDatasourceId())
                    .rest(request
                        .getEntities()
                        .stream()
                        .filter(er -> er != entityRequest)
                        .collect(Collectors.toList()))
                    .build())
                .collect(Collectors.toList());

            Mono<List<EntityContext>> disambiguateListMono = GetOrAddEntities.stopWatch(
                "disambiguate-all-entities",
                Flux
                    .fromIterable(requestContextList)
                    .flatMap(requestContext -> GetOrAddEntities.stopWatch(
                        "disambiguate-" + requestContext.getCurrent().getName(),
                        Mono.<EntityContext>create(fluxSink -> _startDisambiguation.disambiguate(requestContext, fluxSink))))
                    .collectList());

            Mono<ResponseList> writeRelations = disambiguateListMono.flatMap(
                entityContexts -> GetOrAddEntities.stopWatch("write-relations", writeRelations(entityContexts)));

            Mono<ResponseList> responseListWrapper =
                _transactional ? _graphClient.makeTransactional(writeRelations) : writeRelations;

            entitiesField = responseListWrapper.map(responseListDTO -> {
                List<Response> responseList = responseListDTO.getResponse();
                ArrayNode entitiesArrayNode = entities.toArrayNode();
                ArrayNode arrayNode = _jsonFactory.createArrayNode();
                for (JsonNode node : entitiesArrayNode) {
                    // Pair each incoming entity with the first response carrying the same tmpId;
                    // entities without a response are dropped from the output.
                    Optional<Response> responseOptional = responseList
                        .stream()
                        .filter(response -> node.get("tmpId").asLong() == response.getTmpId())
                        .findFirst();
                    if (responseOptional.isPresent()) {
                        Entity entity = responseOptional.get().getEntity();
                        ObjectNode result = _jsonFactory.createObjectNode();
                        result.put("entityType", entity.getType());
                        result.put("id", entity.getId());
                        result.put("context", node.get("context"));
                        arrayNode.add(result);
                    }
                }
                return arrayNode;
            });
        }

        Duration timeout = Duration.ofSeconds(_timeout);

        return entitiesField
            .map(entitiesArray -> {
                ObjectNode payload = objectNode.get("payload").toObjectNode();
                payload.set("entities", entitiesArray);
                return objectNode;
            })
            // The fallback is deferred so the exception (message and stack trace) is only
            // built when the timeout actually fires, not on every invocation.
            .timeout(
                timeout,
                Mono.defer(() -> Mono.<ObjectNode>error(
                    new TimeoutException("timeout on entities count: " + entities.size() + " (Did not observe any item or terminal signal within " + timeout.toMillis() + "ms)"))));
    });
}
use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.
the class GetOrAddEntitiesConsumer method writeRelations.
/**
 * Writes the relations declared by the given entity contexts to the graph and returns one
 * {@link Response} per context.
 *
 * <p>For every context whose request declares relations, each context of the list (in order)
 * is matched against the first relation whose {@code to} equals that context's {@code tmpId}.
 * Every match yields a Cypher {@code MATCH ... WHERE id(a) = .. AND id(b) = .. MERGE (a)-[..]->(b)}
 * statement. A single statement is written as is, several statements are combined with
 * {@code UNION ALL}, and nothing is written when there are no statements.
 *
 * @param entityContext the disambiguated entities together with their originating requests
 * @return a Mono emitting a {@link ResponseList} with an empty message and one response per
 *         context, emitted after the graph write (if any) has completed
 */
public Mono<ResponseList> writeRelations(List<EntityContext> entityContext) {
    return Mono.defer(() -> {
        List<Statement> statements = new ArrayList<>();

        for (EntityContext source : entityContext) {
            List<RelationRequest> relations = source.getEntityRequest().getRelations();
            if (relations == null || relations.isEmpty()) {
                continue;
            }

            Entity sourceEntity = source.getEntity();

            // Phase 1: collect (relation name, target entity) pairs, at most one per context.
            List<Tuple2<String, Entity>> targets = new ArrayList<>();
            for (EntityContext candidate : entityContext) {
                for (RelationRequest relation : relations) {
                    if (candidate.getEntityRequest().getTmpId() == relation.getTo()) {
                        targets.add(Tuples.of(relation.getName(), candidate.getEntity()));
                        // Only the first matching relation counts for a given context.
                        break;
                    }
                }
            }

            // Phase 2: one MATCH/MERGE statement per collected pair.
            Node sourceNode = Cypher.node(sourceEntity.getType()).named("a");
            for (Tuple2<String, Entity> target : targets) {
                Entity targetEntity = target.getT2();
                Node targetNode = Cypher.node(targetEntity.getType()).named("b");
                statements.add(
                    Cypher
                        .match(sourceNode, targetNode)
                        .where(
                            Functions.id(sourceNode).eq(literalOf(sourceEntity.getId()))
                                .and(Functions.id(targetNode).eq(literalOf(targetEntity.getId()))))
                        .merge(sourceNode.relationshipTo(targetNode, target.getT1()))
                        .build());
            }
        }

        List<Response> responses = new ArrayList<>();
        for (EntityContext context : entityContext) {
            Entity copy = Entity
                .builder()
                .name(context.getEntity().getName())
                .id(context.getEntity().getId())
                .tenantId(context.getEntity().getTenantId())
                .type(context.getEntity().getType())
                .build();
            responses.add(
                Response
                    .builder()
                    .entity(copy)
                    .tmpId(context.getEntityRequest().getTmpId())
                    .build());
        }

        if (statements.isEmpty()) {
            return Mono.just(ResponseList.of("", responses));
        }

        Statement toWrite = statements.size() == 1
            ? statements.get(0)
            : Cypher.unionAll(statements.toArray(new Statement[0]));

        return _graphClient.write(toWrite).then(Mono.just(ResponseList.of("", responses)));
    });
}
use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.
the class IndexWriterEndpoins method _insertEntity.
/**
 * HTTP handler that indexes the entities contained in the request body.
 *
 * <p>The body is parsed as a JSON list of {@link DocumentEntityRequest}; each element becomes
 * an {@link IndexRequest} on the index {@code <tenantId>-entity} and all of them are sent in a
 * single bulk call with refresh policy {@code WAIT_UNTIL}. The response body is always
 * {@code "{}"} once the bulk call (if any) has completed.
 *
 * @param httpRequest  the incoming request whose body carries the entities
 * @param httpResponse the response to write to
 * @return the publisher produced by {@code _httpResponseWriter.write}
 */
private Publisher<Void> _insertEntity(HttpServerRequest httpRequest, HttpServerResponse httpResponse) {
    RestHighLevelClient restHighLevelClient = _restHighLevelClientProvider.get();

    Mono<List<DocumentEntityRequest>> request = Mono
        .from(ReactorNettyUtils.aggregateBodyAsByteArray(httpRequest))
        .map(json -> _jsonFactory.fromJsonList(json, DocumentEntityRequest.class));

    Mono<BulkResponse> elasticResponse = request
        .flatMapIterable(Function.identity())
        .map(entity -> {
            IndexRequest indexRequest = new IndexRequest(entity.getTenantId() + "-entity");
            return indexRequest.source(_jsonFactory.toJson(entity), XContentType.JSON);
        })
        // reduceWith creates a fresh BulkRequest per subscription; a seed instance shared
        // through reduce(...) would accumulate the same actions again on re-subscription.
        .<BulkRequest>reduceWith(BulkRequest::new, BulkRequest::add)
        // A bulk request without actions fails validation ("no requests added"), so an empty
        // entity list is treated as a no-op instead of an error.
        .filter(bulkRequest -> bulkRequest.numberOfActions() > 0)
        .flatMap(bulkRequest -> Mono.create(sink -> {
            bulkRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
            Cancellable cancellable = restHighLevelClient.bulkAsync(
                bulkRequest, RequestOptions.DEFAULT, new ReactorActionListener<>(sink));
            // Propagate downstream cancellation to the in-flight Elasticsearch call.
            sink.onCancel(cancellable::cancel);
        }));

    return _httpResponseWriter.write(httpResponse, elasticResponse.thenReturn("{}"));
}
use of io.openk9.entity.manager.model.Entity in project openk9 by smclab.
the class AssociateEntitiesRunnable method run_.
/**
 * Associates the entities held by this member in the associable-entity map with their
 * ingestion, then removes the successfully associated ones from the map.
 *
 * <p>Entities are grouped by the ingestion id of their map key. For each group
 * {@code DataService.associateEntities} is called with the tenant id of the group's first
 * entity; a failure for one group is logged and does not stop the others. Keys of associated
 * entities are removed asynchronously through a {@link Pipelining} of depth 10, and the method
 * waits for those removals to finish.
 */
@Override
public void run_() {
    _log.info("start AssociateEntitiesRunnable");

    IMap<AssociableEntityKey, Entity> associableEntityMap = MapUtil.getAssociableEntityMap(_hazelcastInstance);
    Set<AssociableEntityKey> associableEntityKeys = associableEntityMap.localKeySet();
    Map<AssociableEntityKey, Entity> localEntityMap = associableEntityMap.getAll(associableEntityKeys);

    _log.info("ingestionKeys: " + localEntityMap.size());

    Map<String, List<Entity>> groupingByIngestionId = localEntityMap
        .entrySet()
        .stream()
        .collect(
            Collectors.groupingBy(
                e -> e.getKey().getIngestionId(),
                Collectors.mapping(Map.Entry::getValue, Collectors.toList())));

    List<AssociableEntityKey> entitiesToRemove = new ArrayList<>();
    List<String> ingestionIds = new ArrayList<>();

    for (Map.Entry<String, List<Entity>> entry : groupingByIngestionId.entrySet()) {
        String ingestionId = entry.getKey();
        List<Entity> v = entry.getValue();
        if (v.isEmpty()) {
            continue;
        }

        DataService dataService = CDI.current().select(DataService.class).get();

        // The tenant id of the first entity is used for the whole group.
        Long tenantId = v.get(0).getTenantId();

        try {
            boolean associated = dataService.associateEntities(
                tenantId,
                ingestionId,
                v.stream().map(IngestionEntity::fromEntity).collect(Collectors.toList()));

            if (associated) {
                for (Entity entity : v) {
                    entitiesToRemove.add(AssociableEntityKey.of(entity.getCacheId(), entity.getIngestionId()));
                }
                // Recorded once per ingestion so the summary log does not repeat the id
                // for every entity of the group.
                ingestionIds.add(ingestionId);
            }
        } catch (Exception e) {
            // Keep the stack trace; a failure here must not prevent the other groups from running.
            _log.error(e.getMessage(), e);
        }
    }

    _log.info("entities associated: " + entitiesToRemove.size() + " ingestionIds: " + ingestionIds);

    try {
        Pipelining<Entity> pipelining = new Pipelining<>(10);
        for (AssociableEntityKey associateEntityKey : entitiesToRemove) {
            pipelining.add(associableEntityMap.removeAsync(associateEntityKey));
        }
        // Blocks until every pending removal has completed.
        pipelining.results();
    } catch (Exception e) {
        _log.error(e.getMessage(), e);
    }
}
Aggregations