Use of io.openk9.entity.manager.cleaner.EntityNameCleanerProvider in project openk9 by smclab.
From the class CreateEntitiesRunnable, method getEntityCandidates.
/**
 * Searches for already indexed entities that may match the given entity.
 *
 * <p>The name cleaner registered for the entity's type builds a query from
 * the entity's tenant id and name. That query is then run through the entity
 * service for the same tenant, with {@code 0} and {@code 10} as the trailing
 * arguments (presumably offset and page size; confirm against
 * {@code EntityService.search}).
 *
 * @param entityNameCleanerProvider resolves the name cleaner by entity type
 * @param entityService service used to execute the search
 * @param ingestionIdEntityMember member wrapper stored alongside the results
 * @param ingestionIdEntity entity whose type, tenant id and name drive the lookup
 * @return the member paired with the search results
 */
private EntityCandidates getEntityCandidates(EntityNameCleanerProvider entityNameCleanerProvider, EntityService entityService, EntityMember ingestionIdEntityMember, Entity ingestionIdEntity) {
    QueryBuilder nameQuery = entityNameCleanerProvider
        .get(ingestionIdEntity.getType())
        .cleanEntityName(ingestionIdEntity.getTenantId(), ingestionIdEntity.getName());

    return EntityCandidates.of(
        ingestionIdEntityMember,
        entityService.search(ingestionIdEntity.getTenantId(), nameQuery, 0, 10));
}
Use of io.openk9.entity.manager.cleaner.EntityNameCleanerProvider in project openk9 by smclab.
From the class CreateEntitiesRunnable, method run_.
/**
 * Resolves the locally owned entities that have no {@code id} and no
 * {@code graphId} yet, and writes the resolved entities back to the
 * Hazelcast maps.
 *
 * <p>Steps:
 * <ol>
 *   <li>Select the local keys of the entity map whose {@code id} and
 *       {@code graphId} attributes are both {@code null}, and load their
 *       values.</li>
 *   <li>Ask every non-local cluster member, through the
 *       {@code "entityExecutor"} executor service, for the entities it
 *       returns for the same ingestion ids.</li>
 *   <li>Group local and remote entities by ingestion id and look up search
 *       candidates for every entity of a group.</li>
 *   <li>For each local entity of the group, run
 *       {@code _getAndCreateEntityDisambiguate} on the bounded-elastic
 *       scheduler and block until all of them have completed.</li>
 *   <li>Store the results of the group in the associable-entity map, and
 *       store all results in the entity map once every group is done.</li>
 * </ol>
 */
@Override
public void run_() {
    _log.info("start CreateEntitiesRunnable");

    IMap<EntityKey, Entity> entityIMap = MapUtil.getEntityMap(_hazelcastInstance);
    IMap<AssociableEntityKey, Entity> associableEntityMap = MapUtil.getAssociableEntityMap(_hazelcastInstance);

    // Only entries owned by this member that have neither an id nor a graphId.
    Set<EntityKey> pendingKeys = entityIMap.localKeySet(
        Predicates.and(
            Predicates.equal("id", null),
            Predicates.equal("graphId", null)));

    EntityGraphConfig config = CDI.current().select(EntityGraphConfig.class).get();
    EntityNameCleanerProvider entityNameCleanerProvider = CDI.current().select(EntityNameCleanerProvider.class).get();
    EntityService entityService = CDI.current().select(EntityService.class).get();
    EntityGraphService entityGraphService = CDI.current().select(EntityGraphService.class).get();

    Map<EntityKey, Entity> localEntities = entityIMap.getAll(pendingKeys);

    List<Member> remoteMembers = _hazelcastInstance
        .getCluster()
        .getMembers()
        .stream()
        .filter(member -> !member.localMember())
        .collect(Collectors.toList());

    String[] ingestionIds = localEntities
        .keySet()
        .stream()
        .map(EntityKey::getIngestionId)
        .distinct()
        .toArray(String[]::new);

    // Fan the request out to the other members; each one answers with its own map.
    Map<Member, Future<Map<EntityKey, Entity>>> remoteAnswers = _hazelcastInstance
        .getExecutorService("entityExecutor")
        .submitToMembers(new GetEntitiesCallable(ingestionIds), remoteMembers);

    // Wait for every member and merge the answers; later maps win on key clashes.
    Map<EntityKey, Entity> remoteEntities = remoteAnswers
        .values()
        .stream()
        .map(FutureUtil::makeCompletableFuture)
        .map(CompletableFuture::join)
        .reduce((a, b) -> {
            Map<EntityKey, Entity> map = new HashMap<>();
            map.putAll(a);
            map.putAll(b);
            return map;
        })
        .orElseGet(Map::of);

    // Local entities come first in each group; the boolean flag marks locality.
    Map<String, List<EntityMember>> membersByIngestionId = Stream
        .concat(
            localEntities.values().stream().map(entity -> EntityMember.of(entity, true)),
            remoteEntities.values().stream().map(entity -> EntityMember.of(entity, false)))
        .collect(Collectors.groupingBy(member -> member.getEntity().getIngestionId()));

    Map<EntityKey, Entity> resolvedByKey = new HashMap<>();

    for (List<EntityMember> group : membersByIngestionId.values()) {
        // Candidates are collected for every member of the group, local or not,
        // because the whole list is handed to the disambiguation step below.
        List<EntityCandidates> groupCandidates = group
            .stream()
            .map(member -> getEntityCandidates(entityNameCleanerProvider, entityService, member, member.getEntity()))
            .collect(Collectors.toCollection(ArrayList::new));

        // Only local entities are resolved by this member.
        List<Mono<Entity>> resolutions = new ArrayList<>();
        for (EntityCandidates current : groupCandidates) {
            if (current.getEntity().isLocal()) {
                resolutions.add(
                    Mono.fromSupplier(
                            _getAndCreateEntityDisambiguate(
                                config, entityNameCleanerProvider, entityService, entityGraphService,
                                groupCandidates, current, current.getEntity()))
                        .subscribeOn(Schedulers.boundedElastic()));
            }
        }

        // defaultIfEmpty supplies an empty list when the zip completes without a value.
        Mono<List<Entity>> resolvedGroup = Mono
            .zip(resolutions, results -> {
                List<Entity> zipped = Stream.of(results)
                    .map(result -> (Entity) result)
                    .collect(Collectors.toList());
                return zipped;
            })
            .defaultIfEmpty(List.of());

        Map<AssociableEntityKey, Entity> groupAssociables = new HashMap<>();
        for (Entity resolved : resolvedGroup.block()) {
            groupAssociables.put(
                AssociableEntityKey.of(resolved.getCacheId(), resolved.getIngestionId()),
                resolved);
            resolvedByKey.put(
                EntityKey.of(
                    resolved.getTenantId(), resolved.getName(), resolved.getType(),
                    resolved.getCacheId(), resolved.getIngestionId()),
                resolved);
        }
        associableEntityMap.setAll(groupAssociables);
    }

    entityIMap.setAll(resolvedByKey);
}
Use of io.openk9.entity.manager.cleaner.EntityNameCleanerProvider in project openk9 by smclab.
From the class CreateEntitiesRunnable, method cleanCandidates.
/**
 * Narrows a candidate list down to its first element when that element
 * scores above the given threshold.
 *
 * <p>The first candidate is scored as follows:
 * <ul>
 *   <li>several candidates: {@code _softmax} of its score against the scores
 *       of all candidates;</li>
 *   <li>exactly one candidate: {@code _levenshteinDistance} between the
 *       cleaned candidate name and the cleaned requested name, each cleaned
 *       with the cleaner registered for its own type.</li>
 * </ul>
 *
 * <p>NOTE(review): the first element is treated as the best match, so the
 * list is presumably ordered by descending score. Verify against the caller.
 *
 * @param entityRequest entity being resolved
 * @param candidates search results for that entity
 * @param entityNameCleanerProvider resolves name cleaners by entity type
 * @param scoreThreshold value the computed score must strictly exceed
 * @return a singleton list with the first candidate when its score exceeds
 *     the threshold; otherwise {@code candidates} itself, including when it
 *     is empty
 */
private List<EntityIndex> cleanCandidates(Entity entityRequest, List<EntityIndex> candidates, EntityNameCleanerProvider entityNameCleanerProvider, float scoreThreshold) {
    if (candidates.isEmpty()) {
        return candidates;
    }

    EntityIndex topCandidate = candidates.get(0);
    double topScore;

    if (candidates.size() <= 1) {
        // A single hit has nothing to be normalised against, so compare names instead.
        topScore = _levenshteinDistance(
            entityNameCleanerProvider.get(topCandidate.getType()).cleanEntityName(topCandidate.getName()),
            entityNameCleanerProvider.get(entityRequest.getType()).cleanEntityName(entityRequest.getName()));
    } else {
        double[] allScores = candidates
            .stream()
            .mapToDouble(EntityIndex::getScore)
            .toArray();
        topScore = _softmax(topCandidate.getScore(), allScores);
    }

    if (topScore > scoreThreshold) {
        return Collections.singletonList(topCandidate);
    }
    return candidates;
}
Aggregations