Search in sources :

Example 1 with EntityNameCleanerProvider

use of io.openk9.entity.manager.api.EntityNameCleanerProvider in project openk9 by smclab.

The getEntityCandidates method of the CreateEntitiesRunnable class.

/**
 * Searches the entity index for candidates matching the given entity and
 * pairs the result with the originating {@link EntityMember}.
 *
 * <p>The cleaner registered for the entity's type turns the tenant id and
 * name into a {@link QueryBuilder}; that query is then run through
 * {@code entityService.search} with the trailing arguments {@code 0} and
 * {@code 10} (presumably offset and page size -- confirm against
 * {@code EntityService}).
 *
 * @param entityNameCleanerProvider source of the per-type {@link EntityNameCleaner}
 * @param entityService service used to run the candidate search
 * @param ingestionIdEntityMember member wrapper stored in the returned candidates
 * @param ingestionIdEntity entity whose type, tenant id and name drive the search
 * @return the member together with the candidates returned by the search
 */
private EntityCandidates getEntityCandidates(EntityNameCleanerProvider entityNameCleanerProvider, EntityService entityService, EntityMember ingestionIdEntityMember, Entity ingestionIdEntity) {
    // Build the search query with the cleaner that matches this entity's type.
    QueryBuilder nameQuery = entityNameCleanerProvider
        .get(ingestionIdEntity.getType())
        .cleanEntityName(ingestionIdEntity.getTenantId(), ingestionIdEntity.getName());

    List<EntityIndex> matches =
        entityService.search(ingestionIdEntity.getTenantId(), nameQuery, 0, 10);

    return EntityCandidates.of(ingestionIdEntityMember, matches);
}
Also used : EntityIndex(io.openk9.entity.manager.model.index.EntityIndex) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) EntityNameCleaner(io.openk9.entity.manager.cleaner.EntityNameCleaner)

Example 2 with EntityNameCleanerProvider

use of io.openk9.entity.manager.api.EntityNameCleanerProvider in project openk9 by smclab.

The run_ method of the CreateEntitiesRunnable class.

/**
 * Processes the entities held in the Hazelcast entity map whose {@code id}
 * and {@code graphId} are both {@code null}, and writes the results back to
 * the entity and associable-entity maps.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Select the matching keys through {@code localKeySet} and load their
 *       entities.</li>
 *   <li>Submit a {@link GetEntitiesCallable} with the distinct ingestion ids
 *       of those keys to every non-local cluster member and merge the
 *       returned maps.</li>
 *   <li>Group local and remote entities by ingestion id.</li>
 *   <li>For each group, compute search candidates for every entity, then run
 *       the supplier returned by {@code _getAndCreateEntityDisambiguate} for
 *       each local entity on the bounded-elastic scheduler and block until
 *       all of them complete.</li>
 *   <li>Store the resulting entities in the associable-entity map (once per
 *       group) and in the entity map (once, at the end).</li>
 * </ol>
 *
 * <p>This method blocks the calling thread on both the remote futures
 * ({@code CompletableFuture::join}) and the zipped {@link Mono}
 * ({@code block()}).
 */
@Override
public void run_() {
    _log.info("start CreateEntitiesRunnable");
    IMap<EntityKey, Entity> entityIMap = MapUtil.getEntityMap(_hazelcastInstance);
    IMap<AssociableEntityKey, Entity> associableEntityMap = MapUtil.getAssociableEntityMap(_hazelcastInstance);
    // Keys from localKeySet whose entries have neither an id nor a graphId
    // (presumably entities not yet created in the index/graph -- confirm).
    Set<EntityKey> entityKeys = entityIMap.localKeySet(Predicates.and(Predicates.equal("id", null), Predicates.equal("graphId", null)));
    // Collaborators are resolved programmatically through CDI rather than injected.
    EntityGraphConfig config = CDI.current().select(EntityGraphConfig.class).get();
    EntityNameCleanerProvider entityNameCleanerProvider = CDI.current().select(EntityNameCleanerProvider.class).get();
    EntityService entityService = CDI.current().select(EntityService.class).get();
    EntityGraphService entityGraphService = CDI.current().select(EntityGraphService.class).get();
    Map<EntityKey, Entity> localEntityMap = entityIMap.getAll(entityKeys);
    Collection<Entity> localEntityValues = localEntityMap.values();
    Set<EntityKey> localEntityKeys = localEntityMap.keySet();
    // Every cluster member except this one.
    List<Member> collect = _hazelcastInstance.getCluster().getMembers().stream().filter(member -> !member.localMember()).collect(Collectors.toList());
    String[] ingestionIds = localEntityKeys.stream().map(EntityKey::getIngestionId).distinct().toArray(String[]::new);
    IExecutorService entityExecutor = _hazelcastInstance.getExecutorService("entityExecutor");
    // Send the local ingestion ids to the other members; each returns a map of entities.
    Map<Member, Future<Map<EntityKey, Entity>>> memberFutureMap = entityExecutor.submitToMembers(new GetEntitiesCallable(ingestionIds), collect);
    // Wait for each member's answer and merge all answers into one map
    // (an empty map when there are no other members).
    Map<EntityKey, Entity> otherEntityKeyEntityMap = memberFutureMap.values().stream().map(FutureUtil::makeCompletableFuture).map(CompletableFuture::join).reduce((a, b) -> {
        Map<EntityKey, Entity> map = new HashMap<>();
        map.putAll(a);
        map.putAll(b);
        return map;
    }).orElseGet(Map::of);
    // Tag each entity with its origin: false = from another member, true = local.
    Stream<EntityMember> otherEntityMemberStream = otherEntityKeyEntityMap.values().stream().map(entity -> EntityMember.of(entity, false));
    Stream<EntityMember> localEntityMemberStream = localEntityValues.stream().map(entity -> EntityMember.of(entity, true));
    Map<String, List<EntityMember>> entitiesGroupingByIngestionId = Stream.concat(localEntityMemberStream, otherEntityMemberStream).collect(Collectors.groupingBy(entityMember -> entityMember.getEntity().getIngestionId()));
    Collection<List<EntityMember>> values = entitiesGroupingByIngestionId.values();
    // Accumulates the processed entities of all groups; written back once at the end.
    Map<EntityKey, Entity> entityMap = new HashMap<>();
    for (List<EntityMember> ingestionIdEntities : values) {
        Map<AssociableEntityKey, Entity> localAssociableEntityMap = new HashMap<>();
        List<EntityCandidates> entityCandidateList = new ArrayList<>();
        // Candidates are computed for local AND remote entities of the group;
        // the full list is handed to every disambiguation call below.
        for (EntityMember ingestionIdEntity : ingestionIdEntities) {
            Entity innerEntity = ingestionIdEntity.getEntity();
            entityCandidateList.add(getEntityCandidates(entityNameCleanerProvider, entityService, ingestionIdEntity, innerEntity));
        }
        // Only local entities are processed here; each one becomes a Mono
        // subscribed on the bounded-elastic scheduler.
        List<Mono<Entity>> completableFutureList = entityCandidateList.stream().filter(entityCandidates -> entityCandidates.getEntity().isLocal()).map(entityCandidates -> Mono.fromSupplier(_getAndCreateEntityDisambiguate(config, entityNameCleanerProvider, entityService, entityGraphService, entityCandidateList, entityCandidates, entityCandidates.getEntity())).subscribeOn(Schedulers.boundedElastic())).collect(Collectors.toList());
        // Combine all results into one list; fall back to an empty list when
        // the zipped Mono completes without a value.
        Mono<List<Entity>> zip = Mono.zip(completableFutureList, a -> {
            List<Entity> entities = new ArrayList<>();
            for (Object o : a) {
                entities.add((Entity) o);
            }
            return entities;
        }).defaultIfEmpty(List.of());
        // Blocks until every Mono of this group has produced its entity.
        for (Entity currentEntityRequest : zip.block()) {
            localAssociableEntityMap.put(AssociableEntityKey.of(currentEntityRequest.getCacheId(), currentEntityRequest.getIngestionId()), currentEntityRequest);
            entityMap.put(EntityKey.of(currentEntityRequest.getTenantId(), currentEntityRequest.getName(), currentEntityRequest.getType(), currentEntityRequest.getCacheId(), currentEntityRequest.getIngestionId()), currentEntityRequest);
        }
        // Store this group's processed entities in the associable-entity map.
        associableEntityMap.setAll(localAssociableEntityMap);
    }
    // Write all processed entities back to the entity map in one call.
    entityIMap.setAll(entityMap);
}
Also used : Arrays(java.util.Arrays) Member(com.hazelcast.cluster.Member) FutureUtil(io.openk9.entity.manager.util.FutureUtil) EntityKey(io.openk9.entity.manager.cache.model.EntityKey) SymbolicName(org.neo4j.cypherdsl.core.SymbolicName) Future(java.util.concurrent.Future) Map(java.util.Map) ToString(lombok.ToString) Property(org.neo4j.cypherdsl.core.Property) Cypher(org.neo4j.cypherdsl.core.Cypher) EntityIndex(io.openk9.entity.manager.model.index.EntityIndex) EntityGraph(io.openk9.entity.manager.model.graph.EntityGraph) Collection(java.util.Collection) CDI(javax.enterprise.inject.spi.CDI) Set(java.util.Set) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Builder(lombok.Builder) Optional(java.util.Optional) Cypher.literalOf(org.neo4j.cypherdsl.core.Cypher.literalOf) HazelcastInstanceAware(com.hazelcast.core.HazelcastInstanceAware) AssociableEntityKey(io.openk9.entity.manager.cache.model.AssociableEntityKey) EntityGraphConfig(io.openk9.entity.manager.config.EntityGraphConfig) Logger(org.jboss.logging.Logger) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Statement(org.neo4j.cypherdsl.core.Statement) Schedulers(reactor.core.scheduler.Schedulers) EntityNameCleanerProvider(io.openk9.entity.manager.cleaner.EntityNameCleanerProvider) Node(org.neo4j.cypherdsl.core.Node) HazelcastInstance(com.hazelcast.core.HazelcastInstance) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) EntityNameCleaner(io.openk9.entity.manager.cleaner.EntityNameCleaner) Entity(io.openk9.entity.manager.cache.model.Entity) Functions(org.neo4j.cypherdsl.core.Functions) Mono(reactor.core.publisher.Mono) IExecutorService(com.hazelcast.core.IExecutorService) GetEntitiesCallable(io.openk9.entity.manager.action.GetEntitiesCallable) AliasedExpression(org.neo4j.cypherdsl.core.AliasedExpression) Predicates(com.hazelcast.query.Predicates) EntityService(io.openk9.entity.manager.service.index.EntityService) Data(lombok.Data) AllArgsConstructor(lombok.AllArgsConstructor) EntityGraphService(io.openk9.entity.manager.service.graph.EntityGraphService) Collections(java.util.Collections) MapUtil(io.openk9.entity.manager.util.MapUtil) IMap(com.hazelcast.map.IMap)

Example 3 with EntityNameCleanerProvider

use of io.openk9.entity.manager.api.EntityNameCleanerProvider in project openk9 by smclab.

The cleanCandidates method of the CreateEntitiesRunnable class.

/**
 * Narrows a candidate list down to its first element when that element
 * scores above the given threshold.
 *
 * <p>The score for the first candidate is computed in one of two ways:
 * <ul>
 *   <li>with several candidates, {@code _softmax} is applied to the first
 *       candidate's score against the scores of all candidates;</li>
 *   <li>with exactly one candidate, {@code _levenshteinDistance} is applied
 *       to the cleaned candidate name and the cleaned requested name, each
 *       cleaned by the cleaner registered for its own type.</li>
 * </ul>
 *
 * @param entityRequest the entity being matched
 * @param candidates candidates to filter; the first one is the only one
 *        that can be selected
 * @param entityNameCleanerProvider source of the per-type name cleaners
 * @param scoreThreshold value the computed score must strictly exceed
 * @return a singleton list holding the first candidate when its score is
 *         greater than {@code scoreThreshold}; otherwise {@code candidates}
 *         unchanged (including when it is empty)
 */
private List<EntityIndex> cleanCandidates(Entity entityRequest, List<EntityIndex> candidates, EntityNameCleanerProvider entityNameCleanerProvider, float scoreThreshold) {
    // Nothing to narrow down.
    if (candidates.isEmpty()) {
        return candidates;
    }

    EntityIndex topCandidate = candidates.get(0);
    double topScore;

    if (candidates.size() <= 1) {
        // Single candidate: compare the cleaned names directly.
        topScore = _levenshteinDistance(
            entityNameCleanerProvider.get(topCandidate.getType()).cleanEntityName(topCandidate.getName()),
            entityNameCleanerProvider.get(entityRequest.getType()).cleanEntityName(entityRequest.getName()));
    } else {
        // Several candidates: score the first one relative to all of them.
        double[] allScores = candidates.stream().mapToDouble(EntityIndex::getScore).toArray();
        topScore = _softmax(topCandidate.getScore(), allScores);
    }

    return topScore > scoreThreshold
        ? Collections.singletonList(topCandidate)
        : candidates;
}
Also used : EntityIndex(io.openk9.entity.manager.model.index.EntityIndex)

Aggregations

EntityIndex (io.openk9.entity.manager.model.index.EntityIndex)3 EntityNameCleaner (io.openk9.entity.manager.cleaner.EntityNameCleaner)2 QueryBuilder (org.elasticsearch.index.query.QueryBuilder)2 Member (com.hazelcast.cluster.Member)1 HazelcastInstance (com.hazelcast.core.HazelcastInstance)1 HazelcastInstanceAware (com.hazelcast.core.HazelcastInstanceAware)1 IExecutorService (com.hazelcast.core.IExecutorService)1 IMap (com.hazelcast.map.IMap)1 Predicates (com.hazelcast.query.Predicates)1 GetEntitiesCallable (io.openk9.entity.manager.action.GetEntitiesCallable)1 AssociableEntityKey (io.openk9.entity.manager.cache.model.AssociableEntityKey)1 Entity (io.openk9.entity.manager.cache.model.Entity)1 EntityKey (io.openk9.entity.manager.cache.model.EntityKey)1 EntityNameCleanerProvider (io.openk9.entity.manager.cleaner.EntityNameCleanerProvider)1 EntityGraphConfig (io.openk9.entity.manager.config.EntityGraphConfig)1 EntityGraph (io.openk9.entity.manager.model.graph.EntityGraph)1 EntityGraphService (io.openk9.entity.manager.service.graph.EntityGraphService)1 EntityService (io.openk9.entity.manager.service.index.EntityService)1 FutureUtil (io.openk9.entity.manager.util.FutureUtil)1 MapUtil (io.openk9.entity.manager.util.MapUtil)1