use of io.openk9.entity.manager.model.index.EntityIndex in project openk9 by smclab.
the class CreateEntitiesRunnable method _disambiguate.
/**
 * Resolves the current entity request to at most one {@link EntityGraph}.
 *
 * <p>When the request type is listed in {@code uniqueEntities} and exactly one
 * candidate exists, that candidate is converted directly into an
 * {@code EntityGraph}. In every other case with at least one candidate the
 * graph results come from {@code _getEntityGraphs}. With no candidates no
 * lookup is performed.
 *
 * @param entityGraphService service handed to {@code _getEntityGraphs}
 * @param candidates index hits the result must match by id
 * @param entityRequestList entity requests handed to {@code _getEntityGraphs}
 * @param currentEntityRequest the entity being disambiguated
 * @param uniqueEntities entity types checked through {@code _containsValue}
 * @param minHops lower hop bound handed to {@code _getEntityGraphs}
 * @param maxHops upper hop bound handed to {@code _getEntityGraphs}
 * @return the first non-null graph result whose id equals the id of some
 *         candidate, or an empty {@code Optional} when there is none
 */
private Optional<EntityGraph> _disambiguate(EntityGraphService entityGraphService, List<EntityIndex> candidates, List<EntityIndex> entityRequestList, Entity currentEntityRequest, String[] uniqueEntities, int minHops, int maxHops) {
    String requestType = currentEntityRequest.getType();
    boolean uniqueType = _containsValue(uniqueEntities, requestType);
    int candidateCount = candidates.size();

    List<EntityGraph> graphs = List.of();

    if (uniqueType && candidateCount == 1) {
        if (_log.isDebugEnabled()) {
            _log.debug("disambiguating entity with type " + requestType);
        }
        // A single hit for a unique type needs no graph search: use it as-is.
        EntityIndex only = candidates.get(0);
        graphs = List.of(
            EntityGraph.of(
                only.getId(), only.getGraphId(), only.getTenantId(),
                only.getName(), only.getType()));
    } else if (candidateCount > 0) {
        graphs = _getEntityGraphs(
            entityGraphService, entityRequestList, minHops, maxHops,
            requestType, graphs);
    }

    if (_log.isDebugEnabled()) {
        _log.debug("_disambiguate: " + graphs + " current: " + currentEntityRequest);
    }

    // Keep the first graph result that corresponds to one of the candidates.
    for (EntityGraph graph : graphs) {
        if (graph == null) {
            continue;
        }
        for (EntityIndex candidate : candidates) {
            if (graph.getId().equals(candidate.getId())) {
                return Optional.of(graph);
            }
        }
    }
    return Optional.empty();
}
use of io.openk9.entity.manager.model.index.EntityIndex in project openk9 by smclab.
the class CreateEntitiesRunnable method _getEntityGraphs.
/**
 * Searches the graph around every entity request and returns what
 * {@code entityGraphService.search} finds.
 *
 * <p>For each entry of {@code entityRequestList} a Cypher statement is built
 * that matches the node labelled with the entry's type whose {@code id}
 * property equals the entry's id, calls {@code apoc.path.expand} on it with
 * the arguments {@code (entity, null, "-date", minHops, maxHops)}, and returns
 * the last node of each yielded path as {@code node} together with the number
 * of path nodes minus one as {@code hops}. The statements (a single one
 * directly, several combined through {@code Cypher.unionAll}) are wrapped in a
 * subquery call returning {@code node} and {@code hops} ordered by
 * {@code hops}.
 *
 * @param entityGraphService service executing the final statement
 * @param entityRequestList entities used as starting nodes
 * @param minHops minimum hop argument passed to {@code apoc.path.expand}
 * @param maxHops maximum hop argument passed to {@code apoc.path.expand}
 * @param currentEntityRequestType type of the current request; only logged
 * @param result value returned unchanged when {@code entityRequestList} is empty
 * @return the search result, or {@code result} when there is nothing to query
 */
private List<EntityGraph> _getEntityGraphs(EntityGraphService entityGraphService, List<EntityIndex> entityRequestList, int minHops, int maxHops, String currentEntityRequestType, List<EntityGraph> result) {
    if (_log.isDebugEnabled()) {
        _log.debug("disambiguating with search entity with type " + currentEntityRequestType);
    }

    Statement[] expansions = entityRequestList.stream()
        .map(request -> {
            Node startNode = Cypher.node(request.getType()).named("entity");
            AliasedExpression startAlias = startNode.as("entity");
            SymbolicName pathName = Cypher.name("path");
            Property requestId = startAlias.getDelegate().property("id");
            Statement expansion = Cypher.match(startNode)
                .where(requestId.eq(literalOf(request.getId())))
                .call("apoc.path.expand")
                .withArgs(startAlias.getDelegate(), literalOf(null), literalOf("-date"), literalOf(minHops), literalOf(maxHops))
                .yield(pathName)
                .returning(
                    Functions.last(Functions.nodes(pathName)).as("node"),
                    Functions.size(Functions.nodes(pathName)).subtract(literalOf(1)).as("hops"))
                .build();
            return expansion;
        })
        .toArray(Statement[]::new);

    if (expansions.length == 0) {
        return result;
    }

    // A lone statement is used directly; a union is only built for two or more.
    Statement combined = expansions.length == 1 ? expansions[0] : Cypher.unionAll(expansions);

    Statement query = Cypher.call(combined)
        .returning("node", "hops")
        .orderBy(Cypher.name("hops"))
        .build();

    return entityGraphService.search(query);
}
use of io.openk9.entity.manager.model.index.EntityIndex in project openk9 by smclab.
the class EntityService method searchByNameAndType.
/**
 * Looks up a single entity by name and type.
 *
 * <p>Builds a bool query whose two {@code must} clauses are match queries on
 * the {@code name} and {@code type} fields and runs it through
 * {@code search(tenantId, query, 0, 1)} (presumably offset 0 and page size 1;
 * verify against {@code search}).
 *
 * @param tenantId tenant passed to {@code search}
 * @param name value matched against the {@code name} field
 * @param type value matched against the {@code type} field
 * @return the first hit, or {@code null} when the search returns no hits
 */
public EntityIndex searchByNameAndType(long tenantId, String name, String type) {
    BoolQueryBuilder nameAndType = QueryBuilders.boolQuery()
        .must(QueryBuilders.matchQuery("name", name))
        .must(QueryBuilders.matchQuery("type", type));

    List<EntityIndex> hits = search(tenantId, nameAndType, 0, 1);

    return hits.isEmpty() ? null : hits.get(0);
}
use of io.openk9.entity.manager.model.index.EntityIndex in project openk9 by smclab.
the class CreateEntitiesRunnable method getEntityCandidates.
/**
 * Collects the index candidates for one ingested entity.
 *
 * <p>The name cleaner registered for the entity's type turns the entity's
 * tenant id and name into a query, which is then run through
 * {@code entityService.search} with the arguments {@code 0} and {@code 10}
 * (presumably offset and maximum number of hits; verify against
 * {@code EntityService.search}).
 *
 * @param entityNameCleanerProvider supplies the cleaner for the entity type
 * @param entityService service executing the candidate search
 * @param ingestionIdEntityMember member paired with the candidates in the result
 * @param ingestionIdEntity entity whose type, tenant id and name drive the search
 * @return the member together with the candidates found for it
 */
private EntityCandidates getEntityCandidates(EntityNameCleanerProvider entityNameCleanerProvider, EntityService entityService, EntityMember ingestionIdEntityMember, Entity ingestionIdEntity) {
    QueryBuilder cleanedNameQuery = entityNameCleanerProvider
        .get(ingestionIdEntity.getType())
        .cleanEntityName(ingestionIdEntity.getTenantId(), ingestionIdEntity.getName());

    return EntityCandidates.of(
        ingestionIdEntityMember,
        entityService.search(ingestionIdEntity.getTenantId(), cleanedNameQuery, 0, 10));
}
use of io.openk9.entity.manager.model.index.EntityIndex in project openk9 by smclab.
the class CreateEntitiesRunnable method cleanCandidates.
/**
 * Narrows the candidate list to its first element when that element scores
 * above {@code scoreThreshold}.
 *
 * <p>With several candidates the score is {@code _softmax} of the first
 * candidate's score against all candidate scores. With a single candidate it
 * is {@code _levenshteinDistance} between the cleaned candidate name and the
 * cleaned request name, each cleaned by the cleaner registered for its own
 * type.
 *
 * @param entityRequest the requested entity the candidates are compared with
 * @param candidates candidates to narrow; the first element is treated as the best
 * @param entityNameCleanerProvider supplies name cleaners per entity type
 * @param scoreThreshold value the computed score must exceed
 * @return a singleton list holding the first candidate when its score exceeds
 *         the threshold, otherwise {@code candidates} itself
 */
private List<EntityIndex> cleanCandidates(Entity entityRequest, List<EntityIndex> candidates, EntityNameCleanerProvider entityNameCleanerProvider, float scoreThreshold) {
    if (candidates.isEmpty()) {
        return candidates;
    }

    EntityIndex top = candidates.get(0);
    double bestScore;

    if (candidates.size() == 1) {
        // NOTE(review): with one candidate both outcomes below contain the same
        // single element, so this score only decides which list instance is
        // returned; confirm that this is intended.
        bestScore = _levenshteinDistance(
            entityNameCleanerProvider.get(top.getType()).cleanEntityName(top.getName()),
            entityNameCleanerProvider.get(entityRequest.getType()).cleanEntityName(entityRequest.getName()));
    } else {
        double[] allScores = candidates.stream().mapToDouble(EntityIndex::getScore).toArray();
        bestScore = _softmax(top.getScore(), allScores);
    }

    return bestScore > scoreThreshold ? Collections.singletonList(top) : candidates;
}
Aggregations