use of eu.etaxonomy.cdm.strategy.match.IMatchable in project cdmlib by cybertaxonomy.
the class ImportDeduplicationHelper method getMatchingEntity.
/**
 * Looks for an already known entity that matches the given one.
 * <p>
 * Candidates stored under the entity's title cache are checked first. Only if none of them
 * matches, the dedup status is not {@code Status.USE_MAP} and a repository is available,
 * the repository's common service is asked for matching entities.
 *
 * @param entityOrig the entity to find a match for; it is deproxied before matching
 * @param dedupInfo supplies the matchers, the status and the title lookup
 * @param parsed if {@code true}, {@code dedupInfo.parsedMatcher} is used when it is not {@code null};
 *        otherwise {@code dedupInfo.defaultMatcher} is used
 * @return a matching entity, or an empty {@code Optional} if none was found
 * @throws RuntimeException wrapping any {@code MatchException} raised while matching
 */
private <S extends IMatchable> Optional<S> getMatchingEntity(S entityOrig, DedupInfo<S> dedupInfo, boolean parsed) {
    S entity = CdmBase.deproxy(entityOrig);

    // Pick the matcher: the parsed matcher is only used for parsed entities and only if it exists.
    final IMatchStrategy matcher;
    if (parsed && dedupInfo.parsedMatcher != null) {
        matcher = dedupInfo.parsedMatcher;
    } else {
        matcher = dedupInfo.defaultMatcher;
    }

    // The checked MatchException cannot leave a Predicate, so it is rethrown unchecked.
    Predicate<S> matchFilter = candidate -> {
        try {
            return matcher.invoke((IMatchable) entity, (IMatchable) candidate).isSuccessful();
        } catch (MatchException matchFailure) {
            throw new RuntimeException(matchFailure);
        }
    };

    // TODO casting
    // A missing title entry is treated like an empty candidate set.
    Optional<S> result = Optional
            .ofNullable(getEntityByTitle(((IdentifiableEntity<?>) entity).getTitleCache(), dedupInfo))
            .orElse(new HashSet<>())
            .stream()
            .filter(matchFilter)
            .findAny();

    boolean lookupIsFinal = result.isPresent() || dedupInfo.status == Status.USE_MAP || repository == null;
    if (lookupIsFinal) {
        return result;
    }

    // Fall back to the repository; the first match it returns is used.
    try {
        return (Optional) repository.getCommonService()
                .findMatching((IMatchable) entity, matcher)
                .stream()
                .findFirst();
    } catch (MatchException matchFailure) {
        throw new RuntimeException(matchFailure);
    }
}
use of eu.etaxonomy.cdm.strategy.match.IMatchable in project cdmlib by cybertaxonomy.
the class IdentifiableServiceBase method deduplicate.
/**
 * Deduplicates the objects of the given class.
 * <p>
 * Objects are fetched page-wise, ordered ascending by {@code titleCache}, and handed to
 * {@code handleAllPages} until the dedup state reports completion. The group left over
 * afterwards is processed by {@code handleLastGroup}.
 *
 * @param clazz the class to deduplicate; must implement both {@code IMatchable} and
 *        {@code IMergable}, otherwise a warning is logged and nothing is done
 * @param matchStrategy the match strategy; if {@code null}, a {@code DefaultMatchStrategy}
 *        for {@code clazz} is used
 * @param mergeStrategy the merge strategy passed on to the page and group handlers
 * @return the count reported by {@code handleLastGroup}, or {@code 0} if no deduplication
 *         was performed
 */
@Override
@Transactional(readOnly = false)
public int deduplicate(Class<? extends T> clazz, IMatchStrategyEqual matchStrategy, IMergeStrategy mergeStrategy) {
    DeduplicateState dedupState = new DeduplicateState();

    if (clazz == null) {
        logger.warn("Deduplication clazz must not be null!");
        return 0;
    }

    boolean matchable = IMatchable.class.isAssignableFrom(clazz);
    boolean mergable = matchable && IMergable.class.isAssignableFrom(clazz);
    if (!mergable) {
        logger.warn("Deduplication implemented only for classes implementing IMatchable and IMergeable. No deduplication performed!");
        return 0;
    }

    if (matchStrategy == null) {
        // Raw type kept on purpose: clazz is only known to be matchable at runtime.
        Class matchableClass = clazz;
        matchStrategy = DefaultMatchStrategy.NewInstance(matchableClass);
    }

    List<T> nextGroup = new ArrayList<>();
    List<OrderHint> orderHints = Arrays.asList(new OrderHint("titleCache", SortOrder.ASCENDING));

    while (!dedupState.isCompleted) {
        // fetch the next pages, beginning at the current start page
        List<? extends T> objectList = getPages(clazz, dedupState, orderHints);

        // handleAllPages runs first; its result is then offset by the pages already finished
        int processedTotal = handleAllPages(objectList, dedupState, nextGroup, matchStrategy, mergeStrategy)
                + dedupState.pageSize * dedupState.startPage;

        // the next round starts at the first page that is not yet finished
        dedupState.startPage = processedTotal / dedupState.pageSize;
    }

    return handleLastGroup(nextGroup, matchStrategy, mergeStrategy);
}
use of eu.etaxonomy.cdm.strategy.match.IMatchable in project cdmlib by cybertaxonomy.
the class IdentifiableServiceBase method handleLastGroup.
/**
 * Compares the objects of the given group pairwise and merges every pair that the
 * match strategy reports as a successful match.
 * <p>
 * For a matching pair {@code commonService.merge} is called with the object at the lower
 * index first and the object at the higher index second. The higher index is then excluded
 * from all further comparisons. A {@code MatchException} or {@code MergeException} for one
 * pair is logged as a warning, including the exception, and processing continues with the
 * next pair.
 *
 * @param group the objects to compare
 * @param matchStrategy the strategy used to decide whether two objects match
 * @param mergeStrategy the strategy handed to {@code commonService.merge}
 * @return the number of merges that completed without an exception
 */
private int handleLastGroup(List<T> group, IMatchStrategyEqual matchStrategy, IMergeStrategy mergeStrategy) {
    int result = 0;
    int size = group.size();
    // indices of objects that have already been merged and must not be visited again
    Set<Integer> exclude = new HashSet<>();
    for (int i = 0; i < size - 1; i++) {
        if (exclude.contains(i)) {
            continue;
        }
        // the same for every j, so fetched once per outer iteration
        T firstObject = group.get(i);
        for (int j = i + 1; j < size; j++) {
            if (exclude.contains(j)) {
                continue;
            }
            T secondObject = group.get(j);
            try {
                if (matchStrategy.invoke((IMatchable) firstObject, (IMatchable) secondObject).isSuccessful()) {
                    commonService.merge((IMergable) firstObject, (IMergable) secondObject, mergeStrategy);
                    exclude.add(j);
                    result++;
                }
            } catch (MatchException e) {
                // pass the exception to the logger so the stack trace lands in the log, not on stderr
                logger.warn("MatchException when trying to match " + firstObject.getTitleCache(), e);
            } catch (MergeException e) {
                logger.warn("MergeException when trying to merge " + firstObject.getTitleCache(), e);
            }
        }
    }
    return result;
}
Aggregations