Use of org.nzbhydra.indexers.Indexer in project nzbhydra2 by theotherp.
The class DuplicateDetector, method detectDuplicates:
public DuplicateDetectionResult detectDuplicates(List<SearchResultItem> results) {
    Stopwatch stopwatch = Stopwatch.createStarted();
    Map<String, List<SearchResultItem>> groupedByTitle = results.stream()
            .collect(Collectors.groupingBy(x -> x.getTitle().replaceFirst("[ .\\-_]", "")));
    Multiset<Indexer> countUniqueResultsPerIndexer = HashMultiset.create();
    List<LinkedHashSet<SearchResultItem>> duplicateGroups = new ArrayList<>();
    // In each list of search results with the same title we want to find the duplicates
    int countDetectedDuplicates = 0;
    for (List<SearchResultItem> titleGroup : groupedByTitle.values()) {
        titleGroup = titleGroup.stream()
                .sorted(Comparator.comparing(SearchResultItem::getBestDate).reversed())
                .collect(Collectors.toList());
        // Start with a bucket containing the first result; in the end we have a list of buckets
        // where all results in a bucket are duplicates of one another
        List<LinkedHashSet<SearchResultItem>> listOfBuckets = new ArrayList<>();
        listOfBuckets.add(new LinkedHashSet<>(newArrayList(titleGroup.get(0))));
        // Iterate over every other item in the list
        for (int i = 1; i < titleGroup.size(); i++) {
            SearchResultItem searchResultItem = titleGroup.get(i);
            boolean foundBucket = false;
            // Iterate over the already existing buckets
            for (LinkedHashSet<SearchResultItem> bucket : listOfBuckets) {
                // A bucket may contain at most one result per indexer
                if (bucket.stream().map(SearchResultItem::getIndexer).collect(Collectors.toList()).contains(searchResultItem.getIndexer())) {
                    continue;
                }
                // ...and over all results in those buckets
                for (SearchResultItem other : bucket) {
                    // Now we can check if the two results are duplicates
                    boolean same = testForSameness(searchResultItem, other);
                    if (same) {
                        // If they are the same we have found a bucket for the result. Add it and continue
                        foundBucket = true;
                        bucket.add(searchResultItem);
                        countDetectedDuplicates++;
                        break;
                    }
                }
                // If we already found a bucket for the result we can go on with the next one
                if (foundBucket) {
                    break;
                }
            }
            // If we didn't find a bucket for the result we start a new one
            if (!foundBucket) {
                listOfBuckets.add(new LinkedHashSet<>(newArrayList(searchResultItem)));
            }
        }
        LinkedHashSet<SearchResultItem> lastBucket = Iterables.getLast(listOfBuckets);
        if (lastBucket.size() == 1) {
            countUniqueResultsPerIndexer.add(lastBucket.iterator().next().getIndexer());
        }
        duplicateGroups.addAll(listOfBuckets);
    }
    int duplicateIdentifier = 0;
    for (LinkedHashSet<SearchResultItem> group : duplicateGroups) {
        for (SearchResultItem x : group) {
            x.setDuplicateIdentifier(duplicateIdentifier);
        }
        duplicateIdentifier++;
    }
    logger.debug(LoggingMarkers.PERFORMANCE, "Duplicate detection for {} search results took {}ms. Found {} duplicates", results.size(), stopwatch.elapsed(TimeUnit.MILLISECONDS), countDetectedDuplicates);
    return new DuplicateDetectionResult(duplicateGroups, countUniqueResultsPerIndexer);
}
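The bucketing logic reads more easily in isolation. The following is a minimal, self-contained sketch of the same idea: Result and areDuplicates are hypothetical stand-ins for SearchResultItem and testForSameness, and the 2% size tolerance is an illustrative assumption, not the project's actual sameness test.

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;

public class BucketSketch {

    // Hypothetical stand-in for SearchResultItem
    record Result(String indexer, String title, long sizeBytes) {}

    // Hypothetical sameness test: sizes within 2% of each other count as duplicates
    static boolean areDuplicates(Result a, Result b) {
        long max = Math.max(Math.max(a.sizeBytes(), b.sizeBytes()), 1);
        return Math.abs(a.sizeBytes() - b.sizeBytes()) / (double) max < 0.02;
    }

    // Distribute one normalized-title group into buckets of mutual duplicates
    static List<LinkedHashSet<Result>> bucketize(List<Result> sameTitleGroup) {
        List<LinkedHashSet<Result>> buckets = new ArrayList<>();
        for (Result candidate : sameTitleGroup) {
            boolean placed = false;
            for (LinkedHashSet<Result> bucket : buckets) {
                // A bucket may contain at most one result per indexer
                if (bucket.stream().anyMatch(r -> r.indexer().equals(candidate.indexer()))) {
                    continue;
                }
                if (bucket.stream().anyMatch(r -> areDuplicates(candidate, r))) {
                    bucket.add(candidate);
                    placed = true;
                    break;
                }
            }
            if (!placed) {
                LinkedHashSet<Result> newBucket = new LinkedHashSet<>();
                newBucket.add(candidate);
                buckets.add(newBucket);
            }
        }
        return buckets;
    }

    public static void main(String[] args) {
        List<Result> group = List.of(
                new Result("indexerA", "Some.Title", 1_000_000),
                new Result("indexerB", "Some Title", 1_010_000),
                new Result("indexerC", "Some_Title", 5_000_000));
        // Expected: two buckets - [A, B] (sizes within 2%) and [C]
        bucketize(group).forEach(System.out::println);
    }
}

Because a bucket holds at most one result per indexer, two releases of the same title from one indexer always land in separate buckets, which is exactly what the guard at the top of the bucket loop above enforces.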
Use of org.nzbhydra.indexers.Indexer in project nzbhydra2 by theotherp.
The class FileHandler, method downloadFile:
protected byte[] downloadFile(SearchResultEntity result) throws IOException {
    Request request = new Request.Builder().url(result.getLink()).build();
    Indexer indexerByName = searchModuleProvider.getIndexerByName(result.getIndexer().getName());
    Integer timeout = indexerByName.getConfig().getTimeout().orElse(configProvider.getBaseConfig().getSearching().getTimeout());
    try (Response response = clientHttpRequestFactory.getOkHttpClientBuilder(request.url().uri())
            .readTimeout(timeout, TimeUnit.SECONDS)
            .connectTimeout(timeout, TimeUnit.SECONDS)
            .build().newCall(request).execute()) {
        if (!response.isSuccessful()) {
            throw new IOException("Unsuccessful NZB download from URL " + result.getLink() + ". Message: " + response.message());
        }
        ResponseBody body = response.body();
        if (body == null) {
            throw new IOException("NZB downloaded from " + result.getLink() + " is empty");
        }
        return body.bytes();
    }
}
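For comparison, here is a standalone sketch of the same download flow written directly against OkHttp's public API, without the project's clientHttpRequestFactory or per-indexer config lookup; the URL and timeout values are placeholders.

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import okhttp3.ResponseBody;

public class DownloadSketch {

    static byte[] download(String url, int timeoutSeconds) throws IOException {
        OkHttpClient client = new OkHttpClient.Builder()
                .connectTimeout(timeoutSeconds, TimeUnit.SECONDS)
                .readTimeout(timeoutSeconds, TimeUnit.SECONDS)
                .build();
        Request request = new Request.Builder().url(url).build();
        // try-with-resources closes the response (and its body) on every path
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("Unsuccessful download from " + url + ": " + response.message());
            }
            ResponseBody body = response.body();
            if (body == null) {
                throw new IOException("Empty response body from " + url);
            }
            return body.bytes();
        }
    }
}

The try-with-resources block matters here: closing the Response even on the error paths prevents leaked connections, which is why the original wraps the whole check-and-read sequence in it.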
Use of org.nzbhydra.indexers.Indexer in project nzbhydra2 by theotherp.
The class IndexerForSearchSelector, method pickIndexers:
public IndexerForSearchSelection pickIndexers(SearchRequest searchRequest) {
    this.searchRequest = searchRequest;
    // Check any indexer that's not disabled by the user. If it's disabled by the system it will be deselected with a proper message later
    List<Indexer> eligibleIndexers = searchModuleProvider.getIndexers().stream()
            .filter(x -> x.getConfig().getState() != IndexerConfig.State.DISABLED_USER)
            .collect(Collectors.toList());
    if (eligibleIndexers.isEmpty()) {
        logger.warn("You don't have any enabled indexers");
        return new IndexerForSearchSelection();
    }
    List<Indexer> selectedIndexers = new ArrayList<>();
    logger.debug("Picking indexers out of " + eligibleIndexers.size());
    Stopwatch stopwatch = Stopwatch.createStarted();
    for (Indexer indexer : eligibleIndexers) {
        if (!checkInternalAndNotEvenShown(indexer)) {
            continue;
        }
        if (!checkIndexerSelectedByUser(indexer)) {
            continue;
        }
        if (!checkIndexerConfigComplete(indexer)) {
            continue;
        }
        if (!checkSearchSource(indexer)) {
            continue;
        }
        if (!checkIndexerStatus(indexer)) {
            continue;
        }
        if (!checkTorznabOnlyUsedForTorrentOrInternalSearches(indexer)) {
            continue;
        }
        if (!checkDisabledForCategory(indexer)) {
            continue;
        }
        if (!checkSchedule(indexer)) {
            continue;
        }
        if (!checkLoadLimiting(indexer)) {
            continue;
        }
        if (!checkSearchId(indexer)) {
            continue;
        }
        if (!checkIndexerHitLimit(indexer)) {
            continue;
        }
        selectedIndexers.add(indexer);
    }
    logger.debug(LoggingMarkers.PERFORMANCE, "Selection of indexers took {}ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    if (selectedIndexers.isEmpty()) {
        logger.warn("No indexers were selected for this search. You probably don't have any indexers configured which support the provided ID type or all of your indexers which do are currently disabled. You can enable query generation to work around this.");
    } else {
        logger.info("Selected {} out of {} indexers: {}", selectedIndexers.size(), eligibleIndexers.size(), Joiner.on(", ").join(selectedIndexers.stream().map(Indexer::getName).collect(Collectors.toList())));
    }
    eventPublisher.publishEvent(new IndexerSelectionEvent(searchRequest, selectedIndexers.size()));
    return new IndexerForSearchSelection(notSelectedIndersWithReason, selectedIndexers);
}
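The method applies its checks as an early-continue guard chain. The sketch below shows the same pattern reduced to two checks over a hypothetical Indexer record (not the real class); keeping the checks in an ordered map additionally yields a loggable rejection reason, similar to what notSelectedIndersWithReason collects.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;

public class SelectionSketch {

    // Hypothetical stand-in for the real Indexer
    record Indexer(String name, boolean enabled, boolean configComplete) {}

    public static void main(String[] args) {
        // Ordered checks; the first failing one names the rejection reason
        Map<String, Predicate<Indexer>> checks = new LinkedHashMap<>();
        checks.put("disabled by user", Indexer::enabled);
        checks.put("config incomplete", Indexer::configComplete);

        List<Indexer> candidates = List.of(
                new Indexer("a", true, true),
                new Indexer("b", false, true),
                new Indexer("c", true, false));

        List<Indexer> selected = new ArrayList<>();
        outer:
        for (Indexer indexer : candidates) {
            for (Map.Entry<String, Predicate<Indexer>> check : checks.entrySet()) {
                if (!check.getValue().test(indexer)) {
                    System.out.println(indexer.name() + " not selected: " + check.getKey());
                    continue outer;
                }
            }
            selected.add(indexer);
        }
        System.out.println("Selected: " + selected); // only "a" passes both checks
    }
}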
Use of org.nzbhydra.indexers.Indexer in project nzbhydra2 by theotherp.
The class Searcher, method search:
public SearchResult search(SearchRequest searchRequest) {
    Stopwatch stopwatch = Stopwatch.createStarted();
    eventPublisher.publishEvent(new SearchEvent(searchRequest));
    SearchCacheEntry searchCacheEntry = getSearchCacheEntry(searchRequest);
    SearchResult searchResult = new SearchResult();
    // LATER default for limit
    int numberOfWantedResults = searchRequest.getOffset().orElse(0) + searchRequest.getLimit().orElse(100);
    searchResult.setPickingResult(searchCacheEntry.getPickingResult());
    Map<Indexer, List<IndexerSearchResult>> indexersToSearchAndTheirResults = getIndexerSearchResultsToSearch(searchCacheEntry.getIndexerSearchResultsByIndexer());
    List<SearchResultItem> searchResultItems = searchCacheEntry.getSearchResultItems();
    while (indexersToSearchAndTheirResults.size() > 0 && (searchResultItems.size() < numberOfWantedResults || searchRequest.isLoadAll())) {
        if (shutdownRequested) {
            break;
        }
        if (searchRequest.isLoadAll()) {
            logger.debug("Going to call {} indexers because {} results were loaded yet but more results are available and all were requested", indexersToSearchAndTheirResults.size(), searchCacheEntry.getNumberOfFoundResults());
            int maxResultsToLoad = searchRequest.getIndexers().orElse(Sets.newHashSet("")).size() * 1000;
            if (searchResultItems.size() > maxResultsToLoad) {
                logger.info("Aborting loading all results because more than {} results were already loaded and we don't want to hammer the indexers too much", maxResultsToLoad);
                break;
            }
        } else {
            logger.debug("Going to call {} indexers because {} of {} wanted results were loaded yet", indexersToSearchAndTheirResults.size(), searchCacheEntry.getNumberOfFoundResults(), numberOfWantedResults);
        }
        // Do the actual search
        indexersToSearchAndTheirResults = callSearchModules(searchRequest, indexersToSearchAndTheirResults);
        // Update the cache
        searchCacheEntry.getIndexerSearchResultsByIndexer().putAll(indexersToSearchAndTheirResults);
        searchRequestCache.put(searchRequest.hashCode(), searchCacheEntry);
        // Use the search result items from the cache, which contains *all* search results, not just the latest. That allows finding duplicates across multiple searches
        searchResultItems = searchCacheEntry.getIndexerSearchResultsByIndexer().values().stream()
                .flatMap(Collection::stream)
                .filter(IndexerSearchResult::isWasSuccessful)
                .flatMap(x -> x.getSearchResultItems().stream())
                .distinct()
                .collect(Collectors.toList());
        DuplicateDetectionResult duplicateDetectionResult = duplicateDetector.detectDuplicates(searchResultItems);
        // Save to the database
        createOrUpdateIndexerSearchEntity(searchCacheEntry, indexersToSearchAndTheirResults, duplicateDetectionResult);
        // Remove duplicates for external searches
        if (searchRequest.getSource() == SearchSource.API) {
            int beforeDuplicateRemoval = searchResultItems.size();
            searchResultItems = getNewestSearchResultItemFromEachDuplicateGroup(duplicateDetectionResult.getDuplicateGroups());
            searchResult.setNumberOfRemovedDuplicates(beforeDuplicateRemoval - searchResultItems.size());
        }
        // Set the rejection counts from all searches, this one and the previous ones
        searchCacheEntry.getReasonsForRejection().clear();
        indexersToSearchAndTheirResults.values().forEach(x -> x.forEach(y -> y.getReasonsForRejection().entrySet().forEach(z -> searchCacheEntry.getReasonsForRejection().add(z.getElement(), z.getCount()))));
        // Update indexersToSearchAndTheirResults to remove indexers which threw an error or don't have any more results
        indexersToSearchAndTheirResults = getIndexerSearchResultsToSearch(indexersToSearchAndTheirResults);
        searchCacheEntry.setSearchResultItems(searchResultItems);
    }
    searchResult.setNumberOfTotalAvailableResults(searchCacheEntry.getNumberOfTotalAvailableResults());
    searchResult.setIndexerSearchResults(searchCacheEntry.getIndexerSearchResultsByIndexer().entrySet().stream().map(x -> Iterables.getLast(x.getValue())).collect(Collectors.toList()));
    searchResult.setReasonsForRejection(searchCacheEntry.getReasonsForRejection());
    searchResultItems.sort(Comparator.comparingLong(x -> x.getBestDate().getEpochSecond()));
    Collections.reverse(searchResultItems);
    spliceSearchResultItemsAccordingToOffsetAndLimit(searchRequest, searchResult, searchResultItems);
    logger.debug(LoggingMarkers.PERFORMANCE, "Internal search took {}ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    return searchResult;
}
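The loop's core mechanic, fetching from the remaining sources until offset + limit results are collected or every source is exhausted, can be sketched on its own. Source and fetchNextPage are hypothetical stand-ins for the indexer search modules; the real method additionally caches, deduplicates, and persists results between iterations.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class PagingSketch {

    interface Source {
        List<String> fetchNextPage(); // hypothetical; an empty list means the source is exhausted
    }

    static List<String> collect(List<Source> sources, int offset, int limit) {
        int wanted = offset + limit;
        List<String> items = new ArrayList<>();
        List<Source> remaining = new ArrayList<>(sources);
        while (!remaining.isEmpty() && items.size() < wanted) {
            Iterator<Source> it = remaining.iterator();
            while (it.hasNext()) {
                List<String> page = it.next().fetchNextPage();
                if (page.isEmpty()) {
                    it.remove(); // drop exhausted sources, like getIndexerSearchResultsToSearch does
                } else {
                    items.addAll(page);
                }
            }
        }
        // Finally apply offset and limit, as spliceSearchResultItemsAccordingToOffsetAndLimit does
        return items.subList(Math.min(offset, items.size()), Math.min(wanted, items.size()));
    }
}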
Use of org.nzbhydra.indexers.Indexer in project nzbhydra2 by theotherp.
The class FileHandler, method getNfo:
public NfoResult getNfo(Long searchResultId) {
    SearchResultEntity result = searchResultRepository.findOne(searchResultId);
    if (result == null) {
        logger.error("Download request with invalid/outdated search result ID " + searchResultId);
        throw new RuntimeException("Download request with invalid/outdated search result ID " + searchResultId);
    }
    Indexer indexer = searchModuleProvider.getIndexerByName(result.getIndexer().getName());
    return indexer.getNfo(result.getIndexerGuid());
}
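As an aside, newer Spring Data releases replace the nullable findOne with findById, which returns an Optional. A sketch of the equivalent lookup, assuming searchResultRepository extends a standard CrudRepository (an assumption; the snippet above uses the older API):

// Sketch: the same lookup with the Optional-based Spring Data API (assumed available)
SearchResultEntity result = searchResultRepository.findById(searchResultId)
        .orElseThrow(() -> new RuntimeException(
                "Download request with invalid/outdated search result ID " + searchResultId));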