use of org.apache.commons.lang3.Range in project stargate-core by tuplejump.
the class SearchSupport method getRows.
protected List<Row> getRows(final ExtendedFilter filter, final Search search, final String queryString) {
    final SearchSupport searchSupport = this;
    AbstractBounds<RowPosition> keyRange = filter.dataRange.keyRange();
    final Range<Token> filterRange = new Range<>(keyRange.left.getToken(), keyRange.right.getToken());
    final boolean isSingleToken = filterRange.left.equals(filterRange.right);
    final boolean isFullRange = isSingleToken && baseCfs.partitioner.getMinimumToken().equals(filterRange.left);
    final boolean shouldSaveToCache = isPagingQuery(filter.dataRange);
    final boolean shouldRetrieveFromCache = shouldSaveToCache && !isFirstPage((DataRange.Paging) filter.dataRange);
    SearcherCallback<List<Row>> sc = new SearcherCallback<List<Row>>() {

        @Override
        public List<Row> doWithSearcher(org.apache.lucene.search.IndexSearcher searcher) throws Exception {
            Utils.SimpleTimer timer = Utils.getStartedTimer(logger);
            List<Row> results;
            if (search == null) {
                results = new ArrayList<>();
            } else {
                Utils.SimpleTimer timer2 = Utils.getStartedTimer(SearchSupport.logger);
                Function function = search.function();
                Query query = LuceneUtils.getQueryUpdatedWithPKCondition(search.query(options), getPartitionKeyString(filter));
                int resultsLimit = searcher.getIndexReader().maxDoc();
                if (resultsLimit == 0) {
                    resultsLimit = 1;
                }
                function.init(options);
                IndexEntryCollector collector = null;
                if (shouldRetrieveFromCache) {
                    collector = currentIndex.collectorMap.get(queryString);
                }
                if (collector == null) {
                    collector = new IndexEntryCollector(tableMapper, search, options, resultsLimit);
                    searcher.search(query, collector);
                    if (shouldSaveToCache) {
                        currentIndex.collectorMap.put(queryString, collector);
                    }
                    if (logger.isInfoEnabled()) {
                        logger.info("Adding collector to cache");
                    }
                } else if (logger.isInfoEnabled()) {
                    logger.info("Found collector in cache");
                }
                timer2.endLogTime("Lucene search for [" + collector.getTotalHits() + "] results ");
                if (SearchSupport.logger.isDebugEnabled()) {
                    SearchSupport.logger.debug(String.format("Search results [%s]", collector.getTotalHits()));
                }
                ResultMapper iter = new ResultMapper(tableMapper, searchSupport, filter, collector, function.shouldTryScoring() && search.isShowScore());
                Utils.SimpleTimer timer3 = Utils.getStartedTimer(SearchSupport.logger);
                results = function.process(iter, baseCfs, currentIndex);
                timer3.endLogTime("Aggregation [" + results.size() + "] results");
            }
            timer.endLogTime("Search with results [" + results.size() + "] ");
            return results;
        }

        @Override
        public Range<Token> filterRange() {
            return filterRange;
        }

        @Override
        public boolean isSingleToken() {
            return isSingleToken;
        }

        @Override
        public boolean isFullRange() {
            return isFullRange;
        }
    };
    return currentIndex.search(sc);
}
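The Range built above appears to be Cassandra's org.apache.cassandra.dht.Range<Token> (note the public left and right fields), rather than the commons-lang3 class this page indexes. For comparison, a minimal sketch of the org.apache.commons.lang3.Range API itself; the token values are hypothetical:

import org.apache.commons.lang3.Range;

public class TokenRangeSketch {
    public static void main(String[] args) {
        // Range.between builds an inclusive range; Range.is builds a single-point range.
        Range<Long> filterRange = Range.between(-100L, 100L);
        Range<Long> singleToken = Range.is(42L);

        // Mirrors the isSingleToken check in getRows above.
        boolean isSingleToken = singleToken.getMinimum().equals(singleToken.getMaximum());
        System.out.println(isSingleToken);                           // true
        System.out.println(filterRange.contains(42L));               // true
        System.out.println(filterRange.isOverlappedBy(singleToken)); // true
    }
}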
use of org.apache.commons.lang3.Range in project incubator-gobblin by apache.
the class GoogleWebmasterDataFetcherImpl method getPages.
/**
 * Get all pages asynchronously.
 */
private Collection<String> getPages(String startDate, String endDate, List<Dimension> dimensions, ApiDimensionFilter countryFilter, Queue<Pair<String, FilterOperator>> toProcess, int rowLimit) {
    String country = GoogleWebmasterFilter.countryFilterToString(countryFilter);
    ConcurrentLinkedDeque<String> allPages = new ConcurrentLinkedDeque<>();
    int r = 0;
    while (r <= GET_PAGES_RETRIES) {
        ++r;
        log.info(String.format("Get pages at round %d with size %d.", r, toProcess.size()));
        ConcurrentLinkedDeque<Pair<String, FilterOperator>> nextRound = new ConcurrentLinkedDeque<>();
        ExecutorService es = Executors.newFixedThreadPool(10, ExecutorsUtils.newDaemonThreadFactory(Optional.of(log), Optional.of(this.getClass().getSimpleName())));
        while (!toProcess.isEmpty()) {
            submitJob(toProcess.poll(), countryFilter, startDate, endDate, dimensions, es, allPages, nextRound, rowLimit);
        }
        // Wait for jobs to finish and start the next round if necessary.
        try {
            es.shutdown();
            boolean terminated = es.awaitTermination(5, TimeUnit.MINUTES);
            if (!terminated) {
                es.shutdownNow();
                log.warn("Timed out while getting all pages for country-{} at round {}. Next round now has size {}.", country, r, nextRound.size());
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        if (nextRound.isEmpty()) {
            break;
        }
        toProcess = nextRound;
        coolDown(r, PAGES_GET_COOLDOWN_TIME);
    }
    if (r == GET_PAGES_RETRIES + 1) {
        throw new RuntimeException(String.format("Getting all pages reached the maximum number of retries %d. Date range: %s ~ %s. Country: %s.", GET_PAGES_RETRIES, startDate, endDate, country));
    }
    return allPages;
}
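The method above drains a work queue into a fixed thread pool each round and lets failed jobs re-queue themselves into nextRound (via submitJob, not shown). A self-contained sketch of that round-based retry pattern, with a hypothetical task body and back-off standing in for submitJob and coolDown:

import java.util.Queue;
import java.util.concurrent.*;

public class RetryRoundsSketch {
    static final int MAX_RETRIES = 3;

    // A minimal sketch of the round-based retry pattern used by getPages.
    // The task body and the back-off time are hypothetical stand-ins.
    static void processAll(Queue<String> toProcess) throws InterruptedException {
        ConcurrentLinkedQueue<String> results = new ConcurrentLinkedQueue<>();
        int round = 0;
        while (round <= MAX_RETRIES) {
            ++round;
            ConcurrentLinkedQueue<String> nextRound = new ConcurrentLinkedQueue<>();
            ExecutorService es = Executors.newFixedThreadPool(10);
            while (!toProcess.isEmpty()) {
                String job = toProcess.poll();
                es.submit(() -> {
                    try {
                        results.add(job.toUpperCase()); // stand-in for the real API call
                    } catch (RuntimeException e) {
                        nextRound.add(job);             // failed jobs go to the next round
                    }
                });
            }
            es.shutdown();
            if (!es.awaitTermination(5, TimeUnit.MINUTES)) {
                es.shutdownNow();                       // give up on stragglers this round
            }
            if (nextRound.isEmpty()) {
                break;                                  // everything succeeded
            }
            toProcess = nextRound;
            Thread.sleep(1000L * round);                // hypothetical cool-down between rounds
        }
        if (round == MAX_RETRIES + 1) {
            throw new RuntimeException("exceeded " + MAX_RETRIES + " retries");
        }
    }
}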
use of org.apache.commons.lang3.Range in project alfresco-repository by Alfresco.
the class SolrQueryHTTPClient method buildPivotParameters.
protected void buildPivotParameters(SearchParameters searchParameters, URLCodec encoder, StringBuilder url) throws UnsupportedEncodingException {
    if (searchParameters.getPivots() != null && !searchParameters.getPivots().isEmpty()) {
        url.append("&facet=").append(encoder.encode("true", "UTF-8"));
        for (List<String> pivotKeys : searchParameters.getPivots()) {
            List<String> pivotsList = new ArrayList<>();
            pivotsList.addAll(pivotKeys);
            url.append("&facet.pivot=");
            StringBuilder prefix = new StringBuilder("{! ");
            if (searchParameters.getStats() != null && !searchParameters.getStats().isEmpty()) {
                for (StatsRequestParameters aStat : searchParameters.getStats()) {
                    if (pivotKeys.contains(aStat.getLabel())) {
                        prefix.append("stats=" + aStat.getLabel() + " ");
                        pivotsList.remove(aStat.getLabel());
                        // only do it once
                        break;
                    }
                }
            }
            if (searchParameters.getRanges() != null && !searchParameters.getRanges().isEmpty()) {
                for (RangeParameters aRange : searchParameters.getRanges()) {
                    Optional<String> found = pivotKeys.stream().filter(aKey -> aKey.equals(aRange.getLabel())).findFirst();
                    if (found.isPresent()) {
                        prefix.append("range=" + found.get() + " ");
                        pivotsList.remove(found.get());
                        // only do it once
                        break;
                    }
                }
            }
            // We have added something beyond the opening "{! ".
            if (prefix.length() > 3) {
                url.append(encoder.encode(prefix.toString().trim(), "UTF-8"));
                url.append(encoder.encode("}", "UTF-8"));
            }
            url.append(encoder.encode(String.join(",", pivotsList), "UTF-8"));
        }
    }
}
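Each facet.pivot value above is emitted as a Solr local-parameter prefix such as {! stats=piv1} followed by the comma-joined pivot fields, with every piece URL-encoded separately. A minimal runnable sketch of just that encoding step, using a hypothetical stats label and pivot field:

import org.apache.commons.codec.net.URLCodec;

public class PivotPrefixSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical labels; the real ones come from SearchParameters.
        String statsLabel = "piv1";
        String field = "content.size";

        URLCodec encoder = new URLCodec();
        StringBuilder url = new StringBuilder("&facet=true");
        // Solr local-parameter prefix, e.g. "{! stats=piv1", then "}", then the pivot fields.
        String prefix = "{! stats=" + statsLabel + " ";
        url.append("&facet.pivot=")
           .append(encoder.encode(prefix.trim(), "UTF-8"))
           .append(encoder.encode("}", "UTF-8"))
           .append(encoder.encode(field, "UTF-8"));
        // Prints: &facet=true&facet.pivot=%7B%21+stats%3Dpiv1%7Dcontent.size
        System.out.println(url);
    }
}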
use of org.apache.commons.lang3.Range in project accumulo by apache.
the class PrintInfo method execute.
@SuppressFBWarnings(value = "DM_EXIT", justification = "System.exit is fine here because it's a utility class executed by a main()")
@Override
public void execute(final String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs("accumulo rfile-info", args);
    if (opts.files.isEmpty()) {
        System.err.println("No files were given");
        System.exit(1);
    }
    if ((opts.fullKeys || opts.dump) && opts.formatterClazz != null) {
        System.err.println("--formatter argument is incompatible with --dump or --fullKeys, specify either, not both.");
        System.exit(1);
    }
    var siteConfig = opts.getSiteConfiguration();
    Configuration conf = new Configuration();
    for (String confFile : opts.configFiles) {
        log.debug("Adding Hadoop configuration file {}", confFile);
        conf.addResource(new Path(confFile));
    }
    LogHistogram kvHistogram = new LogHistogram();
    KeyStats dataKeyStats = new KeyStats();
    KeyStats indexKeyStats = new KeyStats();
    for (String arg : opts.files) {
        Path path = new Path(arg);
        FileSystem fs = resolveFS(log, conf, path);
        System.out.println("Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
        printCryptoParams(path, fs);
        CachableBuilder cb = new CachableBuilder().fsPath(fs, path).conf(conf).cryptoService(CryptoServiceFactory.newInstance(siteConfig, ClassloaderType.JAVA));
        Reader iter = new RFile.Reader(cb);
        MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();
        if (opts.vis || opts.hash) {
            iter.registerMetrics(vmg);
        }
        iter.printInfo(opts.printIndex);
        System.out.println();
        String propsPath = opts.getPropertiesPath();
        String[] mainArgs = propsPath == null ? new String[] { arg } : new String[] { "-props", propsPath, arg };
        org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(mainArgs);
        Map<String, ArrayList<ByteSequence>> localityGroupCF = null;
        if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats || opts.fullKeys || !StringUtils.isEmpty(opts.formatterClazz)) {
            localityGroupCF = iter.getLocalityGroupCF();
            FileSKVIterator dataIter;
            if (opts.useSample) {
                dataIter = iter.getSample();
                if (dataIter == null) {
                    System.out.println("ERROR : This rfile has no sample data");
                    return;
                }
            } else {
                dataIter = iter;
            }
            if (opts.keyStats) {
                FileSKVIterator indexIter = iter.getIndex();
                while (indexIter.hasTop()) {
                    indexKeyStats.add(indexIter.getTopKey());
                    indexIter.next();
                }
            }
            BiFunction<Key, Value, String> formatter = null;
            if (opts.formatterClazz != null) {
                final Class<? extends BiFunction<Key, Value, String>> formatterClass = getFormatter(opts.formatterClazz);
                formatter = formatterClass.getConstructor().newInstance();
            } else if (opts.fullKeys) {
                formatter = (key, value) -> key.toStringNoTruncate() + " -> " + value;
            } else if (opts.dump) {
                formatter = (key, value) -> key + " -> " + value;
            }
            for (String lgName : localityGroupCF.keySet()) {
                LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF);
                while (dataIter.hasTop()) {
                    Key key = dataIter.getTopKey();
                    Value value = dataIter.getTopValue();
                    if (formatter != null) {
                        System.out.println(formatter.apply(key, value));
                        if (System.out.checkError()) {
                            return;
                        }
                    }
                    if (opts.histogram) {
                        kvHistogram.add(key.getSize() + value.getSize());
                    }
                    if (opts.keyStats) {
                        dataKeyStats.add(key);
                    }
                    dataIter.next();
                }
            }
        }
        if (opts.printSummary) {
            SummaryReader.print(iter, System.out);
        }
        iter.close();
        if (opts.vis || opts.hash) {
            System.out.println();
            vmg.printMetrics(opts.hash, "Visibility", System.out);
        }
        if (opts.histogram) {
            System.out.println();
            kvHistogram.print("");
        }
        if (opts.keyStats) {
            System.out.println();
            System.out.println("Statistics for keys in data :");
            dataKeyStats.print("\t");
            System.out.println();
            System.out.println("Statistics for keys in index :");
            indexKeyStats.print("\t");
        }
        // If the output stream has closed, there is no reason to keep going.
        if (System.out.checkError()) {
            return;
        }
    }
}
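The seek above uses Accumulo's own org.apache.accumulo.core.data.Range, whose no-argument constructor spans the entire key space. A small sketch of the common constructions, assuming accumulo-core is on the classpath; the row ids are hypothetical:

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;

public class AccumuloRangeSketch {
    public static void main(String[] args) {
        // new Range() covers the entire key space, as in the seek above.
        Range everything = new Range();

        // Row-bounded and exact-row ranges (hypothetical row ids).
        Range rows = new Range("row_a", "row_z"); // inclusive start and end rows
        Range oneRow = Range.exact("row_m");      // all keys within a single row

        System.out.println(everything.contains(new Key("row_q"))); // true
        System.out.println(rows.contains(new Key("row_q")));       // true
        System.out.println(oneRow.contains(new Key("row_q")));     // false
    }
}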