Search in sources :

Example 86 with Range

use of org.apache.cassandra.dht.Range in project stargate-core by tuplejump.

the class SearchSupport method getRows.

/**
 * Executes the given {@link Search} against the Lucene index and maps the hits back to
 * Cassandra rows via the search's aggregation {@link Function}.
 *
 * <p>For paging queries the {@link IndexEntryCollector} is cached per query string so that
 * pages after the first can reuse the collector built on page one instead of re-running
 * the Lucene search.
 *
 * @param filter      the Cassandra filter describing the token range and paging state
 * @param search      the parsed stargate search; {@code null} yields an empty result list
 * @param queryString the raw query text, used as the collector-cache key
 * @return the matching rows (possibly empty), as produced by the search's function
 */
protected List<Row> getRows(final ExtendedFilter filter, final Search search, final String queryString) {
    final SearchSupport searchSupport = this;
    AbstractBounds<RowPosition> keyRange = filter.dataRange.keyRange();
    final Range<Token> filterRange = new Range<>(keyRange.left.getToken(), keyRange.right.getToken());
    // left == right means the range wraps around to a single token; combined with the
    // partitioner's minimum token this denotes the full ring.
    final boolean isSingleToken = filterRange.left.equals(filterRange.right);
    final boolean isFullRange = isSingleToken && baseCfs.partitioner.getMinimumToken().equals(filterRange.left);
    // Collector caching only applies to paging queries; pages after the first look the
    // collector up instead of searching again.
    final boolean shouldSaveToCache = isPagingQuery(filter.dataRange);
    final boolean shouldRetrieveFromCache = shouldSaveToCache && !isFirstPage((DataRange.Paging) filter.dataRange);
    SearcherCallback<List<Row>> sc = new SearcherCallback<List<Row>>() {

        @Override
        public List<Row> doWithSearcher(org.apache.lucene.search.IndexSearcher searcher) throws Exception {
            Utils.SimpleTimer timer = Utils.getStartedTimer(logger);
            List<Row> results;
            if (search == null) {
                results = new ArrayList<>();
            } else {
                Utils.SimpleTimer timer2 = Utils.getStartedTimer(SearchSupport.logger);
                Function function = search.function();
                Query query = LuceneUtils.getQueryUpdatedWithPKCondition(search.query(options), getPartitionKeyString(filter));
                // Lucene needs a positive hit limit; an empty index reports maxDoc == 0.
                int resultsLimit = searcher.getIndexReader().maxDoc();
                if (resultsLimit == 0) {
                    resultsLimit = 1;
                }
                function.init(options);
                IndexEntryCollector collector = null;
                if (shouldRetrieveFromCache) {
                    collector = currentIndex.collectorMap.get(queryString);
                }
                if (collector == null) {
                    collector = new IndexEntryCollector(tableMapper, search, options, resultsLimit);
                    searcher.search(query, collector);
                    if (shouldSaveToCache) {
                        currentIndex.collectorMap.put(queryString, collector);
                        // Fix: log the cache insert only when it actually happens.
                        // Previously "Adding collector to cache" was logged for every
                        // freshly built collector, including non-paging queries that
                        // never touch the cache.
                        if (logger.isInfoEnabled()) {
                            logger.info("Adding collector to cache");
                        }
                    }
                } else if (logger.isInfoEnabled()) {
                    logger.info("Found collector in cache");
                }
                timer2.endLogTime("Lucene search for [" + collector.getTotalHits() + "] results ");
                if (SearchSupport.logger.isDebugEnabled()) {
                    SearchSupport.logger.debug(String.format("Search results [%s]", collector.getTotalHits()));
                }
                // Scoring is only honoured when both the function wants it and the
                // search asked to show scores.
                ResultMapper iter = new ResultMapper(tableMapper, searchSupport, filter, collector, function.shouldTryScoring() && search.isShowScore());
                Utils.SimpleTimer timer3 = Utils.getStartedTimer(SearchSupport.logger);
                results = function.process(iter, baseCfs, currentIndex);
                timer3.endLogTime("Aggregation [" + results.size() + "] results");
            }
            timer.endLogTime("Search with results [" + results.size() + "] ");
            return results;
        }

        @Override
        public Range<Token> filterRange() {
            return filterRange;
        }

        @Override
        public boolean isSingleToken() {
            return isSingleToken;
        }

        @Override
        public boolean isFullRange() {
            return isFullRange;
        }
    };
    return currentIndex.search(sc);
}
Also used : SecondaryIndexSearcher(org.apache.cassandra.db.index.SecondaryIndexSearcher) Query(org.apache.lucene.search.Query) Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range) IndexEntryCollector(com.tuplejump.stargate.lucene.IndexEntryCollector) Function(com.tuplejump.stargate.lucene.query.function.Function) StringEscapeUtils(org.apache.commons.lang3.StringEscapeUtils) Utils(com.tuplejump.stargate.Utils) LuceneUtils(com.tuplejump.stargate.lucene.LuceneUtils) SearcherCallback(com.tuplejump.stargate.lucene.SearcherCallback)

Example 87 with Range

use of org.apache.commons.lang3.Range in project incubator-gobblin by apache.

the class GoogleWebmasterDataFetcherImpl method getPages.

/**
 * Get all pages in an async mode.
 */
/**
 * Get all pages in an async mode, retrying failed requests for up to
 * {@code GET_PAGES_RETRIES} rounds with a cool-down between rounds.
 *
 * @param toProcess the work queue of (page-prefix, operator) pairs; failed items are
 *                  re-queued for the next round by {@code submitJob}
 * @return all pages collected across rounds
 * @throws RuntimeException if the retry budget is exhausted with work still pending,
 *                          or if the calling thread is interrupted while waiting
 */
private Collection<String> getPages(String startDate, String endDate, List<Dimension> dimensions, ApiDimensionFilter countryFilter, Queue<Pair<String, FilterOperator>> toProcess, int rowLimit) {
    String country = GoogleWebmasterFilter.countryFilterToString(countryFilter);
    ConcurrentLinkedDeque<String> allPages = new ConcurrentLinkedDeque<>();
    // Fix: track success with an explicit flag. The old post-loop check
    // "r == GET_PAGES_RETRIES + 1" could not distinguish retry exhaustion from a
    // SUCCESSFUL final round: `r` is incremented at the top of the loop, so breaking
    // out of the last allowed round leaves r at the same value as a normal loop exit,
    // and the method wrongly threw even though all pages had been fetched.
    boolean allRoundsDone = false;
    int r = 0;
    while (r <= GET_PAGES_RETRIES) {
        ++r;
        log.info(String.format("Get pages at round %d with size %d.", r, toProcess.size()));
        ConcurrentLinkedDeque<Pair<String, FilterOperator>> nextRound = new ConcurrentLinkedDeque<>();
        ExecutorService es = Executors.newFixedThreadPool(10, ExecutorsUtils.newDaemonThreadFactory(Optional.of(log), Optional.of(this.getClass().getSimpleName())));
        while (!toProcess.isEmpty()) {
            submitJob(toProcess.poll(), countryFilter, startDate, endDate, dimensions, es, allPages, nextRound, rowLimit);
        }
        // wait for jobs to finish and start next round if necessary.
        try {
            es.shutdown();
            boolean terminated = es.awaitTermination(5, TimeUnit.MINUTES);
            if (!terminated) {
                es.shutdownNow();
                log.warn("Timed out while getting all pages for country-{} at round {}. Next round now has size {}.", country, r, nextRound.size());
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
        if (nextRound.isEmpty()) {
            // No follow-up work: everything was fetched successfully.
            allRoundsDone = true;
            break;
        }
        toProcess = nextRound;
        coolDown(r, PAGES_GET_COOLDOWN_TIME);
    }
    if (!allRoundsDone) {
        // Message typo fixed: "retires" -> "retries".
        throw new RuntimeException(String.format("Getting all pages reaches the maximum number of retries %d. Date range: %s ~ %s. Country: %s.", GET_PAGES_RETRIES, startDate, endDate, country));
    }
    return allPages;
}
Also used : ExecutorService(java.util.concurrent.ExecutorService) ConcurrentLinkedDeque(java.util.concurrent.ConcurrentLinkedDeque) Pair(org.apache.commons.lang3.tuple.Pair)

Example 88 with Range

use of org.apache.commons.lang3.Range in project alfresco-repository by Alfresco.

the class SolrQueryHTTPClient method buildPivotParameters.

/**
 * Appends Solr facet-pivot URL parameters for each pivot requested in the search.
 *
 * <p>For every pivot key list, at most one matching stats label and at most one matching
 * range label are lifted into a {@code {!stats=... range=...}} local-parameter prefix and
 * removed from the pivot field list; the remaining keys are joined with commas.
 *
 * @param searchParameters the search request holding pivots, stats and ranges
 * @param encoder          codec used to URL-encode every appended value
 * @param url              the Solr query URL being assembled
 * @throws UnsupportedEncodingException if UTF-8 encoding fails (should not happen)
 */
protected void buildPivotParameters(SearchParameters searchParameters, URLCodec encoder, StringBuilder url) throws UnsupportedEncodingException {
    if (searchParameters.getPivots() == null || searchParameters.getPivots().isEmpty()) {
        return;
    }
    url.append("&facet=").append(encoder.encode("true", "UTF-8"));
    for (List<String> pivotKeys : searchParameters.getPivots()) {
        List<String> remainingKeys = new ArrayList<>(pivotKeys);
        url.append("&facet.pivot=");
        StringBuilder localParams = new StringBuilder("{! ");
        if (searchParameters.getStats() != null && !searchParameters.getStats().isEmpty()) {
            for (StatsRequestParameters statParams : searchParameters.getStats()) {
                if (pivotKeys.contains(statParams.getLabel())) {
                    localParams.append("stats=" + statParams.getLabel() + " ");
                    remainingKeys.remove(statParams.getLabel());
                    // only do it once
                    break;
                }
            }
        }
        if (searchParameters.getRanges() != null && !searchParameters.getRanges().isEmpty()) {
            for (RangeParameters rangeParams : searchParameters.getRanges()) {
                Optional<String> matched = pivotKeys.stream().filter(key -> key.equals(rangeParams.getLabel())).findFirst();
                if (matched.isPresent()) {
                    localParams.append("range=" + matched.get() + " ");
                    remainingKeys.remove(matched.get());
                    // only do it once
                    break;
                }
            }
        }
        // "{! " alone is 3 characters; anything longer means a local parameter was added.
        if (localParams.length() > 3) {
            url.append(encoder.encode(localParams.toString().trim(), "UTF-8"));
            url.append(encoder.encode("}", "UTF-8"));
        }
        url.append(encoder.encode(String.join(",", remainingKeys), "UTF-8"));
    }
}
Also used : SolrJsonProcessor(org.alfresco.repo.search.impl.lucene.SolrJsonProcessor) RangeParameters(org.alfresco.service.cmr.search.RangeParameters) URIException(org.apache.commons.httpclient.URIException) StringUtils(org.apache.commons.lang3.StringUtils) Header(org.apache.commons.httpclient.Header) DefaultTypeConverter(org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter) JSONResult(org.alfresco.repo.search.impl.lucene.JSONResult) JSONException(org.json.JSONException) JSONObject(org.json.JSONObject) PermissionService(org.alfresco.service.cmr.security.PermissionService) RepositoryState(org.alfresco.repo.admin.RepositoryState) NodeDAO(org.alfresco.repo.domain.node.NodeDAO) Locale(java.util.Locale) Map(java.util.Map) NodeService(org.alfresco.service.cmr.repository.NodeService) HttpStatus(org.apache.commons.httpclient.HttpStatus) ResultSet(org.alfresco.service.cmr.search.ResultSet) StatsParameters(org.alfresco.service.cmr.search.StatsParameters) QueryParserUtils(org.alfresco.repo.search.impl.QueryParserUtils) Set(java.util.Set) ShardRegistry(org.alfresco.repo.index.shard.ShardRegistry) Reader(java.io.Reader) CMISStrictDictionaryService(org.alfresco.opencmis.dictionary.CMISStrictDictionaryService) GetMethod(org.apache.commons.httpclient.methods.GetMethod) List(java.util.List) IntervalSet(org.alfresco.service.cmr.search.IntervalSet) I18NUtil(org.springframework.extensions.surf.util.I18NUtil) HttpClient(org.apache.commons.httpclient.HttpClient) Entry(java.util.Map.Entry) Optional(java.util.Optional) LogFactory(org.apache.commons.logging.LogFactory) URI(org.apache.commons.httpclient.URI) UnsupportedEncodingException(java.io.UnsupportedEncodingException) LimitBy(org.alfresco.service.cmr.search.LimitBy) LuceneQueryParserException(org.alfresco.repo.search.impl.lucene.LuceneQueryParserException) Interval(org.alfresco.service.cmr.search.Interval) FieldFacet(org.alfresco.service.cmr.search.SearchParameters.FieldFacet) HashMap(java.util.HashMap) 
ArrayList(java.util.ArrayList) FieldFacetSort(org.alfresco.service.cmr.search.SearchParameters.FieldFacetSort) NamespaceDAO(org.alfresco.repo.dictionary.NamespaceDAO) ParameterCheck(org.alfresco.util.ParameterCheck) StatsRequestParameters(org.alfresco.service.cmr.search.StatsRequestParameters) HttpException(org.apache.commons.httpclient.HttpException) AuthorityType(org.alfresco.service.cmr.security.AuthorityType) StoreRef(org.alfresco.service.cmr.repository.StoreRef) Iterator(java.util.Iterator) FieldFacetMethod(org.alfresco.service.cmr.search.SearchParameters.FieldFacetMethod) SearchDateConversion.parseDateInterval(org.alfresco.util.SearchDateConversion.parseDateInterval) FieldHighlightParameters(org.alfresco.service.cmr.search.FieldHighlightParameters) HttpServletResponse(javax.servlet.http.HttpServletResponse) JSONTokener(org.json.JSONTokener) Pair(org.alfresco.util.Pair) IOException(java.io.IOException) AlfrescoRuntimeException(org.alfresco.error.AlfrescoRuntimeException) BeansException(org.springframework.beans.BeansException) InputStreamReader(java.io.InputStreamReader) TenantService(org.alfresco.repo.tenant.TenantService) BasicSearchParameters(org.alfresco.service.cmr.search.BasicSearchParameters) PermissionEvaluationMode(org.alfresco.service.cmr.search.PermissionEvaluationMode) DictionaryService(org.alfresco.service.cmr.dictionary.DictionaryService) SortDefinition(org.alfresco.service.cmr.search.SearchParameters.SortDefinition) DataTypeDefinition(org.alfresco.service.cmr.dictionary.DataTypeDefinition) SolrStatsResult(org.alfresco.repo.search.impl.lucene.SolrStatsResult) URLCodec(org.apache.commons.codec.net.URLCodec) StringJoiner(java.util.StringJoiner) BeanFactory(org.springframework.beans.factory.BeanFactory) PropertyCheck(org.alfresco.util.PropertyCheck) Log(org.apache.commons.logging.Log) BufferedReader(java.io.BufferedReader) Floc(org.alfresco.repo.index.shard.Floc) SearchParameters(org.alfresco.service.cmr.search.SearchParameters) 
PropertyDefinition(org.alfresco.service.cmr.dictionary.PropertyDefinition) SolrJSONResultSet(org.alfresco.repo.search.impl.lucene.SolrJSONResultSet) JSONArray(org.json.JSONArray) ArrayList(java.util.ArrayList) StatsRequestParameters(org.alfresco.service.cmr.search.StatsRequestParameters) RangeParameters(org.alfresco.service.cmr.search.RangeParameters)

Example 89 with Range

use of org.apache.accumulo.core.data.Range in project accumulo by apache.

the class PrintInfo method execute.

@SuppressFBWarnings(value = "DM_EXIT", justification = "System.exit is fine here because it's a utility class executed by a main()")
@Override
public void execute(final String[] args) throws Exception {
    // Entry point for "accumulo rfile-info": for each RFile on the command line, prints
    // index/metadata info and, depending on flags, key statistics, a key/value size
    // histogram, visibility metrics, summaries, and the raw key/value data.
    Opts opts = new Opts();
    opts.parseArgs("accumulo rfile-info", args);
    if (opts.files.isEmpty()) {
        System.err.println("No files were given");
        System.exit(1);
    }
    // --formatter supplies a custom key/value renderer and therefore conflicts with the
    // built-in renderers selected by --dump / --fullKeys.
    if ((opts.fullKeys || opts.dump) && opts.formatterClazz != null) {
        System.err.println("--formatter argument is incompatible with --dump or --fullKeys, specify either, not both.");
        System.exit(1);
    }
    var siteConfig = opts.getSiteConfiguration();
    Configuration conf = new Configuration();
    for (String confFile : opts.configFiles) {
        log.debug("Adding Hadoop configuration file {}", confFile);
        conf.addResource(new Path(confFile));
    }
    // Accumulators shared across all files given on the command line; printed per file
    // below, so they aggregate over everything processed so far.
    LogHistogram kvHistogram = new LogHistogram();
    KeyStats dataKeyStats = new KeyStats();
    KeyStats indexKeyStats = new KeyStats();
    for (String arg : opts.files) {
        Path path = new Path(arg);
        FileSystem fs = resolveFS(log, conf, path);
        System.out.println("Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
        printCryptoParams(path, fs);
        CachableBuilder cb = new CachableBuilder().fsPath(fs, path).conf(conf).cryptoService(CryptoServiceFactory.newInstance(siteConfig, ClassloaderType.JAVA));
        // NOTE(review): iter is closed unconditionally further down, but not in a
        // try/finally — an exception mid-loop leaks the reader. Consider
        // try-with-resources here.
        Reader iter = new RFile.Reader(cb);
        MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();
        if (opts.vis || opts.hash) {
            iter.registerMetrics(vmg);
        }
        iter.printInfo(opts.printIndex);
        System.out.println();
        // Delegate the bcfile-level metadata dump to the lower-level PrintInfo tool,
        // forwarding the properties file if one was given.
        String propsPath = opts.getPropertiesPath();
        String[] mainArgs = propsPath == null ? new String[] { arg } : new String[] { "-props", propsPath, arg };
        org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(mainArgs);
        Map<String, ArrayList<ByteSequence>> localityGroupCF = null;
        // Only walk the actual key/value data when some flag needs it.
        if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats || opts.fullKeys || !StringUtils.isEmpty(opts.formatterClazz)) {
            localityGroupCF = iter.getLocalityGroupCF();
            FileSKVIterator dataIter;
            if (opts.useSample) {
                dataIter = iter.getSample();
                if (dataIter == null) {
                    System.out.println("ERROR : This rfile has no sample data");
                    return;
                }
            } else {
                dataIter = iter;
            }
            if (opts.keyStats) {
                // NOTE(review): indexIter is never closed — TODO confirm whether the
                // index iterator owns resources independent of the parent reader.
                FileSKVIterator indexIter = iter.getIndex();
                while (indexIter.hasTop()) {
                    indexKeyStats.add(indexIter.getTopKey());
                    indexIter.next();
                }
            }
            // Pick how (and whether) each key/value pair is rendered: a user-supplied
            // class, full untruncated keys, or the default toString dump.
            BiFunction<Key, Value, String> formatter = null;
            if (opts.formatterClazz != null) {
                final Class<? extends BiFunction<Key, Value, String>> formatterClass = getFormatter(opts.formatterClazz);
                formatter = formatterClass.getConstructor().newInstance();
            } else if (opts.fullKeys) {
                formatter = (key, value) -> key.toStringNoTruncate() + " -> " + value;
            } else if (opts.dump) {
                formatter = (key, value) -> key + " -> " + value;
            }
            // Scan every locality group over the full range, feeding each key/value to
            // the formatter and/or the statistics accumulators.
            for (String lgName : localityGroupCF.keySet()) {
                LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF);
                while (dataIter.hasTop()) {
                    Key key = dataIter.getTopKey();
                    Value value = dataIter.getTopValue();
                    if (formatter != null) {
                        System.out.println(formatter.apply(key, value));
                        // Stop early if stdout is closed (e.g. piped to head).
                        if (System.out.checkError())
                            return;
                    }
                    if (opts.histogram) {
                        kvHistogram.add(key.getSize() + value.getSize());
                    }
                    if (opts.keyStats) {
                        dataKeyStats.add(key);
                    }
                    dataIter.next();
                }
            }
        }
        if (opts.printSummary) {
            SummaryReader.print(iter, System.out);
        }
        iter.close();
        if (opts.vis || opts.hash) {
            System.out.println();
            vmg.printMetrics(opts.hash, "Visibility", System.out);
        }
        if (opts.histogram) {
            System.out.println();
            kvHistogram.print("");
        }
        if (opts.keyStats) {
            System.out.println();
            System.out.println("Statistics for keys in data :");
            dataKeyStats.print("\t");
            System.out.println();
            System.out.println("Statistics for keys in index :");
            indexKeyStats.print("\t");
        }
        // If the output stream has closed, there is no reason to keep going.
        if (System.out.checkError()) {
            return;
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ConfigOpts(org.apache.accumulo.core.cli.ConfigOpts) ByteSequence(org.apache.accumulo.core.data.ByteSequence) Arrays(java.util.Arrays) CryptoUtils(org.apache.accumulo.core.crypto.CryptoUtils) NoFileEncrypter(org.apache.accumulo.core.spi.crypto.NoFileEncrypter) Parameter(com.beust.jcommander.Parameter) FileSystem(org.apache.hadoop.fs.FileSystem) BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocalityGroupUtil(org.apache.accumulo.core.util.LocalityGroupUtil) Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) Value(org.apache.accumulo.core.data.Value) KeywordExecutable(org.apache.accumulo.start.spi.KeywordExecutable) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Utils(org.apache.accumulo.core.file.rfile.bcfile.Utils) Logger(org.slf4j.Logger) NumUtil(org.apache.accumulo.core.util.NumUtil) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) IOException(java.io.IOException) ClassloaderType(org.apache.accumulo.core.crypto.CryptoServiceFactory.ClassloaderType) SummaryReader(org.apache.accumulo.core.summary.SummaryReader) CryptoServiceFactory(org.apache.accumulo.core.crypto.CryptoServiceFactory) CachableBuilder(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder) Range(org.apache.accumulo.core.data.Range) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) List(java.util.List) AutoService(com.google.auto.service.AutoService) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Configuration(org.apache.hadoop.conf.Configuration) ConfigOpts(org.apache.accumulo.core.cli.ConfigOpts) ArrayList(java.util.ArrayList) 
Reader(org.apache.accumulo.core.file.rfile.RFile.Reader) SummaryReader(org.apache.accumulo.core.summary.SummaryReader) Range(org.apache.accumulo.core.data.Range) FileSystem(org.apache.hadoop.fs.FileSystem) Value(org.apache.accumulo.core.data.Value) CachableBuilder(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)

Aggregations

List (java.util.List)34 ArrayList (java.util.ArrayList)27 Map (java.util.Map)26 HashMap (java.util.HashMap)24 StringUtils (org.apache.commons.lang3.StringUtils)22 Collectors (java.util.stream.Collectors)21 LoggerFactory (org.slf4j.LoggerFactory)19 Logger (org.slf4j.Logger)18 IOException (java.io.IOException)17 Set (java.util.Set)16 Pair (org.apache.commons.lang3.tuple.Pair)16 Optional (java.util.Optional)13 Date (java.util.Date)11 Stream (java.util.stream.Stream)11 Range (org.apache.commons.lang3.Range)11 Test (org.junit.jupiter.api.Test)11 java.util (java.util)10 HashSet (java.util.HashSet)10 Lists (com.google.common.collect.Lists)9 Entry (java.util.Map.Entry)9