Search in sources :

Example 46 with IAE

Use of IAE in project druid by druid-io.

the class GroupByStrategyV1 method processSubqueryResult.

/**
 * Runs the outer {@code query} of a nested group-by against the already-computed results of the inner
 * {@code subquery}.
 *
 * <p>As far as the visible code shows, the method:
 * <ol>
 *   <li>checks the columns required by the outer aggregators and throws an {@code IAE} when two aggregators
 *       with the same name are not {@code equals} to each other;</li>
 *   <li>materializes {@code subqueryResult} into an incremental index ({@code innerQueryResultIndex});</li>
 *   <li>calls {@code process(...)} once per interval against an {@code IncrementalIndexStorageAdapter} over
 *       that index and collects the concatenated results into {@code outerQueryResultIndex};</li>
 *   <li>returns the post-aggregated, post-processed outer results, with {@code outerQueryResultIndex}
 *       attached as baggage via {@code Sequences.withBaggage}.</li>
 * </ol>
 *
 * <p>NOTE(review): this excerpt is visibly truncated (missing closing braces, loop bodies and the first
 * argument of {@code Sequences.concat}). Confirm every detail against the upstream file before relying on it.
 *
 * @param subquery            the inner group-by query; supplies dimensions, intervals and limit spec
 * @param query               the outer group-by query; supplies the aggregator specs and limit spec
 * @param resource            not read anywhere in the visible excerpt
 * @param subqueryResult      rows produced by the inner query, used to build the inner incremental index
 * @param wasQueryPushedDown  not read anywhere in the visible excerpt
 * @return the outer query's result rows
 * @throws IAE if an inner column is referenced by same-named outer aggregators that are not equal
 */
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
    final Set<AggregatorFactory> aggs = new HashSet<>();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = new HashSet<>();
    // NOTE(review): the body of this loop (presumably populating dimensionNames) is missing from the excerpt.
    for (DimensionSpec dimension : subquery.getDimensions()) {
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            // NOTE(review): the body of this branch and the start of the comment below are missing from the excerpt.
            if (dimensionNames.contains(transferAgg.getName())) {
                // doesn't have this problem.
            // Reject a same-named aggregator already in aggs that is not equal to this one.
            if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {

                public boolean apply(AggregatorFactory agg) {
                    return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
            })) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery).setAggregatorSpecs(ImmutableList.copyOf(aggs)).setInterval(subquery.getIntervals()).setPostAggregatorSpecs(new ArrayList<>()).build();
    // The outer query's limit spec is merged with the subquery's limit spec.
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query).setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec())).build();
    // The inner index is built with the CTX_KEY_SORT_RESULTS context flag overridden to true.
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)), subquery, configSupplier.get(), subqueryResult);
    // Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    // is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    // NOTE(review): the first argument of Sequences.concat( is missing from the excerpt.
    final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(outerQuery, null, configSupplier.get(), Sequences.concat(, new Function<Interval, Sequence<ResultRow>>() {

        public Sequence<ResultRow> apply(Interval interval) {
            return process(outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex));
    // outerQueryResultIndex is passed as baggage alongside the returned sequence.
    return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) ArrayList(java.util.ArrayList) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)

Example 47 with IAE

Use of IAE in project druid by druid-io.

the class OssDataSegmentPuller method getSegmentFiles.

/**
 * Pulls the segment object at {@code ossCoords} into {@code outDir}.
 *
 * <p>From the visible code: if the object path is a zip it is unzipped into {@code outDir}; if it is a gz it
 * is gunzipped into a file inside {@code outDir} named after the URI path without its extension; any other
 * file type raises an {@code IAE}. The object is read through a {@code ByteSource} that opens a stream via
 * {@code buildFileObject(uri)}.
 *
 * <p>NOTE(review): this excerpt is visibly truncated. Closing braces are missing, the logging calls have lost
 * their receiver/method prefix, and the cleanup statement in the inner {@code try} is gone. Confirm against
 * the upstream file.
 *
 * @param ossCoords location of the object to pull
 * @param outDir    directory that receives the extracted file(s)
 * @return the copy result reported by {@code CompressionUtils.unzip} or {@code CompressionUtils.gunzip}
 * @throws SegmentLoadingException if the object is not in the bucket, or wrapping any exception raised while
 *                                 pulling (presumably including the {@code IAE} for unknown file types,
 *                                 since it is thrown inside the {@code try} - confirm)
 */
FileUtils.FileCopyResult getSegmentFiles(final CloudObjectLocation ossCoords, final File outDir) throws SegmentLoadingException {"Pulling index at path[%s] to outDir[%s]", ossCoords, outDir);
    if (!isObjectInBucket(ossCoords)) {
        throw new SegmentLoadingException("IndexFile[%s] does not exist.", ossCoords);
    try {
        final URI uri = ossCoords.toUri(OssStorageDruidModule.SCHEME);
        final ByteSource byteSource = new ByteSource() {

            public InputStream openStream() throws IOException {
                try {
                    return buildFileObject(uri).openInputStream();
                } catch (OSSException e) {
                    // An OSSException that has a cause and matches OssUtils.RETRYABLE is rethrown as IOException;
                    // otherwise it is rethrown as RuntimeException.
                    if (e.getCause() != null) {
                        if (OssUtils.RETRYABLE.apply(e)) {
                            throw new IOException("Recoverable exception", e);
                    throw new RuntimeException(e);
        if (CompressionUtils.isZip(ossCoords.getPath())) {
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, OssUtils.RETRYABLE, false);
            // NOTE(review): the line below looks like the argument list of a stripped logging call.
  "Loaded %d bytes from [%s] to [%s]", result.size(), ossCoords.toString(), outDir.getAbsolutePath());
            return result;
        if (CompressionUtils.isGz(ossCoords.getPath())) {
            // The output file name is the URI path's file name without its extension.
            final String fname = Files.getNameWithoutExtension(uri.getPath());
            final File outFile = new File(outDir, fname);
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, OssUtils.RETRYABLE);
            // NOTE(review): the line below looks like the argument list of a stripped logging call.
  "Loaded %d bytes from [%s] to [%s]", result.size(), ossCoords.toString(), outFile.getAbsolutePath());
            return result;
        throw new IAE("Do not know how to load file type at [%s]", uri.toString());
    } catch (Exception e) {
        // NOTE(review): the statement inside this try is missing; the warning text below suggests it removed
        // outDir - confirm.
        try {
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), ossCoords.toString());
        // The original exception is kept as the cause of the SegmentLoadingException.
        throw new SegmentLoadingException(e, e.getMessage());
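Also used : SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) FileUtils(org.apache.druid.java.util.common.FileUtils) ByteSource(com.google.common.io.ByteSource) OSSException(com.aliyun.oss.OSSException) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) URI(java.net.URI) File(java.io.File) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) OSSException(com.aliyun.oss.OSSException) IOException(java.io.IOException)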

Example 48 with IAE

Use of IAE in project druid by druid-io.

the class ThriftInputRowParser method parseBatch.

/**
 * Converts one Thrift-encoded input into a single-element list of {@code InputRow}.
 *
 * <p>From the visible code: the parser and the Thrift class are created on first use; the input is turned
 * into a JSON string via {@code ThriftDeserialization}; the JSON is parsed into a map; and a
 * {@code MapBasedInputRow} is built from the extracted timestamp, the dimensions and the map. When
 * {@code this.dimensions} is empty, the dimensions are the record's keys minus the parse spec's dimension
 * exclusions.
 *
 * <p>NOTE(review): this excerpt is visibly truncated (closing braces are missing); confirm against upstream.
 *
 * @param input a {@code ByteBuffer}, {@code BytesWritable} or {@code ThriftWritable}
 * @return an immutable list containing exactly one row
 * @throws IAE if the Thrift class cannot be loaded, the input class is unsupported, or deserialization fails
 */
public List<InputRow> parseBatch(Object input) {
    if (parser == null) {
        // parser should be created when it is really used to avoid unnecessary initialization of the underlying
        // parseSpec.
        parser = parseSpec.makeParser();
    // Initializing this in the constructor would throw ClassNotFoundException, so it is done lazily here.
    try {
        if (thriftClass == null) {
            thriftClass = getThriftClass();
    } catch (IOException e) {
        throw new IAE(e, "failed to load jar [%s]", jarPath);
    } catch (ClassNotFoundException e) {
        throw new IAE(e, "class [%s] not found in jar", thriftClassName);
    } catch (InstantiationException | IllegalAccessException e) {
        throw new IAE(e, "instantiation thrift instance failed");
    final String json;
    try {
        if (input instanceof ByteBuffer) {
            // realtime stream
            // NOTE(review): ByteBuffer.array() returns the whole backing array (ignoring position/limit) and
            // throws for buffers without an accessible array - confirm callers always pass a suitable buffer.
            final byte[] bytes = ((ByteBuffer) input).array();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof BytesWritable) {
            // sequence file
            // NOTE(review): BytesWritable.getBytes() may return a backing array longer than the valid data -
            // confirm detectAndDeserialize tolerates trailing bytes.
            final byte[] bytes = ((BytesWritable) input).getBytes();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof ThriftWritable) {
            // LzoBlockThrift file
            TBase o = (TBase) ((ThriftWritable) input).get();
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else {
            throw new IAE("unsupport input class of [%s]", input.getClass());
    } catch (IllegalAccessException | InstantiationException | TException e) {
        // NOTE(review): the caught exception is not passed as the cause here, unlike the IAEs thrown above.
        throw new IAE("some thing wrong with your thrift?");
    Map<String, Object> record = parser.parseToMap(json);
    final List<String> dimensions;
    // Explicitly configured dimensions win; otherwise they are derived from the record's keys.
    if (!this.dimensions.isEmpty()) {
        dimensions = this.dimensions;
    } else {
        dimensions = Lists.newArrayList(Sets.difference(record.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions()));
    return ImmutableList.of(new MapBasedInputRow(parseSpec.getTimestampSpec().extractTimestamp(record), dimensions, record));
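Also used : TException(org.apache.thrift.TException) ThriftWritable(com.twitter.elephantbird.mapreduce.io.ThriftWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) ByteBuffer(java.nio.ByteBuffer) TBase(org.apache.thrift.TBase) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow)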

Example 49 with IAE

Use of IAE in project druid by druid-io.

the class DoublesSketchToHistogramPostAggregator method compute.

/**
 * Computes a histogram (as a {@code double[]}) from the {@code DoublesSketch} produced by {@code field}.
 *
 * <p>The number of bins is {@code splitPoints.length + 1} when split points are set, otherwise
 * {@code this.numBins} when set, otherwise {@code DEFAULT_NUM_BINS}. For an empty sketch every bin is
 * {@code NaN}. Otherwise each bin is the sketch's PMF value multiplied by {@code sketch.getN()}. The PMF is
 * evaluated at the configured split points, or at the points returned by {@code equallySpacedPoints} for
 * the sketch's min and max values.
 *
 * <p>NOTE(review): this excerpt is visibly truncated (closing braces are missing); confirm against upstream.
 *
 * @param combinedAggregators aggregator values passed through to {@code field.compute}
 * @return a {@code double[]} with one entry per bin
 * @throws IAE if fewer than 2 bins would be produced
 */
public Object compute(final Map<String, Object> combinedAggregators) {
    final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
    // Explicit split points take precedence over the configured bin count.
    final int numBins = splitPoints != null ? splitPoints.length + 1 : (this.numBins != null ? this.numBins.intValue() : DEFAULT_NUM_BINS);
    if (numBins < 2) {
        throw new IAE("at least 2 bins expected");
    // An empty sketch yields a histogram of the requested size filled with NaN.
    if (sketch.isEmpty()) {
        final double[] histogram = new double[numBins];
        Arrays.fill(histogram, Double.NaN);
        return histogram;
    final double[] histogram = sketch.getPMF(splitPoints != null ? splitPoints : equallySpacedPoints(numBins, sketch.getMinValue(), sketch.getMaxValue()));
    for (int i = 0; i < histogram.length; i++) {
        // scale fractions to counts
        histogram[i] *= sketch.getN();
    return histogram;
Also used : DoublesSketch(org.apache.datasketches.quantiles.DoublesSketch) IAE(org.apache.druid.java.util.common.IAE)

Example 50 with IAE

Use of IAE in project druid by druid-io.

the class ArrayOfDoublesSketchTTestPostAggregator method compute.

/**
 * Runs a t-test between the two {@code ArrayOfDoublesSketch} inputs, one result per value column.
 *
 * <p>The sketches come from the first two entries of {@code getFields()}. Per-column
 * {@code SummaryStatistics} are obtained with {@code getStats}, and for each index {@code i} the result of
 * {@code TTest.tTest(stats1[i], stats2[i])} is stored in the returned array.
 *
 * <p>NOTE(review): this excerpt is visibly truncated (closing braces are missing); confirm against upstream.
 *
 * @param combinedAggregators aggregator values passed through to each field's {@code compute}
 * @return an array whose length equals {@code sketch1.getNumValues()}
 * @throws IAE if the two sketches report a different number of values
 */
public double[] compute(final Map<String, Object> combinedAggregators) {
    final ArrayOfDoublesSketch sketch1 = (ArrayOfDoublesSketch) getFields().get(0).compute(combinedAggregators);
    final ArrayOfDoublesSketch sketch2 = (ArrayOfDoublesSketch) getFields().get(1).compute(combinedAggregators);
    // Both sketches must report the same number of values.
    if (sketch1.getNumValues() != sketch2.getNumValues()) {
        throw new IAE("Sketches have different number of values: %d and %d", sketch1.getNumValues(), sketch2.getNumValues());
    final SummaryStatistics[] stats1 = getStats(sketch1);
    final SummaryStatistics[] stats2 = getStats(sketch2);
    final int numberOfValues = sketch1.getNumValues();
    final double[] pValues = new double[numberOfValues];
    final TTest test = new TTest();
    // One t-test per value column, pairing the i-th statistics of each sketch.
    for (int i = 0; i < pValues.length; i++) {
        pValues[i] = test.tTest(stats1[i], stats2[i]);
    return pValues;
Also used : TTest(org.apache.commons.math3.stat.inference.TTest) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) IAE(org.apache.druid.java.util.common.IAE) ArrayOfDoublesSketch(org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch)


IAE ( ISE ( IOException ( ByteBuffer (java.nio.ByteBuffer)19 ArrayList (java.util.ArrayList)16 List (java.util.List)14 Expr (org.apache.druid.math.expr.Expr)14 Nullable (javax.annotation.Nullable)12 ColumnType (org.apache.druid.segment.column.ColumnType)10 HashSet (java.util.HashSet)8 Map (java.util.Map)8 Interval (org.joda.time.Interval)8 VisibleForTesting ( HashMap (java.util.HashMap)7 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)7 File ( Iterables ( Arrays (java.util.Arrays)5 Test (org.junit.Test)5 ImmutableMap (