Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
The class GroupByStrategyV1, method processSubqueryResult.
@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown)
{
  final Set<AggregatorFactory> aggs = new HashSet<>();

  // Nested group-bys work by first running the inner query and then materializing the results in an incremental
  // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
  // the aggregators for the outer query to define the column names so that the index will match the query. If
  // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
  // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
  // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
  // subsequent ones) and return an error if the aggregator types are different.
  final Set<String> dimensionNames = new HashSet<>();
  for (DimensionSpec dimension : subquery.getDimensions()) {
    dimensionNames.add(dimension.getOutputName());
  }
  for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
    for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
      if (dimensionNames.contains(transferAgg.getName())) {
        // The required column is already present as a dimension of the subquery, so it
        // doesn't have this problem; skip the transfer aggregator.
        continue;
      }
      if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {
        @Override
        public boolean apply(AggregatorFactory agg)
        {
          return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
        }
      })) {
        throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator for '%s'", transferAgg.getName());
      }
      aggs.add(transferAgg);
    }
  }

  // We need the inner incremental index to have all the columns required by the outer query
  final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
      .setAggregatorSpecs(ImmutableList.copyOf(aggs))
      .setInterval(subquery.getIntervals())
      .setPostAggregatorSpecs(new ArrayList<>())
      .build();
  final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
      .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
      .build();

  final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
      innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
      subquery,
      configSupplier.get(),
      subqueryResult
  );

  // Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
  // is ensured by QuerySegmentSpec.
  // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
  // and concatenate the results.
  final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
      outerQuery,
      null,
      configSupplier.get(),
      Sequences.concat(Sequences.map(
          Sequences.simple(outerQuery.getIntervals()),
          new Function<Interval, Sequence<ResultRow>>() {
            @Override
            public Sequence<ResultRow> apply(Interval interval)
            {
              return process(
                  outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))),
                  new IncrementalIndexStorageAdapter(innerQueryResultIndex)
              );
            }
          }
      ))
  );

  innerQueryResultIndex.close();
  return Sequences.withBaggage(
      outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)),
      outerQueryResultIndex
  );
}
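For context, a minimal sketch of the input shape that trips the IAE above: two outer aggregators of different types reading the same inner column. It assumes getRequiredColumns() names each transfer aggregator after its required field; the aggregator names and the column "x" are hypothetical.

import java.util.HashSet;
import java.util.Set;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class TransferAggCollisionDemo
{
  public static void main(String[] args)
  {
    // Both outer aggregators require inner column "x", but with different
    // aggregator types, so a single column "x" cannot be built in the
    // incremental index and processSubqueryResult would throw IAE.
    final AggregatorFactory longSum = new LongSumAggregatorFactory("sumLong", "x");
    final AggregatorFactory doubleSum = new DoubleSumAggregatorFactory("sumDouble", "x");

    final Set<AggregatorFactory> aggs = new HashSet<>();
    for (AggregatorFactory outer : new AggregatorFactory[]{longSum, doubleSum}) {
      for (final AggregatorFactory transferAgg : outer.getRequiredColumns()) {
        // Same name-collision test used by processSubqueryResult.
        final boolean conflict = aggs.stream().anyMatch(
            agg -> agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg)
        );
        System.out.println(transferAgg.getName() + " conflict=" + conflict); // second pass prints true
        aggs.add(transferAgg);
      }
    }
  }
}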
Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
The class OssDataSegmentPuller, method getSegmentFiles.
FileUtils.FileCopyResult getSegmentFiles(final CloudObjectLocation ossCoords, final File outDir) throws SegmentLoadingException
{
  log.info("Pulling index at path[%s] to outDir[%s]", ossCoords, outDir);
  if (!isObjectInBucket(ossCoords)) {
    throw new SegmentLoadingException("IndexFile[%s] does not exist.", ossCoords);
  }
  try {
    FileUtils.mkdirp(outDir);
    final URI uri = ossCoords.toUri(OssStorageDruidModule.SCHEME);
    final ByteSource byteSource = new ByteSource() {
      @Override
      public InputStream openStream() throws IOException
      {
        try {
          return buildFileObject(uri).openInputStream();
        } catch (OSSException e) {
          if (e.getCause() != null) {
            if (OssUtils.RETRYABLE.apply(e)) {
              throw new IOException("Recoverable exception", e);
            }
          }
          throw new RuntimeException(e);
        }
      }
    };
    if (CompressionUtils.isZip(ossCoords.getPath())) {
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, OssUtils.RETRYABLE, false);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), ossCoords.toString(), outDir.getAbsolutePath());
      return result;
    }
    if (CompressionUtils.isGz(ossCoords.getPath())) {
      final String fname = Files.getNameWithoutExtension(uri.getPath());
      final File outFile = new File(outDir, fname);
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, OssUtils.RETRYABLE);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), ossCoords.toString(), outFile.getAbsolutePath());
      return result;
    }
    throw new IAE("Do not know how to load file type at [%s]", uri.toString());
  } catch (Exception e) {
    try {
      FileUtils.deleteDirectory(outDir);
    } catch (IOException ioe) {
      log.warn(
          ioe,
          "Failed to remove output directory [%s] for segment pulled from [%s]",
          outDir.getAbsolutePath(),
          ossCoords.toString()
      );
    }
    throw new SegmentLoadingException(e, e.getMessage());
  }
}
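A minimal caller sketch, assuming puller is an already-constructed OssDataSegmentPuller; the bucket, key, and output path are hypothetical.

import java.io.File;
import org.apache.druid.data.input.impl.CloudObjectLocation;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.segment.loading.SegmentLoadingException;

// getSegmentFiles dispatches on the object's extension: .zip is unzipped into
// outDir, .gz is gunzipped to a single file, anything else throws IAE.
final CloudObjectLocation location = new CloudObjectLocation("my-bucket", "druid/segments/index.zip");
final File outDir = new File("/tmp/druid-segment");
try {
  final FileUtils.FileCopyResult result = puller.getSegmentFiles(location, outDir);
  System.out.println("pulled " + result.size() + " bytes into " + outDir);
} catch (SegmentLoadingException e) {
  // the puller has already deleted outDir before rethrowing
  e.printStackTrace();
}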
Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
The class ThriftInputRowParser, method parseBatch.
@Override
public List<InputRow> parseBatch(Object input)
{
  if (parser == null) {
    // parser should be created when it is really used to avoid unnecessary initialization of the underlying
    // parseSpec.
    parser = parseSpec.makeParser();
  }

  // Placing this initialization in the constructor would cause a ClassNotFoundException.
  try {
    if (thriftClass == null) {
      thriftClass = getThriftClass();
    }
  } catch (IOException e) {
    throw new IAE(e, "failed to load jar [%s]", jarPath);
  } catch (ClassNotFoundException e) {
    throw new IAE(e, "class [%s] not found in jar", thriftClassName);
  } catch (InstantiationException | IllegalAccessException e) {
    throw new IAE(e, "failed to instantiate thrift instance");
  }

  final String json;
  try {
    if (input instanceof ByteBuffer) {
      // realtime stream
      final byte[] bytes = ((ByteBuffer) input).array();
      TBase o = thriftClass.newInstance();
      ThriftDeserialization.detectAndDeserialize(bytes, o);
      json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
    } else if (input instanceof BytesWritable) {
      // sequence file
      final byte[] bytes = ((BytesWritable) input).getBytes();
      TBase o = thriftClass.newInstance();
      ThriftDeserialization.detectAndDeserialize(bytes, o);
      json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
    } else if (input instanceof ThriftWritable) {
      // LzoBlockThrift file
      TBase o = (TBase) ((ThriftWritable) input).get();
      json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
    } else {
      throw new IAE("unsupported input class [%s]", input.getClass());
    }
  } catch (IllegalAccessException | InstantiationException | TException e) {
    throw new IAE(e, "something wrong with your thrift?");
  }

  Map<String, Object> record = parser.parseToMap(json);
  final List<String> dimensions;
  if (!this.dimensions.isEmpty()) {
    dimensions = this.dimensions;
  } else {
    dimensions = Lists.newArrayList(
        Sets.difference(record.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions())
    );
  }
  return ImmutableList.of(
      new MapBasedInputRow(parseSpec.getTimestampSpec().extractTimestamp(record), dimensions, record)
  );
}
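A sketch of the realtime-stream branch, assuming parser is a ThriftInputRowParser already configured with a parseSpec, jar path, and thrift class name; the byte source is hypothetical.

import java.nio.ByteBuffer;
import java.util.List;
import org.apache.druid.data.input.InputRow;

// Hypothetical serialized thrift payload, e.g. the value of a stream record.
final byte[] thriftBytes = streamRecord.value();

// parseBatch deserializes the bytes into the configured thrift class, renders
// the object as simple JSON, and applies the parseSpec to produce InputRows.
final List<InputRow> rows = parser.parseBatch(ByteBuffer.wrap(thriftBytes));
final InputRow row = rows.get(0);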
Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
The class DoublesSketchToHistogramPostAggregator, method compute.
@Override
public Object compute(final Map<String, Object> combinedAggregators)
{
  final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
  final int numBins = splitPoints != null
      ? splitPoints.length + 1
      : (this.numBins != null ? this.numBins.intValue() : DEFAULT_NUM_BINS);
  if (numBins < 2) {
    throw new IAE("at least 2 bins expected");
  }
  if (sketch.isEmpty()) {
    final double[] histogram = new double[numBins];
    Arrays.fill(histogram, Double.NaN);
    return histogram;
  }
  final double[] histogram = sketch.getPMF(
      splitPoints != null ? splitPoints : equallySpacedPoints(numBins, sketch.getMinValue(), sketch.getMaxValue())
  );
  for (int i = 0; i < histogram.length; i++) {
    // scale fractions to counts
    histogram[i] *= sketch.getN();
  }
  return histogram;
}
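The equallySpacedPoints helper is referenced but not shown; a plausible sketch, assuming it returns numBins - 1 interior split points that divide [min, max] into equal-width bins, so getPMF yields one fraction per bin.

// Hypothetical reconstruction of the helper: split points spaced
// (max - min) / numBins apart, starting one step above min.
private static double[] equallySpacedPoints(final int numBins, final double min, final double max)
{
  final double[] points = new double[numBins - 1];
  final double delta = (max - min) / numBins;
  for (int i = 0; i < points.length; i++) {
    points[i] = min + delta * (i + 1);
  }
  return points;
}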
Use of org.apache.druid.java.util.common.IAE in project druid by druid-io.
The class ArrayOfDoublesSketchTTestPostAggregator, method compute.
@Override
public double[] compute(final Map<String, Object> combinedAggregators)
{
  final ArrayOfDoublesSketch sketch1 = (ArrayOfDoublesSketch) getFields().get(0).compute(combinedAggregators);
  final ArrayOfDoublesSketch sketch2 = (ArrayOfDoublesSketch) getFields().get(1).compute(combinedAggregators);
  if (sketch1.getNumValues() != sketch2.getNumValues()) {
    throw new IAE(
        "Sketches have different number of values: %d and %d",
        sketch1.getNumValues(),
        sketch2.getNumValues()
    );
  }
  final SummaryStatistics[] stats1 = getStats(sketch1);
  final SummaryStatistics[] stats2 = getStats(sketch2);
  final int numberOfValues = sketch1.getNumValues();
  final double[] pValues = new double[numberOfValues];
  final TTest test = new TTest();
  for (int i = 0; i < pValues.length; i++) {
    pValues[i] = test.tTest(stats1[i], stats2[i]);
  }
  return pValues;
}
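The getStats helper is referenced but not shown; a plausible sketch, assuming it accumulates one SummaryStatistics per value column across all retained sketch entries (package paths per recent datasketches-java releases; they vary across versions).

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;

// Hypothetical reconstruction: one accumulator per column, fed from every
// retained entry, so that tTest sees per-column sample statistics.
private static SummaryStatistics[] getStats(final ArrayOfDoublesSketch sketch)
{
  final SummaryStatistics[] stats = new SummaryStatistics[sketch.getNumValues()];
  for (int i = 0; i < stats.length; i++) {
    stats[i] = new SummaryStatistics();
  }
  final ArrayOfDoublesSketchIterator it = sketch.iterator();
  while (it.next()) {
    final double[] values = it.getValues();
    for (int i = 0; i < values.length; i++) {
      stats[i].addValue(values[i]);
    }
  }
  return stats;
}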