Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.
From the class AvgSqlAggregator, method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry,
    final RexBuilder rexBuilder,
    final String name,
    final AggregateCall aggregateCall,
    final Project project,
    final List<Aggregation> existingAggregations,
    final boolean finalizeAggregations
)
{
  final List<DruidExpression> arguments = Aggregations.getArgumentsForSimpleAggregator(plannerContext, rowSignature, aggregateCall, project);
  if (arguments == null) {
    return null;
  }
  final String countName = Calcites.makePrefixedName(name, "count");
  final AggregatorFactory count = CountSqlAggregator.createCountAggregatorFactory(countName, plannerContext, rowSignature, virtualColumnRegistry, rexBuilder, aggregateCall, project);
  final String fieldName;
  final String expression;
  final DruidExpression arg = Iterables.getOnlyElement(arguments);
  final ExprMacroTable macroTable = plannerContext.getExprMacroTable();
  final ValueType sumType;
  // Use 64-bit sum regardless of the type of the AVG aggregator.
  if (SqlTypeName.INT_TYPES.contains(aggregateCall.getType().getSqlTypeName())) {
    sumType = ValueType.LONG;
  } else {
    sumType = ValueType.DOUBLE;
  }
  if (arg.isDirectColumnAccess()) {
    fieldName = arg.getDirectColumn();
    expression = null;
  } else {
    // if the filter or anywhere else defined a virtual column for us, re-use it
    final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
    String vc = virtualColumnRegistry.getVirtualColumnByExpression(arg, resolutionArg.getType());
    fieldName = vc != null ? vc : null;
    expression = vc != null ? null : arg.getExpression();
  }
  final String sumName = Calcites.makePrefixedName(name, "sum");
  final AggregatorFactory sum = SumSqlAggregator.createSumAggregatorFactory(sumType, sumName, fieldName, expression, macroTable);
  return Aggregation.create(
      ImmutableList.of(sum, count),
      new ArithmeticPostAggregator(name, "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, sumName), new FieldAccessPostAggregator(null, countName)))
  );
}
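For context, the Aggregation built above is just a sum aggregator, a count aggregator, and a "quotient" arithmetic post-aggregator over the two. A minimal sketch of that shape follows; the input column "x" and the output names "a0", "a0:sum", and "a0:count" are illustrative only, not what the planner emits verbatim.

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

import java.util.List;

public class AvgShapeSketch
{
  // Roughly what AVG("x") AS "a0" decomposes into: a sum, a count, and sum/count.
  public static List<AggregatorFactory> aggregators()
  {
    return ImmutableList.of(
        new DoubleSumAggregatorFactory("a0:sum", "x"),  // the DOUBLE branch above; an integer type would use a long sum
        new CountAggregatorFactory("a0:count")
    );
  }

  public static PostAggregator average()
  {
    // "quotient" divides the first field access by the second.
    return new ArithmeticPostAggregator(
        "a0",
        "quotient",
        ImmutableList.of(
            new FieldAccessPostAggregator(null, "a0:sum"),
            new FieldAccessPostAggregator(null, "a0:count")
        )
    );
  }
}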
Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.
From the class ColumnProcessors, method makeProcessorInternal.
/**
 * Creates "column processors", which are objects that wrap a single input column and provide some
 * functionality on top of it.
 *
 * @param inputCapabilitiesFn   function that returns capabilities of the column being processed. The type provided
 *                              by these capabilities will be used to determine what kind of selector to create. If
 *                              this function returns null, then processorFactory.defaultType() will be
 *                              used to construct a set of assumed capabilities.
 * @param dimensionSelectorFn   function that creates a DimensionSelector for the column being processed. Will be
 *                              called if the column type is string.
 * @param valueSelectorFunction function that creates a ColumnValueSelector for the column being processed. Will be
 *                              called if the column type is long, float, double, or complex.
 * @param processorFactory      object that encapsulates the knowledge about how to create processors
 * @param selectorFactory       column selector factory used for creating the processor
 */
private static <T> T makeProcessorInternal(
    final Function<ColumnSelectorFactory, ColumnCapabilities> inputCapabilitiesFn,
    final Function<ColumnSelectorFactory, DimensionSelector> dimensionSelectorFn,
    final Function<ColumnSelectorFactory, ColumnValueSelector<?>> valueSelectorFunction,
    final ColumnProcessorFactory<T> processorFactory,
    final ColumnSelectorFactory selectorFactory
)
{
  final ColumnCapabilities capabilities = inputCapabilitiesFn.apply(selectorFactory);
  final TypeSignature<ValueType> effectiveType = capabilities != null ? capabilities : processorFactory.defaultType();
  switch (effectiveType.getType()) {
    case STRING:
      return processorFactory.makeDimensionProcessor(dimensionSelectorFn.apply(selectorFactory), mayBeMultiValue(capabilities));
    case LONG:
      return processorFactory.makeLongProcessor(valueSelectorFunction.apply(selectorFactory));
    case FLOAT:
      return processorFactory.makeFloatProcessor(valueSelectorFunction.apply(selectorFactory));
    case DOUBLE:
      return processorFactory.makeDoubleProcessor(valueSelectorFunction.apply(selectorFactory));
    case COMPLEX:
      return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory));
    default:
      throw new ISE("Unsupported type[%s]", effectiveType.asTypeString());
  }
}
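The method above is a type dispatcher: resolve an effective type, then hand the matching selector to the matching factory method. The following self-contained sketch mirrors only that pattern; SimpleType, SimpleProcessorFactory, and makeProcessor are hypothetical stand-ins, not Druid's ColumnProcessorFactory API.

import java.util.function.Supplier;

public class TypeDispatchSketch
{
  // Hypothetical stand-in for ValueType: the type that drives selector/processor creation.
  enum SimpleType { STRING, LONG, FLOAT, DOUBLE, COMPLEX }

  // Hypothetical stand-in for ColumnProcessorFactory<T>: one factory method per kind of column.
  interface SimpleProcessorFactory<T>
  {
    SimpleType defaultType();
    T makeStringProcessor(Supplier<String> selector);
    T makeNumericProcessor(Supplier<Number> selector);
    T makeComplexProcessor(Supplier<Object> selector);
  }

  // Mirrors makeProcessorInternal: resolve the effective type, then dispatch to the matching factory method.
  static <T> T makeProcessor(
      SimpleType declaredType,          // null means "unknown", analogous to null ColumnCapabilities
      Supplier<String> stringSelector,
      Supplier<Number> numericSelector,
      Supplier<Object> complexSelector,
      SimpleProcessorFactory<T> factory
  )
  {
    final SimpleType effectiveType = declaredType != null ? declaredType : factory.defaultType();
    switch (effectiveType) {
      case STRING:
        return factory.makeStringProcessor(stringSelector);
      case LONG:
      case FLOAT:
      case DOUBLE:
        return factory.makeNumericProcessor(numericSelector);
      case COMPLEX:
        return factory.makeComplexProcessor(complexSelector);
      default:
        throw new IllegalStateException("Unsupported type: " + effectiveType);
    }
  }
}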
Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.
From the class ColumnValueGenerator, method generateSingleRowValue.
private Object generateSingleRowValue()
{
  Object ret = null;
  ValueType type = schema.getType();
  if (distribution instanceof AbstractIntegerDistribution) {
    ret = ((AbstractIntegerDistribution) distribution).sample();
  } else if (distribution instanceof AbstractRealDistribution) {
    ret = ((AbstractRealDistribution) distribution).sample();
  } else if (distribution instanceof EnumeratedDistribution) {
    ret = ((EnumeratedDistribution) distribution).sample();
  }
  ret = convertType(ret, type);
  return ret;
}
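The instanceof chain exists because the Commons Math integer, real, and enumerated distributions share no common sample() supertype. Here is a standalone sketch of the same dispatch, assuming only commons-math3; sampleAny is a made-up helper, not Druid code.

import org.apache.commons.math3.distribution.AbstractIntegerDistribution;
import org.apache.commons.math3.distribution.AbstractRealDistribution;
import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.UniformIntegerDistribution;
import org.apache.commons.math3.util.Pair;

import java.util.Arrays;

public class SamplingSketch
{
  // Dispatch on the concrete base class, exactly as generateSingleRowValue() does.
  static Object sampleAny(Object distribution)
  {
    if (distribution instanceof AbstractIntegerDistribution) {
      return ((AbstractIntegerDistribution) distribution).sample();   // int
    } else if (distribution instanceof AbstractRealDistribution) {
      return ((AbstractRealDistribution) distribution).sample();      // double
    } else if (distribution instanceof EnumeratedDistribution) {
      return ((EnumeratedDistribution<?>) distribution).sample();     // arbitrary payload
    }
    throw new IllegalArgumentException("Unknown distribution: " + distribution);
  }

  public static void main(String[] args)
  {
    System.out.println(sampleAny(new UniformIntegerDistribution(0, 9)));
    System.out.println(sampleAny(new NormalDistribution(0.0, 1.0)));
    System.out.println(sampleAny(new EnumeratedDistribution<>(
        Arrays.asList(new Pair<>("a", 0.5), new Pair<>("b", 0.5))
    )));
  }
}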
Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.
From the class ColumnValueGenerator, method initDistribution.
private void initDistribution()
{
  GeneratorColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
  ValueType type = schema.getType();
  List<Object> enumeratedValues = schema.getEnumeratedValues();
  List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
  List<Pair<Object, Double>> probabilities = new ArrayList<>();
  switch (distributionType) {
    case SEQUENTIAL:
      // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided
      distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(), schema.getEnumeratedValues());
      break;
    case UNIFORM:
      distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
      break;
    case DISCRETE_UNIFORM:
      if (enumeratedValues == null) {
        enumeratedValues = new ArrayList<>();
        for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
          Object val = convertType(i, type);
          enumeratedValues.add(val);
        }
      }
      // give them all equal probability, the library will normalize probabilities to sum to 1.0
      for (Object enumeratedValue : enumeratedValues) {
        probabilities.add(new Pair<>(enumeratedValue, 0.1));
      }
      distribution = new EnumeratedTreeDistribution<>(probabilities);
      break;
    case NORMAL:
      distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
      break;
    case ROUNDED_NORMAL:
      NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
      distribution = new RealRoundingDistribution(normalDist);
      break;
    case ZIPF:
      int cardinality;
      if (enumeratedValues == null) {
        Integer startInt = schema.getStartInt();
        cardinality = schema.getEndInt() - startInt;
        ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
        for (int i = 0; i < cardinality; i++) {
          probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i)));
        }
      } else {
        cardinality = enumeratedValues.size();
        ZipfDistribution zipf = new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent());
        for (int i = 0; i < cardinality; i++) {
          probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i)));
        }
      }
      distribution = new EnumeratedTreeDistribution<>(probabilities);
      break;
    case LAZY_ZIPF:
      int lazyCardinality;
      Integer startInt = schema.getStartInt();
      lazyCardinality = schema.getEndInt() - startInt;
      distribution = new ZipfDistribution(lazyCardinality, schema.getZipfExponent());
      break;
    case LAZY_DISCRETE_UNIFORM:
      distribution = new UniformIntegerDistribution(schema.getStartInt(), schema.getEndInt());
      break;
    case ENUMERATED:
      for (int i = 0; i < enumeratedValues.size(); i++) {
        probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
      }
      distribution = new EnumeratedTreeDistribution<>(probabilities);
      break;
    default:
      throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
  }
  if (distribution instanceof AbstractIntegerDistribution) {
    ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
  } else if (distribution instanceof AbstractRealDistribution) {
    ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
  } else {
    ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed);
  }
}
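The ENUMERATED and DISCRETE_UNIFORM branches reduce to pairing each value with a weight and letting the library normalize the weights. A minimal standalone sketch follows, using plain commons-math3 EnumeratedDistribution in place of Druid's EnumeratedTreeDistribution; the values, weights, and seed are made up.

import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.util.Pair;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class EnumeratedSketch
{
  public static void main(String[] args)
  {
    // Pair each enumerated value with a weight; the library normalizes the weights to sum to 1.0,
    // which is why the DISCRETE_UNIFORM branch can give every value the same constant (0.1).
    List<Object> values = Arrays.asList("low", "medium", "high");
    List<Double> weights = Arrays.asList(0.7, 0.2, 0.1);

    List<Pair<Object, Double>> pmf = new ArrayList<>();
    for (int i = 0; i < values.size(); i++) {
      pmf.add(new Pair<>(values.get(i), weights.get(i)));
    }

    EnumeratedDistribution<Object> distribution = new EnumeratedDistribution<>(pmf);
    // Reseed for reproducible rows, as initDistribution() does at the end.
    distribution.reseedRandomGenerator(1234L);

    for (int i = 0; i < 5; i++) {
      System.out.println(distribution.sample());
    }
  }
}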
Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.
From the class IndexMergerV9, method makeIndexFiles.
private File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    @Nullable final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
    final boolean fillRowNumConversions,
    final IndexSpec indexSpec,
    @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException
{
  progress.start();
  progress.progress();
  List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
  final Metadata segmentMetadata;
  if (metricAggs != null) {
    AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
    for (int i = 0; i < metricAggs.length; i++) {
      combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
    }
    segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
  } else {
    segmentMetadata = Metadata.merge(metadataList, null);
  }
  Closer closer = Closer.create();
  try {
    final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
    FileUtils.mkdirp(outDir);
    SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
    log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
    SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
    closer.register(segmentWriteOutMedium);
    long startTime = System.currentTimeMillis();
    Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
    log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
    progress.progress();
    startTime = System.currentTimeMillis();
    try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
      SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
      if (customSegmentLoader != null) {
        mapper.writeValue(fos, customSegmentLoader);
      } else {
        mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
      }
    }
    log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
    progress.progress();
    final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
    final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
    final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
    mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
    final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
    final List<DimensionMergerV9> mergers = new ArrayList<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionHandler handler = handlers.get(mergedDimensions.get(i));
      mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
    }
    /**
     *********** Setup Dim Conversions *************
     */
    progress.progress();
    startTime = System.currentTimeMillis();
    writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
    log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
    /**
     *********** Walk through data sets, merge them, and write merged columns ************
     */
    progress.progress();
    final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
    closer.register(timeAndDimsIterator);
    final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
    final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
    List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
    /**
     ********** Create Inverted Indexes and Finalize Build Columns ************
     */
    final String section = "build inverted index and columns";
    progress.startSection(section);
    makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
    makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionMergerV9 merger = mergers.get(i);
      merger.writeIndexes(rowNumConversions);
      if (merger.canSkip()) {
        continue;
      }
      ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
      makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
    }
    progress.stopSection(section);
    /**
     *********** Make index.drd & metadata.drd files *************
     */
    progress.progress();
    makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
    makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
    v9Smoosher.close();
    progress.stop();
    return outDir;
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
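The surrounding try/catch/finally is the Closer idiom: register resources as they are opened, rethrow through the Closer so that close() can record, rather than mask, secondary failures. Below is a minimal sketch of that pattern using Guava's com.google.common.io.Closer (Druid ships its own Closer with the same API); the file name and payload are illustrative only.

import com.google.common.io.Closer;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

public class CloserSketch
{
  public static void writeFiles(File outDir) throws IOException
  {
    Closer closer = Closer.create();
    try {
      // Register every resource as soon as it is opened; close() releases them in reverse order.
      FileOutputStream version = closer.register(new FileOutputStream(new File(outDir, "version.bin")));
      version.write(new byte[]{0, 0, 0, 9});   // e.g. a big-endian int, as Ints.toByteArray would produce
      // ... further writers registered and used here ...
    } catch (Throwable t) {
      // rethrow() records t so exceptions from close() are suppressed instead of replacing it.
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
  }
}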