Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class ApproxCountDistinctSqlAggregator, method toDruidAggregation:
@Override
public Aggregation toDruidAggregation(
    final String name,
    final RowSignature rowSignature,
    final DruidOperatorTable operatorTable,
    final PlannerContext plannerContext,
    final List<Aggregation> existingAggregations,
    final Project project,
    final AggregateCall aggregateCall,
    final DimFilter filter
)
{
  final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
  final RowExtraction rex = Expressions.toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), rexNode);
  if (rex == null) {
    return null;
  }

  final AggregatorFactory aggregatorFactory;
  if (rowSignature.getColumnType(rex.getColumn()) == ValueType.COMPLEX) {
    // A COMPLEX column holds pre-built HLL sketches; aggregate them directly.
    aggregatorFactory = new HyperUniquesAggregatorFactory(name, rex.getColumn());
  } else {
    final SqlTypeName sqlTypeName = rexNode.getType().getSqlTypeName();
    final ValueType outputType = Calcites.getValueTypeForSqlTypeName(sqlTypeName);
    if (outputType == null) {
      throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlTypeName, name);
    }
    final DimensionSpec dimensionSpec = rex.toDimensionSpec(rowSignature, null, ValueType.STRING);
    if (dimensionSpec == null) {
      return null;
    }
    // Plain columns are hashed on the fly by the cardinality aggregator.
    aggregatorFactory = new CardinalityAggregatorFactory(name, ImmutableList.of(dimensionSpec), false);
  }

  return Aggregation.createFinalizable(
      ImmutableList.<AggregatorFactory>of(aggregatorFactory),
      null,
      new PostAggregatorFactory()
      {
        @Override
        public PostAggregator factorize(String outputName)
        {
          return new HyperUniqueFinalizingPostAggregator(outputName, name);
        }
      }
  ).filter(filter);
}
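The branch above keys entirely off the column's ValueType: a COMPLEX column is assumed to already contain HyperLogLog sketches, while STRING, LONG, and FLOAT columns are estimated by hashing raw values at query time. A minimal standalone sketch of that decision (the helper class is hypothetical, not part of Druid):

import io.druid.segment.column.ValueType;

public class DistinctCountStrategy
{
  // Returns the aggregator type the SQL layer would pick for a column of the
  // given ValueType, mirroring the branch in ApproxCountDistinctSqlAggregator.
  public static String choose(final ValueType columnType)
  {
    if (columnType == ValueType.COMPLEX) {
      return "hyperUnique"; // pre-aggregated sketch column
    } else if (columnType == ValueType.STRING || columnType == ValueType.LONG || columnType == ValueType.FLOAT) {
      return "cardinality"; // hash raw values at query time
    } else {
      return null; // no supported strategy
    }
  }
}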
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class RowExtraction, method toDimensionSpec:
public DimensionSpec toDimensionSpec(final RowSignature rowSignature, final String outputName, final ValueType outputType)
{
  Preconditions.checkNotNull(outputType, "outputType");

  final ValueType columnType = rowSignature.getColumnType(column);
  if (columnType == null) {
    return null;
  }

  if (columnType == ValueType.STRING || (column.equals(Column.TIME_COLUMN_NAME) && extractionFn != null)) {
    return extractionFn == null
           ? new DefaultDimensionSpec(column, outputName, outputType)
           : new ExtractionDimensionSpec(column, outputName, outputType, extractionFn);
  } else if (columnType == ValueType.LONG || columnType == ValueType.FLOAT) {
    if (extractionFn == null) {
      return new DefaultDimensionSpec(column, outputName, outputType);
    } else {
      return new ExtractionDimensionSpec(column, outputName, outputType, extractionFn);
    }
  } else {
    // Can't create dimensionSpecs for non-string, non-numeric columns.
    return null;
  }
}
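Callers treat a null return as "this column cannot be grouped on". A hedged usage sketch (the column and output names are invented, and it assumes the three-argument DefaultDimensionSpec constructor used above):

import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec;
import io.druid.segment.column.ValueType;

public class DimensionSpecExample
{
  public static void main(String[] args)
  {
    // Read the STRING column "country" and expose it under the output name "d0".
    final DimensionSpec spec = new DefaultDimensionSpec("country", "d0", ValueType.STRING);
    System.out.println(spec.getDimension() + " -> " + spec.getOutputName());
  }
}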
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class BenchmarkColumnValueGenerator, method generateSingleRowValue:
private Object generateSingleRowValue()
{
  Object ret = null;
  ValueType type = schema.getType();

  if (distribution instanceof AbstractIntegerDistribution) {
    ret = ((AbstractIntegerDistribution) distribution).sample();
  } else if (distribution instanceof AbstractRealDistribution) {
    ret = ((AbstractRealDistribution) distribution).sample();
  } else if (distribution instanceof EnumeratedDistribution) {
    ret = ((EnumeratedDistribution) distribution).sample();
  }

  // Coerce the raw sample to the column's declared ValueType.
  ret = convertType(ret, type);
  return ret;
}
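The generator dispatches on the Commons Math distribution subtype, then coerces the sample to the column's ValueType. A self-contained sketch of the sampling half (the Druid schema and the convertType coercion are omitted; the distributions chosen here are illustrative):

import org.apache.commons.math3.distribution.AbstractIntegerDistribution;
import org.apache.commons.math3.distribution.AbstractRealDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.ZipfDistribution;

public class DistributionSampleExample
{
  // Sample from whichever Commons Math distribution family we were given,
  // as BenchmarkColumnValueGenerator does above.
  static Object sample(final Object distribution)
  {
    if (distribution instanceof AbstractIntegerDistribution) {
      return ((AbstractIntegerDistribution) distribution).sample(); // int
    } else if (distribution instanceof AbstractRealDistribution) {
      return ((AbstractRealDistribution) distribution).sample(); // double
    }
    return null;
  }

  public static void main(String[] args)
  {
    System.out.println(sample(new ZipfDistribution(1000, 1.0)));    // skewed ints, e.g. dimension cardinalities
    System.out.println(sample(new NormalDistribution(50.0, 10.0))); // doubles, e.g. metric values
  }
}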
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class BaseFilterTest, method selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory:
private List<String> selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory(final DimFilter filter, final String selectColumn)
{
  // Generate a row signature mapping each column name to its ValueType.
  final Map<String, ValueType> rowSignature = Maps.newHashMap();
  for (String columnName : Iterables.concat(adapter.getAvailableDimensions(), adapter.getAvailableMetrics())) {
    rowSignature.put(columnName, adapter.getColumnCapabilities(columnName).getType());
  }

  // Perform the test: run every row through the filter's ValueMatcher.
  final SettableSupplier<InputRow> rowSupplier = new SettableSupplier<>();
  final ValueMatcher matcher = makeFilter(filter).makeMatcher(
      VIRTUAL_COLUMNS.wrap(RowBasedColumnSelectorFactory.create(rowSupplier, rowSignature))
  );
  final List<String> values = Lists.newArrayList();
  for (InputRow row : rows) {
    rowSupplier.set(row);
    if (matcher.matches()) {
      values.add((String) row.getRaw(selectColumn));
    }
  }
  return values;
}
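The row signature here is nothing more than a name-to-ValueType map built from the adapter's column capabilities. A minimal sketch of such a signature for a hypothetical test datasource (the column names are invented):

import com.google.common.collect.ImmutableMap;
import io.druid.segment.column.ValueType;
import java.util.Map;

public class RowSignatureExample
{
  public static void main(String[] args)
  {
    // dim1 is a string dimension; met1 and met2 are numeric metrics.
    final Map<String, ValueType> rowSignature = ImmutableMap.of(
        "dim1", ValueType.STRING,
        "met1", ValueType.LONG,
        "met2", ValueType.FLOAT
    );
    rowSignature.forEach((name, type) -> System.out.println(name + ": " + type));
  }
}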
Use of io.druid.segment.column.ValueType in project druid by druid-io.
From the class IndexMergerV9, method makeIndexFiles:
@Override
protected File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn,
    final IndexSpec indexSpec
) throws IOException
{
  progress.start();
  progress.progress();

  List<Metadata> metadataList = Lists.transform(adapters, new Function<IndexableAdapter, Metadata>()
  {
    @Override
    public Metadata apply(IndexableAdapter input)
    {
      return input.getMetadata();
    }
  });

  Metadata segmentMetadata = null;
  if (metricAggs != null) {
    AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
    for (int i = 0; i < metricAggs.length; i++) {
      combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
    }
    segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
  } else {
    segmentMetadata = Metadata.merge(metadataList, null);
  }

  Closer closer = Closer.create();
  try {
    final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
    final File v9TmpDir = new File(outDir, "v9-tmp");
    FileUtils.forceMkdir(v9TmpDir);
    registerDeleteDirectory(closer, v9TmpDir);
    log.info("Start making v9 index files, outDir:%s", outDir);

    File tmpPeonFilesDir = new File(v9TmpDir, "tmpPeonFiles");
    FileUtils.forceMkdir(tmpPeonFilesDir);
    registerDeleteDirectory(closer, tmpPeonFilesDir);
    final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, false);
    closer.register(ioPeon);

    long startTime = System.currentTimeMillis();
    ByteStreams.write(Ints.toByteArray(IndexIO.V9_VERSION), Files.newOutputStreamSupplier(new File(outDir, "version.bin")));
    log.info("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);

    progress.progress();
    startTime = System.currentTimeMillis();
    try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
      mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
    }
    log.info("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);

    progress.progress();
    final Map<String, ValueType> metricsValueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
    mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);

    final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
    final List<DimensionMerger> mergers = new ArrayList<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
      mergers.add(handlers[i].makeMerger(indexSpec, v9TmpDir, ioPeon, dimCapabilities.get(i), progress));
    }

    /************* Setup Dim Conversions **************/
    progress.progress();
    startTime = System.currentTimeMillis();
    final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
    final ArrayList<Boolean> dimensionSkipFlag = Lists.newArrayListWithCapacity(mergedDimensions.size());
    final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
    writeDimValueAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
    log.info("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);

    /************* Walk through data sets, merge them, and write merged columns *************/
    progress.progress();
    final Iterable<Rowboat> theRows = makeRowIterable(adapters, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
    final LongColumnSerializer timeWriter = setupTimeWriter(ioPeon, indexSpec);
    final ArrayList<GenericColumnSerializer> metWriters = setupMetricsWriters(ioPeon, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
    final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());
    mergeIndexesAndWriteColumns(adapters, progress, theRows, timeWriter, metWriters, rowNumConversions, mergers);

    /************ Create Inverted Indexes and Finalize Build Columns *************/
    final String section = "build inverted index and columns";
    progress.startSection(section);
    makeTimeColumn(v9Smoosher, progress, timeWriter);
    makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metWriters);
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionMergerV9 merger = (DimensionMergerV9) mergers.get(i);
      merger.writeIndexes(rowNumConversions, closer);
      if (merger.canSkip()) {
        continue;
      }
      ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
      makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
    }
    progress.stopSection(section);

    /************* Make index.drd & metadata.drd files **************/
    progress.progress();
    makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
    makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
    v9Smoosher.close();
    progress.stop();
    return outDir;
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
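Note the Guava Closer idiom that brackets the whole build: temporary directories and the IOPeon are registered with the closer, any failure propagates through closer.rethrow, and closer.close in the finally block releases everything in reverse order. A minimal self-contained sketch of the same pattern (the file name is hypothetical):

import com.google.common.io.Closer;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class CloserExample
{
  public static void main(String[] args) throws IOException
  {
    final Closer closer = Closer.create();
    try {
      // Registered resources are closed in reverse order by closer.close().
      OutputStream out = closer.register(new FileOutputStream("example.bin"));
      out.write(new byte[]{1, 2, 3});
    } catch (Throwable t) {
      // Rethrows t while remembering it, so failures in close() don't mask it.
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
  }
}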