Example 11 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io, in class ApproxCountDistinctSqlAggregator, method toDruidAggregation:

@Override
public Aggregation toDruidAggregation(
        final String name,
        final RowSignature rowSignature,
        final DruidOperatorTable operatorTable,
        final PlannerContext plannerContext,
        final List<Aggregation> existingAggregations,
        final Project project,
        final AggregateCall aggregateCall,
        final DimFilter filter
) {
    final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
    final RowExtraction rex = Expressions.toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), rexNode);
    if (rex == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    if (rowSignature.getColumnType(rex.getColumn()) == ValueType.COMPLEX) {
        aggregatorFactory = new HyperUniquesAggregatorFactory(name, rex.getColumn());
    } else {
        final SqlTypeName sqlTypeName = rexNode.getType().getSqlTypeName();
        final ValueType outputType = Calcites.getValueTypeForSqlTypeName(sqlTypeName);
        if (outputType == null) {
            throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlTypeName, name);
        }
        final DimensionSpec dimensionSpec = rex.toDimensionSpec(rowSignature, null, ValueType.STRING);
        if (dimensionSpec == null) {
            return null;
        }
        aggregatorFactory = new CardinalityAggregatorFactory(name, ImmutableList.of(dimensionSpec), false);
    }
    return Aggregation.createFinalizable(ImmutableList.<AggregatorFactory>of(aggregatorFactory), null, new PostAggregatorFactory() {

        @Override
        public PostAggregator factorize(String outputName) {
            return new HyperUniqueFinalizingPostAggregator(outputName, name);
        }
    }).filter(filter);
}
Also used: DimensionSpec (io.druid.query.dimension.DimensionSpec), SqlTypeName (org.apache.calcite.sql.type.SqlTypeName), ValueType (io.druid.segment.column.ValueType), HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory), RowExtraction (io.druid.sql.calcite.expression.RowExtraction), ISE (io.druid.java.util.common.ISE), HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory), RexNode (org.apache.calcite.rex.RexNode)
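
The pivot in toDruidAggregation is the ValueType.COMPLEX check: a complex column is assumed to already hold HLL sketches, which a HyperUniquesAggregatorFactory merges at query time, while string and numeric columns are hashed row by row through a CardinalityAggregatorFactory over a STRING dimension spec. Below is a minimal, self-contained sketch of that dispatch; ColumnType and chooseAggregator are hypothetical stand-ins for illustration, not Druid APIs.

public class ApproxCountDistinctDispatch {

    // Hypothetical stand-in for io.druid.segment.column.ValueType.
    enum ColumnType { STRING, LONG, FLOAT, COMPLEX }

    // Mirrors the branch above: complex columns are treated as precomputed
    // sketches to merge; everything else is counted approximately from raw values.
    static String chooseAggregator(ColumnType columnType, String column) {
        if (columnType == ColumnType.COMPLEX) {
            return "hyperUnique(" + column + ")";
        }
        return "cardinality(" + column + ")";
    }

    public static void main(String[] args) {
        System.out.println(chooseAggregator(ColumnType.COMPLEX, "user_sketch")); // merge sketches
        System.out.println(chooseAggregator(ColumnType.STRING, "user_id"));      // hash raw values
    }
}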

Example 12 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io, in class RowExtraction, method toDimensionSpec:

public DimensionSpec toDimensionSpec(final RowSignature rowSignature, final String outputName, final ValueType outputType) {
    Preconditions.checkNotNull(outputType, "outputType");
    final ValueType columnType = rowSignature.getColumnType(column);
    if (columnType == null) {
        return null;
    }
    if (columnType == ValueType.STRING || (column.equals(Column.TIME_COLUMN_NAME) && extractionFn != null)) {
        return extractionFn == null ? new DefaultDimensionSpec(column, outputName, outputType) : new ExtractionDimensionSpec(column, outputName, outputType, extractionFn);
    } else if (columnType == ValueType.LONG || columnType == ValueType.FLOAT) {
        if (extractionFn == null) {
            return new DefaultDimensionSpec(column, outputName, outputType);
        } else {
            return new ExtractionDimensionSpec(column, outputName, outputType, extractionFn);
        }
    } else {
        // Can't create dimensionSpecs for non-string, non-numeric columns
        return null;
    }
}
Also used: ValueType (io.druid.segment.column.ValueType), DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec), ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec)
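
Example 11 above shows the call side of this method: it requests a STRING-typed output and treats a null return as "this expression cannot be addressed as a dimension", aborting the SQL-to-Druid translation:

final DimensionSpec dimensionSpec = rex.toDimensionSpec(rowSignature, null, ValueType.STRING);
if (dimensionSpec == null) {
    return null;
}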

Example 13 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io, in class BenchmarkColumnValueGenerator, method generateSingleRowValue:

private Object generateSingleRowValue() {
    Object ret = null;
    ValueType type = schema.getType();
    if (distribution instanceof AbstractIntegerDistribution) {
        ret = ((AbstractIntegerDistribution) distribution).sample();
    } else if (distribution instanceof AbstractRealDistribution) {
        ret = ((AbstractRealDistribution) distribution).sample();
    } else if (distribution instanceof EnumeratedDistribution) {
        ret = ((EnumeratedDistribution) distribution).sample();
    }
    ret = convertType(ret, type);
    return ret;
}
Also used: AbstractRealDistribution (org.apache.commons.math3.distribution.AbstractRealDistribution), ValueType (io.druid.segment.column.ValueType), EnumeratedDistribution (org.apache.commons.math3.distribution.EnumeratedDistribution), AbstractIntegerDistribution (org.apache.commons.math3.distribution.AbstractIntegerDistribution)
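
The generator leans on the commons-math3 distribution hierarchy: AbstractIntegerDistribution.sample() returns an int, AbstractRealDistribution.sample() returns a double, and EnumeratedDistribution<T> samples from an explicit weighted list. A runnable sketch of all three families follows; the concrete distributions and weights are illustrative choices, not what the benchmark schema configures.

import java.util.Arrays;

import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.ZipfDistribution;
import org.apache.commons.math3.util.Pair;

public class DistributionSampling {
    public static void main(String[] args) {
        // AbstractIntegerDistribution subclass: sample() returns int.
        ZipfDistribution zipf = new ZipfDistribution(1000, 1.1);
        int intValue = zipf.sample();

        // AbstractRealDistribution subclass: sample() returns double.
        NormalDistribution normal = new NormalDistribution(0.0, 1.0);
        double realValue = normal.sample();

        // EnumeratedDistribution: sample() returns one of the enumerated values.
        EnumeratedDistribution<String> enumerated = new EnumeratedDistribution<>(
                Arrays.asList(new Pair<>("a", 0.7), new Pair<>("b", 0.3)));
        String enumValue = enumerated.sample();

        System.out.printf("int=%d real=%f enum=%s%n", intValue, realValue, enumValue);
    }
}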

Example 14 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io, in class BaseFilterTest, method selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory:

private List<String> selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory(
        final DimFilter filter,
        final String selectColumn
) {
    // Build the row signature: column name -> ValueType, covering both dimensions and metrics
    final Map<String, ValueType> rowSignature = Maps.newHashMap();
    for (String columnName : Iterables.concat(adapter.getAvailableDimensions(), adapter.getAvailableMetrics())) {
        rowSignature.put(columnName, adapter.getColumnCapabilities(columnName).getType());
    }
    // Perform test
    final SettableSupplier<InputRow> rowSupplier = new SettableSupplier<>();
    final ValueMatcher matcher = makeFilter(filter).makeMatcher(VIRTUAL_COLUMNS.wrap(RowBasedColumnSelectorFactory.create(rowSupplier, rowSignature)));
    final List<String> values = Lists.newArrayList();
    for (InputRow row : rows) {
        rowSupplier.set(row);
        if (matcher.matches()) {
            values.add((String) row.getRaw(selectColumn));
        }
    }
    return values;
}
Also used: SettableSupplier (io.druid.common.guava.SettableSupplier), ValueMatcher (io.druid.query.filter.ValueMatcher), ValueType (io.druid.segment.column.ValueType), InputRow (io.druid.data.input.InputRow)
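
The essential move is that makeFilter(...).makeMatcher(...) is called once, against a column selector factory backed by the SettableSupplier; each row is then swapped into the supplier before matches() is re-evaluated, so no per-row matcher construction happens. The same pattern in plain Java, with an AtomicReference standing in for SettableSupplier and a BooleanSupplier for ValueMatcher (the names here are illustrative, not Druid APIs):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BooleanSupplier;

public class BindOnceMatchPerRow {
    public static void main(String[] args) {
        // Mutable holder the matcher reads from, like SettableSupplier<InputRow>.
        AtomicReference<String> currentRow = new AtomicReference<>();

        // Built once against the holder, not against any particular row.
        BooleanSupplier matcher = () -> currentRow.get().startsWith("a");

        List<String> values = new ArrayList<>();
        for (String row : Arrays.asList("apple", "banana", "avocado")) {
            currentRow.set(row);           // point the holder at the next row
            if (matcher.getAsBoolean()) {  // re-evaluate against the new row
                values.add(row);
            }
        }
        System.out.println(values); // [apple, avocado]
    }
}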

Example 15 with ValueType

Use of io.druid.segment.column.ValueType in project druid by druid-io, in class IndexMergerV9, method makeIndexFiles:

@Override
protected File makeIndexFiles(
        final List<IndexableAdapter> adapters,
        final AggregatorFactory[] metricAggs,
        final File outDir,
        final ProgressIndicator progress,
        final List<String> mergedDimensions,
        final List<String> mergedMetrics,
        final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn,
        final IndexSpec indexSpec
) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, new Function<IndexableAdapter, Metadata>() {

        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
    Metadata segmentMetadata = null;
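    // Merge the per-segment Metadata into a single segment-level Metadata; when the
    // aggregators are known, merge under their combining (merge-time) factories.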
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        final File v9TmpDir = new File(outDir, "v9-tmp");
        FileUtils.forceMkdir(v9TmpDir);
        registerDeleteDirectory(closer, v9TmpDir);
        log.info("Start making v9 index files, outDir:%s", outDir);
        File tmpPeonFilesDir = new File(v9TmpDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, false);
        closer.register(ioPeon);
        long startTime = System.currentTimeMillis();
        ByteStreams.write(Ints.toByteArray(IndexIO.V9_VERSION), Files.newOutputStreamSupplier(new File(outDir, "version.bin")));
        log.info("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
        }
        log.info("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
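        // Collect each metric's ValueType and type name, plus each dimension's merged
        // ColumnCapabilities, across all input segments.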
        final Map<String, ValueType> metricsValueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
        final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
        final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            mergers.add(handlers[i].makeMerger(indexSpec, v9TmpDir, ioPeon, dimCapabilities.get(i), progress));
        }
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
        final ArrayList<Boolean> dimensionSkipFlag = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
        writeDimValueAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.info("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /************* Walk through data sets, merge them, and write merged columns *************/
        progress.progress();
        final Iterable<Rowboat> theRows = makeRowIterable(adapters, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        final LongColumnSerializer timeWriter = setupTimeWriter(ioPeon, indexSpec);
        final ArrayList<GenericColumnSerializer> metWriters = setupMetricsWriters(ioPeon, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());
        mergeIndexesAndWriteColumns(adapters, progress, theRows, timeWriter, metWriters, rowNumConversions, mergers);
        /************ Create Inverted Indexes and Finalize Build Columns *************/
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metWriters);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = (DimensionMergerV9) mergers.get(i);
            merger.writeIndexes(rowNumConversions, closer);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /************* Make index.drd & metadata.drd files **************/
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used: ArrayList (java.util.ArrayList), IOPeon (io.druid.segment.data.IOPeon), TmpFileIOPeon (io.druid.segment.data.TmpFileIOPeon), FileSmoosher (io.druid.java.util.common.io.smoosh.FileSmoosher), Closer (com.google.common.io.Closer), ValueType (io.druid.segment.column.ValueType), MMappedQueryableSegmentizerFactory (io.druid.segment.loading.MMappedQueryableSegmentizerFactory), ColumnDescriptor (io.druid.segment.column.ColumnDescriptor), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), FileOutputStream (java.io.FileOutputStream), IntBuffer (java.nio.IntBuffer), File (java.io.File), Map (java.util.Map), ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl)
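
The try/catch/finally shape around Closer in makeIndexFiles is Guava's documented Closer idiom: register resources as they are created, route any failure through rethrow() so that close-time exceptions become suppressed rather than masking the original, and close everything in the finally block. The idiom in isolation (the file path and method below are illustrative):

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import com.google.common.io.Closer;

public class CloserIdiom {
    static int readFirstByte(String path) throws IOException {
        Closer closer = Closer.create();
        try {
            // Register each resource as soon as it is created, so it is
            // closed even if a later step throws.
            InputStream in = closer.register(new FileInputStream(path));
            return in.read();
        } catch (Throwable t) {
            // rethrow() records t as the primary exception; failures during
            // close() are attached as suppressed exceptions.
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }

    public static void main(String[] args) throws IOException {
        System.out.println(readFirstByte(args.length > 0 ? args[0] : "/etc/hostname"));
    }
}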

Aggregations

ValueType (io.druid.segment.column.ValueType): 24 usages
ISE (io.druid.java.util.common.ISE): 8 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 5 usages
DimensionSpec (io.druid.query.dimension.DimensionSpec): 4 usages
ColumnCapabilities (io.druid.segment.column.ColumnCapabilities): 4 usages
Closer (com.google.common.io.Closer): 3 usages
Column (io.druid.segment.column.Column): 3 usages
ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl): 3 usages
ComplexColumn (io.druid.segment.column.ComplexColumn): 3 usages
Function (com.google.common.base.Function): 2 usages
Supplier (com.google.common.base.Supplier): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
Row (io.druid.data.input.Row): 2 usages
IAE (io.druid.java.util.common.IAE): 2 usages
ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis): 2 usages
ColumnDescriptor (io.druid.segment.column.ColumnDescriptor): 2 usages
DictionaryEncodedColumn (io.druid.segment.column.DictionaryEncodedColumn): 2 usages
GenericColumn (io.druid.segment.column.GenericColumn): 2 usages
IndexedFloatsGenericColumn (io.druid.segment.column.IndexedFloatsGenericColumn): 2 usages
IndexedLongsGenericColumn (io.druid.segment.column.IndexedLongsGenericColumn): 2 usages