Search in sources :

Example 26 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class CompactionTaskRunTest method getCSVFormatRowsFromSegments.

private List<String> getCSVFormatRowsFromSegments(List<DataSegment> segments) throws Exception {
    final File cacheDir = temporaryFolder.newFolder();
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir);
    List<Cursor> cursors = new ArrayList<>();
    for (DataSegment segment : segments) {
        final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
        final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(testUtils.getTestIndexIO().loadIndex(segmentFile)), segment.getInterval());
        final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
        cursors.addAll(cursorSequence.toList());
    }
    List<String> rowsFromSegment = new ArrayList<>();
    for (Cursor cursor : cursors) {
        cursor.reset();
        while (!cursor.isDone()) {
            final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("ts", "ts"));
            final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
            final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("val", "val"));
            Object dimObject = selector2.getObject();
            String dimVal = null;
            if (dimObject instanceof String) {
                dimVal = (String) dimObject;
            } else if (dimObject instanceof List) {
                dimVal = String.join("|", (List<String>) dimObject);
            }
            rowsFromSegment.add(makeCSVFormatRow(selector1.getObject().toString(), dimVal, selector3.defaultGetObject().toString()));
            cursor.advance();
        }
    }
    return rowsFromSegment;
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) ArrayList(java.util.ArrayList) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Cursor(org.apache.druid.segment.Cursor) DataSegment(org.apache.druid.timeline.DataSegment) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) File(java.io.File) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter)

Example 27 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class IndexTaskTest method testTransformSpec.

@Test
public void testTransformSpec() throws Exception {
    File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
        writer.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
        writer.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
    }
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")));
    final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
    final String listDelimiter = "|";
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim", "b", null), ImmutableList.of(new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()), new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())));
    final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);
    final IndexIngestionSpec indexIngestionSpec;
    if (useInputFormatApi) {
        indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, new CsvInputFormat(columns, listDelimiter, null, false, 0), transformSpec, null, tuningConfig, false, false);
    } else {
        indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0), transformSpec, null, tuningConfig, false, false);
    }
    IndexTask indexTask = new IndexTask(null, null, indexIngestionSpec, null);
    Assert.assertEquals(indexTask.getId(), indexTask.getGroupId());
    final List<DataSegment> segments = runTask(indexTask).rhs;
    Assert.assertEquals(1, segments.size());
    DataSegment segment = segments.get(0);
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)), segment.getInterval());
    final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    final List<Map<String, Object>> transforms = cursorSequence.map(cursor -> {
        final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimt", "dimt"));
        final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray1", "dimtarray1"));
        final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray2", "dimtarray2"));
        final DimensionSelector selector4 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtnum_array", "dimtnum_array"));
        Map<String, Object> row = new HashMap<>();
        row.put("dimt", selector1.defaultGetObject());
        row.put("dimtarray1", selector2.defaultGetObject());
        row.put("dimtarray2", selector3.defaultGetObject());
        row.put("dimtnum_array", selector4.defaultGetObject());
        cursor.advance();
        return row;
    }).toList();
    Assert.assertEquals(1, transforms.size());
    Assert.assertEquals("bb", transforms.get(0).get("dimt"));
    Assert.assertEquals(ImmutableList.of("b", "b"), transforms.get(0).get("dimtarray1"));
    Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transforms.get(0).get("dimtarray2"));
    Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transforms.get(0).get("dimtnum_array"));
    Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
    Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
    Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
    Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) Arrays(java.util.Arrays) IndexSpec(org.apache.druid.segment.IndexSpec) Pair(org.apache.druid.java.util.common.Pair) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) IAE(org.apache.druid.java.util.common.IAE) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) NoopSegmentHandoffNotifierFactory(org.apache.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory) EqualsVerifier(nl.jqno.equalsverifier.EqualsVerifier) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) CountDownLatch(java.util.concurrent.CountDownLatch) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) SegmentLocalCacheManager(org.apache.druid.segment.loading.SegmentLocalCacheManager) SegmentId(org.apache.druid.timeline.SegmentId) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) SegmentLoaderConfig(org.apache.druid.segment.loading.SegmentLoaderConfig) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) BufferedWriter(java.io.BufferedWriter) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) CoreMatchers(org.hamcrest.CoreMatchers) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Event(org.apache.druid.java.util.emitter.core.Event) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Parameterized(org.junit.runners.Parameterized) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LockGranularity(org.apache.druid.indexing.common.LockGranularity) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Intervals(org.apache.druid.java.util.common.Intervals) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) Files(com.google.common.io.Files) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) DimensionSelector(org.apache.druid.segment.DimensionSelector) ExpectedException(org.junit.rules.ExpectedException) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) Cursor(org.apache.druid.segment.Cursor) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) Cursor(org.apache.druid.segment.Cursor) DataSegment(org.apache.druid.timeline.DataSegment) BufferedWriter(java.io.BufferedWriter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DimensionSelector(org.apache.druid.segment.DimensionSelector) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) File(java.io.File) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 28 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class StringGroupByColumnSelectorStrategy method initColumnValues.

@Override
public void initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) {
    DimensionSelector dimSelector = (DimensionSelector) selector;
    IndexedInts row = dimSelector.getRow();
    valuess[columnIndex] = row;
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) IndexedInts(org.apache.druid.segment.data.IndexedInts)

Example 29 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class SegmentAnalyzer method analyzeStringColumn.

private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) {
    int cardinality = 0;
    long size = 0;
    Comparable min = null;
    Comparable max = null;
    if (analyzingCardinality()) {
        cardinality = storageAdapter.getDimensionCardinality(columnName);
    }
    if (analyzingSize()) {
        final DateTime start = storageAdapter.getMinTime();
        final DateTime end = storageAdapter.getMaxTime();
        final Sequence<Cursor> cursors = storageAdapter.makeCursors(null, new Interval(start, end), VirtualColumns.EMPTY, Granularities.ALL, false, null);
        size = cursors.accumulate(0L, new Accumulator<Long, Cursor>() {

            @Override
            public Long accumulate(Long accumulated, Cursor cursor) {
                DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
                if (selector == null) {
                    return accumulated;
                }
                long current = accumulated;
                while (!cursor.isDone()) {
                    final IndexedInts row = selector.getRow();
                    for (int i = 0, rowSize = row.size(); i < rowSize; ++i) {
                        final String dimVal = selector.lookupName(row.get(i));
                        if (dimVal != null && !dimVal.isEmpty()) {
                            current += StringUtils.estimatedBinaryLengthAsUTF8(dimVal);
                        }
                    }
                    cursor.advance();
                }
                return current;
            }
        });
    }
    if (analyzingMinMax()) {
        min = storageAdapter.getMinValue(columnName);
        max = storageAdapter.getMaxValue(columnName);
    }
    return new ColumnAnalysis(capabilities.toColumnType(), capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), // if we don't know for sure, then we should plan to check for nulls
    capabilities.hasNulls().isMaybeTrue(), size, cardinality, min, max, null);
}
Also used : Accumulator(org.apache.druid.java.util.common.guava.Accumulator) DimensionSelector(org.apache.druid.segment.DimensionSelector) Cursor(org.apache.druid.segment.Cursor) DateTime(org.joda.time.DateTime) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) IndexedInts(org.apache.druid.segment.data.IndexedInts) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) Interval(org.joda.time.Interval)

Example 30 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class ArrayOfDoublesSketchAggregatorFactory method factorizeBuffered.

@Override
public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory) {
    if (metricColumns == null) {
        // input is sketches, use merge aggregator
        final BaseObjectColumnValueSelector<ArrayOfDoublesSketch> selector = metricFactory.makeColumnValueSelector(fieldName);
        if (selector instanceof NilColumnValueSelector) {
            return new NoopArrayOfDoublesSketchBufferAggregator(numberOfValues);
        }
        return new ArrayOfDoublesSketchMergeBufferAggregator(selector, nominalEntries, numberOfValues, getMaxIntermediateSizeWithNulls());
    }
    // input is raw data (key and array of values), use build aggregator
    final DimensionSelector keySelector = metricFactory.makeDimensionSelector(new DefaultDimensionSpec(fieldName, fieldName));
    if (DimensionSelector.isNilSelector(keySelector)) {
        return new NoopArrayOfDoublesSketchBufferAggregator(numberOfValues);
    }
    final List<BaseDoubleColumnValueSelector> valueSelectors = new ArrayList<>();
    for (final String column : metricColumns) {
        final BaseDoubleColumnValueSelector valueSelector = metricFactory.makeColumnValueSelector(column);
        valueSelectors.add(valueSelector);
    }
    return new ArrayOfDoublesSketchBuildBufferAggregator(keySelector, valueSelectors, nominalEntries, getMaxIntermediateSizeWithNulls());
}
Also used : BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) DimensionSelector(org.apache.druid.segment.DimensionSelector) ArrayList(java.util.ArrayList) ArrayOfDoublesSketch(org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch) NilColumnValueSelector(org.apache.druid.segment.NilColumnValueSelector) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec)

Aggregations

DimensionSelector (org.apache.druid.segment.DimensionSelector)66 Test (org.junit.Test)36 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)31 Cursor (org.apache.druid.segment.Cursor)24 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)24 IndexedInts (org.apache.druid.segment.data.IndexedInts)22 List (java.util.List)14 VirtualColumns (org.apache.druid.segment.VirtualColumns)14 ImmutableList (com.google.common.collect.ImmutableList)12 StorageAdapter (org.apache.druid.segment.StorageAdapter)12 ArrayList (java.util.ArrayList)11 Intervals (org.apache.druid.java.util.common.Intervals)11 Granularities (org.apache.druid.java.util.common.granularity.Granularities)11 Sequence (org.apache.druid.java.util.common.guava.Sequence)11 QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter)11 NullHandling (org.apache.druid.common.config.NullHandling)10 ColumnSelectorFactory (org.apache.druid.segment.ColumnSelectorFactory)9 DataSegment (org.apache.druid.timeline.DataSegment)9 ConstantDimensionSelector (org.apache.druid.segment.ConstantDimensionSelector)8 ValueMatcher (org.apache.druid.query.filter.ValueMatcher)7