use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.
the class CompactionTaskRunTest method getCSVFormatRowsFromSegments.
private List<String> getCSVFormatRowsFromSegments(List<DataSegment> segments) throws Exception {
final File cacheDir = temporaryFolder.newFolder();
final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir);
List<Cursor> cursors = new ArrayList<>();
for (DataSegment segment : segments) {
final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(testUtils.getTestIndexIO().loadIndex(segmentFile)), segment.getInterval());
final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
cursors.addAll(cursorSequence.toList());
}
List<String> rowsFromSegment = new ArrayList<>();
for (Cursor cursor : cursors) {
cursor.reset();
while (!cursor.isDone()) {
final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("ts", "ts"));
final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("val", "val"));
Object dimObject = selector2.getObject();
String dimVal = null;
if (dimObject instanceof String) {
dimVal = (String) dimObject;
} else if (dimObject instanceof List) {
dimVal = String.join("|", (List<String>) dimObject);
}
rowsFromSegment.add(makeCSVFormatRow(selector1.getObject().toString(), dimVal, selector3.defaultGetObject().toString()));
cursor.advance();
}
}
return rowsFromSegment;
}
use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.
the class IndexTaskTest method testTransformSpec.
@Test
public void testTransformSpec() throws Exception {
File tmpDir = temporaryFolder.newFolder();
File tmpFile = File.createTempFile("druid", "index", tmpDir);
try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
writer.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
writer.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
writer.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
}
final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")));
final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
final String listDelimiter = "|";
final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim", "b", null), ImmutableList.of(new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()), new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())));
final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);
final IndexIngestionSpec indexIngestionSpec;
if (useInputFormatApi) {
indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, new CsvInputFormat(columns, listDelimiter, null, false, 0), transformSpec, null, tuningConfig, false, false);
} else {
indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0), transformSpec, null, tuningConfig, false, false);
}
IndexTask indexTask = new IndexTask(null, null, indexIngestionSpec, null);
Assert.assertEquals(indexTask.getId(), indexTask.getGroupId());
final List<DataSegment> segments = runTask(indexTask).rhs;
Assert.assertEquals(1, segments.size());
DataSegment segment = segments.get(0);
final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)), segment.getInterval());
final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
final List<Map<String, Object>> transforms = cursorSequence.map(cursor -> {
final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimt", "dimt"));
final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray1", "dimtarray1"));
final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray2", "dimtarray2"));
final DimensionSelector selector4 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtnum_array", "dimtnum_array"));
Map<String, Object> row = new HashMap<>();
row.put("dimt", selector1.defaultGetObject());
row.put("dimtarray1", selector2.defaultGetObject());
row.put("dimtarray2", selector3.defaultGetObject());
row.put("dimtnum_array", selector4.defaultGetObject());
cursor.advance();
return row;
}).toList();
Assert.assertEquals(1, transforms.size());
Assert.assertEquals("bb", transforms.get(0).get("dimt"));
Assert.assertEquals(ImmutableList.of("b", "b"), transforms.get(0).get("dimtarray1"));
Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transforms.get(0).get("dimtarray2"));
Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transforms.get(0).get("dimtnum_array"));
Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
}
use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.
the class StringGroupByColumnSelectorStrategy method initColumnValues.
@Override
public void initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) {
DimensionSelector dimSelector = (DimensionSelector) selector;
IndexedInts row = dimSelector.getRow();
valuess[columnIndex] = row;
}
use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.
the class SegmentAnalyzer method analyzeStringColumn.
private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) {
int cardinality = 0;
long size = 0;
Comparable min = null;
Comparable max = null;
if (analyzingCardinality()) {
cardinality = storageAdapter.getDimensionCardinality(columnName);
}
if (analyzingSize()) {
final DateTime start = storageAdapter.getMinTime();
final DateTime end = storageAdapter.getMaxTime();
final Sequence<Cursor> cursors = storageAdapter.makeCursors(null, new Interval(start, end), VirtualColumns.EMPTY, Granularities.ALL, false, null);
size = cursors.accumulate(0L, new Accumulator<Long, Cursor>() {
@Override
public Long accumulate(Long accumulated, Cursor cursor) {
DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
if (selector == null) {
return accumulated;
}
long current = accumulated;
while (!cursor.isDone()) {
final IndexedInts row = selector.getRow();
for (int i = 0, rowSize = row.size(); i < rowSize; ++i) {
final String dimVal = selector.lookupName(row.get(i));
if (dimVal != null && !dimVal.isEmpty()) {
current += StringUtils.estimatedBinaryLengthAsUTF8(dimVal);
}
}
cursor.advance();
}
return current;
}
});
}
if (analyzingMinMax()) {
min = storageAdapter.getMinValue(columnName);
max = storageAdapter.getMaxValue(columnName);
}
return new ColumnAnalysis(capabilities.toColumnType(), capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), // if we don't know for sure, then we should plan to check for nulls
capabilities.hasNulls().isMaybeTrue(), size, cardinality, min, max, null);
}
use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.
the class ArrayOfDoublesSketchAggregatorFactory method factorizeBuffered.
@Override
public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory) {
if (metricColumns == null) {
// input is sketches, use merge aggregator
final BaseObjectColumnValueSelector<ArrayOfDoublesSketch> selector = metricFactory.makeColumnValueSelector(fieldName);
if (selector instanceof NilColumnValueSelector) {
return new NoopArrayOfDoublesSketchBufferAggregator(numberOfValues);
}
return new ArrayOfDoublesSketchMergeBufferAggregator(selector, nominalEntries, numberOfValues, getMaxIntermediateSizeWithNulls());
}
// input is raw data (key and array of values), use build aggregator
final DimensionSelector keySelector = metricFactory.makeDimensionSelector(new DefaultDimensionSpec(fieldName, fieldName));
if (DimensionSelector.isNilSelector(keySelector)) {
return new NoopArrayOfDoublesSketchBufferAggregator(numberOfValues);
}
final List<BaseDoubleColumnValueSelector> valueSelectors = new ArrayList<>();
for (final String column : metricColumns) {
final BaseDoubleColumnValueSelector valueSelector = metricFactory.makeColumnValueSelector(column);
valueSelectors.add(valueSelector);
}
return new ArrayOfDoublesSketchBuildBufferAggregator(keySelector, valueSelectors, nominalEntries, getMaxIntermediateSizeWithNulls());
}
Aggregations