Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class IndexMergerTestBase, method testAddMetrics.
@Test
public void testAddMetrics() throws IOException {
  IncrementalIndex index1 = IncrementalIndexTest.createIndex(
      new AggregatorFactory[]{new LongSumAggregatorFactory("A", "A")});
  closer.closeLater(index1);
  long timestamp = System.currentTimeMillis();
  index1.add(new MapBasedInputRow(timestamp, Arrays.asList("dim1", "dim2"),
      ImmutableMap.of("dim1", "1", "dim2", "2", "A", 5)));
  IncrementalIndex index2 = IncrementalIndexTest.createIndex(new AggregatorFactory[]{
      new LongSumAggregatorFactory("A", "A"), new LongSumAggregatorFactory("C", "C")});
  index2.add(new MapBasedInputRow(timestamp, Arrays.asList("dim1", "dim2"),
      ImmutableMap.of("dim1", "1", "dim2", "2", "A", 5, "C", 6)));
  closer.closeLater(index2);
  Interval interval = new Interval(DateTimes.EPOCH, DateTimes.nowUtc());
  RoaringBitmapFactory factory = new RoaringBitmapFactory();
  List<IndexableAdapter> toMerge = Arrays.asList(
      new IncrementalIndexAdapter(interval, index1, factory),
      new IncrementalIndexAdapter(interval, index2, factory));
  final File tmpDirMerged = temporaryFolder.newFolder();
  File merged = indexMerger.merge(
      toMerge,
      true,
      new AggregatorFactory[]{new LongSumAggregatorFactory("A", "A"), new LongSumAggregatorFactory("C", "C")},
      tmpDirMerged,
      indexSpec,
      -1);
  final QueryableIndexStorageAdapter adapter =
      new QueryableIndexStorageAdapter(closer.closeLater(indexIO.loadIndex(merged)));
  Assert.assertEquals(ImmutableSet.of("A", "C"), ImmutableSet.copyOf(adapter.getAvailableMetrics()));
}
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class QueryableIndexIndexableAdapterTest, method testGetBitmapIndex.
@Test
public void testGetBitmapIndex() throws Exception {
  final long timestamp = System.currentTimeMillis();
  IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
  IncrementalIndexTest.populateIndex(timestamp, toPersist);
  final File tempDir = temporaryFolder.newFolder();
  QueryableIndex index = closer.closeLater(
      indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, INDEX_SPEC, null)));
  IndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
  String dimension = "dim1";
  // null is added to all dimensions with value
  @SuppressWarnings("UnusedAssignment")
  BitmapValues bitmapValues = adapter.getBitmapValues(dimension, 0);
  try (CloseableIndexed<String> dimValueLookup = adapter.getDimValueLookup(dimension)) {
    for (int i = 0; i < dimValueLookup.size(); i++) {
      bitmapValues = adapter.getBitmapValues(dimension, i);
      Assert.assertEquals(1, bitmapValues.size());
    }
  }
}
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class ExpressionSelectorsTest, method test_incrementalIndexStringSelector.
@Test
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException {
  // This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
  // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
  // exception when trying to make an optimized string expression selector that was not appropriate to use for the
  // underlying dimension selector.
  // This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so those
  // conditions are replicated by this test (see the sketch after this test).
  // See https://github.com/apache/druid/pull/10248 for details.
  IncrementalIndexSchema schema = new IncrementalIndexSchema(
      0,
      new TimestampSpec("time", "millis", DateTimes.nowUtc()),
      Granularities.NONE,
      VirtualColumns.EMPTY,
      DimensionsSpec.EMPTY,
      new AggregatorFactory[]{new CountAggregatorFactory("count")},
      true);
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setMaxRowCount(100)
      .setIndexSchema(schema)
      .build();
  index.add(new MapBasedInputRow(
      DateTimes.nowUtc().getMillis(), ImmutableList.of("x"), ImmutableMap.of("x", "foo")));
  index.add(new MapBasedInputRow(
      DateTimes.nowUtc().plusMillis(1000).getMillis(), ImmutableList.of("y"), ImmutableMap.of("y", "foo")));
  IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
  Sequence<Cursor> cursors = adapter.makeCursors(
      null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null);
  int rowsProcessed = cursors.map(cursor -> {
    DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(
        cursor.getColumnSelectorFactory(),
        Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()),
        null);
    DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(
        cursor.getColumnSelectorFactory(),
        Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()),
        null);
    int rowCount = 0;
    while (!cursor.isDone()) {
      Object x = xExprSelector.getObject();
      Object y = yExprSelector.getObject();
      List<String> expectedFoo = Collections.singletonList("foofoo");
      List<String> expectedNull = NullHandling.replaceWithDefault()
          ? Collections.singletonList("foo")
          : Collections.singletonList(null);
      if (rowCount == 0) {
        Assert.assertEquals(expectedFoo, x);
        Assert.assertEquals(expectedNull, y);
      } else {
        Assert.assertEquals(expectedNull, x);
        Assert.assertEquals(expectedFoo, y);
      }
      rowCount++;
      cursor.advance();
    }
    return rowCount;
  }).accumulate(0, (in, acc) -> in + acc);
  Assert.assertEquals(2, rowsProcessed);
}
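The mismatch described in the comment above matters because an optimized string expression selector pre-computes results per dictionary id, which is only valid when the underlying selector can enumerate its dictionary up front. The following is a hypothetical sketch of the kind of check involved, not the actual ExpressionSelectors code; DefaultDimensionSpec.of and nameLookupPossibleInAdvance are standard Druid APIs, but the surrounding logic here is illustrative only.
// Hypothetical sketch (not the actual ExpressionSelectors logic): checking the underlying
// selector directly before choosing the dictionary-based optimization.
DimensionSelector underlying = cursor.getColumnSelectorFactory()
    .makeDimensionSelector(DefaultDimensionSpec.of("x"));
if (underlying.nameLookupPossibleInAdvance()) {
  // the dictionary can be enumerated up front, so the expression can be evaluated once per dictionary id
} else {
  // otherwise the expression must be evaluated against each row's value
}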
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class InputSourceSampler, method sample.
public SamplerResponse sample(
    final InputSource inputSource,
    // inputFormat can be null only if inputSource.needsFormat() = false or parser is specified.
    @Nullable final InputFormat inputFormat,
    @Nullable final DataSchema dataSchema,
    @Nullable final SamplerConfig samplerConfig
) {
  Preconditions.checkNotNull(inputSource, "inputSource required");
  if (inputSource.needsFormat()) {
    Preconditions.checkNotNull(inputFormat, "inputFormat required");
  }
  final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
  final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;
  final Closer closer = Closer.create();
  final File tempDir = FileUtils.createTempDir();
  closer.register(() -> FileUtils.deleteDirectory(tempDir));
  try {
    final InputSourceReader reader =
        buildReader(nonNullSamplerConfig, nonNullDataSchema, inputSource, inputFormat, tempDir);
    try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
         final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema);
         final Closer closer1 = closer) {
      List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
      int numRowsIndexed = 0;
      while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
        final InputRowListPlusRawValues inputRowListPlusRawValues = iterator.next();
        final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
        final ParseException parseException = inputRowListPlusRawValues.getParseException();
        if (parseException != null) {
          if (rawColumnsList != null) {
            // add all rows to the response
            responseRows.addAll(
                rawColumnsList.stream()
                              .map(rawColumns -> new SamplerResponseRow(rawColumns, null, true, parseException.getMessage()))
                              .collect(Collectors.toList()));
          } else {
            // no data parsed, add one response row
            responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
          }
          continue;
        }
        List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
        if (inputRows == null) {
          continue;
        }
        for (int i = 0; i < inputRows.size(); i++) {
          // InputRowListPlusRawValues guarantees that rawColumnsList and inputRows have the same size
          Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
          InputRow row = inputRows.get(i);
          // keep the index of the row to be added to responseRows for further use
          final int rowIndex = responseRows.size();
          IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
          if (addResult.hasParseException()) {
            responseRows.add(new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage()));
          } else {
            // store the raw value; it will be merged with the data from the IncrementalIndex later
            responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
            numRowsIndexed++;
          }
        }
      }
      final List<String> columnNames = index.getColumnNames();
      columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
      for (Row row : index) {
        Map<String, Object> parsed = new LinkedHashMap<>();
        parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
        columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
        Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
        if (sortKey != null) {
          responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
        }
      }
      // make sure the size of responseRows does not exceed the requested number of rows
      if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
        responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
      }
      int numRowsRead = responseRows.size();
      return new SamplerResponse(
          numRowsRead,
          numRowsIndexed,
          responseRows.stream()
                      .filter(Objects::nonNull)
                      .filter(x -> x.getParsed() != null || x.isUnparseable() != null)
                      .collect(Collectors.toList()));
    }
  } catch (Exception e) {
    throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
  }
}
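For context, a minimal sketch of how this method might be invoked is shown below. Only the sample() signature comes from the code above; the construction of the sampler and its inputs is an assumption (the constructors of InputSourceSampler, InlineInputSource, and JsonInputFormat vary across Druid versions), and passing null for dataSchema and samplerConfig falls back to DEFAULT_DATA_SCHEMA and SamplerConfig.empty() as the method shows.
// Minimal usage sketch; the constructor calls below are assumptions, not taken from the code above.
InputSourceSampler sampler = new InputSourceSampler();                     // no-arg constructor assumed
InputSource source = new InlineInputSource("{\"time\":\"2020-01-01T00:00:00Z\",\"x\":\"foo\"}");
InputFormat format = new JsonInputFormat(null, null, null);                // argument list assumed
// null dataSchema and samplerConfig fall back to DEFAULT_DATA_SCHEMA and SamplerConfig.empty()
SamplerResponse response = sampler.sample(source, format, null, null);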
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class IncrementalIndexTest, method testConcurrentAdd.
@Test
public void testConcurrentAdd() throws Exception {
  final IncrementalIndex index = indexCreator.createIndex((Object) DEFAULT_AGGREGATOR_FACTORIES);
  final int threadCount = 10;
  final int elementsPerThread = 200;
  final int dimensionCount = 5;
  ExecutorService executor = Executors.newFixedThreadPool(threadCount);
  final long timestamp = System.currentTimeMillis();
  final CountDownLatch latch = new CountDownLatch(threadCount);
  for (int j = 0; j < threadCount; j++) {
    executor.submit(new Runnable() {
      @Override
      public void run() {
        try {
          for (int i = 0; i < elementsPerThread; i++) {
            index.add(getRow(timestamp + i, i, dimensionCount));
          }
        } catch (Exception e) {
          e.printStackTrace();
        }
        latch.countDown();
      }
    });
  }
  Assert.assertTrue(latch.await(60, TimeUnit.SECONDS));
  boolean isRollup = index.isRollup();
  Assert.assertEquals(dimensionCount, index.getDimensionNames().size());
  Assert.assertEquals(elementsPerThread * (isRollup ? 1 : threadCount), index.size());
  Iterator<Row> iterator = index.iterator();
  int curr = 0;
  while (iterator.hasNext()) {
    Row row = iterator.next();
    Assert.assertEquals(timestamp + (isRollup ? curr : curr / threadCount), row.getTimestampFromEpoch());
    Assert.assertEquals(isRollup ? threadCount : 1, row.getMetric("count").intValue());
    curr++;
  }
  Assert.assertEquals(elementsPerThread * (isRollup ? 1 : threadCount), curr);
}
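The rollup arithmetic in the assertions above can be seen in isolation. The sketch below is not part of the original test; it reuses only constructors that already appear in these examples (IncrementalIndexSchema, OnheapIncrementalIndex.Builder, MapBasedInputRow) to show two identical rows collapsing into one aggregated row when rollup is enabled.
// Minimal rollup sketch, assuming only the APIs used elsewhere on this page.
IncrementalIndexSchema rollupSchema = new IncrementalIndexSchema(
    0,
    new TimestampSpec("time", "millis", DateTimes.nowUtc()),
    Granularities.NONE,
    VirtualColumns.EMPTY,
    DimensionsSpec.EMPTY,
    new AggregatorFactory[]{new CountAggregatorFactory("count")},
    true);  // rollup enabled
IncrementalIndex rollupIndex = new OnheapIncrementalIndex.Builder()
    .setMaxRowCount(10)
    .setIndexSchema(rollupSchema)
    .build();
long ts = System.currentTimeMillis();
// identical timestamp and dimension values, so the two adds roll up into a single row
rollupIndex.add(new MapBasedInputRow(ts, ImmutableList.of("dim1"), ImmutableMap.of("dim1", "a")));
rollupIndex.add(new MapBasedInputRow(ts, ImmutableList.of("dim1"), ImmutableMap.of("dim1", "a")));
// rollupIndex.size() is expected to be 1, and the single row's "count" metric is 2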