use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class DoubleStorageTest method buildIndex.
/**
 * Builds an in-memory index from {@code getStreamOfEvents()} while the
 * {@code ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY} system property is temporarily set to
 * {@code storeDoubleAsFloat}, then persists it to a fresh temporary directory and loads it back.
 *
 * <p>The previous value of the system property is restored (or the property is cleared if it was
 * unset) before persisting, even when building or populating the index throws, so a failure here
 * cannot leak the override into other tests running in the same JVM.
 *
 * @param storeDoubleAsFloat value to assign to the double-storage system property while the
 *                           incremental index is built and populated
 * @return the index loaded back from the persisted directory
 * @throws IOException if the temporary location cannot be created or persisting/loading fails
 */
private static QueryableIndex buildIndex(String storeDoubleAsFloat) throws IOException {
    final String oldValue = System.getProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY);
    System.setProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY, storeDoubleAsFloat);

    final IncrementalIndex index;
    try {
        final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(DateTimes.of("2011-01-13T00:00:00.000Z").getMillis())
            .withDimensionsSpec(ROW_PARSER)
            .withMetrics(new DoubleSumAggregatorFactory(DIM_FLOAT_NAME, DIM_FLOAT_NAME))
            .build();
        index = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(schema)
            .setMaxRowCount(MAX_ROWS)
            .build();
        getStreamOfEvents().forEach(o -> {
            try {
                index.add(ROW_PARSER.parseBatch((Map<String, Object>) o).get(0));
            } catch (IndexSizeExceededException e) {
                // forEach cannot propagate checked exceptions; rethrow unchecked, keeping the cause.
                throw new RuntimeException(e);
            }
        });
    } finally {
        // Always undo the JVM-wide override, including on failure.
        if (oldValue == null) {
            System.clearProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY);
        } else {
            System.setProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY, oldValue);
        }
    }

    // createTempFile is used only to reserve a unique name; the file is replaced by a directory.
    File someTmpFile = File.createTempFile("billy", "yay");
    someTmpFile.delete();
    FileUtils.mkdirp(someTmpFile);
    INDEX_MERGER_V9.persist(index, someTmpFile, new IndexSpec(), null);
    // NOTE(review): File.delete() only removes empty directories, so this is presumably a no-op
    // once persist has written into the directory -- confirm and consider removing.
    someTmpFile.delete();
    return INDEX_IO.loadIndex(someTmpFile);
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class GroupByMultiSegmentTest method setup.
/**
 * Prepares two persisted segments ("A" and "B") under a fresh temporary directory, each holding
 * two rows at timestamp 1000 with columns {@code dimA} and {@code metA}, loads them back as
 * queryable indices, and then sets up the groupBy factory.
 */
@Before
public void setup() throws Exception {
    tmpDir = FileUtils.createTempDir();
    final List<String> columnNames = Arrays.asList("dimA", "metA");

    // ---- segment A: "hello" and "world" ----
    final IncrementalIndex indexA = makeIncIndex(false);
    incrementalIndices.add(indexA);

    final Map<String, Object> helloEvent = new HashMap<>();
    helloEvent.put("dimA", "hello");
    helloEvent.put("metA", 100);
    final InputRow helloRow = new MapBasedInputRow(1000, columnNames, helloEvent);
    indexA.add(helloRow);

    final Map<String, Object> worldEventA = new HashMap<>();
    worldEventA.put("dimA", "world");
    worldEventA.put("metA", 75);
    final InputRow worldRowA = new MapBasedInputRow(1000, columnNames, worldEventA);
    indexA.add(worldRowA);

    final File segmentDirA = INDEX_MERGER_V9.persist(indexA, new File(tmpDir, "A"), new IndexSpec(), null);
    final QueryableIndex queryableA = INDEX_IO.loadIndex(segmentDirA);

    // ---- segment B: "foo" and "world" ----
    final IncrementalIndex indexB = makeIncIndex(false);
    incrementalIndices.add(indexB);

    final Map<String, Object> fooEvent = new HashMap<>();
    fooEvent.put("dimA", "foo");
    fooEvent.put("metA", 100);
    final InputRow fooRow = new MapBasedInputRow(1000, columnNames, fooEvent);
    indexB.add(fooRow);

    final Map<String, Object> worldEventB = new HashMap<>();
    worldEventB.put("dimA", "world");
    worldEventB.put("metA", 75);
    final InputRow worldRowB = new MapBasedInputRow(1000, columnNames, worldEventB);
    indexB.add(worldRowB);

    final File segmentDirB = INDEX_MERGER_V9.persist(indexB, new File(tmpDir, "B"), new IndexSpec(), null);
    final QueryableIndex queryableB = INDEX_IO.loadIndex(segmentDirB);

    groupByIndices = Arrays.asList(queryableA, queryableB);
    resourceCloser = Closer.create();
    setupGroupByFactory();
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class GroupByLimitPushDownMultiNodeMergeTest method setup.
/**
 * Prepares six persisted segments ("A" through "F") under a fresh temporary directory, loads
 * them back as queryable indices, and then sets up the groupBy factory.
 *
 * <p>Segments A-D use the columns {@code dimA}/{@code metA} and a default incremental index;
 * segments E and F additionally carry {@code dimB} and are built with an explicit dimension
 * schema list (two string dimensions plus {@code metA} as a long dimension).
 */
@Before
public void setup() throws Exception {
    tmpDir = FileUtils.createTempDir();
    final List<String> dimNames = Arrays.asList("dimA", "metA");

    // ---- segment A ----
    final IncrementalIndex indexA = makeIncIndex(false);
    incrementalIndices.add(indexA);
    addEventRow(indexA, 1505260888888L, dimNames, "pomegranate", 2395L);
    addEventRow(indexA, 1505260800000L, dimNames, "mango", 8L);
    addEventRow(indexA, 1505264400000L, dimNames, "pomegranate", 5028L);
    addEventRow(indexA, 1505264400400L, dimNames, "mango", 7L);
    final QueryableIndex qindexA = persistAndLoadSegment(indexA, "A");

    // ---- segment B ----
    final IncrementalIndex indexB = makeIncIndex(false);
    incrementalIndices.add(indexB);
    addEventRow(indexB, 1505260800000L, dimNames, "pomegranate", 4718L);
    addEventRow(indexB, 1505260800000L, dimNames, "mango", 18L);
    addEventRow(indexB, 1505264400000L, dimNames, "pomegranate", 2698L);
    addEventRow(indexB, 1505264400000L, dimNames, "mango", 3L);
    final QueryableIndex qindexB = persistAndLoadSegment(indexB, "B");

    // ---- segment C ----
    final IncrementalIndex indexC = makeIncIndex(false);
    incrementalIndices.add(indexC);
    addEventRow(indexC, 1505260800000L, dimNames, "pomegranate", 2395L);
    addEventRow(indexC, 1605260800000L, dimNames, "mango", 8L);
    addEventRow(indexC, 1705264400000L, dimNames, "pomegranate", 5028L);
    addEventRow(indexC, 1805264400000L, dimNames, "mango", 7L);
    final QueryableIndex qindexC = persistAndLoadSegment(indexC, "C");

    // ---- segment D ----
    final IncrementalIndex indexD = makeIncIndex(false);
    incrementalIndices.add(indexD);
    addEventRow(indexD, 1505260800000L, dimNames, "pomegranate", 4718L);
    addEventRow(indexD, 1605260800000L, dimNames, "mango", 18L);
    addEventRow(indexD, 1705264400000L, dimNames, "pomegranate", 2698L);
    addEventRow(indexD, 1805264400000L, dimNames, "mango", 3L);
    final QueryableIndex qindexD = persistAndLoadSegment(indexD, "D");

    // Segments E and F carry the extra dimB column and an explicit dimension schema.
    final List<String> dimNames2 = Arrays.asList("dimA", "dimB", "metA");
    final List<DimensionSchema> dimensions = Arrays.asList(
        new StringDimensionSchema("dimA"),
        new StringDimensionSchema("dimB"),
        new LongDimensionSchema("metA")
    );

    // ---- segment E ----
    final IncrementalIndex indexE = makeIncIndex(false, dimensions);
    incrementalIndices.add(indexE);
    addEventRow(indexE, 1505260800000L, dimNames2, "pomegranate", "raw", 5L);
    addEventRow(indexE, 1605260800000L, dimNames2, "mango", "ripe", 9L);
    addEventRow(indexE, 1705264400000L, dimNames2, "pomegranate", "raw", 3L);
    addEventRow(indexE, 1805264400000L, dimNames2, "mango", "ripe", 7L);
    addEventRow(indexE, 1805264400000L, dimNames2, "grape", "raw", 5L);
    addEventRow(indexE, 1805264400000L, dimNames2, "apple", "ripe", 3L);
    addEventRow(indexE, 1805264400000L, dimNames2, "apple", "raw", 1L);
    addEventRow(indexE, 1805264400000L, dimNames2, "apple", "ripe", 4L);
    addEventRow(indexE, 1805264400000L, dimNames2, "apple", "raw", 1L);
    addEventRow(indexE, 1805264400000L, dimNames2, "banana", "ripe", 4L);
    addEventRow(indexE, 1805264400000L, dimNames2, "orange", "raw", 9L);
    addEventRow(indexE, 1805264400000L, dimNames2, "peach", "ripe", 7L);
    addEventRow(indexE, 1805264400000L, dimNames2, "orange", "raw", 2L);
    addEventRow(indexE, 1805264400000L, dimNames2, "strawberry", "ripe", 10L);
    final QueryableIndex qindexE = persistAndLoadSegment(indexE, "E");

    // ---- segment F ----
    final IncrementalIndex indexF = makeIncIndex(false, dimensions);
    incrementalIndices.add(indexF);
    addEventRow(indexF, 1505260800000L, dimNames2, "kiwi", "raw", 7L);
    addEventRow(indexF, 1605260800000L, dimNames2, "watermelon", "ripe", 14L);
    addEventRow(indexF, 1705264400000L, dimNames2, "kiwi", "raw", 8L);
    addEventRow(indexF, 1805264400000L, dimNames2, "kiwi", "ripe", 8L);
    addEventRow(indexF, 1805264400000L, dimNames2, "lemon", "raw", 3L);
    addEventRow(indexF, 1805264400000L, dimNames2, "cherry", "ripe", 2L);
    addEventRow(indexF, 1805264400000L, dimNames2, "cherry", "raw", 7L);
    addEventRow(indexF, 1805264400000L, dimNames2, "avocado", "ripe", 12L);
    addEventRow(indexF, 1805264400000L, dimNames2, "cherry", "raw", 3L);
    addEventRow(indexF, 1805264400000L, dimNames2, "plum", "ripe", 5L);
    addEventRow(indexF, 1805264400000L, dimNames2, "plum", "raw", 3L);
    addEventRow(indexF, 1805264400000L, dimNames2, "lime", "ripe", 7L);
    final QueryableIndex qindexF = persistAndLoadSegment(indexF, "F");

    groupByIndices = Arrays.asList(qindexA, qindexB, qindexC, qindexD, qindexE, qindexF);
    resourceCloser = Closer.create();
    setupGroupByFactory();
}

/** Adds one row with the columns {@code dimA} and {@code metA} to {@code index}. */
private static void addEventRow(IncrementalIndex index, long timestamp, List<String> dimNames, String dimA, long metA) throws Exception {
    final Map<String, Object> event = new HashMap<>();
    event.put("dimA", dimA);
    event.put("metA", metA);
    final InputRow row = new MapBasedInputRow(timestamp, dimNames, event);
    index.add(row);
}

/** Adds one row with the columns {@code dimA}, {@code dimB} and {@code metA} to {@code index}. */
private static void addEventRow(IncrementalIndex index, long timestamp, List<String> dimNames, String dimA, String dimB, long metA) throws Exception {
    final Map<String, Object> event = new HashMap<>();
    event.put("dimA", dimA);
    event.put("dimB", dimB);
    event.put("metA", metA);
    final InputRow row = new MapBasedInputRow(timestamp, dimNames, event);
    index.add(row);
}

/** Persists {@code index} into the sub-directory {@code dirName} of {@code tmpDir} and loads it back. */
private QueryableIndex persistAndLoadSegment(IncrementalIndex index, String dirName) throws Exception {
    final File segmentDir = INDEX_MERGER_V9.persist(index, new File(tmpDir, dirName), new IndexSpec(), null);
    return INDEX_IO.loadIndex(segmentDir);
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class GroupByQueryRunnerFactoryTest method createSegment.
/**
 * Creates a segment backed by an on-heap incremental index (single "count" aggregator,
 * concurrent adds enabled, at most 5000 rows) populated with three parsed rows.
 *
 * <p>The index is handed to {@code closerRule} immediately after construction, before any row is
 * parsed or added, so it is still registered for cleanup if parsing or ingestion throws.
 *
 * @return a segment wrapping the populated incremental index, with a dummy "test" segment id
 * @throws Exception if a row cannot be parsed or added to the index
 */
private Segment createSegment() throws Exception {
    IncrementalIndex incrementalIndex = new OnheapIncrementalIndex.Builder()
        .setSimpleTestingIndexSchema(new CountAggregatorFactory("count"))
        .setConcurrentEventAdd(true)
        .setMaxRowCount(5000)
        .build();
    // Register first: an exception below must not leave the index unregistered.
    closerRule.closeLater(incrementalIndex);

    StringInputRowParser parser = new StringInputRowParser(
        new CSVParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags"))),
            "\t",
            ImmutableList.of("timestamp", "product", "tags"),
            false,
            0
        ),
        "UTF-8"
    );
    String[] rows = new String[] {
        "2011-01-12T00:00:00.000Z,product_1,t1",
        "2011-01-13T00:00:00.000Z,product_2,t2",
        "2011-01-14T00:00:00.000Z,product_3,t2"
    };
    for (String row : rows) {
        incrementalIndex.add(parser.parse(row));
    }
    return new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("test"));
}
use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
the class GroupByQueryHelper method createIndexAccumulatorPair.
/**
 * Creates an on-heap incremental index configured for the given groupBy query, together with an
 * accumulator that feeds rows into that index.
 *
 * <p>When {@code subquery} is {@code null} the index is built with the combining factories of the
 * query's aggregators; otherwise the query's aggregators are used as-is. Every query dimension is
 * registered as a string dimension under its output name. The index's minimum timestamp is the
 * start of the first query interval, bucketed by the query granularity unless that granularity is
 * {@code Granularities.ALL}.
 *
 * @param query    the groupBy query that determines granularity, dimensions, aggregators and context flags
 * @param subquery optional subquery; when present it is used to convert {@code ResultRow}s
 * @param config   base groupBy configuration; query-specific overrides are applied to it
 * @param <T>      type of the accumulated rows; {@code MapBasedRow} and {@code ResultRow} are accepted
 * @return a pair of the new index and the accumulator that adds rows to it
 * @throws UnsupportedOperationException if the query context sets {@code useOffheap} to true
 */
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, @Nullable final GroupByQuery subquery, final GroupByQueryConfig config) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    final boolean combine = subquery == null;
    final long granTimeStart = Granularities.ALL.equals(gran) ? timeStart : gran.bucketStart(timeStart);

    final List<AggregatorFactory> aggs;
    if (combine) {
        final Function<AggregatorFactory, AggregatorFactory> toCombiningFactory = AggregatorFactory::getCombiningFactory;
        aggs = Lists.transform(query.getAggregatorSpecs(), toCombiningFactory);
    } else {
        aggs = query.getAggregatorSpecs();
    }

    final Function<DimensionSpec, String> toOutputName = DimensionSpec::getOutputName;
    final List<String> dimensions = Lists.transform(query.getDimensions(), toOutputName);

    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);

    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (DimensionSpec dimensionSpec : query.getDimensions()) {
        final String outputName = dimensionSpec.getOutputName();
        dimensionSchemas.add(new StringDimensionSchema(outputName));
    }

    final AggregatorFactory[] metrics = aggs.toArray(new AggregatorFactory[0]);
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(new DimensionsSpec(dimensionSchemas))
        .withMetrics(metrics)
        .withQueryGranularity(gran)
        .withMinTimestamp(granTimeStart)
        .build();

    final boolean useOffheap = query.getContextValue("useOffheap", false);
    if (useOffheap) {
        throw new UnsupportedOperationException("The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, " + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime " + "properties and 'groupByStrategy' query context parameters that you have set.");
    }
    final AppendableIndexBuilder indexBuilder = new OnheapIncrementalIndex.Builder();

    final IncrementalIndex index = indexBuilder
        .setIndexSchema(indexSchema)
        .setDeserializeComplexMetrics(false)
        .setConcurrentEventAdd(true)
        .setSortFacts(sortResults)
        .setMaxRowCount(querySpecificConfig.getMaxResults())
        .build();

    final Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {
        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            // Only MapBasedRow and ResultRow inputs are understood.
            if (!(in instanceof MapBasedRow) && !(in instanceof ResultRow)) {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            final MapBasedRow mapBasedRow = in instanceof MapBasedRow
                ? (MapBasedRow) in
                : ((ResultRow) in).toMapBasedRow(combine ? query : subquery);

            final MapBasedInputRow inputRow = new MapBasedInputRow(mapBasedRow.getTimestamp(), dimensions, mapBasedRow.getEvent());
            try {
                accumulated.add(inputRow);
            } catch (IndexSizeExceededException e) {
                // Exceeding the index row limit is reported as a query resource-limit failure.
                throw new ResourceLimitExceededException(e.getMessage());
            }
            return accumulated;
        }
    };

    return new Pair<>(index, accumulator);
}
Aggregations