Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class IncrementalIndexMultiValueSpecTest, method test().
@Test
public void test() throws IndexSizeExceededException {
  DimensionsSpec dimensionsSpec = new DimensionsSpec(
      Arrays.asList(
          new StringDimensionSchema("string1", DimensionSchema.MultiValueHandling.ARRAY, true),
          new StringDimensionSchema("string2", DimensionSchema.MultiValueHandling.SORTED_ARRAY, true),
          new StringDimensionSchema("string3", DimensionSchema.MultiValueHandling.SORTED_SET, true)
      )
  );
  IncrementalIndexSchema schema = new IncrementalIndexSchema(
      0,
      new TimestampSpec("ds", "auto", null),
      Granularities.ALL,
      VirtualColumns.EMPTY,
      dimensionsSpec,
      new AggregatorFactory[0],
      false
  );
  Map<String, Object> map = new HashMap<String, Object>() {
    @Override
    public Object get(Object key) {
      if (((String) key).startsWith("string")) {
        return Arrays.asList("xsd", "aba", "fds", "aba");
      }
      if (((String) key).startsWith("float")) {
        return Arrays.asList(3.92f, -2.76f, 42.153f, Float.NaN, -2.76f, -2.76f);
      }
      if (((String) key).startsWith("long")) {
        return Arrays.asList(-231238789L, 328L, 923L, 328L, -2L, 0L);
      }
      return null;
    }
  };
  IncrementalIndex index = indexCreator.createIndex(schema);
  index.add(new MapBasedInputRow(
      0,
      Arrays.asList("string1", "string2", "string3", "float1", "float2", "float3", "long1", "long2", "long3"),
      map
  ));
  Row row = index.iterator().next();
  Assert.assertEquals(Lists.newArrayList("xsd", "aba", "fds", "aba"), row.getRaw("string1"));
  Assert.assertEquals(Lists.newArrayList("aba", "aba", "fds", "xsd"), row.getRaw("string2"));
  Assert.assertEquals(Lists.newArrayList("aba", "fds", "xsd"), row.getRaw("string3"));
}
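The three assertions above capture the contract of each MultiValueHandling mode: ARRAY preserves the incoming order and duplicates, SORTED_ARRAY sorts the values but keeps duplicates, and SORTED_SET sorts and de-duplicates. Below is a minimal plain-Java sketch of those same transformations, kept independent of the Druid classes so it can run on its own; the input list mirrors the one returned by the map in the test.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.TreeSet;

public class MultiValueHandlingSketch {
  public static void main(String[] args) {
    List<String> raw = Arrays.asList("xsd", "aba", "fds", "aba");

    // ARRAY: keep the values exactly as they arrived
    List<String> array = new ArrayList<>(raw);

    // SORTED_ARRAY: sort the values but keep duplicates
    List<String> sortedArray = new ArrayList<>(raw);
    sortedArray.sort(null);

    // SORTED_SET: sort and drop duplicates
    List<String> sortedSet = new ArrayList<>(new TreeSet<>(raw));

    System.out.println(array);       // [xsd, aba, fds, aba]
    System.out.println(sortedArray); // [aba, aba, fds, xsd]
    System.out.println(sortedSet);   // [aba, fds, xsd]
  }
}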
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class IncrementalIndexTest, method testDuplicateDimensionsFirstOccurrence().
@Test(expected = ISE.class)
public void testDuplicateDimensionsFirstOccurrence() throws IndexSizeExceededException {
  IncrementalIndex index = indexCreator.createIndex();
  index.add(new MapBasedInputRow(
      System.currentTimeMillis() - 1,
      Lists.newArrayList("billy", "joe", "joe"),
      ImmutableMap.of("billy", "A", "joe", "B")
  ));
}
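The expected ISE is triggered by the duplicate "joe" entry in the dimension list, not by the event map. For contrast, a short sketch of the well-formed call, assuming the same indexCreator fixture and imports as the test above, with each dimension listed once:
index.add(new MapBasedInputRow(
    System.currentTimeMillis() - 1,
    Lists.newArrayList("billy", "joe"),
    ImmutableMap.of("billy", "A", "joe", "B")
));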
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class ParquetAvroHadoopInputRowParser, method parseBatch().
/**
 * imitate avro extension {@link org.apache.druid.data.input.avro.AvroParsers#parseGenericRecord}
 */
@Nonnull
@Override
public List<InputRow> parseBatch(GenericRecord record) {
  Map<String, Object> row = recordFlattener.flatten(record);
  final List<String> dimensions;
  if (!this.dimensions.isEmpty()) {
    dimensions = this.dimensions;
  } else {
    dimensions = Lists.newArrayList(
        Sets.difference(row.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions())
    );
  }
  // check for parquet Date
  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date
  LogicalType logicalType = determineTimestampSpecLogicalType(record.getSchema(), timestampSpec.getTimestampColumn());
  DateTime dateTime;
  if (logicalType instanceof LogicalTypes.Date) {
    int daysSinceEpoch = (Integer) record.get(timestampSpec.getTimestampColumn());
    dateTime = DateTimes.utc(TimeUnit.DAYS.toMillis(daysSinceEpoch));
  } else {
    // Fall back to a binary format that will be parsed using joda-time
    dateTime = timestampSpec.extractTimestamp(row);
  }
  return ImmutableList.of(new MapBasedInputRow(dateTime, dimensions, row));
}
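In the Parquet DATE branch, the timestamp column holds a 32-bit int counting whole days since the Unix epoch, so the conversion is just a days-to-millis multiplication before wrapping it in a UTC DateTime. A self-contained sketch of that arithmetic using java.time instead of the Druid and joda helpers; the sample value 18993 is an arbitrary illustration (it corresponds to 2022-01-01):
import java.time.Instant;
import java.time.ZoneOffset;
import java.util.concurrent.TimeUnit;

public class ParquetDateSketch {
  public static void main(String[] args) {
    // Parquet DATE logical type: days since 1970-01-01, no time zone.
    int daysSinceEpoch = 18993; // arbitrary sample value

    // Same arithmetic as the parser above: days -> milliseconds since epoch.
    long millis = TimeUnit.DAYS.toMillis(daysSinceEpoch);

    System.out.println(Instant.ofEpochMilli(millis).atZone(ZoneOffset.UTC)); // 2022-01-01T00:00Z
  }
}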
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class GroupByQueryHelper, method createIndexAccumulatorPair().
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config
) {
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final Granularity gran = query.getGranularity();
  final long timeStart = query.getIntervals().get(0).getStartMillis();
  final boolean combine = subquery == null;
  long granTimeStart = timeStart;
  if (!(Granularities.ALL.equals(gran))) {
    granTimeStart = gran.bucketStart(timeStart);
  }
  final List<AggregatorFactory> aggs;
  if (combine) {
    aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {
      @Override
      public AggregatorFactory apply(AggregatorFactory input) {
        return input.getCombiningFactory();
      }
    });
  } else {
    aggs = query.getAggregatorSpecs();
  }
  final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {
    @Override
    public String apply(DimensionSpec input) {
      return input.getOutputName();
    }
  });
  final IncrementalIndex index;
  final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
  // All groupBy dimensions are strings, for now.
  final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
  for (DimensionSpec dimension : query.getDimensions()) {
    dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
  }
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(new DimensionsSpec(dimensionSchemas))
      .withMetrics(aggs.toArray(new AggregatorFactory[0]))
      .withQueryGranularity(gran)
      .withMinTimestamp(granTimeStart)
      .build();
  final AppendableIndexBuilder indexBuilder;
  if (query.getContextValue("useOffheap", false)) {
    throw new UnsupportedOperationException(
        "The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, "
        + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime "
        + "properties and 'groupByStrategy' query context parameters that you have set."
    );
  } else {
    indexBuilder = new OnheapIncrementalIndex.Builder();
  }
  index = indexBuilder
      .setIndexSchema(indexSchema)
      .setDeserializeComplexMetrics(false)
      .setConcurrentEventAdd(true)
      .setSortFacts(sortResults)
      .setMaxRowCount(querySpecificConfig.getMaxResults())
      .build();
  Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {
    @Override
    public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
      final MapBasedRow mapBasedRow;
      if (in instanceof MapBasedRow) {
        mapBasedRow = (MapBasedRow) in;
      } else if (in instanceof ResultRow) {
        final ResultRow row = (ResultRow) in;
        mapBasedRow = row.toMapBasedRow(combine ? query : subquery);
      } else {
        throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
      }
      try {
        accumulated.add(new MapBasedInputRow(mapBasedRow.getTimestamp(), dimensions, mapBasedRow.getEvent()));
      } catch (IndexSizeExceededException e) {
        throw new ResourceLimitExceededException(e.getMessage());
      }
      return accumulated;
    }
  };
  return new Pair<>(index, accumulator);
}
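The returned Pair is meant to be folded over a sequence of result rows: the left side is the empty incremental index and the right side is the accumulator that converts each row into a MapBasedInputRow and adds it. A hedged usage sketch, assuming Druid's Sequence#accumulate(initValue, accumulator) signature, that query, subquery, and config are in scope, and that rows stands in for a Sequence<ResultRow> obtained elsewhere:
// Hypothetical caller; names other than createIndexAccumulatorPair are illustrative.
Pair<IncrementalIndex, Accumulator<IncrementalIndex, ResultRow>> indexAccumulatorPair =
    GroupByQueryHelper.createIndexAccumulatorPair(query, subquery, config);

// Fold every row of the sequence into the index via the accumulator.
IncrementalIndex index = rows.accumulate(indexAccumulatorPair.lhs, indexAccumulatorPair.rhs);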
Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.
In class DataSourceMetadataQueryTest, method testMaxIngestedEventTime().
@Test
public void testMaxIngestedEventTime() throws Exception {
  final IncrementalIndex rtIndex = new OnheapIncrementalIndex.Builder()
      .setSimpleTestingIndexSchema(new CountAggregatorFactory("count"))
      .setMaxRowCount(1000)
      .build();
  final QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner(
      new DataSourceMetadataQueryRunnerFactory(
          new DataSourceQueryQueryToolChest(DefaultGenericQueryMetricsFactory.instance()),
          QueryRunnerTestHelper.NOOP_QUERYWATCHER
      ),
      new IncrementalIndexSegment(rtIndex, SegmentId.dummy("test")),
      null
  );
  DateTime timestamp = DateTimes.nowUtc();
  rtIndex.add(new MapBasedInputRow(timestamp.getMillis(), ImmutableList.of("dim1"), ImmutableMap.of("dim1", "x")));
  DataSourceMetadataQuery dataSourceMetadataQuery = Druids.newDataSourceMetadataQueryBuilder().dataSource("testing").build();
  ResponseContext context = ConcurrentResponseContext.createEmpty();
  context.initializeMissingSegments();
  Iterable<Result<DataSourceMetadataResultValue>> results =
      runner.run(QueryPlus.wrap(dataSourceMetadataQuery), context).toList();
  DataSourceMetadataResultValue val = results.iterator().next().getValue();
  DateTime maxIngestedEventTime = val.getMaxIngestedEventTime();
  Assert.assertEquals(timestamp, maxIngestedEventTime);
}
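The assertion shows that maxIngestedEventTime reports the event timestamp of the added row rather than wall-clock ingestion time. A hedged continuation of the same test, reusing rtIndex, runner, and dataSourceMetadataQuery from above, sketching the expectation that adding a later-timestamped row presumably moves the reported value forward:
// Hypothetical continuation; variables come from the test body above.
DateTime laterTimestamp = timestamp.plusHours(1);
rtIndex.add(new MapBasedInputRow(laterTimestamp.getMillis(), ImmutableList.of("dim1"), ImmutableMap.of("dim1", "y")));

ResponseContext laterContext = ConcurrentResponseContext.createEmpty();
laterContext.initializeMissingSegments();
Iterable<Result<DataSourceMetadataResultValue>> laterResults =
    runner.run(QueryPlus.wrap(dataSourceMetadataQuery), laterContext).toList();
Assert.assertEquals(laterTimestamp, laterResults.iterator().next().getValue().getMaxIngestedEventTime());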