use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.
the class GroupByQueryHelper method createIndexAccumulatorPair.
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, final GroupByQueryConfig config, StupidPool<ByteBuffer> bufferPool, final boolean combine) {
final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
final Granularity gran = query.getGranularity();
final long timeStart = query.getIntervals().get(0).getStartMillis();
long granTimeStart = timeStart;
if (!(Granularities.ALL.equals(gran))) {
granTimeStart = gran.bucketStart(new DateTime(timeStart)).getMillis();
}
final List<AggregatorFactory> aggs;
if (combine) {
aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {
@Override
public AggregatorFactory apply(AggregatorFactory input) {
return input.getCombiningFactory();
}
});
} else {
aggs = query.getAggregatorSpecs();
}
final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {
@Override
public String apply(DimensionSpec input) {
return input.getOutputName();
}
});
final IncrementalIndex index;
final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
// All groupBy dimensions are strings, for now.
final List<DimensionSchema> dimensionSchemas = Lists.newArrayList();
for (DimensionSpec dimension : query.getDimensions()) {
dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
}
final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder().withDimensionsSpec(new DimensionsSpec(dimensionSchemas, null, null)).withMetrics(aggs.toArray(new AggregatorFactory[aggs.size()])).withQueryGranularity(gran).withMinTimestamp(granTimeStart).build();
if (query.getContextValue("useOffheap", false)) {
index = new OffheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults(), bufferPool);
} else {
index = new OnheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults());
}
Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {
@Override
public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
if (in instanceof MapBasedRow) {
try {
MapBasedRow row = (MapBasedRow) in;
accumulated.add(new MapBasedInputRow(row.getTimestamp(), dimensions, row.getEvent()));
} catch (IndexSizeExceededException e) {
throw new ResourceLimitExceededException(e.getMessage());
}
} else {
throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
}
return accumulated;
}
};
return new Pair<>(index, accumulator);
}
use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.
the class SegmentAnalyzerTest method testMappedWorksHelper.
private void testMappedWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception {
final List<SegmentAnalysis> results = getSegmentAnalysises(new QueryableIndexSegment("test_1", TestIndex.getMMappedTestIndex()), analyses);
Assert.assertEquals(1, results.size());
final SegmentAnalysis analysis = results.get(0);
Assert.assertEquals("test_1", analysis.getId());
final Map<String, ColumnAnalysis> columns = analysis.getColumns();
Assert.assertEquals(TestIndex.COLUMNS.length - 1, columns.size());
for (DimensionSchema schema : TestIndex.DIMENSION_SCHEMAS) {
final String dimension = schema.getName();
final ColumnAnalysis columnAnalysis = columns.get(dimension);
if (dimension.equals("null_column")) {
Assert.assertNull(columnAnalysis);
} else {
final boolean isString = schema.getValueType().name().equals(ValueType.STRING.name());
Assert.assertEquals(dimension, schema.getValueType().name(), columnAnalysis.getType());
Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
if (isString) {
if (analyses == null) {
Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
} else {
Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
}
} else {
Assert.assertNull(dimension, columnAnalysis.getCardinality());
}
}
}
for (String metric : TestIndex.METRICS) {
final ColumnAnalysis columnAnalysis = columns.get(metric);
Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
Assert.assertEquals(metric, 0, columnAnalysis.getSize());
Assert.assertNull(metric, columnAnalysis.getCardinality());
}
}
use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.
the class KafkaSupervisorTest method getDataSchema.
private DataSchema getDataSchema(String dataSource) {
List<DimensionSchema> dimensions = new ArrayList<>();
dimensions.add(StringDimensionSchema.create("dim1"));
dimensions.add(StringDimensionSchema.create("dim2"));
return new DataSchema(dataSource, objectMapper.convertValue(new StringInputRowParser(new JSONParseSpec(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(dimensions, null, null), new JSONPathSpec(true, ImmutableList.<JSONPathFieldSpec>of()), ImmutableMap.<String, Boolean>of()), Charsets.UTF_8.name()), Map.class), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.<Interval>of()), objectMapper);
}
use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.
the class OrcHadoopInputRowParserTest method testTypeFromParseSpec.
@Test
public void testTypeFromParseSpec() {
ParseSpec parseSpec = new TimeAndDimsParseSpec(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("col1"), new StringDimensionSchema("col2")), null, null));
String typeString = OrcHadoopInputRowParser.typeStringFromParseSpec(parseSpec);
String expected = "struct<timestamp:string,col1:string,col2:string>";
Assert.assertEquals(expected, typeString);
}
use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.
the class DruidParquetReadSupport method getPartialReadSchema.
private MessageType getPartialReadSchema(InitContext context) {
MessageType fullSchema = context.getFileSchema();
String name = fullSchema.getName();
HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
String tsField = config.getParser().getParseSpec().getTimestampSpec().getTimestampColumn();
List<DimensionSchema> dimensionSchema = config.getParser().getParseSpec().getDimensionsSpec().getDimensions();
Set<String> dimensions = Sets.newHashSet();
for (DimensionSchema dim : dimensionSchema) {
dimensions.add(dim.getName());
}
Set<String> metricsFields = Sets.newHashSet();
for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
metricsFields.addAll(agg.requiredFields());
}
List<Type> partialFields = Lists.newArrayList();
for (Type type : fullSchema.getFields()) {
if (tsField.equals(type.getName()) || metricsFields.contains(type.getName()) || dimensions.size() > 0 && dimensions.contains(type.getName()) || dimensions.size() == 0) {
partialFields.add(type);
}
}
return new MessageType(name, partialFields);
}
Aggregations