Use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
The class RowExtraction, method toDimensionSpec.
public DimensionSpec toDimensionSpec(final RowSignature rowSignature, final String outputName, final ValueType outputType)
{
  Preconditions.checkNotNull(outputType, "outputType");
  final ValueType columnType = rowSignature.getColumnType(column);
  if (columnType == null) {
    return null;
  }
  if (columnType == ValueType.STRING || (column.equals(Column.TIME_COLUMN_NAME) && extractionFn != null)) {
    return extractionFn == null
           ? new DefaultDimensionSpec(column, outputName, outputType)
           : new ExtractionDimensionSpec(column, outputName, outputType, extractionFn);
  } else if (columnType == ValueType.LONG || columnType == ValueType.FLOAT) {
    if (extractionFn == null) {
      return new DefaultDimensionSpec(column, outputName, outputType);
    } else {
      return new ExtractionDimensionSpec(column, outputName, outputType, extractionFn);
    }
  } else {
    // Can't create dimension specs for non-string, non-numeric columns.
    return null;
  }
}
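As a usage sketch of the two branches above (the column names, output names, and substring function are hypothetical, not taken from the source):
// Hypothetical illustration: with extractionFn == null the method yields a plain
// DefaultDimensionSpec; otherwise the same column is wrapped in an
// ExtractionDimensionSpec that applies the function at query time.
ExtractionFn firstChar = new SubstringDimExtractionFn(0, 1); // keep the first character
DimensionSpec plain = new DefaultDimensionSpec("countryName", "country", ValueType.STRING);
DimensionSpec extracted = new ExtractionDimensionSpec("countryName", "countryInitial", ValueType.STRING, firstChar);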
Use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
The class VirtualColumnsTest, method testMakeSelectors.
@Test
public void testMakeSelectors()
{
  final VirtualColumns virtualColumns = makeVirtualColumns();
  final ObjectColumnSelector objectSelector = virtualColumns.makeObjectColumnSelector("expr", null);
  final DimensionSelector dimensionSelector =
      virtualColumns.makeDimensionSelector(new DefaultDimensionSpec("expr", "x"), null);
  final DimensionSelector extractionDimensionSelector =
      virtualColumns.makeDimensionSelector(new ExtractionDimensionSpec("expr", "x", new BucketExtractionFn(1.0, 0.5)), null);
  final FloatColumnSelector floatSelector = virtualColumns.makeFloatColumnSelector("expr", null);
  final LongColumnSelector longSelector = virtualColumns.makeLongColumnSelector("expr", null);

  Assert.assertEquals(1L, objectSelector.get());
  Assert.assertEquals("1", dimensionSelector.lookupName(dimensionSelector.getRow().get(0)));
  Assert.assertEquals("0.5", extractionDimensionSelector.lookupName(extractionDimensionSelector.getRow().get(0)));
  Assert.assertEquals(1.0f, floatSelector.get(), 0.0f);
  Assert.assertEquals(1L, longSelector.get());
}
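For context, BucketExtractionFn(size, offset) maps a numeric value to the lower edge of its bucket, floor((value - offset) / size) * size + offset, which is why the virtual column value 1 reads back as "0.5" above. A minimal sketch of the function applied directly (the second input is a hypothetical extra example):
ExtractionFn bucket = new BucketExtractionFn(1.0, 0.5);
bucket.apply("1");   // "0.5": 1 falls in the bucket [0.5, 1.5)
bucket.apply("2.3"); // expected "1.5": 2.3 falls in the bucket [1.5, 2.5)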
Use of io.druid.query.dimension.ExtractionDimensionSpec in project hive by apache.
The class DruidGroupByQueryRecordReader, method initDimensionTypes.
private void initDimensionTypes() throws IOException
{
  // TODO: move this out of here to org.apache.hadoop.hive.druid.serde.DruidSerDe
  List<DimensionSpec> dimensionSpecList = ((GroupByQuery) query).getDimensions();
  List<DimensionSpec> extractionDimensionSpecList = dimensionSpecList.stream()
      .filter(dimensionSpec -> dimensionSpec instanceof ExtractionDimensionSpec)
      .collect(Collectors.toList());
  extractionDimensionSpecList.forEach(dimensionSpec -> {
    ExtractionDimensionSpec extractionDimensionSpec = (ExtractionDimensionSpec) dimensionSpec;
    if (extractionDimensionSpec.getExtractionFn() instanceof TimeFormatExtractionFn) {
      final TimeFormatExtractionFn timeFormatExtractionFn =
          (TimeFormatExtractionFn) extractionDimensionSpec.getExtractionFn();
      // Note: the original checked timeFormatExtractionFn for null, which is dead code
      // after the instanceof check above; the format itself is what may be null.
      if (timeFormatExtractionFn.getFormat() == null || timeFormatExtractionFn.getFormat().equals(ISO_TIME_FORMAT)) {
        timeExtractionFields.add(extractionDimensionSpec.getOutputName());
      } else {
        intFormattedTimeExtractionFields.add(extractionDimensionSpec.getOutputName());
      }
    }
  });
}
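For reference, a hedged sketch of the kind of dimension JSON that reaches the first branch; the dimension and output names are hypothetical, and the format string merely stands in for whatever ISO_TIME_FORMAT holds in the reader:
// Hypothetical query fragment: a "timeFormat" extraction on __time. When its format
// matches the reader's ISO_TIME_FORMAT (or is absent), the output name lands in
// timeExtractionFields; any other format lands in intFormattedTimeExtractionFields.
String timeDimJson =
    "{\"type\":\"extraction\",\"dimension\":\"__time\",\"outputName\":\"ts\","
    + "\"extractionFn\":{\"type\":\"timeFormat\",\"format\":\"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'\"}}";
DimensionSpec spec = new DefaultObjectMapper().readValue(timeDimJson, DimensionSpec.class);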
Use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
The class CardinalityAggregatorTest, method testSerde.
@Test
public void testSerde() throws Exception
{
  CardinalityAggregatorFactory factory = new CardinalityAggregatorFactory(
      "billy",
      ImmutableList.<DimensionSpec>of(new DefaultDimensionSpec("b", "b"), new DefaultDimensionSpec("a", "a"), new DefaultDimensionSpec("c", "c")),
      true
  );
  ObjectMapper objectMapper = new DefaultObjectMapper();
  Assert.assertEquals(factory, objectMapper.readValue(objectMapper.writeValueAsString(factory), AggregatorFactory.class));
  String fieldNamesOnly = "{\"type\":\"cardinality\",\"name\":\"billy\",\"fields\":[\"b\",\"a\",\"c\"],\"byRow\":true}";
  Assert.assertEquals(factory, objectMapper.readValue(fieldNamesOnly, AggregatorFactory.class));
  CardinalityAggregatorFactory factory2 = new CardinalityAggregatorFactory(
      "billy",
      ImmutableList.<DimensionSpec>of(
          new ExtractionDimensionSpec("b", "b", new RegexDimExtractionFn(".*", false, null)),
          new RegexFilteredDimensionSpec(new DefaultDimensionSpec("a", "a"), ".*"),
          new DefaultDimensionSpec("c", "c")
      ),
      true
  );
  Assert.assertEquals(factory2, objectMapper.readValue(objectMapper.writeValueAsString(factory2), AggregatorFactory.class));
}
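For reference, a reconstructed sketch (not copied from the test) of the JSON shape the extraction entry in factory2 serializes to, which is what the writeValueAsString/readValue round trip exercises:
// Reconstructed per Druid's documented dimension-spec JSON; deserializing it should
// yield a spec equal to new ExtractionDimensionSpec("b", "b", new RegexDimExtractionFn(".*", false, null)).
String extractionJson =
    "{\"type\":\"extraction\",\"dimension\":\"b\",\"outputName\":\"b\","
    + "\"extractionFn\":{\"type\":\"regex\",\"expr\":\".*\"}}";
DimensionSpec roundTripped = objectMapper.readValue(extractionJson, DimensionSpec.class);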
Use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
The class TopNTypeInterfaceBenchmark, method setupQueries.
private void setupQueries()
{
  // Queries for the basic schema.
  Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
  BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
    queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
    queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
    // Use an IdentityExtractionFn to force usage of DimExtractionTopNAlgorithm.
    TopNQueryBuilder queryBuilderString = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance()))
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    // DimExtractionTopNAlgorithm is always used for numeric columns.
    TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("metLongUniform")
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("metFloatNormal")
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("string", queryBuilderString);
    basicQueries.put("long", queryBuilderLong);
    basicQueries.put("float", queryBuilderFloat);
  }
  {
    // basic.numericSort
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("numericSort", queryBuilderA);
  }
  {
    // basic.alphanumericSort
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("alphanumericSort", queryBuilderA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
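As a usage note, each builder above still needs a threshold before it can build a complete query; a minimal sketch (the threshold value is hypothetical):
// Hypothetical completion of the "string" benchmark query. The no-op
// IdentityExtractionFn leaves values unchanged but forces the engine onto
// DimExtractionTopNAlgorithm instead of the plain string TopN path.
TopNQuery query = basicQueries.get("string")
    .threshold(10) // hypothetical threshold
    .build();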