Search in sources :

Example 36 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class IndexGeneratorCombinerTest method setUp.

/**
 * Builds the fixture used by the tests in this class: a Hadoop indexer config for the
 * "website" data source, a mocked reducer context that hands that config back through the
 * Hadoop {@code Configuration}, and a combiner initialised against that context.
 */
@Before
public void setUp() throws Exception {
    // Row parser: "timestamp" column in yyyyMMddHH format, default schemas for both dimensions.
    TimestampSpec timestampSpec = new TimestampSpec("timestamp", "yyyyMMddHH", null);
    DimensionsSpec dimensionsSpec = new DimensionsSpec(
        DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")));
    StringInputRowParser rowParser = new StringInputRowParser(
        new TimeAndDimsParseSpec(timestampSpec, dimensionsSpec), null);

    // Metrics: a long sum over "visited" and a hyperUnique over "host".
    AggregatorFactory[] metrics = {
        new LongSumAggregatorFactory("visited_sum", "visited"),
        new HyperUniquesAggregatorFactory("unique_hosts", "host")
    };
    UniformGranularitySpec granularitySpec = new UniformGranularitySpec(
        Granularities.DAY, Granularities.NONE, ImmutableList.of(Intervals.of("2010/2011")));

    // The parser is handed to DataSchema as a generic map, so it is converted via the JSON mapper.
    DataSchema dataSchema = new DataSchema(
        "website",
        HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(rowParser, Map.class),
        metrics,
        granularitySpec,
        null,
        HadoopDruidIndexerConfig.JSON_MAPPER);
    HadoopIOConfig ioConfig = new HadoopIOConfig(
        ImmutableMap.of("paths", "/tmp/dummy", "type", "static"), null, "/tmp/dummy");
    HadoopIngestionSpec ingestionSpec = new HadoopIngestionSpec(
        dataSchema,
        ioConfig,
        HadoopTuningConfig.makeDefaultTuningConfig().withWorkingPath("/tmp/work").withVersion("ver"));
    HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(ingestionSpec);

    // Store the serialized config under the property the combiner's setup reads it from.
    Configuration hadoopConfig = new Configuration();
    String serializedConfig = HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config);
    hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, serializedConfig);

    // The mock only needs to answer getConfiguration().
    Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
    EasyMock.expect(context.getConfiguration()).andReturn(hadoopConfig);
    EasyMock.replay(context);

    aggregators = config.getSchema().getDataSchema().getAggregators();
    combiner = new IndexGeneratorJob.IndexGeneratorCombiner();
    combiner.setup(context);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Reducer(org.apache.hadoop.mapreduce.Reducer) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Before(org.junit.Before)

Example 37 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class IndexGeneratorCombinerTest method testMultipleRowsMerged.

/**
 * Feeds two serialized rows that share one key to the combiner and verifies that exactly one
 * merged row is written back under the very same key instance: the long-sum metric is added
 * up (10 + 5) and the hyperUnique metric estimates two distinct hosts.
 */
@Test
public void testMultipleRowsMerged() throws Exception {
    long timestamp = System.currentTimeMillis();
    Bucket bucket = new Bucket(0, DateTimes.utc(timestamp), 0);
    SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
    BytesWritable key = keySortableBytes.toBytesWritable();

    DimensionsSpec dimensionsSpec = new DimensionsSpec(
        Arrays.asList(new StringDimensionSchema("host"), new StringDimensionSchema("keywords")));
    Map<String, InputRowSerde.IndexSerdeTypeHelper> typeHelperMap = InputRowSerde.getTypeHelperMap(dimensionsSpec);

    // Both rows list only "keywords" as a dimension; "host" differs between them and is present
    // in the event map only (it is asserted to be an empty dimension on the merged row below).
    InputRow row1 = new MapBasedInputRow(
        timestamp,
        ImmutableList.of("keywords"),
        ImmutableMap.of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10));
    InputRow row2 = new MapBasedInputRow(
        timestamp,
        ImmutableList.of("keywords"),
        ImmutableMap.of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5));
    List<BytesWritable> rows = Lists.newArrayList(
        new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators).getSerializedRow()),
        new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators).getSerializedRow()));

    // Record a single expected write and capture what the combiner emits.
    Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey = Capture.newInstance();
    Capture<BytesWritable> captureVal = Capture.newInstance();
    context.write(EasyMock.capture(captureKey), EasyMock.capture(captureVal));
    EasyMock.replay(context);

    combiner.reduce(key, rows, context);
    EasyMock.verify(context);

    // Identity check: the combiner must pass the incoming key object through unchanged.
    // assertSame reports both objects on failure, unlike assertTrue(a == b).
    Assert.assertSame(key, captureKey.getValue());

    InputRow capturedRow = InputRowSerde.fromBytes(typeHelperMap, captureVal.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow.getDimensions());
    Assert.assertEquals(ImmutableList.of(), capturedRow.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow.getDimension("keywords"));
    Assert.assertEquals(15, capturedRow.getMetric("visited_sum").longValue());
    Assert.assertEquals(2.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow.getRaw("unique_hosts"), false), 0.001);
}
Also used : BytesWritable(org.apache.hadoop.io.BytesWritable) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Reducer(org.apache.hadoop.mapreduce.Reducer) Test(org.junit.Test)

Example 38 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class InputRowSerdeTest method testDimensionNullOrDefaultForNumerics.

/**
 * Serializes a row whose string dimension d1 and numeric dimensions d3 (long), d4 (float) and
 * d5 (double) are all null, then checks the serialized length and the bytes at the numeric
 * value offsets for both null-handling modes: zero-filled values when nulls are replaced with
 * defaults, a null marker byte otherwise.
 */
@Test
public void testDimensionNullOrDefaultForNumerics() {
    HashMap<String, Object> eventWithNulls = new HashMap<>();
    eventWithNulls.put("d1", null);
    eventWithNulls.put("d2", Arrays.asList("d2v1", "d2v2"));
    eventWithNulls.put("d3", null);
    eventWithNulls.put("d4", null);
    eventWithNulls.put("d5", null);
    InputRow in = new MapBasedInputRow(timestamp, dims, eventWithNulls);
    DimensionsSpec dimensionsSpec = new DimensionsSpec(Arrays.asList(new StringDimensionSchema("d1"), new StringDimensionSchema("d2"), new LongDimensionSchema("d3"), new FloatDimensionSchema("d4"), new DoubleDimensionSchema("d5")));
    byte[] result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, new AggregatorFactory[0]).getSerializedRow();
    if (NullHandling.replaceWithDefault()) {
        long expected = 0;
        // timestamp bytes + dims length
        expected += 9;
        // dim_non_existing writes: 1 16 1 bytes
        expected += 18;
        // d1: writes 1 2 1 bytes
        expected += 4;
        // d2: writes 1 2 1 1 4 1 4 bytes
        expected += 14;
        // d3: writes 1 2 8 bytes
        expected += 11;
        // d4: writes 1 2 4 bytes
        expected += 7;
        // d5: writes 1 2 8 bytes
        expected += 11;
        // writes aggregator length
        expected += 1;
        Assert.assertEquals(expected, result.length);
        // The value bytes of d3 (8), d4 (4) and d5 (8) must all be zero, i.e. the default value.
        Assert.assertArrayEquals(new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }, Arrays.copyOfRange(result, 48, 56));
        Assert.assertArrayEquals(new byte[] { 0, 0, 0, 0 }, Arrays.copyOfRange(result, 59, 63));
        Assert.assertArrayEquals(new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }, Arrays.copyOfRange(result, 66, 74));
    } else {
        // Same layout as above, except each numeric dimension takes 4 bytes instead of 11/7/11
        // (presumably header plus a single null marker byte; confirm against InputRowSerde).
        long expected = 9 + 18 + 4 + 14 + 4 + 4 + 4 + 1;
        Assert.assertEquals(expected, result.length);
        // JUnit's contract is assertEquals(expected, actual); keep the constant first so that a
        // failure message labels the two values correctly.
        Assert.assertEquals(NullHandling.IS_NULL_BYTE, result[48]);
        Assert.assertEquals(NullHandling.IS_NULL_BYTE, result[52]);
        Assert.assertEquals(NullHandling.IS_NULL_BYTE, result[56]);
    }
}
Also used : HashMap(java.util.HashMap) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Example 39 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class InputRowSerdeTest method testDimensionParseExceptions.

/**
 * Declares dimension d1 as long, float and double in turn and checks that serializing the
 * shared event reports exactly one parse-exception message naming the failed conversion.
 * The expected messages imply the fixture's d1 value is the non-numeric string "d1v".
 */
@Test
public void testDimensionParseExceptions() {
    final InputRow row = new MapBasedInputRow(timestamp, dims, event);
    final AggregatorFactory[] metrics = { new LongSumAggregatorFactory("m2out", "m2") };

    // d1 typed as long
    final DimensionsSpec longSpec = new DimensionsSpec(
        Collections.singletonList(new LongDimensionSchema("d1")));
    final InputRowSerde.SerializeResult longResult =
        InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(longSpec), row, metrics);
    Assert.assertEquals(
        Collections.singletonList("could not convert value [d1v] to long"),
        longResult.getParseExceptionMessages());

    // d1 typed as float
    final DimensionsSpec floatSpec = new DimensionsSpec(
        Collections.singletonList(new FloatDimensionSchema("d1")));
    final InputRowSerde.SerializeResult floatResult =
        InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(floatSpec), row, metrics);
    Assert.assertEquals(
        Collections.singletonList("could not convert value [d1v] to float"),
        floatResult.getParseExceptionMessages());

    // d1 typed as double
    final DimensionsSpec doubleSpec = new DimensionsSpec(
        Collections.singletonList(new DoubleDimensionSchema("d1")));
    final InputRowSerde.SerializeResult doubleResult =
        InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(doubleSpec), row, metrics);
    Assert.assertEquals(
        Collections.singletonList("could not convert value [d1v] to double"),
        doubleResult.getParseExceptionMessages());
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Test(org.junit.Test)

Example 40 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class InputRowSerdeTest method testThrowParseExceptions.

/**
 * Serializes the shared event with a set of aggregators, one of which applies a long sum to
 * the field m3, and checks that the only reported parse-exception message is the one for that
 * field.
 */
@Test
public void testThrowParseExceptions() {
    final InputRow row = new MapBasedInputRow(timestamp, dims, event);

    final AggregatorFactory[] metrics = {
        new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
        new DoubleSumAggregatorFactory("m1out", "m1"),
        new LongSumAggregatorFactory("m2out", "m2"),
        new HyperUniquesAggregatorFactory("m3out", "m3"),
        // Unparseable from String to Long
        new LongSumAggregatorFactory("unparseable", "m3")
    };

    final DimensionsSpec spec = new DimensionsSpec(
        Arrays.asList(
            new StringDimensionSchema("d1"),
            new StringDimensionSchema("d2"),
            new LongDimensionSchema("d3"),
            new FloatDimensionSchema("d4"),
            new DoubleDimensionSchema("d5")));

    final InputRowSerde.SerializeResult serialized =
        InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(spec), row, metrics);

    Assert.assertEquals(
        Collections.singletonList("Unable to parse value[m3v] for field[m3]"),
        serialized.getParseExceptionMessages());
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Aggregations

DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 169; Test (org.junit.Test): 129; TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 114; InputRow (org.apache.druid.data.input.InputRow): 52; AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 47; LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 47; UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 42; DataSchema (org.apache.druid.segment.indexing.DataSchema): 39; StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema): 37; CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 37; InputRowSchema (org.apache.druid.data.input.InputRowSchema): 36; Map (java.util.Map): 32; InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 32; InputEntityReader (org.apache.druid.data.input.InputEntityReader): 31; ArrayList (java.util.ArrayList): 29; CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 25; MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 24; JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 24; HashMap (java.util.HashMap): 23; ImmutableMap (com.google.common.collect.ImmutableMap): 21