Search in sources :

Example 6 with HadoopDruidIndexerConfig

use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.

the class DecimalParquetInputTest method testReadParquetDecimalFixedLen.

/**
 * Checks the first row read via the config built from
 * {@code example/decimals/dec_in_fix_len.json}: its timestamp, the string value of the
 * {@code fixed_len_dec} dimension, and the {@code metric1} value as a {@link BigDecimal}.
 *
 * <p>Nothing is asserted when the parser type is the parquet-avro input parser.
 */
@Test
public void testReadParquetDecimalFixedLen() throws IOException, InterruptedException {
    // parquet-avro does not correctly convert decimal types, so only other parsers are checked
    final boolean avroParser = parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE);
    if (!avroParser) {
        final HadoopDruidIndexerConfig indexerConfig =
            transformHadoopDruidIndexerConfig("example/decimals/dec_in_fix_len.json", parserType, true);
        final List<InputRow> parsedRows = getAllRows(parserType, indexerConfig);
        final InputRow firstRow = parsedRows.get(0);

        Assert.assertEquals("2018-09-01T00:00:00.000Z", firstRow.getTimestamp().toString());
        Assert.assertEquals("1.0", firstRow.getDimension("fixed_len_dec").get(0));
        Assert.assertEquals(new BigDecimal("1.0"), firstRow.getMetric("metric1"));
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) BigDecimal(java.math.BigDecimal) Test(org.junit.Test)

Example 7 with HadoopDruidIndexerConfig

use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.

the class FlattenSpecParquetInputTest method testNested1NoFlattenSpec.

/**
 * Parses the first record referenced by {@code example/flattening/nested_1.json} and checks
 * the timestamp, {@code dim1} and {@code metric1} of the first parsed row, and that
 * {@code dim2}, {@code dim3}, {@code listDim} and {@code nestedData} are not among its dimensions.
 */
@Test
public void testNested1NoFlattenSpec() throws IOException, InterruptedException {
    final HadoopDruidIndexerConfig indexerConfig =
        transformHadoopDruidIndexerConfig("example/flattening/nested_1.json", parserType, false);
    indexerConfig.intoConfiguration(job);

    final StaticPathSpec pathSpec = (StaticPathSpec) indexerConfig.getPathSpec();
    final Object firstRecord = getFirstRow(job, parserType, pathSpec.getPaths());
    final List<InputRow> parsedRows = (List<InputRow>) indexerConfig.getParser().parseBatch(firstRecord);
    final InputRow firstRow = parsedRows.get(0);

    Assert.assertEquals(TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));

    // None of these names may appear in the row's dimension list.
    final List<String> dimensionNames = firstRow.getDimensions();
    final String[] expectedAbsent = {"dim2", "dim3", "listDim", "nestedData"};
    for (String name : expectedAbsent) {
        Assert.assertFalse(dimensionNames.contains(name));
    }

    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());
}
Also used : InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) Test(org.junit.Test)

Example 8 with HadoopDruidIndexerConfig

use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.

the class FlattenSpecParquetInputTest method testFlat1FlattenSelectListItem.

/**
 * Parses the first record referenced by {@code example/flattening/flat_1_list_index.json} and
 * checks the first parsed row: timestamp, {@code dim1}, {@code dim2}, the
 * {@code listextracted} dimension and {@code metric1}.
 */
@Test
public void testFlat1FlattenSelectListItem() throws IOException, InterruptedException {
    final HadoopDruidIndexerConfig indexerConfig =
        transformHadoopDruidIndexerConfig("example/flattening/flat_1_list_index.json", parserType, true);
    indexerConfig.intoConfiguration(job);

    final StaticPathSpec pathSpec = (StaticPathSpec) indexerConfig.getPathSpec();
    final Object firstRecord = getFirstRow(job, parserType, pathSpec.getPaths());
    final List<InputRow> parsedRows = (List<InputRow>) indexerConfig.getParser().parseBatch(firstRecord);
    final InputRow firstRow = parsedRows.get(0);

    Assert.assertEquals(TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
    Assert.assertEquals("listDim1v2", firstRow.getDimension("listextracted").get(0));
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());
}
Also used : InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) Test(org.junit.Test)

Example 9 with HadoopDruidIndexerConfig

use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.

the class FlattenSpecParquetInputTest method testNested1FlattenSelectListItem.

/**
 * Parses the first record referenced by {@code example/flattening/nested_1_list_index.json}
 * and checks the first parsed row: timestamp, {@code dim1}, {@code dim2}, {@code dim3},
 * the {@code listextracted} dimension and {@code metric1}.
 */
@Test
public void testNested1FlattenSelectListItem() throws IOException, InterruptedException {
    final HadoopDruidIndexerConfig indexerConfig =
        transformHadoopDruidIndexerConfig("example/flattening/nested_1_list_index.json", parserType, true);
    indexerConfig.intoConfiguration(job);

    final StaticPathSpec pathSpec = (StaticPathSpec) indexerConfig.getPathSpec();
    final Object firstRecord = getFirstRow(job, parserType, pathSpec.getPaths());
    final List<InputRow> parsedRows = (List<InputRow>) indexerConfig.getParser().parseBatch(firstRecord);
    final InputRow firstRow = parsedRows.get(0);

    Assert.assertEquals(TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
    Assert.assertEquals("1", firstRow.getDimension("dim3").get(0));
    Assert.assertEquals("listDim1v2", firstRow.getDimension("listextracted").get(0));
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());
}
Also used : InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) Test(org.junit.Test)

Example 10 with HadoopDruidIndexerConfig

use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.

the class FlattenSpecParquetInputTest method testFlat1Flatten.

/**
 * Parses the first record referenced by {@code example/flattening/flat_1_flatten.json} and
 * checks the first parsed row: timestamp, {@code dim1}, {@code dim2}, {@code dim3}, both
 * values of the {@code list} dimension, the {@code listLength} dimension and {@code metric1}.
 */
@Test
public void testFlat1Flatten() throws IOException, InterruptedException {
    final HadoopDruidIndexerConfig indexerConfig =
        transformHadoopDruidIndexerConfig("example/flattening/flat_1_flatten.json", parserType, true);
    indexerConfig.intoConfiguration(job);

    final StaticPathSpec pathSpec = (StaticPathSpec) indexerConfig.getPathSpec();
    final Object firstRecord = getFirstRow(job, parserType, pathSpec.getPaths());
    final List<InputRow> parsedRows = (List<InputRow>) indexerConfig.getParser().parseBatch(firstRecord);
    final InputRow firstRow = parsedRows.get(0);

    Assert.assertEquals(TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
    Assert.assertEquals("1", firstRow.getDimension("dim3").get(0));

    // The "list" dimension is expected to carry two values, checked in order.
    Assert.assertEquals("listDim1v1", firstRow.getDimension("list").get(0));
    Assert.assertEquals("listDim1v2", firstRow.getDimension("list").get(1));

    Assert.assertEquals("2", firstRow.getDimension("listLength").get(0));
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());
}
Also used : InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) Test(org.junit.Test)

Aggregations

HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig): 34, Test (org.junit.Test): 32, InputRow (org.apache.druid.data.input.InputRow): 27, ImmutableList (com.google.common.collect.ImmutableList): 19, List (java.util.List): 19, Job (org.apache.hadoop.mapreduce.Job): 9, ArrayList (java.util.ArrayList): 8, Configuration (org.apache.hadoop.conf.Configuration): 8, OrcStruct (org.apache.orc.mapred.OrcStruct): 6, BigDecimal (java.math.BigDecimal): 3, HadoopIngestionSpec (org.apache.druid.indexer.HadoopIngestionSpec): 3, IOException (java.io.IOException): 2, HashSet (java.util.HashSet): 2, DimensionSchema (org.apache.druid.data.input.impl.DimensionSchema): 2, ParseSpec (org.apache.druid.data.input.impl.ParseSpec): 2, Bucket (org.apache.druid.indexer.Bucket): 2, AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 2, DataSegment (org.apache.druid.timeline.DataSegment): 2, NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 2, Path (org.apache.hadoop.fs.Path): 2