Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class OrcHadoopInputRowParserTest, method testOrcFile11Format:
@Test
public void testOrcFile11Format() throws IOException {
// not sure what file 11 format means, but we'll test it!
/*
orc-file-11-format.orc
struct<boolean1:boolean,byte1:tinyint,short1:smallint,int1:int,long1:bigint,float1:float,double1:double,bytes1:binary,string1:string,middle:struct<list:array<struct<int1:int,string1:string>>>,list:array<struct<int1:int,string1:string>>,map:map<string,struct<int1:int,string1:string>>,ts:timestamp,decimal1:decimal(38,10)>
{false, 1, 1024, 65536, 9223372036854775807, 1.0, -15.0, 00 01 02 03 04, hi, {[{1, bye}, {2, sigh}]}, [{3, good}, {4, bad}], {}, 2000-03-12 15:00:00.0, 12345678.6547456}
*/
HadoopDruidIndexerConfig config = loadHadoopDruidIndexerConfig("example/orc-file-11-format-hadoop-job.json");
Job job = Job.getInstance(new Configuration());
config.intoConfiguration(job);
OrcStruct data = getFirstRow(job, ((StaticPathSpec) config.getPathSpec()).getPaths());
List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
Assert.assertEquals(14, rows.get(0).getDimensions().size());
Assert.assertEquals("false", rows.get(0).getDimension("boolean1").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("byte1").get(0));
Assert.assertEquals("1024", rows.get(0).getDimension("short1").get(0));
Assert.assertEquals("65536", rows.get(0).getDimension("int1").get(0));
Assert.assertEquals("9223372036854775807", rows.get(0).getDimension("long1").get(0));
Assert.assertEquals("1.0", rows.get(0).getDimension("float1").get(0));
Assert.assertEquals("-15.0", rows.get(0).getDimension("double1").get(0));
Assert.assertEquals("AAECAwQAAA==", rows.get(0).getDimension("bytes1").get(0));
Assert.assertEquals("hi", rows.get(0).getDimension("string1").get(0));
Assert.assertEquals("1.23456786547456E7", rows.get(0).getDimension("decimal1").get(0));
Assert.assertEquals("2", rows.get(0).getDimension("struct_list_struct_int").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("struct_list_struct_intlist").get(0));
Assert.assertEquals("2", rows.get(0).getDimension("struct_list_struct_intlist").get(1));
Assert.assertEquals("good", rows.get(0).getDimension("list_struct_string").get(0));
Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), rows.get(0).getTimestamp());
// first row has an empty 'map' column, so let's read another!
List<InputRow> allRows = getAllRows(config);
InputRow anotherRow = allRows.get(allRows.size() - 1);
Assert.assertEquals(14, anotherRow.getDimensions().size());
Assert.assertEquals("true", anotherRow.getDimension("boolean1").get(0));
Assert.assertEquals("100", anotherRow.getDimension("byte1").get(0));
Assert.assertEquals("2048", anotherRow.getDimension("short1").get(0));
Assert.assertEquals("65536", anotherRow.getDimension("int1").get(0));
Assert.assertEquals("9223372036854775807", anotherRow.getDimension("long1").get(0));
Assert.assertEquals("2.0", anotherRow.getDimension("float1").get(0));
Assert.assertEquals("-5.0", anotherRow.getDimension("double1").get(0));
Assert.assertEquals("", anotherRow.getDimension("bytes1").get(0));
Assert.assertEquals("bye", anotherRow.getDimension("string1").get(0));
Assert.assertEquals("1.23456786547457E7", anotherRow.getDimension("decimal1").get(0));
Assert.assertEquals("2", anotherRow.getDimension("struct_list_struct_int").get(0));
Assert.assertEquals("cat", anotherRow.getDimension("list_struct_string").get(0));
Assert.assertEquals("5", anotherRow.getDimension("map_struct_int").get(0));
}
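The loadHadoopDruidIndexerConfig and getFirstRow helpers used above belong to the test class and are not shown on this page. A minimal sketch of what they might look like, assuming HadoopDruidIndexerConfig.fromFile for the spec file and org.apache.orc.mapreduce.OrcInputFormat for reading the first record; the real helpers in OrcHadoopInputRowParserTest may differ, and a getAllRows-style helper would simply loop over nextKeyValue() in the same way:
private static HadoopDruidIndexerConfig loadHadoopDruidIndexerConfig(String configPath) {
  // Deserialize the Hadoop indexing spec (dataSchema, pathSpec, tuningConfig) from the JSON file.
  return HadoopDruidIndexerConfig.fromFile(new File(configPath));
}
private static OrcStruct getFirstRow(Job job, String orcPath) throws IOException, InterruptedException {
  File orcFile = new File(orcPath);
  Path path = new Path(orcFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, orcFile.length(), null);
  // org.apache.orc.mapreduce.OrcInputFormat yields OrcStruct values keyed by NullWritable.
  InputFormat<NullWritable, OrcStruct> inputFormat =
      ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader<NullWritable, OrcStruct> reader = inputFormat.createRecordReader(split, context)) {
    reader.initialize(split, context);
    reader.nextKeyValue();
    return reader.getCurrentValue();
  }
}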
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class OrcHadoopInputRowParserTest, method testDate2038:
@Test
public void testDate2038() throws IOException {
/*
TestOrcFile.testDate2038.orc
struct<time:timestamp,date:date>
{2038-05-05 12:34:56.1, 2038-12-25}
*/
HadoopDruidIndexerConfig config = loadHadoopDruidIndexerConfig("example/testDate2038_hadoop_job.json");
Job job = Job.getInstance(new Configuration());
config.intoConfiguration(job);
OrcStruct data = getFirstRow(job, ((StaticPathSpec) config.getPathSpec()).getPaths());
List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
Assert.assertEquals(1, rows.get(0).getDimensions().size());
Assert.assertEquals("2038-12-25T00:00:00.000Z", rows.get(0).getDimension("date").get(0));
Assert.assertEquals(DateTimes.of("2038-05-05T12:34:56.1Z"), rows.get(0).getTimestamp());
}
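A side note on the timestamp assertion: DateTimes.of parses the ISO-8601 string into a UTC org.joda.time.DateTime, so the check compares instants rather than strings. An equivalent, purely illustrative check against the epoch value could look like this:
// Same moment expressed in epoch milliseconds; getTimestampFromEpoch() returns the row timestamp in millis.
Assert.assertEquals(DateTimes.of("2038-05-05T12:34:56.1Z").getMillis(), rows.get(0).getTimestampFromEpoch());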
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class CompatParquetInputTest, method testOldRepeatedInt:
@Test
public void testOldRepeatedInt() throws IOException, InterruptedException {
// REPEATED not supported outside LIST or MAP. Type: repeated int32 repeatedInt
if (parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE)) {
return;
}
HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig("example/compat/old_repeated_int.json", parserType, true);
config.intoConfiguration(job);
List<InputRow> rows = getAllRows(parserType, config);
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
Assert.assertEquals("1", rows.get(0).getDimension("repeatedInt").get(0));
Assert.assertEquals("2", rows.get(0).getDimension("repeatedInt").get(1));
Assert.assertEquals("3", rows.get(0).getDimension("repeatedInt").get(2));
}
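The parserType and job fields used by the Parquet tests come from the test class setup, which is not shown on this page. A minimal sketch of how that wiring might look, assuming a JUnit Parameterized runner over the two Parquet parser flavours; the actual Druid base class and the second constant name may differ:
@RunWith(Parameterized.class)
public class CompatParquetInputTest {
  @Parameterized.Parameters(name = "type = {0}")
  public static Iterable<Object[]> constructorFeeder() {
    // Each test runs once per parser flavour; the avro flavour is skipped where unsupported.
    return Arrays.asList(
        new Object[]{ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE},
        new Object[]{ParquetExtensionsModule.PARQUET_SIMPLE_INPUT_PARSER_TYPE} // assumed constant name
    );
  }
  private final String parserType;
  private Job job;
  public CompatParquetInputTest(String parserType) {
    this.parserType = parserType;
  }
  @Before
  public void setUp() throws IOException {
    // Fresh Hadoop Job per test; config.intoConfiguration(job) later copies the indexer spec into it.
    job = Job.getInstance(new Configuration());
  }
}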
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class CompatParquetInputTest, method testBinaryAsString:
@Test
public void testBinaryAsString() throws IOException, InterruptedException {
HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig("example/compat/impala_hadoop_parquet_job.json", parserType, false);
config.intoConfiguration(job);
Object data = getFirstRow(job, parserType, ((StaticPathSpec) config.getPathSpec()).getPaths());
InputRow row = ((List<InputRow>) config.getParser().parseBatch(data)).get(0);
// without binaryAsString: true, the value would be "aGV5IHRoaXMgaXMgJsOpKC3DqF/Dp8OgKT1eJMO5KiEgzqleXg=="
Assert.assertEquals("hey this is &é(-è_çà)=^$ù*! Ω^^", row.getDimension("field").get(0));
Assert.assertEquals(1471800234, row.getTimestampFromEpoch());
}
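The comment above quotes the raw value seen without binaryAsString: true. Decoding that Base64 string shows it is simply the UTF-8 bytes of the same text, which is what the flag exposes directly as a string dimension (illustration using java.util.Base64):
// Base64-decode the raw binary form and interpret it as UTF-8.
String decoded = new String(
    Base64.getDecoder().decode("aGV5IHRoaXMgaXMgJsOpKC3DqF/Dp8OgKT1eJMO5KiEgzqleXg=="),
    StandardCharsets.UTF_8
);
Assert.assertEquals("hey this is &é(-è_çà)=^$ù*! Ω^^", decoded);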
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class DecimalParquetInputTest, method testReadParquetDecimali32:
@Test
public void testReadParquetDecimali32() throws IOException, InterruptedException {
// parquet-avro does not correctly convert decimal types
if (parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE)) {
return;
}
HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig("example/decimals/dec_in_i32.json", parserType, true);
List<InputRow> rows = getAllRows(parserType, config);
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
Assert.assertEquals("100", rows.get(0).getDimension("i32_dec").get(0));
Assert.assertEquals(new BigDecimal(100), rows.get(0).getMetric("metric1"));
}
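One detail worth noting about the metric assertion: BigDecimal.equals is scale-sensitive, so the check only passes if the parsed metric is a BigDecimal with scale 0. A short illustration of that behaviour:
// Numerically equal values with different scales are not equal(), but compareTo() treats them as equal.
Assert.assertNotEquals(new BigDecimal(100), new BigDecimal("100.00"));
Assert.assertEquals(0, new BigDecimal(100).compareTo(new BigDecimal("100.00")));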