Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class FlattenSpecParquetInputTest, method testNested1Autodiscover.
@Test
public void testNested1Autodiscover() throws IOException, InterruptedException {
  HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig(
      "example/flattening/nested_1_autodiscover_fields.json",
      parserType,
      true
  );
  config.intoConfiguration(job);
  Object data = getFirstRow(job, parserType, ((StaticPathSpec) config.getPathSpec()).getPaths());
  List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
  Assert.assertEquals(TS1, rows.get(0).getTimestamp().toString());
  Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
  // field auto-discovery only surfaces root-level columns, so the nested dimensions are absent
  List<String> dims = rows.get(0).getDimensions();
  Assert.assertFalse(dims.contains("dim2"));
  Assert.assertFalse(dims.contains("dim3"));
  Assert.assertFalse(dims.contains("listDim"));
  Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
}
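The nested_1_autodiscover_fields.json file is not reproduced on this page, but the behavior under test is that a flattenSpec relying purely on field auto-discovery only surfaces root-level columns. A minimal sketch of such a spec built with Druid's JSONPathSpec (the class name AutoDiscoverFlattenSpecSketch is ours, for illustration only):

import java.util.Collections;

import org.apache.druid.java.util.common.parsers.JSONPathSpec;

public class AutoDiscoverFlattenSpecSketch {
  // useFieldDiscovery = true with no explicit fields: root-level columns such as
  // "dim1" and "metric1" are picked up automatically, while nested columns
  // ("dim2", "dim3", "listDim" in this test's data) stay undiscovered.
  public static final JSONPathSpec AUTO_DISCOVER_ONLY =
      new JSONPathSpec(true, Collections.emptyList());
}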
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class FlattenSpecParquetInputTest, method testNested1Flatten.
@Test
public void testNested1Flatten() throws IOException, InterruptedException {
  HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig(
      "example/flattening/nested_1_flatten.json",
      parserType,
      true
  );
  config.intoConfiguration(job);
  Object data = getFirstRow(job, parserType, ((StaticPathSpec) config.getPathSpec()).getPaths());
  List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
  Assert.assertEquals(TS1, rows.get(0).getTimestamp().toString());
  Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
  // with an explicit flattenSpec, the nested fields show up as ordinary top-level dimensions
  Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
  Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
  Assert.assertEquals("listDim1v1", rows.get(0).getDimension("listDim").get(0));
  Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listDim").get(1));
  Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
  Assert.assertEquals(2, rows.get(0).getMetric("metric2").longValue());
}
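To make the nested fields visible, the flattened variant of the config has to name them explicitly. A hedged sketch of an equivalent flattenSpec via Druid's Java API; the "$.nestedData...." JSONPath expressions are assumptions about the test file's layout, not the actual contents of nested_1_flatten.json:

import java.util.Arrays;

import org.apache.druid.java.util.common.parsers.JSONPathFieldSpec;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;

public class ExplicitFlattenSpecSketch {
  // Explicit JSONPath entries pull nested columns up to the root so they can be
  // used as regular dimensions; auto-discovery still handles root-level fields.
  public static final JSONPathSpec FLATTEN_NESTED = new JSONPathSpec(
      true,
      Arrays.asList(
          JSONPathFieldSpec.createNestedField("dim2", "$.nestedData.dim2"),
          JSONPathFieldSpec.createNestedField("dim3", "$.nestedData.dim3"),
          JSONPathFieldSpec.createNestedField("listDim", "$.nestedData.listDim")
      )
  );
}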
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class TimestampsParquetInputTest, method testDateHandling.
@Test
public void testDateHandling() throws IOException, InterruptedException {
  HadoopDruidIndexerConfig configTimeAsString = transformHadoopDruidIndexerConfig(
      "example/timestamps/date_test_data_job_string.json",
      parserType,
      false
  );
  HadoopDruidIndexerConfig configTimeAsDate = transformHadoopDruidIndexerConfig(
      "example/timestamps/date_test_data_job_date.json",
      parserType,
      false
  );
  List<InputRow> rowsWithString = getAllRows(parserType, configTimeAsString);
  List<InputRow> rowsWithDate = getAllRows(parserType, configTimeAsDate);
  // string-typed and date-typed time columns must parse to identical timestamps
  Assert.assertEquals(rowsWithDate.size(), rowsWithString.size());
  for (int i = 0; i < rowsWithDate.size(); i++) {
    Assert.assertEquals(rowsWithString.get(i).getTimestamp(), rowsWithDate.get(i).getTimestamp());
  }
}
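The two job specs differ only in how the time column is physically stored. A sketch of the kind of timestampSpec that makes this work, assuming the column is named "date" (the column name is our assumption, not taken from the test files): with format "auto", Druid accepts both ISO-8601 strings and numeric epoch values, so one spec covers both files.

import org.apache.druid.data.input.impl.TimestampSpec;

public class DateHandlingSketch {
  // "auto" detects ISO-8601 strings as well as numeric epoch values, so the same
  // spec works whether the Parquet column is a string or a date logical type.
  public static final TimestampSpec TIME_AS_STRING_OR_DATE =
      new TimestampSpec("date", "auto", null);
}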
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class TimestampsParquetInputTest, method testTimeMillisInInt64.
@Test
public void testTimeMillisInInt64() throws IOException, InterruptedException {
  HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig(
      "example/timestamps/timemillis_in_i64.json",
      parserType,
      true
  );
  config.intoConfiguration(job);
  List<InputRow> rows = getAllRows(parserType, config);
  // the raw int64 value 10 is read as 10 milliseconds after the Unix epoch
  Assert.assertEquals("1970-01-01T00:00:00.010Z", rows.get(0).getTimestamp().toString());
}
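The expected value follows directly from interpreting the raw int64 as milliseconds since the Unix epoch; a quick check with the Joda-Time classes Druid uses:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class TimeMillisCheck {
  public static void main(String[] args) {
    // 10 ms after the epoch, rendered in UTC
    System.out.println(new DateTime(10L, DateTimeZone.UTC));
    // prints 1970-01-01T00:00:00.010Z, matching the assertion above
  }
}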
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
From the class TimestampsParquetInputTest, method testParseInt96Timestamp.
@Test
public void testParseInt96Timestamp() throws IOException, InterruptedException {
  // parquet-avro does not support int96, but if it ever does, remove this check
  if (parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE)) {
    return;
  }
  // The source parquet file comes from the Apache Spark SQL repo tests, where it is known as
  // impala_timestamp.parq. It has a single column, "ts", which is an int96 timestamp.
  HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig(
      "example/timestamps/int96_timestamp.json",
      parserType,
      true
  );
  config.intoConfiguration(job);
  Object data = getFirstRow(job, parserType, ((StaticPathSpec) config.getPathSpec()).getPaths());
  List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
  Assert.assertEquals("2001-01-01T01:01:01.000Z", rows.get(0).getTimestamp().toString());
}
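Parquet's int96 timestamps are twelve bytes: eight bytes of nanoseconds-of-day followed by a four-byte Julian day number. A sketch of the usual decoding into epoch millis (this illustrates the format; it is not necessarily Druid's exact code path):

public class Int96TimestampSketch {
  private static final long JULIAN_DAY_OF_EPOCH = 2440588L; // Julian day of 1970-01-01
  private static final long MILLIS_PER_DAY = 86_400_000L;

  // julianDay and nanosOfDay are assumed already decoded from the little-endian
  // int96 bytes; the arithmetic maps them onto the Unix epoch.
  static long int96ToEpochMillis(int julianDay, long nanosOfDay) {
    return (julianDay - JULIAN_DAY_OF_EPOCH) * MILLIS_PER_DAY + nanosOfDay / 1_000_000L;
  }
}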