Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
In class ProtobufInputRowParserTest, method testParseNestedData:
@Test
public void testParseNestedData() throws Exception {
// configure parser with desc file
ProtobufInputRowParser parser = new ProtobufInputRowParser(parseSpec, decoder, null, null);
// create binary of proto test event
DateTime dateTime = new DateTime(2012, 7, 12, 9, 30, ISOChronology.getInstanceUTC());
ProtoTestEventWrapper.ProtoTestEvent event = buildNestedData(dateTime);
InputRow row = parser.parseBatch(toByteBuffer(event)).get(0);
verifyNestedData(row, dateTime);
}
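The toByteBuffer helper referenced above is not reproduced on this page. A minimal sketch of what it could look like, assuming it simply serializes the generated protobuf message and wraps the bytes for the parser (only the call site above is given; the body is an assumption):
private static ByteBuffer toByteBuffer(ProtoTestEventWrapper.ProtoTestEvent event) throws IOException {
  // Write the protobuf message to a byte array and wrap it; ProtobufInputRowParser parses ByteBuffers.
  // Requires java.io.ByteArrayOutputStream, java.io.IOException and java.nio.ByteBuffer.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  event.writeTo(out);
  return ByteBuffer.wrap(out.toByteArray());
}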
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
In class ProtobufInputRowParserTest, method testParseFlatDataWithComplexTimestamp:
@Test
public void testParseFlatDataWithComplexTimestamp() throws Exception {
ProtobufInputRowParser parser = new ProtobufInputRowParser(flatParseSpecWithComplexTimestamp, decoder, null, null);
// create binary of proto test event
DateTime dateTime = new DateTime(2012, 7, 12, 9, 30, ISOChronology.getInstanceUTC());
ProtoTestEventWrapper.ProtoTestEvent event = buildFlatDataWithComplexTimestamp(dateTime);
InputRow row = parser.parseBatch(toByteBuffer(event)).get(0);
verifyFlatDataWithComplexTimestamp(row, dateTime);
}
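The parseSpec, flatParseSpecWithComplexTimestamp, and decoder fields are initialized elsewhere in the test class. A hedged sketch of how they might be built, assuming the descriptor-file decoder from the protobuf extension; the descriptor file name, message name, timestamp field, and format below are illustrative placeholders, not values taken from this page:
// Assumed: FileBasedProtobufBytesDecoder(descriptorFile, messageType) from the protobuf extension.
ProtobufBytesDecoder decoder = new FileBasedProtobufBytesDecoder("prototest.desc", "ProtoTestEvent");
// Assumed: the "complex timestamp" spec reads event time from a dedicated field instead of "timestamp".
TimestampSpec timestampSpec = new TimestampSpec("otherTimestamp", "iso", null);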
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
In class BaseParquetInputTest, method getAllRows:
static List<InputRow> getAllRows(String parserType, HadoopDruidIndexerConfig config) throws IOException, InterruptedException {
Job job = Job.getInstance(new Configuration());
config.intoConfiguration(job);
File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
Path path = new Path(testFile.getAbsoluteFile().toURI());
FileSplit split = new FileSplit(path, 0, testFile.length(), null);
InputFormat inputFormat = ReflectionUtils.newInstance(INPUT_FORMAT_CLASSES.get(parserType), job.getConfiguration());
TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
List<InputRow> records = new ArrayList<>();
InputRowParser parser = config.getParser();
reader.initialize(split, context);
while (reader.nextKeyValue()) {
Object data = reader.getCurrentValue();
records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
}
return records;
}
}
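testParquetThriftCompat below also calls a getFirstRow helper that is not shown on this page. A sketch of what it could look like, assuming it mirrors getAllRows but returns only the first raw record for the caller to parse (the signature is inferred from the call site; the body is an assumption):
static Object getFirstRow(Job job, String parserType, String parquetPath) throws IOException, InterruptedException {
  File testFile = new File(parquetPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);
  InputFormat inputFormat = ReflectionUtils.newInstance(INPUT_FORMAT_CLASSES.get(parserType), job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
    reader.initialize(split, context);
    // Advance to the first record and return the raw value; the caller runs it through the parser.
    reader.nextKeyValue();
    return reader.getCurrentValue();
  }
}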
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
In class CompatParquetInputTest, method testReadNestedArrayStruct:
@Test
public void testReadNestedArrayStruct() throws IOException, InterruptedException {
if (parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE)) {
return;
}
HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig("example/compat/nested_array_struct.json", parserType, true);
config.intoConfiguration(job);
List<InputRow> rows = getAllRows(parserType, config);
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
Assert.assertEquals("5", rows.get(0).getDimension("primitive").get(0));
Assert.assertEquals("4", rows.get(0).getDimension("extracted1").get(0));
Assert.assertEquals("6", rows.get(0).getDimension("extracted2").get(0));
}
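The extracted1 and extracted2 dimensions are not top-level Parquet columns; they come from the flattenSpec inside example/compat/nested_array_struct.json. A rough Java equivalent, assuming Druid's JSONPathSpec and JSONPathFieldSpec classes; the path expressions are illustrative guesses, not copied from the spec file:
// Hypothetical flatten spec pulling fields out of a nested array-of-structs into flat dimensions.
JSONPathSpec flattenSpec = new JSONPathSpec(
  true, // keep automatic discovery of top-level fields such as "primitive"
  ImmutableList.of(
    new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted1", "$.myComplex[0].id"),
    new JSONPathFieldSpec(JSONPathFieldType.PATH, "extracted2", "$.myComplex[0].repeatedMessage[0]")
  )
);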
Use of org.apache.druid.data.input.InputRow in project druid by druid-io.
In class CompatParquetInputTest, method testParquetThriftCompat:
@Test
public void testParquetThriftCompat() throws IOException, InterruptedException {
// Map key type must be binary (UTF8): required int32 key
if (parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE)) {
return;
}
/*
  message ParquetSchema {
    required boolean boolColumn;
    required int32 byteColumn;
    required int32 shortColumn;
    required int32 intColumn;
    required int64 longColumn;
    required double doubleColumn;
    required binary binaryColumn (UTF8);
    required binary stringColumn (UTF8);
    required binary enumColumn (ENUM);
    optional boolean maybeBoolColumn;
    optional int32 maybeByteColumn;
    optional int32 maybeShortColumn;
    optional int32 maybeIntColumn;
    optional int64 maybeLongColumn;
    optional double maybeDoubleColumn;
    optional binary maybeBinaryColumn (UTF8);
    optional binary maybeStringColumn (UTF8);
    optional binary maybeEnumColumn (ENUM);
    required group stringsColumn (LIST) {
      repeated binary stringsColumn_tuple (UTF8);
    }
    required group intSetColumn (LIST) {
      repeated int32 intSetColumn_tuple;
    }
    required group intToStringColumn (MAP) {
      repeated group map (MAP_KEY_VALUE) {
        required int32 key;
        optional binary value (UTF8);
      }
    }
    required group complexColumn (MAP) {
      repeated group map (MAP_KEY_VALUE) {
        required int32 key;
        optional group value (LIST) {
          repeated group value_tuple {
            required group nestedIntsColumn (LIST) {
              repeated int32 nestedIntsColumn_tuple;
            }
            required binary nestedStringColumn (UTF8);
          }
        }
      }
    }
  }
*/
HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig("example/compat/parquet_thrift_compat.json", parserType, true);
config.intoConfiguration(job);
Object data = getFirstRow(job, parserType, ((StaticPathSpec) config.getPathSpec()).getPaths());
List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
Assert.assertEquals("true", rows.get(0).getDimension("boolColumn").get(0));
Assert.assertEquals("0", rows.get(0).getDimension("byteColumn").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("shortColumn").get(0));
Assert.assertEquals("2", rows.get(0).getDimension("intColumn").get(0));
Assert.assertEquals("0", rows.get(0).getDimension("longColumn").get(0));
Assert.assertEquals("0.2", rows.get(0).getDimension("doubleColumn").get(0));
Assert.assertEquals("val_0", rows.get(0).getDimension("binaryColumn").get(0));
Assert.assertEquals("val_0", rows.get(0).getDimension("stringColumn").get(0));
Assert.assertEquals("SPADES", rows.get(0).getDimension("enumColumn").get(0));
Assert.assertTrue(rows.get(0).getDimension("maybeBoolColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeByteColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeShortColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeIntColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeLongColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeDoubleColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeBinaryColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeStringColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeEnumColumn").isEmpty());
Assert.assertEquals("arr_0", rows.get(0).getDimension("stringsColumn").get(0));
Assert.assertEquals("arr_1", rows.get(0).getDimension("stringsColumn").get(1));
Assert.assertEquals("0", rows.get(0).getDimension("intSetColumn").get(0));
Assert.assertEquals("val_1", rows.get(0).getDimension("extractByLogicalMap").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("extractByComplexLogicalMap").get(0));
}