
Example 36 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class ProtobufInputRowParserTest, method testParseNestedData.

@Test
public void testParseNestedData() throws Exception {
    // configure parser with desc file
    ProtobufInputRowParser parser = new ProtobufInputRowParser(parseSpec, decoder, null, null);
    // create binary of proto test event
    DateTime dateTime = new DateTime(2012, 7, 12, 9, 30, ISOChronology.getInstanceUTC());
    ProtoTestEventWrapper.ProtoTestEvent event = buildNestedData(dateTime);
    InputRow row = parser.parseBatch(toByteBuffer(event)).get(0);
    verifyNestedData(row, dateTime);
}
Also used: InputRow (org.apache.druid.data.input.InputRow), DateTime (org.joda.time.DateTime), Test (org.junit.Test)
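
The test relies on helpers such as buildNestedData and toByteBuffer that are defined elsewhere in the test class and are not shown on this page. A minimal sketch of what a toByteBuffer helper could look like, assuming a standard protobuf-generated message and java.nio.ByteBuffer, is:

// Hypothetical helper, not the project's actual implementation: serialize a protobuf
// message into the ByteBuffer shape that ProtobufInputRowParser.parseBatch(...) expects.
static ByteBuffer toByteBuffer(ProtoTestEventWrapper.ProtoTestEvent event) {
    // toByteArray() produces the standard protobuf binary encoding of the message
    return ByteBuffer.wrap(event.toByteArray());
}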

Example 37 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class ProtobufInputRowParserTest, method testParseFlatDataWithComplexTimestamp.

@Test
public void testParseFlatDataWithComplexTimestamp() throws Exception {
    ProtobufInputRowParser parser = new ProtobufInputRowParser(flatParseSpecWithComplexTimestamp, decoder, null, null);
    // create binary of proto test event
    DateTime dateTime = new DateTime(2012, 7, 12, 9, 30, ISOChronology.getInstanceUTC());
    ProtoTestEventWrapper.ProtoTestEvent event = buildFlatDataWithComplexTimestamp(dateTime);
    InputRow row = parser.parseBatch(toByteBuffer(event)).get(0);
    verifyFlatDataWithComplexTimestamp(row, dateTime);
}
Also used: InputRow (org.apache.druid.data.input.InputRow), DateTime (org.joda.time.DateTime), Test (org.junit.Test)
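
The flatParseSpecWithComplexTimestamp field is built elsewhere in the test class. What makes the timestamp "complex" is typically a TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) whose column uses a non-ISO format; a minimal sketch, with an assumed column name and format rather than the test's actual values, might be:

// Hypothetical sketch only: the column name and format string are assumptions.
TimestampSpec complexTimestampSpec = new TimestampSpec(
    "otherTimestamp",          // assumed timestamp column
    "MM/dd/yyyy HH:mm:ss z",   // assumed Joda-time pattern; "auto", "iso", and "millis" are also accepted
    null                       // no default value for rows with a missing timestamp
);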

Example 38 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class BaseParquetInputTest, method getAllRows.

static List<InputRow> getAllRows(String parserType, HadoopDruidIndexerConfig config) throws IOException, InterruptedException {
    Job job = Job.getInstance(new Configuration());
    config.intoConfiguration(job);
    File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);
    InputFormat inputFormat = ReflectionUtils.newInstance(INPUT_FORMAT_CLASSES.get(parserType), job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
        List<InputRow> records = new ArrayList<>();
        InputRowParser parser = config.getParser();
        reader.initialize(split, context);
        // nextKeyValue() both advances the reader and reports whether a record is available,
        // so it must be called only once per iteration or every other row would be skipped
        while (reader.nextKeyValue()) {
            Object data = reader.getCurrentValue();
            records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
        }
        return records;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), RecordReader (org.apache.hadoop.mapreduce.RecordReader), ArrayList (java.util.ArrayList), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit), InputFormat (org.apache.hadoop.mapreduce.InputFormat), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), InputRow (org.apache.druid.data.input.InputRow), InputRowParser (org.apache.druid.data.input.impl.InputRowParser), Job (org.apache.hadoop.mapreduce.Job), File (java.io.File)
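
Example 40 below also calls a getFirstRow helper from this base class that is not shown on this page. A sketch of such a helper, reusing the same Hadoop plumbing and the base class's INPUT_FORMAT_CLASSES map as getAllRows above, could look like the following; treat it as an illustration rather than the project's actual code:

// Hypothetical companion helper: read only the first raw record from the Parquet file
// and let the caller run it through the configured parser.
static Object getFirstRow(Job job, String parserType, String parquetPath) throws IOException, InterruptedException {
    File testFile = new File(parquetPath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);
    InputFormat inputFormat = ReflectionUtils.newInstance(INPUT_FORMAT_CLASSES.get(parserType), job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
        reader.initialize(split, context);
        // advance to the first record and return its raw value
        reader.nextKeyValue();
        return reader.getCurrentValue();
    }
}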

Example 39 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class CompatParquetInputTest, method testReadNestedArrayStruct.

@Test
public void testReadNestedArrayStruct() throws IOException, InterruptedException {
    // this case is not exercised for the parquet-avro parser type, so skip it
    if (parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE)) {
        return;
    }
    HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig("example/compat/nested_array_struct.json", parserType, true);
    config.intoConfiguration(job);
    List<InputRow> rows = getAllRows(parserType, config);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
    Assert.assertEquals("5", rows.get(0).getDimension("primitive").get(0));
    Assert.assertEquals("4", rows.get(0).getDimension("extracted1").get(0));
    Assert.assertEquals("6", rows.get(0).getDimension("extracted2").get(0));
}
Also used: InputRow (org.apache.druid.data.input.InputRow), HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig), Test (org.junit.Test)
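
Both CompatParquetInputTest methods branch on a parserType field, which is usually injected by JUnit's Parameterized runner. A minimal sketch of that pattern is shown below (imports: java.util.Arrays, java.util.Collection, org.junit.runner.RunWith, org.junit.runners.Parameterized); the concrete parameter values are assumptions, not the parquet extension's actual constants:

@RunWith(Parameterized.class)
public class CompatParquetInputTestSketch {

    // Hypothetical parameter set; the real test enumerates the extension's parser types.
    @Parameterized.Parameters(name = "{0}")
    public static Collection<Object[]> parserTypes() {
        return Arrays.asList(new Object[][]{
                {"parquet"},       // assumed name of the plain parquet parser type
                {"parquet-avro"}   // assumed name of the parquet-avro parser type
        });
    }

    private final String parserType;

    public CompatParquetInputTestSketch(String parserType) {
        this.parserType = parserType;
    }
}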

Example 40 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class CompatParquetInputTest, method testParquetThriftCompat.

@Test
public void testParquetThriftCompat() throws IOException, InterruptedException {
    // skipped for the parquet-avro parser type, which cannot handle this schema:
    // "Map key type must be binary (UTF8): required int32 key"
    if (parserType.equals(ParquetExtensionsModule.PARQUET_AVRO_INPUT_PARSER_TYPE)) {
        return;
    }
    /*
      message ParquetSchema {
        required boolean boolColumn;
        required int32 byteColumn;
        required int32 shortColumn;
        required int32 intColumn;
        required int64 longColumn;
        required double doubleColumn;
        required binary binaryColumn (UTF8);
        required binary stringColumn (UTF8);
        required binary enumColumn (ENUM);
        optional boolean maybeBoolColumn;
        optional int32 maybeByteColumn;
        optional int32 maybeShortColumn;
        optional int32 maybeIntColumn;
        optional int64 maybeLongColumn;
        optional double maybeDoubleColumn;
        optional binary maybeBinaryColumn (UTF8);
        optional binary maybeStringColumn (UTF8);
        optional binary maybeEnumColumn (ENUM);
        required group stringsColumn (LIST) {
          repeated binary stringsColumn_tuple (UTF8);
        }
        required group intSetColumn (LIST) {
          repeated int32 intSetColumn_tuple;
        }
        required group intToStringColumn (MAP) {
          repeated group map (MAP_KEY_VALUE) {
            required int32 key;
            optional binary value (UTF8);
          }
        }
        required group complexColumn (MAP) {
          repeated group map (MAP_KEY_VALUE) {
            required int32 key;
            optional group value (LIST) {
              repeated group value_tuple {
                required group nestedIntsColumn (LIST) {
                  repeated int32 nestedIntsColumn_tuple;
                }
                required binary nestedStringColumn (UTF8);
              }
            }
          }
        }
      }
     */
    HadoopDruidIndexerConfig config = transformHadoopDruidIndexerConfig("example/compat/parquet_thrift_compat.json", parserType, true);
    config.intoConfiguration(job);
    Object data = getFirstRow(job, parserType, ((StaticPathSpec) config.getPathSpec()).getPaths());
    List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
    Assert.assertEquals("true", rows.get(0).getDimension("boolColumn").get(0));
    Assert.assertEquals("0", rows.get(0).getDimension("byteColumn").get(0));
    Assert.assertEquals("1", rows.get(0).getDimension("shortColumn").get(0));
    Assert.assertEquals("2", rows.get(0).getDimension("intColumn").get(0));
    Assert.assertEquals("0", rows.get(0).getDimension("longColumn").get(0));
    Assert.assertEquals("0.2", rows.get(0).getDimension("doubleColumn").get(0));
    Assert.assertEquals("val_0", rows.get(0).getDimension("binaryColumn").get(0));
    Assert.assertEquals("val_0", rows.get(0).getDimension("stringColumn").get(0));
    Assert.assertEquals("SPADES", rows.get(0).getDimension("enumColumn").get(0));
    Assert.assertTrue(rows.get(0).getDimension("maybeBoolColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeByteColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeShortColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeIntColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeLongColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeDoubleColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeBinaryColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeStringColumn").isEmpty());
    Assert.assertTrue(rows.get(0).getDimension("maybeEnumColumn").isEmpty());
    Assert.assertEquals("arr_0", rows.get(0).getDimension("stringsColumn").get(0));
    Assert.assertEquals("arr_1", rows.get(0).getDimension("stringsColumn").get(1));
    Assert.assertEquals("0", rows.get(0).getDimension("intSetColumn").get(0));
    Assert.assertEquals("val_1", rows.get(0).getDimension("extractByLogicalMap").get(0));
    Assert.assertEquals("1", rows.get(0).getDimension("extractByComplexLogicalMap").get(0));
}
Also used: InputRow (org.apache.druid.data.input.InputRow), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig), Test (org.junit.Test)
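
The extractByLogicalMap and extractByComplexLogicalMap dimensions come from the flattenSpec inside example/compat/parquet_thrift_compat.json, which is not reproduced on this page. A hypothetical equivalent built in Java from JSONPathSpec and JSONPathFieldSpec (both listed under Aggregations below) might look like this; the JSONPath expressions are guesses at what would yield "val_1" and "1" for the schema above, not the file's actual contents:

// Hypothetical sketch; the real spec lives in the example JSON file. Requires java.util.Arrays.
JSONPathSpec flattenSpec = new JSONPathSpec(
    true,  // useFieldDiscovery: keep top-level fields such as stringColumn and enumColumn
    Arrays.asList(
        // pull a single value out of the int-keyed map column
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractByLogicalMap", "$.intToStringColumn.1"),
        // reach into the nested list-of-structs value of the complex map column
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractByComplexLogicalMap", "$.complexColumn.1[0].nestedIntsColumn[1]")
    )
);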

Aggregations

InputRow (org.apache.druid.data.input.InputRow): 266
Test (org.junit.Test): 193
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 57
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 52
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 52
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 49
ArrayList (java.util.ArrayList): 46
List (java.util.List): 37
ImmutableList (com.google.common.collect.ImmutableList): 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 33
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 29
File (java.io.File): 27
HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig): 27
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 27
DateTime (org.joda.time.DateTime): 24
Map (java.util.Map): 23
IOException (java.io.IOException): 18
Interval (org.joda.time.Interval): 18