Example 36 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class AvroOCFReaderTest, method testParseWithReaderSchemaAlias.

@Test
public void testParseWithReaderSchemaAlias() throws Exception {
    final ObjectMapper mapper = new DefaultObjectMapper();
    mapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, mapper));
    // Read the data using a reduced reader schema, emulating an older reader version with fewer fields
    String schemaStr = "{\n"
        + "  \"namespace\": \"org.apache.druid.data.input\",\n"
        + "  \"name\": \"SomeAvroDatum\",\n"
        + "  \"type\": \"record\",\n"
        + "  \"fields\" : [\n"
        + "    {\"name\":\"timestamp\",\"type\":\"long\"},\n"
        + "    {\"name\":\"someLong\",\"type\":\"long\"},\n"
        + "    {\"name\":\"eventClass\",\"type\":\"string\", \"aliases\": [\"eventType\"]}\n"
        + "  ]\n"
        + "}";
    TypeReference<Map<String, Object>> typeRef = new TypeReference<Map<String, Object>>() {
    };
    final Map<String, Object> readerSchema = mapper.readValue(schemaStr, typeRef);
    final InputEntityReader reader = createReader(mapper, readerSchema);
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        Assert.assertTrue(iterator.hasNext());
        final InputRow row = iterator.next();
        // eventType is aliased to eventClass in the reader schema and should be transformed at read time
        Assert.assertEquals("type-a", Iterables.getOnlyElement(row.getDimension("eventClass")));
        Assert.assertFalse(iterator.hasNext());
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) TypeReference(com.fasterxml.jackson.core.type.TypeReference) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Map(java.util.Map) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) AvroStreamInputRowParserTest(org.apache.druid.data.input.AvroStreamInputRowParserTest) Test(org.junit.Test) AvroHadoopInputRowParserTest(org.apache.druid.data.input.AvroHadoopInputRowParserTest)
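
The test above relies on Avro schema resolution: the reader schema declares eventClass with an alias for the writer's eventType, so the field is renamed at read time rather than rewritten in the data. A minimal standalone sketch of that mechanism with the plain Avro API (the schemas and values mirror the test, but the demo class itself is illustrative, not part of Druid):

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class AliasResolutionDemo {
    public static void main(String[] args) throws Exception {
        // Writer schema: the shape the data was serialized with.
        Schema writerSchema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"SomeAvroDatum\",\"fields\":["
            + "{\"name\":\"timestamp\",\"type\":\"long\"},"
            + "{\"name\":\"eventType\",\"type\":\"string\"}]}");
        // Reader schema: renames eventType to eventClass via an alias.
        Schema readerSchema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"SomeAvroDatum\",\"fields\":["
            + "{\"name\":\"timestamp\",\"type\":\"long\"},"
            + "{\"name\":\"eventClass\",\"type\":\"string\",\"aliases\":[\"eventType\"]}]}");

        GenericRecord datum = new GenericData.Record(writerSchema);
        datum.put("timestamp", 1543926600000L);
        datum.put("eventType", "type-a");

        // Serialize with the writer schema.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(writerSchema).write(datum, encoder);
        encoder.flush();

        // Deserialize with both schemas; resolution maps eventType -> eventClass.
        GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(writerSchema, readerSchema);
        GenericRecord resolved = reader.read(null, DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
        System.out.println(resolved.get("eventClass")); // prints type-a
    }
}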

Example 37 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class AvroOCFReaderTest, method testSample.

@Test
public void testSample() throws Exception {
    final ObjectMapper mapper = new DefaultObjectMapper();
    mapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, mapper));
    final InputEntityReader reader = createReader(mapper, null);
    try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
        Assert.assertTrue(iterator.hasNext());
        final InputRowListPlusRawValues row = iterator.next();
        Assert.assertFalse(iterator.hasNext());
        final Map<String, Object> rawColumns = row.getRawValues();
        Assert.assertNotNull(rawColumns);
        Assert.assertEquals(20, rawColumns.size());
        final List<InputRow> inputRows = row.getInputRows();
        Assert.assertNotNull(inputRows);
        final InputRow inputRow = Iterables.getOnlyElement(inputRows);
        assertInputRow(inputRow);
    }
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) InputRow(org.apache.druid.data.input.InputRow) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) AvroStreamInputRowParserTest(org.apache.druid.data.input.AvroStreamInputRowParserTest) Test(org.junit.Test) AvroHadoopInputRowParserTest(org.apache.druid.data.input.AvroHadoopInputRowParserTest)
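
Where read() yields only parsed InputRows, sample() pairs each parsed batch with the raw, unparsed column map; this is what Druid's ingestion sampler uses to show users the original input alongside the parse result. A minimal consumption sketch, reusing the reader variable from the test above and only the accessors the test itself exercises:

try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
    while (iterator.hasNext()) {
        InputRowListPlusRawValues sampled = iterator.next();
        // The raw column map as it appeared in the source file.
        Map<String, Object> raw = sampled.getRawValues();
        // The rows produced after timestamp/dimension parsing
        // (non-null here; a row that fails to parse may not yield any).
        List<InputRow> parsed = sampled.getInputRows();
        System.out.println(raw.size() + " raw columns -> " + parsed.size() + " row(s)");
    }
}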

Example 38 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class AvroOCFReaderTest, method testParse.

@Test
public void testParse() throws Exception {
    final ObjectMapper mapper = new DefaultObjectMapper();
    mapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, mapper));
    final InputEntityReader reader = createReader(mapper, null);
    assertRow(reader);
}
Also used : DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) AvroStreamInputRowParserTest(org.apache.druid.data.input.AvroStreamInputRowParserTest) Test(org.junit.Test) AvroHadoopInputRowParserTest(org.apache.druid.data.input.AvroHadoopInputRowParserTest)
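
Passing null as the reader schema works because Avro Object Container Files carry the writer schema in the file header, so the reader can decode with the embedded schema alone. A short sketch with the plain Avro API (the file path is hypothetical):

import java.io.File;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

// The OCF header embeds the writer schema, so no schema argument is needed.
DataFileReader<GenericRecord> fileReader =
    new DataFileReader<>(new File("/tmp/someAvroDatum.avro"), new GenericDatumReader<>());
System.out.println(fileReader.getSchema()); // the schema recovered from the header
for (GenericRecord record : fileReader) {
    System.out.println(record);
}
fileReader.close();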

Example 39 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class AvroOCFReaderTest, method testParseWithReaderSchema.

@Test
public void testParseWithReaderSchema() throws Exception {
    final ObjectMapper mapper = new DefaultObjectMapper();
    mapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, mapper));
    // Read the data using a reduced reader schema, emulating an older reader version with fewer fields
    String schemaStr = "{\n"
        + "  \"namespace\": \"org.apache.druid.data.input\",\n"
        + "  \"name\": \"SomeAvroDatum\",\n"
        + "  \"type\": \"record\",\n"
        + "  \"fields\" : [\n"
        + "    {\"name\":\"timestamp\",\"type\":\"long\"},\n"
        + "    {\"name\":\"eventType\",\"type\":\"string\"},\n"
        + "    {\"name\":\"someLong\",\"type\":\"long\"}\n"
        + "  ]\n"
        + "}";
    TypeReference<Map<String, Object>> typeRef = new TypeReference<Map<String, Object>>() {
    };
    final Map<String, Object> readerSchema = mapper.readValue(schemaStr, typeRef);
    final InputEntityReader reader = createReader(mapper, readerSchema);
    assertRow(reader);
}
Also used : DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) TypeReference(com.fasterxml.jackson.core.type.TypeReference) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Map(java.util.Map) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) AvroStreamInputRowParserTest(org.apache.druid.data.input.AvroStreamInputRowParserTest) Test(org.junit.Test) AvroHadoopInputRowParserTest(org.apache.druid.data.input.AvroHadoopInputRowParserTest)
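
Dropping writer fields, as this test does, is one half of Avro's resolution rules; the other half is that a reader schema may also add fields the writer never wrote, provided each new field declares a default. A hypothetical extension of the same schema string (newField is not in the written data; its default fills in at read time):

    String forwardCompatibleSchemaStr = "{\n"
        + "  \"namespace\": \"org.apache.druid.data.input\",\n"
        + "  \"name\": \"SomeAvroDatum\",\n"
        + "  \"type\": \"record\",\n"
        + "  \"fields\" : [\n"
        + "    {\"name\":\"timestamp\",\"type\":\"long\"},\n"
        + "    {\"name\":\"eventType\",\"type\":\"string\"},\n"
        + "    {\"name\":\"someLong\",\"type\":\"long\"},\n"
        + "    {\"name\":\"newField\",\"type\":\"string\",\"default\":\"none\"}\n"
        + "  ]\n"
        + "}";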

Example 40 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class CsvReaderTest, method testQuotes.

@Test
public void testQuotes() throws IOException {
    final ByteEntity source = writeData(
        ImmutableList.of(
            "3,\"Lets do some \"\"normal\"\" quotes\",2018-05-05T10:00:00Z",
            "34,\"Lets do some \"\"normal\"\", quotes with comma\",2018-05-06T10:00:00Z",
            "343,\"Lets try \\\"\"it\\\"\" with slash quotes\",2018-05-07T10:00:00Z",
            "545,\"Lets try \\\"\"it\\\"\", with slash quotes and comma\",2018-05-08T10:00:00Z",
            "65,Here I write \\n slash n,2018-05-09T10:00:00Z"
        )
    );
    final List<InputRow> expectedResults = ImmutableList.of(
        new MapBasedInputRow(
            DateTimes.of("2018-05-05T10:00:00Z"),
            ImmutableList.of("Timestamp"),
            ImmutableMap.of("Value", "3", "Comment", "Lets do some \"normal\" quotes", "Timestamp", "2018-05-05T10:00:00Z")
        ),
        new MapBasedInputRow(
            DateTimes.of("2018-05-06T10:00:00Z"),
            ImmutableList.of("Timestamp"),
            ImmutableMap.of("Value", "34", "Comment", "Lets do some \"normal\", quotes with comma", "Timestamp", "2018-05-06T10:00:00Z")
        ),
        new MapBasedInputRow(
            DateTimes.of("2018-05-07T10:00:00Z"),
            ImmutableList.of("Timestamp"),
            ImmutableMap.of("Value", "343", "Comment", "Lets try \\\"it\\\" with slash quotes", "Timestamp", "2018-05-07T10:00:00Z")
        ),
        new MapBasedInputRow(
            DateTimes.of("2018-05-08T10:00:00Z"),
            ImmutableList.of("Timestamp"),
            ImmutableMap.of("Value", "545", "Comment", "Lets try \\\"it\\\", with slash quotes and comma", "Timestamp", "2018-05-08T10:00:00Z")
        ),
        new MapBasedInputRow(
            DateTimes.of("2018-05-09T10:00:00Z"),
            ImmutableList.of("Timestamp"),
            ImmutableMap.of("Value", "65", "Comment", "Here I write \\n slash n", "Timestamp", "2018-05-09T10:00:00Z")
        )
    );
    final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("Value", "Comment", "Timestamp"), null, null, false, 0);
    final InputEntityReader reader = format.createReader(
        new InputRowSchema(
            new TimestampSpec("Timestamp", "auto", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("Timestamp"))),
            ColumnsFilter.all()
        ),
        source,
        null
    );
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        final Iterator<InputRow> expectedRowIterator = expectedResults.iterator();
        while (iterator.hasNext()) {
            Assert.assertTrue(expectedRowIterator.hasNext());
            Assert.assertEquals(expectedRowIterator.next(), iterator.next());
        }
    }
}
Also used : MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)
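
The quoting behavior under test follows RFC 4180: inside a quoted field, a literal double quote is written as two double quotes, while a backslash has no special meaning and passes through unchanged (which is why the \" sequences survive into the expected comments). The same rule can be checked in isolation with opencsv's RFC4180Parser; this is illustrative only, and not a claim that CsvInputFormat uses opencsv internally in every Druid version:

import com.opencsv.RFC4180Parser;

// parseLine throws IOException; handle or declare it in real code.
RFC4180Parser parser = new RFC4180Parser();
String[] fields = parser.parseLine("3,\"Lets do some \"\"normal\"\" quotes\",2018-05-05T10:00:00Z");
System.out.println(fields[1]); // Lets do some "normal" quotes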

Aggregations

InputEntityReader (org.apache.druid.data.input.InputEntityReader): 58
Test (org.junit.Test): 56
InputRow (org.apache.druid.data.input.InputRow): 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 39
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 33
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 31
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 28
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 26
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 8
Configuration (org.apache.hadoop.conf.Configuration): 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5
AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest): 5
AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest): 5
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 5
KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity): 4
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 4
Headers (org.apache.kafka.common.header.Headers): 4
RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders): 4
BigDecimal (java.math.BigDecimal): 3