Search in sources :

Example 21 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class AvroOCFInputFormatTest method before.

@Before
public void before() {
    flattenSpec = new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "nested", "someRecord.subLong")));
    for (Module jacksonModule : new AvroExtensionsModule().getJacksonModules()) {
        jsonMapper.registerModule(jacksonModule);
    }
    jsonMapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper));
}
Also used : JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) Module(com.fasterxml.jackson.databind.Module) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Before(org.junit.Before)

Example 22 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class KafkaInputFormatTest method testSerde.

@Test
public void testSerde() throws JsonProcessingException {
    final ObjectMapper mapper = new ObjectMapper();
    KafkaInputFormat kif = new KafkaInputFormat(new KafkaStringHeaderFormat(null), // Key Format
    new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), // make sure JsonReader is used
    null, // make sure JsonReader is used
    null, // make sure JsonReader is used
    false), // Value Format
    new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), // make sure JsonReader is used
    null, // make sure JsonReader is used
    null, // make sure JsonReader is used
    false), "kafka.newheader.", "kafka.newkey.key", "kafka.newts.timestamp");
    Assert.assertEquals(format, kif);
    final byte[] formatBytes = mapper.writeValueAsBytes(format);
    final byte[] kifBytes = mapper.writeValueAsBytes(kif);
    Assert.assertTrue(Arrays.equals(formatBytes, kifBytes));
}
Also used : JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 23 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class KinesisSamplerSpecTest method testSample.

@Test(timeout = 10_000L)
public void testSample() throws Exception {
    EasyMock.expect(recordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID)).once();
    recordSupplier.assign(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID)));
    EasyMock.expectLastCall().once();
    recordSupplier.seekToEarliest(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID)));
    EasyMock.expectLastCall().once();
    EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(STREAM)).once();
    recordSupplier.close();
    EasyMock.expectLastCall().once();
    replayAll();
    KinesisSupervisorSpec supervisorSpec = new KinesisSupervisorSpec(null, DATA_SCHEMA, null, new KinesisSupervisorIOConfig(STREAM, new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of(), false), null, null, null, null, null, null, null, true, null, null, null, null, null, null, null, null, null, false), null, null, null, null, null, null, null, null, null, null, null, null);
    KinesisSamplerSpec samplerSpec = new TestableKinesisSamplerSpec(supervisorSpec, new SamplerConfig(5, null), new InputSourceSampler(), null);
    SamplerResponse response = samplerSpec.sample();
    verifyAll();
    Assert.assertEquals(5, response.getNumRowsRead());
    Assert.assertEquals(3, response.getNumRowsIndexed());
    Assert.assertEquals(5, response.getData().size());
    Iterator<SamplerResponse.SamplerResponseRow> it = response.getData().iterator();
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "2008").put("dim1", "a").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1199145600000L).put("dim1", "a").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "2009").put("dim1", "b").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1230768000000L).put("dim1", "b").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "2010").put("dim1", "c").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1262304000000L).put("dim1", "c").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "246140482-04-24T15:36:27.903Z").put("dim1", "x").put("dim2", "z").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), null, true, "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}]"), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(null, null, true, "Unable to parse row [unparseable] into JSON"), it.next());
    Assert.assertFalse(it.hasNext());
}
Also used : SamplerConfig(org.apache.druid.indexing.overlord.sampler.SamplerConfig) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) SamplerTestUtils(org.apache.druid.indexing.overlord.sampler.SamplerTestUtils) KinesisSupervisorSpec(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorSpec) KinesisSupervisorIOConfig(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorIOConfig) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputSourceSampler(org.apache.druid.indexing.overlord.sampler.InputSourceSampler) Test(org.junit.Test)

Example 24 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class OrcReaderTest method testOrcFile11Format.

// This test is migrated from OrcHadoopInputRowParserTest
@Test
public void testOrcFile11Format() throws IOException {
    final OrcInputFormat inputFormat = new OrcInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_middleListLength", "$.middle.list.length()"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1"))), null, new Configuration());
    final InputEntityReader reader = createReader(new TimestampSpec("ts", "millis", null), new DimensionsSpec(null), inputFormat, "example/orc-file-11-format.orc");
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int actualRowCount = 0;
        // Check the first row
        Assert.assertTrue(iterator.hasNext());
        InputRow row = iterator.next();
        actualRowCount++;
        Assert.assertEquals("false", Iterables.getOnlyElement(row.getDimension("boolean1")));
        Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("byte1")));
        Assert.assertEquals("1024", Iterables.getOnlyElement(row.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(row.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(row.getDimension("long1")));
        Assert.assertEquals("1.0", Iterables.getOnlyElement(row.getDimension("float1")));
        Assert.assertEquals("-15.0", Iterables.getOnlyElement(row.getDimension("double1")));
        Assert.assertEquals("AAECAwQAAA==", Iterables.getOnlyElement(row.getDimension("bytes1")));
        Assert.assertEquals("hi", Iterables.getOnlyElement(row.getDimension("string1")));
        Assert.assertEquals("1.23456786547456E7", Iterables.getOnlyElement(row.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("good", Iterables.getOnlyElement(row.getDimension("list_struct_string")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_middleListLength")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), row.getTimestamp());
        while (iterator.hasNext()) {
            actualRowCount++;
            row = iterator.next();
        }
        // Check the last row
        Assert.assertEquals("true", Iterables.getOnlyElement(row.getDimension("boolean1")));
        Assert.assertEquals("100", Iterables.getOnlyElement(row.getDimension("byte1")));
        Assert.assertEquals("2048", Iterables.getOnlyElement(row.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(row.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(row.getDimension("long1")));
        Assert.assertEquals("2.0", Iterables.getOnlyElement(row.getDimension("float1")));
        Assert.assertEquals("-5.0", Iterables.getOnlyElement(row.getDimension("double1")));
        Assert.assertEquals("", Iterables.getOnlyElement(row.getDimension("bytes1")));
        Assert.assertEquals("bye", Iterables.getOnlyElement(row.getDimension("string1")));
        Assert.assertEquals("1.23456786547457E7", Iterables.getOnlyElement(row.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("cat", Iterables.getOnlyElement(row.getDimension("list_struct_string")));
        Assert.assertEquals("5", Iterables.getOnlyElement(row.getDimension("map_struct_int")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:01.000Z"), row.getTimestamp());
        Assert.assertEquals(7500, actualRowCount);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 25 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class OrcReaderTest method testDate1900.

// This test is migrated from OrcHadoopInputRowParserTest
@Test
public void testDate1900() throws IOException {
    final InputEntityReader reader = createReader(new TimestampSpec("time", "millis", null), DimensionsSpec.builder().setDimensionExclusions(Collections.singletonList("time")).build(), new OrcInputFormat(new JSONPathSpec(true, null), null, new Configuration()), "example/TestOrcFile.testDate1900.orc");
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int actualRowCount = 0;
        Assert.assertTrue(iterator.hasNext());
        final InputRow row = iterator.next();
        actualRowCount++;
        Assert.assertEquals(1, row.getDimensions().size());
        Assert.assertEquals(DateTimes.of("1900-05-05T12:34:56.1Z"), row.getTimestamp());
        Assert.assertEquals("1900-12-25T00:00:00.000Z", Iterables.getOnlyElement(row.getDimension("date")));
        while (iterator.hasNext()) {
            actualRowCount++;
            iterator.next();
        }
        Assert.assertEquals(70000, actualRowCount);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Aggregations

JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)53 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)44 Test (org.junit.Test)42 InputEntityReader (org.apache.druid.data.input.InputEntityReader)33 InputRow (org.apache.druid.data.input.InputRow)32 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)30 InputRowSchema (org.apache.druid.data.input.InputRowSchema)28 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)24 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)17 ArrayList (java.util.ArrayList)7 JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec)6 JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat)6 Configuration (org.apache.hadoop.conf.Configuration)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 Before (org.junit.Before)5 StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)4 Module (com.fasterxml.jackson.databind.Module)3 BigDecimal (java.math.BigDecimal)3 DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper)3 HashMap (java.util.HashMap)2