Search in sources :

Example 31 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class FlattenJSONBenchmarkUtil method getNestedParser.

/**
 * Builds a parser whose flatten spec explicitly lists the root and nested fields
 * of the nested benchmark events.
 *
 * <p>Root fields are registered by name. Every nested field is registered under
 * an output name equal to its path, with the path expression being that same
 * path prefixed by {@code "$."}. The timestamp is read from {@code "ts"} using
 * the {@code "iso"} format and the dimensions spec is {@code DimensionsSpec.EMPTY}.
 *
 * @return the parser created by the assembled {@code JSONParseSpec}
 */
public Parser getNestedParser() {
    // Nested paths registered between the root fields "d1" and "m3", in order.
    final String[] pathsBeforeM3 = {
        "e1.d1", "e1.d2",
        "e2.d3", "e2.d4", "e2.d5", "e2.d6",
        "e2.ad1[0]", "e2.ad1[1]", "e2.ad1[2]",
        "ae1[0].d1", "ae1[1].d1", "ae1[2].e1.d2"
    };
    // Nested paths registered after the root field "m3", in order.
    final String[] pathsAfterM3 = {
        "e3.m1", "e3.m2", "e3.m3", "e3.m4",
        "e3.am1[0]", "e3.am1[1]", "e3.am1[2]", "e3.am1[3]",
        "e4.e4.m4"
    };

    // Root fields "d2" and "m4" are intentionally NOT registered here.
    // NOTE(review): presumably they are left to be picked up via the 'true'
    // flag passed to JSONPathSpec below — confirm against JSONPathSpec.
    final List<JSONPathFieldSpec> fieldSpecs = new ArrayList<>();
    fieldSpecs.add(JSONPathFieldSpec.createRootField("ts"));
    fieldSpecs.add(JSONPathFieldSpec.createRootField("d1"));
    for (String path : pathsBeforeM3) {
        fieldSpecs.add(JSONPathFieldSpec.createNestedField(path, "$." + path));
    }
    fieldSpecs.add(JSONPathFieldSpec.createRootField("m3"));
    for (String path : pathsAfterM3) {
        fieldSpecs.add(JSONPathFieldSpec.createNestedField(path, "$." + path));
    }

    final TimestampSpec timestampSpec = new TimestampSpec("ts", "iso", null);
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);
    return new JSONParseSpec(timestampSpec, DimensionsSpec.EMPTY, pathSpec, null, null).makeParser();
}
Also used : ArrayList(java.util.ArrayList) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec)

Example 32 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class ProtobufParserBenchmark method setup.

/**
 * Prepares the benchmark state: two parse specs (one with a flatten spec, one
 * without), the protobuf bytes decoder, the proto inputs, and one
 * {@code ProtobufInputRowParser} per parse spec.
 *
 * <p>Both parse specs read the timestamp from {@code "timestamp"} in
 * {@code "iso"} format and declare the same four string dimensions; each spec
 * gets its own freshly constructed timestamp and dimensions objects.
 */
@Setup
public void setup() {
    // Parse spec that additionally flattens one root field and two path fields.
    final TimestampSpec nestedTimestamp = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec nestedDimensions = new DimensionsSpec(
        Lists.newArrayList(
            new StringDimensionSchema("event"),
            new StringDimensionSchema("id"),
            new StringDimensionSchema("someOtherId"),
            new StringDimensionSchema("isValid")
        )
    );
    final JSONPathSpec nestedFlattenSpec = new JSONPathSpec(
        true,
        Lists.newArrayList(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "eventType", "eventType"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "foobar", "$.foo.bar"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar0", "$.bar[0].bar")
        )
    );
    nestedParseSpec = new JSONParseSpec(nestedTimestamp, nestedDimensions, nestedFlattenSpec, null, null);

    // Parse spec with the same timestamp/dimensions but no flatten spec (null).
    final TimestampSpec flatTimestamp = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec flatDimensions = new DimensionsSpec(
        Lists.newArrayList(
            new StringDimensionSchema("event"),
            new StringDimensionSchema("id"),
            new StringDimensionSchema("someOtherId"),
            new StringDimensionSchema("isValid")
        )
    );
    flatParseSpec = new JSONParseSpec(flatTimestamp, flatDimensions, null, null, null);

    decoder = new FileBasedProtobufBytesDecoder("prototest.desc", "ProtoTestEvent");

    // The inputs are loaded from the path stored in protoFilePath.
    protoFilePath = "ProtoFile";
    protoInputs = getProtoInputs(protoFilePath);

    nestedParser = new ProtobufInputRowParser(nestedParseSpec, decoder, null, null);
    flatParser = new ProtobufInputRowParser(flatParseSpec, decoder, null, null);
}
Also used : ProtobufInputRowParser(org.apache.druid.data.input.protobuf.ProtobufInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) FileBasedProtobufBytesDecoder(org.apache.druid.data.input.protobuf.FileBasedProtobufBytesDecoder) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) Setup(org.openjdk.jmh.annotations.Setup)

Example 33 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class AvroStreamInputFormatTest method before.

/**
 * Initializes the shared test fixtures before each test: the timestamp spec,
 * the dimensions spec, the flatten spec, and the JSON mapper configuration.
 *
 * <p>The timestamp is read from the column {@code "nested"} in {@code "millis"}
 * format; that column is produced by the flatten spec from the path
 * {@code "someRecord.subLong"}.
 */
@Before
public void before() {
    timestampSpec = new TimestampSpec("nested", "millis", null);
    dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(DIMENSIONS));

    final JSONPathFieldSpec nestedField =
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "nested", "someRecord.subLong");
    flattenSpec = new JSONPathSpec(true, ImmutableList.of(nestedField));

    // Register every Jackson module exposed by the Avro extension on the mapper.
    final AvroExtensionsModule avroExtension = new AvroExtensionsModule();
    for (Module module : avroExtension.getJacksonModules()) {
        jsonMapper.registerModule(module);
    }

    // Make a DefaultObjectMapper available for injection under ObjectMapper.class.
    final InjectableValues.Std injectables = new InjectableValues.Std();
    jsonMapper.setInjectableValues(injectables.addValue(ObjectMapper.class, new DefaultObjectMapper()));
}
Also used : AvroExtensionsModule(org.apache.druid.data.input.avro.AvroExtensionsModule) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Module(com.fasterxml.jackson.databind.Module) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Before(org.junit.Before)

Example 34 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class JsonReaderTest method testSampleEmptyText.

/**
 * Samples an empty input and verifies that exactly one result is produced and
 * that it carries a parse exception.
 *
 * @throws IOException if closing the sampling iterator fails
 */
@Test
public void testSampleEmptyText() throws IOException {
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    );
    final JsonInputFormat format = new JsonInputFormat(
        flattenSpec,
        null,
        null,
        // make sure JsonReader is used
        false
    );

    // The input entity holds zero bytes of content.
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(""));

    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
        ColumnsFilter.all()
    );
    final InputEntityReader reader = format.createReader(schema, source, null);

    // Sampling the empty input is expected to yield exactly one element.
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRowListPlusRawValues sampled = iterator.next();
            numActualIterations++;
            // Each sampled element must report a parse exception.
            Assert.assertNotNull(sampled.getParseException());
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 35 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class JsonReaderTest method testParseMultipleRows.

/**
 * Reads an input containing three JSON objects and verifies that three rows
 * are produced, each with the expected timestamp, dimensions, and flattened
 * fields.
 *
 * <p>The value of {@code o.mg} in the n-th object is n, so the flattened
 * {@code path_omg} and {@code jq_omg} values are compared against the running
 * row count.
 *
 * @throws IOException if closing the row iterator fails
 */
@Test
public void testParseMultipleRows() throws IOException {
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    );
    final JsonInputFormat format = new JsonInputFormat(
        flattenSpec,
        null,
        null,
        // make sure JsonReader is used
        false
    );

    // Note: the first object is NOT followed by a newline, so the first two
    // objects share a line. NOTE(review): this looks intentional — confirm
    // before "fixing" the literal.
    final String firstRecord = "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}";
    final String secondRecord = "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":2}}\n";
    final String thirdRecord = "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":3}}\n";
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(firstRecord + secondRecord + thirdRecord));

    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
        ColumnsFilter.all()
    );
    final InputEntityReader reader = format.createReader(schema, source, null);

    final int numExpectedIterations = 3;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            numActualIterations++;
            // The 1-based row index doubles as the expected "o.mg" value.
            final String msgId = String.valueOf(numActualIterations);

            Assert.assertEquals(DateTimes.of("2019-01-01"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("jq_omg")));

            // Fields whose source keys are absent from the input come back empty.
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Aggregations

JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) 44, JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec) 44, Test (org.junit.Test) 34, InputEntityReader (org.apache.druid.data.input.InputEntityReader) 28, InputRow (org.apache.druid.data.input.InputRow) 27, InputRowSchema (org.apache.druid.data.input.InputRowSchema) 26, TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 25, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) 21, InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues) 15, ArrayList (java.util.ArrayList) 7, JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec) 6, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 5, Before (org.junit.Before) 5, StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema) 4, Module (com.fasterxml.jackson.databind.Module) 3, BigDecimal (java.math.BigDecimal) 3, DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper) 3, Configuration (org.apache.hadoop.conf.Configuration) 3, HashMap (java.util.HashMap) 2, InputFormat (org.apache.druid.data.input.InputFormat) 2