Search in sources:

Example 21 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class OrcReaderTest method testOrcFile11Format.

// This test is migrated from OrcHadoopInputRowParserTest
@Test
public void testOrcFile11Format() throws IOException {
    final OrcInputFormat inputFormat = new OrcInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_middleListLength", "$.middle.list.length()"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1"))), null, new Configuration());
    final InputEntityReader reader = createReader(new TimestampSpec("ts", "millis", null), new DimensionsSpec(null), inputFormat, "example/orc-file-11-format.orc");
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int actualRowCount = 0;
        // Check the first row
        Assert.assertTrue(iterator.hasNext());
        InputRow row = iterator.next();
        actualRowCount++;
        Assert.assertEquals("false", Iterables.getOnlyElement(row.getDimension("boolean1")));
        Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("byte1")));
        Assert.assertEquals("1024", Iterables.getOnlyElement(row.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(row.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(row.getDimension("long1")));
        Assert.assertEquals("1.0", Iterables.getOnlyElement(row.getDimension("float1")));
        Assert.assertEquals("-15.0", Iterables.getOnlyElement(row.getDimension("double1")));
        Assert.assertEquals("AAECAwQAAA==", Iterables.getOnlyElement(row.getDimension("bytes1")));
        Assert.assertEquals("hi", Iterables.getOnlyElement(row.getDimension("string1")));
        Assert.assertEquals("1.23456786547456E7", Iterables.getOnlyElement(row.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("good", Iterables.getOnlyElement(row.getDimension("list_struct_string")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_middleListLength")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), row.getTimestamp());
        while (iterator.hasNext()) {
            actualRowCount++;
            row = iterator.next();
        }
        // Check the last row
        Assert.assertEquals("true", Iterables.getOnlyElement(row.getDimension("boolean1")));
        Assert.assertEquals("100", Iterables.getOnlyElement(row.getDimension("byte1")));
        Assert.assertEquals("2048", Iterables.getOnlyElement(row.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(row.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(row.getDimension("long1")));
        Assert.assertEquals("2.0", Iterables.getOnlyElement(row.getDimension("float1")));
        Assert.assertEquals("-5.0", Iterables.getOnlyElement(row.getDimension("double1")));
        Assert.assertEquals("", Iterables.getOnlyElement(row.getDimension("bytes1")));
        Assert.assertEquals("bye", Iterables.getOnlyElement(row.getDimension("string1")));
        Assert.assertEquals("1.23456786547457E7", Iterables.getOnlyElement(row.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("cat", Iterables.getOnlyElement(row.getDimension("list_struct_string")));
        Assert.assertEquals("5", Iterables.getOnlyElement(row.getDimension("map_struct_int")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:01.000Z"), row.getTimestamp());
        Assert.assertEquals(7500, actualRowCount);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 22 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class OrcReaderTest method testTest2.

// This test is migrated from OrcHadoopInputRowParserTest
@Test
public void testTest2() throws IOException {
    final InputFormat inputFormat = new OrcInputFormat(new JSONPathSpec(true, Collections.singletonList(new JSONPathFieldSpec(JSONPathFieldType.PATH, "col7-subcol7", "$.col7.subcol7"))), null, new Configuration());
    final InputEntityReader reader = createReader(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(null), inputFormat, "example/test_2.orc");
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        Assert.assertTrue(iterator.hasNext());
        final InputRow row = iterator.next();
        Assert.assertEquals(DateTimes.of("2016-01-01T00:00:00.000Z"), row.getTimestamp());
        Assert.assertEquals("bar", Iterables.getOnlyElement(row.getDimension("col1")));
        Assert.assertEquals(ImmutableList.of("dat1", "dat2", "dat3"), row.getDimension("col2"));
        Assert.assertEquals("1.1", Iterables.getOnlyElement(row.getDimension("col3")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("col4")));
        Assert.assertEquals("3.5", Iterables.getOnlyElement(row.getDimension("col5")));
        Assert.assertTrue(row.getDimension("col6").isEmpty());
        Assert.assertFalse(iterator.hasNext());
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputFormat(org.apache.druid.data.input.InputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 23 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class InputRowParserSerdeTest method testFlattenParse.

@Test
public void testFlattenParse() throws Exception {
    List<JSONPathFieldSpec> fields = new ArrayList<>();
    fields.add(JSONPathFieldSpec.createNestedField("foobar1", "$.foo.bar1"));
    fields.add(JSONPathFieldSpec.createNestedField("foobar2", "$.foo.bar2"));
    fields.add(JSONPathFieldSpec.createNestedField("baz0", "$.baz[0]"));
    fields.add(JSONPathFieldSpec.createNestedField("baz1", "$.baz[1]"));
    fields.add(JSONPathFieldSpec.createNestedField("baz2", "$.baz[2]"));
    fields.add(JSONPathFieldSpec.createNestedField("hey0barx", "$.hey[0].barx"));
    fields.add(JSONPathFieldSpec.createNestedField("metA", "$.met.a"));
    fields.add(JSONPathFieldSpec.createNestedField("missing", "$.nonexistent.nested.field"));
    fields.add(JSONPathFieldSpec.createRootField("timestamp"));
    fields.add(JSONPathFieldSpec.createRootField("foo.bar1"));
    JSONPathSpec flattenSpec = new JSONPathSpec(true, fields);
    final StringInputRowParser parser = new StringInputRowParser(new JSONParseSpec(new TimestampSpec("timestamp", "iso", null), DimensionsSpec.EMPTY, flattenSpec, null, null), null);
    final StringInputRowParser parser2 = jsonMapper.readValue(jsonMapper.writeValueAsBytes(parser), StringInputRowParser.class);
    final InputRow parsed = parser2.parse("{\"blah\":[4,5,6], \"newmet\":5, \"foo\":{\"bar1\":\"aaa\", \"bar2\":\"bbb\"}, \"baz\":[1,2,3], \"timestamp\":\"2999\", \"foo.bar1\":\"Hello world!\", \"hey\":[{\"barx\":\"asdf\"}], \"met\":{\"a\":456}}");
    Assert.assertEquals(ImmutableList.of("foobar1", "foobar2", "baz0", "baz1", "baz2", "hey0barx", "metA", "missing", "timestamp", "foo.bar1", "blah", "newmet", "baz"), parsed.getDimensions());
    Assert.assertEquals(ImmutableList.of("aaa"), parsed.getDimension("foobar1"));
    Assert.assertEquals(ImmutableList.of("bbb"), parsed.getDimension("foobar2"));
    Assert.assertEquals(ImmutableList.of("1"), parsed.getDimension("baz0"));
    Assert.assertEquals(ImmutableList.of("2"), parsed.getDimension("baz1"));
    Assert.assertEquals(ImmutableList.of("3"), parsed.getDimension("baz2"));
    Assert.assertEquals(ImmutableList.of("Hello world!"), parsed.getDimension("foo.bar1"));
    Assert.assertEquals(ImmutableList.of("asdf"), parsed.getDimension("hey0barx"));
    Assert.assertEquals(ImmutableList.of("456"), parsed.getDimension("metA"));
    Assert.assertEquals(ImmutableList.of("5"), parsed.getDimension("newmet"));
    Assert.assertEquals(ImmutableList.of(), parsed.getDimension("missing"));
    Assert.assertEquals(DateTimes.of("2999").getMillis(), parsed.getTimestampFromEpoch());
    String testSpec = "{\"enabled\": true,\"useFieldDiscovery\": true, \"fields\": [\"parseThisRootField\"]}";
    final JSONPathSpec parsedSpec = jsonMapper.readValue(testSpec, JSONPathSpec.class);
    List<JSONPathFieldSpec> fieldSpecs = parsedSpec.getFields();
    Assert.assertEquals(JSONPathFieldType.ROOT, fieldSpecs.get(0).getType());
    Assert.assertEquals("parseThisRootField", fieldSpecs.get(0).getName());
    Assert.assertEquals("parseThisRootField", fieldSpecs.get(0).getExpr());
}
Also used : ArrayList(java.util.ArrayList) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) Test(org.junit.Test)

Example 24 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class JSONParseSpecTest method testParseRowWithConditional.

@Test
public void testParseRowWithConditional() {
    final JSONParseSpec parseSpec = new JSONParseSpec(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))), new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo"))), null, false);
    final Map<String, Object> expected = new HashMap<>();
    expected.put("foo", new ArrayList());
    expected.put("baz", null);
    expected.put("bar", Collections.singletonList("test"));
    final Parser<String, Object> parser = parseSpec.makeParser();
    final Map<String, Object> parsedRow = parser.parseToMap("{\"something_else\": {\"foo\": \"test\"}}");
    Assert.assertNotNull(parsedRow);
    Assert.assertEquals(expected, parsedRow);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) Test(org.junit.Test)

Example 25 with JSONPathFieldSpec

use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in project druid by druid-io.

the class JsonLineReaderTest method testFalseKeepNullColumns.

@Test
public void testFalseKeepNullColumns() throws IOException {
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"))), null, false);
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"o\":{\"mg\":\"a\"}}"));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())), ColumnsFilter.all()), source, null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "foo"), row.getDimensions());
            Assert.assertTrue(row.getDimension("bar").isEmpty());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("a", Iterables.getOnlyElement(row.getDimension("path_omg")));
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Aggregations

JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) 44, JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec) 44, Test (org.junit.Test) 34, InputEntityReader (org.apache.druid.data.input.InputEntityReader) 28, InputRow (org.apache.druid.data.input.InputRow) 27, InputRowSchema (org.apache.druid.data.input.InputRowSchema) 26, TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 25, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) 21, InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues) 15, ArrayList (java.util.ArrayList) 7, JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec) 6, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 5, Before (org.junit.Before) 5, StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema) 4, Module (com.fasterxml.jackson.databind.Module) 3, BigDecimal (java.math.BigDecimal) 3, DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper) 3, Configuration (org.apache.hadoop.conf.Configuration) 3, HashMap (java.util.HashMap) 2, InputFormat (org.apache.druid.data.input.InputFormat) 2