Search in sources :

Example 11 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testFlat1Flatten.

/**
 * Reads {@code example/flattening/test_flat_1.parquet} with a flatten spec that lists four ROOT
 * fields and one PATH field ({@code $.listDim} exposed as {@code list}), verifies the first parsed
 * row, then samples the same file with a fresh reader and compares the serialized raw values of the
 * first sampled entry against {@code FLAT_JSON}.
 *
 * @throws IOException declared by the test; thrown by whichever helper or serialization call fails
 */
@Test
public void testFlat1Flatten() throws IOException {
    final String file = "example/flattening/test_flat_1.parquet";

    // Schema: auto-detected timestamp format plus four default-schema dimensions, no column filtering.
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3", "list")));
    final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    // Explicit field list; the JSONPathSpec is built with 'false' as its first argument.
    final List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim2", null),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim3", null),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "list", "$.listDim")
    );
    final JSONPathSpec flattenSpec = new JSONPathSpec(false, flattenExpr);

    // Pass 1: parse the rows and check the first one.
    final InputEntityReader rowReader = createReader(file, schema, flattenSpec);
    final List<InputRow> rows = readAllRows(rowReader);
    final InputRow firstRow = rows.get(0);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
    Assert.assertEquals("1", firstRow.getDimension("dim3").get(0));
    Assert.assertEquals("listDim1v1", firstRow.getDimension("list").get(0));
    Assert.assertEquals("listDim1v2", firstRow.getDimension("list").get(1));
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());

    // Pass 2: a new reader is created for sampling rather than reusing the one consumed above.
    final InputEntityReader sampleReader = createReader(file, schema, flattenSpec);
    final List<InputRowListPlusRawValues> sampled = sampleAllRows(sampleReader);
    final String firstRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues());
    Assert.assertEquals(FLAT_JSON, firstRawJson);
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 12 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class SeekableStreamSupervisorSpecTest method testSeekableStreamSupervisorSpecWithScaleDisable.

/**
 * Starts a {@code TestSeekableStreamSupervisor} alongside a {@code NoopTaskAutoScaler} and checks
 * that the task count reported by the supervisor's IO config is 1 both right after
 * {@code runInternal()} and again after a one-second wait.
 *
 * @throws InterruptedException if the sleep between the two task-count checks is interrupted
 */
@Test
public void testSeekableStreamSupervisorSpecWithScaleDisable() throws InterruptedException {
    // IO config for stream "stream"; the trailing braces create an anonymous subclass.
    final JsonInputFormat inputFormat = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of(), false);
    final SeekableStreamSupervisorIOConfig ioConfig = new SeekableStreamSupervisorIOConfig("stream", inputFormat, 1, 1, new Period("PT1H"), new Period("P1D"), new Period("PT30S"), false, new Period("PT30M"), null, null, null, null) {
    };

    // Supervisor spec mock: returns the locally built IO config.
    EasyMock.expect(spec.getSupervisorStateManagerConfig()).andReturn(supervisorConfig).anyTimes();
    EasyMock.expect(spec.getDataSchema()).andReturn(getDataSchema()).anyTimes();
    EasyMock.expect(spec.getIoConfig()).andReturn(ioConfig).anyTimes();
    EasyMock.expect(spec.getTuningConfig()).andReturn(getTuningConfig()).anyTimes();
    EasyMock.expect(spec.getEmitter()).andReturn(emitter).anyTimes();
    EasyMock.expect(spec.isSuspended()).andReturn(false).anyTimes();
    EasyMock.replay(spec);

    // Ingestion schema mock: note this one returns the class FIELD, not the local config above.
    EasyMock.expect(ingestionSchema.getIOConfig()).andReturn(this.seekableStreamSupervisorIOConfig).anyTimes();
    EasyMock.expect(ingestionSchema.getDataSchema()).andReturn(dataSchema).anyTimes();
    EasyMock.expect(ingestionSchema.getTuningConfig()).andReturn(seekableStreamSupervisorTuningConfig).anyTimes();
    EasyMock.replay(ingestionSchema);

    // Task master mock: both lookups report "absent".
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
    EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.absent()).anyTimes();
    EasyMock.replay(taskMaster);

    final TestSeekableStreamSupervisor supervisor = new TestSeekableStreamSupervisor(3);
    final NoopTaskAutoScaler autoScaler = new NoopTaskAutoScaler();
    supervisor.start();
    autoScaler.start();
    supervisor.runInternal();

    final int initialTaskCount = supervisor.getIoConfig().getTaskCount();
    Assert.assertEquals(1, initialTaskCount);

    // Wait one second, then confirm the task count is still 1.
    Thread.sleep(1000);
    final int taskCountAfterWait = supervisor.getIoConfig().getTaskCount();
    Assert.assertEquals(1, taskCountAfterWait);

    autoScaler.reset();
    autoScaler.stop();
}
Also used : SeekableStreamSupervisorIOConfig(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) Period(org.joda.time.Period) NoopTaskAutoScaler(org.apache.druid.indexing.seekablestream.supervisor.autoscaler.NoopTaskAutoScaler) Test(org.junit.Test)

Example 13 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class JSONParseSpecTest method testParseRow.

/**
 * Builds a {@code JSONParseSpec} with ROOT, PATH and JQ flatten fields, parses one JSON document
 * and checks the resulting map. For each field type one expression targets a value present in the
 * input ({@code baz}, {@code o.mg}) and one targets a key that is absent ({@code baz2},
 * {@code o.mg2}); the absent ones are expected to appear in the parsed map with a null value.
 */
@Test
public void testParseRow() {
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo")));
    final JSONPathSpec flattenSpec = new JSONPathSpec(true, ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
        new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
        new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
    ));
    final JSONParseSpec parseSpec = new JSONParseSpec(timestampSpec, dimensionsSpec, flattenSpec, null, false);

    final String json = "{\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}";

    final Map<String, Object> expectedRow = new HashMap<>();
    // Top-level keys expected straight from the input.
    expectedRow.put("foo", "x");
    expectedRow.put("baz", 4L);
    // Flattened fields whose source value exists.
    expectedRow.put("root_baz", 4L);
    expectedRow.put("path_omg", 1L);
    expectedRow.put("jq_omg", 1L);
    // Flattened fields whose source key is missing from the input.
    expectedRow.put("root_baz2", null);
    expectedRow.put("path_omg2", null);
    expectedRow.put("jq_omg2", null);

    final Parser<String, Object> parser = parseSpec.makeParser();
    final Map<String, Object> actualRow = parser.parseToMap(json);

    Assert.assertNotNull(actualRow);
    Assert.assertEquals(expectedRow, actualRow);
    Assert.assertNull(actualRow.get("bar"));
    Assert.assertNull(actualRow.get("buzz"));
    Assert.assertNull(actualRow.get("root_baz2"));
    Assert.assertNull(actualRow.get("jq_omg2"));
    Assert.assertNull(actualRow.get("path_omg2"));
}
Also used : HashMap(java.util.HashMap) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) Test(org.junit.Test)

Example 14 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class JsonLineReaderTest method testParseRowKeepNullColumns.

/**
 * Reads a single JSON line in which {@code bar} and {@code o.mg} are null, using a
 * {@code JsonInputFormat} whose last constructor argument is {@code true} (presumably the
 * keep-null-columns switch, judging by the test name - confirm against the constructor).
 * Expects exactly one row that still lists {@code path_omg}, {@code timestamp}, {@code bar} and
 * {@code foo} as dimensions, with empty values for the two null-backed ones.
 *
 * @throws IOException declared by the test; may come from reading or from closing the iterator
 */
@Test
public void testParseRowKeepNullColumns() throws IOException {
    final JSONPathFieldSpec pathField = new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg");
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(pathField)), null, true);

    final String json = "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"o\":{\"mg\":null}}";
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(json));

    // No dimensions are declared up front; the assertion below checks what the reader reports.
    final InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())), ColumnsFilter.all());
    final InputEntityReader reader = format.createReader(schema, source, null);

    try (CloseableIterator<InputRow> rowIterator = reader.read()) {
        int rowsSeen = 0;
        while (rowIterator.hasNext()) {
            final InputRow current = rowIterator.next();
            rowsSeen++;
            Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "bar", "foo"), current.getDimensions());
            Assert.assertTrue(current.getDimension("bar").isEmpty());
            Assert.assertEquals("x", Iterables.getOnlyElement(current.getDimension("foo")));
            Assert.assertTrue(current.getDimension("path_omg").isEmpty());
        }
        // The input holds one JSON object, so exactly one row is expected.
        Assert.assertEquals(1, rowsSeen);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 15 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

the class JsonLineReaderTest method testKeepNullColumnsWithNoNullValues.

/**
 * Reads a single JSON line in which every referenced value is non-null, using a
 * {@code JsonInputFormat} whose last constructor argument is {@code true} (presumably the
 * keep-null-columns switch, judging by the test name - confirm against the constructor).
 * Expects exactly one row with dimensions {@code path_omg}, {@code timestamp}, {@code bar} and
 * {@code foo}, each of the three checked dimensions holding a single value.
 *
 * @throws IOException declared by the test; may come from reading or from closing the iterator
 */
@Test
public void testKeepNullColumnsWithNoNullValues() throws IOException {
    final JSONPathFieldSpec pathField = new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg");
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(pathField)), null, true);

    final String json = "{\"timestamp\":\"2019-01-01\",\"bar\":1,\"foo\":\"x\",\"o\":{\"mg\":\"a\"}}";
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(json));

    // No dimensions are declared up front; the assertion below checks what the reader reports.
    final InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())), ColumnsFilter.all());
    final InputEntityReader reader = format.createReader(schema, source, null);

    try (CloseableIterator<InputRow> rowIterator = reader.read()) {
        int rowsSeen = 0;
        while (rowIterator.hasNext()) {
            final InputRow current = rowIterator.next();
            rowsSeen++;
            Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "bar", "foo"), current.getDimensions());
            // The numeric input value 1 is expected back as the string "1".
            Assert.assertEquals("1", Iterables.getOnlyElement(current.getDimension("bar")));
            Assert.assertEquals("x", Iterables.getOnlyElement(current.getDimension("foo")));
            Assert.assertEquals("a", Iterables.getOnlyElement(current.getDimension("path_omg")));
        }
        // The input holds one JSON object, so exactly one row is expected.
        Assert.assertEquals(1, rowsSeen);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Aggregations

JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 53, JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 44, Test (org.junit.Test): 42, InputEntityReader (org.apache.druid.data.input.InputEntityReader): 33, InputRow (org.apache.druid.data.input.InputRow): 32, TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 30, InputRowSchema (org.apache.druid.data.input.InputRowSchema): 28, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 24, InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 17, ArrayList (java.util.ArrayList): 7, JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec): 6, JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat): 6, Configuration (org.apache.hadoop.conf.Configuration): 6, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5, Before (org.junit.Before): 5, StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema): 4, Module (com.fasterxml.jackson.databind.Module): 3, BigDecimal (java.math.BigDecimal): 3, DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 3, HashMap (java.util.HashMap): 2