Search in sources :

Example 51 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

In the class OrcReaderTest, the method testJsonPathFunctions.

/**
 * Reads a single-row ORC file and checks the values produced by JSONPath
 * function expressions applied to its {@code list} column.
 *
 * schema: struct<string1:string, list:array<int>, ts:timestamp>
 * data:   {"dim1","[7,8,9]","2000-03-12 15:00:00"}
 */
@Test
public void testJsonPathFunctions() throws IOException {
    // One flattened field per JSONPath function evaluated against "$.list".
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "min", "$.list.min()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "max", "$.list.max()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "avg", "$.list.avg()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "len", "$.list.length()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "sum", "$.list.sum()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "stddev", "$.list.stddev()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "append", "$.list.append(10)")
        )
    );
    final OrcInputFormat inputFormat = new OrcInputFormat(flattenSpec, null, new Configuration());
    final TimestampSpec timestampSpec = new TimestampSpec("ts", "millis", null);
    final InputEntityReader reader = createReader(
        timestampSpec,
        new DimensionsSpec(null),
        inputFormat,
        "example/test_json_path_functions.orc"
    );

    try (CloseableIterator<InputRow> rows = reader.read()) {
        int rowsSeen = 0;
        while (rows.hasNext()) {
            final InputRow row = rows.next();
            rowsSeen++;

            final String min = Iterables.getOnlyElement(row.getDimension("min"));
            Assert.assertEquals("7.0", min);
            final String avg = Iterables.getOnlyElement(row.getDimension("avg"));
            Assert.assertEquals("8.0", avg);
            final String max = Iterables.getOnlyElement(row.getDimension("max"));
            Assert.assertEquals("9.0", max);
            final String sum = Iterables.getOnlyElement(row.getDimension("sum"));
            Assert.assertEquals("24.0", sum);
            final String len = Iterables.getOnlyElement(row.getDimension("len"));
            Assert.assertEquals("3", len);

            // Population variance of [7,8,9] is 2/3, so stddev is sqrt(2/3), approximately 0.8165.
            final double stddev = Double.parseDouble(Iterables.getOnlyElement(row.getDimension("stddev")));
            Assert.assertEquals(0.8165, stddev, 0.0001);

            // append is not supported, so the "append" field yields no values.
            Assert.assertEquals(Collections.emptyList(), row.getDimension("append"));
        }
        // The fixture file is expected to hold exactly one row.
        Assert.assertEquals(1, rowsSeen);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 52 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

In the class KafkaInputFormatTest, the method testWithOutKeyAndHeaderSpecs.

/**
 * Builds a {@code KafkaInputFormat} whose first two constructor arguments are
 * {@code null} (presumably the header and key formats, per the test name — confirm)
 * and checks that a JSON value payload is still parsed and flattened, while the
 * key dimension comes back empty.
 */
@Test
public void testWithOutKeyAndHeaderSpecs() throws IOException {
    final byte[] payload = StringUtils.toUtf8(
        "{\n"
        + "    \"timestamp\": \"2021-06-24\",\n"
        + "    \"bar\": null,\n"
        + "    \"foo\": \"x\",\n"
        + "    \"baz\": 4,\n"
        + "    \"o\": {\n"
        + "        \"mg\": 1\n"
        + "    }\n"
        + "}"
    );
    Headers headers = new RecordHeaders(SAMPLE_HEADERS);
    final ConsumerRecord<byte[], byte[]> kafkaRecord =
        new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, null, payload, headers);
    inputEntity = new KafkaRecordEntity(kafkaRecord);

    // Flatten spec covering ROOT, PATH and JQ field types; the "*2" fields point at
    // names that do not occur in the payload above.
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    );
    // Value Format
    final JsonInputFormat valueFormat = new JsonInputFormat(
        flattenSpec,
        // make sure JsonReader is used
        null,
        null,
        false
    );
    KafkaInputFormat localFormat = new KafkaInputFormat(
        null,
        null,
        valueFormat,
        "kafka.newheader.",
        "kafka.newkey.",
        "kafka.newts."
    );

    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newts.timestamp"))
    );
    final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
    final InputEntityReader reader = localFormat.createReader(schema, inputEntity, null);

    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> rows = reader.read()) {
        int numActualIterations = 0;
        while (rows.hasNext()) {
            final InputRow row = rows.next();

            // Key verification: no key values are expected on this row.
            Assert.assertTrue(row.getDimension("kafka.newkey.key").isEmpty());

            // Plain and flattened values taken from the JSON payload.
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));

            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 53 with JSONPathSpec

use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.

In the class SeekableStreamSupervisorStateTest, the method expectEmitterSupervisor.

/**
 * Creates the {@code spec} mock, records the expectations used by the
 * emitter-related supervisor tests on it and on {@code recordSupplier},
 * {@code taskStorage} and {@code taskQueue}, then switches the mocks to replay mode.
 *
 * @param suspended value that {@code spec.isSuspended()} is set up to return
 * @throws EntryExistsException declared by the signature; presumably required by
 *                              the recorded {@code taskQueue.add} call — confirm
 */
private void expectEmitterSupervisor(boolean suspended) throws EntryExistsException {
    spec = createMock(SeekableStreamSupervisorSpec.class);

    EasyMock.expect(spec.getSupervisorStateManagerConfig()).andReturn(supervisorConfig).anyTimes();
    EasyMock.expect(spec.getDataSchema()).andReturn(getDataSchema()).anyTimes();

    // IO config built as an empty anonymous subclass of SeekableStreamSupervisorIOConfig.
    final JsonInputFormat inputFormat =
        new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of(), false);
    final SeekableStreamSupervisorIOConfig ioConfig = new SeekableStreamSupervisorIOConfig(
        "stream",
        inputFormat,
        1,
        1,
        new Period("PT1H"),
        new Period("PT1S"),
        new Period("PT30S"),
        false,
        new Period("PT30M"),
        null,
        null,
        null,
        null
    ) {
    };
    EasyMock.expect(spec.getIoConfig()).andReturn(ioConfig).anyTimes();

    EasyMock.expect(spec.getTuningConfig()).andReturn(getTuningConfig()).anyTimes();
    EasyMock.expect(spec.getEmitter()).andReturn(emitter).anyTimes();

    // Monitor scheduler config whose emitter period is overridden to one second.
    final DruidMonitorSchedulerConfig monitorSchedulerConfig = new DruidMonitorSchedulerConfig() {

        @Override
        public Duration getEmitterPeriod() {
            return new Period("PT1S").toStandardDuration();
        }
    };
    EasyMock.expect(spec.getMonitorSchedulerConfig()).andReturn(monitorSchedulerConfig).anyTimes();

    EasyMock.expect(spec.isSuspended()).andReturn(suspended).anyTimes();
    EasyMock.expect(spec.getType()).andReturn("test").anyTimes();

    // Expectations on the collaborators other than the spec itself.
    EasyMock.expect(recordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID)).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.anyObject())).andReturn(true).anyTimes();

    replayAll();
}
Also used : DruidMonitorSchedulerConfig(org.apache.druid.server.metrics.DruidMonitorSchedulerConfig) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) Period(org.joda.time.Period)

Aggregations

JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec) 53, JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) 44, Test (org.junit.Test) 42, InputEntityReader (org.apache.druid.data.input.InputEntityReader) 33, InputRow (org.apache.druid.data.input.InputRow) 32, TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 30, InputRowSchema (org.apache.druid.data.input.InputRowSchema) 28, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) 24, InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues) 17, ArrayList (java.util.ArrayList) 7, JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec) 6, JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat) 6, Configuration (org.apache.hadoop.conf.Configuration) 6, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 5, Before (org.junit.Before) 5, StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema) 4, Module (com.fasterxml.jackson.databind.Module) 3, BigDecimal (java.math.BigDecimal) 3, DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper) 3, HashMap (java.util.HashMap) 2