
Example 11 with JsonInputFormat

Use of org.apache.druid.data.input.impl.JsonInputFormat in project druid by druid-io.

From the class GoogleCloudStorageInputSourceTest, method testWithPrefixesSplit.

@Test
public void testWithPrefixesSplit() throws IOException {
    EasyMock.reset(STORAGE);
    EasyMock.reset(INPUT_DATA_CONFIG);
    addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)));
    addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)));
    EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.replay(STORAGE);
    EasyMock.replay(INPUT_DATA_CONFIG);
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
    // maxNumFiles = 1 in the split hint below caps every split at a single object
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), new MaxSizeSplitHintSpec(null, 1));
    Assert.assertEquals(EXPECTED_OBJECTS, splits.map(InputSplit::get).collect(Collectors.toList()));
}
Also used: JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation), InputSplit (org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
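The split hint used above, MaxSizeSplitHintSpec(null, 1), leaves the size limit at its default but caps each split at one file, which is why the assertion expects one split per cloud object. A minimal standalone sketch of that behavior, assuming the SplitHintSpec.split signature and the InputFileAttribute(long) constructor from the Druid versions these tests target:

import java.util.Iterator;
import java.util.List;
import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.InputFileAttribute;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;

public class OnePerSplitSketch {
    public static void main(String[] args) {
        // default byte cap, maxNumFiles = 1: every group holds exactly one entry
        MaxSizeSplitHintSpec hint = new MaxSizeSplitHintSpec(null, 1);
        Iterator<List<String>> groups = hint.split(
            ImmutableList.of("bucket/foo/file1.json", "bucket/bar/file2.json").iterator(),
            name -> new InputFileAttribute(10L)); // sizes are irrelevant with a file cap of 1
        while (groups.hasNext()) {
            System.out.println(groups.next()); // [bucket/foo/file1.json], then [bucket/bar/file2.json]
        }
    }
}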

Example 12 with JsonInputFormat

Use of org.apache.druid.data.input.impl.JsonInputFormat in project druid by druid-io.

From the class GoogleCloudStorageInputSourceTest, method testCreateSplitsWithSplitHintSpecRespectingHint.

@Test
public void testCreateSplitsWithSplitHintSpecRespectingHint() throws IOException {
    EasyMock.reset(STORAGE);
    EasyMock.reset(INPUT_DATA_CONFIG);
    addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)));
    addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)));
    EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.replay(STORAGE);
    EasyMock.replay(INPUT_DATA_CONFIG);
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
    // a size cap of three objects' worth of bytes and no file-count cap packs all objects into one split
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null));
    Assert.assertEquals(ImmutableList.of(EXPECTED_URIS.stream().map(CloudObjectLocation::new).collect(Collectors.toList())), splits.map(InputSplit::get).collect(Collectors.toList()));
}
Also used: JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation), HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes), InputSplit (org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
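This variant exercises the size-based limit instead: the cap of CONTENT.length * 3L bytes is enough to hold all of the listed objects, and with no file-count cap they collapse into a single split, hence the one-element expected list containing every CloudObjectLocation. A standalone sketch of the packing rule, with made-up names and sizes:

import java.util.Iterator;
import java.util.List;
import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.InputFileAttribute;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.java.util.common.HumanReadableBytes;

public class SizeCappedSplitSketch {
    public static void main(String[] args) {
        // 30-byte cap, no file-count cap: 10-byte "files" pack three per group
        MaxSizeSplitHintSpec hint = new MaxSizeSplitHintSpec(new HumanReadableBytes(30L), null);
        Iterator<List<String>> groups = hint.split(
            ImmutableList.of("a", "b", "c", "d").iterator(),
            name -> new InputFileAttribute(10L));
        while (groups.hasNext()) {
            System.out.println(groups.next()); // [a, b, c], then [d]
        }
    }
}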

Example 13 with JsonInputFormat

Use of org.apache.druid.data.input.impl.JsonInputFormat in project druid by druid-io.

From the class KafkaInputFormatTest, method testSerde.

@Test
public void testSerde() throws JsonProcessingException {
    final ObjectMapper mapper = new ObjectMapper();
    KafkaInputFormat kif = new KafkaInputFormat(
        new KafkaStringHeaderFormat(null),
        // Key format; the trailing false makes sure JsonReader is used
        new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), null, null, false),
        // Value format; the trailing false makes sure JsonReader is used
        new JsonInputFormat(
            new JSONPathSpec(
                true,
                ImmutableList.of(
                    new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
                    new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
                    new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
                    new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
                    new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
                    new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))),
            null,
            null,
            false),
        "kafka.newheader.",
        "kafka.newkey.key",
        "kafka.newts.timestamp");
    Assert.assertEquals(format, kif); // 'format' is an equivalent instance defined elsewhere in the test class
    final byte[] formatBytes = mapper.writeValueAsBytes(format);
    final byte[] kifBytes = mapper.writeValueAsBytes(kif);
    Assert.assertTrue(Arrays.equals(formatBytes, kifBytes));
}
Also used: JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec), JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Test (org.junit.Test)
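The round trip works with a bare ObjectMapper because these formats are plain Jackson-annotated beans with value-based equals. A smaller sketch of the same idea using just JsonInputFormat, with the constructor call copied from the GCS examples above:

import java.util.Arrays;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.data.input.impl.JsonInputFormat;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;

public class JsonInputFormatSerdeSketch {
    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        // two logically identical formats...
        JsonInputFormat a = new JsonInputFormat(JSONPathSpec.DEFAULT, null, null);
        JsonInputFormat b = new JsonInputFormat(JSONPathSpec.DEFAULT, null, null);
        // ...compare equal and serialize to byte-identical JSON, which is
        // exactly what testSerde asserts for the composite KafkaInputFormat
        System.out.println(a.equals(b)); // true
        System.out.println(Arrays.equals(mapper.writeValueAsBytes(a), mapper.writeValueAsBytes(b))); // true
    }
}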

Example 14 with JsonInputFormat

Use of org.apache.druid.data.input.impl.JsonInputFormat in project druid by druid-io.

From the class KafkaSamplerSpecTest, method testSample.

@Test(timeout = 30_000L)
public void testSample() {
    insertData(generateRecords(TOPIC));
    // Only the topic, input format, consumer properties, and one boolean flag are
    // set explicitly on the IO config below; the long runs of nulls take the defaults.
    KafkaSupervisorSpec supervisorSpec = new KafkaSupervisorSpec(null, DATA_SCHEMA, null, new KafkaSupervisorIOConfig(TOPIC, new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), null, null, null, kafkaServer.consumerProperties(), null, null, null, null, true, null, null, null, null), null, null, null, null, null, null, null, null, null, null, null);
    KafkaSamplerSpec samplerSpec = new KafkaSamplerSpec(supervisorSpec, new SamplerConfig(5, null), new InputSourceSampler(), OBJECT_MAPPER);
    SamplerResponse response = samplerSpec.sample();
    Assert.assertEquals(5, response.getNumRowsRead());
    Assert.assertEquals(3, response.getNumRowsIndexed());
    Assert.assertEquals(5, response.getData().size());
    Iterator<SamplerResponse.SamplerResponseRow> it = response.getData().iterator();
    // Five rows in read order: 2008, 2009, and 2010 parse and index; the
    // year-246140482 timestamp cannot be represented as long millis; the last
    // record is not JSON at all. Hence 5 read, 3 indexed.
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2008").put("dim1", "a").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1199145600000L).put("dim1", "a").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2009").put("dim1", "b").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1230768000000L).put("dim1", "b").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2010").put("dim1", "c").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1262304000000L).put("dim1", "c").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "246140482-04-24T15:36:27.903Z").put("dim1", "x").put("dim2", "z").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), null, true, "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}]"), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(null, null, true, "Unable to parse row [unparseable] into JSON"), it.next());
    Assert.assertFalse(it.hasNext());
}
Also used: SamplerConfig (org.apache.druid.indexing.overlord.sampler.SamplerConfig), SamplerResponse (org.apache.druid.client.indexing.SamplerResponse), SamplerTestUtils (org.apache.druid.indexing.overlord.sampler.SamplerTestUtils), JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), KafkaSupervisorIOConfig (org.apache.druid.indexing.kafka.supervisor.KafkaSupervisorIOConfig), InputSourceSampler (org.apache.druid.indexing.overlord.sampler.InputSourceSampler), KafkaSupervisorSpec (org.apache.druid.indexing.kafka.supervisor.KafkaSupervisorSpec), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
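The arithmetic behind the assertions: five records are read; the 2008, 2009, and 2010 rows parse and index; the year-246140482 timestamp cannot be represented as long milliseconds; and the final record is not JSON, leaving three indexed rows out of five read. A hypothetical helper, not part of the test, that prints the same summary using only the SamplerResponse accessors the assertions already call:

import org.apache.druid.client.indexing.SamplerResponse;

public class SamplerSummary {
    static void print(SamplerResponse response) {
        System.out.printf("read=%d indexed=%d returned=%d%n",
            response.getNumRowsRead(),
            response.getNumRowsIndexed(),
            response.getData().size());
        // every record comes back in read order, parseable or not
        response.getData().forEach(System.out::println);
    }
}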

Example 15 with JsonInputFormat

Use of org.apache.druid.data.input.impl.JsonInputFormat in project druid by druid-io.

From the class KinesisSamplerSpecTest, method testSample.

@Test(timeout = 10_000L)
public void testSample() throws Exception {
    EasyMock.expect(recordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID)).once();
    recordSupplier.assign(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID)));
    EasyMock.expectLastCall().once();
    recordSupplier.seekToEarliest(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID)));
    EasyMock.expectLastCall().once();
    EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(STREAM)).once();
    recordSupplier.close();
    EasyMock.expectLastCall().once();
    replayAll();
    // As in the Kafka test, most supervisor arguments are null to take defaults;
    // the stream name and the JSON input format are the parts under test.
    KinesisSupervisorSpec supervisorSpec = new KinesisSupervisorSpec(null, DATA_SCHEMA, null, new KinesisSupervisorIOConfig(STREAM, new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of(), false), null, null, null, null, null, null, null, true, null, null, null, null, null, null, null, null, null, false), null, null, null, null, null, null, null, null, null, null, null, null);
    KinesisSamplerSpec samplerSpec = new TestableKinesisSamplerSpec(supervisorSpec, new SamplerConfig(5, null), new InputSourceSampler(), null);
    SamplerResponse response = samplerSpec.sample();
    verifyAll();
    Assert.assertEquals(5, response.getNumRowsRead());
    Assert.assertEquals(3, response.getNumRowsIndexed());
    Assert.assertEquals(5, response.getData().size());
    Iterator<SamplerResponse.SamplerResponseRow> it = response.getData().iterator();
    // Same five rows as the Kafka test: three parse and index, one has an
    // unrepresentable timestamp, and one is not JSON. Hence 5 read, 3 indexed.
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "2008").put("dim1", "a").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1199145600000L).put("dim1", "a").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "2009").put("dim1", "b").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1230768000000L).put("dim1", "b").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "2010").put("dim1", "c").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1262304000000L).put("dim1", "c").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(ImmutableMap.<String, Object>builder().put("timestamp", "246140482-04-24T15:36:27.903Z").put("dim1", "x").put("dim2", "z").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), null, true, "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}]"), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(null, null, true, "Unable to parse row [unparseable] into JSON"), it.next());
    Assert.assertFalse(it.hasNext());
}
Also used: SamplerConfig (org.apache.druid.indexing.overlord.sampler.SamplerConfig), SamplerResponse (org.apache.druid.client.indexing.SamplerResponse), SamplerTestUtils (org.apache.druid.indexing.overlord.sampler.SamplerTestUtils), KinesisSupervisorSpec (org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorSpec), KinesisSupervisorIOConfig (org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorIOConfig), JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec), InputSourceSampler (org.apache.druid.indexing.overlord.sampler.InputSourceSampler), Test (org.junit.Test)
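The Kinesis test asserts the same five rows as the Kafka one, underlining that the sampler pipeline is transport-agnostic once records reach the JsonInputFormat. One difference worth noting is the explicit featureSpec map in the three-argument constructor. A hedged sketch of that parameter, on the assumption (suggested by its name and Map-of-String-to-Boolean shape) that its keys name Jackson JsonParser.Feature values to toggle on the parser:

import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.JsonInputFormat;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;

public class FeatureSpecSketch {
    public static void main(String[] args) {
        // ALLOW_COMMENTS is a real Jackson JsonParser.Feature; whether passing
        // it here enables comment-tolerant parsing is the assumption stated above
        JsonInputFormat lenient = new JsonInputFormat(
            JSONPathSpec.DEFAULT,
            ImmutableMap.of("ALLOW_COMMENTS", true),
            false);
        System.out.println(lenient);
    }
}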

Aggregations

JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat): 23
Test (org.junit.Test): 21
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 13
InputSplit (org.apache.druid.data.input.InputSplit): 11
CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation): 11
MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec): 6
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 6
File (java.io.File): 3
HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes): 3
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2
SamplerResponse (org.apache.druid.client.indexing.SamplerResponse): 2
InputFormat (org.apache.druid.data.input.InputFormat): 2
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 2
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 2
InputSourceSampler (org.apache.druid.indexing.overlord.sampler.InputSourceSampler): 2
SamplerConfig (org.apache.druid.indexing.overlord.sampler.SamplerConfig): 2
SamplerTestUtils (org.apache.druid.indexing.overlord.sampler.SamplerTestUtils): 2
DataSchema (org.apache.druid.segment.indexing.DataSchema): 2
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 2
DataSegment (org.apache.druid.timeline.DataSegment): 2