
Example 1 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

From the class SqlInputSourceTest, method testNumSplits.

@Test
public void testNumSplits() {
    derbyConnector = derbyConnectorRule.getConnector();
    SqlTestUtils testUtils = new SqlTestUtils(derbyConnector);
    SqlInputSource sqlInputSource =
        new SqlInputSource(SQLLIST2, true, testUtils.getDerbyFirehoseConnector(), mapper);
    InputFormat inputFormat = EasyMock.createMock(InputFormat.class);
    Stream<InputSplit<String>> sqlSplits = sqlInputSource.createSplits(inputFormat, null);
    // One split per SQL query in SQLLIST2, so the payloads round-trip to the query strings.
    Assert.assertEquals(SQLLIST2, sqlSplits.map(InputSplit::get).collect(Collectors.toList()));
    Assert.assertEquals(2, sqlInputSource.estimateNumSplits(inputFormat, null));
}
Also used : InputFormat (org.apache.druid.data.input.InputFormat), InputSplit (org.apache.druid.data.input.InputSplit), Test (org.junit.Test)
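
For orientation, here is a minimal sketch of the behavior this test pins down, assuming SQLLIST2 is a List<String> holding two SQL queries: the source wraps each query in its own InputSplit, so the split values round-trip to the original strings and the estimate equals the query count. The class below is a hypothetical illustration, not Druid's actual implementation.

import java.util.List;
import java.util.stream.Stream;

import org.apache.druid.data.input.InputSplit;

class SqlSplitSketch {
    // One InputSplit per SQL statement; nothing is merged or subdivided.
    static Stream<InputSplit<String>> createSplits(List<String> sqls) {
        return sqls.stream().map(InputSplit::new);
    }

    // The estimate is exact here: it is simply the number of queries.
    static int estimateNumSplits(List<String> sqls) {
        return sqls.size();
    }
}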

Example 2 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

From the class S3InputSourceTest, method testCreateSplitsWithSplitHintSpecRespectingHint.

@Test
public void testCreateSplitsWithSplitHintSpecRespectingHint() {
    EasyMock.reset(S3_CLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
    EasyMock.replay(S3_CLIENT);
    S3InputSource inputSource =
        new S3InputSource(SERVICE, SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, INPUT_DATA_CONFIG, null, PREFIXES, null, null);
    // Cap of 3 * CONTENT.length bytes per split: both mocked objects fit in one split.
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
        new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
        new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null)
    );
    Assert.assertEquals(
        ImmutableList.of(EXPECTED_URIS.stream().map(CloudObjectLocation::new).collect(Collectors.toList())),
        splits.map(InputSplit::get).collect(Collectors.toList())
    );
    EasyMock.verify(S3_CLIENT);
}
Also used : JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation), HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes), InputSplit (org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
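
The arithmetic behind the assertion: each mocked object is CONTENT.length bytes and the hint allows 3 * CONTENT.length bytes per split, so both objects pack into a single split, which is why the expected value is a single-element outer list. Below is a hedged, simplified sketch of the size-based packing; the real MaxSizeSplitHintSpec also enforces a maximum file count per split, and the name SizeHintSketch is made up for illustration.

import java.util.ArrayList;
import java.util.List;

class SizeHintSketch {
    // Greedy packing: keep adding files to the current split until the next
    // one would push its byte total over the cap, then start a new split.
    static List<List<Long>> groupBySize(List<Long> fileSizes, long maxBytesPerSplit) {
        List<List<Long>> splits = new ArrayList<>();
        List<Long> current = new ArrayList<>();
        long runningBytes = 0;
        for (long size : fileSizes) {
            if (!current.isEmpty() && runningBytes + size > maxBytesPerSplit) {
                splits.add(current);
                current = new ArrayList<>();
                runningBytes = 0;
            }
            current.add(size);
            runningBytes += size;
        }
        if (!current.isEmpty()) {
            splits.add(current);
        }
        return splits;
    }
}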

Example 3 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

From the class S3InputSourceTest, method testWithPrefixesSplit.

@Test
public void testWithPrefixesSplit() {
    EasyMock.reset(S3_CLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
    EasyMock.replay(S3_CLIENT);
    S3InputSource inputSource =
        new S3InputSource(SERVICE, SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, INPUT_DATA_CONFIG, null, PREFIXES, null, null);
    // No byte cap; maxNumFiles = 1 forces one object per split.
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
        new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
        new MaxSizeSplitHintSpec(null, 1)
    );
    Assert.assertEquals(EXPECTED_COORDS, splits.map(InputSplit::get).collect(Collectors.toList()));
    EasyMock.verify(S3_CLIENT);
}
Also used : JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation), InputSplit (org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
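
Here the hint is MaxSizeSplitHintSpec(null, 1): no byte cap, but at most one file per split, so every cloud object lands in its own split and EXPECTED_COORDS is presumably a list of singleton lists. A hypothetical equivalent of that degenerate case using Guava's Lists.partition (the class and method names below are made up):

import java.util.List;

import com.google.common.collect.Lists;
import org.apache.druid.data.input.impl.CloudObjectLocation;

class OnePerSplitSketch {
    // With a file-count cap of 1, packing reduces to singleton partitions.
    static List<List<CloudObjectLocation>> onePerSplit(List<CloudObjectLocation> locations) {
        return Lists.partition(locations, 1);
    }
}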

Example 4 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

From the class LocalInputSourceTest, method testCreateSplitsRespectingSplitHintSpec.

@Test
public void testCreateSplitsRespectingSplitHintSpec() {
    final long fileSize = 15;
    final HumanReadableBytes maxSplitSize = new HumanReadableBytes(50L);
    final List<File> files = mockFiles(10, fileSize);
    final LocalInputSource inputSource = new LocalInputSource(null, null, files);
    final List<InputSplit<List<File>>> splits = inputSource
        .createSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(maxSplitSize, null))
        .collect(Collectors.toList());
    // 10 files of 15 bytes each under a 50-byte cap pack as 3 + 3 + 3 + 1.
    Assert.assertEquals(4, splits.size());
    Assert.assertEquals(3, splits.get(0).get().size());
    Assert.assertEquals(3, splits.get(1).get().size());
    Assert.assertEquals(3, splits.get(2).get().size());
    Assert.assertEquals(1, splits.get(3).get().size());
}
Also used : HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes), File (java.io.File), InputSplit (org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), Test (org.junit.Test)
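
The expected counts follow directly from the numbers in the test: each file is 15 bytes and the cap is 50, so a split holds at most three files (3 * 15 = 45 fits, 4 * 15 = 60 does not), and ten files pack as 3 + 3 + 3 + 1. Feeding the same numbers through the hypothetical SizeHintSketch from Example 2 reproduces the assertions:

import java.util.Collections;
import java.util.List;

// Ten 15-byte files under a 50-byte cap.
List<Long> sizes = Collections.nCopies(10, 15L);
List<List<Long>> groups = SizeHintSketch.groupBySize(sizes, 50L);
// groups.size() == 4, holding 3, 3, 3, and 1 files respectively.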

Example 5 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

From the class OssInputSourceTest, method testCreateSplitsWithSplitHintSpecRespectingHint.

@Test
public void testCreateSplitsWithSplitHintSpecRespectingHint() {
    EasyMock.reset(OSSCLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
    EasyMock.replay(OSSCLIENT);
    OssInputSource inputSource = new OssInputSource(OSSCLIENT, INPUT_DATA_CONFIG, null, PREFIXES, null, null);
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
        new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
        new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null)
    );
    Assert.assertEquals(
        ImmutableList.of(EXPECTED_URIS.stream().map(CloudObjectLocation::new).collect(Collectors.toList())),
        splits.map(InputSplit::get).collect(Collectors.toList())
    );
    EasyMock.verify(OSSCLIENT);
}
Also used : JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat), CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation), HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes), InputSplit (org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
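
This OSS test mirrors Example 2 exactly: two mocked objects of CONTENT.length bytes each against a cap of 3 * CONTENT.length, so the packing again yields a single split. A quick check with the hypothetical SizeHintSketch from Example 2 (8 stands in for CONTENT.length, whose actual value the test does not show here):

import java.util.List;

long n = 8L;  // stand-in for CONTENT.length
List<List<Long>> groups = SizeHintSketch.groupBySize(List.of(n, n), 3 * n);
// groups.size() == 1: both objects fit under the cap together.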

Aggregations

InputSplit (org.apache.druid.data.input.InputSplit): 19
Test (org.junit.Test): 15
CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation): 13
JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat): 11
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 11
MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec): 10
List (java.util.List): 5
ImmutableList (com.google.common.collect.ImmutableList): 4
HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes): 4
File (java.io.File): 3
URI (java.net.URI): 3
InputFileAttribute (org.apache.druid.data.input.InputFileAttribute): 3
JacksonInject (com.fasterxml.jackson.annotation.JacksonInject): 2
JsonCreator (com.fasterxml.jackson.annotation.JsonCreator): 2
JsonProperty (com.fasterxml.jackson.annotation.JsonProperty): 2
ArrayList (java.util.ArrayList): 2
Iterator (java.util.Iterator): 2
Stream (java.util.stream.Stream): 2
Nullable (javax.annotation.Nullable): 2
SplitHintSpec (org.apache.druid.data.input.SplitHintSpec): 2