
Example 11 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

The class OssInputSourceTest, method testCreateSplitsWithEmptyObjectsIteratingOnlyNonEmptyObjects.

@Test
public void testCreateSplitsWithEmptyObjectsIteratingOnlyNonEmptyObjects() {
    EasyMock.reset(OSSCLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), new byte[0]);
    EasyMock.replay(OSSCLIENT);
    OssInputSource inputSource = new OssInputSource(OSSCLIENT, INPUT_DATA_CONFIG, null, PREFIXES, null, null);
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
        new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
        null
    );
    Assert.assertEquals(
        ImmutableList.of(ImmutableList.of(new CloudObjectLocation(EXPECTED_URIS.get(0)))),
        splits.map(InputSplit::get).collect(Collectors.toList())
    );
    EasyMock.verify(OSSCLIENT);
}
Also used: JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat), CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation), InputSplit(org.apache.druid.data.input.InputSplit), InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest), Test(org.junit.Test)
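
The splits above come from an expectListObjects helper defined elsewhere in OssInputSourceTest and not shown here. A minimal sketch of what such a helper could look like, assuming the Aliyun OSS SDK model classes (ObjectListing, OSSObjectSummary, ListObjectsRequest) and the mocked OSSCLIENT; the helper in the actual Druid test may differ:

// Hypothetical sketch: make the mocked client report one object summary per URI,
// each with the given content length. A zero-length byte[] yields the "empty
// objects" that createSplits is expected to skip.
// (The real helper would match the specific prefix inside the ListObjectsRequest.)
private static void expectListObjects(URI prefix, List<URI> uris, byte[] content) {
    final ObjectListing listing = new ObjectListing();
    for (URI uri : uris) {
        final OSSObjectSummary summary = new OSSObjectSummary();
        summary.setBucketName(uri.getAuthority());
        summary.setKey(uri.getPath().substring(1)); // object key: path without leading '/'
        summary.setSize(content.length);
        listing.getObjectSummaries().add(summary);
    }
    EasyMock.expect(OSSCLIENT.listObjects(EasyMock.anyObject(ListObjectsRequest.class)))
            .andReturn(listing)
            .once();
}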

Example 12 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

The class AzureInputSourceTest, method test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations.

@Test
public void test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations() {
    List<URI> prefixes = ImmutableList.of(PREFIX_URI);
    List<List<CloudObjectLocation>> expectedCloudLocations = ImmutableList.of(ImmutableList.of(CLOUD_OBJECT_LOCATION_1));
    List<CloudBlobHolder> expectedCloudBlobs = ImmutableList.of(cloudBlobDruid1);
    Iterator<CloudBlobHolder> expectedCloudBlobsIterator = expectedCloudBlobs.iterator();
    EasyMock.expect(inputDataConfig.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.expect(azureCloudBlobIterableFactory.create(prefixes, MAX_LISTING_LENGTH)).andReturn(azureCloudBlobIterable);
    EasyMock.expect(azureCloudBlobIterable.iterator()).andReturn(expectedCloudBlobsIterator);
    EasyMock.expect(azureCloudBlobToLocationConverter.createCloudObjectLocation(cloudBlobDruid1)).andReturn(CLOUD_OBJECT_LOCATION_1);
    EasyMock.expect(cloudBlobDruid1.getBlobLength()).andReturn(100L).anyTimes();
    replayAll();
    azureInputSource = new AzureInputSource(
        storage,
        entityFactory,
        azureCloudBlobIterableFactory,
        azureCloudBlobToLocationConverter,
        inputDataConfig,
        EMPTY_URIS,
        prefixes,
        EMPTY_OBJECTS
    );
    Stream<InputSplit<List<CloudObjectLocation>>> cloudObjectStream =
        azureInputSource.getPrefixesSplitStream(new MaxSizeSplitHintSpec(null, 1));
    List<List<CloudObjectLocation>> actualCloudLocationList = cloudObjectStream.map(InputSplit::get).collect(Collectors.toList());
    verifyAll();
    Assert.assertEquals(expectedCloudLocations, actualCloudLocationList);
}
Also used: CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation), List(java.util.List), ImmutableList(com.google.common.collect.ImmutableList), CloudBlobHolder(org.apache.druid.storage.azure.blob.CloudBlobHolder), URI(java.net.URI), InputSplit(org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec), Test(org.junit.Test)
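
Here new MaxSizeSplitHintSpec(null, 1) caps each split at one file, which is why the single blob becomes a single-element location list. A standalone illustration of that grouping behavior, using hypothetical item names and a fixed 100-byte size (the split method and InputFileAttribute appear in the HDFS example further below):

// maxSplitSize = null (use the default), maxNumFiles = 1: one item per split.
Iterator<List<String>> grouped = new MaxSizeSplitHintSpec(null, 1).split(
    ImmutableList.of("blob1", "blob2").iterator(),
    name -> new InputFileAttribute(100L) // every item reports 100 bytes
);
grouped.forEachRemaining(System.out::println); // prints [blob1], then [blob2]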

Example 13 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

The class CombiningInputSourceTest, method testCreateSplits.

@Test
public void testCreateSplits() {
    final File file = EasyMock.niceMock(File.class);
    EasyMock.expect(file.length()).andReturn(30L).anyTimes();
    EasyMock.replay(file);
    final TestFileInputSource fileSource = new TestFileInputSource(generateFiles(3));
    final TestUriInputSource uriInputSource = new TestUriInputSource(
        ImmutableList.of(
            URI.create("http://test.com/http-test3"),
            URI.create("http://test.com/http-test4"),
            URI.create("http://test.com/http-test5")
        )
    );
    final CombiningInputSource combiningInputSource = new CombiningInputSource(ImmutableList.of(fileSource, uriInputSource));
    List<InputSplit> combinedInputSplits = combiningInputSource
        .createSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(new HumanReadableBytes(5L), null))
        .collect(Collectors.toList());
    Assert.assertEquals(6, combinedInputSplits.size());
    for (int i = 0; i < 3; i++) {
        Pair<SplittableInputSource, InputSplit> splitPair = (Pair) combinedInputSplits.get(i).get();
        InputSplit<File> fileSplits = splitPair.rhs;
        Assert.assertTrue(splitPair.lhs instanceof TestFileInputSource);
        Assert.assertEquals(5, fileSplits.get().length());
    }
    for (int i = 3; i < combinedInputSplits.size(); i++) {
        Pair<SplittableInputSource, InputSplit> splitPair = (Pair) combinedInputSplits.get(i).get();
        InputSplit<URI> fileSplits = splitPair.rhs;
        Assert.assertTrue(splitPair.lhs instanceof TestUriInputSource);
        Assert.assertEquals(URI.create("http://test.com/http-test" + i), fileSplits.get());
    }
}
Also used: URI(java.net.URI), HumanReadableBytes(org.apache.druid.java.util.common.HumanReadableBytes), File(java.io.File), InputSplit(org.apache.druid.data.input.InputSplit), MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec), Pair(org.apache.druid.java.util.common.Pair), Test(org.junit.Test)
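
Each combined split wraps a Pair of the delegate source and that delegate's own split, which is how the combining source can later route a split back to the input source that produced it. A hedged sketch of that round trip, continuing from the test above and assuming SplittableInputSource.withSplit returns a narrowed InputSource:

// Unwrap one combined split and rebuild a source scoped to just that piece of input.
Pair<SplittableInputSource, InputSplit> splitPair = (Pair) combinedInputSplits.get(0).get();
InputSource narrowed = splitPair.lhs.withSplit(splitPair.rhs); // reads only that one file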

Example 14 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

The class IngestSegmentFirehoseFactoryTimelineTest, method testSplit.

private void testSplit() throws Exception {
    Assert.assertTrue(factory.isSplittable());
    final int numSplits = factory.getNumSplits(null);
    // We set maxInputSegmentBytesPerSplit to 2 so each segment should become a split.
    Assert.assertEquals(segmentCount, numSplits);
    final List<InputSplit<List<WindowedSegmentId>>> splits = factory.getSplits(null).collect(Collectors.toList());
    Assert.assertEquals(numSplits, splits.size());
    int count = 0;
    long sum = 0;
    for (InputSplit<List<WindowedSegmentId>> split : splits) {
        final FiniteFirehoseFactory<InputRowParser, List<WindowedSegmentId>> splitFactory = factory.withSplit(split);
        try (final Firehose firehose = splitFactory.connect(ROW_PARSER, tmpDir)) {
            while (firehose.hasMore()) {
                final InputRow row = firehose.nextRow();
                count++;
                sum += row.getMetric(METRICS[0]).longValue();
            }
        }
    }
    Assert.assertEquals("count", expectedCount, count);
    Assert.assertEquals("sum", expectedSum, sum);
}
Also used: Firehose(org.apache.druid.data.input.Firehose), InputRow(org.apache.druid.data.input.InputRow), MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow), List(java.util.List), ArrayList(java.util.ArrayList), ImmutableList(com.google.common.collect.ImmutableList), InputRowParser(org.apache.druid.data.input.impl.InputRowParser), MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser), InputSplit(org.apache.druid.data.input.InputSplit)

Example 15 with InputSplit

Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.

The class HdfsInputSource, method createSplits.

@Override
public Stream<InputSplit<List<Path>>> createSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec) throws IOException {
    cachePathsIfNeeded();
    final Iterator<List<Path>> splitIterator = getSplitHintSpecOrDefault(splitHintSpec).split(cachedPaths.iterator(), path -> {
        try {
            final long size = path.getFileSystem(configuration).getFileStatus(path).getLen();
            return new InputFileAttribute(size);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    });
    return Streams.sequentialStreamFrom(splitIterator).map(InputSplit::new);
}
Also used: InputFileAttribute(org.apache.druid.data.input.InputFileAttribute), ImmutableList(com.google.common.collect.ImmutableList), List(java.util.List), IOException(java.io.IOException), UncheckedIOException(java.io.UncheckedIOException), InputSplit(org.apache.druid.data.input.InputSplit)
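
Unlike the tests above, this is the production implementation: the split hint spec groups the cached HDFS paths by size, and each List<Path> group is wrapped through the InputSplit constructor reference. A brief consumption sketch, with a hypothetical hdfsInputSource instance (the JsonInputFormat arguments mirror the OSS example above; Path is org.apache.hadoop.fs.Path; IOException handling omitted):

Stream<InputSplit<List<Path>>> splits = hdfsInputSource.createSplits(
    new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
    null // null hint spec: getSplitHintSpecOrDefault falls back to the default
);
splits.forEach(split -> {
    List<Path> paths = split.get(); // the HDFS paths grouped into this split
    // each group would typically be handed to a sub-task, e.g. via withSplit(split)
});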

Aggregations

InputSplit (org.apache.druid.data.input.InputSplit): 19
Test (org.junit.Test): 15
CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation): 13
JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat): 11
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 11
MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec): 10
List (java.util.List): 5
ImmutableList (com.google.common.collect.ImmutableList): 4
HumanReadableBytes (org.apache.druid.java.util.common.HumanReadableBytes): 4
File (java.io.File): 3
URI (java.net.URI): 3
InputFileAttribute (org.apache.druid.data.input.InputFileAttribute): 3
JacksonInject (com.fasterxml.jackson.annotation.JacksonInject): 2
JsonCreator (com.fasterxml.jackson.annotation.JsonCreator): 2
JsonProperty (com.fasterxml.jackson.annotation.JsonProperty): 2
ArrayList (java.util.ArrayList): 2
Iterator (java.util.Iterator): 2
Stream (java.util.stream.Stream): 2
Nullable (javax.annotation.Nullable): 2
SplitHintSpec (org.apache.druid.data.input.SplitHintSpec): 2