Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.
The class OssInputSourceTest, method testCreateSplitsWithEmptyObjectsIteratingOnlyNonEmptyObjects.
@Test
public void testCreateSplitsWithEmptyObjectsIteratingOnlyNonEmptyObjects()
{
  EasyMock.reset(OSSCLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
  expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), new byte[0]);
  EasyMock.replay(OSSCLIENT);

  OssInputSource inputSource = new OssInputSource(
      OSSCLIENT,
      INPUT_DATA_CONFIG,
      null,
      PREFIXES,
      null,
      null
  );

  Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
      new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
      null
  );

  Assert.assertEquals(
      ImmutableList.of(ImmutableList.of(new CloudObjectLocation(EXPECTED_URIS.get(0)))),
      splits.map(InputSplit::get).collect(Collectors.toList())
  );
  EasyMock.verify(OSSCLIENT);
}
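The test drives a mocked OSS client through a helper, expectListObjects, that is defined elsewhere in OssInputSourceTest and not shown here. A minimal sketch of what such a helper could look like, assuming the Aliyun OSS SDK's ObjectListing and OSSObjectSummary classes; matchListObjectsRequest stands in for a hypothetical EasyMock argument matcher on the prefix:

// Sketch only: stubs OSSCLIENT.listObjects(...) to return one listing whose
// entries mirror the given URIs, each reporting content.length as its size,
// so an empty byte[] makes the object look empty to the split logic.
private static void expectListObjects(URI prefix, List<URI> uris, byte[] content)
{
  final ObjectListing result = new ObjectListing();
  result.setBucketName(prefix.getAuthority());
  for (URI uri : uris) {
    final OSSObjectSummary objectSummary = new OSSObjectSummary();
    objectSummary.setBucketName(uri.getAuthority());
    objectSummary.setKey(uri.getPath().substring(1)); // drop the leading '/'
    objectSummary.setSize(content.length);
    result.getObjectSummaries().add(objectSummary);
  }
  // matchListObjectsRequest is assumed, not shown in this snippet.
  EasyMock.expect(OSSCLIENT.listObjects(matchListObjectsRequest(prefix))).andReturn(result).once();
}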
Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.
The class AzureInputSourceTest, method test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations.
@Test
public void test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations()
{
  List<URI> prefixes = ImmutableList.of(PREFIX_URI);
  List<List<CloudObjectLocation>> expectedCloudLocations =
      ImmutableList.of(ImmutableList.of(CLOUD_OBJECT_LOCATION_1));
  List<CloudBlobHolder> expectedCloudBlobs = ImmutableList.of(cloudBlobDruid1);
  Iterator<CloudBlobHolder> expectedCloudBlobsIterator = expectedCloudBlobs.iterator();

  EasyMock.expect(inputDataConfig.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
  EasyMock.expect(azureCloudBlobIterableFactory.create(prefixes, MAX_LISTING_LENGTH))
          .andReturn(azureCloudBlobIterable);
  EasyMock.expect(azureCloudBlobIterable.iterator()).andReturn(expectedCloudBlobsIterator);
  EasyMock.expect(azureCloudBlobToLocationConverter.createCloudObjectLocation(cloudBlobDruid1))
          .andReturn(CLOUD_OBJECT_LOCATION_1);
  EasyMock.expect(cloudBlobDruid1.getBlobLength()).andReturn(100L).anyTimes();
  replayAll();

  azureInputSource = new AzureInputSource(
      storage,
      entityFactory,
      azureCloudBlobIterableFactory,
      azureCloudBlobToLocationConverter,
      inputDataConfig,
      EMPTY_URIS,
      prefixes,
      EMPTY_OBJECTS
  );

  Stream<InputSplit<List<CloudObjectLocation>>> cloudObjectStream =
      azureInputSource.getPrefixesSplitStream(new MaxSizeSplitHintSpec(null, 1));
  List<List<CloudObjectLocation>>> actualCloudLocationList =
      cloudObjectStream.map(InputSplit::get).collect(Collectors.toList());

  verifyAll();
  Assert.assertEquals(expectedCloudLocations, actualCloudLocationList);
}
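The test above passes MaxSizeSplitHintSpec(null, 1), which caps every split at one file. A minimal sketch of that grouping behavior in isolation, assuming SplitHintSpec's split(Iterator, Function) contract; the item names and the 100-byte attribute are arbitrary stand-ins:

// Sketch: with maxNumFiles = 1 every listed object becomes its own split,
// regardless of its reported size.
MaxSizeSplitHintSpec hintSpec = new MaxSizeSplitHintSpec(null, 1);
Iterator<List<String>> groups = hintSpec.split(
    ImmutableList.of("blob-1", "blob-2", "blob-3").iterator(),
    name -> new InputFileAttribute(100L) // pretend each object is 100 bytes
);
groups.forEachRemaining(group -> Assert.assertEquals(1, group.size()));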
Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.
The class CombiningInputSourceTest, method testCreateSplits.
@Test
public void testCreateSplits()
{
  final File file = EasyMock.niceMock(File.class);
  EasyMock.expect(file.length()).andReturn(30L).anyTimes();
  EasyMock.replay(file);

  final TestFileInputSource fileSource = new TestFileInputSource(generateFiles(3));
  final TestUriInputSource uriInputSource = new TestUriInputSource(
      ImmutableList.of(
          URI.create("http://test.com/http-test3"),
          URI.create("http://test.com/http-test4"),
          URI.create("http://test.com/http-test5")
      )
  );
  final CombiningInputSource combiningInputSource = new CombiningInputSource(
      ImmutableList.of(fileSource, uriInputSource)
  );

  List<InputSplit> combinedInputSplits = combiningInputSource
      .createSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(new HumanReadableBytes(5L), null))
      .collect(Collectors.toList());
  Assert.assertEquals(6, combinedInputSplits.size());

  for (int i = 0; i < 3; i++) {
    Pair<SplittableInputSource, InputSplit> splitPair = (Pair) combinedInputSplits.get(i).get();
    InputSplit<File> fileSplits = splitPair.rhs;
    Assert.assertTrue(splitPair.lhs instanceof TestFileInputSource);
    Assert.assertEquals(5, fileSplits.get().length());
  }
  for (int i = 3; i < combinedInputSplits.size(); i++) {
    Pair<SplittableInputSource, InputSplit> splitPair = (Pair) combinedInputSplits.get(i).get();
    InputSplit<URI> uriSplits = splitPair.rhs;
    Assert.assertTrue(splitPair.lhs instanceof TestUriInputSource);
    Assert.assertEquals(URI.create("http://test.com/http-test" + i), uriSplits.get());
  }
}
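As the casts in the loops show, each combined split wraps a (delegate source, delegate split) pair, so a caller has to route the split back through the source that produced it. A hypothetical helper sketching that unwrap step; the rebind name is assumed, not Druid API:

// Sketch: rebind a combined split to the delegate input source that produced
// it. SplittableInputSource#withSplit returns an InputSource fixed to that split.
@SuppressWarnings("unchecked")
static InputSource rebind(InputSplit<?> combinedSplit)
{
  final Pair<SplittableInputSource, InputSplit> pair =
      (Pair<SplittableInputSource, InputSplit>) combinedSplit.get();
  return pair.lhs.withSplit(pair.rhs);
}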
Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.
The class IngestSegmentFirehoseFactoryTimelineTest, method testSplit.
private void testSplit() throws Exception
{
  Assert.assertTrue(factory.isSplittable());
  final int numSplits = factory.getNumSplits(null);
  // We set maxInputSegmentBytesPerSplit to 2, so each segment should become its own split.
  Assert.assertEquals(segmentCount, numSplits);

  final List<InputSplit<List<WindowedSegmentId>>> splits =
      factory.getSplits(null).collect(Collectors.toList());
  Assert.assertEquals(numSplits, splits.size());

  int count = 0;
  long sum = 0;
  for (InputSplit<List<WindowedSegmentId>> split : splits) {
    final FiniteFirehoseFactory<InputRowParser, List<WindowedSegmentId>> splitFactory =
        factory.withSplit(split);
    try (final Firehose firehose = splitFactory.connect(ROW_PARSER, tmpDir)) {
      while (firehose.hasMore()) {
        final InputRow row = firehose.nextRow();
        count++;
        sum += row.getMetric(METRICS[0]).longValue();
      }
    }
  }
  Assert.assertEquals("count", expectedCount, count);
  Assert.assertEquals("sum", expectedSum, sum);
}
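Each split here carries a List<WindowedSegmentId> payload, and with the 2-byte cap per split every list should hold exactly one segment. A small hedged check of that shape, under the same setup as the test above:

// Sketch: one WindowedSegmentId per split when maxInputSegmentBytesPerSplit = 2.
for (InputSplit<List<WindowedSegmentId>> split : splits) {
  Assert.assertEquals(1, split.get().size());
}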
Use of org.apache.druid.data.input.InputSplit in project druid by druid-io.
The class HdfsInputSource, method createSplits.
@Override
public Stream<InputSplit<List<Path>>> createSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec)
    throws IOException
{
  cachePathsIfNeeded();
  final Iterator<List<Path>> splitIterator = getSplitHintSpecOrDefault(splitHintSpec).split(
      cachedPaths.iterator(),
      path -> {
        try {
          final long size = path.getFileSystem(configuration).getFileStatus(path).getLen();
          return new InputFileAttribute(size);
        }
        catch (IOException e) {
          throw new UncheckedIOException(e);
        }
      }
  );
  return Streams.sequentialStreamFrom(splitIterator).map(InputSplit::new);
}
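A brief sketch of how a caller might consume these splits, assuming an already-constructed hdfsInputSource and reusing the MaxSizeSplitHintSpec pattern from the tests above; the 5-byte cap is arbitrary:

// Sketch: each split carries a List<Path> batch sized by the hint spec.
Stream<InputSplit<List<Path>>> splits = hdfsInputSource.createSplits(
    new NoopInputFormat(),
    new MaxSizeSplitHintSpec(new HumanReadableBytes(5L), null)
);
splits.map(InputSplit::get)
      .forEach(paths -> paths.forEach(path -> System.out.println("split path: " + path)));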