Use of org.apache.druid.data.input.MaxSizeSplitHintSpec in project druid by druid-io.
The class GoogleCloudStorageInputSourceTest, method testCreateSplitsWithSplitHintSpecRespectingHint.
@Test
public void testCreateSplitsWithSplitHintSpecRespectingHint() throws IOException
{
  EasyMock.reset(STORAGE);
  EasyMock.reset(INPUT_DATA_CONFIG);
  addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)));
  addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)));
  EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
  EasyMock.replay(STORAGE);
  EasyMock.replay(INPUT_DATA_CONFIG);

  GoogleCloudStorageInputSource inputSource =
      new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);

  // The hint allows three times the object size per split, so both objects should land in one split.
  Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
      new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
      new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null)
  );

  Assert.assertEquals(
      ImmutableList.of(EXPECTED_URIS.stream().map(CloudObjectLocation::new).collect(Collectors.toList())),
      splits.map(InputSplit::get).collect(Collectors.toList())
  );
}
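For context, here is a minimal, self-contained sketch of the size-based grouping the assertion relies on. It is not part of the test above; the 10-byte object size, the class name, and the string inputs are made up. With a byte limit of three times the object size, both objects fit into a single split.

import java.util.Iterator;
import java.util.List;
import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.InputFileAttribute;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.java.util.common.HumanReadableBytes;

class SizeHintSketch
{
  public static void main(String[] args)
  {
    final long objectSize = 10L; // hypothetical stand-in for CONTENT.length
    final List<String> objects = ImmutableList.of("object-1", "object-2");
    // Allow up to three times the object size per split; no explicit limit on the number of files.
    final MaxSizeSplitHintSpec hintSpec = new MaxSizeSplitHintSpec(new HumanReadableBytes(objectSize * 3), null);
    final Iterator<List<String>> splits = hintSpec.split(objects.iterator(), o -> new InputFileAttribute(objectSize));
    // Both objects fit under the limit, so a single split [object-1, object-2] is expected.
    splits.forEachRemaining(System.out::println);
  }
}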
Use of org.apache.druid.data.input.MaxSizeSplitHintSpec in project druid by druid-io.
The class AzureInputSourceTest, method test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations.
@Test
public void test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations()
{
  List<URI> prefixes = ImmutableList.of(PREFIX_URI);
  List<List<CloudObjectLocation>> expectedCloudLocations = ImmutableList.of(ImmutableList.of(CLOUD_OBJECT_LOCATION_1));
  List<CloudBlobHolder> expectedCloudBlobs = ImmutableList.of(cloudBlobDruid1);
  Iterator<CloudBlobHolder> expectedCloudBlobsIterator = expectedCloudBlobs.iterator();
  EasyMock.expect(inputDataConfig.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
  EasyMock.expect(azureCloudBlobIterableFactory.create(prefixes, MAX_LISTING_LENGTH)).andReturn(azureCloudBlobIterable);
  EasyMock.expect(azureCloudBlobIterable.iterator()).andReturn(expectedCloudBlobsIterator);
  EasyMock.expect(azureCloudBlobToLocationConverter.createCloudObjectLocation(cloudBlobDruid1)).andReturn(CLOUD_OBJECT_LOCATION_1);
  EasyMock.expect(cloudBlobDruid1.getBlobLength()).andReturn(100L).anyTimes();
  replayAll();

  azureInputSource = new AzureInputSource(
      storage, entityFactory, azureCloudBlobIterableFactory, azureCloudBlobToLocationConverter,
      inputDataConfig, EMPTY_URIS, prefixes, EMPTY_OBJECTS
  );

  // maxNumFiles = 1 limits each split to a single blob.
  Stream<InputSplit<List<CloudObjectLocation>>> cloudObjectStream =
      azureInputSource.getPrefixesSplitStream(new MaxSizeSplitHintSpec(null, 1));
  List<List<CloudObjectLocation>> actualCloudLocationList =
      cloudObjectStream.map(InputSplit::get).collect(Collectors.toList());
  verifyAll();

  Assert.assertEquals(expectedCloudLocations, actualCloudLocationList);
}
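A note on the hint used here: it is built with a null byte limit and maxNumFiles = 1, so splitting is bounded by file count rather than size. Only one blob (with a stubbed length of 100 bytes) is listed under the prefix, so a single split containing its location is expected; with more blobs, each one would land in its own split. The grouping mechanics are the same as in the sketch after the first example, just limited by the number of files instead of bytes.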
Use of org.apache.druid.data.input.MaxSizeSplitHintSpec in project druid by druid-io.
The class CombiningInputSourceTest, method testCreateSplits.
@Test
public void testCreateSplits()
{
  final File file = EasyMock.niceMock(File.class);
  EasyMock.expect(file.length()).andReturn(30L).anyTimes();
  EasyMock.replay(file);

  final TestFileInputSource fileSource = new TestFileInputSource(generateFiles(3));
  final TestUriInputSource uriInputSource = new TestUriInputSource(
      ImmutableList.of(
          URI.create("http://test.com/http-test3"),
          URI.create("http://test.com/http-test4"),
          URI.create("http://test.com/http-test5")
      )
  );
  final CombiningInputSource combiningInputSource =
      new CombiningInputSource(ImmutableList.of(fileSource, uriInputSource));

  // A 5-byte limit per split keeps every underlying input in its own split.
  List<InputSplit> combinedInputSplits = combiningInputSource
      .createSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(new HumanReadableBytes(5L), null))
      .collect(Collectors.toList());
  Assert.assertEquals(6, combinedInputSplits.size());

  // The first three splits come from the file source; each combined split carries a pair of
  // the originating child source and that child's split.
  for (int i = 0; i < 3; i++) {
    Pair<SplittableInputSource, InputSplit> splitPair = (Pair) combinedInputSplits.get(i).get();
    InputSplit<File> fileSplits = splitPair.rhs;
    Assert.assertTrue(splitPair.lhs instanceof TestFileInputSource);
    Assert.assertEquals(5, fileSplits.get().length());
  }

  // The remaining three come from the URI source.
  for (int i = 3; i < combinedInputSplits.size(); i++) {
    Pair<SplittableInputSource, InputSplit> splitPair = (Pair) combinedInputSplits.get(i).get();
    InputSplit<URI> uriSplits = splitPair.rhs;
    Assert.assertTrue(splitPair.lhs instanceof TestUriInputSource);
    Assert.assertEquals(URI.create("http://test.com/http-test" + i), uriSplits.get());
  }
}
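Here the combining source delegates splitting to each child source and wraps every child split in a Pair of the originating source (lhs) and the split itself (rhs). With the 5-byte hint, each underlying input stays in its own split, so the three files and three URIs produce six combined splits; the two loops verify that the first three pairs come from the TestFileInputSource and the last three from the TestUriInputSource.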
Use of org.apache.druid.data.input.MaxSizeSplitHintSpec in project druid by druid-io.
The class DruidInputSource, method createSplits.
public static Iterator<InputSplit<List<WindowedSegmentId>>> createSplits(
    CoordinatorClient coordinatorClient, RetryPolicyFactory retryPolicyFactory,
    String dataSource, Interval interval, SplitHintSpec splitHintSpec)
{
  final SplitHintSpec convertedSplitHintSpec;
  if (splitHintSpec instanceof SegmentsSplitHintSpec) {
    // Translate the segment-specific hint into the generic size-based hint.
    final SegmentsSplitHintSpec segmentsSplitHintSpec = (SegmentsSplitHintSpec) splitHintSpec;
    convertedSplitHintSpec = new MaxSizeSplitHintSpec(
        segmentsSplitHintSpec.getMaxInputSegmentBytesPerTask(),
        segmentsSplitHintSpec.getMaxNumSegments());
  } else {
    convertedSplitHintSpec = splitHintSpec;
  }
  final List<TimelineObjectHolder<String, DataSegment>> timelineSegments =
      getTimelineForInterval(coordinatorClient, retryPolicyFactory, dataSource, interval);
  final Map<WindowedSegmentId, Long> segmentIdToSize = createWindowedSegmentIdFromTimeline(timelineSegments);
  //noinspection ConstantConditions
  return Iterators.transform(
      convertedSplitHintSpec.split(
          // Segments with the same ID should belong to the same input split.
          segmentIdToSize.keySet().iterator(),
          segmentId -> new InputFileAttribute(
              Preconditions.checkNotNull(segmentIdToSize.get(segmentId), "segment size for [%s]", segmentId))),
      InputSplit::new);
}
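As a rough illustration of the grouping step at the end of this method, here is a minimal, self-contained sketch; the segment names and sizes are made up, and plain strings stand in for WindowedSegmentId. Segment IDs are accumulated until the byte limit would be exceeded, and each resulting group is wrapped in an InputSplit via Iterators.transform, mirroring the return statement above.

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import org.apache.druid.data.input.InputFileAttribute;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.java.util.common.HumanReadableBytes;

class SegmentGroupingSketch
{
  public static void main(String[] args)
  {
    // Hypothetical segment IDs and their sizes in bytes.
    final Map<String, Long> segmentIdToSize = ImmutableMap.of("seg-a", 40L, "seg-b", 40L, "seg-c", 90L);
    // Allow at most 100 bytes per split; no explicit limit on the number of segments.
    final MaxSizeSplitHintSpec hintSpec = new MaxSizeSplitHintSpec(new HumanReadableBytes(100L), null);
    final Iterator<InputSplit<List<String>>> splits = Iterators.transform(
        hintSpec.split(
            segmentIdToSize.keySet().iterator(),
            id -> new InputFileAttribute(segmentIdToSize.get(id))),
        InputSplit::new);
    // Expected grouping: [seg-a, seg-b] in one split (80 bytes), [seg-c] in another (90 bytes).
    splits.forEachRemaining(split -> System.out.println(split.get()));
  }
}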