Search in sources :

Example 16 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class GoogleCloudStorageInputSourceTest method testWithUrisSplit.

@Test
public void testWithUrisSplit() {
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, EXPECTED_URIS, ImmutableList.of(), null);
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), null);
    Assert.assertEquals(EXPECTED_OBJECTS, splits.map(InputSplit::get).collect(Collectors.toList()));
}
Also used : JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) InputSplit(org.apache.druid.data.input.InputSplit) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 17 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class GoogleCloudStorageInputSourceTest method testWithPrefixesSplit.

@Test
public void testWithPrefixesSplit() throws IOException {
    EasyMock.reset(STORAGE);
    EasyMock.reset(INPUT_DATA_CONFIG);
    addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)));
    addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)));
    EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.replay(STORAGE);
    EasyMock.replay(INPUT_DATA_CONFIG);
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), new MaxSizeSplitHintSpec(null, 1));
    Assert.assertEquals(EXPECTED_OBJECTS, splits.map(InputSplit::get).collect(Collectors.toList()));
}
Also used : JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) InputSplit(org.apache.druid.data.input.InputSplit) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 18 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class GoogleCloudStorageInputSourceTest method testCreateSplitsWithSplitHintSpecRespectingHint.

@Test
public void testCreateSplitsWithSplitHintSpecRespectingHint() throws IOException {
    EasyMock.reset(STORAGE);
    EasyMock.reset(INPUT_DATA_CONFIG);
    addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)));
    addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)));
    EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.replay(STORAGE);
    EasyMock.replay(INPUT_DATA_CONFIG);
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null));
    Assert.assertEquals(ImmutableList.of(EXPECTED_URIS.stream().map(CloudObjectLocation::new).collect(Collectors.toList())), splits.map(InputSplit::get).collect(Collectors.toList()));
}
Also used : JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) HumanReadableBytes(org.apache.druid.java.util.common.HumanReadableBytes) InputSplit(org.apache.druid.data.input.InputSplit) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 19 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class OssInputSourceTest method testCreateSplitsWithEmptyObjectsIteratingOnlyNonEmptyObjects.

@Test
public void testCreateSplitsWithEmptyObjectsIteratingOnlyNonEmptyObjects() {
    EasyMock.reset(OSSCLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), new byte[0]);
    EasyMock.replay(OSSCLIENT);
    OssInputSource inputSource = new OssInputSource(OSSCLIENT, INPUT_DATA_CONFIG, null, PREFIXES, null, null);
    Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), null);
    Assert.assertEquals(ImmutableList.of(ImmutableList.of(new CloudObjectLocation(EXPECTED_URIS.get(0)))), splits.map(InputSplit::get).collect(Collectors.toList()));
    EasyMock.verify(OSSCLIENT);
}
Also used : JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) InputSplit(org.apache.druid.data.input.InputSplit) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 20 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class AzureInputSourceTest method test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations.

@Test
public void test_getPrefixesSplitStream_successfullyCreatesCloudLocation_returnsExpectedLocations() {
    List<URI> prefixes = ImmutableList.of(PREFIX_URI);
    List<List<CloudObjectLocation>> expectedCloudLocations = ImmutableList.of(ImmutableList.of(CLOUD_OBJECT_LOCATION_1));
    List<CloudBlobHolder> expectedCloudBlobs = ImmutableList.of(cloudBlobDruid1);
    Iterator<CloudBlobHolder> expectedCloudBlobsIterator = expectedCloudBlobs.iterator();
    EasyMock.expect(inputDataConfig.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.expect(azureCloudBlobIterableFactory.create(prefixes, MAX_LISTING_LENGTH)).andReturn(azureCloudBlobIterable);
    EasyMock.expect(azureCloudBlobIterable.iterator()).andReturn(expectedCloudBlobsIterator);
    EasyMock.expect(azureCloudBlobToLocationConverter.createCloudObjectLocation(cloudBlobDruid1)).andReturn(CLOUD_OBJECT_LOCATION_1);
    EasyMock.expect(cloudBlobDruid1.getBlobLength()).andReturn(100L).anyTimes();
    replayAll();
    azureInputSource = new AzureInputSource(storage, entityFactory, azureCloudBlobIterableFactory, azureCloudBlobToLocationConverter, inputDataConfig, EMPTY_URIS, prefixes, EMPTY_OBJECTS);
    Stream<InputSplit<List<CloudObjectLocation>>> cloudObjectStream = azureInputSource.getPrefixesSplitStream(new MaxSizeSplitHintSpec(null, 1));
    List<List<CloudObjectLocation>> actualCloudLocationList = cloudObjectStream.map(InputSplit::get).collect(Collectors.toList());
    verifyAll();
    Assert.assertEquals(expectedCloudLocations, actualCloudLocationList);
}
Also used : CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) CloudBlobHolder(org.apache.druid.storage.azure.blob.CloudBlobHolder) URI(java.net.URI) InputSplit(org.apache.druid.data.input.InputSplit) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) Test(org.junit.Test)

Aggregations

CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation)34 Test (org.junit.Test)21 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)15 InputSplit (org.apache.druid.data.input.InputSplit)13 JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat)11 OSSObjectSummary (com.aliyun.oss.model.OSSObjectSummary)6 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)6 MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec)6 URI (java.net.URI)5 File (java.io.File)4 FileInputStream (java.io.FileInputStream)4 FileOutputStream (java.io.FileOutputStream)4 OutputStream (java.io.OutputStream)4 Date (java.util.Date)4 GZIPOutputStream (java.util.zip.GZIPOutputStream)4 FileUtils (org.apache.druid.java.util.common.FileUtils)4 IOE (org.apache.druid.java.util.common.IOE)4 OSSException (com.aliyun.oss.OSSException)3 OSSObject (com.aliyun.oss.model.OSSObject)3 S3Object (com.amazonaws.services.s3.model.S3Object)3