Use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
The class GoogleCloudStorageInputSourceTest, method addExpectedGetCompressedObjectMock.
private static void addExpectedGetCompressedObjectMock(URI uri) throws IOException
{
  CloudObjectLocation location = new CloudObjectLocation(uri);
  // Gzip the shared test content in memory so the mocked storage can serve a compressed stream.
  ByteArrayOutputStream gzipped = new ByteArrayOutputStream();
  CompressionUtils.gzip(new ByteArrayInputStream(CONTENT), gzipped);
  // The mock answers a ranged get (offset 0) for exactly the bucket/path parsed from the URI.
  EasyMock.expect(STORAGE.get(EasyMock.eq(location.getBucket()), EasyMock.eq(location.getPath()), EasyMock.eq(0L)))
          .andReturn(new ByteArrayInputStream(gzipped.toByteArray()))
          .once();
}
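For reference, CloudObjectLocation is what turns the gs:// URI into the bucket and path the mock matches on: the URI authority becomes the bucket, and the path loses its leading slash. A minimal sketch of that round trip, assuming only the constructors and accessors exercised above plus the toUri(String scheme) helper on the same class:

import java.net.URI;
import org.apache.druid.data.input.impl.CloudObjectLocation;

public class CloudObjectLocationSketch
{
  public static void main(String[] args)
  {
    // The URI authority becomes the bucket; the path keeps everything after the leading slash.
    CloudObjectLocation fromUri = new CloudObjectLocation(URI.create("gs://foo/bar/file.gz"));
    System.out.println(fromUri.getBucket()); // foo
    System.out.println(fromUri.getPath());   // bar/file.gz

    // Building from parts and converting back should reproduce the original URI.
    CloudObjectLocation fromParts = new CloudObjectLocation("foo", "bar/file.gz");
    System.out.println(fromParts.toUri("gs")); // gs://foo/bar/file.gz
  }
}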
Use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
The class GoogleCloudStorageInputSourceTest, method testSerdeObjects.
@Test
public void testSerdeObjects() throws Exception
{
  final ObjectMapper mapper = createGoogleObjectMapper();
  final GoogleCloudStorageInputSource withObjects = new GoogleCloudStorageInputSource(
      STORAGE,
      INPUT_DATA_CONFIG,
      null,
      null,
      ImmutableList.of(new CloudObjectLocation("foo", "bar/file.gz"))
  );
  final GoogleCloudStorageInputSource serdeWithObjects =
      mapper.readValue(mapper.writeValueAsString(withObjects), GoogleCloudStorageInputSource.class);
  Assert.assertEquals(withObjects, serdeWithObjects);
}
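The round trip works because CloudObjectLocation is a plain Jackson bean keyed on bucket and path. A standalone sketch of the same serde check, assuming a vanilla ObjectMapper and the bucket/path property names used by Druid's cloud input source specs:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.data.input.impl.CloudObjectLocation;

public class LocationSerdeSketch
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    final CloudObjectLocation location = new CloudObjectLocation("foo", "bar/file.gz");
    final String json = mapper.writeValueAsString(location);
    System.out.println(json); // expected: {"bucket":"foo","path":"bar/file.gz"}
    // equals() compares bucket and path, so the deserialized copy should match.
    System.out.println(location.equals(mapper.readValue(json, CloudObjectLocation.class)));
  }
}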
Use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
The class GoogleCloudStorageInputSource, method getPrefixesSplitStream.
@Override
protected Stream<InputSplit<List<CloudObjectLocation>>> getPrefixesSplitStream(@Nonnull SplitHintSpec splitHintSpec)
{
  final Iterator<List<StorageObject>> splitIterator = splitHintSpec.split(
      storageObjectIterable().iterator(),
      storageObject -> {
        // GCS reports object sizes as BigInteger; clamp anything outside the long range.
        final BigInteger sizeInBigInteger = storageObject.getSize();
        long sizeInLong;
        if (sizeInBigInteger == null) {
          sizeInLong = Long.MAX_VALUE;
        } else {
          try {
            sizeInLong = sizeInBigInteger.longValueExact();
          }
          catch (ArithmeticException e) {
            LOG.warn(
                e,
                "The object [%s, %s] has a size [%s] out of the range of the long type. "
                + "The max long value will be used for its size instead.",
                storageObject.getBucket(),
                storageObject.getName(),
                sizeInBigInteger
            );
            sizeInLong = Long.MAX_VALUE;
          }
        }
        return new InputFileAttribute(sizeInLong);
      }
  );
  return Streams.sequentialStreamFrom(splitIterator)
                .map(objects -> objects.stream()
                                       .map(this::byteSourceFromStorageObject)
                                       .collect(Collectors.toList()))
                .map(InputSplit::new);
}
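The clamping logic above leans on BigInteger.longValueExact, which throws ArithmeticException instead of silently truncating the way longValue() would. A tiny self-contained illustration:

import java.math.BigInteger;

public class LongValueExactSketch
{
  public static void main(String[] args)
  {
    BigInteger fits = BigInteger.valueOf(Long.MAX_VALUE);
    System.out.println(fits.longValueExact()); // 9223372036854775807

    BigInteger tooBig = fits.add(BigInteger.ONE);
    try {
      tooBig.longValueExact(); // out of long range: throws rather than wrapping around
    } catch (ArithmeticException e) {
      System.out.println("size exceeds long range; fall back to Long.MAX_VALUE");
    }
  }
}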
Use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
The class S3DataSegmentPullerTest, method testGZUncompress.
@Test
public void testGZUncompress() throws IOException, SegmentLoadingException
{
  final String bucket = "bucket";
  final String keyPrefix = "prefix/dir/0";
  final ServerSideEncryptingAmazonS3 s3Client = EasyMock.createStrictMock(ServerSideEncryptingAmazonS3.class);
  final byte[] value = bucket.getBytes(StandardCharsets.UTF_8);

  // Write a small gzipped file that the mocked S3 object will stream back.
  final File tmpFile = temporaryFolder.newFile("gzTest.gz");
  try (OutputStream outputStream = new GZIPOutputStream(new FileOutputStream(tmpFile))) {
    outputStream.write(value);
  }

  final S3Object object0 = new S3Object();
  object0.setBucketName(bucket);
  object0.setKey(keyPrefix + "/renames-0.gz");
  object0.getObjectMetadata().setLastModified(new Date(0));
  object0.setObjectContent(new FileInputStream(tmpFile));

  final S3ObjectSummary objectSummary = new S3ObjectSummary();
  objectSummary.setBucketName(bucket);
  objectSummary.setKey(keyPrefix + "/renames-0.gz");
  objectSummary.setLastModified(new Date(0));

  final File tmpDir = temporaryFolder.newFolder("gzTestDir");

  EasyMock.expect(s3Client.doesObjectExist(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
          .andReturn(true)
          .once();
  EasyMock.expect(s3Client.getObject(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
          .andReturn(object0)
          .once();

  S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);

  EasyMock.replay(s3Client);
  FileUtils.FileCopyResult result = puller.getSegmentFiles(new CloudObjectLocation(bucket, object0.getKey()), tmpDir);
  EasyMock.verify(s3Client);

  // The puller uncompresses the object and strips the .gz suffix from the file name.
  Assert.assertEquals(value.length, result.size());
  File expected = new File(tmpDir, "renames-0");
  Assert.assertTrue(expected.exists());
  Assert.assertEquals(value.length, expected.length());
}
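Note that createStrictMock also pins the call order, so the test fails if getObject is invoked before doesObjectExist. A minimal illustration of that EasyMock behavior against a hypothetical interface (Store, exists, and fetch below are invented for this sketch):

import org.easymock.EasyMock;

public class StrictMockSketch
{
  // Hypothetical collaborator, standing in for ServerSideEncryptingAmazonS3.
  interface Store
  {
    boolean exists(String key);

    String fetch(String key);
  }

  public static void main(String[] args)
  {
    Store store = EasyMock.createStrictMock(Store.class);
    EasyMock.expect(store.exists("k")).andReturn(true).once();
    EasyMock.expect(store.fetch("k")).andReturn("v").once();
    EasyMock.replay(store);

    // With a strict mock these calls must occur in the recorded order.
    store.exists("k");
    store.fetch("k");
    EasyMock.verify(store); // passes only if every expectation was met, in order
  }
}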
Use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
The class S3DataSegmentPullerTest, method testGZUncompressRetries.
@Test
public void testGZUncompressRetries() throws IOException, SegmentLoadingException
{
  final String bucket = "bucket";
  final String keyPrefix = "prefix/dir/0";
  final ServerSideEncryptingAmazonS3 s3Client = EasyMock.createStrictMock(ServerSideEncryptingAmazonS3.class);
  final byte[] value = bucket.getBytes(StandardCharsets.UTF_8);

  final File tmpFile = temporaryFolder.newFile("gzTest.gz");
  try (OutputStream outputStream = new GZIPOutputStream(new FileOutputStream(tmpFile))) {
    outputStream.write(value);
  }

  S3Object object0 = new S3Object();
  object0.setBucketName(bucket);
  object0.setKey(keyPrefix + "/renames-0.gz");
  object0.getObjectMetadata().setLastModified(new Date(0));
  object0.setObjectContent(new FileInputStream(tmpFile));

  final S3ObjectSummary objectSummary = new S3ObjectSummary();
  objectSummary.setBucketName(bucket);
  objectSummary.setKey(keyPrefix + "/renames-0.gz");
  objectSummary.setLastModified(new Date(0));

  final ListObjectsV2Result listObjectsResult = new ListObjectsV2Result();
  listObjectsResult.setKeyCount(1);
  listObjectsResult.getObjectSummaries().add(objectSummary);

  File tmpDir = temporaryFolder.newFolder("gzTestDir");

  // A recoverable 404: the first getObject fails, the retry succeeds.
  AmazonS3Exception exception = new AmazonS3Exception("S3DataSegmentPullerTest");
  exception.setErrorCode("NoSuchKey");
  exception.setStatusCode(404);
  EasyMock.expect(s3Client.doesObjectExist(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
          .andReturn(true)
          .once();
  EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey())))
          .andThrow(exception)
          .once();
  EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey())))
          .andReturn(object0)
          .once();

  S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);

  EasyMock.replay(s3Client);
  FileUtils.FileCopyResult result = puller.getSegmentFiles(new CloudObjectLocation(bucket, object0.getKey()), tmpDir);
  EasyMock.verify(s3Client);

  Assert.assertEquals(value.length, result.size());
  File expected = new File(tmpDir, "renames-0");
  Assert.assertTrue(expected.exists());
  Assert.assertEquals(value.length, expected.length());
}
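The throw-then-return pair of getObject expectations is the heart of this test: the puller must absorb one recoverable NoSuchKey failure and try again. A hypothetical retry loop in plain Java showing the shape of that behavior (Druid's actual retry logic lives elsewhere; the retry helper and maxTries below are illustrative only):

import java.util.concurrent.Callable;

public class RetrySketch
{
  // Hypothetical helper: retry a call up to maxTries times on any exception.
  static <T> T retry(Callable<T> call, int maxTries) throws Exception
  {
    Exception last = null;
    for (int attempt = 1; attempt <= maxTries; attempt++) {
      try {
        return call.call();
      } catch (Exception e) {
        last = e; // e.g. a transient 404 that resolves on the next attempt
      }
    }
    throw last;
  }

  public static void main(String[] args) throws Exception
  {
    final int[] calls = {0};
    String result = retry(() -> {
      if (calls[0]++ == 0) {
        throw new RuntimeException("NoSuchKey"); // first attempt fails, like the mocked getObject
      }
      return "object content";
    }, 3);
    System.out.println(result + " after " + calls[0] + " attempts");
  }
}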