Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
From class AzureDataSegmentPullerTest, method test_getSegmentFiles_recoverableErrorRaisedWhenPullingSegmentFiles_deleteOutputDirectory:
@Test(expected = SegmentLoadingException.class)
public void test_getSegmentFiles_recoverableErrorRaisedWhenPullingSegmentFiles_deleteOutputDirectory()
    throws IOException, URISyntaxException, StorageException, SegmentLoadingException
{
  final File outDir = FileUtils.createTempDir();
  try {
    EasyMock.expect(byteSourceFactory.create(CONTAINER_NAME, BLOB_PATH))
            .andReturn(new AzureByteSource(azureStorage, CONTAINER_NAME, BLOB_PATH));
    EasyMock.expect(azureStorage.getBlobInputStream(0L, CONTAINER_NAME, BLOB_PATH))
            .andThrow(new StorageException(null, null, 0, null, null))
            .atLeastOnce();
    replayAll();

    AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory);
    puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, outDir);

    // Not reached: the pull above is expected to throw SegmentLoadingException.
    Assert.assertFalse(outDir.exists());
    verifyAll();
  }
  catch (Exception e) {
    // The puller must have cleaned up the partially written output directory.
    Assert.assertFalse(outDir.exists());
    verifyAll();
    throw e;
  }
  finally {
    FileUtils.deleteDirectory(outDir);
  }
}
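The contract this test exercises can be summarized in a minimal standalone sketch. This is an assumption about the behavior being verified, not the actual AzureDataSegmentPuller code: on a storage failure the puller removes the partially written output directory before rethrowing the error as a SegmentLoadingException. The PullWithCleanupSketch class and its readBlobInto and deleteQuietly helpers are hypothetical.

import java.io.File;
import java.io.IOException;
import org.apache.druid.segment.loading.SegmentLoadingException;

final class PullWithCleanupSketch
{
  // Hypothetical pull: on any failure, delete the partial output and rethrow
  // as SegmentLoadingException, which is what the test above asserts.
  static void pull(File outDir) throws SegmentLoadingException
  {
    try {
      readBlobInto(outDir);
    }
    catch (IOException e) {
      deleteQuietly(outDir);
      throw new SegmentLoadingException(e, "Failed to pull segment into [%s]", outDir);
    }
  }

  private static void readBlobInto(File outDir) throws IOException
  {
    // Placeholder for the real blob download.
    throw new IOException("simulated recoverable storage error");
  }

  private static void deleteQuietly(File dir)
  {
    final File[] children = dir.listFiles();
    if (children != null) {
      for (File child : children) {
        deleteQuietly(child);
      }
    }
    dir.delete();
  }
}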
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
From class CassandraDataSegmentPuller, method getSegmentFiles:
FileUtils.FileCopyResult getSegmentFiles(final String key, final File outDir) throws SegmentLoadingException
{
  log.info("Pulling index from C* at path[%s] to outDir[%s]", key, outDir);
  try {
    FileUtils.mkdirp(outDir);
  }
  catch (IOException e) {
    throw new SegmentLoadingException(e, "");
  }
  long startTime = System.currentTimeMillis();
  final File tmpFile = new File(outDir, "index.zip");
  log.info("Pulling to temporary local cache [%s]", tmpFile.getAbsolutePath());

  // Download the zipped segment from Cassandra chunked storage, retrying up to 10 times.
  try {
    RetryUtils.retry(
        () -> {
          try (OutputStream os = new FileOutputStream(tmpFile)) {
            ChunkedStorage.newReader(indexStorage, key, os)
                          .withBatchSize(BATCH_SIZE)
                          .withConcurrencyLevel(CONCURRENCY)
                          .call();
          }
          return new FileUtils.FileCopyResult(tmpFile);
        },
        Predicates.alwaysTrue(),
        10
    );
  }
  catch (Exception e) {
    throw new SegmentLoadingException(e, "Unable to copy key [%s] to file [%s]", key, tmpFile.getAbsolutePath());
  }

  // Unzip into outDir; on failure, clear the directory so no partial segment is left behind.
  try {
    final FileUtils.FileCopyResult result = CompressionUtils.unzip(tmpFile, outDir);
    log.info("Pull of file[%s] completed in %,d millis (%s bytes)", key, System.currentTimeMillis() - startTime, result.size());
    return result;
  }
  catch (Exception e) {
    try {
      FileUtils.deleteDirectory(outDir);
    }
    catch (IOException e1) {
      log.error(e1, "Error clearing segment directory [%s]", outDir.getAbsolutePath());
      e.addSuppressed(e1);
    }
    throw new SegmentLoadingException(e, e.getMessage());
  }
  finally {
    if (!tmpFile.delete()) {
      log.warn("Could not delete cache file at [%s]", tmpFile.getAbsolutePath());
    }
  }
}
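For context, here is a hypothetical caller sketch (not from the Druid source) showing how the SegmentLoadingException surfaced above might be handled. The example class, its placement, and the logger are illustrative; it assumes it sits alongside the puller, since getSegmentFiles is package-private.

import java.io.File;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.storage.cassandra.CassandraDataSegmentPuller;

final class CassandraSegmentLoadExample
{
  // Hypothetical caller: fetch a segment and turn a failure into a log entry.
  static void loadSegment(CassandraDataSegmentPuller puller, String key, File outDir, Logger log)
  {
    try {
      // Downloads index.zip from Cassandra chunked storage and unzips it into outDir.
      final FileUtils.FileCopyResult result = puller.getSegmentFiles(key, outDir);
      log.info("Loaded %,d bytes for key [%s]", result.size(), key);
    }
    catch (SegmentLoadingException e) {
      // The segment could not be fetched or unzipped; the caller decides whether to retry.
      log.error(e, "Could not load segment for key [%s]", key);
    }
  }
}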
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
From class S3DataSegmentMover, method selfCheckingMove:
/**
 * Copies an object and then checks, via a separate API call, that the object is present at the target location.
 * If it is not, an exception is thrown and the object is not deleted at the old location. This "paranoid" check
 * was added after it was observed that S3 may report a successful move even though the object is not found at
 * the target location.
 */
private void selfCheckingMove(
    String s3Bucket,
    String targetS3Bucket,
    String s3Path,
    String targetS3Path,
    String copyMsg
) throws IOException, SegmentLoadingException
{
  if (s3Bucket.equals(targetS3Bucket) && s3Path.equals(targetS3Path)) {
    log.info("No need to move file[s3://%s/%s] onto itself", s3Bucket, s3Path);
    return;
  }
  final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
  if (s3Client.doesObjectExist(s3Bucket, s3Path)) {
    final ListObjectsV2Result listResult = s3Client.listObjectsV2(
        new ListObjectsV2Request()
            .withBucketName(s3Bucket)
            .withPrefix(s3Path)
            .withMaxKeys(1)
    );
    // Check getObjectSummaries().size() rather than getKeyCount(): keyCount has been observed
    // to still be zero even when the listing returns summaries.
    if (listResult.getObjectSummaries().size() == 0) {
      // should never happen
      throw new ISE("Unable to list object [s3://%s/%s]", s3Bucket, s3Path);
    }
    final S3ObjectSummary objectSummary = listResult.getObjectSummaries().get(0);
    if (objectSummary.getStorageClass() != null
        && StorageClass.fromValue(StringUtils.toUpperCase(objectSummary.getStorageClass())).equals(StorageClass.Glacier)) {
      throw new AmazonServiceException(
          StringUtils.format("Cannot move file[s3://%s/%s] of storage class glacier, skipping.", s3Bucket, s3Path)
      );
    } else {
      log.info("Moving file %s", copyMsg);
      final CopyObjectRequest copyRequest = new CopyObjectRequest(s3Bucket, s3Path, targetS3Bucket, targetS3Path);
      if (!config.getDisableAcl()) {
        copyRequest.setAccessControlList(S3Utils.grantFullControlToBucketOwner(s3Client, targetS3Bucket));
      }
      s3Client.copyObject(copyRequest);
      if (!s3Client.doesObjectExist(targetS3Bucket, targetS3Path)) {
        throw new IOE("After copy was reported as successful the file doesn't exist in the target location [%s]", copyMsg);
      }
      deleteWithRetriesSilent(s3Bucket, s3Path);
      log.debug("Finished moving file %s", copyMsg);
    }
  } else {
    // ensure object exists in target location
    if (s3Client.doesObjectExist(targetS3Bucket, targetS3Path)) {
      log.info(
          "Not moving file [s3://%s/%s], already present in target location [s3://%s/%s]",
          s3Bucket, s3Path, targetS3Bucket, targetS3Path
      );
    } else {
      throw new SegmentLoadingException("Unable to move file %s, not present in either source or target location", copyMsg);
    }
  }
}
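The deleteWithRetriesSilent call above is not shown on this page. A plausible sketch of such a helper (an assumption, not the actual Druid implementation) is a bounded retry around the delete that logs rather than propagates a failure, since the copy has already succeeded. The retry predicate and count are illustrative.

// Sketch only: not the actual S3DataSegmentMover.deleteWithRetriesSilent implementation.
private void deleteWithRetriesSilent(final String s3Bucket, final String s3Path)
{
  try {
    RetryUtils.retry(
        () -> {
          // Assumes the Druid S3 client wrapper exposes deleteObject(bucket, key)
          // like the underlying AmazonS3 client.
          s3ClientSupplier.get().deleteObject(s3Bucket, s3Path);
          return null;
        },
        Predicates.alwaysTrue(),  // illustrative: retry on any failure
        3                         // illustrative retry count
    );
  }
  catch (Exception e) {
    // Swallow the failure: the object was copied successfully, so a leftover
    // source object is logged but does not fail the move.
    log.error(e, "Failed to delete file [s3://%s/%s] after a successful copy", s3Bucket, s3Path);
  }
}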
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
From class S3DataSegmentMover, method move:
@Override
public DataSegment move(DataSegment segment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException
{
  try {
    Map<String, Object> loadSpec = segment.getLoadSpec();
    String s3Bucket = MapUtils.getString(loadSpec, "bucket");
    String s3Path = MapUtils.getString(loadSpec, "key");

    final String targetS3Bucket = MapUtils.getString(targetLoadSpec, "bucket");
    final String targetS3BaseKey = MapUtils.getString(targetLoadSpec, "baseKey");
    final String targetS3Path = S3Utils.constructSegmentPath(
        targetS3BaseKey,
        DataSegmentPusher.getDefaultStorageDir(segment, false)
    );

    if (targetS3Bucket.isEmpty()) {
      throw new SegmentLoadingException("Target S3 bucket is not specified");
    }
    if (targetS3Path.isEmpty()) {
      throw new SegmentLoadingException("Target S3 baseKey is not specified");
    }

    safeMove(s3Bucket, s3Path, targetS3Bucket, targetS3Path);

    // Return the segment with its load spec pointing at the new location: drop the old
    // "bucket" and "key" entries and substitute the target bucket and path.
    return segment.withLoadSpec(
        ImmutableMap.<String, Object>builder()
                    .putAll(
                        Maps.filterKeys(
                            loadSpec,
                            new Predicate<String>()
                            {
                              @Override
                              public boolean apply(String input)
                              {
                                return !("bucket".equals(input) || "key".equals(input));
                              }
                            }
                        )
                    )
                    .put("bucket", targetS3Bucket)
                    .put("key", targetS3Path)
                    .build()
    );
  }
  catch (AmazonServiceException e) {
    throw new SegmentLoadingException(e, "Unable to move segment[%s]: [%s]", segment.getId(), e);
  }
}
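Since move() reads the "bucket" and "baseKey" entries from the target load spec, a caller hands it a map shaped like the one below. The example class, bucket name, and base key are illustrative, not from the Druid source.

import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.storage.s3.S3DataSegmentMover;
import org.apache.druid.timeline.DataSegment;

final class S3MoveExample
{
  // Hypothetical caller: archives a segment by moving it to another bucket/prefix.
  static DataSegment archive(S3DataSegmentMover mover, DataSegment segment) throws SegmentLoadingException
  {
    final Map<String, Object> targetLoadSpec = ImmutableMap.<String, Object>of(
        "bucket", "my-archive-bucket",         // hypothetical target bucket
        "baseKey", "druid/archived-segments"   // hypothetical target base key
    );
    // The returned segment's load spec points at the new bucket and key.
    return mover.move(segment, targetLoadSpec);
  }
}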
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
From class DeepStorageShuffleClient, method fetchSegmentFile:
@Override
public File fetchSegmentFile(File partitionDir, String supervisorTaskId, DeepStoragePartitionLocation location) throws IOException
{
  final LoadSpec loadSpec = objectMapper.convertValue(location.getLoadSpec(), LoadSpec.class);
  final File unzippedDir = new File(partitionDir, StringUtils.format("unzipped_%s", location.getSubTaskId()));
  FileUtils.mkdirp(unzippedDir);
  try {
    loadSpec.loadSegment(unzippedDir);
  }
  catch (SegmentLoadingException e) {
    LOG.error(e, "Failed to load segment");
    throw new IOException(e);
  }
  return unzippedDir;
}