Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io: class S3DataSegmentFinder, method findSegments.
@Override
public Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor) throws SegmentLoadingException
{
  final Set<DataSegment> segments = Sets.newHashSet();
  try {
    Iterator<StorageObject> objectsIterator = S3Utils.storageObjectsIterator(
        s3Client,
        config.getBucket(),
        workingDirPath.length() == 0 ? config.getBaseKey() : workingDirPath,
        config.getMaxListingLength()
    );
    while (objectsIterator.hasNext()) {
      StorageObject storageObject = objectsIterator.next();
      storageObject.closeDataInputStream();
      // Only descriptor.json objects are of interest; each one marks a segment directory.
      if (S3Utils.toFilename(storageObject.getKey()).equals("descriptor.json")) {
        final String descriptorJson = storageObject.getKey();
        String indexZip = S3Utils.indexZipForSegmentPath(descriptorJson);
        if (S3Utils.isObjectInBucket(s3Client, config.getBucket(), indexZip)) {
          S3Object indexObject = s3Client.getObject(config.getBucket(), descriptorJson);
          try (InputStream is = indexObject.getDataInputStream()) {
            final DataSegment dataSegment = jsonMapper.readValue(is, DataSegment.class);
            log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
            final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
            // Repair the loadSpec if it does not point at the index.zip that was actually found.
            if (!loadSpec.get("type").equals(S3StorageDruidModule.SCHEME) || !loadSpec.get("key").equals(indexZip)) {
              loadSpec.put("type", S3StorageDruidModule.SCHEME);
              loadSpec.put("key", indexZip);
              if (updateDescriptor) {
                log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", descriptorJson, indexObject);
                S3Object newDescJsonObject = new S3Object(descriptorJson, jsonMapper.writeValueAsString(dataSegment));
                s3Client.putObject(config.getBucket(), newDescJsonObject);
              }
            }
            segments.add(dataSegment);
          }
        } else {
          throw new SegmentLoadingException("index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
        }
      }
    }
  }
  catch (ServiceException e) {
    throw new SegmentLoadingException(e, "Problem interacting with S3");
  }
  catch (IOException e) {
    throw new SegmentLoadingException(e, "IO exception");
  }
  catch (Exception e) {
    Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class);
    Throwables.propagate(e);
  }
  return segments;
}
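For orientation, a hypothetical usage sketch (the finder's construction and wiring are not shown; an empty workingDirPath falls back to config.getBaseKey(), and passing true lets stale descriptor.json files be rewritten in S3):

  // Hypothetical sketch, assuming a segmentFinder instance already exists.
  Set<DataSegment> found = segmentFinder.findSegments("", true);
  for (DataSegment segment : found) {
    log.info("Discovered segment [%s]", segment.getIdentifier());
  }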
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io: class S3DataSegmentKiller, method kill.
@Override
public void kill(DataSegment segment) throws SegmentLoadingException
{
  try {
    Map<String, Object> loadSpec = segment.getLoadSpec();
    String s3Bucket = MapUtils.getString(loadSpec, "bucket");
    String s3Path = MapUtils.getString(loadSpec, "key");
    String s3DescriptorPath = S3Utils.descriptorPathForSegmentPath(s3Path);
    // Delete the index.zip and its sibling descriptor.json, if they exist.
    if (s3Client.isObjectInBucket(s3Bucket, s3Path)) {
      log.info("Removing index file[s3://%s/%s] from s3!", s3Bucket, s3Path);
      s3Client.deleteObject(s3Bucket, s3Path);
    }
    if (s3Client.isObjectInBucket(s3Bucket, s3DescriptorPath)) {
      log.info("Removing descriptor file[s3://%s/%s] from s3!", s3Bucket, s3DescriptorPath);
      s3Client.deleteObject(s3Bucket, s3DescriptorPath);
    }
  }
  catch (ServiceException e) {
    throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getIdentifier(), e);
  }
}
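For reference, a minimal sketch of the loadSpec this killer consumes; the bucket and key values below are hypothetical placeholders, not taken from the code above:

  // Only "bucket" and "key" are read by kill(); values here are hypothetical.
  Map<String, Object> loadSpec = ImmutableMap.<String, Object>of(
      "type", "s3_zip",
      "bucket", "example-bucket",
      "key", "baseKey/dataSource/interval/version/0/index.zip"
  );
  // S3Utils.descriptorPathForSegmentPath(...) then resolves the sibling
  // "baseKey/dataSource/interval/version/0/descriptor.json"; both objects are
  // deleted if present.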
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io: class S3DataSegmentMover, method move.
@Override
public DataSegment move(DataSegment segment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException
{
  try {
    Map<String, Object> loadSpec = segment.getLoadSpec();
    String s3Bucket = MapUtils.getString(loadSpec, "bucket");
    String s3Path = MapUtils.getString(loadSpec, "key");
    String s3DescriptorPath = S3Utils.descriptorPathForSegmentPath(s3Path);

    final String targetS3Bucket = MapUtils.getString(targetLoadSpec, "bucket");
    final String targetS3BaseKey = MapUtils.getString(targetLoadSpec, "baseKey");
    final String targetS3Path = S3Utils.constructSegmentPath(targetS3BaseKey, segment);
    String targetS3DescriptorPath = S3Utils.descriptorPathForSegmentPath(targetS3Path);

    if (targetS3Bucket.isEmpty()) {
      throw new SegmentLoadingException("Target S3 bucket is not specified");
    }
    if (targetS3Path.isEmpty()) {
      throw new SegmentLoadingException("Target S3 baseKey is not specified");
    }

    // Move both the index.zip and the descriptor.json to the target location.
    safeMove(s3Bucket, s3Path, targetS3Bucket, targetS3Path);
    safeMove(s3Bucket, s3DescriptorPath, targetS3Bucket, targetS3DescriptorPath);

    // Return a copy of the segment whose loadSpec points at the new bucket and key,
    // preserving every other loadSpec entry.
    return segment.withLoadSpec(
        ImmutableMap.<String, Object>builder()
            .putAll(
                Maps.filterKeys(
                    loadSpec,
                    new Predicate<String>()
                    {
                      @Override
                      public boolean apply(String input)
                      {
                        return !(input.equals("bucket") || input.equals("key"));
                      }
                    }
                )
            )
            .put("bucket", targetS3Bucket)
            .put("key", targetS3Path)
            .build()
    );
  }
  catch (ServiceException e) {
    throw new SegmentLoadingException(e, "Unable to move segment[%s]: [%s]", segment.getIdentifier(), e);
  }
}
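For reference, a minimal sketch of a targetLoadSpec for move(); the mover and segment variables, the bucket, and the baseKey are hypothetical placeholders:

  // The mover builds the full target key itself via
  // S3Utils.constructSegmentPath(baseKey, segment); callers only supply these two entries.
  Map<String, Object> targetLoadSpec = ImmutableMap.<String, Object>of(
      "bucket", "archive-bucket",
      "baseKey", "archived/segments"
  );
  DataSegment moved = mover.move(segment, targetLoadSpec);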
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io: class S3DataSegmentPuller, method getSegmentFiles.
public FileUtils.FileCopyResult getSegmentFiles(final S3Coords s3Coords, final File outDir) throws SegmentLoadingException
{
  log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
  if (!isObjectInBucket(s3Coords)) {
    throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
  }
  try {
    org.apache.commons.io.FileUtils.forceMkdir(outDir);
    final URI uri = URI.create(String.format("s3://%s/%s", s3Coords.bucket, s3Coords.path));
    final ByteSource byteSource = new ByteSource()
    {
      @Override
      public InputStream openStream() throws IOException
      {
        try {
          return buildFileObject(uri, s3Client).openInputStream();
        }
        catch (ServiceException e) {
          // Retryable S3 errors are rethrown as IOException so the S3RETRY predicate can recover them.
          if (e.getCause() != null) {
            if (S3Utils.S3RETRY.apply(e)) {
              throw new IOException("Recoverable exception", e);
            }
          }
          throw Throwables.propagate(e);
        }
      }
    };
    if (CompressionUtils.isZip(s3Coords.path)) {
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, true);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
      return result;
    }
    if (CompressionUtils.isGz(s3Coords.path)) {
      final String fname = Files.getNameWithoutExtension(uri.getPath());
      final File outFile = new File(outDir, fname);
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
      log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
      return result;
    }
    throw new IAE("Do not know how to load file type at [%s]", uri.toString());
  }
  catch (Exception e) {
    try {
      // Clean up any partially written files before surfacing the failure.
      org.apache.commons.io.FileUtils.deleteDirectory(outDir);
    }
    catch (IOException ioe) {
      log.warn(
          ioe,
          "Failed to remove output directory [%s] for segment pulled from [%s]",
          outDir.getAbsolutePath(),
          s3Coords.toString()
      );
    }
    throw new SegmentLoadingException(e, e.getMessage());
  }
}
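A hypothetical usage sketch, assuming a puller instance and an S3Coords pointing at a segment's index.zip (how the coordinates are constructed is elided here; they only need to carry the bucket and path fields used above):

  File outDir = new File("/tmp/druid/segment-cache/example-segment");  // hypothetical local path
  FileUtils.FileCopyResult copyResult = puller.getSegmentFiles(s3Coords, outDir);
  log.info("Pulled %d bytes into [%s]", copyResult.size(), outDir);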
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io: class AzureDataSegmentKiller, method kill.
@Override
public void kill(DataSegment segment) throws SegmentLoadingException
{
  log.info("Killing segment [%s]", segment);
  Map<String, Object> loadSpec = segment.getLoadSpec();
  final String containerName = MapUtils.getString(loadSpec, "containerName");
  final String blobPath = MapUtils.getString(loadSpec, "blobPath");
  final String dirPath = Paths.get(blobPath).getParent().toString();
  try {
    // Remove every blob under the segment's directory (the parent of the index blob).
    azureStorage.emptyCloudBlobDirectory(containerName, dirPath);
  }
  catch (StorageException e) {
    throw new SegmentLoadingException(
        e,
        "Couldn't kill segment[%s]: [%s]",
        segment.getIdentifier(),
        e.getExtendedErrorInformation() == null ? null : e.getExtendedErrorInformation().getErrorMessage()
    );
  }
  catch (URISyntaxException e) {
    throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getIdentifier(), e.getReason());
  }
}
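For reference, a minimal sketch of the loadSpec this Azure killer consumes; the container and blob path values below are hypothetical placeholders:

  // Only "containerName" and "blobPath" are read by kill(); values here are hypothetical.
  Map<String, Object> loadSpec = ImmutableMap.<String, Object>of(
      "type", "azure",
      "containerName", "druid-segments",
      "blobPath", "dataSource/interval/version/0/index.zip"
  );
  // Paths.get(blobPath).getParent() yields "dataSource/interval/version/0",
  // and emptyCloudBlobDirectory(...) deletes every blob under that directory.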