Search in sources :

Example 11 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Example from class S3DataSegmentFinder, method findSegments.

/**
 * Scans the configured S3 bucket (under {@code workingDirPath}, or the configured base key when
 * the path is empty) for descriptor.json files and returns the segments they describe.
 * When a descriptor's loadSpec does not point at the sibling index.zip, the in-memory loadSpec is
 * corrected, and — if {@code updateDescriptor} is true — the descriptor.json in S3 is rewritten.
 *
 * @param workingDirPath   S3 key prefix to scan; empty string means use the configured base key
 * @param updateDescriptor whether to push corrected descriptor.json files back to S3
 * @return the set of segments found
 * @throws SegmentLoadingException on S3/IO failure, or when a descriptor.json exists without its index.zip
 */
@Override
public Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor) throws SegmentLoadingException {
    final Set<DataSegment> segments = Sets.newHashSet();
    try {
        Iterator<StorageObject> objectsIterator = S3Utils.storageObjectsIterator(s3Client, config.getBucket(), workingDirPath.length() == 0 ? config.getBaseKey() : workingDirPath, config.getMaxListingLength());
        while (objectsIterator.hasNext()) {
            StorageObject storageObject = objectsIterator.next();
            // Only the key is needed from the listing; release the data stream immediately.
            storageObject.closeDataInputStream();
            if (S3Utils.toFilename(storageObject.getKey()).equals("descriptor.json")) {
                final String descriptorJson = storageObject.getKey();
                String indexZip = S3Utils.indexZipForSegmentPath(descriptorJson);
                if (S3Utils.isObjectInBucket(s3Client, config.getBucket(), indexZip)) {
                    // This object is the descriptor.json itself (it was misleadingly named "indexObject").
                    S3Object descriptorObject = s3Client.getObject(config.getBucket(), descriptorJson);
                    try (InputStream is = descriptorObject.getDataInputStream()) {
                        final DataSegment dataSegment = jsonMapper.readValue(is, DataSegment.class);
                        log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
                        final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                        // Repair the loadSpec if it does not point at the index.zip we just located.
                        if (!loadSpec.get("type").equals(S3StorageDruidModule.SCHEME) || !loadSpec.get("key").equals(indexZip)) {
                            loadSpec.put("type", S3StorageDruidModule.SCHEME);
                            loadSpec.put("key", indexZip);
                            if (updateDescriptor) {
                                // Fix: log the new index.zip path; previously this logged the S3Object's toString().
                                log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", descriptorJson, indexZip);
                                S3Object newDescJsonObject = new S3Object(descriptorJson, jsonMapper.writeValueAsString(dataSegment));
                                s3Client.putObject(config.getBucket(), newDescJsonObject);
                            }
                        }
                        segments.add(dataSegment);
                    }
                } else {
                    throw new SegmentLoadingException("index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
                }
            }
        }
    } catch (ServiceException e) {
        throw new SegmentLoadingException(e, "Problem interacting with S3");
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "IO exception");
    } catch (Exception e) {
        // Rethrow our checked exception as-is; wrap anything unexpected unchecked.
        Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class);
        Throwables.propagate(e);
    }
    return segments;
}
Also used : StorageObject(org.jets3t.service.model.StorageObject) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) InputStream(java.io.InputStream) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment) ServiceException(org.jets3t.service.ServiceException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException) ServiceException(org.jets3t.service.ServiceException) S3Object(org.jets3t.service.model.S3Object) StorageObject(org.jets3t.service.model.StorageObject) S3Object(org.jets3t.service.model.S3Object)

Example 12 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Example from class S3DataSegmentKiller, method kill.

/**
 * Deletes a segment's index file and its descriptor.json from S3.
 * Objects that are already absent are silently skipped.
 *
 * @param segment the segment whose S3 objects should be removed
 * @throws SegmentLoadingException if S3 reports a failure while checking or deleting
 */
@Override
public void kill(DataSegment segment) throws SegmentLoadingException {
    try {
        final Map<String, Object> loadSpec = segment.getLoadSpec();
        final String bucket = MapUtils.getString(loadSpec, "bucket");
        final String indexKey = MapUtils.getString(loadSpec, "key");
        // The descriptor lives next to the index file; derive its key from the index key.
        final String descriptorKey = S3Utils.descriptorPathForSegmentPath(indexKey);
        if (s3Client.isObjectInBucket(bucket, indexKey)) {
            log.info("Removing index file[s3://%s/%s] from s3!", bucket, indexKey);
            s3Client.deleteObject(bucket, indexKey);
        }
        if (s3Client.isObjectInBucket(bucket, descriptorKey)) {
            log.info("Removing descriptor file[s3://%s/%s] from s3!", bucket, descriptorKey);
            s3Client.deleteObject(bucket, descriptorKey);
        }
    } catch (ServiceException e) {
        throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getIdentifier(), e);
    }
}
Also used : ServiceException(org.jets3t.service.ServiceException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException)

Example 13 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Example from class S3DataSegmentMover, method move.

/**
 * Moves a segment's index file and descriptor.json to the target bucket/baseKey and returns
 * a copy of the segment whose loadSpec points at the new location. All other loadSpec entries
 * are preserved.
 *
 * @param segment        the segment to relocate
 * @param targetLoadSpec must contain "bucket" and "baseKey" describing the destination
 * @return the segment with its loadSpec rewritten to the new bucket/key
 * @throws SegmentLoadingException when the target is unspecified or S3 reports a failure
 */
@Override
public DataSegment move(DataSegment segment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
    try {
        final Map<String, Object> currentLoadSpec = segment.getLoadSpec();
        final String sourceBucket = MapUtils.getString(currentLoadSpec, "bucket");
        final String sourceIndexKey = MapUtils.getString(currentLoadSpec, "key");
        final String sourceDescriptorKey = S3Utils.descriptorPathForSegmentPath(sourceIndexKey);
        final String destBucket = MapUtils.getString(targetLoadSpec, "bucket");
        final String destBaseKey = MapUtils.getString(targetLoadSpec, "baseKey");
        final String destIndexKey = S3Utils.constructSegmentPath(destBaseKey, segment);
        final String destDescriptorKey = S3Utils.descriptorPathForSegmentPath(destIndexKey);
        if (destBucket.isEmpty()) {
            throw new SegmentLoadingException("Target S3 bucket is not specified");
        }
        if (destIndexKey.isEmpty()) {
            throw new SegmentLoadingException("Target S3 baseKey is not specified");
        }
        safeMove(sourceBucket, sourceIndexKey, destBucket, destIndexKey);
        safeMove(sourceBucket, sourceDescriptorKey, destBucket, destDescriptorKey);
        // Carry over every loadSpec entry except the old location, then point at the new one.
        final Predicate<String> isNotLocationKey = new Predicate<String>() {

            @Override
            public boolean apply(String entryKey) {
                return !entryKey.equals("bucket") && !entryKey.equals("key");
            }
        };
        return segment.withLoadSpec(
            ImmutableMap.<String, Object>builder()
                .putAll(Maps.filterKeys(currentLoadSpec, isNotLocationKey))
                .put("bucket", destBucket)
                .put("key", destIndexKey)
                .build());
    } catch (ServiceException e) {
        throw new SegmentLoadingException(e, "Unable to move segment[%s]: [%s]", segment.getIdentifier(), e);
    }
}
Also used : ServiceException(org.jets3t.service.ServiceException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) S3Object(org.jets3t.service.model.S3Object) Predicate(com.google.common.base.Predicate)

Example 14 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Example from class S3DataSegmentPuller, method getSegmentFiles.

/**
 * Downloads and decompresses the segment archive at {@code s3Coords} into {@code outDir}.
 * Supports .zip and .gz archives; anything else is rejected. On any failure the (possibly
 * partially populated) output directory is deleted before the exception is rethrown.
 *
 * @param s3Coords bucket/path of the segment archive in S3
 * @param outDir   local directory to extract into; created if absent
 * @return copy statistics for the pulled files
 * @throws SegmentLoadingException if the object is missing or the pull/extract fails
 */
public FileUtils.FileCopyResult getSegmentFiles(final S3Coords s3Coords, final File outDir) throws SegmentLoadingException {
    log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
    if (!isObjectInBucket(s3Coords)) {
        throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
    }
    try {
        org.apache.commons.io.FileUtils.forceMkdir(outDir);
        final URI uri = URI.create(String.format("s3://%s/%s", s3Coords.bucket, s3Coords.path));
        // Lazily re-openable stream over the S3 object. Retryable service failures are
        // rethrown as IOException so the retry policy inside CompressionUtils can reconnect.
        final ByteSource byteSource = new ByteSource() {

            @Override
            public InputStream openStream() throws IOException {
                try {
                    return buildFileObject(uri, s3Client).openInputStream();
                } catch (ServiceException e) {
                    if (e.getCause() != null) {
                        if (S3Utils.S3RETRY.apply(e)) {
                            throw new IOException("Recoverable exception", e);
                        }
                    }
                    throw Throwables.propagate(e);
                }
            }
        };
        if (CompressionUtils.isZip(s3Coords.path)) {
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, true);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
            return result;
        }
        if (CompressionUtils.isGz(s3Coords.path)) {
            final String fname = Files.getNameWithoutExtension(uri.getPath());
            final File outFile = new File(outDir, fname);
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
            return result;
        }
        throw new IAE("Do not know how to load file type at [%s]", uri.toString());
    } catch (Exception e) {
        // Best-effort cleanup of the partially extracted directory; log but don't mask the cause.
        try {
            org.apache.commons.io.FileUtils.deleteDirectory(outDir);
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), s3Coords.toString());
        }
        // Fix: e.getMessage() was previously passed as the *format string*, which throws
        // IllegalFormatException when the message contains '%' and NPEs when it is null.
        throw new SegmentLoadingException(e, "%s", e.getMessage());
    }
}
Also used : ServiceException(org.jets3t.service.ServiceException) S3ServiceException(org.jets3t.service.S3ServiceException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileUtils(io.druid.java.util.common.FileUtils) ByteSource(com.google.common.io.ByteSource) IOException(java.io.IOException) IAE(io.druid.java.util.common.IAE) URI(java.net.URI) File(java.io.File) ServiceException(org.jets3t.service.ServiceException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) S3ServiceException(org.jets3t.service.S3ServiceException) IOException(java.io.IOException)

Example 15 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Example from class AzureDataSegmentKiller, method kill.

/**
 * Deletes a segment from Azure blob storage by emptying the blob directory that contains it.
 *
 * @param segment the segment to remove; its loadSpec must contain "containerName" and "blobPath"
 * @throws SegmentLoadingException if the blob path has no parent directory, or Azure/URI errors occur
 */
@Override
public void kill(DataSegment segment) throws SegmentLoadingException {
    log.info("Killing segment [%s]", segment);
    Map<String, Object> loadSpec = segment.getLoadSpec();
    final String containerName = MapUtils.getString(loadSpec, "containerName");
    final String blobPath = MapUtils.getString(loadSpec, "blobPath");
    // Fix: getParent() returns null for a single-component blobPath, which previously
    // surfaced as an NPE instead of the declared SegmentLoadingException.
    final java.nio.file.Path parent = Paths.get(blobPath).getParent();
    if (parent == null) {
        throw new SegmentLoadingException("Cannot determine parent directory of blobPath[%s] for segment[%s]", blobPath, segment.getIdentifier());
    }
    final String dirPath = parent.toString();
    try {
        azureStorage.emptyCloudBlobDirectory(containerName, dirPath);
    } catch (StorageException e) {
        throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getIdentifier(), e.getExtendedErrorInformation() == null ? null : e.getExtendedErrorInformation().getErrorMessage());
    } catch (URISyntaxException e) {
        throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getIdentifier(), e.getReason());
    }
}
Also used : SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) URISyntaxException(java.net.URISyntaxException) StorageException(com.microsoft.azure.storage.StorageException)

Aggregations

SegmentLoadingException (io.druid.segment.loading.SegmentLoadingException)22 IOException (java.io.IOException)16 DataSegment (io.druid.timeline.DataSegment)9 File (java.io.File)6 FileUtils (io.druid.java.util.common.FileUtils)4 ServiceException (org.jets3t.service.ServiceException)4 ByteSource (com.google.common.io.ByteSource)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 Path (org.apache.hadoop.fs.Path)3 ImmutableList (com.google.common.collect.ImmutableList)2 SegmentLoaderFactory (io.druid.indexing.common.SegmentLoaderFactory)2 TaskToolboxFactory (io.druid.indexing.common.TaskToolboxFactory)2 LocalTaskActionClientFactory (io.druid.indexing.common.actions.LocalTaskActionClientFactory)2 TaskActionToolbox (io.druid.indexing.common.actions.TaskActionToolbox)2 TaskConfig (io.druid.indexing.common.config.TaskConfig)2 DataSegmentArchiver (io.druid.segment.loading.DataSegmentArchiver)2 DataSegmentMover (io.druid.segment.loading.DataSegmentMover)2 ArrayList (java.util.ArrayList)2 FileUtils (org.apache.commons.io.FileUtils)2 S3Object (org.jets3t.service.model.S3Object)2