
Example 11 with IOE

Use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

The class SegmentUtils, method getVersionFromDir.

public static int getVersionFromDir(File inDir) throws IOException {
    File versionFile = new File(inDir, "version.bin");
    if (versionFile.exists()) {
        return Ints.fromByteArray(Files.toByteArray(versionFile));
    }
    final File indexFile = new File(inDir, "index.drd");
    int version;
    if (indexFile.exists()) {
        try (InputStream in = new FileInputStream(indexFile)) {
            version = in.read();
        }
        return version;
    }
    throw new IOE("Invalid segment dir [%s]. Can't find either of version.bin or index.drd.", inDir);
}
Also used : FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) File(java.io.File) IOE(org.apache.druid.java.util.common.IOE)
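A minimal calling sketch for the method above, hedged: the segment directory path is a placeholder, and the import assumes SegmentUtils lives at org.apache.druid.segment.SegmentUtils. Since the method declares only IOException, the IOE thrown for an invalid directory can be caught as a plain IOException.

import java.io.File;
import java.io.IOException;
import org.apache.druid.segment.SegmentUtils;

public class VersionCheck {
    public static void main(String[] args) {
        // Placeholder path; point this at a real unpacked segment directory.
        final File segmentDir = new File("/tmp/druid/segment-cache/some-segment");
        try {
            final int version = SegmentUtils.getVersionFromDir(segmentDir);
            System.out.println("Segment format version: " + version);
        } catch (IOException e) {
            // IOE's formatted message ("Invalid segment dir [...]") surfaces here.
            System.err.println("Not a valid segment dir: " + e.getMessage());
        }
    }
}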

Example 12 with IOE

Use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

The class CompressionUtils, method zip.

/**
 * Zips the contents of the input directory to the output stream. Subdirectories are skipped.
 *
 * @param directory The directory whose contents should be added to the zip in the output stream.
 * @param out       The output stream to write the zip data to. Caller is responsible for closing this stream.
 *
 * @return The number of bytes (uncompressed) read from the input directory.
 *
 * @throws IOException
 */
public static long zip(File directory, OutputStream out) throws IOException {
    if (!directory.isDirectory()) {
        throw new IOE("directory[%s] is not a directory", directory);
    }
    final ZipOutputStream zipOut = new ZipOutputStream(out);
    long totalSize = 0;
    // Sort entries to make life easier when writing streaming-decompression unit tests.
    for (File file : Arrays.stream(directory.listFiles()).sorted().collect(Collectors.toList())) {
        log.debug("Adding file[%s] with size[%,d].  Total size so far[%,d]", file, file.length(), totalSize);
        if (file.length() > Integer.MAX_VALUE) {
            zipOut.finish();
            throw new IOE("file[%s] too large [%,d]", file, file.length());
        }
        zipOut.putNextEntry(new ZipEntry(file.getName()));
        totalSize += Files.asByteSource(file).copyTo(zipOut);
    }
    zipOut.closeEntry();
    // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
    zipOut.flush();
    zipOut.finish();
    return totalSize;
}
Also used : ZipOutputStream(java.util.zip.ZipOutputStream) ZipEntry(java.util.zip.ZipEntry) ZipFile(java.util.zip.ZipFile) File(java.io.File) IOE(org.apache.druid.java.util.common.IOE)
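A hedged usage sketch for zip(): the file paths are placeholders, and the import assumes the class sits at org.apache.druid.java.util.common.CompressionUtils alongside IOE. Note that zip() deliberately leaves the output stream open, so the caller closes it, here via try-with-resources.

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.druid.java.util.common.CompressionUtils;

public class ZipDirectory {
    public static void main(String[] args) throws IOException {
        final File inputDir = new File("/tmp/druid/segment-to-ship"); // placeholder input
        final File zipFile = new File("/tmp/druid/segment.zip");      // placeholder output
        try (OutputStream out = new FileOutputStream(zipFile)) {
            final long uncompressed = CompressionUtils.zip(inputDir, out);
            System.out.printf("Wrote %s (%,d uncompressed bytes)%n", zipFile, uncompressed);
        }
    }
}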

Example 13 with IOE

Use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

The class OssDataSegmentMover, method selfCheckingMove.

/**
 * Copies an object and then checks, via a separate API call, that the object is present at the target
 * location. If it is not, an exception is thrown and the object is not deleted at the old location.
 * This "paranoid" check was added after it was observed that OSS may report a successful move while
 * the object is not found at the target location.
 */
private void selfCheckingMove(String srcBucket, String dstBucket, String srcPath, String dstPath, String copyMsg) throws IOException, SegmentLoadingException {
    if (srcBucket.equals(dstBucket) && srcPath.equals(dstPath)) {
        log.info("No need to move file[%s://%s/%s] onto itself", OssStorageDruidModule.SCHEME, srcBucket, srcPath);
        return;
    }
    final OSS client = this.clientSupplier.get();
    if (client.doesObjectExist(srcBucket, srcPath)) {
        final ObjectListing listResult = client.listObjects(new ListObjectsRequest(srcBucket, srcPath, null, null, 1));
        // Using getObjectSummaries().size() instead of getKeyCount because, in some cases,
        // getObjectSummaries returns data while keyCount is still zero.
        if (listResult.getObjectSummaries().size() == 0) {
            // should never happen
            throw new ISE("Unable to list object [%s://%s/%s]", OssStorageDruidModule.SCHEME, srcBucket, srcPath);
        }
        final OSSObjectSummary objectSummary = listResult.getObjectSummaries().get(0);
        if (objectSummary.getStorageClass() != null && objectSummary.getStorageClass().equals(StorageClass.IA.name())) {
            throw new OSSException(StringUtils.format("Cannot move file[%s://%s/%s] of storage class IA, skipping.", OssStorageDruidModule.SCHEME, srcBucket, srcPath));
        } else {
            log.info("Moving file %s", copyMsg);
            final CopyObjectRequest copyRequest = new CopyObjectRequest(srcBucket, srcPath, dstBucket, dstPath);
            client.copyObject(copyRequest);
            if (!client.doesObjectExist(dstBucket, dstPath)) {
                throw new IOE("After copy was reported as successful the file doesn't exist in the target location [%s]", copyMsg);
            }
            deleteWithRetriesSilent(srcBucket, srcPath);
            log.debug("Finished moving file %s", copyMsg);
        }
    } else {
        // ensure object exists in target location
        if (client.doesObjectExist(dstBucket, dstPath)) {
            log.info("Not moving file [%s://%s/%s], already present in target location [%s://%s/%s]", OssStorageDruidModule.SCHEME, srcBucket, srcPath, OssStorageDruidModule.SCHEME, dstBucket, dstPath);
        } else {
            throw new SegmentLoadingException("Unable to move file %s, not present in either source or target location", copyMsg);
        }
    }
}
Also used : ListObjectsRequest(com.aliyun.oss.model.ListObjectsRequest) OSSObjectSummary(com.aliyun.oss.model.OSSObjectSummary) CopyObjectRequest(com.aliyun.oss.model.CopyObjectRequest) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) ObjectListing(com.aliyun.oss.model.ObjectListing) OSSException(com.aliyun.oss.OSSException) ISE(org.apache.druid.java.util.common.ISE) OSS(com.aliyun.oss.OSS) IOE(org.apache.druid.java.util.common.IOE)
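The copy-then-verify pattern in the Javadoc generalizes beyond OSS. Below is a minimal, storage-agnostic sketch of the same idea; ObjectStore is a hypothetical interface standing in for the OSS or S3 client, not a Druid type.

import java.io.IOException;

interface ObjectStore {
    boolean exists(String bucket, String path);
    void copy(String srcBucket, String srcPath, String dstBucket, String dstPath);
    void delete(String bucket, String path);
}

class SelfCheckingMover {
    static void move(ObjectStore store, String srcBucket, String srcPath,
                     String dstBucket, String dstPath) throws IOException {
        if (srcBucket.equals(dstBucket) && srcPath.equals(dstPath)) {
            return; // moving a file onto itself is a no-op
        }
        store.copy(srcBucket, srcPath, dstBucket, dstPath);
        // The "paranoid" check: do not trust the copy's success report alone.
        if (!store.exists(dstBucket, dstPath)) {
            throw new IOException("Copy reported success but target is missing; source left intact");
        }
        // Delete the source only after the target is confirmed to exist.
        store.delete(srcBucket, srcPath);
    }
}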

Example 14 with IOE

Use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

The class S3DataSegmentMover, method selfCheckingMove.

/**
 * Copies an object and then checks, via a separate API call, that the object is present at the target
 * location. If it is not, an exception is thrown and the object is not deleted at the old location.
 * This "paranoid" check was added after it was observed that S3 may report a successful move while
 * the object is not found at the target location.
 */
private void selfCheckingMove(String s3Bucket, String targetS3Bucket, String s3Path, String targetS3Path, String copyMsg) throws IOException, SegmentLoadingException {
    if (s3Bucket.equals(targetS3Bucket) && s3Path.equals(targetS3Path)) {
        log.info("No need to move file[s3://%s/%s] onto itself", s3Bucket, s3Path);
        return;
    }
    final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
    if (s3Client.doesObjectExist(s3Bucket, s3Path)) {
        final ListObjectsV2Result listResult = s3Client.listObjectsV2(new ListObjectsV2Request().withBucketName(s3Bucket).withPrefix(s3Path).withMaxKeys(1));
        // Using getObjectSummaries().size() instead of getKeyCount because, in some cases,
        // getObjectSummaries returns data while keyCount is still zero.
        if (listResult.getObjectSummaries().size() == 0) {
            // should never happen
            throw new ISE("Unable to list object [s3://%s/%s]", s3Bucket, s3Path);
        }
        final S3ObjectSummary objectSummary = listResult.getObjectSummaries().get(0);
        if (objectSummary.getStorageClass() != null && StorageClass.fromValue(StringUtils.toUpperCase(objectSummary.getStorageClass())).equals(StorageClass.Glacier)) {
            throw new AmazonServiceException(StringUtils.format("Cannot move file[s3://%s/%s] of storage class glacier, skipping.", s3Bucket, s3Path));
        } else {
            log.info("Moving file %s", copyMsg);
            final CopyObjectRequest copyRequest = new CopyObjectRequest(s3Bucket, s3Path, targetS3Bucket, targetS3Path);
            if (!config.getDisableAcl()) {
                copyRequest.setAccessControlList(S3Utils.grantFullControlToBucketOwner(s3Client, targetS3Bucket));
            }
            s3Client.copyObject(copyRequest);
            if (!s3Client.doesObjectExist(targetS3Bucket, targetS3Path)) {
                throw new IOE("After copy was reported as successful the file doesn't exist in the target location [%s]", copyMsg);
            }
            deleteWithRetriesSilent(s3Bucket, s3Path);
            log.debug("Finished moving file %s", copyMsg);
        }
    } else {
        // ensure object exists in target location
        if (s3Client.doesObjectExist(targetS3Bucket, targetS3Path)) {
            log.info("Not moving file [s3://%s/%s], already present in target location [s3://%s/%s]", s3Bucket, s3Path, targetS3Bucket, targetS3Path);
        } else {
            throw new SegmentLoadingException("Unable to move file %s, not present in either source or target location", copyMsg);
        }
    }
}
Also used : CopyObjectRequest(com.amazonaws.services.s3.model.CopyObjectRequest) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) AmazonServiceException(com.amazonaws.AmazonServiceException) ISE(org.apache.druid.java.util.common.ISE) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IOE(org.apache.druid.java.util.common.IOE)
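The S3 variant differs from the OSS one mainly in the optional ACL grant before the copy. A hedged sketch of that step with the AWS SDK v1: bucket and key names are placeholders, and the canned BucketOwnerFullControl ACL is used here as a simpler stand-in for the explicit grant built by S3Utils.grantFullControlToBucketOwner.

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CopyObjectRequest;

public class CopyWithOwnerAcl {
    public static void main(String[] args) {
        final AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
        final CopyObjectRequest copy =
                new CopyObjectRequest("src-bucket", "src/key", "dst-bucket", "dst/key");
        // Give the destination bucket owner full control of the copied object.
        copy.setCannedAccessControlList(CannedAccessControlList.BucketOwnerFullControl);
        s3.copyObject(copy);
    }
}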

Example 15 with IOE

Use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

The class S3DataSegmentPuller, method getVersion.

/**
 * Returns the "version" (aka last modified timestamp) of the URI
 *
 * @param uri The URI whose last-modified timestamp to fetch
 * @return The last modification time of the URI, in milliseconds, as a String
 * @throws IOException
 */
@Override
public String getVersion(URI uri) throws IOException {
    try {
        final CloudObjectLocation coords = new CloudObjectLocation(S3Utils.checkURI(uri));
        final S3ObjectSummary objectSummary = S3Utils.getSingleObjectSummary(s3Client, coords.getBucket(), coords.getPath());
        return StringUtils.format("%d", objectSummary.getLastModified().getTime());
    } catch (AmazonClientException e) {
        if (AWSClientUtil.isClientExceptionRecoverable(e)) {
            // Retry logic treats IOException as always recoverable, so throw an IOException (IOE)
            // only when the underlying failure is actually recoverable; otherwise throw a RuntimeException (RE).
            throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri);
        } else {
            throw new RE(e, "Error fetching last modified timestamp from URI [%s]", uri);
        }
    }
}
Also used : RE(org.apache.druid.java.util.common.RE) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) AmazonClientException(com.amazonaws.AmazonClientException) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IOE(org.apache.druid.java.util.common.IOE)
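This example also shows the two IOE constructor shapes used throughout this page: a String.format-style message, and a cause plus formatted message. A small sketch, assuming only those two constructors (both appear verbatim in the examples above):

import java.io.IOException;
import org.apache.druid.java.util.common.IOE;

public class IoeShapes {
    static void fail(boolean recoverable, Exception cause, String uri) throws IOException {
        if (recoverable) {
            // Cause-first constructor: wraps the cause and keeps its stack trace.
            throw new IOE(cause, "Could not fetch last modified timestamp from URI [%s]", uri);
        }
        // Message-only constructor with format arguments.
        throw new IOE("Invalid URI [%s]", uri);
    }
}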

Aggregations

IOE (org.apache.druid.java.util.common.IOE): 20
File (java.io.File): 7
IOException (java.io.IOException): 7
Path (org.apache.hadoop.fs.Path): 6
CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation): 4
ISE (org.apache.druid.java.util.common.ISE): 4
Configuration (org.apache.hadoop.conf.Configuration): 4
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 4
OSSException (com.aliyun.oss.OSSException): 3
OSSObjectSummary (com.aliyun.oss.model.OSSObjectSummary): 3
S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary): 3
InputStream (java.io.InputStream): 3
RE (org.apache.druid.java.util.common.RE): 3
Closer (org.apache.druid.java.util.common.io.Closer): 3
ChannelException (org.jboss.netty.channel.ChannelException): 3
BeforeClass (org.junit.BeforeClass): 3
AmazonServiceException (com.amazonaws.AmazonServiceException): 2
ByteArrayInputStream (java.io.ByteArrayInputStream): 2
FilterInputStream (java.io.FilterInputStream): 2
OutputStream (java.io.OutputStream): 2