Search in sources :

Example 6 with IOE

use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

This example shows the addSegment method of the LocalIntermediaryDataManager class.

/**
 * Write a segment into one of configured locations. The location to write is chosen in a round-robin manner per
 * supervisorTaskId.
 *
 * @param supervisorTaskId id of the supervisor task; keys the per-supervisor round-robin location iterator and
 *                         forms part of the partition file path
 * @param subTaskId        id of the sub task that produced the segment; used for the temp dir and file path
 * @param segment          segment to store; its shardSpec must be a {@link BucketNumberedShardSpec}
 * @param segmentDir       local directory holding the segment files to be zipped and copied
 * @return the given segment with its size set to the unzipped byte count and its binary version filled in
 * @throws IOException if zipping reads 0 bytes from segmentDir, or zipping/copying fails
 * @throws IAE         if the segment's shardSpec is not a BucketNumberedShardSpec
 * @throws ISE         if no configured storage location accepts the zipped segment
 */
@Override
public DataSegment addSegment(String supervisorTaskId, String subTaskId, DataSegment segment, File segmentDir) throws IOException {
    // Get or create the location iterator for supervisorTask.
    final Iterator<StorageLocation> iterator = locationIterators.computeIfAbsent(supervisorTaskId, k -> {
        final Iterator<StorageLocation> cyclicIterator = Iterators.cycle(shuffleDataLocations);
        // Random start of the iterator
        final int random = ThreadLocalRandom.current().nextInt(shuffleDataLocations.size());
        IntStream.range(0, random).forEach(i -> cyclicIterator.next());
        return cyclicIterator;
    });
    // Create a zipped segment in a temp directory.
    final File taskTempDir = taskConfig.getTaskTempDir(subTaskId);
    final Closer closer = Closer.create();
    // Registered cleanup removes the whole temp dir (including the zipped file created below) when the closer closes.
    closer.register(() -> {
        try {
            org.apache.commons.io.FileUtils.forceDelete(taskTempDir);
        } catch (IOException e) {
            LOG.warn(e, "Failed to delete directory[%s]", taskTempDir.getAbsolutePath());
        }
    });
    if (!(segment.getShardSpec() instanceof BucketNumberedShardSpec)) {
        throw new IAE("Invalid shardSpec type. Expected [%s] but got [%s]", BucketNumberedShardSpec.class.getName(), segment.getShardSpec().getClass().getName());
    }
    final BucketNumberedShardSpec<?> bucketNumberedShardSpec = (BucketNumberedShardSpec<?>) segment.getShardSpec();
    // noinspection unused
    try (final Closer resourceCloser = closer) {
        FileUtils.mkdirp(taskTempDir);
        // Temporary compressed file. Will be removed when taskTempDir is deleted.
        final File tempZippedFile = new File(taskTempDir, segment.getId().toString());
        final long unzippedSizeBytes = CompressionUtils.zip(segmentDir, tempZippedFile);
        if (unzippedSizeBytes == 0) {
            throw new IOE("Read 0 bytes from segmentDir[%s]", segmentDir.getAbsolutePath());
        }
        // Try copying the zipped segment to one of storage locations
        for (int i = 0; i < shuffleDataLocations.size(); i++) {
            final StorageLocation location = iterator.next();
            final String partitionFilePath = getPartitionFilePath(supervisorTaskId, subTaskId, segment.getInterval(), // we must use the bucket ID instead of partition ID
            bucketNumberedShardSpec.getBucketId());
            // A null destFile means this location could not accept the file; fall through to the next location.
            final File destFile = location.reserve(partitionFilePath, segment.getId().toString(), tempZippedFile.length());
            if (destFile != null) {
                try {
                    FileUtils.mkdirp(destFile.getParentFile());
                    FileUtils.writeAtomically(destFile, out -> Files.asByteSource(tempZippedFile).copyTo(out));
                    LOG.info("Wrote intermediary segment[%s] for subtask[%s] at [%s]", segment.getId(), subTaskId, destFile);
                    return segment.withSize(unzippedSizeBytes).withBinaryVersion(SegmentUtils.getVersionFromDir(segmentDir));
                } catch (Exception e) {
                    // Undo the reservation and any partial write, then try the next location.
                    location.release(partitionFilePath, tempZippedFile.length());
                    org.apache.commons.io.FileUtils.deleteQuietly(destFile);
                    LOG.warn(e, "Failed to write segment[%s] at [%s]. Trying again with the next location", segment.getId(), destFile);
                }
            }
        }
        throw new ISE("Can't find location to handle segment[%s]", segment);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) IOException(java.io.IOException) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec) ISE(org.apache.druid.java.util.common.ISE) StorageLocation(org.apache.druid.segment.loading.StorageLocation) File(java.io.File) IOE(org.apache.druid.java.util.common.IOE)

Example 7 with IOE

use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

This example shows the getVersion method of the OssDataSegmentPuller class.

/**
 * Returns the "version" (aka last modified timestamp) of the URI.
 *
 * @param uri the URI whose last-modified timestamp should be looked up
 * @return the last modification time of the URI, in milliseconds, as a String
 * @throws IOException if the OSS error is recoverable, so callers may choose to retry
 */
@Override
public String getVersion(URI uri) throws IOException {
    try {
        final CloudObjectLocation objectLocation = new CloudObjectLocation(OssUtils.checkURI(uri));
        final OSSObjectSummary summary = OssUtils.getSingleObjectSummary(client, objectLocation.getBucket(), objectLocation.getPath());
        final long lastModifiedMillis = summary.getLastModified().getTime();
        return StringUtils.format("%d", lastModifiedMillis);
    } catch (OSSException e) {
        // Only recoverable service errors are surfaced as IOException; anything else becomes a runtime error.
        if (!OssUtils.isServiceExceptionRecoverable(e)) {
            throw new RE(e, "Error fetching last modified timestamp from URI [%s]", uri);
        }
        throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri);
    }
}
Also used : OSSObjectSummary(com.aliyun.oss.model.OSSObjectSummary) RE(org.apache.druid.java.util.common.RE) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) OSSException(com.aliyun.oss.OSSException) IOE(org.apache.druid.java.util.common.IOE)

Example 8 with IOE

use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

This example shows the buildFileObject method of the OssDataSegmentPuller class.

/**
 * Builds a read-only {@link FileObject} view of the OSS object at the given URI.
 * The object summary is fetched eagerly, but the object content itself is only
 * fetched on the first {@code openInputStream()} call. All write/delete style
 * operations throw {@link UOE}.
 *
 * @param uri URI of the OSS object; validated via {@code OssUtils.checkURI}
 * @return a lazily-loading FileObject wrapper for the OSS object
 * @throws OSSException if the OSS client calls fail while fetching the summary
 */
private FileObject buildFileObject(final URI uri) throws OSSException {
    final CloudObjectLocation coords = new CloudObjectLocation(OssUtils.checkURI(uri));
    final OSSObjectSummary objectSummary = OssUtils.getSingleObjectSummary(client, coords.getBucket(), coords.getPath());
    final String path = uri.getPath();
    return new FileObject() {

        // Full OSS object; populated lazily on the first openInputStream() call.
        OSSObject ossObject = null;

        @Override
        public URI toUri() {
            return uri;
        }

        @Override
        public String getName() {
            // Re-joins name and extension, skipping the dot when there is no extension.
            final String ext = Files.getFileExtension(path);
            return Files.getNameWithoutExtension(path) + (Strings.isNullOrEmpty(ext) ? "" : ("." + ext));
        }

        /**
         * Returns an input stream for an OSS object. The returned input stream is not thread-safe.
         */
        @Override
        public InputStream openInputStream() throws IOException {
            try {
                if (ossObject == null) {
                    // lazily promote to full GET
                    ossObject = client.getObject(objectSummary.getBucketName(), objectSummary.getKey());
                }
                final InputStream in = ossObject.getObjectContent();
                // Closing the returned stream closes both the content stream and the OSS object via the closer.
                final Closer closer = Closer.create();
                closer.register(in);
                closer.register(ossObject);
                return new FilterInputStream(in) {

                    @Override
                    public void close() throws IOException {
                        closer.close();
                    }
                };
            } catch (OSSException e) {
                throw new IOE(e, "Could not load OSS URI [%s]", uri);
            }
        }

        @Override
        public OutputStream openOutputStream() {
            throw new UOE("Cannot stream OSS output");
        }

        @Override
        public Reader openReader(boolean ignoreEncodingErrors) {
            throw new UOE("Cannot open reader");
        }

        @Override
        public CharSequence getCharContent(boolean ignoreEncodingErrors) {
            throw new UOE("Cannot open character sequence");
        }

        @Override
        public Writer openWriter() {
            throw new UOE("Cannot open writer");
        }

        @Override
        public long getLastModified() {
            // Served from the eagerly-fetched summary; no extra OSS call.
            return objectSummary.getLastModified().getTime();
        }

        @Override
        public boolean delete() {
            throw new UOE("Cannot delete OSS items anonymously. jetS3t doesn't support authenticated deletes easily.");
        }
    };
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) OSSObjectSummary(com.aliyun.oss.model.OSSObjectSummary) FilterInputStream(java.io.FilterInputStream) OSSObject(com.aliyun.oss.model.OSSObject) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) FilterInputStream(java.io.FilterInputStream) InputStream(java.io.InputStream) OSSException(com.aliyun.oss.OSSException) UOE(org.apache.druid.java.util.common.UOE) FileObject(javax.tools.FileObject) IOE(org.apache.druid.java.util.common.IOE)

Example 9 with IOE

use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

This example shows the setupStatic method of the HdfsDataSegmentPullerTest class.

/**
 * One-time fixture setup: reserves a fresh temp path (create-then-delete) as the working
 * directory of a LocalFileSystem, then seeds {@code filePath} with the test data bytes.
 *
 * @throws IOException if the temp path cannot be reserved or the fixture data cannot be written
 */
@BeforeClass
public static void setupStatic() throws IOException {
    hdfsTmpDir = File.createTempFile("hdfsHandlerTest", "dir");
    final boolean reserved = hdfsTmpDir.delete();
    if (!reserved) {
        throw new IOE("Unable to delete hdfsTmpDir [%s]", hdfsTmpDir.getAbsolutePath());
    }
    fileSystem = new LocalFileSystem();
    conf = new Configuration(true);
    fileSystem.initialize(hdfsTmpDir.toURI(), conf);
    fileSystem.setWorkingDirectory(new Path(hdfsTmpDir.toURI()));
    // Stage the fixture bytes in a scratch file, then copy them into the filesystem under test.
    final File scratchFile = File.createTempFile("hdfsHandlerTest", ".data");
    scratchFile.delete();
    try {
        Files.copy(new ByteArrayInputStream(pathByteContents), scratchFile.toPath());
        try (OutputStream stream = fileSystem.create(filePath)) {
            Files.copy(scratchFile.toPath(), stream);
        }
    } finally {
        // Best-effort cleanup of the scratch file regardless of copy success.
        scratchFile.delete();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ByteArrayInputStream(java.io.ByteArrayInputStream) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) File(java.io.File) IOE(org.apache.druid.java.util.common.IOE) BeforeClass(org.junit.BeforeClass)

Example 10 with IOE

use of org.apache.druid.java.util.common.IOE in project druid by druid-io.

This example shows the killOlderThan method of the HdfsTaskLogs class.

/**
 * Deletes every task log under the configured log directory whose modification time is
 * older than the given timestamp. A no-op when the directory does not exist.
 *
 * @param timestamp cutoff in milliseconds; files modified before this are removed
 * @throws IOException on filesystem errors, if the log path is not a directory, or
 *                     (wrapping an InterruptedException) if the calling thread is
 *                     interrupted before all logs have been processed
 */
@Override
public void killOlderThan(long timestamp) throws IOException {
    final Path logDir = new Path(config.getDirectory());
    final FileSystem fs = logDir.getFileSystem(hadoopConfig);
    if (!fs.exists(logDir)) {
        return;
    }
    if (!fs.isDirectory(logDir)) {
        throw new IOE("taskLogDir [%s] must be a directory.", logDir);
    }
    final RemoteIterator<LocatedFileStatus> logFiles = fs.listLocatedStatus(logDir);
    while (logFiles.hasNext()) {
        final LocatedFileStatus logFile = logFiles.next();
        if (logFile.getModificationTime() < timestamp) {
            final Path logPath = logFile.getPath();
            log.info("Deleting hdfs task log [%s].", logPath.toUri().toString());
            fs.delete(logPath, true);
        }
        // Bail out between files if the caller asked us to stop; the flag is left set for the caller.
        if (Thread.currentThread().isInterrupted()) {
            throw new IOException(new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) IOE(org.apache.druid.java.util.common.IOE)

Aggregations

IOE (org.apache.druid.java.util.common.IOE)20 File (java.io.File)7 IOException (java.io.IOException)7 Path (org.apache.hadoop.fs.Path)6 CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation)4 ISE (org.apache.druid.java.util.common.ISE)4 Configuration (org.apache.hadoop.conf.Configuration)4 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)4 OSSException (com.aliyun.oss.OSSException)3 OSSObjectSummary (com.aliyun.oss.model.OSSObjectSummary)3 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)3 InputStream (java.io.InputStream)3 RE (org.apache.druid.java.util.common.RE)3 Closer (org.apache.druid.java.util.common.io.Closer)3 ChannelException (org.jboss.netty.channel.ChannelException)3 BeforeClass (org.junit.BeforeClass)3 AmazonServiceException (com.amazonaws.AmazonServiceException)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 FilterInputStream (java.io.FilterInputStream)2 OutputStream (java.io.OutputStream)2