Use of org.apache.druid.java.util.common.IOE in project druid (druid-io):
class LocalIntermediaryDataManager, method addSegment.
/**
 * Writes a segment into one of the configured shuffle data locations. The location to write is chosen in a
 * round-robin manner per supervisorTaskId, starting from a random offset.
 *
 * @param supervisorTaskId id of the supervisor task; keys the per-supervisor round-robin location iterator
 * @param subTaskId        id of the sub task that produced the segment; used for the temp dir and partition path
 * @param segment          segment to store; its shardSpec must be a {@link BucketNumberedShardSpec}
 * @param segmentDir       directory containing the unzipped segment files to be shipped
 * @return the given segment with its unzipped size and binary version filled in
 * @throws IOException if zipping the segment directory reads 0 bytes
 * @throws IAE         if the segment's shardSpec is not a BucketNumberedShardSpec
 * @throws ISE         if no configured storage location can accept the zipped segment
 */
@Override
public DataSegment addSegment(String supervisorTaskId, String subTaskId, DataSegment segment, File segmentDir) throws IOException {
// Get or create the location iterator for supervisorTask. The cyclic iterator starts at a random offset so
// that different supervisors don't all begin writing to the same location.
final Iterator<StorageLocation> iterator = locationIterators.computeIfAbsent(supervisorTaskId, k -> {
final Iterator<StorageLocation> cyclicIterator = Iterators.cycle(shuffleDataLocations);
// Random start of the iterator
final int random = ThreadLocalRandom.current().nextInt(shuffleDataLocations.size());
IntStream.range(0, random).forEach(i -> cyclicIterator.next());
return cyclicIterator;
});
// Create a zipped segment in a temp directory.
final File taskTempDir = taskConfig.getTaskTempDir(subTaskId);
// Register cleanup of the temp dir up front; the Closer runs it on every exit path of the
// try-with-resources below, success or failure. Deletion failures are logged, not rethrown.
final Closer closer = Closer.create();
closer.register(() -> {
try {
org.apache.commons.io.FileUtils.forceDelete(taskTempDir);
} catch (IOException e) {
LOG.warn(e, "Failed to delete directory[%s]", taskTempDir.getAbsolutePath());
}
});
// Validate the shardSpec type before creating anything on disk.
if (!(segment.getShardSpec() instanceof BucketNumberedShardSpec)) {
throw new IAE("Invalid shardSpec type. Expected [%s] but got [%s]", BucketNumberedShardSpec.class.getName(), segment.getShardSpec().getClass().getName());
}
final BucketNumberedShardSpec<?> bucketNumberedShardSpec = (BucketNumberedShardSpec<?>) segment.getShardSpec();
// noinspection unused
try (final Closer resourceCloser = closer) {
FileUtils.mkdirp(taskTempDir);
// Temporary compressed file. Will be removed when taskTempDir is deleted.
final File tempZippedFile = new File(taskTempDir, segment.getId().toString());
final long unzippedSizeBytes = CompressionUtils.zip(segmentDir, tempZippedFile);
if (unzippedSizeBytes == 0) {
throw new IOE("Read 0 bytes from segmentDir[%s]", segmentDir.getAbsolutePath());
}
// Try copying the zipped segment to one of storage locations. Each location is asked to reserve space
// first; on write failure the reservation is released, the partial file deleted, and the next location tried.
for (int i = 0; i < shuffleDataLocations.size(); i++) {
final StorageLocation location = iterator.next();
final String partitionFilePath = getPartitionFilePath(supervisorTaskId, subTaskId, segment.getInterval(), // we must use the bucket ID instead of partition ID
bucketNumberedShardSpec.getBucketId());
// reserve() returns null when the location lacks capacity; fall through to the next location.
final File destFile = location.reserve(partitionFilePath, segment.getId().toString(), tempZippedFile.length());
if (destFile != null) {
try {
FileUtils.mkdirp(destFile.getParentFile());
FileUtils.writeAtomically(destFile, out -> Files.asByteSource(tempZippedFile).copyTo(out));
LOG.info("Wrote intermediary segment[%s] for subtask[%s] at [%s]", segment.getId(), subTaskId, destFile);
return segment.withSize(unzippedSizeBytes).withBinaryVersion(SegmentUtils.getVersionFromDir(segmentDir));
} catch (Exception e) {
// Undo the reservation and remove any partially written file before trying the next location.
location.release(partitionFilePath, tempZippedFile.length());
org.apache.commons.io.FileUtils.deleteQuietly(destFile);
LOG.warn(e, "Failed to write segment[%s] at [%s]. Trying again with the next location", segment.getId(), destFile);
}
}
}
throw new ISE("Can't find location to handle segment[%s]", segment);
}
}
Use of org.apache.druid.java.util.common.IOE in project druid (druid-io):
class OssDataSegmentPuller, method getVersion.
/**
 * Returns the "version" (aka last modified timestamp) of the URI.
 *
 * @param uri the URI whose last modified timestamp is fetched
 * @return the last modification time of the URI, in milliseconds, formatted as a decimal String
 * @throws IOException if the OSS service error is recoverable, so callers may retry
 * @throws RE          if the OSS service error is not recoverable
 */
@Override
public String getVersion(URI uri) throws IOException {
try {
final CloudObjectLocation location = new CloudObjectLocation(OssUtils.checkURI(uri));
final OSSObjectSummary summary = OssUtils.getSingleObjectSummary(client, location.getBucket(), location.getPath());
return StringUtils.format("%d", summary.getLastModified().getTime());
} catch (OSSException e) {
// Non-recoverable errors surface as unchecked RE; recoverable ones become IOException so
// retry logic (which treats IOException as retryable) can kick in.
if (!OssUtils.isServiceExceptionRecoverable(e)) {
throw new RE(e, "Error fetching last modified timestamp from URI [%s]", uri);
}
throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri);
}
}
Use of org.apache.druid.java.util.common.IOE in project druid (druid-io):
class OssDataSegmentPuller, method buildFileObject.
/**
 * Builds a read-only {@link FileObject} view of the OSS object at the given URI.
 * Only a lightweight object summary is fetched here; the actual object content is
 * retrieved lazily on the first call to {@code openInputStream()}.
 *
 * @param uri the OSS URI of the object
 * @return a FileObject supporting name, last-modified, and input-stream access; all
 *         write/delete operations throw {@link UOE}
 * @throws OSSException if fetching the object summary fails
 */
private FileObject buildFileObject(final URI uri) throws OSSException {
final CloudObjectLocation coords = new CloudObjectLocation(OssUtils.checkURI(uri));
final OSSObjectSummary objectSummary = OssUtils.getSingleObjectSummary(client, coords.getBucket(), coords.getPath());
final String path = uri.getPath();
return new FileObject() {
// Lazily-fetched full object; populated on the first openInputStream() call.
OSSObject ossObject = null;
@Override
public URI toUri() {
return uri;
}
@Override
public String getName() {
// Re-attach the extension only when one exists, to avoid a trailing dot.
final String ext = Files.getFileExtension(path);
return Files.getNameWithoutExtension(path) + (Strings.isNullOrEmpty(ext) ? "" : ("." + ext));
}
/**
 * Returns an input stream for an OSS object. The returned input stream is not thread-safe.
 */
@Override
public InputStream openInputStream() throws IOException {
try {
if (ossObject == null) {
// lazily promote to full GET
ossObject = client.getObject(objectSummary.getBucketName(), objectSummary.getKey());
}
final InputStream in = ossObject.getObjectContent();
// Closing the returned stream closes both the content stream and the OSS object,
// so no connection is leaked when the caller is done reading.
final Closer closer = Closer.create();
closer.register(in);
closer.register(ossObject);
return new FilterInputStream(in) {
@Override
public void close() throws IOException {
closer.close();
}
};
} catch (OSSException e) {
throw new IOE(e, "Could not load OSS URI [%s]", uri);
}
}
@Override
public OutputStream openOutputStream() {
throw new UOE("Cannot stream OSS output");
}
@Override
public Reader openReader(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open reader");
}
@Override
public CharSequence getCharContent(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open character sequence");
}
@Override
public Writer openWriter() {
throw new UOE("Cannot open writer");
}
@Override
public long getLastModified() {
return objectSummary.getLastModified().getTime();
}
@Override
public boolean delete() {
// NOTE(review): this message mentions jetS3t, which is S3-specific — looks copy-pasted
// from the S3 puller; confirm whether the wording should reference OSS instead.
throw new UOE("Cannot delete OSS items anonymously. jetS3t doesn't support authenticated deletes easily.");
}
};
}
Use of org.apache.druid.java.util.common.IOE in project druid (druid-io):
class HdfsDataSegmentPullerTest, method setupStatic.
/**
 * One-time test setup: carves out a temp directory path for a LocalFileSystem rooted there,
 * then stages the test payload bytes into {@code filePath} through that filesystem.
 *
 * @throws IOException if temp-file creation or the copy into the filesystem fails
 */
@BeforeClass
public static void setupStatic() throws IOException {
// Reserve a unique path by creating a temp file, then delete it so the path can serve as a directory root.
hdfsTmpDir = File.createTempFile("hdfsHandlerTest", "dir");
if (!hdfsTmpDir.delete()) {
throw new IOE("Unable to delete hdfsTmpDir [%s]", hdfsTmpDir.getAbsolutePath());
}
conf = new Configuration(true);
fileSystem = new LocalFileSystem();
fileSystem.initialize(hdfsTmpDir.toURI(), conf);
fileSystem.setWorkingDirectory(new Path(hdfsTmpDir.toURI()));
// Stage the payload in a scratch file, stream it into the filesystem, then clean the scratch file up.
final File scratchFile = File.createTempFile("hdfsHandlerTest", ".data");
scratchFile.delete();
try {
Files.copy(new ByteArrayInputStream(pathByteContents), scratchFile.toPath());
try (OutputStream stream = fileSystem.create(filePath)) {
Files.copy(scratchFile.toPath(), stream);
}
} finally {
scratchFile.delete();
}
}
Use of org.apache.druid.java.util.common.IOE in project druid (druid-io):
class HdfsTaskLogs, method killOlderThan.
/**
 * Recursively deletes every task log under the configured log directory whose modification
 * time is older than the given timestamp. A no-op if the directory does not exist.
 *
 * @param timestamp cutoff in epoch millis; logs modified strictly before this are removed
 * @throws IOException on HDFS failures, if the configured path is not a directory, or if the
 *                     thread is interrupted before all logs are deleted
 */
@Override
public void killOlderThan(long timestamp) throws IOException {
final Path logDir = new Path(config.getDirectory());
final FileSystem fs = logDir.getFileSystem(hadoopConfig);
if (!fs.exists(logDir)) {
return;
}
if (!fs.isDirectory(logDir)) {
throw new IOE("taskLogDir [%s] must be a directory.", logDir);
}
final RemoteIterator<LocatedFileStatus> files = fs.listLocatedStatus(logDir);
while (files.hasNext()) {
final LocatedFileStatus status = files.next();
if (status.getModificationTime() < timestamp) {
final Path logPath = status.getPath();
log.info("Deleting hdfs task log [%s].", logPath.toUri().toString());
fs.delete(logPath, true);
}
// Bail out between deletions so a cancelled task doesn't keep mutating HDFS.
if (Thread.currentThread().isInterrupted()) {
throw new IOException(new InterruptedException("Thread interrupted. Couldn't delete all tasklogs."));
}
}
}
Aggregations