Usage of org.apache.druid.segment.loading.StorageLocation in project druid (druid-io): class LocalIntermediaryDataManager, method discoverSupervisorTaskPartitions.
/**
 * LocalIntermediaryDataManager periodically calls this method after it starts up to search for unknown intermediary data.
 */
private void discoverSupervisorTaskPartitions() {
  for (StorageLocation location : shuffleDataLocations) {
    final Path basePath = location.getPath().toPath().toAbsolutePath();
    final MutableInt discoveredCount = new MutableInt(0);
    final File[] supervisorTaskDirs = location.getPath().listFiles();
    if (supervisorTaskDirs == null) {
      continue;
    }
    for (File taskDir : supervisorTaskDirs) {
      // Only dirs we haven't seen yet are scanned; computeIfAbsent skips known supervisor tasks.
      supervisorTaskCheckTimes.computeIfAbsent(taskDir.getName(), taskId -> {
        for (File dataFile : org.apache.commons.io.FileUtils.listFiles(taskDir, null, true)) {
          final String relativeSegmentPath = basePath.relativize(dataFile.toPath().toAbsolutePath()).toString();
          // StorageLocation keeps track of how much storage capacity is being used.
          // Register each newly found file with it so the accounting stays up to date.
          if (location.reserve(relativeSegmentPath, dataFile.getName(), dataFile.length()) == null) {
            LOG.warn("Can't add a discovered partition[%s]", dataFile.getAbsolutePath());
          }
        }
        discoveredCount.increment();
        return getExpiryTimeFromNow();
      });
    }
    if (discoveredCount.getValue() > 0) {
      LOG.info("Discovered partitions for [%s] new supervisor tasks under location[%s]", discoveredCount.getValue(), location.getPath());
    }
  }
}
Usage of org.apache.druid.segment.loading.StorageLocation in project druid (druid-io): class LocalIntermediaryDataManager, method addSegment.
/**
 * Write a segment into one of the configured locations. The location to write is chosen in a round-robin manner per
 * supervisorTaskId.
 *
 * @param supervisorTaskId id of the supervisor task owning this segment
 * @param subTaskId        id of the sub task that produced the segment
 * @param segment          segment metadata; its shardSpec must be a {@link BucketNumberedShardSpec}
 * @param segmentDir       directory holding the segment files to zip and store
 * @return the segment updated with its unzipped size and binary version
 * @throws IOException if zipping fails or no location has capacity for the segment
 */
@Override
public DataSegment addSegment(String supervisorTaskId, String subTaskId, DataSegment segment, File segmentDir) throws IOException {
  // Validate the shardSpec before any iterator/temp-dir setup so an invalid argument
  // doesn't create a per-supervisor location iterator or register cleanup work.
  if (!(segment.getShardSpec() instanceof BucketNumberedShardSpec)) {
    throw new IAE("Invalid shardSpec type. Expected [%s] but got [%s]", BucketNumberedShardSpec.class.getName(), segment.getShardSpec().getClass().getName());
  }
  final BucketNumberedShardSpec<?> bucketNumberedShardSpec = (BucketNumberedShardSpec<?>) segment.getShardSpec();
  // Get or create the location iterator for supervisorTask.
  final Iterator<StorageLocation> iterator = locationIterators.computeIfAbsent(supervisorTaskId, k -> {
    final Iterator<StorageLocation> cyclicIterator = Iterators.cycle(shuffleDataLocations);
    // Random start of the iterator, so concurrent supervisor tasks spread over the locations.
    final int random = ThreadLocalRandom.current().nextInt(shuffleDataLocations.size());
    IntStream.range(0, random).forEach(i -> cyclicIterator.next());
    return cyclicIterator;
  });
  // Create a zipped segment in a temp directory. The Closer removes the temp dir on
  // both the success and the failure path.
  final File taskTempDir = taskConfig.getTaskTempDir(subTaskId);
  final Closer closer = Closer.create();
  closer.register(() -> {
    try {
      org.apache.commons.io.FileUtils.forceDelete(taskTempDir);
    } catch (IOException e) {
      LOG.warn(e, "Failed to delete directory[%s]", taskTempDir.getAbsolutePath());
    }
  });
  // noinspection unused
  try (final Closer resourceCloser = closer) {
    FileUtils.mkdirp(taskTempDir);
    // Temporary compressed file. Will be removed when taskTempDir is deleted.
    final File tempZippedFile = new File(taskTempDir, segment.getId().toString());
    final long unzippedSizeBytes = CompressionUtils.zip(segmentDir, tempZippedFile);
    if (unzippedSizeBytes == 0) {
      throw new IOE("Read 0 bytes from segmentDir[%s]", segmentDir.getAbsolutePath());
    }
    // The target path is the same for every candidate location; compute it once.
    final String partitionFilePath = getPartitionFilePath(supervisorTaskId, subTaskId, segment.getInterval(), // we must use the bucket ID instead of partition ID
        bucketNumberedShardSpec.getBucketId());
    // Try copying the zipped segment to one of storage locations, each at most once.
    for (int i = 0; i < shuffleDataLocations.size(); i++) {
      final StorageLocation location = iterator.next();
      // reserve() returns null when this location lacks capacity for the file.
      final File destFile = location.reserve(partitionFilePath, segment.getId().toString(), tempZippedFile.length());
      if (destFile != null) {
        try {
          FileUtils.mkdirp(destFile.getParentFile());
          FileUtils.writeAtomically(destFile, out -> Files.asByteSource(tempZippedFile).copyTo(out));
          LOG.info("Wrote intermediary segment[%s] for subtask[%s] at [%s]", segment.getId(), subTaskId, destFile);
          return segment.withSize(unzippedSizeBytes).withBinaryVersion(SegmentUtils.getVersionFromDir(segmentDir));
        } catch (Exception e) {
          // Roll back the capacity reservation and any partially-written file before
          // moving on to the next location.
          location.release(partitionFilePath, tempZippedFile.length());
          org.apache.commons.io.FileUtils.deleteQuietly(destFile);
          LOG.warn(e, "Failed to write segment[%s] at [%s]. Trying again with the next location", segment.getId(), destFile);
        }
      }
    }
    throw new ISE("Can't find location to handle segment[%s]", segment);
  }
}
Usage of org.apache.druid.segment.loading.StorageLocation in project druid (druid-io): class LocalIntermediaryDataManager, method findPartitionFile.
@Override
public Optional<ByteSource> findPartitionFile(String supervisorTaskId, String subTaskId, Interval interval, int bucketId) {
  IdUtils.validateId("supervisorTaskId", supervisorTaskId);
  IdUtils.validateId("subTaskId", subTaskId);
  for (StorageLocation location : shuffleDataLocations) {
    final File partitionDir = new File(location.getPath(), getPartitionDirPath(supervisorTaskId, interval, bucketId));
    if (!partitionDir.exists()) {
      continue;
    }
    // Accessing a partition refreshes the supervisor task's expiry time.
    supervisorTaskCheckTimes.put(supervisorTaskId, getExpiryTimeFromNow());
    final File segmentFile = new File(partitionDir, subTaskId);
    // Only the first location containing the partition dir is consulted.
    return segmentFile.exists() ? Optional.of(Files.asByteSource(segmentFile)) : Optional.empty();
  }
  return Optional.empty();
}
Usage of org.apache.druid.segment.loading.StorageLocation in project druid (druid-io): class LocalIntermediaryDataManager, method deletePartitions.
@Override
public void deletePartitions(String supervisorTaskId) throws IOException {
  IdUtils.validateId("supervisorTaskId", supervisorTaskId);
  for (StorageLocation location : shuffleDataLocations) {
    final File taskDir = new File(location.getPath(), supervisorTaskId);
    if (!taskDir.exists()) {
      continue;
    }
    LOG.info("Cleaning up [%s]", taskDir);
    // Un-reserve every file from the StorageLocation before deleting the tree,
    // so its used-capacity accounting stays correct.
    for (File trackedFile : org.apache.commons.io.FileUtils.listFiles(taskDir, null, true)) {
      location.removeFile(trackedFile);
    }
    org.apache.commons.io.FileUtils.forceDelete(taskDir);
  }
  supervisorTaskCheckTimes.remove(supervisorTaskId);
}
Usage of org.apache.druid.segment.loading.StorageLocation in project druid (druid-io): class LocalDataStorageDruidModuleTest, method createInjector.
private static Injector createInjector() {
  // Test bindings: empty storage-location list, a trivial ColumnConfig, and a
  // selector strategy over no locations.
  final Module testBindings = binder -> {
    binder.bind(new TypeLiteral<List<StorageLocation>>() {
    }).toInstance(ImmutableList.of());
    binder.bind(ColumnConfig.class).toInstance(() -> 0);
    binder.bind(StorageLocationSelectorStrategy.class).toInstance(new RandomStorageLocationSelectorStrategy(ImmutableList.of()));
  };
  return GuiceInjectors.makeStartupInjectorWithModules(ImmutableList.of(new LocalDataStorageDruidModule(), testBindings));
}
Aggregations