Search in sources :

Example 1 with StorageLocation

use of org.apache.druid.segment.loading.StorageLocation in project druid by druid-io.

the class LocalIntermediaryDataManager method discoverSupervisorTaskPartitions.

/**
 * Scans every shuffle data location for supervisor task directories that are not tracked yet and registers
 * the files found under them. LocalIntermediaryDataManager calls this method periodically after it starts up.
 */
private void discoverSupervisorTaskPartitions() {
    for (StorageLocation location : shuffleDataLocations) {
        final Path basePath = location.getPath().toPath().toAbsolutePath();
        final MutableInt newTaskCount = new MutableInt(0);
        final File[] taskDirs = location.getPath().listFiles();
        if (taskDirs == null) {
            // Nothing could be listed for this location, so nothing can be discovered in it.
            continue;
        }
        for (File taskDir : taskDirs) {
            // The directory name is used as the supervisor task ID; the mapping function below
            // is only evaluated when that ID has no check time registered yet.
            supervisorTaskCheckTimes.computeIfAbsent(taskDir.getName(), unusedKey -> {
                for (File partitionFile : org.apache.commons.io.FileUtils.listFiles(taskDir, null, true)) {
                    final Path absoluteFilePath = partitionFile.toPath().toAbsolutePath();
                    final String pathUnderLocation = basePath.relativize(absoluteFilePath).toString();
                    // StorageLocation accounts for the storage capacity in use, so every file found on
                    // disk is reported to it; a null result means the reservation was rejected.
                    if (location.reserve(pathUnderLocation, partitionFile.getName(), partitionFile.length()) == null) {
                        LOG.warn("Can't add a discovered partition[%s]", partitionFile.getAbsolutePath());
                    }
                }
                newTaskCount.increment();
                return getExpiryTimeFromNow();
            });
        }
        final Integer discovered = newTaskCount.getValue();
        if (discovered > 0) {
            LOG.info("Discovered partitions for [%s] new supervisor tasks under location[%s]", discovered, location.getPath());
        }
    }
}
Also used : Path(java.nio.file.Path) MutableInt(org.apache.commons.lang3.mutable.MutableInt) StorageLocation(org.apache.druid.segment.loading.StorageLocation) File(java.io.File)

Example 2 with StorageLocation

use of org.apache.druid.segment.loading.StorageLocation in project druid by druid-io.

the class LocalIntermediaryDataManager method addSegment.

/**
 * Zips the given segment directory and stores the archive in one of the configured shuffle data locations.
 * Locations are tried in a round-robin manner per supervisorTaskId, starting from a random position, until one
 * of them accepts the file. An exception is thrown when the shard spec is not a BucketNumberedShardSpec or
 * when no location could take the segment.
 *
 * @param supervisorTaskId ID of the supervisor task; selects the location iterator and is part of the file path
 * @param subTaskId        ID of the sub task; selects the temp directory and is part of the file path
 * @param segment          segment being stored; must carry a BucketNumberedShardSpec
 * @param segmentDir       directory holding the segment files to zip
 * @return the segment updated with the unzipped size and the binary version read from {@code segmentDir}
 * @throws IOException if the temporary archive cannot be prepared, e.g. zero bytes were read from
 *                     {@code segmentDir}
 */
@Override
public DataSegment addSegment(String supervisorTaskId, String subTaskId, DataSegment segment, File segmentDir) throws IOException {
    // Look up the location iterator of this supervisor task, creating it on first use.
    final Iterator<StorageLocation> locationCycle = locationIterators.computeIfAbsent(supervisorTaskId, unusedKey -> {
        final Iterator<StorageLocation> cycle = Iterators.cycle(shuffleDataLocations);
        // Skip a random number of locations so that not every supervisor task starts at the first one.
        final int skipCount = ThreadLocalRandom.current().nextInt(shuffleDataLocations.size());
        for (int skipped = 0; skipped < skipCount; skipped++) {
            cycle.next();
        }
        return cycle;
    });
    // The zipped segment is first built in a temp directory, which is removed again when the closer runs.
    final File taskTempDir = taskConfig.getTaskTempDir(subTaskId);
    final Closer closer = Closer.create();
    closer.register(() -> {
        try {
            org.apache.commons.io.FileUtils.forceDelete(taskTempDir);
        } catch (IOException e) {
            // Cleanup is best effort; a leftover temp directory must not fail the call.
            LOG.warn(e, "Failed to delete directory[%s]", taskTempDir.getAbsolutePath());
        }
    });
    if (!(segment.getShardSpec() instanceof BucketNumberedShardSpec)) {
        throw new IAE("Invalid shardSpec type. Expected [%s] but got [%s]", BucketNumberedShardSpec.class.getName(), segment.getShardSpec().getClass().getName());
    }
    final BucketNumberedShardSpec<?> bucketNumberedShardSpec = (BucketNumberedShardSpec<?>) segment.getShardSpec();
    // noinspection unused
    try (final Closer resourceCloser = closer) {
        FileUtils.mkdirp(taskTempDir);
        // Temporary compressed file. It goes away together with taskTempDir.
        final File tempZippedFile = new File(taskTempDir, segment.getId().toString());
        final long unzippedSizeBytes = CompressionUtils.zip(segmentDir, tempZippedFile);
        if (unzippedSizeBytes == 0) {
            throw new IOE("Read 0 bytes from segmentDir[%s]", segmentDir.getAbsolutePath());
        }
        // Offer the zipped segment to each storage location at most once.
        for (int attempt = 0; attempt < shuffleDataLocations.size(); attempt++) {
            final StorageLocation location = locationCycle.next();
            // The path is built from the bucket ID, not the partition ID.
            final String partitionFilePath = getPartitionFilePath(supervisorTaskId, subTaskId, segment.getInterval(), bucketNumberedShardSpec.getBucketId());
            final File destFile = location.reserve(partitionFilePath, segment.getId().toString(), tempZippedFile.length());
            if (destFile == null) {
                // This location did not accept the reservation; move on to the next one.
                continue;
            }
            try {
                FileUtils.mkdirp(destFile.getParentFile());
                FileUtils.writeAtomically(destFile, out -> Files.asByteSource(tempZippedFile).copyTo(out));
                LOG.info("Wrote intermediary segment[%s] for subtask[%s] at [%s]", segment.getId(), subTaskId, destFile);
                return segment.withSize(unzippedSizeBytes).withBinaryVersion(SegmentUtils.getVersionFromDir(segmentDir));
            } catch (Exception e) {
                // Undo the reservation and drop whatever was written before trying another location.
                location.release(partitionFilePath, tempZippedFile.length());
                org.apache.commons.io.FileUtils.deleteQuietly(destFile);
                LOG.warn(e, "Failed to write segment[%s] at [%s]. Trying again with the next location", segment.getId(), destFile);
            }
        }
        throw new ISE("Can't find location to handle segment[%s]", segment);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) IOException(java.io.IOException) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec) ISE(org.apache.druid.java.util.common.ISE) StorageLocation(org.apache.druid.segment.loading.StorageLocation) File(java.io.File) IOE(org.apache.druid.java.util.common.IOE)

Example 3 with StorageLocation

use of org.apache.druid.segment.loading.StorageLocation in project druid by druid-io.

the class LocalIntermediaryDataManager method findPartitionFile.

/**
 * Looks up the intermediary partition file written by the given sub task for the given interval and bucket.
 *
 * <p>Every shuffle data location is searched. A location that contains the partition directory but not the
 * file of {@code subTaskId} does not end the search: addSegment rotates through the locations per supervisor
 * task, so files of different sub tasks for the same interval and bucket may end up in different locations.
 * Whenever a partition directory of the supervisor task is found, the task's check time is refreshed.
 *
 * @param supervisorTaskId ID of the supervisor task owning the partition; validated with IdUtils
 * @param subTaskId        ID of the sub task that wrote the file; validated with IdUtils and used as file name
 * @param interval         interval of the partition, used to build the partition directory path
 * @param bucketId         bucket ID of the partition, used to build the partition directory path
 * @return a ByteSource over the partition file, or an empty Optional if no location contains it
 */
@Override
public Optional<ByteSource> findPartitionFile(String supervisorTaskId, String subTaskId, Interval interval, int bucketId) {
    IdUtils.validateId("supervisorTaskId", supervisorTaskId);
    IdUtils.validateId("subTaskId", subTaskId);
    for (StorageLocation location : shuffleDataLocations) {
        final File partitionDir = new File(location.getPath(), getPartitionDirPath(supervisorTaskId, interval, bucketId));
        if (!partitionDir.exists()) {
            continue;
        }
        // Data of this supervisor task is still being asked for, so push its expiry time back.
        supervisorTaskCheckTimes.put(supervisorTaskId, getExpiryTimeFromNow());
        final File segmentFile = new File(partitionDir, subTaskId);
        if (segmentFile.exists()) {
            return Optional.of(Files.asByteSource(segmentFile));
        }
        // The directory exists here only because of other sub tasks; keep looking in the remaining
        // locations instead of reporting the file as missing.
    }
    return Optional.empty();
}
Also used : StorageLocation(org.apache.druid.segment.loading.StorageLocation) File(java.io.File)

Example 4 with StorageLocation

use of org.apache.druid.segment.loading.StorageLocation in project druid by druid-io.

the class LocalIntermediaryDataManager method deletePartitions.

/**
 * Removes the directory of the given supervisor task from every shuffle data location and then drops the
 * task's entry from the check time map.
 *
 * @param supervisorTaskId ID of the supervisor task whose partitions are deleted; validated with IdUtils
 * @throws IOException if a supervisor task directory cannot be deleted
 */
@Override
public void deletePartitions(String supervisorTaskId) throws IOException {
    IdUtils.validateId("supervisorTaskId", supervisorTaskId);
    for (StorageLocation location : shuffleDataLocations) {
        final File taskDir = new File(location.getPath(), supervisorTaskId);
        if (!taskDir.exists()) {
            // This location holds nothing for the task.
            continue;
        }
        LOG.info("Cleaning up [%s]", taskDir);
        // Report each file to the location before the whole directory tree is deleted.
        for (File partitionFile : org.apache.commons.io.FileUtils.listFiles(taskDir, null, true)) {
            location.removeFile(partitionFile);
        }
        org.apache.commons.io.FileUtils.forceDelete(taskDir);
    }
    supervisorTaskCheckTimes.remove(supervisorTaskId);
}
Also used : StorageLocation(org.apache.druid.segment.loading.StorageLocation) File(java.io.File)

Example 5 with StorageLocation

use of org.apache.druid.segment.loading.StorageLocation in project druid by druid-io.

the class LocalDataStorageDruidModuleTest method createInjector.

/**
 * Builds a startup injector from a LocalDataStorageDruidModule plus a module of stub bindings.
 *
 * @return the injector created by GuiceInjectors from those two modules
 */
private static Injector createInjector() {
    // Stub bindings: an empty list of storage locations, a ColumnConfig lambda returning 0,
    // and a random selector strategy over an empty list.
    final Module stubBindings = binder -> {
        binder.bind(new TypeLiteral<List<StorageLocation>>() {
        }).toInstance(ImmutableList.of());
        binder.bind(ColumnConfig.class).toInstance(() -> 0);
        binder.bind(StorageLocationSelectorStrategy.class).toInstance(new RandomStorageLocationSelectorStrategy(ImmutableList.of()));
    };
    return GuiceInjectors.makeStartupInjectorWithModules(ImmutableList.of(new LocalDataStorageDruidModule(), stubBindings));
}
Also used : TypeLiteral(com.google.inject.TypeLiteral) ColumnConfig(org.apache.druid.segment.column.ColumnConfig) RandomStorageLocationSelectorStrategy(org.apache.druid.segment.loading.RandomStorageLocationSelectorStrategy) Module(com.google.inject.Module) StorageLocation(org.apache.druid.segment.loading.StorageLocation) StorageLocationSelectorStrategy(org.apache.druid.segment.loading.StorageLocationSelectorStrategy) RandomStorageLocationSelectorStrategy(org.apache.druid.segment.loading.RandomStorageLocationSelectorStrategy)

Aggregations

StorageLocation (org.apache.druid.segment.loading.StorageLocation)5 File (java.io.File)4 Module (com.google.inject.Module)1 TypeLiteral (com.google.inject.TypeLiteral)1 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 MutableInt (org.apache.commons.lang3.mutable.MutableInt)1 IAE (org.apache.druid.java.util.common.IAE)1 IOE (org.apache.druid.java.util.common.IOE)1 ISE (org.apache.druid.java.util.common.ISE)1 Closer (org.apache.druid.java.util.common.io.Closer)1 ColumnConfig (org.apache.druid.segment.column.ColumnConfig)1 RandomStorageLocationSelectorStrategy (org.apache.druid.segment.loading.RandomStorageLocationSelectorStrategy)1 StorageLocationSelectorStrategy (org.apache.druid.segment.loading.StorageLocationSelectorStrategy)1 BucketNumberedShardSpec (org.apache.druid.timeline.partition.BucketNumberedShardSpec)1