
Example 26 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class SegmentLoadDropHandler, method addSegments.

private void addSegments(Collection<DataSegment> segments, final DataSegmentChangeCallback callback) {
    ExecutorService loadingExecutor = null;
    // Announce segments in the background while a dedicated bootstrap pool loads them;
    // try-with-resources ensures the announcer is closed even if loading fails.
    try (final BackgroundSegmentAnnouncer backgroundSegmentAnnouncer = new BackgroundSegmentAnnouncer(announcer, exec, config.getAnnounceIntervalMillis())) {
        backgroundSegmentAnnouncer.startAnnouncing();
        loadingExecutor = Execs.multiThreaded(config.getNumBootstrapThreads(), "Segment-Load-Startup-%s");
        final int numSegments = segments.size();
        final CountDownLatch latch = new CountDownLatch(numSegments);
        final AtomicInteger counter = new AtomicInteger(0);
        final CopyOnWriteArrayList<DataSegment> failedSegments = new CopyOnWriteArrayList<>();
        for (final DataSegment segment : segments) {
            loadingExecutor.submit(() -> {
                try {
                    log.info("Loading segment[%d/%d][%s]", counter.incrementAndGet(), numSegments, segment.getId());
                    loadSegment(segment, callback, config.isLazyLoadOnStart());
                    try {
                        backgroundSegmentAnnouncer.announceSegment(segment);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        throw new SegmentLoadingException(e, "Loading Interrupted");
                    }
                } catch (SegmentLoadingException e) {
                    log.error(e, "[%s] failed to load", segment.getId());
                    failedSegments.add(segment);
                } finally {
                    latch.countDown();
                }
            });
        }
        try {
            latch.await();
            if (!failedSegments.isEmpty()) {
                log.makeAlert("%,d errors seen while loading segments", failedSegments.size()).addData("failedSegments", failedSegments).emit();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.makeAlert(e, "LoadingInterrupted").emit();
        }
        backgroundSegmentAnnouncer.finishAnnouncing();
    } catch (SegmentLoadingException e) {
        log.makeAlert(e, "Failed to load segments -- likely problem with announcing.").addData("numSegments", segments.size()).emit();
    } finally {
        callback.execute();
        if (loadingExecutor != null) {
            loadingExecutor.shutdownNow();
        }
    }
}
Also used: AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorService(java.util.concurrent.ExecutorService) CountDownLatch(java.util.concurrent.CountDownLatch) DataSegment(org.apache.druid.timeline.DataSegment) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)
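
The fan-out pattern above is easy to lift out of its Druid context: submit one task per item to a bounded pool, have every task count down a CountDownLatch whether it succeeds or fails, and collect failures in a CopyOnWriteArrayList. Below is a minimal, self-contained sketch of that technique; the names (items, load) are illustrative stand-ins, not Druid API.

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ParallelLoadSketch {

    // Hypothetical stand-in for loadSegment(); fails for one item to show the failure path.
    static void load(String item) throws Exception {
        if (item.startsWith("bad")) {
            throw new Exception("failed to load " + item);
        }
    }

    public static void main(String[] args) throws InterruptedException {
        final List<String> items = List.of("a", "b", "bad-c", "d");
        final ExecutorService pool = Executors.newFixedThreadPool(2);
        final CountDownLatch latch = new CountDownLatch(items.size());
        // Safe for concurrent appends from worker threads, as in addSegments.
        final CopyOnWriteArrayList<String> failed = new CopyOnWriteArrayList<>();
        for (String item : items) {
            pool.submit(() -> {
                try {
                    load(item);
                } catch (Exception e) {
                    failed.add(item);
                } finally {
                    latch.countDown(); // always counts down, success or failure
                }
            });
        }
        latch.await(); // blocks until every task has finished
        pool.shutdownNow();
        System.out.println("failed: " + failed);
    }
}

The finally block is the load-bearing detail: if a task could exit without counting down, latch.await() would hang forever, which is why addSegments counts down in a finally as well.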

Example 27 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class HdfsDataSegmentPuller, method getSegmentFiles.

FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    try {
        FileUtils.mkdirp(outDir);
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "");
    }
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {
            try {
                return RetryUtils.retry(() -> {
                    if (!fs.exists(path)) {
                        throw new SegmentLoadingException("No files found at [%s]", path.toString());
                    }
                    final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                    final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                    while (children.hasNext()) {
                        final LocatedFileStatus child = children.next();
                        final Path childPath = child.getPath();
                        final String fname = childPath.getName();
                        if (fs.isDirectory(childPath)) {
                            log.warn("[%s] is a child directory, skipping", childPath.toString());
                        } else {
                            final File outFile = new File(outDir, fname);
                            try (final FSDataInputStream in = fs.open(childPath)) {
                                NativeIO.chunkedCopy(in, outFile);
                            }
                            result.addFile(outFile);
                        }
                    }
                    log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
                    return result;
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {
            // --------    zip     ---------
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {

                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);
            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
            return result;
        } else if (CompressionUtils.isGz(path.getName())) {
            // --------    gzip     ---------
            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {

                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);
            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}
Also used: Path(org.apache.hadoop.fs.Path) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) FileUtils(org.apache.druid.java.util.common.FileUtils) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputStream(java.io.InputStream) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) ByteSource(com.google.common.io.ByteSource) File(java.io.File)
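
A detail worth isolating from the method above is why CompressionUtils is handed a ByteSource rather than a raw InputStream: a ByteSource can open a fresh stream for every attempt, which is what makes the copy retryable at all. Here is a minimal sketch of that idea, with the retry loop written out by hand rather than via Druid's RetryUtils:

import com.google.common.io.ByteSource;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

public class ByteSourceRetrySketch {

    public static void main(String[] args) throws IOException {
        final byte[] payload = "segment bytes".getBytes();
        // Each call to openStream() yields a brand-new stream positioned at the
        // start, so a failed attempt can simply be retried from scratch.
        final ByteSource source = new ByteSource() {
            @Override
            public InputStream openStream() {
                return new ByteArrayInputStream(payload);
            }
        };
        for (int attempt = 1; attempt <= 3; attempt++) {
            try (InputStream in = source.openStream()) {
                System.out.println("attempt " + attempt + ": read " + in.readAllBytes().length + " bytes");
                break; // success, stop retrying
            }
        }
    }
}

In getSegmentFiles the anonymous ByteSource plays the same role for HDFS: each retry calls getInputStream(path) again, so a half-finished copy never poisons a later attempt.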

Example 28 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class SegmentManagerTest, method assertResult.

// TODO remove when the bug in IntelliJ is fixed.
@SuppressWarnings("RedundantThrows")
private void assertResult(List<DataSegment> expectedExistingSegments) throws SegmentLoadingException {
    final Map<String, Long> expectedDataSourceSizes = expectedExistingSegments.stream().collect(Collectors.toMap(DataSegment::getDataSource, DataSegment::getSize, Long::sum));
    final Map<String, Long> expectedDataSourceCounts = expectedExistingSegments.stream().collect(Collectors.toMap(DataSegment::getDataSource, segment -> 1L, Long::sum));
    final Set<String> expectedDataSourceNames = expectedExistingSegments.stream().map(DataSegment::getDataSource).collect(Collectors.toSet());
    final Map<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>> expectedTimelines = new HashMap<>();
    for (DataSegment segment : expectedExistingSegments) {
        final VersionedIntervalTimeline<String, ReferenceCountingSegment> expectedTimeline = expectedTimelines.computeIfAbsent(segment.getDataSource(), k -> new VersionedIntervalTimeline<>(Ordering.natural()));
        expectedTimeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(ReferenceCountingSegment.wrapSegment(SEGMENT_LOADER.getSegment(segment, false, SegmentLazyLoadFailCallback.NOOP), segment.getShardSpec())));
    }
    Assert.assertEquals(expectedDataSourceNames, segmentManager.getDataSourceNames());
    Assert.assertEquals(expectedDataSourceCounts, segmentManager.getDataSourceCounts());
    Assert.assertEquals(expectedDataSourceSizes, segmentManager.getDataSourceSizes());
    final Map<String, DataSourceState> dataSources = segmentManager.getDataSources();
    Assert.assertEquals(expectedTimelines.size(), dataSources.size());
    dataSources.forEach((sourceName, dataSourceState) -> {
        Assert.assertEquals(expectedDataSourceCounts.get(sourceName).longValue(), dataSourceState.getNumSegments());
        Assert.assertEquals(expectedDataSourceSizes.get(sourceName).longValue(), dataSourceState.getTotalSegmentSize());
        Assert.assertEquals(expectedTimelines.get(sourceName).getAllTimelineEntries(), dataSourceState.getTimeline().getAllTimelineEntries());
    });
}
Also used: DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Intervals(org.apache.druid.java.util.common.Intervals) MapUtils(org.apache.druid.java.util.common.MapUtils) SegmentLazyLoadFailCallback(org.apache.druid.segment.SegmentLazyLoadFailCallback) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) HashMap(java.util.HashMap) StorageAdapter(org.apache.druid.segment.StorageAdapter) SegmentLoader(org.apache.druid.segment.loading.SegmentLoader) DataSourceState(org.apache.druid.server.SegmentManager.DataSourceState) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) Future(java.util.concurrent.Future) ImmutableList(com.google.common.collect.ImmutableList) After(org.junit.After) Map(java.util.Map) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ExecutorService(java.util.concurrent.ExecutorService) Before(org.junit.Before) Segment(org.apache.druid.segment.Segment) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ImmutableMap(com.google.common.collect.ImmutableMap) QueryableIndex(org.apache.druid.segment.QueryableIndex) Set(java.util.Set) Test(org.junit.Test) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) Collectors(java.util.stream.Collectors) TableDataSource(org.apache.druid.query.TableDataSource) Executors(java.util.concurrent.Executors) ExecutionException(java.util.concurrent.ExecutionException) List(java.util.List) Ordering(com.google.common.collect.Ordering) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) DataSegment(org.apache.druid.timeline.DataSegment) Optional(java.util.Optional) SegmentId(org.apache.druid.timeline.SegmentId) Assert(org.junit.Assert)
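
The expected-value maps in this test lean on the three-argument Collectors.toMap, whose merge function (Long::sum) combines values that collide on the same key. That idiom is worth a standalone look; the Seg record below is a hypothetical stand-in for DataSegment:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class ToMapMergeSketch {

    // Hypothetical stand-in for DataSegment, with just the fields the idiom needs.
    record Seg(String dataSource, long size) {}

    public static void main(String[] args) {
        final List<Seg> segs = List.of(new Seg("wiki", 100), new Seg("wiki", 50), new Seg("metrics", 10));
        // Without the Long::sum merge function, a duplicate key would throw IllegalStateException.
        final Map<String, Long> sizes = segs.stream()
                .collect(Collectors.toMap(Seg::dataSource, Seg::size, Long::sum));
        // Mapping every element to 1L turns the same idiom into a per-key counter.
        final Map<String, Long> counts = segs.stream()
                .collect(Collectors.toMap(Seg::dataSource, s -> 1L, Long::sum));
        System.out.println(sizes);  // e.g. {wiki=150, metrics=10} (iteration order not guaranteed)
        System.out.println(counts); // e.g. {wiki=2, metrics=1}
    }
}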

Example 29 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class SegmentManagerThreadSafetyTest, method testLoadMultipleSegments.

@Test(timeout = 6000L)
public void testLoadMultipleSegments() throws IOException, ExecutionException, InterruptedException {
    final List<DataSegment> segments = new ArrayList<>(88);
    // 11 monthly intervals x 8 segments per interval = 88 segments in total
    for (int i = 0; i < 11; i++) {
        for (int j = 0; j < 8; j++) {
            segments.add(createSegment(StringUtils.format("2019-%02d-01/2019-%02d-01", i + 1, i + 2)));
        }
    }
    final List<Future<?>> futures = IntStream.range(0, 16).mapToObj(i -> exec.submit(() -> {
        for (DataSegment segment : segments) {
            try {
                segmentManager.loadSegment(segment, false, SegmentLazyLoadFailCallback.NOOP);
            } catch (SegmentLoadingException e) {
                throw new RuntimeException(e);
            }
        }
    })).collect(Collectors.toList());
    for (Future<?> future : futures) {
        future.get();
    }
    Assert.assertEquals(11, segmentPuller.numFileLoaded.size());
    Assert.assertEquals(1, segmentPuller.numFileLoaded.values().iterator().next().intValue());
    Assert.assertEquals(0, segmentCacheManager.getSegmentLocks().size());
}
Also used: StorageAdapter(org.apache.druid.segment.StorageAdapter) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) Future(java.util.concurrent.Future) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) After(org.junit.After) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) SegmentizerFactory(org.apache.druid.segment.loading.SegmentizerFactory) FileUtils(org.apache.druid.java.util.common.FileUtils) Execs(org.apache.druid.java.util.common.concurrent.Execs) LocalLoadSpec(org.apache.druid.segment.loading.LocalLoadSpec) ImmutableMap(com.google.common.collect.ImmutableMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) Collectors(java.util.stream.Collectors) SegmentLocalCacheLoader(org.apache.druid.segment.loading.SegmentLocalCacheLoader) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) SegmentLocalCacheManager(org.apache.druid.segment.loading.SegmentLocalCacheManager) SegmentId(org.apache.druid.timeline.SegmentId) IntStream(java.util.stream.IntStream) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoaderConfig(org.apache.druid.segment.loading.SegmentLoaderConfig) Intervals(org.apache.druid.java.util.common.Intervals) Std(com.fasterxml.jackson.databind.InjectableValues.Std) SegmentLazyLoadFailCallback(org.apache.druid.segment.SegmentLazyLoadFailCallback) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) FileCopyResult(org.apache.druid.java.util.common.FileUtils.FileCopyResult) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) Before(org.junit.Before) Segment(org.apache.druid.segment.Segment) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test) IOException(java.io.IOException) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) File(java.io.File) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) ExecutionException(java.util.concurrent.ExecutionException) Rule(org.junit.Rule) Assert(org.junit.Assert) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
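
What the test is really probing is that SegmentManager loads each underlying file exactly once no matter how many threads race on it. The sketch below shows one common way to get that guarantee, using ConcurrentHashMap.computeIfAbsent, which invokes its mapping function at most once per key. The names are illustrative, and this is not necessarily the mechanism Druid's cache manager uses internally.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

public class LoadOnceSketch {

    static final ConcurrentHashMap<String, AtomicInteger> loadCounts = new ConcurrentHashMap<>();

    static void loadOnce(String key) {
        // computeIfAbsent runs the mapping function at most once per key, so the
        // expensive "load" inside it cannot execute twice for the same segment.
        loadCounts.computeIfAbsent(key, k -> {
            // ... expensive one-time load would happen here ...
            return new AtomicInteger(1);
        });
    }

    public static void main(String[] args) throws Exception {
        final ExecutorService pool = Executors.newFixedThreadPool(16);
        final List<Future<?>> futures = new ArrayList<>();
        for (int t = 0; t < 16; t++) {
            futures.add(pool.submit(() -> {
                for (String key : new String[]{"seg-1", "seg-2", "seg-3"}) {
                    loadOnce(key);
                }
            }));
        }
        for (Future<?> future : futures) {
            future.get(); // propagates any worker failure, mirroring the test's future.get() loop
        }
        pool.shutdown();
        // Every key was "loaded" exactly once despite 16 competing threads.
        loadCounts.forEach((k, v) -> System.out.println(k + " loaded " + v.get() + " time(s)"));
    }
}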

Aggregations

SegmentLoadingException (org.apache.druid.segment.loading.SegmentLoadingException): 29 usages
IOException (java.io.IOException): 17 usages
File (java.io.File): 10 usages
FileUtils (org.apache.druid.java.util.common.FileUtils): 8 usages
DataSegment (org.apache.druid.timeline.DataSegment): 7 usages
Test (org.junit.Test): 6 usages
OSSException (com.aliyun.oss.OSSException): 4 usages
AmazonServiceException (com.amazonaws.AmazonServiceException): 4 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 4 usages
List (java.util.List): 4 usages
Map (java.util.Map): 4 usages
ExecutionException (java.util.concurrent.ExecutionException): 4 usages
ExecutorService (java.util.concurrent.ExecutorService): 4 usages
Collectors (java.util.stream.Collectors): 4 usages
QueryableIndex (org.apache.druid.segment.QueryableIndex): 4 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
Path (org.apache.hadoop.fs.Path): 4 usages
Interval (org.joda.time.Interval): 4 usages
Assert (org.junit.Assert): 4 usages
ByteSource (com.google.common.io.ByteSource): 3 usages