Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class SegmentLoadDropHandler, method addSegments:
private void addSegments(Collection<DataSegment> segments, final DataSegmentChangeCallback callback)
{
  ExecutorService loadingExecutor = null;
  try (final BackgroundSegmentAnnouncer backgroundSegmentAnnouncer =
           new BackgroundSegmentAnnouncer(announcer, exec, config.getAnnounceIntervalMillis())) {
    backgroundSegmentAnnouncer.startAnnouncing();

    loadingExecutor = Execs.multiThreaded(config.getNumBootstrapThreads(), "Segment-Load-Startup-%s");

    final int numSegments = segments.size();
    final CountDownLatch latch = new CountDownLatch(numSegments);
    final AtomicInteger counter = new AtomicInteger(0);
    final CopyOnWriteArrayList<DataSegment> failedSegments = new CopyOnWriteArrayList<>();

    // Fan the loads out to the bootstrap pool; failures are recorded rather than aborting the batch.
    for (final DataSegment segment : segments) {
      loadingExecutor.submit(
          () -> {
            try {
              log.info("Loading segment[%d/%d][%s]", counter.incrementAndGet(), numSegments, segment.getId());
              loadSegment(segment, callback, config.isLazyLoadOnStart());
              try {
                backgroundSegmentAnnouncer.announceSegment(segment);
              }
              catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new SegmentLoadingException(e, "Loading Interrupted");
              }
            }
            catch (SegmentLoadingException e) {
              log.error(e, "[%s] failed to load", segment.getId());
              failedSegments.add(segment);
            }
            finally {
              latch.countDown();  // always count down, success or failure
            }
          }
      );
    }

    // Wait for every load to finish, then emit a single alert covering all failures.
    try {
      latch.await();
      if (failedSegments.size() > 0) {
        log.makeAlert("%,d errors seen while loading segments", failedSegments.size())
           .addData("failedSegments", failedSegments)
           .emit();
      }
    }
    catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      log.makeAlert(e, "LoadingInterrupted").emit();
    }

    backgroundSegmentAnnouncer.finishAnnouncing();
  }
  catch (SegmentLoadingException e) {
    log.makeAlert(e, "Failed to load segments -- likely problem with announcing.")
       .addData("numSegments", segments.size())
       .emit();
  }
  finally {
    callback.execute();
    if (loadingExecutor != null) {
      loadingExecutor.shutdownNow();
    }
  }
}
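The method above fans segment loads out to a bootstrap pool and blocks on a CountDownLatch until all of them complete, collecting failures instead of failing fast. A minimal, self-contained sketch of that pattern using only the JDK; the task names and the failure condition here are hypothetical stand-ins, not Druid APIs:

  import java.util.List;
  import java.util.concurrent.CopyOnWriteArrayList;
  import java.util.concurrent.CountDownLatch;
  import java.util.concurrent.ExecutorService;
  import java.util.concurrent.Executors;

  public class ParallelLoadSketch
  {
    public static void main(String[] args) throws InterruptedException
    {
      final List<String> tasks = List.of("seg-1", "seg-2", "seg-3");  // hypothetical segment ids
      final CountDownLatch latch = new CountDownLatch(tasks.size());
      final CopyOnWriteArrayList<String> failed = new CopyOnWriteArrayList<>();
      final ExecutorService pool = Executors.newFixedThreadPool(2);
      for (final String task : tasks) {
        pool.submit(() -> {
          try {
            if (task.endsWith("2")) {           // stand-in for a load failure
              throw new RuntimeException("load failed");
            }
          }
          catch (RuntimeException e) {
            failed.add(task);                   // record the failure, keep the batch going
          }
          finally {
            latch.countDown();                  // always count down, success or failure
          }
        });
      }
      latch.await();                            // block until every task has finished
      System.out.println("failed: " + failed);  // prints: failed: [seg-2]
      pool.shutdownNow();
    }
  }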
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class HdfsDataSegmentPuller, method getSegmentFiles:
FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException
{
  try {
    FileUtils.mkdirp(outDir);
  }
  catch (IOException e) {
    throw new SegmentLoadingException(e, "Unable to create directory[%s]", outDir);
  }
  try {
    final FileSystem fs = path.getFileSystem(config);
    if (fs.isDirectory(path)) {
      // -------- directory: copy each child file, retrying on transient failures ---------
      try {
        return RetryUtils.retry(
            () -> {
              if (!fs.exists(path)) {
                throw new SegmentLoadingException("No files found at [%s]", path.toString());
              }
              final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
              final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
              while (children.hasNext()) {
                final LocatedFileStatus child = children.next();
                final Path childPath = child.getPath();
                final String fname = childPath.getName();
                if (fs.isDirectory(childPath)) {
                  log.warn("[%s] is a child directory, skipping", childPath.toString());
                } else {
                  final File outFile = new File(outDir, fname);
                  try (final FSDataInputStream in = fs.open(childPath)) {
                    NativeIO.chunkedCopy(in, outFile);
                  }
                  result.addFile(outFile);
                }
              }
              log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
              return result;
            },
            shouldRetryPredicate(),
            DEFAULT_RETRY_COUNT
        );
      }
      catch (Exception e) {
        throw new RuntimeException(e);
      }
    } else if (CompressionUtils.isZip(path.getName())) {
      // -------- zip ---------
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(
          new ByteSource()
          {
            @Override
            public InputStream openStream() throws IOException
            {
              return getInputStream(path);
            }
          },
          outDir,
          shouldRetryPredicate(),
          false
      );
      log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
      return result;
    } else if (CompressionUtils.isGz(path.getName())) {
      // -------- gzip ---------
      final String fname = path.getName();
      final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(
          new ByteSource()
          {
            @Override
            public InputStream openStream() throws IOException
            {
              return getInputStream(path);
            }
          },
          outFile
      );
      log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outFile.getAbsolutePath());
      return result;
    } else {
      throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
    }
  }
  catch (IOException e) {
    throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
  }
}
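A hedged usage sketch of this puller: since getSegmentFiles is package-private as shown, the caller below would need to live in the same package, and the HDFS URI and local cache directory are illustrative assumptions, not values from the Druid codebase. A .zip path name routes into the zip branch above.

  package org.apache.druid.storage.hdfs;  // required: getSegmentFiles is package-private

  import java.io.File;
  import org.apache.druid.java.util.common.FileUtils;
  import org.apache.druid.segment.loading.SegmentLoadingException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;

  public class PullerSketch
  {
    public static void main(String[] args) throws SegmentLoadingException
    {
      final HdfsDataSegmentPuller puller = new HdfsDataSegmentPuller(new Configuration());
      // Illustrative locations only.
      final FileUtils.FileCopyResult result = puller.getSegmentFiles(
          new Path("hdfs://namenode:8020/druid/segments/wiki/index.zip"),
          new File("/tmp/druid/segment-cache/wiki")
      );
      System.out.println("copied " + result.size() + " bytes");
    }
  }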
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class SegmentManagerTest, method assertResult:
// TODO remove when the bug in IntelliJ is fixed.
@SuppressWarnings("RedundantThrows")
private void assertResult(List<DataSegment> expectedExistingSegments) throws SegmentLoadingException
{
  // Expected per-datasource totals, computed independently from the input segments.
  final Map<String, Long> expectedDataSourceSizes =
      expectedExistingSegments.stream().collect(Collectors.toMap(DataSegment::getDataSource, DataSegment::getSize, Long::sum));
  final Map<String, Long> expectedDataSourceCounts =
      expectedExistingSegments.stream().collect(Collectors.toMap(DataSegment::getDataSource, segment -> 1L, Long::sum));
  final Set<String> expectedDataSourceNames =
      expectedExistingSegments.stream().map(DataSegment::getDataSource).collect(Collectors.toSet());

  // Build the expected timeline for each datasource.
  final Map<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>> expectedTimelines = new HashMap<>();
  for (DataSegment segment : expectedExistingSegments) {
    final VersionedIntervalTimeline<String, ReferenceCountingSegment> expectedTimeline = expectedTimelines
        .computeIfAbsent(segment.getDataSource(), k -> new VersionedIntervalTimeline<>(Ordering.natural()));
    expectedTimeline.add(
        segment.getInterval(),
        segment.getVersion(),
        segment.getShardSpec().createChunk(
            ReferenceCountingSegment.wrapSegment(
                SEGMENT_LOADER.getSegment(segment, false, SegmentLazyLoadFailCallback.NOOP),
                segment.getShardSpec()
            )
        )
    );
  }

  Assert.assertEquals(expectedDataSourceNames, segmentManager.getDataSourceNames());
  Assert.assertEquals(expectedDataSourceCounts, segmentManager.getDataSourceCounts());
  Assert.assertEquals(expectedDataSourceSizes, segmentManager.getDataSourceSizes());

  final Map<String, DataSourceState> dataSources = segmentManager.getDataSources();
  Assert.assertEquals(expectedTimelines.size(), dataSources.size());
  dataSources.forEach((sourceName, dataSourceState) -> {
    Assert.assertEquals(expectedDataSourceCounts.get(sourceName).longValue(), dataSourceState.getNumSegments());
    Assert.assertEquals(expectedDataSourceSizes.get(sourceName).longValue(), dataSourceState.getTotalSegmentSize());
    Assert.assertEquals(expectedTimelines.get(sourceName).getAllTimelineEntries(), dataSourceState.getTimeline().getAllTimelineEntries());
  });
}
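The per-datasource maps above rely on the three-argument Collectors.toMap, whose merge function (Long::sum here) combines values whenever two segments share a datasource key. A small standalone illustration with hypothetical datasource names and sizes:

  import java.util.List;
  import java.util.Map;
  import java.util.stream.Collectors;

  public class MergeSketch
  {
    public static void main(String[] args)
    {
      // Two "segments" for datasource "wiki", one for "metrics" (hypothetical sizes).
      List<Map.Entry<String, Long>> segments = List.of(
          Map.entry("wiki", 100L),
          Map.entry("wiki", 50L),
          Map.entry("metrics", 10L)
      );
      Map<String, Long> sizes = segments
          .stream()
          .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, Long::sum));
      System.out.println(sizes);  // {wiki=150, metrics=10} (iteration order may vary)
    }
  }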
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class SegmentManagerThreadSafetyTest, method testLoadMultipleSegments:
@Test(timeout = 6000L)
public void testLoadMultipleSegments() throws IOException, ExecutionException, InterruptedException
{
  // 11 distinct intervals, 8 identical segments per interval: 88 load requests, 11 distinct segments.
  final List<DataSegment> segments = new ArrayList<>(88);
  for (int i = 0; i < 11; i++) {
    for (int j = 0; j < 8; j++) {
      segments.add(createSegment(StringUtils.format("2019-%02d-01/2019-%02d-01", i + 1, i + 2)));
    }
  }

  // 16 threads race to load every segment in the list.
  final List<Future> futures = IntStream
      .range(0, 16)
      .mapToObj(i -> exec.submit(() -> {
        for (DataSegment segment : segments) {
          try {
            segmentManager.loadSegment(segment, false, SegmentLazyLoadFailCallback.NOOP);
          }
          catch (SegmentLoadingException e) {
            throw new RuntimeException(e);
          }
        }
      }))
      .collect(Collectors.toList());
  for (Future future : futures) {
    future.get();
  }

  // Despite the contention, only 11 distinct files were pulled (the first counter is
  // spot-checked to be exactly 1), and no segment locks remain held.
  Assert.assertEquals(11, segmentPuller.numFileLoaded.size());
  Assert.assertEquals(1, segmentPuller.numFileLoaded.values().iterator().next().intValue());
  Assert.assertEquals(0, segmentCacheManager.getSegmentLocks().size());
}
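The test wraps SegmentLoadingException in a RuntimeException so that a worker failure surfaces on the main thread: Future.get() rethrows whatever the task threw, wrapped in an ExecutionException, which here fails the test. A minimal JDK illustration of that propagation:

  import java.util.concurrent.ExecutionException;
  import java.util.concurrent.ExecutorService;
  import java.util.concurrent.Executors;
  import java.util.concurrent.Future;

  public class PropagationSketch
  {
    public static void main(String[] args) throws InterruptedException
    {
      final ExecutorService exec = Executors.newSingleThreadExecutor();
      final Runnable task = () -> {
        throw new RuntimeException("worker failed");  // stand-in for a wrapped SegmentLoadingException
      };
      final Future<?> future = exec.submit(task);
      try {
        future.get();  // rethrows the worker's exception, wrapped in ExecutionException
      }
      catch (ExecutionException e) {
        System.out.println("caught: " + e.getCause().getMessage());  // prints: caught: worker failed
      }
      finally {
        exec.shutdownNow();
      }
    }
  }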