Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class HdfsDataSegmentKiller, method kill.
@Override
public void kill(DataSegment segment) throws SegmentLoadingException
{
  final Path segmentPath = getPath(segment);
  log.info("Killing segment[%s] mapped to path[%s]", segment.getId(), segmentPath);
  try {
    String filename = segmentPath.getName();
    final FileSystem fs = segmentPath.getFileSystem(config);
    if (!filename.endsWith(".zip")) {
      throw new SegmentLoadingException("Unknown file type[%s]", segmentPath);
    } else {
      if (!fs.exists(segmentPath)) {
        log.warn("Segment path [%s] does not exist", segmentPath);
        return;
      }

      // There are 3 supported path formats:
      //   - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/index.zip
      //   - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_index.zip
      //   - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_UUID_index.zip
      String[] zipParts = filename.split("_");
      Path descriptorPath = new Path(segmentPath.getParent(), "descriptor.json");
      if (zipParts.length > 1) {
        Preconditions.checkState(
            zipParts.length <= 3
            && StringUtils.isNumeric(zipParts[0])
            && "index.zip".equals(zipParts[zipParts.length - 1]),
            "Unexpected segmentPath format [%s]",
            segmentPath
        );
        descriptorPath = new Path(
            segmentPath.getParent(),
            org.apache.druid.java.util.common.StringUtils.format(
                "%s_%sdescriptor.json",
                zipParts[0],
                zipParts.length == 2 ? "" : zipParts[1] + "_"
            )
        );
      }

      if (!fs.delete(segmentPath, false)) {
        throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", segmentPath.toString());
      }
      // descriptor.json is a file that stored segment metadata in deep storage. It is deprecated and no longer
      // written, but we still delete it if it exists.
      fs.delete(descriptorPath, false);

      removeEmptyParentDirectories(fs, segmentPath, zipParts.length > 1 ? 2 : 3);
    }
  }
  catch (IOException e) {
    throw new SegmentLoadingException(e, "Unable to kill segment");
  }
}
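The descriptor path derivation above is compact but easy to misread. The standalone sketch below (illustrative only, not Druid code; the sample paths are made up) runs the three supported filename shapes through the same split logic and prints the descriptor.json name that kill would try to delete next to each index.zip.

import org.apache.hadoop.fs.Path;

public class DescriptorNamingSketch
{
  public static void main(String[] args)
  {
    // Hypothetical segment paths covering the three supported formats.
    String[] samples = new String[]{
        "hdfs://nn1/base/wikipedia/interval/v1/0/index.zip",       // shardNum directory + index.zip
        "hdfs://nn1/base/wikipedia/interval/v1/0_index.zip",       // shardNum_index.zip
        "hdfs://nn1/base/wikipedia/interval/v1/0_abc123_index.zip" // shardNum_UUID_index.zip
    };

    for (String sample : samples) {
      Path segmentPath = new Path(sample);
      String filename = segmentPath.getName();
      String[] zipParts = filename.split("_");

      // Mirrors the logic in HdfsDataSegmentKiller.kill above.
      Path descriptorPath = new Path(segmentPath.getParent(), "descriptor.json");
      if (zipParts.length > 1) {
        descriptorPath = new Path(
            segmentPath.getParent(),
            zipParts[0] + "_" + (zipParts.length == 2 ? "" : zipParts[1] + "_") + "descriptor.json"
        );
      }
      System.out.println(filename + " -> " + descriptorPath.getName());
    }
  }
}

For these samples the output is descriptor.json, 0_descriptor.json and 0_abc123_descriptor.json, matching the three path formats listed in the comment.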
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class GoogleDataSegmentPuller, method getSegmentFiles.
FileUtils.FileCopyResult getSegmentFiles(final String bucket, final String path, File outDir)
    throws SegmentLoadingException
{
  LOG.info("Pulling index at bucket[%s] path[%s] to outDir[%s]", bucket, path, outDir.getAbsolutePath());
  try {
    FileUtils.mkdirp(outDir);
    final GoogleByteSource byteSource = new GoogleByteSource(storage, bucket, path);
    final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, GoogleUtils::isRetryable, false);
    LOG.info("Loaded %d bytes from [%s] to [%s]", result.size(), path, outDir.getAbsolutePath());
    return result;
  }
  catch (Exception e) {
    try {
      FileUtils.deleteDirectory(outDir);
    }
    catch (IOException ioe) {
      LOG.warn(
          ioe,
          "Failed to remove output directory [%s] for segment pulled from [%s]",
          outDir.getAbsolutePath(),
          path
      );
    }
    throw new SegmentLoadingException(e, e.getMessage());
  }
}
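The part of getSegmentFiles worth copying is the failure handling: once outDir exists, any exception triggers a best-effort delete of the partially written directory before the SegmentLoadingException propagates, so a retry starts from a clean slate. Below is a minimal, Druid-free sketch of that pattern; fetchInto and deleteQuietly are hypothetical placeholders, not methods from the class above.

import java.io.File;
import java.io.IOException;

public class PullWithCleanupSketch
{
  public File pull(File outDir) throws IOException
  {
    if (!outDir.mkdirs() && !outDir.isDirectory()) {
      throw new IOException("Cannot create output directory " + outDir);
    }
    try {
      fetchInto(outDir); // hypothetical stand-in for the download-and-unzip step
      return outDir;
    }
    catch (Exception e) {
      // Best-effort cleanup so a retry starts from an empty directory;
      // the original failure is the one that gets reported.
      deleteQuietly(outDir);
      throw new IOException("Failed to pull segment into " + outDir, e);
    }
  }

  private void fetchInto(File outDir) throws IOException
  {
    // placeholder: download and unzip the segment files into outDir
  }

  private void deleteQuietly(File dir)
  {
    File[] children = dir.listFiles();
    if (children != null) {
      for (File child : children) {
        deleteQuietly(child);
      }
    }
    dir.delete();
  }
}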
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class GoogleDataSegmentKiller, method kill.
@Override
public void kill(DataSegment segment) throws SegmentLoadingException
{
  LOG.info("Killing segment [%s]", segment.getId());

  Map<String, Object> loadSpec = segment.getLoadSpec();
  final String bucket = MapUtils.getString(loadSpec, "bucket");
  final String indexPath = MapUtils.getString(loadSpec, "path");
  final String descriptorPath = DataSegmentKiller.descriptorPath(indexPath);

  try {
    deleteIfPresent(bucket, indexPath);
    // descriptor.json is a file that stored segment metadata in deep storage. It is deprecated and no longer
    // written, but we still delete it if it exists.
    deleteIfPresent(bucket, descriptorPath);
  }
  catch (IOException e) {
    throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getId(), e.getMessage());
  }
}
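Everything kill needs comes from the segment's loadSpec; for Google deep storage that is a small map with a bucket and an object path, and the descriptor object sits next to the index object by convention (index.zip replaced with descriptor.json). The sketch below shows a hypothetical loadSpec and the two objects the method above would delete; the bucket, path, and the string replacement are illustrative assumptions rather than calls into Druid.

import java.util.HashMap;
import java.util.Map;

public class GoogleLoadSpecSketch
{
  public static void main(String[] args)
  {
    // Hypothetical loadSpec as it might appear in a segment's metadata.
    Map<String, Object> loadSpec = new HashMap<>();
    loadSpec.put("type", "google");
    loadSpec.put("bucket", "example-druid-deep-storage");
    loadSpec.put("path", "segments/wikipedia/2024-01-01/v1/0/index.zip");

    String indexPath = (String) loadSpec.get("path");
    // Assumed convention: the descriptor object lives next to the index object.
    String descriptorPath = indexPath.substring(0, indexPath.length() - "index.zip".length()) + "descriptor.json";

    System.out.println("delete gs://" + loadSpec.get("bucket") + "/" + indexPath);
    System.out.println("delete gs://" + loadSpec.get("bucket") + "/" + descriptorPath);
  }
}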
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class IngestSegmentFirehoseFactory, method connect.
@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException
{
  log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);
  final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();

  // Download all segments locally.
  // Note: this requires enough local storage space to fit all of the segments, even though
  // IngestSegmentFirehose iterates over the segments in series. We may want to change this
  // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory.
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
  for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
      final DataSegment segment = chunk.getObject();
      segmentFileMap.computeIfAbsent(segment, k -> {
        try {
          return segmentCacheManager.getSegmentFiles(segment);
        }
        catch (SegmentLoadingException e) {
          throw new RuntimeException(e);
        }
      });
    }
  }

  final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(
      dimensions,
      inputRowParser.getParseSpec().getDimensionsSpec(),
      timeLineSegments
  );
  final List<String> metricsList = metrics == null
      ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments)
      : metrics;

  final List<WindowedStorageAdapter> adapters = Lists.newArrayList(
      Iterables.concat(
          Iterables.transform(
              timeLineSegments,
              new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>()
              {
                @Override
                public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder)
                {
                  return Iterables.transform(
                      holder.getObject(),
                      new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>()
                      {
                        @Override
                        public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input)
                        {
                          final DataSegment segment = input.getObject();
                          try {
                            return new WindowedStorageAdapter(
                                new QueryableIndexStorageAdapter(
                                    indexIO.loadIndex(
                                        Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId())
                                    )
                                ),
                                holder.getInterval()
                            );
                          }
                          catch (IOException e) {
                            throw new RuntimeException(e);
                          }
                        }
                      }
                  );
                }
              }
          )
      )
  );

  final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser);
  return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter);
}
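The adapter list above is built with Guava's Function/Iterables API, which predates lambdas and becomes hard to read once the transforms nest. Below is a sketch of an equivalent construction with Java 8 streams; it assumes the same surrounding fields (indexIO, segmentFileMap) and Druid types, plus imports of java.util.stream.Collectors and java.util.stream.StreamSupport, and is offered as a readability comparison rather than a proposed change.

final List<WindowedStorageAdapter> adapters = timeLineSegments
    .stream()
    .flatMap(holder -> StreamSupport.stream(holder.getObject().spliterator(), false)
        .map(chunk -> {
          final DataSegment segment = chunk.getObject();
          final File segmentFile =
              Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId());
          try {
            // Same wrapping as the anonymous classes above: index -> storage adapter -> windowed adapter.
            return new WindowedStorageAdapter(
                new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
                holder.getInterval()
            );
          }
          catch (IOException e) {
            throw new RuntimeException(e);
          }
        }))
    .collect(Collectors.toList());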
Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
The class AbstractMultiPhaseParallelIndexingTest, method loadSegment.
private Segment loadSegment(DataSegment dataSegment, File tempSegmentDir)
{
  final SegmentCacheManager cacheManager =
      new SegmentCacheManagerFactory(getObjectMapper()).manufacturate(tempSegmentDir);
  final SegmentLoader loader = new SegmentLocalCacheLoader(cacheManager, getIndexIO(), getObjectMapper());
  try {
    return loader.getSegment(dataSegment, false, SegmentLazyLoadFailCallback.NOOP);
  }
  catch (SegmentLoadingException e) {
    throw new RuntimeException(e);
  }
}
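A call site for this helper typically loads a segment the test just published and then inspects it. The fragment below is hypothetical (the temporaryFolder rule, the publishedSegment variable, and the assertion are illustrative, not taken from the test class) and only shows the shape of such a call.

// Inside a test method of the same class (fragment, not a complete test):
File tempSegmentDir = temporaryFolder.newFolder();                  // assumes a JUnit TemporaryFolder rule
try (Segment segment = loadSegment(publishedSegment, tempSegmentDir)) {
  // Segment extends Closeable; the loaded segment should expose a queryable index.
  Assert.assertNotNull(segment.asQueryableIndex());
}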