Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
Class S3DataSegmentPuller, method getSegmentFiles.
/**
 * Pulls the object identified by {@code s3Coords} from S3 and expands it into {@code outDir}.
 *
 * <p>Paths recognised by {@code CompressionUtils.isZip} are unzipped into {@code outDir}; paths
 * recognised by {@code CompressionUtils.isGz} are gunzipped into a single file in {@code outDir}
 * named after the object with its extension removed. Any other path is rejected.
 *
 * <p>If anything fails after the existence check, {@code outDir} is deleted (best effort) before
 * the failure is reported.
 *
 * @param s3Coords bucket and path of the object to pull
 * @param outDir   directory to load into; created if it does not exist
 * @return the copy result reported by the unzip / gunzip call
 * @throws SegmentLoadingException if the object does not exist, has an unsupported type, or
 *                                 cannot be downloaded or expanded
 */
public FileUtils.FileCopyResult getSegmentFiles(final S3Coords s3Coords, final File outDir) throws SegmentLoadingException {
    log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
    if (!isObjectInBucket(s3Coords)) {
        throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
    }
    try {
        org.apache.commons.io.FileUtils.forceMkdir(outDir);
        final URI uri = URI.create(String.format("s3://%s/%s", s3Coords.bucket, s3Coords.path));
        final ByteSource byteSource = new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
                try {
                    return buildFileObject(uri, s3Client).openInputStream();
                } catch (ServiceException e) {
                    // Surface retryable service failures as IOException; presumably this lets the
                    // retry predicate handed to unzip/gunzip below re-attempt the read.
                    if (e.getCause() != null && S3Utils.S3RETRY.apply(e)) {
                        throw new IOException("Recoverable exception", e);
                    }
                    throw Throwables.propagate(e);
                }
            }
        };
        if (CompressionUtils.isZip(s3Coords.path)) {
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, true);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
            return result;
        }
        if (CompressionUtils.isGz(s3Coords.path)) {
            final String fname = Files.getNameWithoutExtension(uri.getPath());
            final File outFile = new File(outDir, fname);
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
            return result;
        }
        throw new IAE("Do not know how to load file type at [%s]", uri.toString());
    } catch (Exception e) {
        try {
            org.apache.commons.io.FileUtils.deleteDirectory(outDir);
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), s3Coords.toString());
        }
        // The message is arbitrary text (it may contain '%', e.g. from a percent-encoded URI, or
        // be null), so pass it as a format argument rather than as the format string itself.
        throw new SegmentLoadingException(e, "%s", e.getMessage());
    }
}
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
Class IngestSegmentFirehoseFactory, method connect.
/**
 * Builds an {@link IngestSegmentFirehose} over the used segments of {@code dataSource} within
 * {@code interval}.
 *
 * <p>Dimensions come from the configured {@code dimensions} if set, otherwise from the parser's
 * custom dimensions if it has any, otherwise from the union of the segments' dimensions minus the
 * parser's dimension exclusions. Metrics come from the configured {@code metrics} if set,
 * otherwise from the union of the segments' metrics.
 *
 * @param inputRowParser parser whose dimensions spec is consulted when no dimensions are configured
 * @return a firehose reading from the fetched segments
 * @throws IOException    declared by the interface; failures inside this method are rethrown via
 *                        {@code Throwables.propagate}
 * @throws ParseException declared by the interface
 */
@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    if (taskToolbox == null) {
        // No toolbox yet: a NoopTask exists only so a toolbox can be built to list and fetch segments.
        taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
    }
    try {
        final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
        final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);

        // Arrange the segments on a timeline, then look up the holders covering the interval.
        final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment usedSegment : usedSegments) {
            timeline.add(usedSegment.getInterval(), usedSegment.getVersion(), usedSegment.getShardSpec().createChunk(usedSegment));
        }
        final List<TimelineObjectHolder<String, DataSegment>> holders = timeline.lookup(interval);

        final List<String> dims;
        if (dimensions != null) {
            dims = dimensions;
        } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            // Union of the dimensions of every chunk on the timeline, minus the parser's exclusions.
            final Set<String> segmentDims = Sets.newHashSet();
            for (TimelineObjectHolder<String, DataSegment> holder : holders) {
                for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                    for (String dimension : chunk.getObject().getDimensions()) {
                        segmentDims.add(dimension);
                    }
                }
            }
            dims = Lists.newArrayList(Sets.difference(segmentDims, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }

        final List<String> metricsList;
        if (metrics != null) {
            metricsList = metrics;
        } else {
            // Union of the metrics of every chunk on the timeline.
            final Set<String> segmentMetrics = Sets.newHashSet();
            for (TimelineObjectHolder<String, DataSegment> holder : holders) {
                for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                    for (String metric : chunk.getObject().getMetrics()) {
                        segmentMetrics.add(metric);
                    }
                }
            }
            metricsList = Lists.newArrayList(segmentMetrics);
        }

        // One adapter per chunk, windowed to the interval of the holder it belongs to.
        // An IOException from loadIndex reaches the catch below and is propagated from there.
        final List<WindowedStorageAdapter> adapters = Lists.newArrayList();
        for (TimelineObjectHolder<String, DataSegment> holder : holders) {
            for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                final DataSegment segment = chunk.getObject();
                final File segmentFile = Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier());
                adapters.add(new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)), holder.getInterval()));
            }
        }

        return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    } catch (IOException | SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
}
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
Class TaskLifecycleTest, method setUpTaskToolboxFactory.
/**
 * Creates the {@link TaskToolboxFactory} used by the test, wiring in stub implementations for
 * segment moving, archiving and announcing.
 *
 * <p>Also initialises the {@code taskLockbox}, {@code tac} and {@code taskConfig} fields as a
 * side effect.
 *
 * @param dataSegmentPusher      pusher handed to the factory
 * @param handoffNotifierFactory handoff notifier factory handed to the factory
 * @param mdc                    metadata storage coordinator used by the task action toolbox
 * @return the configured factory
 * @throws IOException if the temporary folder cannot be created
 */
private TaskToolboxFactory setUpTaskToolboxFactory(DataSegmentPusher dataSegmentPusher, SegmentHandoffNotifierFactory handoffNotifierFactory, TestIndexerMetadataStorageCoordinator mdc) throws IOException {
    Preconditions.checkNotNull(queryRunnerFactoryConglomerate);
    Preconditions.checkNotNull(monitorScheduler);
    Preconditions.checkNotNull(taskStorage);
    Preconditions.checkNotNull(emitter);

    taskLockbox = new TaskLockbox(taskStorage);
    tac = new LocalTaskActionClientFactory(taskStorage, new TaskActionToolbox(taskLockbox, mdc, emitter, EasyMock.createMock(SupervisorManager.class)));
    final File baseDir = temporaryFolder.newFolder();
    taskConfig = new TaskConfig(baseDir.toString(), null, null, 50000, null, false, null, null);

    final LocalDataSegmentKiller segmentKiller = new LocalDataSegmentKiller(new LocalDataSegmentPusherConfig());

    // Mover stub: hands the segment back untouched.
    final DataSegmentMover segmentMover = new DataSegmentMover() {
        @Override
        public DataSegment move(DataSegment dataSegment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
            return dataSegment;
        }
    };

    // Archiver stub: archive and restore both hand the segment back untouched.
    final DataSegmentArchiver segmentArchiver = new DataSegmentArchiver() {
        @Override
        public DataSegment archive(DataSegment segment) throws SegmentLoadingException {
            return segment;
        }

        @Override
        public DataSegment restore(DataSegment segment) throws SegmentLoadingException {
            return segment;
        }
    };

    // Announcer stub: only counts single-segment announcements; everything else is a no-op.
    final DataSegmentAnnouncer segmentAnnouncer = new DataSegmentAnnouncer() {
        @Override
        public void announceSegment(DataSegment segment) throws IOException {
            announcedSinks++;
        }

        @Override
        public void unannounceSegment(DataSegment segment) throws IOException {
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) throws IOException {
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
        }

        @Override
        public boolean isAnnounced(DataSegment segment) {
            return false;
        }
    };

    return new TaskToolboxFactory(
        taskConfig,
        tac,
        emitter,
        dataSegmentPusher,
        segmentKiller,
        segmentMover,
        segmentArchiver,
        segmentAnnouncer,
        handoffNotifierFactory,
        queryRunnerFactoryConglomerate,
        // Executor that runs submitted work on the calling thread.
        MoreExecutors.sameThreadExecutor(),
        monitorScheduler,
        // Segment loader configured with an empty list of storage locations.
        new SegmentLoaderFactory(new SegmentLoaderLocalCacheManager(null, new SegmentLoaderConfig() {
            @Override
            public List<StorageLocationConfig> getLocations() {
                return Lists.newArrayList();
            }
        }, new DefaultObjectMapper())),
        MAPPER,
        INDEX_MERGER,
        INDEX_IO,
        MapCache.create(0),
        FireDepartmentTest.NO_CACHE_CONFIG,
        INDEX_MERGER_V9
    );
}
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
Class CassandraDataSegmentPuller, method getSegmentFiles.
/**
 * Pulls the zipped segment stored under {@code key} into {@code outDir}.
 *
 * <p>The object is first copied to a temporary {@code index.zip} inside {@code outDir} (the copy
 * is handed to {@code RetryUtils.retry} with an always-true predicate and a limit of 10), then
 * unzipped into {@code outDir}. The temporary archive is removed afterwards, whether or not the
 * pull succeeded; if unzipping fails, {@code outDir} itself is deleted as well.
 *
 * @param key    storage key of the segment archive
 * @param outDir directory to unzip into; created if missing, must be a directory
 * @return the copy result reported by the unzip call
 * @throws SegmentLoadingException if the archive cannot be copied or unzipped
 */
public io.druid.java.util.common.FileUtils.FileCopyResult getSegmentFiles(final String key, final File outDir) throws SegmentLoadingException {
    log.info("Pulling index from C* at path[%s] to outDir[%s]", key, outDir);
    if (!outDir.exists()) {
        outDir.mkdirs();
    }
    // Also covers a failed mkdirs() above: the directory will not exist and this check trips.
    if (!outDir.isDirectory()) {
        throw new ISE("outDir[%s] must be a directory.", outDir);
    }
    long startTime = System.currentTimeMillis();
    final File tmpFile = new File(outDir, "index.zip");
    log.info("Pulling to temporary local cache [%s]", tmpFile.getAbsolutePath());
    try {
        RetryUtils.retry(new Callable<io.druid.java.util.common.FileUtils.FileCopyResult>() {
            @Override
            public io.druid.java.util.common.FileUtils.FileCopyResult call() throws Exception {
                try (OutputStream os = new FileOutputStream(tmpFile)) {
                    ChunkedStorage.newReader(indexStorage, key, os).withBatchSize(BATCH_SIZE).withConcurrencyLevel(CONCURRENCY).call();
                }
                return new io.druid.java.util.common.FileUtils.FileCopyResult(tmpFile);
            }
        }, Predicates.<Throwable>alwaysTrue(), 10);
    } catch (Exception e) {
        // A failed copy can leave a partial archive behind; remove it rather than leaking it.
        if (tmpFile.exists() && !tmpFile.delete()) {
            log.warn("Could not delete cache file at [%s]", tmpFile.getAbsolutePath());
        }
        throw new SegmentLoadingException(e, "Unable to copy key [%s] to file [%s]", key, tmpFile.getAbsolutePath());
    }
    try {
        final io.druid.java.util.common.FileUtils.FileCopyResult result = CompressionUtils.unzip(tmpFile, outDir);
        log.info("Pull of file[%s] completed in %,d millis (%s bytes)", key, System.currentTimeMillis() - startTime, result.size());
        return result;
    } catch (Exception e) {
        try {
            FileUtils.deleteDirectory(outDir);
        } catch (IOException e1) {
            log.error(e1, "Error clearing segment directory [%s]", outDir.getAbsolutePath());
            e.addSuppressed(e1);
        }
        // The message is arbitrary text (it may contain '%' or be null), so pass it as a format
        // argument rather than as the format string itself.
        throw new SegmentLoadingException(e, "%s", e.getMessage());
    } finally {
        // tmpFile lives inside outDir, so it is already gone if outDir was deleted above;
        // only warn when the file is really still there and could not be removed.
        if (tmpFile.exists() && !tmpFile.delete()) {
            log.warn("Could not delete cache file at [%s]", tmpFile.getAbsolutePath());
        }
    }
}
Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
Class CloudFilesDataSegmentPuller, method getSegmentFiles.
/**
 * Pulls the zipped segment at {@code path} in the given Cloud Files region and container, and
 * unzips it into {@code outDir}.
 *
 * <p>If unzipping fails, {@code outDir} is deleted (best effort) before the failure is reported.
 * The byte source's stream is closed in all cases.
 *
 * @param region    Cloud Files region holding the container
 * @param container container holding the object
 * @param path      path of the object within the container
 * @param outDir    directory to unzip into
 * @return the copy result reported by the unzip call
 * @throws SegmentLoadingException if the object cannot be downloaded or unzipped
 */
public FileUtils.FileCopyResult getSegmentFiles(String region, String container, String path, File outDir) throws SegmentLoadingException {
    CloudFilesObjectApiProxy objectApi = new CloudFilesObjectApiProxy(cloudFilesApi, region, container);
    final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);
    try {
        final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, CloudFilesUtils.CLOUDFILESRETRY, true);
        log.info("Loaded %d bytes from [%s] to [%s]", result.size(), path, outDir.getAbsolutePath());
        return result;
    } catch (Exception e) {
        try {
            org.apache.commons.io.FileUtils.deleteDirectory(outDir);
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), path);
        }
        // The message is arbitrary text (it may contain '%' or be null), so pass it as a format
        // argument rather than as the format string itself.
        throw new SegmentLoadingException(e, "%s", e.getMessage());
    } finally {
        try {
            byteSource.closeStream();
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to close payload for segmente pulled from [%s]", path);
        }
    }
}
Aggregations