Example 6 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class S3DataSegmentPuller, method getSegmentFiles:

public FileUtils.FileCopyResult getSegmentFiles(final S3Coords s3Coords, final File outDir) throws SegmentLoadingException {
    log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir);
    if (!isObjectInBucket(s3Coords)) {
        throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords);
    }
    try {
        org.apache.commons.io.FileUtils.forceMkdir(outDir);
        final URI uri = URI.create(String.format("s3://%s/%s", s3Coords.bucket, s3Coords.path));
        final ByteSource byteSource = new ByteSource() {

            @Override
            public InputStream openStream() throws IOException {
                try {
                    return buildFileObject(uri, s3Client).openInputStream();
                } catch (ServiceException e) {
                    // Wrapping in IOException marks the failure as retryable for the
                    // retry predicate passed to CompressionUtils below.
                    if (e.getCause() != null && S3Utils.S3RETRY.apply(e)) {
                        throw new IOException("Recoverable exception", e);
                    }
                    throw Throwables.propagate(e);
                }
            }
        };
        if (CompressionUtils.isZip(s3Coords.path)) {
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, S3Utils.S3RETRY, true);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outDir.getAbsolutePath());
            return result;
        }
        if (CompressionUtils.isGz(s3Coords.path)) {
            final String fname = Files.getNameWithoutExtension(uri.getPath());
            final File outFile = new File(outDir, fname);
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
            log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
            return result;
        }
        throw new IAE("Do not know how to load file type at [%s]", uri.toString());
    } catch (Exception e) {
        try {
            org.apache.commons.io.FileUtils.deleteDirectory(outDir);
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), s3Coords.toString());
        }
        throw new SegmentLoadingException(e, e.getMessage());
    }
}
Also used : ServiceException(org.jets3t.service.ServiceException) S3ServiceException(org.jets3t.service.S3ServiceException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileUtils(io.druid.java.util.common.FileUtils) ByteSource(com.google.common.io.ByteSource) IOException(java.io.IOException) IAE(io.druid.java.util.common.IAE) URI(java.net.URI) File(java.io.File)
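
A note on the retry contract above: openStream() deliberately rewraps recoverable ServiceExceptions in IOException, because the retry predicate handed to CompressionUtils.unzip treats IOException as a signal to re-open the stream and try again. Below is a minimal, self-contained sketch of the consumer side of that contract; it assumes only Guava on the classpath, and RetryingReader/readWithRetries are illustrative names, not Druid API.

import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import java.io.IOException;
import java.io.InputStream;

public final class RetryingReader {

    // Re-open the ByteSource and retry whenever a read fails with IOException,
    // mirroring the "recoverable exception" convention used by the puller above.
    // Assumes maxTries >= 1.
    public static byte[] readWithRetries(ByteSource source, int maxTries) throws IOException {
        IOException last = null;
        for (int attempt = 1; attempt <= maxTries; attempt++) {
            try (InputStream in = source.openStream()) {
                return ByteStreams.toByteArray(in);
            } catch (IOException e) {
                last = e; // a fresh openStream() gives the next attempt a clean stream
            }
        }
        throw last;
    }
}

The key design point is that the stream itself is never reused: each attempt asks the ByteSource for a new one, which is why the puller exposes a ByteSource rather than a bare InputStream.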

Example 7 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class IngestSegmentFirehoseFactory, method connect:

@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    if (taskToolbox == null) {
        // Noop Task is just used to create the toolbox and list segments.
        taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
    }
    try {
        final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
        final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment segment : usedSegments) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
        final List<String> dims;
        if (dimensions != null) {
            dims = dimensions;
        } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
                    return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getDimensions();
                        }
                    }));
                }
            })));
            dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }
        final List<String> metricsList;
        if (metrics != null) {
            metricsList = metrics;
        } else {
            Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
                    return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getMetrics();
                        }
                    }));
                }
            })));
            metricsList = Lists.newArrayList(metricsSet);
        }
        final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {

            @Override
            public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {

                    @Override
                    public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                        final DataSegment segment = input.getObject();
                        try {
                            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    } catch (IOException | SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Set(java.util.Set) NoopTask(io.druid.indexing.common.task.NoopTask) DataSegment(io.druid.timeline.DataSegment) Function(com.google.common.base.Function) PartitionChunk(io.druid.timeline.partition.PartitionChunk) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) File(java.io.File)
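
The nested anonymous Functions above each do one job: union all dimension names (then all metric names) across every PartitionChunk in the timeline. On Java 8+ the same computation collapses to a stream pipeline; the sketch below is illustrative only, with SegmentView standing in for DataSegment rather than using Druid's types.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

class SegmentSchemas {

    // Hypothetical minimal view of a segment's schema (stand-in for DataSegment).
    interface SegmentView {
        List<String> getDimensions();
        List<String> getMetrics();
    }

    // Union of dimension names across all segments, minus the parse spec's
    // exclusions: the same set the nested Function/Iterables chain builds.
    static Set<String> inferDimensions(List<? extends SegmentView> segments, Set<String> exclusions) {
        return segments.stream()
                .flatMap(s -> s.getDimensions().stream())
                .filter(d -> !exclusions.contains(d))
                .collect(Collectors.toSet());
    }

    // Union of metric names across all segments.
    static Set<String> inferMetrics(List<? extends SegmentView> segments) {
        return segments.stream()
                .flatMap(s -> s.getMetrics().stream())
                .collect(Collectors.toSet());
    }
}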

Example 8 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class TaskLifecycleTest, method setUpTaskToolboxFactory:

private TaskToolboxFactory setUpTaskToolboxFactory(DataSegmentPusher dataSegmentPusher, SegmentHandoffNotifierFactory handoffNotifierFactory, TestIndexerMetadataStorageCoordinator mdc) throws IOException {
    Preconditions.checkNotNull(queryRunnerFactoryConglomerate);
    Preconditions.checkNotNull(monitorScheduler);
    Preconditions.checkNotNull(taskStorage);
    Preconditions.checkNotNull(emitter);
    taskLockbox = new TaskLockbox(taskStorage);
    tac = new LocalTaskActionClientFactory(taskStorage, new TaskActionToolbox(taskLockbox, mdc, emitter, EasyMock.createMock(SupervisorManager.class)));
    File tmpDir = temporaryFolder.newFolder();
    taskConfig = new TaskConfig(tmpDir.toString(), null, null, 50000, null, false, null, null);
    return new TaskToolboxFactory(taskConfig, tac, emitter, dataSegmentPusher, new LocalDataSegmentKiller(new LocalDataSegmentPusherConfig()), new DataSegmentMover() {

        @Override
        public DataSegment move(DataSegment dataSegment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
            return dataSegment;
        }
    }, new DataSegmentArchiver() {

        @Override
        public DataSegment archive(DataSegment segment) throws SegmentLoadingException {
            return segment;
        }

        @Override
        public DataSegment restore(DataSegment segment) throws SegmentLoadingException {
            return segment;
        }
    }, new DataSegmentAnnouncer() {

        @Override
        public void announceSegment(DataSegment segment) throws IOException {
            announcedSinks++;
        }

        @Override
        public void unannounceSegment(DataSegment segment) throws IOException {
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) throws IOException {
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
        }

        @Override
        public boolean isAnnounced(DataSegment segment) {
            return false;
        }
    }, // segment announcer
    handoffNotifierFactory, // query runner factory conglomerate corporation unionized collective
    queryRunnerFactoryConglomerate, // query executor service
    MoreExecutors.sameThreadExecutor(), // monitor scheduler
    monitorScheduler, new SegmentLoaderFactory(new SegmentLoaderLocalCacheManager(null, new SegmentLoaderConfig() {

        @Override
        public List<StorageLocationConfig> getLocations() {
            return Lists.newArrayList();
        }
    }, new DefaultObjectMapper())), MAPPER, INDEX_MERGER, INDEX_IO, MapCache.create(0), FireDepartmentTest.NO_CACHE_CONFIG, INDEX_MERGER_V9);
}
Also used : DataSegmentAnnouncer(io.druid.server.coordination.DataSegmentAnnouncer) DataSegmentArchiver(io.druid.segment.loading.DataSegmentArchiver) DataSegmentMover(io.druid.segment.loading.DataSegmentMover) TaskConfig(io.druid.indexing.common.config.TaskConfig) DataSegment(io.druid.timeline.DataSegment) TaskToolboxFactory(io.druid.indexing.common.TaskToolboxFactory) TaskActionToolbox(io.druid.indexing.common.actions.TaskActionToolbox) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) LocalTaskActionClientFactory(io.druid.indexing.common.actions.LocalTaskActionClientFactory) SegmentLoaderConfig(io.druid.segment.loading.SegmentLoaderConfig) SegmentLoaderFactory(io.druid.indexing.common.SegmentLoaderFactory) SegmentLoaderLocalCacheManager(io.druid.segment.loading.SegmentLoaderLocalCacheManager) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) LocalDataSegmentPusherConfig(io.druid.segment.loading.LocalDataSegmentPusherConfig) IOException(java.io.IOException) SupervisorManager(io.druid.indexing.overlord.supervisor.SupervisorManager) LocalDataSegmentKiller(io.druid.segment.loading.LocalDataSegmentKiller) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) File(java.io.File)
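
The DataSegmentMover and DataSegmentArchiver wired in above are hand-rolled pass-through test doubles: every lifecycle hook reports success without side effects, so the test exercises task orchestration rather than deep storage. A generic sketch of that pattern follows; the interface and class names are illustrative, not Druid's.

// Pass-through test double: satisfies a lifecycle interface while doing nothing,
// the same shape as the anonymous DataSegmentMover/DataSegmentArchiver above.
interface Lifecycle<T> {
    T move(T item) throws Exception;
    T archive(T item) throws Exception;
}

final class NoopLifecycle<T> implements Lifecycle<T> {
    @Override
    public T move(T item) {
        return item; // succeed without touching storage
    }

    @Override
    public T archive(T item) {
        return item;
    }
}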

Example 9 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class CassandraDataSegmentPuller, method getSegmentFiles:

public io.druid.java.util.common.FileUtils.FileCopyResult getSegmentFiles(final String key, final File outDir) throws SegmentLoadingException {
    log.info("Pulling index from C* at path[%s] to outDir[%s]", key, outDir);
    if (!outDir.exists()) {
        outDir.mkdirs();
    }
    if (!outDir.isDirectory()) {
        throw new ISE("outDir[%s] must be a directory.", outDir);
    }
    long startTime = System.currentTimeMillis();
    final File tmpFile = new File(outDir, "index.zip");
    log.info("Pulling to temporary local cache [%s]", tmpFile.getAbsolutePath());
    final io.druid.java.util.common.FileUtils.FileCopyResult localResult;
    try {
        localResult = RetryUtils.retry(new Callable<io.druid.java.util.common.FileUtils.FileCopyResult>() {

            @Override
            public io.druid.java.util.common.FileUtils.FileCopyResult call() throws Exception {
                try (OutputStream os = new FileOutputStream(tmpFile)) {
                    final ObjectMetadata meta = ChunkedStorage.newReader(indexStorage, key, os).withBatchSize(BATCH_SIZE).withConcurrencyLevel(CONCURRENCY).call();
                }
                return new io.druid.java.util.common.FileUtils.FileCopyResult(tmpFile);
            }
        }, Predicates.<Throwable>alwaysTrue(), 10);
    } catch (Exception e) {
        throw new SegmentLoadingException(e, "Unable to copy key [%s] to file [%s]", key, tmpFile.getAbsolutePath());
    }
    try {
        final io.druid.java.util.common.FileUtils.FileCopyResult result = CompressionUtils.unzip(tmpFile, outDir);
        log.info("Pull of file[%s] completed in %,d millis (%s bytes)", key, System.currentTimeMillis() - startTime, result.size());
        return result;
    } catch (Exception e) {
        try {
            FileUtils.deleteDirectory(outDir);
        } catch (IOException e1) {
            log.error(e1, "Error clearing segment directory [%s]", outDir.getAbsolutePath());
            e.addSuppressed(e1);
        }
        throw new SegmentLoadingException(e, e.getMessage());
    } finally {
        if (!tmpFile.delete()) {
            log.warn("Could not delete cache file at [%s]", tmpFile.getAbsolutePath());
        }
    }
}
Also used : FileUtils(org.apache.commons.io.FileUtils) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) ISE(io.druid.java.util.common.ISE) File(java.io.File) ObjectMetadata(com.netflix.astyanax.recipes.storage.ObjectMetadata)
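
RetryUtils.retry drives the chunked download above: up to 10 attempts, with Predicates.alwaysTrue() classifying every failure as recoverable. Below is a simplified, self-contained sketch of that retry shape (no delay between attempts; SimpleRetry is an illustrative name, not the Druid helper).

import java.util.concurrent.Callable;
import java.util.function.Predicate;

final class SimpleRetry {

    // Re-run the callable until it succeeds, the failure is deemed unrecoverable,
    // or maxTries attempts are exhausted; then surface the last failure.
    static <T> T retry(Callable<T> task, Predicate<Throwable> isRecoverable, int maxTries) throws Exception {
        Exception last = null;
        for (int attempt = 1; attempt <= maxTries; attempt++) {
            try {
                return task.call();
            } catch (Exception e) {
                last = e;
                if (!isRecoverable.test(e)) {
                    throw e; // permanent failure: stop retrying immediately
                }
            }
        }
        throw last;
    }
}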

Example 10 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

From class CloudFilesDataSegmentPuller, method getSegmentFiles:

public FileUtils.FileCopyResult getSegmentFiles(String region, String container, String path, File outDir) throws SegmentLoadingException {
    CloudFilesObjectApiProxy objectApi = new CloudFilesObjectApiProxy(cloudFilesApi, region, container);
    final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);
    try {
        final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, CloudFilesUtils.CLOUDFILESRETRY, true);
        log.info("Loaded %d bytes from [%s] to [%s]", result.size(), path, outDir.getAbsolutePath());
        return result;
    } catch (Exception e) {
        try {
            org.apache.commons.io.FileUtils.deleteDirectory(outDir);
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), path);
        }
        throw new SegmentLoadingException(e, e.getMessage());
    } finally {
        try {
            byteSource.closeStream();
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to close payload for segmente pulled from [%s]", path);
        }
    }
}
Also used : FileUtils(io.druid.java.util.common.FileUtils) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException)
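
Examples 6, 9, and 10 all share one error-handling contract: on failure, best-effort delete the partially written output directory, then rethrow as SegmentLoadingException so the segment loader can fall back to another location. A condensed sketch of that shape, with doPull() as a placeholder for the actual fetch-and-unzip and a local stand-in for Druid's SegmentLoadingException:

import java.io.File;
import java.io.IOException;

final class PullWithCleanup {

    // Local stand-in for io.druid.segment.loading.SegmentLoadingException.
    static class LoadingException extends Exception {
        LoadingException(String message, Throwable cause) {
            super(message, cause);
        }
    }

    static void pull(File outDir) throws LoadingException {
        try {
            doPull(outDir); // placeholder for the fetch-and-unzip work
        } catch (Exception e) {
            try {
                org.apache.commons.io.FileUtils.deleteDirectory(outDir); // best-effort cleanup
            } catch (IOException cleanupFailure) {
                e.addSuppressed(cleanupFailure); // keep the original failure primary
            }
            throw new LoadingException(e.getMessage(), e);
        }
    }

    static void doPull(File outDir) throws IOException {
        // placeholder: real implementations stream from remote storage and unzip
    }
}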

Aggregations

SegmentLoadingException (io.druid.segment.loading.SegmentLoadingException) 22
IOException (java.io.IOException) 16
DataSegment (io.druid.timeline.DataSegment) 9
File (java.io.File) 6
FileUtils (io.druid.java.util.common.FileUtils) 4
ServiceException (org.jets3t.service.ServiceException) 4
ByteSource (com.google.common.io.ByteSource) 3
FileSystem (org.apache.hadoop.fs.FileSystem) 3
Path (org.apache.hadoop.fs.Path) 3
ImmutableList (com.google.common.collect.ImmutableList) 2
SegmentLoaderFactory (io.druid.indexing.common.SegmentLoaderFactory) 2
TaskToolboxFactory (io.druid.indexing.common.TaskToolboxFactory) 2
LocalTaskActionClientFactory (io.druid.indexing.common.actions.LocalTaskActionClientFactory) 2
TaskActionToolbox (io.druid.indexing.common.actions.TaskActionToolbox) 2
TaskConfig (io.druid.indexing.common.config.TaskConfig) 2
DataSegmentArchiver (io.druid.segment.loading.DataSegmentArchiver) 2
DataSegmentMover (io.druid.segment.loading.DataSegmentMover) 2
ArrayList (java.util.ArrayList) 2
FileUtils (org.apache.commons.io.FileUtils) 2
S3Object (org.jets3t.service.model.S3Object) 2