Example 16 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project hive by apache.

Class DruidStorageHandler, method deleteSegment.

@VisibleForTesting
protected void deleteSegment(DataSegment segment) throws SegmentLoadingException {
    final Path path = DruidStorageHandlerUtils.getPath(segment);
    LOG.info("removing segment {}, located at path {}", segment.getIdentifier(), path);
    try {
        if (path.getName().endsWith(".zip")) {
            final FileSystem fs = path.getFileSystem(getConf());
            if (!fs.exists(path)) {
                LOG.warn("Segment Path {} does not exist. It appears to have been deleted already.", path);
                return;
            }
            // path format --> .../dataSource/interval/version/partitionNum/xxx.zip
            Path partitionNumDir = path.getParent();
            if (!fs.delete(partitionNumDir, true)) {
                throw new SegmentLoadingException("Unable to kill segment, failed to delete dir [%s]", partitionNumDir.toString());
            }
            // try to delete other directories if possible
            Path versionDir = partitionNumDir.getParent();
            if (safeNonRecursiveDelete(fs, versionDir)) {
                Path intervalDir = versionDir.getParent();
                if (safeNonRecursiveDelete(fs, intervalDir)) {
                    Path dataSourceDir = intervalDir.getParent();
                    safeNonRecursiveDelete(fs, dataSourceDir);
                }
            }
        } else {
            throw new SegmentLoadingException("Unknown file type[%s]", path);
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Unable to kill segment");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
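
The helper safeNonRecursiveDelete called above is not part of this excerpt. A minimal sketch of what it plausibly does, inferred from its usage (the body is an assumption, not the verbatim Hive source): a non-recursive FileSystem.delete fails when the directory still has children, which is exactly the stop condition the caller relies on to halt the upward cleanup.

// Hypothetical sketch of the helper referenced above; inferred from usage, not verbatim.
private static boolean safeNonRecursiveDelete(FileSystem fs, Path path) {
    try {
        // Non-recursive delete: succeeds only on an empty directory (or a file).
        return fs.delete(path, false);
    } catch (Exception ex) {
        // Any failure (non-empty directory, permissions) just stops the cleanup walk.
        return false;
    }
}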

Example 17 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Class HdfsDataSegmentPuller, method getSegmentFiles.

public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {
            try {
                return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {

                    @Override
                    public FileUtils.FileCopyResult call() throws Exception {
                        if (!fs.exists(path)) {
                            throw new SegmentLoadingException("No files found at [%s]", path.toString());
                        }
                        final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                        final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
                        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                        while (children.hasNext()) {
                            final LocatedFileStatus child = children.next();
                            final Path childPath = child.getPath();
                            final String fname = childPath.getName();
                            if (fs.isDirectory(childPath)) {
                                log.warn("[%s] is a child directory, skipping", childPath.toString());
                            } else {
                                final File outFile = new File(outDir, fname);
                                // Actual copy
                                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                                result.addFile(outFile);
                            }
                        }
                        log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
                        return result;
                    }
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {
            // --------    zip     ---------
            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {

                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);
            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outDir.getAbsolutePath());
            return result;
        } else if (CompressionUtils.isGz(path.getName())) {
            // --------    gzip     ---------
            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {

                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);
            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(), outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileUtils(io.druid.java.util.common.FileUtils) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) FileSystem(org.apache.hadoop.fs.FileSystem) ByteSource(com.google.common.io.ByteSource) File(java.io.File)
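
A minimal caller for getSegmentFiles, to show which branch each path shape takes. The puller's package and constructor, the namenode URI, and the local directory are assumptions for illustration, not the verbatim Druid API surface:

// Hypothetical usage sketch; construction details and paths are assumed.
import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import io.druid.java.util.common.FileUtils;
import io.druid.storage.hdfs.HdfsDataSegmentPuller; // package assumed

public class PullerExample {
    public static void main(String[] args) throws Exception {
        HdfsDataSegmentPuller puller = new HdfsDataSegmentPuller(new Configuration());
        File outDir = new File("/tmp/segment-cache");
        // A *.zip path takes the unzip branch, *.gz the gunzip branch,
        // and a directory path copies each child file with retries.
        FileUtils.FileCopyResult result = puller.getSegmentFiles(
                new Path("hdfs://namenode:8020/druid/segments/index.zip"), outDir);
        System.out.println("Copied " + result.size() + " bytes");
    }
}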

Example 18 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Class IngestSegmentFirehoseFactoryTest, method constructorFeeder.

@Parameterized.Parameters(name = "{1}")
public static Collection<Object[]> constructorFeeder() throws IOException {
    final IndexSpec indexSpec = new IndexSpec();
    final HeapMemoryTaskStorage ts = new HeapMemoryTaskStorage(new TaskStorageConfig(null) {
    });
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withQueryGranularity(Granularities.NONE)
        .withMinTimestamp(JodaUtils.MIN_INSTANT)
        .withDimensionsSpec(ROW_PARSER)
        .withMetrics(new AggregatorFactory[] {
            new LongSumAggregatorFactory(METRIC_LONG_NAME, DIM_LONG_NAME),
            new DoubleSumAggregatorFactory(METRIC_FLOAT_NAME, DIM_FLOAT_NAME)
        })
        .build();
    final OnheapIncrementalIndex index = new OnheapIncrementalIndex(schema, true, MAX_ROWS * MAX_SHARD_NUMBER);
    for (Integer i = 0; i < MAX_ROWS; ++i) {
        index.add(ROW_PARSER.parse(buildRow(i.longValue())));
    }
    if (!persistDir.mkdirs() && !persistDir.exists()) {
        throw new IOException(String.format("Could not create directory at [%s]", persistDir.getAbsolutePath()));
    }
    INDEX_MERGER.persist(index, persistDir, indexSpec);
    final TaskLockbox tl = new TaskLockbox(ts);
    final IndexerSQLMetadataStorageCoordinator mdc = new IndexerSQLMetadataStorageCoordinator(null, null, null) {

        private final Set<DataSegment> published = Sets.newHashSet();

        private final Set<DataSegment> nuked = Sets.newHashSet();

        @Override
        public List<DataSegment> getUsedSegmentsForInterval(String dataSource, Interval interval) throws IOException {
            return ImmutableList.copyOf(segmentSet);
        }

        @Override
        public List<DataSegment> getUsedSegmentsForIntervals(String dataSource, List<Interval> interval) throws IOException {
            return ImmutableList.copyOf(segmentSet);
        }

        @Override
        public List<DataSegment> getUnusedSegmentsForInterval(String dataSource, Interval interval) {
            return ImmutableList.of();
        }

        @Override
        public Set<DataSegment> announceHistoricalSegments(Set<DataSegment> segments) {
            Set<DataSegment> added = Sets.newHashSet();
            for (final DataSegment segment : segments) {
                if (published.add(segment)) {
                    added.add(segment);
                }
            }
            return ImmutableSet.copyOf(added);
        }

        @Override
        public void deleteSegments(Set<DataSegment> segments) {
            nuked.addAll(segments);
        }
    };
    final LocalTaskActionClientFactory tac = new LocalTaskActionClientFactory(ts, new TaskActionToolbox(tl, mdc, newMockEmitter(), EasyMock.createMock(SupervisorManager.class)));
    SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
    EasyMock.replay(notifierFactory);
    final TaskToolboxFactory taskToolboxFactory = new TaskToolboxFactory(new TaskConfig(tmpDir.getAbsolutePath(), null, null, 50000, null, false, null, null), tac, newMockEmitter(), new DataSegmentPusher() {

        @Deprecated
        @Override
        public String getPathForHadoop(String dataSource) {
            return getPathForHadoop();
        }

        @Override
        public String getPathForHadoop() {
            throw new UnsupportedOperationException();
        }

        @Override
        public DataSegment push(File file, DataSegment segment) throws IOException {
            return segment;
        }
    }, new DataSegmentKiller() {

        @Override
        public void kill(DataSegment segments) throws SegmentLoadingException {
        }

        @Override
        public void killAll() throws IOException {
            throw new UnsupportedOperationException("not implemented");
        }
    }, new DataSegmentMover() {

        @Override
        public DataSegment move(DataSegment dataSegment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
            return dataSegment;
        }
    }, new DataSegmentArchiver() {

        @Override
        public DataSegment archive(DataSegment segment) throws SegmentLoadingException {
            return segment;
        }

        @Override
        public DataSegment restore(DataSegment segment) throws SegmentLoadingException {
            return segment;
        }
    }, // segment announcer
    null, notifierFactory, // query runner factory conglomerate corporation unionized collective
    null, // query executor service
    null, // monitor scheduler
    null, new SegmentLoaderFactory(new SegmentLoaderLocalCacheManager(null, new SegmentLoaderConfig() {

        @Override
        public List<StorageLocationConfig> getLocations() {
            return Lists.newArrayList();
        }
    }, MAPPER)), MAPPER, INDEX_MERGER, INDEX_IO, null, null, INDEX_MERGER_V9);
    Collection<Object[]> values = new LinkedList<>();
    for (InputRowParser parser : Arrays.<InputRowParser>asList(
            ROW_PARSER,
            new MapInputRowParser(new JSONParseSpec(
                    new TimestampSpec(TIME_COLUMN, "auto", null),
                    new DimensionsSpec(
                            DimensionsSpec.getDefaultSchemas(ImmutableList.<String>of()),
                            ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME),
                            ImmutableList.<SpatialDimensionSchema>of()
                    ),
                    null,
                    null
            )))) {
        for (List<String> dim_names : Arrays.<List<String>>asList(null, ImmutableList.of(DIM_NAME))) {
            for (List<String> metric_names : Arrays.<List<String>>asList(null, ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME))) {
                values.add(new Object[] { new IngestSegmentFirehoseFactory(DATA_SOURCE_NAME, FOREVER, new SelectorDimFilter(DIM_NAME, DIM_VALUE, null), dim_names, metric_names, Guice.createInjector(new Module() {

                    @Override
                    public void configure(Binder binder) {
                        binder.bind(TaskToolboxFactory.class).toInstance(taskToolboxFactory);
                    }
                }), INDEX_IO), String.format("DimNames[%s]MetricNames[%s]ParserDimNames[%s]", dim_names == null ? "null" : "dims", metric_names == null ? "null" : "metrics", parser == ROW_PARSER ? "dims" : "null"), parser });
            }
        }
    }
    return values;
}
Also used : DataSegmentArchiver(io.druid.segment.loading.DataSegmentArchiver) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) DataSegmentMover(io.druid.segment.loading.DataSegmentMover) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) LocalTaskActionClientFactory(io.druid.indexing.common.actions.LocalTaskActionClientFactory) SegmentLoaderConfig(io.druid.segment.loading.SegmentLoaderConfig) SegmentLoaderFactory(io.druid.indexing.common.SegmentLoaderFactory) SegmentLoaderLocalCacheManager(io.druid.segment.loading.SegmentLoaderLocalCacheManager) JSONParseSpec(io.druid.data.input.impl.JSONParseSpec) IndexerSQLMetadataStorageCoordinator(io.druid.metadata.IndexerSQLMetadataStorageCoordinator) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) HeapMemoryTaskStorage(io.druid.indexing.overlord.HeapMemoryTaskStorage) MapInputRowParser(io.druid.data.input.impl.MapInputRowParser) InputRowParser(io.druid.data.input.impl.InputRowParser) Module(com.google.inject.Module) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) File(java.io.File) Interval(org.joda.time.Interval) IndexSpec(io.druid.segment.IndexSpec) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DataSegmentKiller(io.druid.segment.loading.DataSegmentKiller) TaskConfig(io.druid.indexing.common.config.TaskConfig) DataSegment(io.druid.timeline.DataSegment) Binder(com.google.inject.Binder) TaskToolboxFactory(io.druid.indexing.common.TaskToolboxFactory) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) TaskActionToolbox(io.druid.indexing.common.actions.TaskActionToolbox) TimestampSpec(io.druid.data.input.impl.TimestampSpec) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) TaskStorageConfig(io.druid.indexing.common.config.TaskStorageConfig) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) SegmentHandoffNotifierFactory(io.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory) SpatialDimensionSchema(io.druid.data.input.impl.SpatialDimensionSchema) TaskLockbox(io.druid.indexing.overlord.TaskLockbox) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec)
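
Each Object[] built above becomes one parameterized test instance: JUnit passes the array elements to the test constructor in order, and name = "{1}" picks the second element (the descriptive String) as the test name. A minimal sketch of the matching constructor; the field names are assumptions, only the arity and order follow from the values above:

// Hypothetical constructor consuming each Object[]; names are illustrative.
private final IngestSegmentFirehoseFactory factory;
private final String testName;
private final InputRowParser rowParser;

public IngestSegmentFirehoseFactoryTest(
        IngestSegmentFirehoseFactory factory,
        String testName, // bound to {1} in @Parameterized.Parameters(name = "{1}")
        InputRowParser rowParser) {
    this.factory = factory;
    this.testName = testName;
    this.rowParser = rowParser;
}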

Example 19 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Class InsertSegment, method run.

@Override
public void run() {
    final Injector injector = makeInjector();
    mapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
    indexerMetadataStorageCoordinator = injector.getInstance(IndexerMetadataStorageCoordinator.class);
    final DataSegmentFinder dataSegmentFinder = injector.getInstance(DataSegmentFinder.class);
    log.info("Start searching segments under [%s]", workingDirPath);
    Set<DataSegment> segments = null;
    try {
        segments = dataSegmentFinder.findSegments(workingDirPath, Boolean.valueOf(updateDescriptor));
    } catch (SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
    log.info("Done searching segments under [%s], [%d] segments were found", workingDirPath, segments.size());
    try {
        insertSegments(segments);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    log.info("Done processing [%d] segments", segments.size());
}
Also used : IndexerMetadataStorageCoordinator(io.druid.indexing.overlord.IndexerMetadataStorageCoordinator) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) Injector(com.google.inject.Injector) DataSegmentFinder(io.druid.segment.loading.DataSegmentFinder) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment)
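
The only contract this tool relies on is DataSegmentFinder.findSegments(workingDirPath, updateDescriptor). A minimal stub honoring the signature visible in the call above; the class name is hypothetical, and a real finder would walk the working directory for segment descriptors:

// Hypothetical stub; a real implementation throws SegmentLoadingException
// when the working directory is unreadable or a descriptor cannot be parsed.
import java.util.Set;
import com.google.common.collect.Sets;
import io.druid.segment.loading.DataSegmentFinder;
import io.druid.segment.loading.SegmentLoadingException;
import io.druid.timeline.DataSegment;

public class NoopDataSegmentFinder implements DataSegmentFinder {
    @Override
    public Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor)
            throws SegmentLoadingException {
        return Sets.newHashSet(); // finds nothing; stands in for real discovery logic
    }
}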

Example 20 with SegmentLoadingException

Use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Class GoogleDataSegmentKiller, method kill.

@Override
public void kill(DataSegment segment) throws SegmentLoadingException {
    LOG.info("Killing segment [%s]", segment);
    Map<String, Object> loadSpec = segment.getLoadSpec();
    final String bucket = MapUtils.getString(loadSpec, "bucket");
    final String indexPath = MapUtils.getString(loadSpec, "path");
    final String descriptorPath = indexPath.substring(0, indexPath.lastIndexOf("/")) + "/descriptor.json";
    try {
        storage.delete(bucket, indexPath);
        storage.delete(bucket, descriptorPath);
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getIdentifier(), e.getMessage());
    }
}
Also used : SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException)
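
The descriptor path is derived purely by string manipulation on the index path from the loadSpec. A worked illustration of that derivation with made-up values:

// Worked illustration of the substring logic above; the paths are made up.
String indexPath = "segments/wikipedia/2017-01-01/v1/0/index.zip";
String descriptorPath = indexPath.substring(0, indexPath.lastIndexOf("/")) + "/descriptor.json";
// descriptorPath -> "segments/wikipedia/2017-01-01/v1/0/descriptor.json"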

Aggregations

SegmentLoadingException (io.druid.segment.loading.SegmentLoadingException): 22 usages
IOException (java.io.IOException): 16 usages
DataSegment (io.druid.timeline.DataSegment): 9 usages
File (java.io.File): 6 usages
FileUtils (io.druid.java.util.common.FileUtils): 4 usages
ServiceException (org.jets3t.service.ServiceException): 4 usages
ByteSource (com.google.common.io.ByteSource): 3 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
ImmutableList (com.google.common.collect.ImmutableList): 2 usages
SegmentLoaderFactory (io.druid.indexing.common.SegmentLoaderFactory): 2 usages
TaskToolboxFactory (io.druid.indexing.common.TaskToolboxFactory): 2 usages
LocalTaskActionClientFactory (io.druid.indexing.common.actions.LocalTaskActionClientFactory): 2 usages
TaskActionToolbox (io.druid.indexing.common.actions.TaskActionToolbox): 2 usages
TaskConfig (io.druid.indexing.common.config.TaskConfig): 2 usages
DataSegmentArchiver (io.druid.segment.loading.DataSegmentArchiver): 2 usages
DataSegmentMover (io.druid.segment.loading.DataSegmentMover): 2 usages
ArrayList (java.util.ArrayList): 2 usages
FileUtils (org.apache.commons.io.FileUtils): 2 usages
S3Object (org.jets3t.service.model.S3Object): 2 usages