Search in sources :

Example 1 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project hive by apache.

the class DruidStorageHandler method deleteSegment.

/**
 * Deletes the stored files of the given segment.
 *
 * <p>Only segment paths whose file name ends in {@code .zip} are handled. The directory
 * containing the zip is removed recursively; afterwards the version, interval and data
 * source directories above it are handed to {@code safeNonRecursiveDelete} one after the
 * other, stopping at the first level for which that call returns {@code false}.
 *
 * @param segment segment whose files should be removed
 *
 * @throws SegmentLoadingException if the path is not a zip file, the partition directory
 *                                 cannot be deleted, or the file system raises an
 *                                 {@link IOException}
 */
@VisibleForTesting
protected void deleteSegment(DataSegment segment) throws SegmentLoadingException {
    final Path path = getPath(segment);
    LOG.info(String.format("removing segment[%s], located at path[%s]", segment.getIdentifier(), path));
    try {
        // Anything that is not a zip archive is rejected up front.
        if (!path.getName().endsWith(".zip")) {
            throw new SegmentLoadingException("Unknown file type[%s]", path);
        }

        final FileSystem fileSystem = path.getFileSystem(getConf());
        final boolean segmentPresent = fileSystem.exists(path);
        if (!segmentPresent) {
            LOG.warn(String.format("Segment Path [%s] does not exist. It appears to have been deleted already.", path));
            return;
        }

        // Layout: .../dataSource/interval/version/partitionNum/xxx.zip
        final Path partitionNumDir = path.getParent();
        final boolean partitionRemoved = fileSystem.delete(partitionNumDir, true);
        if (!partitionRemoved) {
            throw new SegmentLoadingException("Unable to kill segment, failed to delete dir [%s]", partitionNumDir.toString());
        }

        // Walk upwards, giving up as soon as one level could not be removed.
        final Path versionDir = partitionNumDir.getParent();
        if (!safeNonRecursiveDelete(fileSystem, versionDir)) {
            return;
        }
        final Path intervalDir = versionDir.getParent();
        if (!safeNonRecursiveDelete(fileSystem, intervalDir)) {
            return;
        }
        final Path dataSourceDir = intervalDir.getParent();
        safeNonRecursiveDelete(fileSystem, dataSourceDir);
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Unable to kill segment");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

the class CassandraDataSegmentPuller method getSegmentFiles.

/**
 * Pulls the zipped segment stored under {@code key} and unzips it into {@code outDir}.
 *
 * <p>The object is first streamed into a temporary {@code index.zip} inside {@code outDir};
 * that download is wrapped in {@code RetryUtils.retry} with an always-true predicate and a
 * limit argument of 10 (presumably the maximum number of tries - confirm against
 * {@code RetryUtils}). The temporary file is then unzipped into {@code outDir}. Once the
 * unzip step has been attempted, the temporary file is deleted whether or not it succeeded.
 * If unzipping fails, {@code outDir} is deleted as well.
 *
 * @param key    key of the stored segment archive
 * @param outDir directory to unzip into; created if it does not exist
 *
 * @return the copy result reported by {@code CompressionUtils.unzip}
 *
 * @throws SegmentLoadingException if the download or the unzip step fails
 */
public io.druid.java.util.common.FileUtils.FileCopyResult getSegmentFiles(final String key, final File outDir) throws SegmentLoadingException {
    log.info("Pulling index from C* at path[%s] to outDir[%s]", key, outDir);
    if (!outDir.exists()) {
        outDir.mkdirs();
    }
    // Also covers a failed mkdirs() above: a missing directory is not a directory.
    if (!outDir.isDirectory()) {
        throw new ISE("outDir[%s] must be a directory.", outDir);
    }
    long startTime = System.currentTimeMillis();
    final File tmpFile = new File(outDir, "index.zip");
    log.info("Pulling to temporary local cache [%s]", tmpFile.getAbsolutePath());
    final io.druid.java.util.common.FileUtils.FileCopyResult localResult;
    try {
        localResult = RetryUtils.retry(new Callable<io.druid.java.util.common.FileUtils.FileCopyResult>() {

            @Override
            public io.druid.java.util.common.FileUtils.FileCopyResult call() throws Exception {
                try (OutputStream os = new FileOutputStream(tmpFile)) {
                    // The returned metadata is not needed; only the bytes written to `os` matter.
                    ChunkedStorage.newReader(indexStorage, key, os).withBatchSize(BATCH_SIZE).withConcurrencyLevel(CONCURRENCY).call();
                }
                return new io.druid.java.util.common.FileUtils.FileCopyResult(tmpFile);
            }
        }, Predicates.<Throwable>alwaysTrue(), 10);
    } catch (Exception e) {
        throw new SegmentLoadingException(e, "Unable to copy key [%s] to file [%s]", key, tmpFile.getAbsolutePath());
    }
    try {
        final io.druid.java.util.common.FileUtils.FileCopyResult result = CompressionUtils.unzip(tmpFile, outDir);
        log.info("Pull of file[%s] completed in %,d millis (%s bytes)", key, System.currentTimeMillis() - startTime, result.size());
        return result;
    } catch (Exception e) {
        try {
            FileUtils.deleteDirectory(outDir);
        } catch (IOException e1) {
            log.error(e1, "Error clearing segment directory [%s]", outDir.getAbsolutePath());
            e.addSuppressed(e1);
        }
        // Pass the message as an argument, never as the format string: an arbitrary
        // exception message may contain '%' or be null.
        throw new SegmentLoadingException(e, "%s", e.getMessage());
    } finally {
        if (!tmpFile.delete()) {
            log.warn("Could not delete cache file at [%s]", tmpFile.getAbsolutePath());
        }
    }
}
Also used : FileUtils(org.apache.commons.io.FileUtils) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) ISE(io.druid.java.util.common.ISE) File(java.io.File) ObjectMetadata(com.netflix.astyanax.recipes.storage.ObjectMetadata)

Example 3 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

the class CloudFilesDataSegmentPuller method getSegmentFiles.

/**
 * Unzips the object at {@code path} in the given region/container into {@code outDir}.
 *
 * <p>If unzipping fails, {@code outDir} is deleted (a failure to do so is logged and
 * attached to the original exception as suppressed) and the failure is rethrown as a
 * {@link SegmentLoadingException}. The byte source's stream is closed in all cases.
 *
 * @param region    region passed to the object API proxy
 * @param container container passed to the object API proxy
 * @param path      path of the zipped segment object
 * @param outDir    directory to unzip into
 *
 * @return the copy result reported by {@code CompressionUtils.unzip}
 *
 * @throws SegmentLoadingException if the segment cannot be pulled or unzipped
 */
public FileUtils.FileCopyResult getSegmentFiles(String region, String container, String path, File outDir) throws SegmentLoadingException {
    CloudFilesObjectApiProxy objectApi = new CloudFilesObjectApiProxy(cloudFilesApi, region, container);
    final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);
    try {
        final FileUtils.FileCopyResult result = CompressionUtils.unzip(byteSource, outDir, CloudFilesUtils.CLOUDFILESRETRY, true);
        log.info("Loaded %d bytes from [%s] to [%s]", result.size(), path, outDir.getAbsolutePath());
        return result;
    } catch (Exception e) {
        try {
            org.apache.commons.io.FileUtils.deleteDirectory(outDir);
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to remove output directory [%s] for segment pulled from [%s]", outDir.getAbsolutePath(), path);
            // Keep the cleanup failure visible on the exception that is propagated.
            e.addSuppressed(ioe);
        }
        // Pass the message as an argument, never as the format string: an arbitrary
        // exception message may contain '%' or be null.
        throw new SegmentLoadingException(e, "%s", e.getMessage());
    } finally {
        try {
            byteSource.closeStream();
        } catch (IOException ioe) {
            log.warn(ioe, "Failed to close payload for segmente pulled from [%s]", path);
        }
    }
}
Also used : FileUtils(io.druid.java.util.common.FileUtils) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException)

Example 4 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

the class ServerManager method loadSegment.

/**
 * Load a single segment.
 *
 * <p>If the segment loader fails, a best-effort {@code cleanup} of the segment is attempted
 * before the original exception is rethrown; a failure of that cleanup is attached to the
 * rethrown exception as a suppressed exception.
 *
 * @param segment segment to load
 *
 * @return true if the segment was newly loaded, false if it was already loaded
 *
 * @throws SegmentLoadingException if the segment cannot be loaded
 */
public boolean loadSegment(final DataSegment segment) throws SegmentLoadingException {
    final Segment adapter;
    try {
        adapter = segmentLoader.getSegment(segment);
    } catch (SegmentLoadingException e) {
        try {
            segmentLoader.cleanup(segment);
        } catch (SegmentLoadingException e1) {
            // Cleanup is best-effort and must not mask the load failure, but record it
            // on the propagated exception instead of dropping it silently.
            e.addSuppressed(e1);
        }
        throw e;
    }
    if (adapter == null) {
        throw new SegmentLoadingException("Null adapter from loadSpec[%s]", segment.getLoadSpec());
    }
    synchronized (lock) {
        String dataSource = segment.getDataSource();
        // Lazily create the timeline for a data source seen for the first time.
        VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals = dataSources.get(dataSource);
        if (loadedIntervals == null) {
            loadedIntervals = new VersionedIntervalTimeline<>(Ordering.natural());
            dataSources.put(dataSource, loadedIntervals);
        }
        // A chunk already registered for this interval/version/partition means the segment is loaded.
        PartitionHolder<ReferenceCountingSegment> entry = loadedIntervals.findEntry(segment.getInterval(), segment.getVersion());
        if ((entry != null) && (entry.getChunk(segment.getShardSpec().getPartitionNum()) != null)) {
            log.warn("Told to load a adapter for a segment[%s] that already exists", segment.getIdentifier());
            return false;
        }
        loadedIntervals.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(new ReferenceCountingSegment(adapter)));
        // Per-data-source bookkeeping: total size and segment count.
        synchronized (dataSourceSizes) {
            dataSourceSizes.add(dataSource, segment.getSize());
        }
        synchronized (dataSourceCounts) {
            dataSourceCounts.add(dataSource, 1L);
        }
        return true;
    }
}
Also used : ReferenceCountingSegment(io.druid.segment.ReferenceCountingSegment) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) DataSegment(io.druid.timeline.DataSegment) ReferenceCountingSegment(io.druid.segment.ReferenceCountingSegment) Segment(io.druid.segment.Segment)

Example 5 with SegmentLoadingException

use of io.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

the class ZkCoordinator method addSegment.

/**
 * Loads the given segment and announces it if it is not announced yet.
 *
 * <p>A {@link SegmentLoadingException} raised along the way (including one wrapping an
 * announcement {@link IOException}) is reported as an alert rather than propagated.
 * {@code callback.execute()} is invoked in every case.
 *
 * @param segment  segment to load and announce
 * @param callback callback executed once processing of this request has finished
 */
@Override
public void addSegment(DataSegment segment, DataSegmentChangeCallback callback) {
    try {
        log.info("Loading segment %s", segment.getIdentifier());
        /*
         Race being guarded against: the runnable scheduled by removeSegment() may start and see
         segmentsToDelete.remove(segment) return true, at which point the historical begins deleting
         the segment files. If addSegment() is invoked right after that "if" check and loads the
         segment, the drop and the download would run at the same time. The lock taken below is
         what prevents this.
         */
        final boolean scheduledForDeletion = segmentsToDelete.contains(segment);
        if (scheduledForDeletion) {
            /*
             The contains(segment) check could live inside the synchronized block together with
             remove(segment), but then every addSegment() call would have to wait for the lock.
             Since contains(segment) is false in most cases, checking it outside the lock avoids
             most of the locking cost.
             */
            synchronized (lock) {
                segmentsToDelete.remove(segment);
            }
        }

        loadSegment(segment, callback);

        final boolean alreadyAnnounced = announcer.isAnnounced(segment);
        if (!alreadyAnnounced) {
            try {
                announcer.announceSegment(segment);
            } catch (IOException e) {
                // Surface announcement failures through the same alerting path as load failures.
                throw new SegmentLoadingException(e, "Failed to announce segment[%s]", segment.getIdentifier());
            }
        }
    } catch (SegmentLoadingException e) {
        log.makeAlert(e, "Failed to load segment for dataSource").addData("segment", segment).emit();
    } finally {
        callback.execute();
    }
}
Also used : SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException)

Aggregations

SegmentLoadingException (io.druid.segment.loading.SegmentLoadingException)22 IOException (java.io.IOException)16 DataSegment (io.druid.timeline.DataSegment)9 File (java.io.File)6 FileUtils (io.druid.java.util.common.FileUtils)4 ServiceException (org.jets3t.service.ServiceException)4 ByteSource (com.google.common.io.ByteSource)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 Path (org.apache.hadoop.fs.Path)3 ImmutableList (com.google.common.collect.ImmutableList)2 SegmentLoaderFactory (io.druid.indexing.common.SegmentLoaderFactory)2 TaskToolboxFactory (io.druid.indexing.common.TaskToolboxFactory)2 LocalTaskActionClientFactory (io.druid.indexing.common.actions.LocalTaskActionClientFactory)2 TaskActionToolbox (io.druid.indexing.common.actions.TaskActionToolbox)2 TaskConfig (io.druid.indexing.common.config.TaskConfig)2 DataSegmentArchiver (io.druid.segment.loading.DataSegmentArchiver)2 DataSegmentMover (io.druid.segment.loading.DataSegmentMover)2 ArrayList (java.util.ArrayList)2 FileUtils (org.apache.commons.io.FileUtils)2 S3Object (org.jets3t.service.model.S3Object)2