
Example 11 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project hive by apache.

Class: DruidStorageHandler, method: deleteSegment.

@VisibleForTesting
void deleteSegment(DataSegment segment) throws SegmentLoadingException {
    final Path path = DruidStorageHandlerUtils.getPath(segment);
    LOG.info("removing segment {}, located at path {}", segment.getId().toString(), path);
    try {
        if (path.getName().endsWith(".zip")) {
            final FileSystem fs = path.getFileSystem(getConf());
            if (!fs.exists(path)) {
                LOG.warn("Segment Path {} does not exist. It appears to have been deleted already.", path);
                return;
            }
            // path format --> .../dataSource/interval/version/partitionNum/xxx.zip
            Path partitionNumDir = path.getParent();
            if (!fs.delete(partitionNumDir, true)) {
                throw new SegmentLoadingException("Unable to kill segment, failed to delete dir [%s]", partitionNumDir.toString());
            }
            // try to delete other directories if possible
            Path versionDir = partitionNumDir.getParent();
            if (safeNonRecursiveDelete(fs, versionDir)) {
                Path intervalDir = versionDir.getParent();
                if (safeNonRecursiveDelete(fs, intervalDir)) {
                    Path dataSourceDir = intervalDir.getParent();
                    safeNonRecursiveDelete(fs, dataSourceDir);
                }
            }
        } else {
            throw new SegmentLoadingException("Unknown file type[%s]", path);
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Unable to kill segment");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
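
The helper safeNonRecursiveDelete used above is not shown in this snippet. A minimal sketch, assuming it attempts a single non-recursive delete and treats any failure (for example, a non-empty directory) as "not deleted":

private static boolean safeNonRecursiveDelete(FileSystem fs, Path path) {
    try {
        // Non-recursive delete: succeeds only when the directory is already empty.
        return fs.delete(path, false);
    } catch (Exception ex) {
        // A non-empty directory or any I/O problem simply means the parent directories are kept.
        return false;
    }
}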

Example 12 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project hive by apache.

Class: TestDruidRecordWriter, method: testWrite.

// Test is failing due to a Guava dependency conflict; Druid 0.13.0 should depend less on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)), null, objectMapper);
    IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    });
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = expectedRows.stream()
        .map(input -> new DruidWritable(ImmutableMap.<String, Object>builder()
            .putAll(input)
            .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
                Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
            .build()))
        .collect(Collectors.toList());
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null, ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) DruidTable(org.apache.calcite.adapter.druid.DruidTable) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) Constants(org.apache.hadoop.hive.conf.Constants) DruidStorageHandler(org.apache.hadoop.hive.druid.DruidStorageHandler) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) Ignore(org.junit.Ignore) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) TemporaryFolder(org.junit.rules.TemporaryFolder)
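
The expectedRows fixture and the verifyRows helper live elsewhere in the test class. A hypothetical sketch of the fixture is shown below; the column names mirror the schema above, while INTERVAL_FULL, the timestamps, and the metric values are made-up illustrations:

private static final Interval INTERVAL_FULL = Interval.parse("2014-10-22T00:00:00.000Z/2014-10-23T00:00:00.000Z");

private static final List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
    // one row per host; the timestamp key matches the TimestampSpec column used in the DataSchema
    ImmutableMap.<String, Object>of(
        DruidConstants.DEFAULT_TIMESTAMP_COLUMN, DateTime.parse("2014-10-22T00:00:00.000Z").getMillis(),
        "host", "a.example.com",
        "visited_sum", 190L,
        "unique_hosts", 1.0d),
    ImmutableMap.<String, Object>of(
        DruidConstants.DEFAULT_TIMESTAMP_COLUMN, DateTime.parse("2014-10-22T01:00:00.000Z").getMillis(),
        "host", "b.example.com",
        "visited_sum", 175L,
        "unique_hosts", 1.0d));

verifyRows would then be expected to compare each map against the corresponding InputRow read back through the firehose: the bucketed timestamp, the host dimension, and the two metric columns.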

Example 13 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project hive by apache.

Class: DruidStorageHandler, method: commitDropTable.

@Override
public void commitDropTable(Table table, boolean deleteData) {
    if (DruidKafkaUtils.isKafkaStreamingTable(table)) {
        // Stop Kafka Ingestion first
        final String overlordAddress = Preconditions.checkNotNull(HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_OVERLORD_DEFAULT_ADDRESS), "Druid Overlord Address is null");
        String dataSourceName = Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, Constants.DRUID_DATA_SOURCE), "Druid Datasource name is null");
        stopKafkaIngestion(overlordAddress, dataSourceName);
    }
    String dataSourceName = Preconditions.checkNotNull(table.getParameters().get(Constants.DRUID_DATA_SOURCE), "DataSource name is null !");
    // Data is purged only when deleteData is set and MetaStoreUtils.isExternalTablePurge(table) returns true.
    if (deleteData && MetaStoreUtils.isExternalTablePurge(table)) {
        LOG.info("Dropping with purge all the data for data source {}", dataSourceName);
        List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getDataSegmentList(getConnector(), getDruidMetadataStorageTablesConfig(), dataSourceName);
        if (dataSegmentList.isEmpty()) {
            LOG.info("Nothing to delete for data source {}", dataSourceName);
            return;
        }
        for (DataSegment dataSegment : dataSegmentList) {
            try {
                deleteSegment(dataSegment);
            } catch (SegmentLoadingException e) {
                LOG.error(String.format("Error while deleting segment [%s]", dataSegment.getId().toString()), e);
            }
        }
    }
    if (DruidStorageHandlerUtils.disableDataSource(getConnector(), getDruidMetadataStorageTablesConfig(), dataSourceName)) {
        LOG.info("Successfully dropped druid data source {}", dataSourceName);
    }
}
Also used : SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) DataSegment(org.apache.druid.timeline.DataSegment)
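
The purge decision above is delegated to MetaStoreUtils.isExternalTablePurge. A rough sketch, assuming the check simply reads the table-level external.table.purge property (the real implementation may differ):

// Sketch (assumption): data is purged only when the table carries 'external.table.purge' = 'true'.
static boolean isExternalTablePurge(Table table) {
    String purge = table.getParameters().get("external.table.purge");
    return Boolean.parseBoolean(purge);
}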

Example 14 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Class: OssDataSegmentMover, method: selfCheckingMove.

/**
 * Copies an object and then verifies, via a separate API call, that the object is present at the target location.
 * If it is not, an exception is thrown and the object is not deleted from the old location. This "paranoid" check
 * was added after it was observed that OSS may report a successful move even though the object is not found at the
 * target location.
 */
private void selfCheckingMove(String srcBucket, String dstBucket, String srcPath, String dstPath, String copyMsg) throws IOException, SegmentLoadingException {
    if (srcBucket.equals(dstBucket) && srcPath.equals(dstPath)) {
        log.info("No need to move file[%s://%s/%s] onto itself", OssStorageDruidModule.SCHEME, srcBucket, srcPath);
        return;
    }
    final OSS client = this.clientSupplier.get();
    if (client.doesObjectExist(srcBucket, srcPath)) {
        final ObjectListing listResult = client.listObjects(new ListObjectsRequest(srcBucket, srcPath, null, null, 1));
        // Check getObjectSummaries().size() rather than the reported key count, which can still be zero
        // even when summaries are returned.
        if (listResult.getObjectSummaries().size() == 0) {
            // should never happen
            throw new ISE("Unable to list object [%s://%s/%s]", OssStorageDruidModule.SCHEME, srcBucket, srcPath);
        }
        final OSSObjectSummary objectSummary = listResult.getObjectSummaries().get(0);
        if (objectSummary.getStorageClass() != null && objectSummary.getStorageClass().equals(StorageClass.IA.name())) {
            throw new OSSException(StringUtils.format("Cannot move file[%s://%s/%s] of storage class glacier, skipping.", OssStorageDruidModule.SCHEME, srcBucket, srcPath));
        } else {
            log.info("Moving file %s", copyMsg);
            final CopyObjectRequest copyRequest = new CopyObjectRequest(srcBucket, srcPath, dstBucket, dstPath);
            client.copyObject(copyRequest);
            if (!client.doesObjectExist(dstBucket, dstPath)) {
                throw new IOE("After copy was reported as successful the file doesn't exist in the target location [%s]", copyMsg);
            }
            deleteWithRetriesSilent(srcBucket, srcPath);
            log.debug("Finished moving file %s", copyMsg);
        }
    } else {
        // ensure object exists in target location
        if (client.doesObjectExist(dstBucket, dstPath)) {
            log.info("Not moving file [%s://%s/%s], already present in target location [%s://%s/%s]", OssStorageDruidModule.SCHEME, srcBucket, srcPath, OssStorageDruidModule.SCHEME, dstBucket, dstPath);
        } else {
            throw new SegmentLoadingException("Unable to move file %s, not present in either source or target location", copyMsg);
        }
    }
}
Also used : ListObjectsRequest(com.aliyun.oss.model.ListObjectsRequest) OSSObjectSummary(com.aliyun.oss.model.OSSObjectSummary) CopyObjectRequest(com.aliyun.oss.model.CopyObjectRequest) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) ObjectListing(com.aliyun.oss.model.ObjectListing) OSSException(com.aliyun.oss.OSSException) ISE(org.apache.druid.java.util.common.ISE) OSS(com.aliyun.oss.OSS) IOE(org.apache.druid.java.util.common.IOE)
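
deleteWithRetriesSilent is referenced above but not shown. A minimal sketch, assuming it makes a bounded number of delete attempts and logs failures instead of propagating them:

// Sketch (assumption): best-effort delete of the source object with a few retries.
private void deleteWithRetriesSilent(String bucket, String path) {
    final int maxRetries = 3;
    for (int attempt = 1; attempt <= maxRetries; attempt++) {
        try {
            clientSupplier.get().deleteObject(bucket, path);
            return;
        } catch (Exception e) {
            log.warn(e, "Failed to delete [%s://%s/%s], attempt %d of %d",
                OssStorageDruidModule.SCHEME, bucket, path, attempt, maxRetries);
        }
    }
}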

Example 15 with SegmentLoadingException

Use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.

Class: OssDataSegmentMover, method: move.

@Override
public DataSegment move(DataSegment segment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
    try {
        Map<String, Object> loadSpec = segment.getLoadSpec();
        String bucket = MapUtils.getString(loadSpec, "bucket");
        String key = MapUtils.getString(loadSpec, "key");
        final String targetBucket = MapUtils.getString(targetLoadSpec, "bucket");
        final String targetKey = MapUtils.getString(targetLoadSpec, "baseKey");
        final String targetPath = OssUtils.constructSegmentPath(targetKey, DataSegmentPusher.getDefaultStorageDir(segment, false));
        if (targetBucket.isEmpty()) {
            throw new SegmentLoadingException("Target OSS bucket is not specified");
        }
        if (targetPath.isEmpty()) {
            throw new SegmentLoadingException("Target OSS baseKey is not specified");
        }
        safeMove(bucket, key, targetBucket, targetPath);
        return segment.withLoadSpec(ImmutableMap.<String, Object>builder().putAll(Maps.filterKeys(loadSpec, new Predicate<String>() {

            @Override
            public boolean apply(String input) {
                return !("bucket".equals(input) || "key".equals(input));
            }
        })).put("bucket", targetBucket).put("key", targetPath).build());
    } catch (OSSException e) {
        throw new SegmentLoadingException(e, "Unable to move segment[%s]: [%s]", segment.getId(), e);
    }
}
Also used : SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) OSSException(com.aliyun.oss.OSSException) Predicate(com.google.common.base.Predicate)
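
A hypothetical call site (the bucket and baseKey values are made up for illustration); move returns a copy of the segment whose loadSpec points at the new bucket and key while every other loadSpec entry is preserved:

// Hypothetical usage; constructing the OssDataSegmentMover and obtaining the segment are omitted.
Map<String, Object> targetLoadSpec = ImmutableMap.<String, Object>of(
    "bucket", "my-archive-bucket",
    "baseKey", "druid/archived-segments");
DataSegment moved = mover.move(segment, targetLoadSpec);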

Aggregations

SegmentLoadingException (org.apache.druid.segment.loading.SegmentLoadingException): 29
IOException (java.io.IOException): 17
File (java.io.File): 10
FileUtils (org.apache.druid.java.util.common.FileUtils): 8
DataSegment (org.apache.druid.timeline.DataSegment): 7
Test (org.junit.Test): 6
OSSException (com.aliyun.oss.OSSException): 4
AmazonServiceException (com.amazonaws.AmazonServiceException): 4
ImmutableMap (com.google.common.collect.ImmutableMap): 4
List (java.util.List): 4
Map (java.util.Map): 4
ExecutionException (java.util.concurrent.ExecutionException): 4
ExecutorService (java.util.concurrent.ExecutorService): 4
Collectors (java.util.stream.Collectors): 4
QueryableIndex (org.apache.druid.segment.QueryableIndex): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 4
Path (org.apache.hadoop.fs.Path): 4
Interval (org.joda.time.Interval): 4
Assert (org.junit.Assert): 4
ByteSource (com.google.common.io.ByteSource): 3