use of org.apache.druid.segment.loading.SegmentLoadingException in project hive by apache.
the class DruidStorageHandler method deleteSegment.
@VisibleForTesting
void deleteSegment(DataSegment segment) throws SegmentLoadingException {
  final Path path = DruidStorageHandlerUtils.getPath(segment);
  LOG.info("removing segment {}, located at path {}", segment.getId().toString(), path);
  try {
    if (path.getName().endsWith(".zip")) {
      final FileSystem fs = path.getFileSystem(getConf());
      if (!fs.exists(path)) {
        LOG.warn("Segment Path {} does not exist. It appears to have been deleted already.", path);
        return;
      }
      // path format --> .../dataSource/interval/version/partitionNum/xxx.zip
      Path partitionNumDir = path.getParent();
      if (!fs.delete(partitionNumDir, true)) {
        throw new SegmentLoadingException("Unable to kill segment, failed to delete dir [%s]", partitionNumDir.toString());
      }
      // try to delete other directories if possible
      Path versionDir = partitionNumDir.getParent();
      if (safeNonRecursiveDelete(fs, versionDir)) {
        Path intervalDir = versionDir.getParent();
        if (safeNonRecursiveDelete(fs, intervalDir)) {
          Path dataSourceDir = intervalDir.getParent();
          safeNonRecursiveDelete(fs, dataSourceDir);
        }
      }
    } else {
      throw new SegmentLoadingException("Unknown file type[%s]", path);
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Unable to kill segment");
  }
}
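The safeNonRecursiveDelete helper used above is not part of this snippet. A minimal sketch of what it could look like, assuming the usual Hadoop FileSystem/Path API; the actual Hive implementation may differ:

// Hypothetical helper, not the actual Hive source: attempt a non-recursive delete
// and treat any failure as "the directory is not empty yet", so the caller stops
// climbing the dataSource/interval/version hierarchy.
private static boolean safeNonRecursiveDelete(FileSystem fs, Path path) {
  try {
    // recursive = false, so this only succeeds once the directory is empty
    return fs.delete(path, false);
  } catch (Exception e) {
    // best-effort cleanup of shared parent directories; failures are not fatal
    return false;
  }
}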
use of org.apache.druid.segment.loading.SegmentLoadingException in project hive by apache.
the class TestDruidRecordWriter method testWrite.
// Test is failing due to a Guava dependency issue; Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(
      new TimeAndDimsParseSpec(
          new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
          new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
  final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
  });
  DataSchema dataSchema = new DataSchema(
      dataSourceName,
      parserMap,
      new AggregatorFactory[] {
          new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
      },
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      null,
      objectMapper);
  IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, null, null, null, temporaryFolder.newFolder(), null, null, null, null,
      indexSpec, null, null, 0, 0, null, null, 0L, null, null);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
    @Override
    public File getStorageDirectory() {
      return segmentOutputDir;
    }
  });
  Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
  List<DruidWritable> druidWritables = expectedRows.stream()
      .map(input -> new DruidWritable(ImmutableMap.<String, Object>builder()
          .putAll(input)
          .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
              Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
          .build()))
      .collect(Collectors.toList());
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
      null,
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null);
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
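The expectedRows fixture and the verifyRows helper live elsewhere in the test class. A rough sketch of what verifyRows could check, assuming each expected row keys its values by the same column names used above and stores dimensions as lists of strings:

// Hypothetical assertion helper, not the actual test source.
private void verifyRows(List<ImmutableMap<String, Object>> expected, List<InputRow> actual) {
  Assert.assertEquals(expected.size(), actual.size());
  for (int i = 0; i < expected.size(); i++) {
    Map<String, Object> expectedRow = expected.get(i);
    InputRow actualRow = actual.get(i);
    // timestamps are assumed to be stored as epoch millis in the fixture
    Assert.assertEquals(
        ((Number) expectedRow.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)).longValue(),
        actualRow.getTimestampFromEpoch());
    Assert.assertEquals(expectedRow.get("host"), actualRow.getDimension("host"));
    Assert.assertEquals(
        ((Number) expectedRow.get("visited_sum")).longValue(),
        actualRow.getMetric("visited_sum").longValue());
  }
}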
use of org.apache.druid.segment.loading.SegmentLoadingException in project hive by apache.
the class DruidStorageHandler method commitDropTable.
@Override
public void commitDropTable(Table table, boolean deleteData) {
  if (DruidKafkaUtils.isKafkaStreamingTable(table)) {
    // Stop Kafka ingestion first
    final String overlordAddress = Preconditions.checkNotNull(
        HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_OVERLORD_DEFAULT_ADDRESS),
        "Druid Overlord Address is null");
    String dataSourceName = Preconditions.checkNotNull(
        DruidStorageHandlerUtils.getTableProperty(table, Constants.DRUID_DATA_SOURCE),
        "Druid Datasource name is null");
    stopKafkaIngestion(overlordAddress, dataSourceName);
  }
  String dataSourceName = Preconditions.checkNotNull(
      table.getParameters().get(Constants.DRUID_DATA_SOURCE),
      "DataSource name is null !");
  // Only delete the data when deleteData is set and MetaStoreUtils.isExternalTablePurge(table) returns true.
  if (deleteData && MetaStoreUtils.isExternalTablePurge(table)) {
    LOG.info("Dropping with purge all the data for data source {}", dataSourceName);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getDataSegmentList(getConnector(), getDruidMetadataStorageTablesConfig(), dataSourceName);
    if (dataSegmentList.isEmpty()) {
      LOG.info("Nothing to delete for data source {}", dataSourceName);
      return;
    }
    for (DataSegment dataSegment : dataSegmentList) {
      try {
        deleteSegment(dataSegment);
      } catch (SegmentLoadingException e) {
        LOG.error(String.format("Error while deleting segment [%s]", dataSegment.getId().toString()), e);
      }
    }
  }
  if (DruidStorageHandlerUtils.disableDataSource(getConnector(), getDruidMetadataStorageTablesConfig(), dataSourceName)) {
    LOG.info("Successfully dropped druid data source {}", dataSourceName);
  }
}
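The data is only physically removed when the table is dropped with purge semantics. Roughly, the gate above boils down to checking the external.table.purge table property (a simplified sketch; the real MetaStoreUtils.isExternalTablePurge handles more edge cases):

// Simplified approximation of the purge gate, for illustration only.
boolean purgeRequested = Boolean.parseBoolean(
    table.getParameters().getOrDefault("external.table.purge", "false"));
if (deleteData && purgeRequested) {
  // deep-storage segments are deleted; the data source is disabled in the
  // metadata store in either case (see disableDataSource above)
}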
use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
the class OssDataSegmentMover method selfCheckingMove.
/**
 * Copies an object and after that checks that the object is present at the target location, via a separate API call.
 * If it is not, an exception is thrown, and the object is not deleted at the old location. This "paranoid" check
 * is added after it was observed that OSS may report a successful move while the object is not found at the target
 * location.
 */
private void selfCheckingMove(String srcBucket, String dstBucket, String srcPath, String dstPath, String copyMsg) throws IOException, SegmentLoadingException {
  if (srcBucket.equals(dstBucket) && srcPath.equals(dstPath)) {
    log.info("No need to move file[%s://%s/%s] onto itself", OssStorageDruidModule.SCHEME, srcBucket, srcPath);
    return;
  }
  final OSS client = this.clientSupplier.get();
  if (client.doesObjectExist(srcBucket, srcPath)) {
    final ObjectListing listResult = client.listObjects(new ListObjectsRequest(srcBucket, srcPath, null, null, 1));
    // the listing can come back empty even though doesObjectExist() just returned true
    if (listResult.getObjectSummaries().size() == 0) {
      // should never happen
      throw new ISE("Unable to list object [%s://%s/%s]", OssStorageDruidModule.SCHEME, srcBucket, srcPath);
    }
    final OSSObjectSummary objectSummary = listResult.getObjectSummaries().get(0);
    if (objectSummary.getStorageClass() != null && objectSummary.getStorageClass().equals(StorageClass.IA.name())) {
      throw new OSSException(StringUtils.format("Cannot move file[%s://%s/%s] of storage class IA, skipping.", OssStorageDruidModule.SCHEME, srcBucket, srcPath));
    } else {
      log.info("Moving file %s", copyMsg);
      final CopyObjectRequest copyRequest = new CopyObjectRequest(srcBucket, srcPath, dstBucket, dstPath);
      client.copyObject(copyRequest);
      if (!client.doesObjectExist(dstBucket, dstPath)) {
        throw new IOE("After copy was reported as successful the file doesn't exist in the target location [%s]", copyMsg);
      }
      deleteWithRetriesSilent(srcBucket, srcPath);
      log.debug("Finished moving file %s", copyMsg);
    }
  } else {
    // ensure the object exists in the target location
    if (client.doesObjectExist(dstBucket, dstPath)) {
      log.info("Not moving file [%s://%s/%s], already present in target location [%s://%s/%s]", OssStorageDruidModule.SCHEME, srcBucket, srcPath, OssStorageDruidModule.SCHEME, dstBucket, dstPath);
    } else {
      throw new SegmentLoadingException("Unable to move file %s, not present in either source or target location", copyMsg);
    }
  }
}
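deleteWithRetriesSilent is not shown here. A sketch of the intent, assuming Druid's RetryUtils and an OssUtils retry predicate (the names and the retry count are assumptions, not the verified implementation):

// Hypothetical sketch: a failed delete is only logged, because the copy has
// already been verified and a leftover source object is merely wasted space.
private void deleteWithRetriesSilent(final String bucket, final String path) {
  try {
    RetryUtils.retry(
        () -> {
          clientSupplier.get().deleteObject(bucket, path);
          return null;
        },
        OssUtils.RETRYABLE,  // assumed predicate for transient OSS errors
        3                    // assumed number of attempts
    );
  } catch (Exception e) {
    log.error(e, "Failed to delete file[%s://%s/%s], giving up", OssStorageDruidModule.SCHEME, bucket, path);
  }
}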
use of org.apache.druid.segment.loading.SegmentLoadingException in project druid by druid-io.
the class OssDataSegmentMover method move.
@Override
public DataSegment move(DataSegment segment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
  try {
    Map<String, Object> loadSpec = segment.getLoadSpec();
    String bucket = MapUtils.getString(loadSpec, "bucket");
    String key = MapUtils.getString(loadSpec, "key");
    final String targetBucket = MapUtils.getString(targetLoadSpec, "bucket");
    final String targetKey = MapUtils.getString(targetLoadSpec, "baseKey");
    final String targetPath = OssUtils.constructSegmentPath(targetKey, DataSegmentPusher.getDefaultStorageDir(segment, false));
    if (targetBucket.isEmpty()) {
      throw new SegmentLoadingException("Target OSS bucket is not specified");
    }
    if (targetPath.isEmpty()) {
      throw new SegmentLoadingException("Target OSS baseKey is not specified");
    }
    safeMove(bucket, key, targetBucket, targetPath);
    return segment.withLoadSpec(
        ImmutableMap.<String, Object>builder()
            .putAll(Maps.filterKeys(loadSpec, new Predicate<String>() {
              @Override
              public boolean apply(String input) {
                return !("bucket".equals(input) || "key".equals(input));
              }
            }))
            .put("bucket", targetBucket)
            .put("key", targetPath)
            .build());
  } catch (OSSException e) {
    throw new SegmentLoadingException(e, "Unable to move segment[%s]: [%s]", segment.getId(), e);
  }
}
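A hedged example of how a caller might use move, with illustrative bucket and key names; the segment variable and the mover construction are assumed to come from the surrounding archiving code:

// Illustrative call site, not taken from the Druid source.
Map<String, Object> targetLoadSpec = ImmutableMap.of(
    "bucket", "my-archive-bucket",   // assumed bucket name
    "baseKey", "druid/archived"      // assumed base key under which segments are laid out
);
DataSegment archived = mover.move(segment, targetLoadSpec);
// the returned copy of the segment has its loadSpec rewritten to the new bucket/key,
// while the rest of the segment metadata (id, interval, version, etc.) is unchanged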