use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusher in project hive by apache.
the class TestDruidStorageHandler method testDeleteSegment.
@Test
public void testDeleteSegment() throws IOException, SegmentLoadingException {
String segmentRootPath = temporaryFolder.newFolder().getAbsolutePath();
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
DataSegment dataSegment = createSegment(new Path(taskDirPath, "index.zip").toString());
HdfsDataSegmentPusherConfig hdfsDSPConfig = new HdfsDataSegmentPusherConfig();
hdfsDSPConfig.setStorageDirectory(segmentRootPath);
HdfsDataSegmentPusher hdfsDataSegmentPusher = new HdfsDataSegmentPusher(hdfsDSPConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
Path segmentOutputPath = JobHelper.makeFileNamePath(new Path(segmentRootPath), localFileSystem, dataSegment, JobHelper.INDEX_ZIP, hdfsDataSegmentPusher);
Path indexPath = new Path(segmentOutputPath, "index.zip");
DataSegment dataSegmentWithLoadspect = DataSegment.builder(dataSegment).loadSpec(ImmutableMap.of("path", indexPath)).build();
OutputStream outputStream = localFileSystem.create(indexPath, true);
outputStream.close();
Assert.assertTrue("index file is not created ??", localFileSystem.exists(indexPath));
Assert.assertTrue(localFileSystem.exists(segmentOutputPath));
druidStorageHandler.deleteSegment(dataSegmentWithLoadspect);
// path format -- > .../dataSource/interval/version/partitionNum/xxx.zip
Assert.assertFalse("Index file still there ??", localFileSystem.exists(indexPath));
// path format of segmentOutputPath -- > .../dataSource/interval/version/partitionNum/
Assert.assertFalse("PartitionNum directory still there ??", localFileSystem.exists(segmentOutputPath));
Assert.assertFalse("Version directory still there ??", localFileSystem.exists(segmentOutputPath.getParent()));
Assert.assertFalse("Interval directory still there ??", localFileSystem.exists(segmentOutputPath.getParent().getParent()));
Assert.assertFalse("Data source directory still there ??", localFileSystem.exists(segmentOutputPath.getParent().getParent().getParent()));
}
use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusher in project hive by apache.
the class TestDruidStorageHandler method testInsertIntoAppendOneMorePartition.
@Test
public void testInsertIntoAppendOneMorePartition() throws MetaException, IOException {
DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
druidStorageHandler.preCreateTable(tableMock);
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
List<DataSegment> existingSegments = Collections.singletonList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0));
Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
druidStorageHandler.commitInsertTable(tableMock, false);
Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
Assert.assertEquals(2, dataSegmentList.size());
DataSegment persistedSegment = dataSegmentList.get(1);
Assert.assertEquals("v0", persistedSegment.getVersion());
Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
Assert.assertEquals(1, persistedSegment.getShardSpec().getPartitionNum());
Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusher in project hive by apache.
the class TestDruidStorageHandler method testCommitInsertIntoTable.
@Test
public void testCommitInsertIntoTable() throws MetaException, IOException {
DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
druidStorageHandler.preCreateTable(tableMock);
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
List<DataSegment> existingSegments = Collections.singletonList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
druidStorageHandler.commitInsertTable(tableMock, false);
Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
Assert.assertEquals(2, dataSegmentList.size());
DataSegment persistedSegment = dataSegmentList.get(1);
// Insert into appends to old version
Assert.assertEquals("v0", persistedSegment.getVersion());
Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusher in project hive by apache.
the class TestDruidStorageHandler method testCommitInsertIntoWithNonExtendableSegment.
@Test(expected = MetaException.class)
public void testCommitInsertIntoWithNonExtendableSegment() throws MetaException, IOException {
DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
druidStorageHandler.preCreateTable(tableMock);
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, "index_old_1.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", NoneShardSpec.instance()), createSegment(new Path(taskDirPath, "index_old_2.zip").toString(), new Interval(200, 250, DateTimeZone.UTC), "v0", new LinearShardSpec(0)), createSegment(new Path(taskDirPath, "index_old_3.zip").toString(), new Interval(250, 300, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
pusherConfig.setStorageDirectory(taskDirPath.toString());
DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
// Try appending to non extendable shard spec
DataSegment conflictingSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(conflictingSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, conflictingSegment, descriptorPath);
druidStorageHandler.commitInsertTable(tableMock, false);
}
use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusher in project hive by apache.
the class DruidStorageHandler method loadAndCommitDruidSegments.
/**
* Creates metadata moves then commit the Segment's metadata to Druid metadata store in one TxN.
*
* @param table Hive table
* @param overwrite true if it is an insert overwrite table.
*/
private List<DataSegment> loadAndCommitDruidSegments(Table table, boolean overwrite, List<DataSegment> segmentsToLoad) throws IOException, CallbackFailedException {
final String dataSourceName = table.getParameters().get(Constants.DRUID_DATA_SOURCE);
final String segmentDirectory = table.getParameters().get(DruidConstants.DRUID_SEGMENT_DIRECTORY) != null ? table.getParameters().get(DruidConstants.DRUID_SEGMENT_DIRECTORY) : HiveConf.getVar(getConf(), HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY);
final HdfsDataSegmentPusherConfig hdfsSegmentPusherConfig = new HdfsDataSegmentPusherConfig();
List<DataSegment> publishedDataSegmentList;
LOG.info(String.format("Moving [%s] Druid segments from staging directory [%s] to Deep storage [%s]", segmentsToLoad.size(), getStagingWorkingDir().toString(), segmentDirectory));
hdfsSegmentPusherConfig.setStorageDirectory(segmentDirectory);
DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(hdfsSegmentPusherConfig, getConf(), JSON_MAPPER);
publishedDataSegmentList = DruidStorageHandlerUtils.publishSegmentsAndCommit(getConnector(), getDruidMetadataStorageTablesConfig(), dataSourceName, segmentsToLoad, overwrite, getConf(), dataSegmentPusher);
return publishedDataSegmentList;
}
Aggregations