Example 6 with HdfsDataSegmentPusherConfig

Use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig in project hive by apache.

From class TestDruidStorageHandler, method testDeleteSegment.

@Test
public void testDeleteSegment() throws IOException, SegmentLoadingException {
    String segmentRootPath = temporaryFolder.newFolder().getAbsolutePath();
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    DataSegment dataSegment = createSegment(new Path(taskDirPath, "index.zip").toString());
    HdfsDataSegmentPusherConfig hdfsDSPConfig = new HdfsDataSegmentPusherConfig();
    hdfsDSPConfig.setStorageDirectory(segmentRootPath);
    HdfsDataSegmentPusher hdfsDataSegmentPusher = new HdfsDataSegmentPusher(hdfsDSPConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    Path segmentOutputPath = JobHelper.makeFileNamePath(new Path(segmentRootPath), localFileSystem, dataSegment, JobHelper.INDEX_ZIP, hdfsDataSegmentPusher);
    Path indexPath = new Path(segmentOutputPath, "index.zip");
    DataSegment dataSegmentWithLoadSpec = DataSegment.builder(dataSegment).loadSpec(ImmutableMap.of("path", indexPath)).build();
    OutputStream outputStream = localFileSystem.create(indexPath, true);
    outputStream.close();
    Assert.assertTrue("index file is not created ??", localFileSystem.exists(indexPath));
    Assert.assertTrue(localFileSystem.exists(segmentOutputPath));
    druidStorageHandler.deleteSegment(dataSegmentWithLoadSpec);
    // path format --> .../dataSource/interval/version/partitionNum/xxx.zip
    Assert.assertFalse("Index file still there ??", localFileSystem.exists(indexPath));
    // path format of segmentOutputPath --> .../dataSource/interval/version/partitionNum/
    Assert.assertFalse("PartitionNum directory still there ??", localFileSystem.exists(segmentOutputPath));
    Assert.assertFalse("Version directory still there ??", localFileSystem.exists(segmentOutputPath.getParent()));
    Assert.assertFalse("Interval directory still there ??", localFileSystem.exists(segmentOutputPath.getParent().getParent()));
    Assert.assertFalse("Data source directory still there ??", localFileSystem.exists(segmentOutputPath.getParent().getParent().getParent()));
}
Also used: Path (org.apache.hadoop.fs.Path), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig), OutputStream (java.io.OutputStream), DataSegment (org.apache.druid.timeline.DataSegment), HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher), Test (org.junit.Test)
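
The cascade of getParent() assertions works because HdfsDataSegmentPusher lays segments out as .../dataSource/interval/version/partitionNum/index.zip, so deleting the segment is expected to prune the now-empty parent directories as well. A minimal sketch of the same check as a standalone helper (hypothetical, not part of the Hive sources):

static void assertSegmentBranchDeleted(FileSystem fs, Path partitionDir) throws IOException {
    // Walk up from .../dataSource/interval/version/partitionNum and verify that
    // the partitionNum, version, interval, and dataSource levels are all gone.
    Path dir = partitionDir;
    for (int level = 0; level < 4 && dir != null; level++, dir = dir.getParent()) {
        if (fs.exists(dir)) {
            throw new AssertionError(dir + " still exists");
        }
    }
}

Also used: FileSystem (org.apache.hadoop.fs.FileSystem), Path (org.apache.hadoop.fs.Path), IOException (java.io.IOException)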

Example 7 with HdfsDataSegmentPusherConfig

Use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig in project hive by apache.

From class TestDruidStorageHandler, method testInsertIntoAppendOneMorePartition.

@Test
public void testInsertIntoAppendOneMorePartition() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    List<DataSegment> existingSegments = Collections.singletonList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
    Assert.assertEquals(2, dataSegmentList.size());
    DataSegment persistedSegment = dataSegmentList.get(1);
    Assert.assertEquals("v0", persistedSegment.getVersion());
    Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
    Assert.assertEquals(1, persistedSegment.getShardSpec().getPartitionNum());
    Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
    Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Also used: Path (org.apache.hadoop.fs.Path), MetadataStorageTablesConfig (org.apache.druid.metadata.MetadataStorageTablesConfig), DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher), HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec), HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig), DataSegment (org.apache.druid.timeline.DataSegment), File (java.io.File), Interval (org.joda.time.Interval), Test (org.junit.Test)
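
The assertion to note here is the partition number: the existing segment was published with LinearShardSpec(0), and because a linear shard spec can be extended, the appended segment is assigned the next slot. A minimal sketch of that arithmetic (hypothetical class, outside the test):

class LinearShardSpecAppendSketch {
    // LinearShardSpec partitions are consecutive integers, so an append to an
    // existing (interval, version) pair simply takes the next partition number.
    static LinearShardSpec nextPartition(LinearShardSpec existing) {
        return new LinearShardSpec(existing.getPartitionNum() + 1);
    }
    // nextPartition(new LinearShardSpec(0)).getPartitionNum() == 1, which is
    // what the test above asserts for the persisted segment.
}

Also used: LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec)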

Example 8 with HdfsDataSegmentPusherConfig

Use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig in project hive by apache.

From class TestDruidStorageHandler, method testCommitInsertIntoTable.

@Test
public void testCommitInsertIntoTable() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Collections.singletonList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
    Assert.assertEquals(2, dataSegmentList.size());
    DataSegment persistedSegment = dataSegmentList.get(1);
    // Insert into appends to old version
    Assert.assertEquals("v0", persistedSegment.getVersion());
    Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
    Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
    Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
    Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Also used: Path (org.apache.hadoop.fs.Path), MetadataStorageTablesConfig (org.apache.druid.metadata.MetadataStorageTablesConfig), DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher), HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec), HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig), DataSegment (org.apache.druid.timeline.DataSegment), File (java.io.File), Interval (org.joda.time.Interval), Test (org.junit.Test)
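
Note the rewrite that commitInsertTable performs: the segment was staged as "v1" with partition 0, but an INSERT INTO must extend the already-committed timeline, so it is persisted as "v0" with partition 2 (the existing segment occupied partition 1). A hedged sketch of that rewrite, reusing the builder pattern from Example 6 (createSegment is the test helper, not shown in this excerpt; stagedPath and interval are placeholders):

DataSegment staged = createSegment(stagedPath, interval, "v1", new LinearShardSpec(0));
DataSegment persisted = DataSegment.builder(staged)
    // reuse the committed version instead of the staged one
    .version("v0")
    // take the next free partition after the existing LinearShardSpec(1)
    .shardSpec(new LinearShardSpec(2))
    .build();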

Example 9 with HdfsDataSegmentPusherConfig

Use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig in project hive by apache.

From class TestDruidStorageHandler, method testCommitInsertIntoWithNonExtendableSegment.

@Test(expected = MetaException.class)
public void testCommitInsertIntoWithNonExtendableSegment() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, "index_old_1.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", NoneShardSpec.instance()), createSegment(new Path(taskDirPath, "index_old_2.zip").toString(), new Interval(200, 250, DateTimeZone.UTC), "v0", new LinearShardSpec(0)), createSegment(new Path(taskDirPath, "index_old_3.zip").toString(), new Interval(250, 300, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(taskDirPath.toString());
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    // Try appending to a non-extendable shard spec
    DataSegment conflictingSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(conflictingSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, conflictingSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
}
Also used: Path (org.apache.hadoop.fs.Path), MetadataStorageTablesConfig (org.apache.druid.metadata.MetadataStorageTablesConfig), DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher), HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec), HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig), DataSegment (org.apache.druid.timeline.DataSegment), Interval (org.joda.time.Interval), Test (org.junit.Test)
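
The expected MetaException comes from the shard-spec check rather than the interval arithmetic alone: the first existing segment covers Interval(100, 150) with NoneShardSpec, which represents the single, whole segment for its interval and cannot be extended with more partitions. A small sketch of the overlap that triggers the conflict (hypothetical class, reusing only types shown in the test):

class NonExtendableConflictSketch {
    static boolean conflicts() {
        // Existing segment: Interval(100, 150) with NoneShardSpec -> not extendable.
        Interval occupied = new Interval(100, 150, DateTimeZone.UTC);
        // The staged append targets the same interval, but there is no partition
        // slot to extend into, so commitInsertTable is expected to throw.
        Interval incoming = new Interval(100, 150, DateTimeZone.UTC);
        return occupied.overlaps(incoming); // true -> conflict
    }
}

Also used: Interval (org.joda.time.Interval), DateTimeZone (org.joda.time.DateTimeZone)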

Example 10 with HdfsDataSegmentPusherConfig

Use of org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig in project hive by apache.

From class DruidStorageHandler, method loadAndCommitDruidSegments.

/**
 * Moves the segments to deep storage, then commits their metadata to the Druid
 * metadata store in a single transaction.
 *
 * @param table Hive table
 * @param overwrite true if this is an insert overwrite of the table.
 * @param segmentsToLoad segments to move and publish.
 */
private List<DataSegment> loadAndCommitDruidSegments(Table table, boolean overwrite, List<DataSegment> segmentsToLoad) throws IOException, CallbackFailedException {
    final String dataSourceName = table.getParameters().get(Constants.DRUID_DATA_SOURCE);
    final String segmentDirectory = table.getParameters().get(DruidConstants.DRUID_SEGMENT_DIRECTORY) != null ? table.getParameters().get(DruidConstants.DRUID_SEGMENT_DIRECTORY) : HiveConf.getVar(getConf(), HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY);
    final HdfsDataSegmentPusherConfig hdfsSegmentPusherConfig = new HdfsDataSegmentPusherConfig();
    List<DataSegment> publishedDataSegmentList;
    LOG.info(String.format("Moving [%s] Druid segments from staging directory [%s] to Deep storage [%s]", segmentsToLoad.size(), getStagingWorkingDir().toString(), segmentDirectory));
    hdfsSegmentPusherConfig.setStorageDirectory(segmentDirectory);
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(hdfsSegmentPusherConfig, getConf(), JSON_MAPPER);
    publishedDataSegmentList = DruidStorageHandlerUtils.publishSegmentsAndCommit(getConnector(), getDruidMetadataStorageTablesConfig(), dataSourceName, segmentsToLoad, overwrite, getConf(), dataSegmentPusher);
    return publishedDataSegmentList;
}
Also used: DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher), HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher), HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig), DataSegment (org.apache.druid.timeline.DataSegment)
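
All five examples share the same bootstrap: point HdfsDataSegmentPusherConfig at a deep-storage directory, then hand it to HdfsDataSegmentPusher together with the Hadoop configuration and a JSON mapper. A minimal standalone sketch (the storage directory is illustrative, and a bare ObjectMapper stands in for the DruidStorageHandlerUtils.JSON_MAPPER used by the Hive code):

class HdfsPusherBootstrapSketch {
    static DataSegmentPusher buildPusher() {
        Configuration conf = new Configuration();
        // The Hive code passes DruidStorageHandlerUtils.JSON_MAPPER here.
        ObjectMapper jsonMapper = new ObjectMapper();
        HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
        // Illustrative deep-storage root; the examples read it from
        // HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY or the table parameters.
        pusherConfig.setStorageDirectory("hdfs://namenode/druid/segments");
        return new HdfsDataSegmentPusher(pusherConfig, conf, jsonMapper);
    }
}

Also used: Configuration (org.apache.hadoop.conf.Configuration), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher), HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher), HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig)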

Aggregations

HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher): 10
HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig): 10
DataSegment (org.apache.druid.timeline.DataSegment): 9
DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher): 8
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 8
Path (org.apache.hadoop.fs.Path): 8
Test (org.junit.Test): 8
MetadataStorageTablesConfig (org.apache.druid.metadata.MetadataStorageTablesConfig): 7
LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec): 7
Interval (org.joda.time.Interval): 7
File (java.io.File): 4
OutputStream (java.io.OutputStream): 1