Use of org.apache.hadoop.fs.LocalFileSystem in project hive by apache.
From the class TestDruidStorageHandler, method testCommitCreateTablePlusCommitDropTableWithoutPurge.
@Test
public void testCommitCreateTablePlusCommitDropTableWithoutPurge() throws MetaException, IOException {
  druidStorageHandler.preCreateTable(tableMock);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
  DataSegment dataSegment = createSegment(new Path(taskDirPath, "index.zip").toString());
  Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment,
      new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
  DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
  druidStorageHandler.commitCreateTable(tableMock);
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(derbyConnectorRule.getConnector(),
          derbyConnectorRule.metadataTablesConfigSupplier().get())).toArray());
  druidStorageHandler.commitDropTable(tableMock, false);
  Assert.assertArrayEquals(Lists.newArrayList().toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(derbyConnectorRule.getConnector(),
          derbyConnectorRule.metadataTablesConfigSupplier().get())).toArray());
}
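The createSegment helper is defined elsewhere in the test class and is not shown in this snippet. As a rough sketch, assuming Druid's DataSegment.Builder API, such a helper could look like the following; the interval, version, and shard spec are illustrative placeholders, not the test's actual defaults:

// Hypothetical sketch of a createSegment-style helper; the real test class
// defines its own version (with overloads taking an interval, version, and shard spec).
private DataSegment createSegment(String indexZipPath) {
  return DataSegment.builder()
      .dataSource(DATA_SOURCE_NAME)
      .interval(new Interval(100, 150, DateTimeZone.UTC)) // illustrative interval
      .version("v0") // illustrative version
      .loadSpec(ImmutableMap.of("type", "hdfs", "path", indexZipPath))
      .shardSpec(new LinearShardSpec(0))
      .size(0)
      .build();
}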
Use of org.apache.hadoop.fs.LocalFileSystem in project hive by apache.
From the class TestDruidStorageHandler, method testCommitMultiInsertOverwriteTable.
@Test
public void testCommitMultiInsertOverwriteTable() throws MetaException, IOException {
  DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
  MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  druidStorageHandler.preCreateTable(tableMock);
  Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
  HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
  pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
  DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config,
      DruidStorageHandlerUtils.JSON_MAPPER);
  // This creates and publishes the segment that will be overwritten.
  List<DataSegment> existingSegments = Collections.singletonList(
      createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
          new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
  DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME,
      existingSegments, true, config, dataSegmentPusher);
  // Check that there is one datasource with the published segment.
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
  // The sequence is the following:
  // 1) INSERT with no segments -> the original segment is still present in the datasource
  // 2) INSERT OVERWRITE with no segments -> the datasource is empty
  // 3) INSERT OVERWRITE with no segments -> the datasource is empty
  // 4) INSERT with no segments -> the datasource is empty
  // 5) INSERT with one segment -> the datasource has one segment
  // 6) INSERT OVERWRITE with one segment -> the datasource has one segment
  // 7) INSERT with one segment -> the datasource has two segments
  // 8) INSERT OVERWRITE with no segments -> the datasource is empty
  // We start:
  // #1
  druidStorageHandler.commitInsertTable(tableMock, false);
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
  Assert.assertEquals(1, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
  // #2
  druidStorageHandler.commitInsertTable(tableMock, true);
  Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
  // #3
  druidStorageHandler.commitInsertTable(tableMock, true);
  Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
  // #4
  druidStorageHandler.commitInsertTable(tableMock, false);
  Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
  // #5
  DataSegment dataSegment1 = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
      new Interval(180, 250, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
  Path descriptorPath1 = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment1,
      new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
  DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment1, descriptorPath1);
  druidStorageHandler.commitInsertTable(tableMock, false);
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
  Assert.assertEquals(1, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
  // #6
  DataSegment dataSegment2 = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
      new Interval(200, 250, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
  Path descriptorPath2 = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment2,
      new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
  DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment2, descriptorPath2);
  druidStorageHandler.commitInsertTable(tableMock, true);
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
  Assert.assertEquals(1, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
  // #7
  DataSegment dataSegment3 = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
      new Interval(100, 200, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
  Path descriptorPath3 = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment3,
      new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
  DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment3, descriptorPath3);
  druidStorageHandler.commitInsertTable(tableMock, false);
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
  Assert.assertEquals(2, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
  // #8
  druidStorageHandler.commitInsertTable(tableMock, true);
  Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
}
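The getUsedSegmentsList helper used throughout these assertions is also defined elsewhere in the test class. A plausible sketch, assuming Druid's standard metadata schema in which each used segment row stores its serialized DataSegment in a payload column (the SQL and mapper below are an assumption, not verified Hive code):

// Hypothetical: fetch every used segment payload from the Derby metadata store
// and deserialize it back into a DataSegment.
private List<DataSegment> getUsedSegmentsList(DerbyConnectorTestUtility connector,
    MetadataStorageTablesConfig metadataStorageTablesConfig) {
  return connector.getDBI().withHandle(handle -> handle
      .createQuery(String.format("SELECT payload FROM %s WHERE used = true ORDER BY created_date ASC",
          metadataStorageTablesConfig.getSegmentsTable()))
      .map((index, resultSet, context) -> {
        try {
          return DruidStorageHandlerUtils.JSON_MAPPER.readValue(resultSet.getBytes("payload"), DataSegment.class);
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      })
      .list());
}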
Use of org.apache.hadoop.fs.LocalFileSystem in project hive by apache.
From the class TestDruidStorageHandler, method testDeleteSegment.
@Test
public void testDeleteSegment() throws IOException, SegmentLoadingException {
  String segmentRootPath = temporaryFolder.newFolder().getAbsolutePath();
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
  DataSegment dataSegment = createSegment(new Path(taskDirPath, "index.zip").toString());
  HdfsDataSegmentPusherConfig hdfsDSPConfig = new HdfsDataSegmentPusherConfig();
  hdfsDSPConfig.setStorageDirectory(segmentRootPath);
  HdfsDataSegmentPusher hdfsDataSegmentPusher = new HdfsDataSegmentPusher(hdfsDSPConfig, config,
      DruidStorageHandlerUtils.JSON_MAPPER);
  Path segmentOutputPath = JobHelper.makeFileNamePath(new Path(segmentRootPath), localFileSystem, dataSegment,
      JobHelper.INDEX_ZIP, hdfsDataSegmentPusher);
  Path indexPath = new Path(segmentOutputPath, "index.zip");
  DataSegment dataSegmentWithLoadspec = DataSegment.builder(dataSegment)
      .loadSpec(ImmutableMap.of("path", indexPath)).build();
  OutputStream outputStream = localFileSystem.create(indexPath, true);
  outputStream.close();
  Assert.assertTrue("index file was not created", localFileSystem.exists(indexPath));
  Assert.assertTrue(localFileSystem.exists(segmentOutputPath));
  druidStorageHandler.deleteSegment(dataSegmentWithLoadspec);
  // index file path format --> .../dataSource/interval/version/partitionNum/xxx.zip
  Assert.assertFalse("index file is still there", localFileSystem.exists(indexPath));
  // segmentOutputPath format --> .../dataSource/interval/version/partitionNum/
  Assert.assertFalse("partitionNum directory is still there", localFileSystem.exists(segmentOutputPath));
  Assert.assertFalse("version directory is still there", localFileSystem.exists(segmentOutputPath.getParent()));
  Assert.assertFalse("interval directory is still there",
      localFileSystem.exists(segmentOutputPath.getParent().getParent()));
  Assert.assertFalse("data source directory is still there",
      localFileSystem.exists(segmentOutputPath.getParent().getParent().getParent()));
}
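The assertions above encode deleteSegment's cleanup contract: it removes the index file and then prunes the partitionNum, version, interval, and data source directories that become empty. As an illustration of that behavior (not Hive's actual implementation), a minimal pruning loop over the Hadoop FileSystem API could look like this:

// Illustrative sketch of the cleanup the test asserts: delete the segment file,
// then walk upward deleting directories that are now empty, stopping at the storage root.
void deleteSegmentAndPruneEmptyParents(FileSystem fs, Path indexZip, Path storageRoot) throws IOException {
  fs.delete(indexZip, false);
  Path dir = indexZip.getParent();
  while (dir != null && !dir.equals(storageRoot) && fs.listStatus(dir).length == 0) {
    fs.delete(dir, false);
    dir = dir.getParent();
  }
}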
Use of org.apache.hadoop.fs.LocalFileSystem in project hive by apache.
From the class TestDruidStorageHandler, method testInsertIntoAppendOneMorePartition.
@Test
public void testInsertIntoAppendOneMorePartition() throws MetaException, IOException {
  DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
  MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
  druidStorageHandler.preCreateTable(tableMock);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
  HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
  pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
  DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config,
      DruidStorageHandlerUtils.JSON_MAPPER);
  List<DataSegment> existingSegments = Collections.singletonList(
      createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
          new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
  DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME,
      existingSegments, true, config, dataSegmentPusher);
  DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
      new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0));
  Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment,
      new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
  DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
  druidStorageHandler.commitInsertTable(tableMock, false);
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
  final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
  Assert.assertEquals(2, dataSegmentList.size());
  DataSegment persistedSegment = dataSegmentList.get(1);
  Assert.assertEquals("v0", persistedSegment.getVersion());
  Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
  Assert.assertEquals(1, persistedSegment.getShardSpec().getPartitionNum());
  Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(),
      dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
  Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()),
      persistedSegment.getLoadSpec());
  Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
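The point of this test: the appended segment has the same interval and version ("v0") as the already-published one, so commitInsertTable does not overwrite it but allocates the next linear partition number, which is what the getPartitionNum() assertion checks. A sketch of that allocation rule, assuming the handler takes the maximum existing partition number over the overlapping used segments (illustrative, not Hive's exact code):

// Illustrative: append a new segment after the existing ones by bumping
// the LinearShardSpec partition number past the current maximum.
static DataSegment allocateNextPartition(DataSegment newSegment, List<DataSegment> overlappingUsedSegments) {
  int maxPartitionNum = overlappingUsedSegments.stream()
      .mapToInt(segment -> segment.getShardSpec().getPartitionNum())
      .max()
      .orElse(-1);
  return DataSegment.builder(newSegment)
      .shardSpec(new LinearShardSpec(maxPartitionNum + 1))
      .build();
}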
Use of org.apache.hadoop.fs.LocalFileSystem in project hive by apache.
From the class TestDruidRecordWriter, method testWrite.
// This test is failing due to a Guava dependency conflict; Druid 0.13.0 should depend less on Guava.
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
  final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser,
      new TypeReference<Map<String, Object>>() {
      });
  DataSchema dataSchema = new DataSchema(dataSourceName, parserMap,
      new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") },
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      null, objectMapper);
  IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(),
      null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
    @Override
    public File getStorageDirectory() {
      return segmentOutputDir;
    }
  });
  Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20,
      segmentDescriptorPath, localFileSystem);
  List<DruidWritable> druidWritables = expectedRows.stream()
      .map(input -> new DruidWritable(ImmutableMap.<String, Object>builder()
          .putAll(input)
          .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
              Granularities.DAY.bucketStart(
                  new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
          .build()))
      .collect(Collectors.toList());
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null,
      ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
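This test round-trips data: it writes expectedRows through DruidRecordWriter, pushes the resulting segment locally, reloads it, and reads it back through an IngestSegmentFirehose before comparing against expectedRows. Given the schema above (default timestamp column, a host dimension, and a visited_sum metric input), each entry in expectedRows is presumably a map along these lines; the field values, and the omission of a unique_hosts input, are illustrative assumptions:

// Hypothetical shape of one expectedRows entry matching the test's parse spec.
ImmutableMap<String, Object> row = ImmutableMap.<String, Object>of(
    DruidConstants.DEFAULT_TIMESTAMP_COLUMN,
    new DateTime("2014-10-22T00:00:00.000Z", DateTimeZone.UTC).getMillis(),
    "host", "a.example.com",
    "visited_sum", 190L);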