Search in sources :

Example 6 with MetadataStorageTablesConfig

use of io.druid.metadata.MetadataStorageTablesConfig in project hive by apache.

the class TestDruidStorageHandler method testCommitMultiInsertOverwriteTable.

@Test
public void testCommitMultiInsertOverwriteTable() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    druidStorageHandler.preCreateTable(tableMock);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    // This create and publish the segment to be overwritten
    List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    // Check that there is one datasource with the published segment
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    // Sequence is the following:
    // 1) INSERT with no segments -> Original segment still present in the datasource
    // 2) INSERT OVERWRITE with no segments -> Datasource is empty
    // 3) INSERT OVERWRITE with no segments -> Datasource is empty
    // 4) INSERT with no segments -> Datasource is empty
    // 5) INSERT with one segment -> Datasource has one segment
    // 6) INSERT OVERWRITE with one segment -> Datasource has one segment
    // 7) INSERT with one segment -> Datasource has two segments
    // 8) INSERT OVERWRITE with no segments -> Datasource is empty
    // We start:
    // #1
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    Assert.assertEquals(1, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
    // #2
    druidStorageHandler.commitInsertTable(tableMock, true);
    Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
    // #3
    druidStorageHandler.commitInsertTable(tableMock, true);
    Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
    // #4
    druidStorageHandler.commitInsertTable(tableMock, true);
    Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
    // #5
    DataSegment dataSegment1 = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(180, 250, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath1 = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment1, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment1, descriptorPath1);
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    Assert.assertEquals(1, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
    // #6
    DataSegment dataSegment2 = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(200, 250, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath2 = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment2, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment2, descriptorPath2);
    druidStorageHandler.commitInsertTable(tableMock, true);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    Assert.assertEquals(1, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
    // #7
    DataSegment dataSegment3 = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 200, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath3 = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment3, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment3, descriptorPath3);
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    Assert.assertEquals(2, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
    // #8
    druidStorageHandler.commitInsertTable(tableMock, true);
    Assert.assertEquals(0, getUsedSegmentsList(connector, metadataStorageTablesConfig).size());
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegment(io.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 7 with MetadataStorageTablesConfig

use of io.druid.metadata.MetadataStorageTablesConfig in project hive by apache.

the class TestDruidStorageHandler method testCommitInsertOverwriteTable.

@Test
public void testCommitInsertOverwriteTable() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    // This create and publish the segment to be overwritten
    List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    // This creates and publish new segment
    DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(180, 250, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, true);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
    Assert.assertEquals(1, dataSegmentList.size());
    DataSegment persistedSegment = Iterables.getOnlyElement(dataSegmentList);
    Assert.assertEquals(dataSegment, persistedSegment);
    Assert.assertEquals(dataSegment.getVersion(), persistedSegment.getVersion());
    Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
    Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegment(io.druid.timeline.DataSegment) File(java.io.File) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 8 with MetadataStorageTablesConfig

use of io.druid.metadata.MetadataStorageTablesConfig in project hive by apache.

the class TestDruidStorageHandler method testCommitInsertIntoWithConflictingIntervalSegment.

@Test(expected = IllegalStateException.class)
public void testCommitInsertIntoWithConflictingIntervalSegment() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, "index_old_1.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)), createSegment(new Path(taskDirPath, "index_old_2.zip").toString(), new Interval(150, 200, DateTimeZone.UTC), "v0", new LinearShardSpec(0)), createSegment(new Path(taskDirPath, "index_old_3.zip").toString(), new Interval(200, 300, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(taskDirPath.toString());
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    // Try appending segment with conflicting interval
    DataSegment conflictingSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 300, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(conflictingSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, conflictingSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) DataSegment(io.druid.timeline.DataSegment) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 9 with MetadataStorageTablesConfig

use of io.druid.metadata.MetadataStorageTablesConfig in project hive by apache.

the class TestDruidStorageHandler method testCommitInsertIntoTable.

@Test
public void testCommitInsertIntoTable() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
    Assert.assertEquals(2, dataSegmentList.size());
    DataSegment persistedSegment = dataSegmentList.get(1);
    // Insert into appends to old version
    Assert.assertEquals("v0", persistedSegment.getVersion());
    Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
    Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
    Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
    Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) DataSegment(io.druid.timeline.DataSegment) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) File(java.io.File) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 10 with MetadataStorageTablesConfig

use of io.druid.metadata.MetadataStorageTablesConfig in project hive by apache.

the class TestDruidStorageHandler method testCommitInsertIntoWithNonExtendableSegment.

@Test(expected = IllegalStateException.class)
public void testCommitInsertIntoWithNonExtendableSegment() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, "index_old_1.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new NoneShardSpec()), createSegment(new Path(taskDirPath, "index_old_2.zip").toString(), new Interval(200, 250, DateTimeZone.UTC), "v0", new LinearShardSpec(0)), createSegment(new Path(taskDirPath, "index_old_3.zip").toString(), new Interval(250, 300, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(taskDirPath.toString());
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    // Try appending to non extendable shard spec
    DataSegment conflictingSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(conflictingSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, conflictingSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) DataSegment(io.druid.timeline.DataSegment) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) Interval(org.joda.time.Interval) Test(org.junit.Test)

Aggregations

MetadataStorageTablesConfig (io.druid.metadata.MetadataStorageTablesConfig)10 DataSegmentPusher (io.druid.segment.loading.DataSegmentPusher)8 HdfsDataSegmentPusher (io.druid.storage.hdfs.HdfsDataSegmentPusher)8 HdfsDataSegmentPusherConfig (io.druid.storage.hdfs.HdfsDataSegmentPusherConfig)8 DataSegment (io.druid.timeline.DataSegment)8 LinearShardSpec (io.druid.timeline.partition.LinearShardSpec)8 Path (org.apache.hadoop.fs.Path)8 Interval (org.joda.time.Interval)8 Test (org.junit.Test)8 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)7 File (java.io.File)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper)2 InjectableValues (com.fasterxml.jackson.databind.InjectableValues)1 NamedType (com.fasterxml.jackson.databind.jsontype.NamedType)1 SmileFactory (com.fasterxml.jackson.dataformat.smile.SmileFactory)1 Throwables (com.google.common.base.Throwables)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Interner (com.google.common.collect.Interner)1