Search in sources :

Example 1 with DataSegmentPusher

use of org.apache.druid.segment.loading.DataSegmentPusher in project druid by druid-io.

the class TaskLifecycleTest method testRealtimeIndexTaskFailure.

@Test(timeout = 60_000L)
public void testRealtimeIndexTaskFailure() throws Exception {
    dataSegmentPusher = new DataSegmentPusher() {

        @Deprecated
        @Override
        public String getPathForHadoop(String s) {
            return getPathForHadoop();
        }

        @Override
        public String getPathForHadoop() {
            throw new UnsupportedOperationException();
        }

        @Override
        public DataSegment push(File file, DataSegment dataSegment, boolean useUniquePath) {
            throw new RuntimeException("FAILURE");
        }

        @Override
        public Map<String, Object> makeLoadSpec(URI uri) {
            throw new UnsupportedOperationException();
        }
    };
    tb = setUpTaskToolboxFactory(dataSegmentPusher, handoffNotifierFactory, mdc);
    taskRunner = setUpThreadPoolTaskRunner(tb);
    taskQueue = setUpTaskQueue(taskStorage, taskRunner);
    monitorScheduler.addMonitor(EasyMock.anyObject(Monitor.class));
    EasyMock.expectLastCall().atLeastOnce();
    monitorScheduler.removeMonitor(EasyMock.anyObject(Monitor.class));
    EasyMock.expectLastCall().anyTimes();
    EasyMock.replay(monitorScheduler, queryRunnerFactoryConglomerate);
    RealtimeIndexTask realtimeIndexTask = newRealtimeIndexTask();
    final String taskId = realtimeIndexTask.getId();
    taskQueue.start();
    taskQueue.add(realtimeIndexTask);
    // Wait for realtime index task to fail
    while (tsqa.getStatus(taskId).get().isRunnable()) {
        Thread.sleep(10);
    }
    TaskStatus status = tsqa.getStatus(taskId).get();
    Assert.assertTrue("Task should be in Failure state", status.isFailure());
    Assert.assertEquals(taskLocation, status.getLocation());
    EasyMock.verify(monitorScheduler, queryRunnerFactoryConglomerate);
}
Also used : DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) URI(java.net.URI) Monitor(org.apache.druid.java.util.metrics.Monitor) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) FireDepartmentTest(org.apache.druid.segment.realtime.FireDepartmentTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 2 with DataSegmentPusher

use of org.apache.druid.segment.loading.DataSegmentPusher in project druid by druid-io.

the class KinesisIndexTaskTest method makeToolboxFactory.

private void makeToolboxFactory() throws IOException {
    directory = tempFolder.newFolder();
    final TestUtils testUtils = new TestUtils();
    final ObjectMapper objectMapper = testUtils.getTestObjectMapper();
    objectMapper.setInjectableValues(((InjectableValues.Std) objectMapper.getInjectableValues()).addValue(AWSCredentialsConfig.class, new AWSCredentialsConfig()));
    for (Module module : new KinesisIndexingServiceModule().getJacksonModules()) {
        objectMapper.registerModule(module);
    }
    final TaskConfig taskConfig = new TaskConfig(new File(directory, "baseDir").getPath(), new File(directory, "baseTaskDir").getPath(), null, 50000, null, true, null, null, null, false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
    final TestDerbyConnector derbyConnector = derby.getConnector();
    derbyConnector.createDataSourceTable();
    derbyConnector.createPendingSegmentsTable();
    derbyConnector.createSegmentTable();
    derbyConnector.createRulesTable();
    derbyConnector.createConfigTable();
    derbyConnector.createTaskTables();
    derbyConnector.createAuditTable();
    taskStorage = new MetadataTaskStorage(derbyConnector, new TaskStorageConfig(null), new DerbyMetadataStorageActionHandlerFactory(derbyConnector, derby.metadataTablesConfigSupplier().get(), objectMapper));
    metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator(testUtils.getTestObjectMapper(), derby.metadataTablesConfigSupplier().get(), derbyConnector);
    taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator);
    final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(taskLockbox, taskStorage, metadataStorageCoordinator, emitter, new SupervisorManager(null) {

        @Override
        public boolean checkPointDataSourceMetadata(String supervisorId, int taskGroupId, @Nullable DataSourceMetadata checkpointMetadata) {
            LOG.info("Adding checkpoint hash to the set");
            checkpointRequestsHash.add(Objects.hash(supervisorId, taskGroupId, checkpointMetadata));
            return true;
        }
    });
    final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory(taskStorage, taskActionToolbox, new TaskAuditLogConfig(false));
    final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() {

        @Override
        public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) {
            if (doHandoff) {
                // Simulate immediate handoff
                exec.execute(handOffRunnable);
            }
            return true;
        }

        @Override
        public void start() {
        // Noop
        }

        @Override
        public void close() {
        // Noop
        }
    };
    final LocalDataSegmentPusherConfig dataSegmentPusherConfig = new LocalDataSegmentPusherConfig();
    dataSegmentPusherConfig.storageDirectory = getSegmentDirectory();
    final DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(dataSegmentPusherConfig);
    toolboxFactory = new TaskToolboxFactory(taskConfig, // taskExecutorNode
    null, taskActionClientFactory, emitter, dataSegmentPusher, new TestDataSegmentKiller(), // DataSegmentMover
    null, // DataSegmentArchiver
    null, new TestDataSegmentAnnouncer(), EasyMock.createNiceMock(DataSegmentServerAnnouncer.class), handoffNotifierFactory, this::makeTimeseriesOnlyConglomerate, DirectQueryProcessingPool.INSTANCE, NoopJoinableFactory.INSTANCE, () -> EasyMock.createMock(MonitorScheduler.class), new SegmentCacheManagerFactory(testUtils.getTestObjectMapper()), testUtils.getTestObjectMapper(), testUtils.getTestIndexIO(), MapCache.create(1024), new CacheConfig(), new CachePopulatorStats(), testUtils.getTestIndexMergerV9(), EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0), new SingleFileTaskReportFileWriter(reportsFile), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, new NoopChatHandlerProvider(), testUtils.getRowIngestionMetersFactory(), new TestAppenderatorsManager(), new NoopIndexingServiceClient(), null, null, null);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) DirectQueryProcessingPool(org.apache.druid.query.DirectQueryProcessingPool) Arrays(java.util.Arrays) LookupNodeService(org.apache.druid.discovery.LookupNodeService) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) NoopJoinableFactory(org.apache.druid.segment.join.NoopJoinableFactory) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) AfterClass(org.junit.AfterClass) Execs(org.apache.druid.java.util.common.concurrent.Execs) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) CacheConfig(org.apache.druid.client.cache.CacheConfig) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) Executors(java.util.concurrent.Executors) TaskState(org.apache.druid.indexer.TaskState) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) SeekableStreamIndexTaskTestBase(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTestBase) DruidNodeAnnouncer(org.apache.druid.discovery.DruidNodeAnnouncer) RunWith(org.junit.runner.RunWith) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) TaskStatus(org.apache.druid.indexer.TaskStatus) LinkedHashMap(java.util.LinkedHashMap) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) Nullable(javax.annotation.Nullable) Before(org.junit.Before) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) Executor(java.util.concurrent.Executor) DataSegmentServerAnnouncer(org.apache.druid.server.coordination.DataSegmentServerAnnouncer) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Futures(com.google.common.util.concurrent.Futures) TreeMap(java.util.TreeMap) DefaultQueryRunnerFactoryConglomerate(org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate) DruidNode(org.apache.druid.server.DruidNode) Named(com.google.inject.name.Named) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) Module(com.fasterxml.jackson.databind.Module) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TimeoutException(java.util.concurrent.TimeoutException) TaskResource(org.apache.druid.indexing.common.task.TaskResource) SeekableStreamSupervisor(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor) SequenceMetadata(org.apache.druid.indexing.seekablestream.SequenceMetadata) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Task(org.apache.druid.indexing.common.task.Task) After(org.junit.After) ServerType(org.apache.druid.server.coordination.ServerType) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) DerbyMetadataStorageActionHandlerFactory(org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory) Parameterized(org.junit.runners.Parameterized) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DateTimes(org.apache.druid.java.util.common.DateTimes) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) TestUtils(org.apache.druid.indexing.common.TestUtils) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) Objects(java.util.Objects) DataNodeService(org.apache.druid.discovery.DataNodeService) List(java.util.List) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) KinesisSupervisor(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisor) MetadataTaskStorage(org.apache.druid.indexing.overlord.MetadataTaskStorage) MapCache(org.apache.druid.client.cache.MapCache) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) BeforeClass(org.junit.BeforeClass) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashSet(java.util.HashSet) JsonTypeName(com.fasterxml.jackson.annotation.JsonTypeName) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Period(org.joda.time.Period) TestAppenderatorsManager(org.apache.druid.indexing.common.task.TestAppenderatorsManager) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) AWSCredentialsConfig(org.apache.druid.common.aws.AWSCredentialsConfig) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) TimeUnit(java.util.concurrent.TimeUnit) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) MonitorScheduler(org.apache.druid.java.util.metrics.MonitorScheduler) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner) TemporaryFolder(org.junit.rules.TemporaryFolder) DerbyMetadataStorageActionHandlerFactory(org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) TestUtils(org.apache.druid.indexing.common.TestUtils) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Executor(java.util.concurrent.Executor) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) CacheConfig(org.apache.druid.client.cache.CacheConfig) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) LookupNodeService(org.apache.druid.discovery.LookupNodeService) AWSCredentialsConfig(org.apache.druid.common.aws.AWSCredentialsConfig) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) Module(com.fasterxml.jackson.databind.Module) MetadataTaskStorage(org.apache.druid.indexing.overlord.MetadataTaskStorage) DataNodeService(org.apache.druid.discovery.DataNodeService) File(java.io.File) TestAppenderatorsManager(org.apache.druid.indexing.common.task.TestAppenderatorsManager)

Example 3 with DataSegmentPusher

use of org.apache.druid.segment.loading.DataSegmentPusher in project hive by apache.

the class TestDruidStorageHandler method testCommitInsertIntoWhenDestinationSegmentFileExist.

@Test
public void testCommitInsertIntoWhenDestinationSegmentFileExist() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Collections.singletonList(createSegment(new Path(taskDirPath, "index_old.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    DataSegment dataSegment = createSegment(new Path(taskDirPath, "index.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
    // Create segment file at the destination location with LinearShardSpec(2)
    DataSegment segment = createSegment(new Path(taskDirPath, "index_conflict.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(1));
    Path segmentPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(segment, DruidStorageHandlerUtils.INDEX_ZIP));
    FileUtils.writeStringToFile(new File(segmentPath.toUri()), "dummy");
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
    Assert.assertEquals(2, dataSegmentList.size());
    DataSegment persistedSegment = dataSegmentList.get(1);
    // Insert into appends to old version
    Assert.assertEquals("v0", persistedSegment.getVersion());
    Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
    // insert into should skip and increment partition number to 3
    Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
    Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
    Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) DataSegment(org.apache.druid.timeline.DataSegment) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) File(java.io.File) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 4 with DataSegmentPusher

use of org.apache.druid.segment.loading.DataSegmentPusher in project hive by apache.

the class TestDruidStorageHandler method testCommitInsertIntoWithConflictingIntervalSegment.

@Test(expected = MetaException.class)
public void testCommitInsertIntoWithConflictingIntervalSegment() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, "index_old_1.zip").toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)), createSegment(new Path(taskDirPath, "index_old_2.zip").toString(), new Interval(150, 200, DateTimeZone.UTC), "v0", new LinearShardSpec(0)), createSegment(new Path(taskDirPath, "index_old_3.zip").toString(), new Interval(200, 300, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(taskDirPath.toString());
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    // Try appending segment with conflicting interval
    DataSegment conflictingSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 300, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(conflictingSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, conflictingSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) DataSegment(org.apache.druid.timeline.DataSegment) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 5 with DataSegmentPusher

use of org.apache.druid.segment.loading.DataSegmentPusher in project hive by apache.

the class TestDruidStorageHandler method testCommitInsertOverwriteTable.

@Test
public void testCommitInsertOverwriteTable() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    // This create and publish the segment to be overwritten
    List<DataSegment> existingSegments = Collections.singletonList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
    // This creates and publish new segment
    DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(180, 250, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, true);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
    Assert.assertEquals(1, dataSegmentList.size());
    DataSegment persistedSegment = Iterables.getOnlyElement(dataSegmentList);
    Assert.assertEquals(dataSegment, persistedSegment);
    Assert.assertEquals(dataSegment.getVersion(), persistedSegment.getVersion());
    Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
    Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) DataSegment(org.apache.druid.timeline.DataSegment) File(java.io.File) Interval(org.joda.time.Interval) Test(org.junit.Test)

Aggregations

DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher)14 DataSegment (org.apache.druid.timeline.DataSegment)11 Test (org.junit.Test)11 File (java.io.File)9 Interval (org.joda.time.Interval)9 HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher)8 HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig)8 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)8 Path (org.apache.hadoop.fs.Path)8 MetadataStorageTablesConfig (org.apache.druid.metadata.MetadataStorageTablesConfig)7 LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec)7 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 IOException (java.io.IOException)4 List (java.util.List)4 DataSchema (org.apache.druid.segment.indexing.DataSchema)4 ImmutableList (com.google.common.collect.ImmutableList)3 Map (java.util.Map)3 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)2 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)2