Search in sources :

Example 1 with LocalDataSegmentPusher

use of org.apache.druid.segment.loading.LocalDataSegmentPusher in project druid by druid-io.

the class HdfsDataSegmentPusherTest method shouldMakeDefaultSegmentOutputPathIfNotHDFS.

@Test
public void shouldMakeDefaultSegmentOutputPathIfNotHDFS() {
    final HadoopIngestionSpec schema;
    try {
        schema = objectMapper.readValue("{\n" + "    \"dataSchema\": {\n" + "        \"dataSource\": \"the:data:source\",\n" + "        \"metricsSpec\": [],\n" + "        \"granularitySpec\": {\n" + "            \"type\": \"uniform\",\n" + "            \"segmentGranularity\": \"hour\",\n" + "            \"intervals\": [\"2012-07-10/P1D\"]\n" + "        }\n" + "    },\n" + "    \"ioConfig\": {\n" + "        \"type\": \"hadoop\",\n" + "        \"segmentOutputPath\": \"/tmp/dru:id/data:test\"\n" + "    }\n" + "}", HadoopIngestionSpec.class);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version")));
    Bucket bucket = new Bucket(4711, new DateTime(2012, 07, 10, 5, 30, ISOChronology.getInstanceUTC()), 4712);
    Path path = JobHelper.makeFileNamePath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new LocalFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(), cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, 0), JobHelper.INDEX_ZIP, new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig()));
    Assert.assertEquals("file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:" + "version/4712/index.zip", path.toString());
    path = JobHelper.makeTmpPath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new LocalFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(), cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, 0), new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0), new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig()));
    Assert.assertEquals("file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:" + "version/4712/index.zip.0", path.toString());
}
Also used : HadoopIngestionSpec(org.apache.druid.indexer.HadoopIngestionSpec) Path(org.apache.hadoop.fs.Path) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) DataSegment(org.apache.druid.timeline.DataSegment) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) ExpectedException(org.junit.rules.ExpectedException) IOException(java.io.IOException) DateTime(org.joda.time.DateTime) Bucket(org.apache.druid.indexer.Bucket) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)

Example 2 with LocalDataSegmentPusher

use of org.apache.druid.segment.loading.LocalDataSegmentPusher in project druid by druid-io.

the class KinesisIndexTaskTest method makeToolboxFactory.

private void makeToolboxFactory() throws IOException {
    directory = tempFolder.newFolder();
    final TestUtils testUtils = new TestUtils();
    final ObjectMapper objectMapper = testUtils.getTestObjectMapper();
    objectMapper.setInjectableValues(((InjectableValues.Std) objectMapper.getInjectableValues()).addValue(AWSCredentialsConfig.class, new AWSCredentialsConfig()));
    for (Module module : new KinesisIndexingServiceModule().getJacksonModules()) {
        objectMapper.registerModule(module);
    }
    final TaskConfig taskConfig = new TaskConfig(new File(directory, "baseDir").getPath(), new File(directory, "baseTaskDir").getPath(), null, 50000, null, true, null, null, null, false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
    final TestDerbyConnector derbyConnector = derby.getConnector();
    derbyConnector.createDataSourceTable();
    derbyConnector.createPendingSegmentsTable();
    derbyConnector.createSegmentTable();
    derbyConnector.createRulesTable();
    derbyConnector.createConfigTable();
    derbyConnector.createTaskTables();
    derbyConnector.createAuditTable();
    taskStorage = new MetadataTaskStorage(derbyConnector, new TaskStorageConfig(null), new DerbyMetadataStorageActionHandlerFactory(derbyConnector, derby.metadataTablesConfigSupplier().get(), objectMapper));
    metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator(testUtils.getTestObjectMapper(), derby.metadataTablesConfigSupplier().get(), derbyConnector);
    taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator);
    final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(taskLockbox, taskStorage, metadataStorageCoordinator, emitter, new SupervisorManager(null) {

        @Override
        public boolean checkPointDataSourceMetadata(String supervisorId, int taskGroupId, @Nullable DataSourceMetadata checkpointMetadata) {
            LOG.info("Adding checkpoint hash to the set");
            checkpointRequestsHash.add(Objects.hash(supervisorId, taskGroupId, checkpointMetadata));
            return true;
        }
    });
    final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory(taskStorage, taskActionToolbox, new TaskAuditLogConfig(false));
    final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() {

        @Override
        public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) {
            if (doHandoff) {
                // Simulate immediate handoff
                exec.execute(handOffRunnable);
            }
            return true;
        }

        @Override
        public void start() {
        // Noop
        }

        @Override
        public void close() {
        // Noop
        }
    };
    final LocalDataSegmentPusherConfig dataSegmentPusherConfig = new LocalDataSegmentPusherConfig();
    dataSegmentPusherConfig.storageDirectory = getSegmentDirectory();
    final DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(dataSegmentPusherConfig);
    toolboxFactory = new TaskToolboxFactory(taskConfig, // taskExecutorNode
    null, taskActionClientFactory, emitter, dataSegmentPusher, new TestDataSegmentKiller(), // DataSegmentMover
    null, // DataSegmentArchiver
    null, new TestDataSegmentAnnouncer(), EasyMock.createNiceMock(DataSegmentServerAnnouncer.class), handoffNotifierFactory, this::makeTimeseriesOnlyConglomerate, DirectQueryProcessingPool.INSTANCE, NoopJoinableFactory.INSTANCE, () -> EasyMock.createMock(MonitorScheduler.class), new SegmentCacheManagerFactory(testUtils.getTestObjectMapper()), testUtils.getTestObjectMapper(), testUtils.getTestIndexIO(), MapCache.create(1024), new CacheConfig(), new CachePopulatorStats(), testUtils.getTestIndexMergerV9(), EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0), new SingleFileTaskReportFileWriter(reportsFile), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, new NoopChatHandlerProvider(), testUtils.getRowIngestionMetersFactory(), new TestAppenderatorsManager(), new NoopIndexingServiceClient(), null, null, null);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) DirectQueryProcessingPool(org.apache.druid.query.DirectQueryProcessingPool) Arrays(java.util.Arrays) LookupNodeService(org.apache.druid.discovery.LookupNodeService) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) NoopJoinableFactory(org.apache.druid.segment.join.NoopJoinableFactory) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) AfterClass(org.junit.AfterClass) Execs(org.apache.druid.java.util.common.concurrent.Execs) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) CacheConfig(org.apache.druid.client.cache.CacheConfig) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) Executors(java.util.concurrent.Executors) TaskState(org.apache.druid.indexer.TaskState) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) SeekableStreamIndexTaskTestBase(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTestBase) DruidNodeAnnouncer(org.apache.druid.discovery.DruidNodeAnnouncer) RunWith(org.junit.runner.RunWith) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) TaskStatus(org.apache.druid.indexer.TaskStatus) LinkedHashMap(java.util.LinkedHashMap) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) Nullable(javax.annotation.Nullable) Before(org.junit.Before) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) Executor(java.util.concurrent.Executor) DataSegmentServerAnnouncer(org.apache.druid.server.coordination.DataSegmentServerAnnouncer) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Futures(com.google.common.util.concurrent.Futures) TreeMap(java.util.TreeMap) DefaultQueryRunnerFactoryConglomerate(org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate) DruidNode(org.apache.druid.server.DruidNode) Named(com.google.inject.name.Named) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) Module(com.fasterxml.jackson.databind.Module) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TimeoutException(java.util.concurrent.TimeoutException) TaskResource(org.apache.druid.indexing.common.task.TaskResource) SeekableStreamSupervisor(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor) SequenceMetadata(org.apache.druid.indexing.seekablestream.SequenceMetadata) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Task(org.apache.druid.indexing.common.task.Task) After(org.junit.After) ServerType(org.apache.druid.server.coordination.ServerType) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) DerbyMetadataStorageActionHandlerFactory(org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory) Parameterized(org.junit.runners.Parameterized) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DateTimes(org.apache.druid.java.util.common.DateTimes) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) TestUtils(org.apache.druid.indexing.common.TestUtils) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) Objects(java.util.Objects) DataNodeService(org.apache.druid.discovery.DataNodeService) List(java.util.List) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) KinesisSupervisor(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisor) MetadataTaskStorage(org.apache.druid.indexing.overlord.MetadataTaskStorage) MapCache(org.apache.druid.client.cache.MapCache) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) BeforeClass(org.junit.BeforeClass) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashSet(java.util.HashSet) JsonTypeName(com.fasterxml.jackson.annotation.JsonTypeName) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Period(org.joda.time.Period) TestAppenderatorsManager(org.apache.druid.indexing.common.task.TestAppenderatorsManager) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) AWSCredentialsConfig(org.apache.druid.common.aws.AWSCredentialsConfig) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) TimeUnit(java.util.concurrent.TimeUnit) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) MonitorScheduler(org.apache.druid.java.util.metrics.MonitorScheduler) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner) TemporaryFolder(org.junit.rules.TemporaryFolder) DerbyMetadataStorageActionHandlerFactory(org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) TestUtils(org.apache.druid.indexing.common.TestUtils) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Executor(java.util.concurrent.Executor) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) CacheConfig(org.apache.druid.client.cache.CacheConfig) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) LookupNodeService(org.apache.druid.discovery.LookupNodeService) AWSCredentialsConfig(org.apache.druid.common.aws.AWSCredentialsConfig) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) Module(com.fasterxml.jackson.databind.Module) MetadataTaskStorage(org.apache.druid.indexing.overlord.MetadataTaskStorage) DataNodeService(org.apache.druid.discovery.DataNodeService) File(java.io.File) TestAppenderatorsManager(org.apache.druid.indexing.common.task.TestAppenderatorsManager)

Example 3 with LocalDataSegmentPusher

use of org.apache.druid.segment.loading.LocalDataSegmentPusher in project hive by apache.

the class TestDruidRecordWriter method testWrite.

// Test is failing due to Guava dependency, Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)), null, objectMapper);
    IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    });
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = expectedRows.stream().map(input -> new DruidWritable(ImmutableMap.<String, Object>builder().putAll(input).put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis()).build())).collect(Collectors.toList());
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null, ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) DruidTable(org.apache.calcite.adapter.druid.DruidTable) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) Constants(org.apache.hadoop.hive.conf.Constants) DruidStorageHandler(org.apache.hadoop.hive.druid.DruidStorageHandler) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) Ignore(org.junit.Ignore) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) TemporaryFolder(org.junit.rules.TemporaryFolder) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) IndexSpec(org.apache.druid.segment.IndexSpec) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Configuration(org.apache.hadoop.conf.Configuration) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DataSegment(org.apache.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) Path(org.apache.hadoop.fs.Path) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Firehose(org.apache.druid.data.input.Firehose) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) QueryableIndex(org.apache.druid.segment.QueryableIndex) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 4 with LocalDataSegmentPusher

use of org.apache.druid.segment.loading.LocalDataSegmentPusher in project druid by druid-io.

the class KafkaIndexTaskTest method makeToolboxFactory.

private void makeToolboxFactory() throws IOException {
    directory = tempFolder.newFolder();
    final TestUtils testUtils = new TestUtils();
    rowIngestionMetersFactory = testUtils.getRowIngestionMetersFactory();
    final ObjectMapper objectMapper = testUtils.getTestObjectMapper();
    for (Module module : new KafkaIndexTaskModule().getJacksonModules()) {
        objectMapper.registerModule(module);
    }
    objectMapper.registerModule(TEST_MODULE);
    final TaskConfig taskConfig = new TaskConfig(new File(directory, "baseDir").getPath(), new File(directory, "baseTaskDir").getPath(), null, 50000, null, true, null, null, null, false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
    final TestDerbyConnector derbyConnector = derby.getConnector();
    derbyConnector.createDataSourceTable();
    derbyConnector.createPendingSegmentsTable();
    derbyConnector.createSegmentTable();
    derbyConnector.createRulesTable();
    derbyConnector.createConfigTable();
    derbyConnector.createTaskTables();
    derbyConnector.createAuditTable();
    taskStorage = new MetadataTaskStorage(derbyConnector, new TaskStorageConfig(null), new DerbyMetadataStorageActionHandlerFactory(derbyConnector, derby.metadataTablesConfigSupplier().get(), objectMapper));
    metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator(testUtils.getTestObjectMapper(), derby.metadataTablesConfigSupplier().get(), derbyConnector);
    taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator);
    final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(taskLockbox, taskStorage, metadataStorageCoordinator, emitter, new SupervisorManager(null) {

        @Override
        public boolean checkPointDataSourceMetadata(String supervisorId, int taskGroupId, @Nullable DataSourceMetadata previousDataSourceMetadata) {
            log.info("Adding checkpoint hash to the set");
            checkpointRequestsHash.add(Objects.hash(supervisorId, taskGroupId, previousDataSourceMetadata));
            return true;
        }
    });
    final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory(taskStorage, taskActionToolbox, new TaskAuditLogConfig(false));
    final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() {

        @Override
        public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) {
            if (doHandoff) {
                // Simulate immediate handoff
                exec.execute(handOffRunnable);
            }
            return true;
        }

        @Override
        public void start() {
        // Noop
        }

        @Override
        public void close() {
        // Noop
        }
    };
    final LocalDataSegmentPusherConfig dataSegmentPusherConfig = new LocalDataSegmentPusherConfig();
    dataSegmentPusherConfig.storageDirectory = getSegmentDirectory();
    final DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(dataSegmentPusherConfig);
    toolboxFactory = new TaskToolboxFactory(taskConfig, // taskExecutorNode
    null, taskActionClientFactory, emitter, dataSegmentPusher, new TestDataSegmentKiller(), // DataSegmentMover
    null, // DataSegmentArchiver
    null, new TestDataSegmentAnnouncer(), EasyMock.createNiceMock(DataSegmentServerAnnouncer.class), handoffNotifierFactory, this::makeTimeseriesAndScanConglomerate, DirectQueryProcessingPool.INSTANCE, NoopJoinableFactory.INSTANCE, () -> EasyMock.createMock(MonitorScheduler.class), new SegmentCacheManagerFactory(testUtils.getTestObjectMapper()), testUtils.getTestObjectMapper(), testUtils.getTestIndexIO(), MapCache.create(1024), new CacheConfig(), new CachePopulatorStats(), testUtils.getTestIndexMergerV9(), EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0), new SingleFileTaskReportFileWriter(reportsFile), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, new NoopChatHandlerProvider(), testUtils.getRowIngestionMetersFactory(), new TestAppenderatorsManager(), new NoopIndexingServiceClient(), null, null, null);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) DirectQueryProcessingPool(org.apache.druid.query.DirectQueryProcessingPool) Arrays(java.util.Arrays) LookupNodeService(org.apache.druid.discovery.LookupNodeService) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) ScanQueryQueryToolChest(org.apache.druid.query.scan.ScanQueryQueryToolChest) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) NoopJoinableFactory(org.apache.druid.segment.join.NoopJoinableFactory) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) AfterClass(org.junit.AfterClass) Execs(org.apache.druid.java.util.common.concurrent.Execs) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) CacheConfig(org.apache.druid.client.cache.CacheConfig) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) InputRow(org.apache.druid.data.input.InputRow) TaskState(org.apache.druid.indexer.TaskState) Stream(java.util.stream.Stream) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) TestingCluster(org.apache.curator.test.TestingCluster) TestBroker(org.apache.druid.indexing.kafka.test.TestBroker) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) SeekableStreamIndexTaskTestBase(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTestBase) DruidNodeAnnouncer(org.apache.druid.discovery.DruidNodeAnnouncer) RunWith(org.junit.runner.RunWith) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) InputRowSchema(org.apache.druid.data.input.InputRowSchema) TaskStatus(org.apache.druid.indexer.TaskStatus) ScanQuery(org.apache.druid.query.scan.ScanQuery) KafkaSupervisorIOConfig(org.apache.druid.indexing.kafka.supervisor.KafkaSupervisorIOConfig) LinkedHashMap(java.util.LinkedHashMap) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) Nullable(javax.annotation.Nullable) Before(org.junit.Before) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) Executor(java.util.concurrent.Executor) DataSegmentServerAnnouncer(org.apache.druid.server.coordination.DataSegmentServerAnnouncer) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Futures(com.google.common.util.concurrent.Futures) KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) TreeMap(java.util.TreeMap) DefaultQueryRunnerFactoryConglomerate(org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate) DruidNode(org.apache.druid.server.DruidNode) QueryRunnerFactory(org.apache.druid.query.QueryRunnerFactory) DefaultGenericQueryMetricsFactory(org.apache.druid.query.DefaultGenericQueryMetricsFactory) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) Module(com.fasterxml.jackson.databind.Module) QueryPlus(org.apache.druid.query.QueryPlus) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TimeoutException(java.util.concurrent.TimeoutException) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SeekableStreamSupervisor(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) Druids(org.apache.druid.query.Druids) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Task(org.apache.druid.indexing.common.task.Task) After(org.junit.After) ServerType(org.apache.druid.server.coordination.ServerType) TypeReference(com.fasterxml.jackson.core.type.TypeReference) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) DerbyMetadataStorageActionHandlerFactory(org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory) Parameterized(org.junit.runners.Parameterized) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DateTimes(org.apache.druid.java.util.common.DateTimes) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Status(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner.Status) StringUtils(org.apache.druid.java.util.common.StringUtils) ScanQueryEngine(org.apache.druid.query.scan.ScanQueryEngine) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) TestUtils(org.apache.druid.indexing.common.TestUtils) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) Objects(java.util.Objects) DataNodeService(org.apache.druid.discovery.DataNodeService) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) Header(org.apache.kafka.common.header.Header) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) MetadataTaskStorage(org.apache.druid.indexing.overlord.MetadataTaskStorage) MapCache(org.apache.druid.client.cache.MapCache) Logger(org.apache.druid.java.util.common.logger.Logger) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) BeforeClass(org.junit.BeforeClass) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) HashSet(java.util.HashSet) ScanQueryConfig(org.apache.druid.query.scan.ScanQueryConfig) KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) Query(org.apache.druid.query.Query) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) ScanQueryRunnerFactory(org.apache.druid.query.scan.ScanQueryRunnerFactory) KafkaSupervisor(org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor) RowIngestionMetersTotals(org.apache.druid.segment.incremental.RowIngestionMetersTotals) KafkaStringHeaderFormat(org.apache.druid.data.input.kafkainput.KafkaStringHeaderFormat) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Period(org.joda.time.Period) TestAppenderatorsManager(org.apache.druid.indexing.common.task.TestAppenderatorsManager) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) KafkaInputFormat(org.apache.druid.data.input.kafkainput.KafkaInputFormat) MonitorScheduler(org.apache.druid.java.util.metrics.MonitorScheduler) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) InputEntity(org.apache.druid.data.input.InputEntity) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner) TemporaryFolder(org.junit.rules.TemporaryFolder) DerbyMetadataStorageActionHandlerFactory(org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) TestUtils(org.apache.druid.indexing.common.TestUtils) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Executor(java.util.concurrent.Executor) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) CacheConfig(org.apache.druid.client.cache.CacheConfig) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) LookupNodeService(org.apache.druid.discovery.LookupNodeService) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) Module(com.fasterxml.jackson.databind.Module) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) MetadataTaskStorage(org.apache.druid.indexing.overlord.MetadataTaskStorage) DataNodeService(org.apache.druid.discovery.DataNodeService) File(java.io.File) TestAppenderatorsManager(org.apache.druid.indexing.common.task.TestAppenderatorsManager)

Example 5 with LocalDataSegmentPusher

use of org.apache.druid.segment.loading.LocalDataSegmentPusher in project druid by druid-io.

the class ShuffleDataSegmentPusherTest method setup.

@Before
public void setup() throws IOException {
    final WorkerConfig workerConfig = new WorkerConfig();
    final TaskConfig taskConfig = new TaskConfig(null, null, null, null, null, false, null, null, ImmutableList.of(new StorageLocationConfig(temporaryFolder.newFolder(), null, null)), false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
    final IndexingServiceClient indexingServiceClient = new NoopIndexingServiceClient();
    if (LOCAL.equals(intermediateDataStore)) {
        intermediaryDataManager = new LocalIntermediaryDataManager(workerConfig, taskConfig, indexingServiceClient);
    } else if (DEEPSTORE.equals(intermediateDataStore)) {
        localDeepStore = temporaryFolder.newFolder("localStorage");
        intermediaryDataManager = new DeepStorageIntermediaryDataManager(new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

            @Override
            public File getStorageDirectory() {
                return localDeepStore;
            }
        }));
    }
    intermediaryDataManager.start();
    segmentPusher = new ShuffleDataSegmentPusher("supervisorTaskId", "subTaskId", intermediaryDataManager);
    final Injector injector = GuiceInjectors.makeStartupInjectorWithModules(ImmutableList.of(binder -> binder.bind(LocalDataSegmentPuller.class)));
    mapper = new DefaultObjectMapper();
    mapper.registerModule(new SimpleModule("loadSpecTest").registerSubtypes(LocalLoadSpec.class));
    mapper.setInjectableValues(new GuiceInjectableValues(injector));
    final GuiceAnnotationIntrospector guiceIntrospector = new GuiceAnnotationIntrospector();
    mapper.setAnnotationIntrospectors(new AnnotationIntrospectorPair(guiceIntrospector, mapper.getSerializationConfig().getAnnotationIntrospector()), new AnnotationIntrospectorPair(guiceIntrospector, mapper.getDeserializationConfig().getAnnotationIntrospector()));
}
Also used : Arrays(java.util.Arrays) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) Intervals(org.apache.druid.java.util.common.Intervals) RunWith(org.junit.runner.RunWith) CompressionUtils(org.apache.druid.utils.CompressionUtils) IndexingServiceClient(org.apache.druid.client.indexing.IndexingServiceClient) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) AnnotationIntrospectorPair(com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) After(org.junit.After) ByteSource(com.google.common.io.ByteSource) LoadSpec(org.apache.druid.segment.loading.LoadSpec) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec) Parameterized(org.junit.runners.Parameterized) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) Before(org.junit.Before) WorkerConfig(org.apache.druid.indexing.worker.config.WorkerConfig) LocalLoadSpec(org.apache.druid.segment.loading.LocalLoadSpec) Collection(java.util.Collection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) IOException(java.io.IOException) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) Ints(com.google.common.primitives.Ints) GuiceInjectors(org.apache.druid.guice.GuiceInjectors) File(java.io.File) StandardCharsets(java.nio.charset.StandardCharsets) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Injector(com.google.inject.Injector) Mockito(org.mockito.Mockito) List(java.util.List) Rule(org.junit.Rule) GuiceAnnotationIntrospector(org.apache.druid.guice.GuiceAnnotationIntrospector) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) DataSegment(org.apache.druid.timeline.DataSegment) Optional(java.util.Optional) Assert(org.junit.Assert) Comparator(java.util.Comparator) GuiceInjectableValues(org.apache.druid.guice.GuiceInjectableValues) TemporaryFolder(org.junit.rules.TemporaryFolder) LocalLoadSpec(org.apache.druid.segment.loading.LocalLoadSpec) AnnotationIntrospectorPair(com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair) IndexingServiceClient(org.apache.druid.client.indexing.IndexingServiceClient) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) Injector(com.google.inject.Injector) WorkerConfig(org.apache.druid.indexing.worker.config.WorkerConfig) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GuiceAnnotationIntrospector(org.apache.druid.guice.GuiceAnnotationIntrospector) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) GuiceInjectableValues(org.apache.druid.guice.GuiceInjectableValues) Before(org.junit.Before)

Aggregations

IOException (java.io.IOException)5 LocalDataSegmentPusher (org.apache.druid.segment.loading.LocalDataSegmentPusher)5 LocalDataSegmentPusherConfig (org.apache.druid.segment.loading.LocalDataSegmentPusherConfig)5 Test (org.junit.Test)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 ImmutableList (com.google.common.collect.ImmutableList)4 File (java.io.File)4 List (java.util.List)4 Assert (org.junit.Assert)4 Rule (org.junit.Rule)4 TemporaryFolder (org.junit.rules.TemporaryFolder)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 Arrays (java.util.Arrays)3 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)2 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)2 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)2 TypeReference (com.fasterxml.jackson.core.type.TypeReference)2 Module (com.fasterxml.jackson.databind.Module)2 SimpleModule (com.fasterxml.jackson.databind.module.SimpleModule)2 ImmutableSet (com.google.common.collect.ImmutableSet)2