use of org.apache.druid.segment.loading.DataSegmentPusher in project hive by apache.
the class DruidStorageHandler method loadAndCommitDruidSegments.
/**
* Moves the segments from the staging directory to deep storage, then commits the segments'
* metadata to the Druid metadata store in a single transaction.
*
* @param table Hive table
* @param overwrite true if this is an insert overwrite of the table
* @param segmentsToLoad segments staged by the write that need to be moved and committed
*
* @return the list of published data segments
*/
private List<DataSegment> loadAndCommitDruidSegments(Table table, boolean overwrite, List<DataSegment> segmentsToLoad)
    throws IOException, CallbackFailedException {
final String dataSourceName = table.getParameters().get(Constants.DRUID_DATA_SOURCE);
final String segmentDirectory = table.getParameters().get(DruidConstants.DRUID_SEGMENT_DIRECTORY) != null
    ? table.getParameters().get(DruidConstants.DRUID_SEGMENT_DIRECTORY)
    : HiveConf.getVar(getConf(), HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY);
final HdfsDataSegmentPusherConfig hdfsSegmentPusherConfig = new HdfsDataSegmentPusherConfig();
List<DataSegment> publishedDataSegmentList;
LOG.info(String.format(
    "Moving [%s] Druid segments from staging directory [%s] to Deep storage [%s]",
    segmentsToLoad.size(), getStagingWorkingDir().toString(), segmentDirectory));
hdfsSegmentPusherConfig.setStorageDirectory(segmentDirectory);
DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(hdfsSegmentPusherConfig, getConf(), JSON_MAPPER);
publishedDataSegmentList = DruidStorageHandlerUtils.publishSegmentsAndCommit(
    getConnector(), getDruidMetadataStorageTablesConfig(), dataSourceName,
    segmentsToLoad, overwrite, getConf(), dataSegmentPusher);
return publishedDataSegmentList;
}
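Under the hood, publishSegmentsAndCommit relies on the DataSegmentPusher contract to copy each staged segment directory into deep storage before the corresponding metadata row is committed. A minimal sketch of that contract, assuming a hypothetical staged segment directory and an existing DataSegment descriptor (segmentToLoad) in scope:
// Sketch only, not the Hive code path itself: push() copies the local segment
// directory to deep storage and returns the descriptor with its loadSpec filled in.
HdfsDataSegmentPusherConfig sketchConfig = new HdfsDataSegmentPusherConfig();
sketchConfig.setStorageDirectory("/druid/deep-storage"); // assumed example path
DataSegmentPusher sketchPusher = new HdfsDataSegmentPusher(sketchConfig, getConf(), JSON_MAPPER);
File stagedSegmentDir = new File("/tmp/druid-staging/segment-0"); // hypothetical staging dir
DataSegment published = sketchPusher.push(stagedSegmentDir, segmentToLoad, true /* useUniquePath */);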
use of org.apache.druid.segment.loading.DataSegmentPusher in project druid by druid-io.
the class PartialSegmentMergeTask method mergeAndPushSegments.
private Set<DataSegment> mergeAndPushSegments(
    TaskToolbox toolbox,
    DataSchema dataSchema,
    ParallelIndexTuningConfig tuningConfig,
    File persistDir,
    Map<Interval, String> intervalToVersion,
    Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles) throws Exception {
final DataSegmentPusher segmentPusher = toolbox.getSegmentPusher();
final Set<DataSegment> pushedSegments = new HashSet<>();
for (Entry<Interval, Int2ObjectMap<List<File>>> entryPerInterval : intervalToUnzippedFiles.entrySet()) {
final Interval interval = entryPerInterval.getKey();
for (Int2ObjectMap.Entry<List<File>> entryPerBucketId : entryPerInterval.getValue().int2ObjectEntrySet()) {
final int bucketId = entryPerBucketId.getIntKey();
final List<File> segmentFilesToMerge = entryPerBucketId.getValue();
final Pair<File, List<String>> mergedFileAndDimensionNames = mergeSegmentsInSamePartition(
    dataSchema, tuningConfig, toolbox.getIndexIO(), toolbox.getIndexMergerV9(),
    segmentFilesToMerge, tuningConfig.getMaxNumSegmentsToMerge(), persistDir, 0);
final List<String> metricNames = Arrays.stream(dataSchema.getAggregators())
    .map(AggregatorFactory::getName)
    .collect(Collectors.toList());
// Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
final DataSegment segment = RetryUtils.retry(
    () -> segmentPusher.push(
        mergedFileAndDimensionNames.lhs,
        new DataSegment(
            getDataSource(),
            interval,
            Preconditions.checkNotNull(
                ParallelIndexSupervisorTask.findVersion(intervalToVersion, interval),
                "version for interval[%s]",
                interval),
            null, // will be filled in the segmentPusher
            mergedFileAndDimensionNames.rhs,
            metricNames,
            createShardSpec(toolbox, interval, bucketId),
            null, // will be filled in the segmentPusher
            0), // will be filled in the segmentPusher
        false),
    exception -> !(exception instanceof NullPointerException) && exception instanceof Exception,
    5);
pushedSegments.add(segment);
}
}
return pushedSegments;
}
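The push call is wrapped in RetryUtils.retry because a single upload to deep storage, especially a cloud object store, can fail transiently; only the push is retried, the merge itself is not repeated. The same pattern in isolation (a sketch with placeholder names, not additional task code):
// Sketch of the retry-around-push pattern used above; mergedSegmentDir and
// segmentTemplate are hypothetical placeholders for the merged output.
final DataSegment pushed = RetryUtils.retry(
    () -> segmentPusher.push(mergedSegmentDir, segmentTemplate, false /* useUniquePath */),
    e -> !(e instanceof NullPointerException) && e instanceof Exception, // don't retry obvious bugs
    5); // maxTries
Retrying everything except NullPointerException tolerates transient I/O failures without looping on what is almost certainly a programming error.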
use of org.apache.druid.segment.loading.DataSegmentPusher in project druid by druid-io.
the class KafkaIndexTaskTest method makeToolboxFactory.
private void makeToolboxFactory() throws IOException {
directory = tempFolder.newFolder();
final TestUtils testUtils = new TestUtils();
rowIngestionMetersFactory = testUtils.getRowIngestionMetersFactory();
final ObjectMapper objectMapper = testUtils.getTestObjectMapper();
for (Module module : new KafkaIndexTaskModule().getJacksonModules()) {
objectMapper.registerModule(module);
}
objectMapper.registerModule(TEST_MODULE);
final TaskConfig taskConfig = new TaskConfig(
    new File(directory, "baseDir").getPath(),
    new File(directory, "baseTaskDir").getPath(),
    null, 50000, null, true, null, null, null, false, false,
    TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
final TestDerbyConnector derbyConnector = derby.getConnector();
derbyConnector.createDataSourceTable();
derbyConnector.createPendingSegmentsTable();
derbyConnector.createSegmentTable();
derbyConnector.createRulesTable();
derbyConnector.createConfigTable();
derbyConnector.createTaskTables();
derbyConnector.createAuditTable();
taskStorage = new MetadataTaskStorage(
    derbyConnector,
    new TaskStorageConfig(null),
    new DerbyMetadataStorageActionHandlerFactory(derbyConnector, derby.metadataTablesConfigSupplier().get(), objectMapper));
metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator(
    testUtils.getTestObjectMapper(), derby.metadataTablesConfigSupplier().get(), derbyConnector);
taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator);
final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(taskLockbox, taskStorage, metadataStorageCoordinator, emitter, new SupervisorManager(null) {
@Override
public boolean checkPointDataSourceMetadata(String supervisorId, int taskGroupId, @Nullable DataSourceMetadata previousDataSourceMetadata) {
log.info("Adding checkpoint hash to the set");
checkpointRequestsHash.add(Objects.hash(supervisorId, taskGroupId, previousDataSourceMetadata));
return true;
}
});
final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory(taskStorage, taskActionToolbox, new TaskAuditLogConfig(false));
final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() {
@Override
public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) {
if (doHandoff) {
// Simulate immediate handoff
exec.execute(handOffRunnable);
}
return true;
}
@Override
public void start() {
// Noop
}
@Override
public void close() {
// Noop
}
};
final LocalDataSegmentPusherConfig dataSegmentPusherConfig = new LocalDataSegmentPusherConfig();
dataSegmentPusherConfig.storageDirectory = getSegmentDirectory();
final DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(dataSegmentPusherConfig);
toolboxFactory = new TaskToolboxFactory(
    taskConfig,
    null, // taskExecutorNode
    taskActionClientFactory,
    emitter,
    dataSegmentPusher,
    new TestDataSegmentKiller(),
    null, // DataSegmentMover
    null, // DataSegmentArchiver
    new TestDataSegmentAnnouncer(),
    EasyMock.createNiceMock(DataSegmentServerAnnouncer.class),
    handoffNotifierFactory,
    this::makeTimeseriesAndScanConglomerate,
    DirectQueryProcessingPool.INSTANCE,
    NoopJoinableFactory.INSTANCE,
    () -> EasyMock.createMock(MonitorScheduler.class),
    new SegmentCacheManagerFactory(testUtils.getTestObjectMapper()),
    testUtils.getTestObjectMapper(),
    testUtils.getTestIndexIO(),
    MapCache.create(1024),
    new CacheConfig(),
    new CachePopulatorStats(),
    testUtils.getTestIndexMergerV9(),
    EasyMock.createNiceMock(DruidNodeAnnouncer.class),
    EasyMock.createNiceMock(DruidNode.class),
    new LookupNodeService("tier"),
    new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0),
    new SingleFileTaskReportFileWriter(reportsFile),
    null,
    AuthTestUtils.TEST_AUTHORIZER_MAPPER,
    new NoopChatHandlerProvider(),
    testUtils.getRowIngestionMetersFactory(),
    new TestAppenderatorsManager(),
    new NoopIndexingServiceClient(),
    null,
    null,
    null);
}
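The test wires a LocalDataSegmentPusher so segments produced by the Kafka indexing task are written under a temporary directory rather than real deep storage. A small sketch of what that pusher does when the task hands it a segment (hypothetical file and descriptor, not part of the test itself):
// Sketch only: LocalDataSegmentPusher copies the segment directory under
// dataSegmentPusherConfig.storageDirectory and returns a descriptor whose
// loadSpec points at the local copy; someSegment is an assumed DataSegment.
File localSegmentDir = new File(directory, "someSegment"); // hypothetical
DataSegment pushed = dataSegmentPusher.push(localSegmentDir, someSegment, false /* useUniquePath */);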
use of org.apache.druid.segment.loading.DataSegmentPusher in project druid by druid-io.
the class RealtimePlumberSchoolTest method setUp.
@Before
public void setUp() throws Exception {
tmpDir = FileUtils.createTempDir();
ObjectMapper jsonMapper = new DefaultObjectMapper();
schema = new DataSchema(
    "test",
    jsonMapper.convertValue(
        new StringInputRowParser(
            new JSONParseSpec(new TimestampSpec("timestamp", "auto", null), DimensionsSpec.EMPTY, null, null, null),
            null),
        Map.class),
    new AggregatorFactory[] { new CountAggregatorFactory("rows") },
    new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
    null,
    jsonMapper);
schema2 = new DataSchema(
    "test",
    jsonMapper.convertValue(
        new StringInputRowParser(
            new JSONParseSpec(new TimestampSpec("timestamp", "auto", null), DimensionsSpec.EMPTY, null, null, null),
            null),
        Map.class),
    new AggregatorFactory[] { new CountAggregatorFactory("rows") },
    new UniformGranularitySpec(Granularities.YEAR, Granularities.NONE, null),
    null,
    jsonMapper);
announcer = EasyMock.createMock(DataSegmentAnnouncer.class);
announcer.announceSegment(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
segmentPublisher = EasyMock.createNiceMock(SegmentPublisher.class);
dataSegmentPusher = EasyMock.createNiceMock(DataSegmentPusher.class);
handoffNotifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
handoffNotifier = EasyMock.createNiceMock(SegmentHandoffNotifier.class);
EasyMock.expect(handoffNotifierFactory.createSegmentHandoffNotifier(EasyMock.anyString())).andReturn(handoffNotifier).anyTimes();
EasyMock.expect(handoffNotifier.registerSegmentHandoffCallback(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject())).andReturn(true).anyTimes();
emitter = EasyMock.createMock(ServiceEmitter.class);
EasyMock.replay(announcer, segmentPublisher, dataSegmentPusher, handoffNotifierFactory, handoffNotifier, emitter);
tuningConfig = new RealtimeTuningConfig(
    null, 1, null, null, null, null, null,
    new IntervalStartVersioningPolicy(), rejectionPolicy,
    null, null, null, null, 0, 0, false, null, null, null, null);
realtimePlumberSchool = new RealtimePlumberSchool(
    emitter,
    new DefaultQueryRunnerFactoryConglomerate(new HashMap<>()),
    dataSegmentPusher,
    announcer,
    segmentPublisher,
    handoffNotifierFactory,
    DirectQueryProcessingPool.INSTANCE,
    NoopJoinableFactory.INSTANCE,
    TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory),
    TestHelper.getTestIndexIO(),
    MapCache.create(0),
    FireDepartmentTest.NO_CACHE_CONFIG,
    new CachePopulatorStats(),
    TestHelper.makeJsonMapper());
metrics = new FireDepartmentMetrics();
plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema, tuningConfig, metrics);
}
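Here dataSegmentPusher is only a nice mock, so any push calls made by the plumber return default values and are not verified. If a test needed to assert on the pushed segment, an expectation could be set before EasyMock.replay, for example (a hypothetical sketch, not part of this setUp; expectedSegment is an assumed fixture):
// Hypothetical expectation, would have to be registered before EasyMock.replay(...)
EasyMock.expect(dataSegmentPusher.push(
    EasyMock.anyObject(File.class),
    EasyMock.anyObject(DataSegment.class),
    EasyMock.anyBoolean()))
    .andReturn(expectedSegment) // assumed DataSegment fixture
    .anyTimes();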