use of org.apache.druid.segment.loading.LocalDataSegmentPuller in project druid by druid-io.
the class SegmentManagerBroadcastJoinIndexedTableTest method setup.
@Before
public void setup() throws IOException {
  segmentPuller = new LocalDataSegmentPuller();
  objectMapper = new DefaultObjectMapper()
      .registerModule(new SegmentizerModule())
      .registerModule(new SimpleModule().registerSubtypes(new NamedType(LocalLoadSpec.class, "local")));
  indexIO = new IndexIO(objectMapper, () -> 0);
  objectMapper.setInjectableValues(new InjectableValues.Std()
      .addValue(LocalDataSegmentPuller.class, segmentPuller)
      .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
      .addValue(ObjectMapper.class.getName(), objectMapper)
      .addValue(IndexIO.class, indexIO));
  segmentCacheDir = temporaryFolder.newFolder();
  segmentDeepStorageDir = temporaryFolder.newFolder();
  segmentCacheManager = new SegmentLocalCacheManager(
      new SegmentLoaderConfig() {
        @Override
        public List<StorageLocationConfig> getLocations() {
          return Collections.singletonList(new StorageLocationConfig(segmentCacheDir, null, null));
        }
      },
      objectMapper);
  segmentManager = new SegmentManager(new SegmentLocalCacheLoader(segmentCacheManager, indexIO, objectMapper));
  joinableFactory = new BroadcastTableJoinableFactory(segmentManager);
  EmittingLogger.registerEmitter(new NoopServiceEmitter());
}
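The pieces above fit together when a segment's load spec is deserialized: the SimpleModule maps the "local" type to LocalLoadSpec, and the InjectableValues supply the LocalDataSegmentPuller that LocalLoadSpec requires. A minimal sketch of that round trip, not part of the test; the paths are hypothetical:

// Sketch only: deserialize a "local" load spec with the mapper configured above.
Map<String, Object> loadSpecMap = ImmutableMap.of("type", "local", "path", "/tmp/deep-storage/index.zip");
LoadSpec loadSpec = objectMapper.convertValue(loadSpecMap, LoadSpec.class);
// LocalLoadSpec.loadSegment() delegates to the injected LocalDataSegmentPuller.
LoadSpec.LoadSpecResult result = loadSpec.loadSegment(new File("/tmp/segment-cache/some-segment"));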
use of org.apache.druid.segment.loading.LocalDataSegmentPuller in project hive by apache.
the class TestDruidRecordWriter method testWrite.
// Test is failing due to a Guava dependency conflict; Druid 0.13.0 should depend less on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
  final Map<String, Object> parserMap =
      objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {});
  DataSchema dataSchema = new DataSchema(
      dataSourceName,
      parserMap,
      new AggregatorFactory[] {
          new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
      },
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      null,
      objectMapper);
  IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, null, null, null, temporaryFolder.newFolder(), null, null, null, null,
      indexSpec, null, null, 0, 0, null, null, 0L, null, null);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
    @Override
    public File getStorageDirectory() {
      return segmentOutputDir;
    }
  });
  Path segmentDescriptorPath =
      new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  DruidRecordWriter druidRecordWriter = new DruidRecordWriter(
      dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
  List<DruidWritable> druidWritables = expectedRows.stream()
      .map(input -> new DruidWritable(ImmutableMap.<String, Object>builder()
          .putAll(input)
          .put(
              Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
              Granularities.DAY
                  .bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)))
                  .getMillis())
          .build()))
      .collect(Collectors.toList());
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  // Pull the published segment back out of local deep storage and unpack it for verification.
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
      null,
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null);
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
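The DataSegment overload of getSegmentFiles used above locates the zip through the segment's load spec, which for local deep storage is just a "path" entry. A minimal sketch, with hypothetical values, of a segment the puller can resolve:

DataSegment segment = DataSegment.builder()
    .dataSource("testDataSource")
    .interval(Intervals.of("2014-08-31/2014-09-01"))
    .version("v0")
    .loadSpec(ImmutableMap.of("type", "local", "path", "/tmp/deep-storage/index.zip"))
    .size(0)
    .build();
// Resolves the file from the load spec's "path" and unpacks it into the directory.
new LocalDataSegmentPuller().getSegmentFiles(segment, new File("/tmp/unzipped-segment"));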
use of org.apache.druid.segment.loading.LocalDataSegmentPuller in project druid by druid-io.
the class AbstractParallelIndexSupervisorTaskTest method prepareObjectMapper.
public void prepareObjectMapper(ObjectMapper objectMapper, IndexIO indexIO) {
  final TaskConfig taskConfig = new TaskConfig(
      null, null, null, null, null, false, null, null, null, false, false,
      TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
  objectMapper.setInjectableValues(new InjectableValues.Std()
      .addValue(ExprMacroTable.class, LookupEnabledTestExprMacroTable.INSTANCE)
      .addValue(IndexIO.class, indexIO)
      .addValue(ObjectMapper.class, objectMapper)
      .addValue(ChatHandlerProvider.class, new NoopChatHandlerProvider())
      .addValue(AuthConfig.class, new AuthConfig())
      .addValue(AuthorizerMapper.class, null)
      .addValue(RowIngestionMetersFactory.class, new DropwizardRowIngestionMetersFactory())
      .addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT)
      .addValue(AuthorizerMapper.class, new AuthorizerMapper(ImmutableMap.of()))
      .addValue(AppenderatorsManager.class, TestUtils.APPENDERATORS_MANAGER)
      .addValue(LocalDataSegmentPuller.class, new LocalDataSegmentPuller())
      .addValue(CoordinatorClient.class, coordinatorClient)
      .addValue(SegmentCacheManagerFactory.class, new SegmentCacheManagerFactory(objectMapper))
      .addValue(RetryPolicyFactory.class, new RetryPolicyFactory(new RetryPolicyConfig()))
      .addValue(TaskConfig.class, taskConfig));
  objectMapper.registerSubtypes(
      new NamedType(ParallelIndexSupervisorTask.class, ParallelIndexSupervisorTask.TYPE),
      new NamedType(CompactionTask.CompactionTuningConfig.class, CompactionTask.CompactionTuningConfig.TYPE),
      new NamedType(SinglePhaseSubTask.class, SinglePhaseSubTask.TYPE),
      new NamedType(PartialHashSegmentGenerateTask.class, PartialHashSegmentGenerateTask.TYPE),
      new NamedType(PartialRangeSegmentGenerateTask.class, PartialRangeSegmentGenerateTask.TYPE),
      new NamedType(PartialGenericSegmentMergeTask.class, PartialGenericSegmentMergeTask.TYPE),
      new NamedType(PartialDimensionDistributionTask.class, PartialDimensionDistributionTask.TYPE),
      new NamedType(PartialDimensionCardinalityTask.class, PartialDimensionCardinalityTask.TYPE));
}
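One way to see these injectables at work is to round-trip a task through the prepared mapper; @JacksonInject fields are repopulated from the values registered above rather than from the JSON. A minimal sketch, assuming some task instance is at hand:

// Sketch only: "task" stands in for any of the registered task types.
byte[] json = objectMapper.writeValueAsBytes(task);
Task deserialized = objectMapper.readValue(json, Task.class);
// Injected dependencies (e.g. the LocalDataSegmentPuller) come from the
// InjectableValues set in prepareObjectMapper(), not from the serialized form.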
use of org.apache.druid.segment.loading.LocalDataSegmentPuller in project druid by druid-io.
the class BatchDeltaIngestionTest method testIngestion.
private void testIngestion(
    HadoopDruidIndexerConfig config,
    List<ImmutableMap<String, Object>> expectedRowsGenerated,
    WindowedDataSegment windowedDataSegment,
    List<String> expectedDimensions,
    List<String> expectedMetrics
) throws Exception {
  IndexGeneratorJob job = new IndexGeneratorJob(config);
  Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
  List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths =
      IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
  JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
  JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
  File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
  Assert.assertFalse(workingPath.exists());
  File segmentFolder = new File(StringUtils.format(
      "%s/%s/%s_%s/%s/0",
      config.getSchema().getIOConfig().getSegmentOutputPath(),
      config.getSchema().getDataSchema().getDataSource(),
      INTERVAL_FULL.getStart().toString(),
      INTERVAL_FULL.getEnd().toString(),
      config.getSchema().getTuningConfig().getVersion()));
  Assert.assertTrue(segmentFolder.exists());
  File indexZip = new File(segmentFolder, "index.zip");
  Assert.assertTrue(indexZip.exists());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(indexZip, tmpUnzippedSegmentDir);
  QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
      TransformSpec.NONE,
      expectedDimensions,
      expectedMetrics,
      null);
  List<InputRow> rows = new ArrayList<>();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRowsGenerated, rows, expectedDimensions, expectedMetrics);
}
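The File-to-File overload used here also reports what it transferred. A minimal sketch with hypothetical paths, assuming the FileUtils.FileCopyResult return type found in recent Druid versions:

LocalDataSegmentPuller puller = new LocalDataSegmentPuller();
// Unzips the archive into the destination directory and reports the copied files.
FileUtils.FileCopyResult result = puller.getSegmentFiles(
    new File("/tmp/deep-storage/index.zip"),
    new File("/tmp/unzipped-segment"));
// result.size() is the total number of bytes written.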