use of org.apache.druid.segment.loading.SegmentizerFactory in project druid by druid-io.
the class CustomSegmentizerFactoryTest method testCustomSegmentizerPersist.
@Test
public void testCustomSegmentizerPersist() throws IOException {
IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
File segment = new File(temporaryFolder.newFolder(), "segment");
File persisted = INDEX_MERGER.persist(data, Intervals.of("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z"), segment, new IndexSpec(null, null, null, null, new CustomSegmentizerFactory()), null);
File factoryJson = new File(persisted, "factory.json");
Assert.assertTrue(factoryJson.exists());
SegmentizerFactory factory = JSON_MAPPER.readValue(factoryJson, SegmentizerFactory.class);
Assert.assertTrue(factory instanceof CustomSegmentizerFactory);
}
use of org.apache.druid.segment.loading.SegmentizerFactory in project druid by druid-io.
the class CustomSegmentizerFactoryTest method testDefaultSegmentizerPersist.
@Test
public void testDefaultSegmentizerPersist() throws IOException {
IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
File segment = new File(temporaryFolder.newFolder(), "segment");
File persisted = INDEX_MERGER.persist(data, Intervals.of("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z"), segment, new IndexSpec(null, null, null, null, null), null);
File factoryJson = new File(persisted, "factory.json");
Assert.assertTrue(factoryJson.exists());
SegmentizerFactory factory = JSON_MAPPER.readValue(factoryJson, SegmentizerFactory.class);
Assert.assertTrue(factory instanceof MMappedQueryableSegmentizerFactory);
}
use of org.apache.druid.segment.loading.SegmentizerFactory in project druid by druid-io.
the class BroadcastSegmentIndexedTableTest method setup.
@Before
public void setup() throws IOException, SegmentLoadingException {
final ObjectMapper mapper = new DefaultObjectMapper();
mapper.registerModule(new SegmentizerModule());
final IndexIO indexIO = new IndexIO(mapper, () -> 0);
mapper.setInjectableValues(new InjectableValues.Std().addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE).addValue(ObjectMapper.class.getName(), mapper).addValue(IndexIO.class, indexIO).addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT));
final IndexMerger indexMerger = new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance());
Interval testInterval = Intervals.of("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z");
IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
File segment = new File(temporaryFolder.newFolder(), "segment");
File persisted = indexMerger.persist(data, testInterval, segment, new IndexSpec(), null);
File factoryJson = new File(persisted, "factory.json");
Assert.assertTrue(factoryJson.exists());
SegmentizerFactory factory = mapper.readValue(factoryJson, SegmentizerFactory.class);
Assert.assertTrue(factory instanceof MMappedQueryableSegmentizerFactory);
DataSegment dataSegment = new DataSegment(DATASOURCE, testInterval, DateTimes.nowUtc().toString(), ImmutableMap.of(), columnNames, ImmutableList.of(), null, null, segment.getTotalSpace());
backingSegment = (QueryableIndexSegment) factory.factorize(dataSegment, segment, false, SegmentLazyLoadFailCallback.NOOP);
columnNames = ImmutableList.<String>builder().add(ColumnHolder.TIME_COLUMN_NAME).addAll(backingSegment.asQueryableIndex().getColumnNames()).build();
broadcastTable = new BroadcastSegmentIndexedTable(backingSegment, keyColumns, dataSegment.getVersion());
}
use of org.apache.druid.segment.loading.SegmentizerFactory in project druid by druid-io.
the class SegmentManagerBroadcastJoinIndexedTableTest method createSegment.
private DataSegment createSegment(IncrementalIndex data, String interval, String version) throws IOException {
final DataSegment tmpSegment = new DataSegment(TABLE_NAME, Intervals.of(interval), version, Collections.emptyMap(), Collections.emptyList(), Collections.emptyList(), new NumberedShardSpec(0, 0), 9, 100);
final String storageDir = DataSegmentPusher.getDefaultStorageDir(tmpSegment, false);
final File segmentDir = new File(segmentDeepStorageDir, storageDir);
FileUtils.mkdirp(segmentDir);
IndexMerger indexMerger = new IndexMergerV9(objectMapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance());
SegmentizerFactory factory = new BroadcastJoinableMMappedQueryableSegmentizerFactory(indexIO, KEY_COLUMNS);
indexMerger.persist(data, Intervals.of(interval), segmentDir, new IndexSpec(null, null, null, null, factory), null);
final File factoryJson = new File(segmentDir, "factory.json");
objectMapper.writeValue(factoryJson, factory);
return tmpSegment.withLoadSpec(ImmutableMap.of("type", "local", "path", segmentDir.getAbsolutePath()));
}
use of org.apache.druid.segment.loading.SegmentizerFactory in project druid by druid-io.
the class IndexMergerV9 method makeIndexFiles.
private File makeIndexFiles(final List<IndexableAdapter> adapters, @Nullable final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn, final boolean fillRowNumConversions, final IndexSpec indexSpec, @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) throws IOException {
progress.start();
progress.progress();
List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
final Metadata segmentMetadata;
if (metricAggs != null) {
AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
for (int i = 0; i < metricAggs.length; i++) {
combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
}
segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
} else {
segmentMetadata = Metadata.merge(metadataList, null);
}
Closer closer = Closer.create();
try {
final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
FileUtils.mkdirp(outDir);
SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
closer.register(segmentWriteOutMedium);
long startTime = System.currentTimeMillis();
Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
progress.progress();
startTime = System.currentTimeMillis();
try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
if (customSegmentLoader != null) {
mapper.writeValue(fos, customSegmentLoader);
} else {
mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
}
}
log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
progress.progress();
final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
final List<DimensionMergerV9> mergers = new ArrayList<>();
for (int i = 0; i < mergedDimensions.size(); i++) {
DimensionHandler handler = handlers.get(mergedDimensions.get(i));
mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
}
/**
*********** Setup Dim Conversions *************
*/
progress.progress();
startTime = System.currentTimeMillis();
writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
/**
*********** Walk through data sets, merge them, and write merged columns ************
*/
progress.progress();
final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
closer.register(timeAndDimsIterator);
final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
/**
********** Create Inverted Indexes and Finalize Build Columns ************
*/
final String section = "build inverted index and columns";
progress.startSection(section);
makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
for (int i = 0; i < mergedDimensions.size(); i++) {
DimensionMergerV9 merger = mergers.get(i);
merger.writeIndexes(rowNumConversions);
if (merger.canSkip()) {
continue;
}
ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
}
progress.stopSection(section);
/**
*********** Make index.drd & metadata.drd files *************
*/
progress.progress();
makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
v9Smoosher.close();
progress.stop();
return outDir;
} catch (Throwable t) {
throw closer.rethrow(t);
} finally {
closer.close();
}
}
Aggregations