Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
From the FilterPartitionBenchmark class, method readWithExFnPostFilter:
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithExFnPostFilter(Blackhole blackhole) throws Exception
{
  // Selector filter that deliberately avoids bitmap indexes, so matching happens as a post-filter during the cursor scan.
  Filter filter = new NoBitmapSelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();

  StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
  Sequence<Cursor> cursors = makeCursors(sa, filter);
  Sequence<List<String>> stringListSeq = readCursors(cursors, blackhole);
  List<String> strings = Sequences.toList(Sequences.limit(stringListSeq, 1), Lists.<List<String>>newArrayList()).get(0);
  for (String st : strings) {
    blackhole.consume(st);
  }
}
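The makeCursors and readCursors helpers are defined elsewhere in FilterPartitionBenchmark and are not shown above. A minimal sketch of what the cursor-building helper plausibly looks like; the schemaInfo field, the interval, and the exact StorageAdapter.makeCursors overload are assumptions for illustration, not the verbatim benchmark code:

  // Hypothetical sketch only; the real helper lives in FilterPartitionBenchmark.
  private Sequence<Cursor> makeCursors(StorageAdapter sa, Filter filter)
  {
    return sa.makeCursors(
        filter,
        schemaInfo.getDataInterval(),  // interval covering the generated benchmark rows (assumed field)
        VirtualColumns.EMPTY,
        Granularities.ALL,
        false                          // ascending time order
    );
  }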
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
From the IngestSegmentFirehoseFactory class, method connect:
@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException
{
  log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
  if (taskToolbox == null) {
    // Noop Task is just used to create the toolbox and list segments.
    taskToolbox = injector.getInstance(TaskToolboxFactory.class)
                          .build(new NoopTask("reingest", 0, 0, null, null, null));
  }
  try {
    final List<DataSegment> usedSegments =
        taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
    final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);

    // Build a timeline so that only the latest version of each overlapping segment is re-ingested.
    VersionedIntervalTimeline<String, DataSegment> timeline =
        new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : usedSegments) {
      timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);

    // Dimensions: explicit list, else custom parse-spec dimensions, else the union of segment dimensions minus exclusions.
    final List<String> dims;
    if (dimensions != null) {
      dims = dimensions;
    } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
      dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
    } else {
      Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(
          timeLineSegments,
          new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
          {
            @Override
            public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder)
            {
              return Iterables.concat(Iterables.transform(
                  timelineObjectHolder.getObject(),
                  new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                  {
                    @Override
                    public Iterable<String> apply(PartitionChunk<DataSegment> input)
                    {
                      return input.getObject().getDimensions();
                    }
                  }
              ));
            }
          }
      )));
      dims = Lists.newArrayList(
          Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions())
      );
    }

    // Metrics: explicit list, else the union of metrics across all segments.
    final List<String> metricsList;
    if (metrics != null) {
      metricsList = metrics;
    } else {
      Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(
          timeLineSegments,
          new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
          {
            @Override
            public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input)
            {
              return Iterables.concat(Iterables.transform(
                  input.getObject(),
                  new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                  {
                    @Override
                    public Iterable<String> apply(PartitionChunk<DataSegment> input)
                    {
                      return input.getObject().getMetrics();
                    }
                  }
              ));
            }
          }
      )));
      metricsList = Lists.newArrayList(metricsSet);
    }

    // Wrap every partition chunk in a WindowedStorageAdapter backed by a QueryableIndexStorageAdapter.
    final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(
        timeLineSegments,
        new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>()
        {
          @Override
          public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder)
          {
            return Iterables.transform(
                holder.getObject(),
                new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>()
                {
                  @Override
                  public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input)
                  {
                    final DataSegment segment = input.getObject();
                    try {
                      return new WindowedStorageAdapter(
                          new QueryableIndexStorageAdapter(
                              indexIO.loadIndex(
                                  Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier())
                              )
                          ),
                          holder.getInterval()
                      );
                    } catch (IOException e) {
                      throw Throwables.propagate(e);
                    }
                  }
                }
            );
          }
        }
    )));

    return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  } catch (SegmentLoadingException e) {
    throw Throwables.propagate(e);
  }
}
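On Java 8, the nested anonymous Guava Function classes above collapse into a short stream pipeline. An illustrative rewrite of just the dimension-union branch (not code from the Druid repository; it relies only on holder.getObject() being iterable over PartitionChunk<DataSegment>, which the anonymous-class version already implies, plus java.util.stream imports):

  // Illustrative Java 8 equivalent of the dimension-union branch above.
  Set<String> dimSet = timeLineSegments.stream()
      .flatMap(holder -> StreamSupport.stream(holder.getObject().spliterator(), false))
      .flatMap(chunk -> chunk.getObject().getDimensions().stream())
      .collect(Collectors.toSet());
  dims = Lists.newArrayList(
      Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions())
  );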
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
From the DatasourceRecordReader class, method initialize:
@Override
public void initialize(InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException
{
  spec = readAndVerifyDatasourceIngestionSpec(context.getConfiguration(), HadoopDruidIndexerConfig.JSON_MAPPER);
  List<WindowedDataSegment> segments = ((DatasourceInputSplit) split).getSegments();
  List<WindowedStorageAdapter> adapters = Lists.transform(
      segments,
      new Function<WindowedDataSegment, WindowedStorageAdapter>()
      {
        @Override
        public WindowedStorageAdapter apply(WindowedDataSegment segment)
        {
          try {
            logger.info("Getting storage path for segment [%s]", segment.getSegment().getIdentifier());
            Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));
            logger.info("Fetch segment files from [%s]", path);

            // Download and unzip the segment into a temp dir, tracked in tmpSegmentDirs for later cleanup.
            File dir = Files.createTempDir();
            tmpSegmentDirs.add(dir);
            logger.info("Locally storing fetched segment at [%s]", dir);
            JobHelper.unzipNoGuava(path, context.getConfiguration(), dir, context);
            logger.info("finished fetching segment files");

            QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
            indexes.add(index);
            numRows += index.getNumRows();
            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(index), segment.getInterval());
          } catch (IOException ex) {
            throw Throwables.propagate(ex);
          }
        }
      }
  );
  firehose = new IngestSegmentFirehose(adapters, spec.getDimensions(), spec.getMetrics(), spec.getFilter(), spec.getGranularity());
}
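Downstream, the Hadoop record-reader contract presumably drains this firehose one row at a time. A plausible, non-verbatim sketch of the companion method, assuming the reader keeps a currRow field and a rowNum counter:

  // Hypothetical sketch of how nextKeyValue() might consume the firehose built in initialize().
  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException
  {
    if (firehose.hasMore()) {
      currRow = firehose.nextRow();  // assumed InputRow field returned by getCurrentValue()
      rowNum++;
      return true;
    }
    return false;
  }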
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
From the BatchDeltaIngestionTest class, method testIngestion:
private void testIngestion(
    HadoopDruidIndexerConfig config,
    List<ImmutableMap<String, Object>> expectedRowsGenerated,
    WindowedDataSegment windowedDataSegment
) throws Exception
{
  IndexGeneratorJob job = new IndexGeneratorJob(config);
  JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);

  // The job writes the segment under <segmentOutputPath>/<dataSource>/<start>_<end>/<version>/0.
  File segmentFolder = new File(
      String.format(
          "%s/%s/%s_%s/%s/0",
          config.getSchema().getIOConfig().getSegmentOutputPath(),
          config.getSchema().getDataSchema().getDataSource(),
          INTERVAL_FULL.getStart().toString(),
          INTERVAL_FULL.getEnd().toString(),
          config.getSchema().getTuningConfig().getVersion()
      )
  );
  Assert.assertTrue(segmentFolder.exists());

  File descriptor = new File(segmentFolder, "descriptor.json");
  File indexZip = new File(segmentFolder, "index.zip");
  Assert.assertTrue(descriptor.exists());
  Assert.assertTrue(indexZip.exists());

  // Verify the segment metadata written to descriptor.json.
  DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
  Assert.assertEquals("website", dataSegment.getDataSource());
  Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
  Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
  Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
  Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
  Assert.assertEquals("host", dataSegment.getDimensions().get(0));
  Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
  Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
  Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
  HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
  Assert.assertEquals(0, spec.getPartitionNum());
  Assert.assertEquals(1, spec.getPartitions());

  // Unzip the segment, open it through a QueryableIndexStorageAdapter, and read the rows back out.
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
  QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      Granularities.NONE
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRowsGenerated, rows);
}
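Since Firehose implements Closeable, the read-back loop could also be written with try-with-resources so the underlying index resources are released even if an assertion fails. An illustrative variant, not the test as committed:

  // Illustrative variant: same loop, but closing the firehose deterministically.
  List<InputRow> rows = Lists.newArrayList();
  try (Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      Granularities.NONE
  )) {
    while (firehose.hasMore()) {
      rows.add(firehose.nextRow());
    }
  }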
Use of io.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.
From the BaseFilterTest class, method makeConstructors:
public static Collection<Object[]> makeConstructors()
{
  final List<Object[]> constructors = Lists.newArrayList();

  final Map<String, BitmapSerdeFactory> bitmapSerdeFactories = ImmutableMap.<String, BitmapSerdeFactory>of(
      "concise", new ConciseBitmapSerdeFactory(),
      "roaring", new RoaringBitmapSerdeFactory(true)
  );
  final Map<String, IndexMerger> indexMergers = ImmutableMap.<String, IndexMerger>of(
      "IndexMerger", TestHelper.getTestIndexMerger(),
      "IndexMergerV9", TestHelper.getTestIndexMergerV9()
  );

  // Each "finisher" turns an IndexBuilder into a StorageAdapter plus a Closeable that releases the underlying index.
  final Map<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finishers = ImmutableMap.of(
      "incremental", new Function<IndexBuilder, Pair<StorageAdapter, Closeable>>()
      {
        @Override
        public Pair<StorageAdapter, Closeable> apply(IndexBuilder input)
        {
          final IncrementalIndex index = input.buildIncrementalIndex();
          return Pair.<StorageAdapter, Closeable>of(
              new IncrementalIndexStorageAdapter(index),
              new Closeable()
              {
                @Override
                public void close() throws IOException
                {
                  index.close();
                }
              }
          );
        }
      },
      "mmapped", new Function<IndexBuilder, Pair<StorageAdapter, Closeable>>()
      {
        @Override
        public Pair<StorageAdapter, Closeable> apply(IndexBuilder input)
        {
          final QueryableIndex index = input.buildMMappedIndex();
          return Pair.<StorageAdapter, Closeable>of(
              new QueryableIndexStorageAdapter(index),
              new Closeable()
              {
                @Override
                public void close() throws IOException
                {
                  index.close();
                }
              }
          );
        }
      },
      "mmappedMerged", new Function<IndexBuilder, Pair<StorageAdapter, Closeable>>()
      {
        @Override
        public Pair<StorageAdapter, Closeable> apply(IndexBuilder input)
        {
          final QueryableIndex index = input.buildMMappedMergedIndex();
          return Pair.<StorageAdapter, Closeable>of(
              new QueryableIndexStorageAdapter(index),
              new Closeable()
              {
                @Override
                public void close() throws IOException
                {
                  index.close();
                }
              }
          );
        }
      }
  );

  // Cartesian product of bitmap serde, index merger, finisher, cnf, and optimize flags.
  for (Map.Entry<String, BitmapSerdeFactory> bitmapSerdeFactoryEntry : bitmapSerdeFactories.entrySet()) {
    for (Map.Entry<String, IndexMerger> indexMergerEntry : indexMergers.entrySet()) {
      for (Map.Entry<String, Function<IndexBuilder, Pair<StorageAdapter, Closeable>>> finisherEntry : finishers.entrySet()) {
        for (boolean cnf : ImmutableList.of(false, true)) {
          for (boolean optimize : ImmutableList.of(false, true)) {
            final String testName = String.format(
                "bitmaps[%s], indexMerger[%s], finisher[%s], optimize[%s]",
                bitmapSerdeFactoryEntry.getKey(),
                indexMergerEntry.getKey(),
                finisherEntry.getKey(),
                optimize
            );
            final IndexBuilder indexBuilder = IndexBuilder.create()
                .indexSpec(new IndexSpec(bitmapSerdeFactoryEntry.getValue(), null, null, null))
                .indexMerger(indexMergerEntry.getValue());
            constructors.add(new Object[]{testName, indexBuilder, finisherEntry.getValue(), cnf, optimize});
          }
        }
      }
    }
  }
  return constructors;
}
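Subclasses of BaseFilterTest typically feed these constructors to JUnit's Parameterized runner, with the test name built above used as the display name. A minimal, hypothetical wiring sketch (the subclass name, its ROWS fixture, and the exact BaseFilterTest constructor signature are assumptions for illustration):

  // Hypothetical subclass showing how makeConstructors() would typically be consumed.
  @RunWith(Parameterized.class)
  public class ExampleFilterTest extends BaseFilterTest
  {
    @Parameterized.Parameters(name = "{0}")
    public static Collection<Object[]> constructorFeeder()
    {
      return makeConstructors();
    }

    public ExampleFilterTest(
        String testName,
        IndexBuilder indexBuilder,
        Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
        boolean cnf,
        boolean optimize
    )
    {
      // Assumed delegation; the real BaseFilterTest constructor also takes the test rows (ROWS here is a placeholder).
      super(testName, ROWS, indexBuilder, finisher, cnf, optimize);
    }
  }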