Use of io.druid.segment.QueryableIndexIndexableAdapter in project druid by druid-io.
From the class OrcIndexGeneratorJobTest, the method verifyJob runs the index-generator job, then verifies every segment it wrote, iterating the rows of each partition through a QueryableIndexIndexableAdapter:
private void verifyJob(IndexGeneratorJob job) throws IOException
{
  JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
  int segmentNum = 0;
  for (DateTime currTime = interval.getStart();
       currTime.isBefore(interval.getEnd());
       currTime = currTime.plusDays(1)) {
    Integer[][] shardInfo = shardInfoForEachSegment[segmentNum++];
    File segmentOutputFolder = new File(
        String.format(
            "%s/%s/%s_%s/%s",
            config.getSchema().getIOConfig().getSegmentOutputPath(),
            config.getSchema().getDataSchema().getDataSource(),
            currTime.toString(),
            currTime.plusDays(1).toString(),
            config.getSchema().getTuningConfig().getVersion()
        )
    );
    Assert.assertTrue(segmentOutputFolder.exists());
    Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);

    int rowCount = 0;
    for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
      // Each partition directory holds the segment descriptor and the zipped index.
      File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
      Assert.assertTrue(individualSegmentFolder.exists());
      File descriptor = new File(individualSegmentFolder, "descriptor.json");
      File indexZip = new File(individualSegmentFolder, "index.zip");
      Assert.assertTrue(descriptor.exists());
      Assert.assertTrue(indexZip.exists());

      // Verify the segment metadata written to descriptor.json.
      DataSegment dataSegment = mapper.readValue(descriptor, DataSegment.class);
      Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
      Assert.assertEquals(new Interval(currTime, currTime.plusDays(1)), dataSegment.getInterval());
      Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
      Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
      Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
      Assert.assertEquals(dataSourceName, dataSegment.getDataSource());
      Assert.assertEquals(1, dataSegment.getDimensions().size());
      String[] dimensions = dataSegment.getDimensions().toArray(new String[dataSegment.getDimensions().size()]);
      Arrays.sort(dimensions);
      Assert.assertEquals("host", dimensions[0]);
      Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
      Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));

      Integer[] hashShardInfo = shardInfo[partitionNum];
      HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
      Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
      Assert.assertEquals((int) hashShardInfo[1], spec.getPartitions());

      // Unzip the segment and walk its rows through a QueryableIndexIndexableAdapter.
      File dir = Files.createTempDir();
      unzip(indexZip, dir);
      QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
      QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
      for (Rowboat row : adapter.getRows()) {
        Object[] metrics = row.getMetrics();
        rowCount++;
        Assert.assertEquals(2, metrics.length);
      }
    }
    Assert.assertEquals(rowCount, data.size());
  }
}
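Stripped of the test scaffolding, the row-iteration pattern above reduces to a few lines. A minimal sketch, assuming a segment already unpacked into a directory indexDir (an illustrative name, not from the test):

// Load the unpacked segment and iterate its rows through the adapter.
QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(indexDir);
QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
for (Rowboat row : adapter.getRows()) {
  long timestamp = row.getTimestamp();  // row timestamp in millis
  Object[] metrics = row.getMetrics();  // one entry per metric column
}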
Use of io.druid.segment.QueryableIndexIndexableAdapter in project druid by druid-io.
From the class AppendTask, the method merge builds a timeline of the input segments and wraps each one in an interval-filtered QueryableIndexIndexableAdapter before appending them into a single index:
@Override
public File merge(final TaskToolbox toolbox, final Map<DataSegment, File> segments, final File outDir) throws Exception {
  VersionedIntervalTimeline<String, DataSegment> timeline =
      new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural().nullsFirst());
  for (DataSegment segment : segments.keySet()) {
    timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
  }
  // Flatten the timeline into the chunks to merge, pairing each segment with its
  // local file and the interval for which it is visible.
  final Iterable<SegmentToMergeHolder> segmentsToMerge = Iterables.concat(Iterables.transform(
      timeline.lookup(new Interval("1000-01-01/3000-01-01")),
      new Function<TimelineObjectHolder<String, DataSegment>, Iterable<SegmentToMergeHolder>>() {
        @Override
        public Iterable<SegmentToMergeHolder> apply(final TimelineObjectHolder<String, DataSegment> input) {
          return Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, SegmentToMergeHolder>() {
            @Nullable
            @Override
            public SegmentToMergeHolder apply(PartitionChunk<DataSegment> chunkInput) {
              DataSegment segment = chunkInput.getObject();
              return new SegmentToMergeHolder(
                  segment,
                  input.getInterval(),
                  Preconditions.checkNotNull(segments.get(segment), "File for segment %s", segment.getIdentifier())
              );
            }
          });
        }
      }
  ));
  // Wrap each segment in a QueryableIndexIndexableAdapter, filtered so that only
  // rows inside the holder's visible interval survive the append.
  List<IndexableAdapter> adapters = Lists.newArrayList();
  for (final SegmentToMergeHolder holder : segmentsToMerge) {
    adapters.add(new RowboatFilteringIndexAdapter(
        new QueryableIndexIndexableAdapter(toolbox.getIndexIO().loadIndex(holder.getFile())),
        new Predicate<Rowboat>() {
          @Override
          public boolean apply(Rowboat input) {
            return holder.getInterval().contains(input.getTimestamp());
          }
        }
    ));
  }
  IndexMerger indexMerger = buildV9Directly ? toolbox.getIndexMergerV9() : toolbox.getIndexMerger();
  return indexMerger.append(
      adapters,
      aggregators == null ? null : aggregators.toArray(new AggregatorFactory[aggregators.size()]),
      outDir,
      indexSpec
  );
}
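The filtering step is the part that touches QueryableIndexIndexableAdapter directly. A minimal sketch of that pattern in isolation, assuming a loaded QueryableIndex named index and a target Interval named interval (both illustrative names, not from the task):

// Expose only the rows whose timestamps fall inside the target interval.
IndexableAdapter filtered = new RowboatFilteringIndexAdapter(
    new QueryableIndexIndexableAdapter(index),
    new Predicate<Rowboat>() {
      @Override
      public boolean apply(Rowboat input) {
        return interval.contains(input.getTimestamp());
      }
    }
);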