
Example 1 with QueryableIndexIndexableAdapter

Use of io.druid.segment.QueryableIndexIndexableAdapter in project druid by druid-io.

In the class OrcIndexGeneratorJobTest, method verifyJob:

private void verifyJob(IndexGeneratorJob job) throws IOException {
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    int segmentNum = 0;
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        Integer[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        // Segment path layout: {segmentOutputPath}/{dataSource}/{intervalStart}_{intervalEnd}/{version}
        File segmentOutputFolder = new File(
            String.format(
                "%s/%s/%s_%s/%s",
                config.getSchema().getIOConfig().getSegmentOutputPath(),
                config.getSchema().getDataSchema().getDataSource(),
                currTime.toString(),
                currTime.plusDays(1).toString(),
                config.getSchema().getTuningConfig().getVersion()
            )
        );
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        int rowCount = 0;
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File descriptor = new File(individualSegmentFolder, "descriptor.json");
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(descriptor.exists());
            Assert.assertTrue(indexZip.exists());
            DataSegment dataSegment = mapper.readValue(descriptor, DataSegment.class);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals(new Interval(currTime, currTime.plusDays(1)), dataSegment.getInterval());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            Assert.assertEquals(dataSourceName, dataSegment.getDataSource());
            Assert.assertEquals(1, dataSegment.getDimensions().size());
            String[] dimensions = dataSegment.getDimensions().toArray(new String[dataSegment.getDimensions().size()]);
            Arrays.sort(dimensions);
            Assert.assertEquals("host", dimensions[0]);
            Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
            Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
            Integer[] hashShardInfo = shardInfo[partitionNum];
            HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
            Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
            Assert.assertEquals((int) hashShardInfo[1], spec.getPartitions());
            File dir = Files.createTempDir();
            unzip(indexZip, dir);
            QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
            QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
            for (Rowboat row : adapter.getRows()) {
                Object[] metrics = row.getMetrics();
                rowCount++;
                Assert.assertEquals(2, metrics.length);
            }
        }
        Assert.assertEquals(data.size(), rowCount);
    }
}
Also used: HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), DataSegment (io.druid.timeline.DataSegment), DateTime (org.joda.time.DateTime), QueryableIndexIndexableAdapter (io.druid.segment.QueryableIndexIndexableAdapter), QueryableIndex (io.druid.segment.QueryableIndex), OrcFile (org.apache.orc.OrcFile), File (java.io.File), Rowboat (io.druid.segment.Rowboat), Interval (org.joda.time.Interval)
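
For quick reference, here is a minimal, self-contained sketch of the adapter pattern Example 1 exercises: load an unpacked segment directory into a QueryableIndex, wrap it in a QueryableIndexIndexableAdapter, and walk its Rowboats. The countRows helper and its indexIO/segmentDir parameters are illustrative names, not part of the example; the loadIndex, getRows, and getMetrics calls are the same ones used above.

import java.io.File;
import java.io.IOException;

import io.druid.segment.IndexIO;
import io.druid.segment.QueryableIndex;
import io.druid.segment.QueryableIndexIndexableAdapter;
import io.druid.segment.Rowboat;

public class QueryableIndexAdapterSketch {

    // Counts the rows of an unpacked segment directory via the adapter's Rowboat view.
    static int countRows(IndexIO indexIO, File segmentDir) throws IOException {
        QueryableIndex index = indexIO.loadIndex(segmentDir);
        QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
        int rowCount = 0;
        for (Rowboat row : adapter.getRows()) {
            // Each Rowboat exposes the row's timestamp, dimension values, and metrics.
            Object[] metrics = row.getMetrics();
            rowCount++;
        }
        return rowCount;
    }
}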

Example 2 with QueryableIndexIndexableAdapter

Use of io.druid.segment.QueryableIndexIndexableAdapter in project druid by druid-io.

In the class AppendTask, method merge:

@Override
public File merge(final TaskToolbox toolbox, final Map<DataSegment, File> segments, final File outDir) throws Exception {
    // Arrange the input segments on a versioned timeline so overshadowed versions resolve correctly.
    VersionedIntervalTimeline<String, DataSegment> timeline =
        new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : segments.keySet()) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    // Gather every segment on the timeline (the lookup interval is effectively "all time") into merge holders.
    final Iterable<SegmentToMergeHolder> segmentsToMerge = Iterables.concat(Iterables.transform(
        timeline.lookup(new Interval("1000-01-01/3000-01-01")),
        new Function<TimelineObjectHolder<String, DataSegment>, Iterable<SegmentToMergeHolder>>() {

        @Override
        public Iterable<SegmentToMergeHolder> apply(final TimelineObjectHolder<String, DataSegment> input) {
            return Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, SegmentToMergeHolder>() {

                @Nullable
                @Override
                public SegmentToMergeHolder apply(PartitionChunk<DataSegment> chunkInput) {
                    DataSegment segment = chunkInput.getObject();
                    return new SegmentToMergeHolder(segment, input.getInterval(), Preconditions.checkNotNull(segments.get(segment), "File for segment %s", segment.getIdentifier()));
                }
            });
        }
    }));
    List<IndexableAdapter> adapters = Lists.newArrayList();
    for (final SegmentToMergeHolder holder : segmentsToMerge) {
        // Filter each segment's rows down to those whose timestamps fall inside the holder's interval.
        adapters.add(new RowboatFilteringIndexAdapter(
            new QueryableIndexIndexableAdapter(toolbox.getIndexIO().loadIndex(holder.getFile())),
            new Predicate<Rowboat>() {

            @Override
            public boolean apply(Rowboat input) {
                return holder.getInterval().contains(input.getTimestamp());
            }
        }));
    }
    // buildV9Directly selects the v9 merger; otherwise fall back to the legacy merger.
    IndexMerger indexMerger = buildV9Directly ? toolbox.getIndexMergerV9() : toolbox.getIndexMerger();
    return indexMerger.append(
        adapters,
        aggregators == null ? null : aggregators.toArray(new AggregatorFactory[aggregators.size()]),
        outDir,
        indexSpec
    );
}
Also used: IndexMerger (io.druid.segment.IndexMerger), RowboatFilteringIndexAdapter (io.druid.segment.RowboatFilteringIndexAdapter), DataSegment (io.druid.timeline.DataSegment), Predicate (com.google.common.base.Predicate), Function (com.google.common.base.Function), QueryableIndexIndexableAdapter (io.druid.segment.QueryableIndexIndexableAdapter), TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder), VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline), IndexableAdapter (io.druid.segment.IndexableAdapter), PartitionChunk (io.druid.timeline.partition.PartitionChunk), Nullable (javax.annotation.Nullable), Rowboat (io.druid.segment.Rowboat), Interval (org.joda.time.Interval)
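
The core of Example 2, reduced to a sketch: wrap each loaded segment in a RowboatFilteringIndexAdapter that keeps only rows inside a target interval, then hand the adapters to IndexMerger.append. The appendSegments helper and its segmentFiles/interval parameters are assumptions for illustration; the adapter wrapping and the append signature mirror the code above, and null aggregators follow the null branch in AppendTask.

import java.io.File;
import java.io.IOException;
import java.util.List;

import com.google.common.base.Predicate;
import com.google.common.collect.Lists;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMerger;
import io.druid.segment.IndexSpec;
import io.druid.segment.IndexableAdapter;
import io.druid.segment.QueryableIndexIndexableAdapter;
import io.druid.segment.Rowboat;
import io.druid.segment.RowboatFilteringIndexAdapter;
import org.joda.time.Interval;

public class AppendSketch {

    // Appends the given segment files into outDir, keeping only rows inside the interval.
    static File appendSegments(
        IndexIO indexIO,
        IndexMerger indexMerger,
        List<File> segmentFiles,
        final Interval interval,
        File outDir,
        IndexSpec indexSpec
    ) throws IOException {
        List<IndexableAdapter> adapters = Lists.newArrayList();
        for (File segmentFile : segmentFiles) {
            // Wrap each loaded segment so rows outside the target interval are filtered out.
            adapters.add(new RowboatFilteringIndexAdapter(
                new QueryableIndexIndexableAdapter(indexIO.loadIndex(segmentFile)),
                new Predicate<Rowboat>() {
                    @Override
                    public boolean apply(Rowboat input) {
                        return interval.contains(input.getTimestamp());
                    }
                }
            ));
        }
        // Passing null aggregators appends rows as-is, mirroring the null branch above.
        return indexMerger.append(adapters, null, outDir, indexSpec);
    }
}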

Aggregations

QueryableIndexIndexableAdapter (io.druid.segment.QueryableIndexIndexableAdapter): 2 uses
Rowboat (io.druid.segment.Rowboat): 2 uses
DataSegment (io.druid.timeline.DataSegment): 2 uses
Interval (org.joda.time.Interval): 2 uses
Function (com.google.common.base.Function): 1 use
Predicate (com.google.common.base.Predicate): 1 use
IndexMerger (io.druid.segment.IndexMerger): 1 use
IndexableAdapter (io.druid.segment.IndexableAdapter): 1 use
QueryableIndex (io.druid.segment.QueryableIndex): 1 use
RowboatFilteringIndexAdapter (io.druid.segment.RowboatFilteringIndexAdapter): 1 use
TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder): 1 use
VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline): 1 use
HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec): 1 use
PartitionChunk (io.druid.timeline.partition.PartitionChunk): 1 use
File (java.io.File): 1 use
Nullable (javax.annotation.Nullable): 1 use
OrcFile (org.apache.orc.OrcFile): 1 use
DateTime (org.joda.time.DateTime): 1 use