Search in sources :

Example 1 with Rowboat

use of io.druid.segment.Rowboat in project druid by druid-io.

the class OrcIndexGeneratorJobTest method verifyJob.

/**
 * Runs the given index generator job and verifies the segments it produced.
 *
 * <p>For every day in {@code interval} this checks that the segment output folder exists and
 * contains one sub-folder per expected shard, that each shard folder holds a
 * {@code descriptor.json} and an {@code index.zip}, that the descriptor metadata (version,
 * interval, load spec, binary version, data source, dimensions, metrics and shard spec) has the
 * expected values, and that the rows read back from all shards of that day add up to
 * {@code data.size()}.
 *
 * @param job the job to run and verify
 * @throws IOException if an I/O error occurs while reading the job output
 */
private void verifyJob(IndexGeneratorJob job) throws IOException {
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    int segmentNum = 0;
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        // One entry of shardInfoForEachSegment per day-long segment, consumed in interval order.
        Integer[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        File segmentOutputFolder = new File(String.format("%s/%s/%s_%s/%s", config.getSchema().getIOConfig().getSegmentOutputPath(), config.getSchema().getDataSchema().getDataSource(), currTime.toString(), currTime.plusDays(1).toString(), config.getSchema().getTuningConfig().getVersion()));
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        // Rows are counted across all partitions of the current day.
        int rowCount = 0;
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File descriptor = new File(individualSegmentFolder, "descriptor.json");
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(descriptor.exists());
            Assert.assertTrue(indexZip.exists());

            // Descriptor metadata checks.
            DataSegment dataSegment = mapper.readValue(descriptor, DataSegment.class);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals(new Interval(currTime, currTime.plusDays(1)), dataSegment.getInterval());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            Assert.assertEquals(dataSourceName, dataSegment.getDataSource());
            // assertEquals (rather than assertTrue on ==) reports the actual size on failure.
            Assert.assertEquals(1, dataSegment.getDimensions().size());
            String[] dimensions = dataSegment.getDimensions().toArray(new String[dataSegment.getDimensions().size()]);
            Arrays.sort(dimensions);
            Assert.assertEquals("host", dimensions[0]);
            Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
            Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));

            // Shard spec checks: shardInfo[partitionNum] is {partitionNum, partitions}.
            Integer[] hashShardInfo = shardInfo[partitionNum];
            HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
            Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
            Assert.assertEquals((int) hashShardInfo[1], spec.getPartitions());

            // Unpack the segment and read its rows back to check the metric columns.
            File dir = Files.createTempDir();
            unzip(indexZip, dir);
            QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
            QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
            for (Rowboat row : adapter.getRows()) {
                Object[] metrics = row.getMetrics();
                rowCount++;
                Assert.assertEquals(2, metrics.length);
            }
        }
        // Expected value first, so a failure message reads "expected:<data.size()> but was:<rowCount>".
        Assert.assertEquals(data.size(), rowCount);
    }
}
Also used : HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) QueryableIndexIndexableAdapter(io.druid.segment.QueryableIndexIndexableAdapter) QueryableIndex(io.druid.segment.QueryableIndex) OrcFile(org.apache.orc.OrcFile) File(java.io.File) Rowboat(io.druid.segment.Rowboat) Interval(org.joda.time.Interval)

Example 2 with Rowboat

use of io.druid.segment.Rowboat in project druid by druid-io.

the class IncrementalIndexAdapter method getRows.

/**
 * Returns the facts of the wrapped index as an iterable of {@link Rowboat}s.
 *
 * <p>Each call to {@code iterator()} on the result builds a fresh transforming iterator, so the
 * row numbers assigned to the produced Rowboats start again at 0 for every iteration.
 */
@Override
public Iterable<Rowboat> getRows() {
    return new Iterable<Rowboat>() {

        @Override
        public Iterator<Rowboat> iterator() {
            final List<IncrementalIndex.DimensionDesc> dimensionDescs = index.getDimensions();
            final int dimensionCount = dimensionDescs.size();
            // Handlers and indexers are stored at the position reported by each dimension's getIndex().
            final DimensionHandler[] handlersByIndex = new DimensionHandler[dimensionCount];
            final DimensionIndexer[] indexersByIndex = new DimensionIndexer[dimensionCount];
            for (IncrementalIndex.DimensionDesc desc : dimensionDescs) {
                final int slot = desc.getIndex();
                handlersByIndex[slot] = desc.getHandler();
                indexersByIndex[slot] = desc.getIndexer();
            }

            final Iterator<Map.Entry<IncrementalIndex.TimeAndDims, Integer>> facts = index.getFacts().entrySet().iterator();

            /*
             * The function below keeps a counter that becomes the rowNum of each Rowboat it
             * produces. It is created anew on every iterator() call so the counter always
             * starts at 0.
             */
            final Function<Map.Entry<IncrementalIndex.TimeAndDims, Integer>, Rowboat> toRowboat = new Function<Map.Entry<IncrementalIndex.TimeAndDims, Integer>, Rowboat>() {

                int nextRowNum = 0;

                @Override
                public Rowboat apply(Map.Entry<IncrementalIndex.TimeAndDims, Integer> fact) {
                    final IncrementalIndex.TimeAndDims key = fact.getKey();
                    final Object[] unsortedDims = key.getDims();
                    final int factOffset = fact.getValue();

                    // Convert every present dimension value; absent ones stay null in the output.
                    final Object[] sortedDims = new Object[unsortedDims.length];
                    for (IncrementalIndex.DimensionDesc desc : dimensionDescs) {
                        final int slot = desc.getIndex();
                        if (slot < unsortedDims.length && unsortedDims[slot] != null) {
                            sortedDims[slot] = indexersByIndex[slot].convertUnsortedEncodedKeyComponentToSortedEncodedKeyComponent(unsortedDims[slot]);
                        }
                    }

                    // One metric value per aggregator, read at this fact's row offset.
                    final Object[] metricValues = new Object[index.getMetricAggs().length];
                    for (int metricIdx = 0; metricIdx < metricValues.length; metricIdx++) {
                        metricValues[metricIdx] = index.getMetricObjectValue(factOffset, metricIdx);
                    }

                    final int rowNum = nextRowNum++;
                    return new Rowboat(key.getTimestamp(), sortedDims, metricValues, rowNum, handlersByIndex);
                }
            };

            return Iterators.transform(facts, toRowboat);
        }
    };
}
Also used : DimensionHandler(io.druid.segment.DimensionHandler) DimensionIndexer(io.druid.segment.DimensionIndexer) Map(java.util.Map) Rowboat(io.druid.segment.Rowboat)

Example 3 with Rowboat

use of io.druid.segment.Rowboat in project druid by druid-io.

the class IncrementalIndexAdapterTest method testGetRowsIterable.

/**
 * Checks that the Iterable returned by {@code getRows()} can be iterated repeatedly and that
 * the row numbers of the produced Rowboats start at 0 on every pass.
 */
@Test
public void testGetRowsIterable() throws Exception {
    final long timestamp = System.currentTimeMillis();
    IncrementalIndex toPersist1 = IncrementalIndexTest.createIndex(null);
    IncrementalIndexTest.populateIndex(timestamp, toPersist1);
    final IndexableAdapter incrementalAdapter = new IncrementalIndexAdapter(
        toPersist1.getInterval(),
        toPersist1,
        INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory()
    );
    final Iterable<Rowboat> boats = incrementalAdapter.getRows();

    // First pass over the iterable.
    List<Rowboat> collected = new ArrayList<>();
    for (Rowboat rowboat : boats) {
        collected.add(rowboat);
    }
    Assert.assertEquals(2, collected.size());
    Assert.assertEquals(0, collected.get(0).getRowNum());
    Assert.assertEquals(1, collected.get(1).getRowNum());

    /* Iterate through the Iterable four more times; only the result of the last pass is checked. */
    for (int pass = 0; pass < 4; pass++) {
        collected = new ArrayList<>();
        for (Rowboat rowboat : boats) {
            collected.add(rowboat);
        }
    }
    Assert.assertEquals(2, collected.size());
    Assert.assertEquals(0, collected.get(0).getRowNum());
    Assert.assertEquals(1, collected.get(1).getRowNum());
}
Also used : ArrayList(java.util.ArrayList) IndexableAdapter(io.druid.segment.IndexableAdapter) Rowboat(io.druid.segment.Rowboat) IncrementalIndexTest(io.druid.segment.data.IncrementalIndexTest) Test(org.junit.Test)

Example 4 with Rowboat

use of io.druid.segment.Rowboat in project druid by druid-io.

the class RowboatTest method testBiggerCompare.

/**
 * Two Rowboats with 14 dimensions that differ only at positions 6 (552 vs 553) and 9
 * (25570 vs 25580) must not compare as equal.
 */
@Test
public void testBiggerCompare() {
    final DimensionHandler[] handlers = getDefaultHandlers(14);

    final int[][] firstDims = {
        { 0 }, { 138 }, { 44 }, { 374 }, { 0 }, { 0 }, { 552 },
        { 338 }, { 910 }, { 25570 }, { 9 }, { 0 }, { 0 }, { 0 }
    };
    final Object[] firstMetrics = { 1.0, 47.0, "someMetric" };
    final Rowboat first = new Rowboat(0, firstDims, firstMetrics, 0, handlers);

    final int[][] secondDims = {
        { 0 }, { 138 }, { 44 }, { 374 }, { 0 }, { 0 }, { 553 },
        { 338 }, { 910 }, { 25580 }, { 9 }, { 0 }, { 0 }, { 0 }
    };
    final Object[] secondMetrics = { 1.0, 47.0, "someMetric" };
    final Rowboat second = new Rowboat(0, secondDims, secondMetrics, 0, handlers);

    Assert.assertNotEquals(0, first.compareTo(second));
}
Also used : DimensionHandler(io.druid.segment.DimensionHandler) StringDimensionHandler(io.druid.segment.StringDimensionHandler) Rowboat(io.druid.segment.Rowboat) Test(org.junit.Test)

Example 5 with Rowboat

use of io.druid.segment.Rowboat in project druid by druid-io.

the class RowboatTest method testRowboatCompare.

/**
 * Checks {@code Rowboat.compareTo}: two Rowboats built from identical values compare as equal
 * (result 0), while changing the first dimension value from 1 to 3 yields a non-zero result.
 */
@Test
public void testRowboatCompare() {
    DimensionHandler[] handlers = getDefaultHandlers(3);
    // Integer.valueOf replaces the deprecated Integer(int) boxing constructor.
    Rowboat rb1 = new Rowboat(12345L, new int[][] { new int[] { 1 }, new int[] { 2 } }, new Object[] { Integer.valueOf(7) }, 5, handlers);
    Rowboat rb2 = new Rowboat(12345L, new int[][] { new int[] { 1 }, new int[] { 2 } }, new Object[] { Integer.valueOf(7) }, 5, handlers);
    Assert.assertEquals(0, rb1.compareTo(rb2));
    // Same timestamp, metrics and row number; only the first dimension value differs.
    Rowboat rb3 = new Rowboat(12345L, new int[][] { new int[] { 3 }, new int[] { 2 } }, new Object[] { Integer.valueOf(7) }, 5, handlers);
    Assert.assertNotEquals(0, rb1.compareTo(rb3));
}
Also used : DimensionHandler(io.druid.segment.DimensionHandler) StringDimensionHandler(io.druid.segment.StringDimensionHandler) Rowboat(io.druid.segment.Rowboat) Test(org.junit.Test)

Aggregations

Rowboat (io.druid.segment.Rowboat): 6, DimensionHandler (io.druid.segment.DimensionHandler): 3, Test (org.junit.Test): 3, IndexableAdapter (io.druid.segment.IndexableAdapter): 2, QueryableIndexIndexableAdapter (io.druid.segment.QueryableIndexIndexableAdapter): 2, StringDimensionHandler (io.druid.segment.StringDimensionHandler): 2, DataSegment (io.druid.timeline.DataSegment): 2, Interval (org.joda.time.Interval): 2, Function (com.google.common.base.Function): 1, Predicate (com.google.common.base.Predicate): 1, DimensionIndexer (io.druid.segment.DimensionIndexer): 1, IndexMerger (io.druid.segment.IndexMerger): 1, QueryableIndex (io.druid.segment.QueryableIndex): 1, RowboatFilteringIndexAdapter (io.druid.segment.RowboatFilteringIndexAdapter): 1, IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest): 1, TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder): 1, VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline): 1, HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec): 1, PartitionChunk (io.druid.timeline.partition.PartitionChunk): 1, File (java.io.File): 1