
Example 6 with HashBasedNumberedShardSpec

Use of io.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class OrcIndexGeneratorJobTest, method loadShardSpecs:

private Map<Long, List<HadoopyShardSpec>> loadShardSpecs(Integer[][][] shardInfoForEachShard) {
    Map<Long, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
    int shardCount = 0;
    int segmentNum = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
        List<ShardSpec> specs = Lists.newArrayList();
        for (Integer[] shardInfo : shardInfoForEachShard[segmentNum++]) {
            specs.add(new HashBasedNumberedShardSpec(shardInfo[0], shardInfo[1], null, HadoopDruidIndexerConfig.JSON_MAPPER));
        }
        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (ShardSpec spec : specs) {
            actualSpecs.add(new HadoopyShardSpec(spec, shardCount++));
        }
        shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
    }
    return shardSpecs;
}
Also used: HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), ShardSpec (io.druid.timeline.partition.ShardSpec), HadoopyShardSpec (io.druid.indexer.HadoopyShardSpec), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), Interval (org.joda.time.Interval)
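
For context, the shardInfoForEachShard argument carries one entry per segment interval, and each innermost Integer pair is {partitionNum, totalPartitions}. A hypothetical call (the outer array length must match config.getSegmentGranularIntervals()) might look like:

Map<Long, List<HadoopyShardSpec>> specs = loadShardSpecs(new Integer[][][]{
    // first interval: a single shard
    { {0, 1} },
    // second interval: hash-partitioned into two shards
    { {0, 2}, {1, 2} }
});

Note that HadoopyShardSpec wraps each ShardSpec with a globally increasing shardNum (shardCount above), so shard numbers are unique across all intervals of the job.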

Example 7 with HashBasedNumberedShardSpec

Use of io.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class BatchDeltaIngestionTest, method testIngestion:

private void testIngestion(HadoopDruidIndexerConfig config, List<ImmutableMap<String, Object>> expectedRowsGenerated, WindowedDataSegment windowedDataSegment) throws Exception {
    IndexGeneratorJob job = new IndexGeneratorJob(config);
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    File segmentFolder = new File(
        String.format(
            "%s/%s/%s_%s/%s/0",
            config.getSchema().getIOConfig().getSegmentOutputPath(),
            config.getSchema().getDataSchema().getDataSource(),
            INTERVAL_FULL.getStart().toString(),
            INTERVAL_FULL.getEnd().toString(),
            config.getSchema().getTuningConfig().getVersion()
        )
    );
    Assert.assertTrue(segmentFolder.exists());
    File descriptor = new File(segmentFolder, "descriptor.json");
    File indexZip = new File(segmentFolder, "index.zip");
    Assert.assertTrue(descriptor.exists());
    Assert.assertTrue(indexZip.exists());
    DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
    Assert.assertEquals("website", dataSegment.getDataSource());
    Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
    Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
    Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
    Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
    Assert.assertEquals("host", dataSegment.getDimensions().get(0));
    Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
    Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
    Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
    HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
    Assert.assertEquals(0, spec.getPartitionNum());
    Assert.assertEquals(1, spec.getPartitions());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
    QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    Firehose firehose = new IngestSegmentFirehose(
        ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
        ImmutableList.of("host"),
        ImmutableList.of("visited_sum", "unique_hosts"),
        null,
        Granularities.NONE
    );
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRowsGenerated, rows);
}
Also used: HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), Firehose (io.druid.data.input.Firehose), IngestSegmentFirehose (io.druid.segment.realtime.firehose.IngestSegmentFirehose), StorageAdapter (io.druid.segment.StorageAdapter), QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter), WindowedStorageAdapter (io.druid.segment.realtime.firehose.WindowedStorageAdapter), DataSegment (io.druid.timeline.DataSegment), WindowedDataSegment (io.druid.indexer.hadoop.WindowedDataSegment), LocalDataSegmentPuller (io.druid.segment.loading.LocalDataSegmentPuller), QueryableIndex (io.druid.segment.QueryableIndex), InputRow (io.druid.data.input.InputRow), File (java.io.File)
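
For reference, the assertions above rely on the Hadoop indexer's segment output layout, which (as the format string in the test shows) places each partition's files under a path built from data source, interval, and version:

{segmentOutputPath}/{dataSource}/{intervalStart}_{intervalEnd}/{version}/{partitionNum}/
    descriptor.json   (DataSegment metadata, deserialized with MAPPER above)
    index.zip         (the zipped segment, fetched here via LocalDataSegmentPuller)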

Example 8 with HashBasedNumberedShardSpec

Use of io.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class HadoopDruidIndexerConfigTest, method testHashedBucketSelection:

@Test
public void testHashedBucketSelection() {
    List<HadoopyShardSpec> specs = Lists.newArrayList();
    final int partitionCount = 10;
    for (int i = 0; i < partitionCount; i++) {
        specs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, partitionCount, null, new DefaultObjectMapper()), i));
    }
    HadoopIngestionSpec spec = new HadoopIngestionSpec(
        new DataSchema(
            "foo",
            null,
            new AggregatorFactory[0],
            new UniformGranularitySpec(Granularities.MINUTE, Granularities.MINUTE, ImmutableList.of(new Interval("2010-01-01/P1D"))),
            jsonMapper
        ),
        new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "bar", "type", "static"), null, null),
        new HadoopTuningConfig(
            null, null, null,
            ImmutableMap.of(new DateTime("2010-01-01T01:00:00").getMillis(), specs),
            null, null, false, false, false, false, null, false, false, null, null, null, false, false
        )
    );
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(spec);
    final List<String> dims = Arrays.asList("diM1", "dIM2");
    final ImmutableMap<String, Object> values = ImmutableMap.<String, Object>of("Dim1", "1", "DiM2", "2", "dim1", "3", "dim2", "4");
    final long timestamp = new DateTime("2010-01-01T01:00:01").getMillis();
    final Bucket expectedBucket = config.getBucket(new MapBasedInputRow(timestamp, dims, values)).get();
    final long nextBucketTimestamp = Granularities.MINUTE.bucketEnd(new DateTime(timestamp)).getMillis();
    // check that all rows with the same dims and the same truncated timestamp hash to the same bucket
    for (int i = 0; timestamp + i < nextBucketTimestamp; i++) {
        Assert.assertEquals(expectedBucket.partitionNum, config.getBucket(new MapBasedInputRow(timestamp + i, dims, values)).get().partitionNum);
    }
}
Also used: HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), DateTime (org.joda.time.DateTime), DataSchema (io.druid.segment.indexing.DataSchema), UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), Interval (org.joda.time.Interval), Test (org.junit.Test)
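
The test depends on HashBasedNumberedShardSpec assigning a row to a partition by hashing a group key built from the truncated timestamp and the row's dimension values, so rows that agree on both always land in the same bucket. Below is a minimal sketch of that idea; it assumes murmur3 over a JSON-serialized key, which mirrors the approach in this Druid version, but the exact group-key serialization may differ from Druid's internals:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.hash.Hashing;

public class HashBucketSketch {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    // Pick a partition in [0, partitions) from the truncated timestamp plus the
    // row's dimension values (ignoring the Integer.MIN_VALUE edge case for brevity).
    static int selectPartition(long truncatedTimestamp, Object dimensionValues, int partitions) throws Exception {
        byte[] keyBytes = MAPPER.writeValueAsBytes(new Object[]{truncatedTimestamp, dimensionValues});
        int hash = Hashing.murmur3_32().hashBytes(keyBytes).asInt();
        return Math.abs(hash) % partitions;
    }
}

Because the timestamp is truncated to the minute granularity of the spec before hashing, every row inside one minute bucket with the same dimension values yields the same partitionNum, which is exactly what the loop at the end of the test asserts.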

Example 9 with HashBasedNumberedShardSpec

Use of io.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class IndexGeneratorJobTest, method verifyJob:

private void verifyJob(IndexGeneratorJob job) throws IOException {
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    int segmentNum = 0;
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        File segmentOutputFolder = new File(
            String.format(
                "%s/%s/%s_%s/%s",
                config.getSchema().getIOConfig().getSegmentOutputPath(),
                config.getSchema().getDataSchema().getDataSource(),
                currTime.toString(),
                currTime.plusDays(1).toString(),
                config.getSchema().getTuningConfig().getVersion()
            )
        );
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File descriptor = new File(individualSegmentFolder, "descriptor.json");
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(descriptor.exists());
            Assert.assertTrue(indexZip.exists());
            DataSegment dataSegment = mapper.readValue(descriptor, DataSegment.class);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals(new Interval(currTime, currTime.plusDays(1)), dataSegment.getInterval());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            if (datasourceName.equals("website")) {
                Assert.assertEquals("website", dataSegment.getDataSource());
                Assert.assertEquals("host", dataSegment.getDimensions().get(0));
                Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
                Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
            } else if (datasourceName.equals("inherit_dims")) {
                Assert.assertEquals("inherit_dims", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("X", "Y", "M", "Q", "B", "F"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else if (datasourceName.equals("inherit_dims2")) {
                Assert.assertEquals("inherit_dims2", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("B", "F", "M", "Q", "X", "Y"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else {
                Assert.fail("Test did not specify supported datasource name");
            }
            if (forceExtendableShardSpecs) {
                NumberedShardSpec spec = (NumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(partitionNum, spec.getPartitionNum());
                Assert.assertEquals(shardInfo.length, spec.getPartitions());
            } else if (partitionType.equals("hashed")) {
                Integer[] hashShardInfo = (Integer[]) shardInfo[partitionNum];
                HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
                Assert.assertEquals((int) hashShardInfo[1], spec.getPartitions());
            } else if (partitionType.equals("single")) {
                String[] singleDimensionShardInfo = (String[]) shardInfo[partitionNum];
                SingleDimensionShardSpec spec = (SingleDimensionShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(singleDimensionShardInfo[0], spec.getStart());
                Assert.assertEquals(singleDimensionShardInfo[1], spec.getEnd());
            } else {
                throw new RuntimeException(String.format("Invalid partition type:[%s]", partitionType));
            }
        }
    }
}
Also used: HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), NumberedShardSpec (io.druid.timeline.partition.NumberedShardSpec), SingleDimensionShardSpec (io.druid.timeline.partition.SingleDimensionShardSpec), DataSegment (io.druid.timeline.DataSegment), DateTime (org.joda.time.DateTime), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), Interval (org.joda.time.Interval)
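
The three shard spec types exercised above differ in how rows map to partitions: NumberedShardSpec is a plain numbered chunk, HashBasedNumberedShardSpec assigns rows by hashing the group key, and SingleDimensionShardSpec assigns rows by a lexicographic range over a single dimension. Illustrative constructions follow, reusing the constructor shapes that appear in these tests where possible; the SingleDimensionShardSpec signature is an assumption for this Druid version and should be treated as a sketch:

ShardSpec hashed = new HashBasedNumberedShardSpec(0, 2, null, new DefaultObjectMapper());
ShardSpec numbered = new NumberedShardSpec(0, 2);
// dimension "host", covering rows with host < "m" (a null start means unbounded)
ShardSpec single = new SingleDimensionShardSpec("host", null, "m", 0);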

Aggregations

HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec): 9
Interval (org.joda.time.Interval): 7
DateTime (org.joda.time.DateTime): 5
List (java.util.List): 4
UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 3
DataSegment (io.druid.timeline.DataSegment): 3
File (java.io.File): 3
ImmutableList (com.google.common.collect.ImmutableList): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 2
Firehose (io.druid.data.input.Firehose): 2
InputRow (io.druid.data.input.InputRow): 2
QueryableIndex (io.druid.segment.QueryableIndex): 2
DataSchema (io.druid.segment.indexing.DataSchema): 2
NumberedShardSpec (io.druid.timeline.partition.NumberedShardSpec): 2
ShardSpec (io.druid.timeline.partition.ShardSpec): 2
Map (java.util.Map): 2
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
Optional (com.google.common.base.Optional): 1
ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap): 1
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 1