Search in sources :

Example 11 with NumberedShardSpec

use of io.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class FiniteAppenderatorDriverTest method testSimple.

/**
 * Adds every row in ROWS through the driver under the "dummy" sequence name, finishes the job,
 * and verifies both the published segment identifiers and the final commit metadata.
 */
@Test
public void testSimple() throws Exception {
    final TestCommitterSupplier<Integer> supplier = new TestCommitterSupplier<>();
    Assert.assertNull(driver.startJob());

    // The metadata handed to the committer is the 1-based position of the row being added.
    int rowIndex = 0;
    while (rowIndex < ROWS.size()) {
        supplier.setMetadata(rowIndex + 1);
        Assert.assertNotNull(driver.add(ROWS.get(rowIndex), "dummy", supplier));
        rowIndex++;
    }

    final SegmentsAndMetadata result = driver.finish(makeOkPublisher(), supplier.get());

    // One segment is expected for each of the two hourly intervals.
    final SegmentIdentifier firstHour = new SegmentIdentifier(DATA_SOURCE, new Interval("2000/PT1H"), VERSION, new NumberedShardSpec(0, 0));
    final SegmentIdentifier secondHour = new SegmentIdentifier(DATA_SOURCE, new Interval("2000T01/PT1H"), VERSION, new NumberedShardSpec(0, 0));
    Assert.assertEquals(ImmutableSet.of(firstHour, secondHour), asIdentifiers(result.getSegments()));
    Assert.assertEquals(3, result.getCommitMetadata());
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 12 with NumberedShardSpec

use of io.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class NumberedShardSpecTest method testSerdeRoundTrip.

/**
 * Serializes a NumberedShardSpec(1, 2) with the test mapper, reads it back as a ShardSpec,
 * and checks that the partition number and partition count come back unchanged.
 */
@Test
public void testSerdeRoundTrip() throws Exception {
    final byte[] serialized = TestUtil.MAPPER.writeValueAsBytes(new NumberedShardSpec(1, 2));
    final ShardSpec roundTripped = TestUtil.MAPPER.readValue(serialized, ShardSpec.class);

    Assert.assertEquals(1, roundTripped.getPartitionNum());

    // The cast is performed only after the partition number check, as in the original assertion order.
    final NumberedShardSpec numbered = (NumberedShardSpec) roundTripped;
    Assert.assertEquals(2, numbered.getPartitions());
}
Also used : NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) Test(org.junit.Test)

Example 13 with NumberedShardSpec

use of io.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class NumberedShardSpecTest method testPartitionChunks.

/**
 * Creates one chunk per shard spec of a three-partition set and verifies chunk numbers,
 * start/end flags, and that a chunk abuts only the chunk immediately following it.
 */
@Test
public void testPartitionChunks() {
    final List<ShardSpec> shardSpecs = ImmutableList.<ShardSpec>of(new NumberedShardSpec(0, 3), new NumberedShardSpec(1, 3), new NumberedShardSpec(2, 3));
    final Function<ShardSpec, PartitionChunk<String>> toChunk = new Function<ShardSpec, PartitionChunk<String>>() {

        @Override
        public PartitionChunk<String> apply(ShardSpec shardSpec) {
            return shardSpec.createChunk("rofl");
        }
    };
    final List<PartitionChunk<String>> chunks = Lists.transform(shardSpecs, toChunk);
    final int lastIndex = 2;

    // Per-chunk properties: number matches position, only the first is a start, only the last is an end.
    for (int i = 0; i <= lastIndex; i++) {
        Assert.assertEquals(i, chunks.get(i).getChunkNumber());
        if (i == 0) {
            Assert.assertTrue(chunks.get(i).isStart());
        } else {
            Assert.assertFalse(chunks.get(i).isStart());
        }
        if (i == lastIndex) {
            Assert.assertTrue(chunks.get(i).isEnd());
        } else {
            Assert.assertFalse(chunks.get(i).isEnd());
        }
    }

    // Pairwise property: chunk i abuts chunk j exactly when j directly follows i.
    for (int i = 0; i <= lastIndex; i++) {
        for (int j = 0; j <= lastIndex; j++) {
            if (j == i + 1) {
                Assert.assertTrue(chunks.get(i).abuts(chunks.get(j)));
            } else {
                Assert.assertFalse(chunks.get(i).abuts(chunks.get(j)));
            }
        }
    }
}
Also used : PartitionChunk(io.druid.timeline.partition.PartitionChunk) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) Test(org.junit.Test)

Example 14 with NumberedShardSpec

use of io.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class NumberedShardSpecTest method testSerdeBackwardsCompat.

/**
 * Reads a hand-written "numbered" shard spec JSON document and checks that it deserializes
 * to a NumberedShardSpec with partition number 1 and 2 partitions.
 */
@Test
public void testSerdeBackwardsCompat() throws Exception {
    final String json = "{\"type\": \"numbered\", \"partitions\": 2, \"partitionNum\": 1}";
    final ShardSpec parsed = TestUtil.MAPPER.readValue(json, ShardSpec.class);

    Assert.assertEquals(1, parsed.getPartitionNum());

    final NumberedShardSpec numbered = (NumberedShardSpec) parsed;
    Assert.assertEquals(2, numbered.getPartitions());
}
Also used : NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) Test(org.junit.Test)

Example 15 with NumberedShardSpec

use of io.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class IndexTask method generateAndPublishSegments.

/**
 * Reads every row from a firehose, hands each row to an appenderator driver under a
 * per-interval/per-partition sequence name, and finally publishes the resulting segments
 * through a {@link SegmentTransactionalInsertAction}.
 *
 * <p>The appenderator, the driver and the firehose are opened in a try-with-resources
 * statement, so all three are closed when this method exits, normally or exceptionally.
 *
 * @param toolbox         supplies the task action client and, optionally, a monitor scheduler
 * @param dataSchema      provides the granularity spec and the parser used to connect the firehose
 * @param shardSpecs      shard specs per bucket interval; an {@code ISE} is thrown if a row falls
 *                        into an interval whose entry is missing or empty
 * @param version         used in sequence names and, when not appending to existing data, in the
 *                        segment identifiers built by the local allocator
 * @param firehoseFactory connected with the schema's parser to obtain the input rows
 * @return {@code true} if {@code driver.finish} returned a non-null result, {@code false} otherwise
 * @throws IOException          declared by the signature (exact sources are in callees not visible here)
 * @throws InterruptedException declared by the signature (exact sources are in callees not visible here)
 */
private boolean generateAndPublishSegments(final TaskToolbox toolbox, final DataSchema dataSchema, final Map<Interval, List<ShardSpec>> shardSpecs, final String version, final FirehoseFactory firehoseFactory) throws IOException, InterruptedException {
    final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
    // This FireDepartment is created only to obtain a metrics object; its IO config is all nulls.
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
    final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    // Filled in by the row loop below and read by the non-appending allocator when a segment is allocated.
    final Map<String, ShardSpec> sequenceNameToShardSpecMap = Maps.newHashMap();
    // Metrics are reported only when the toolbox has a monitor scheduler.
    if (toolbox.getMonitorScheduler() != null) {
        toolbox.getMonitorScheduler().addMonitor(new RealtimeMetricsMonitor(ImmutableList.of(fireDepartmentForMetrics), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })));
    }
    // Appending delegates allocation to ActionBasedSegmentAllocator; otherwise identifiers are
    // built locally from the row's bucket interval and the shard spec registered for the sequence.
    final SegmentAllocator segmentAllocator;
    if (ingestionSchema.getIOConfig().isAppendToExisting()) {
        segmentAllocator = new ActionBasedSegmentAllocator(toolbox.getTaskActionClient(), dataSchema);
    } else {
        segmentAllocator = new SegmentAllocator() {

            @Override
            public SegmentIdentifier allocate(DateTime timestamp, String sequenceName, String previousSegmentId) throws IOException {
                Optional<Interval> interval = granularitySpec.bucketInterval(timestamp);
                if (!interval.isPresent()) {
                    throw new ISE("Could not find interval for timestamp [%s]", timestamp);
                }
                // The shard spec must have been registered by the row loop before the row was added.
                ShardSpec shardSpec = sequenceNameToShardSpecMap.get(sequenceName);
                if (shardSpec == null) {
                    throw new ISE("Could not find ShardSpec for sequenceName [%s]", sequenceName);
                }
                return new SegmentIdentifier(getDataSource(), interval.get(), version, shardSpec);
            }
        };
    }
    try (final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema);
        final FiniteAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator, fireDepartmentMetrics);
        final Firehose firehose = firehoseFactory.connect(dataSchema.getParser())) {
        final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
        // Cache of one lookup per bucket interval, built lazily on the first row of that interval.
        final Map<Interval, ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
        // NOTE(review): a non-null startJob() result presumably means state restored from an
        // earlier run, which is discarded here — confirm against FiniteAppenderatorDriver.
        if (driver.startJob() != null) {
            driver.clear();
        }
        try {
            while (firehose.hasMore()) {
                try {
                    final InputRow inputRow = firehose.nextRow();
                    final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
                    // Rows outside every bucket interval are counted as thrown away and skipped.
                    if (!optInterval.isPresent()) {
                        fireDepartmentMetrics.incrementThrownAway();
                        continue;
                    }
                    final Interval interval = optInterval.get();
                    if (!shardSpecLookups.containsKey(interval)) {
                        final List<ShardSpec> intervalShardSpecs = shardSpecs.get(interval);
                        if (intervalShardSpecs == null || intervalShardSpecs.isEmpty()) {
                            throw new ISE("Failed to get shardSpec for interval[%s]", interval);
                        }
                        // The lookup is obtained from the first spec and given the full list for the interval.
                        shardSpecLookups.put(interval, intervalShardSpecs.get(0).getLookup(intervalShardSpecs));
                    }
                    final ShardSpec shardSpec = shardSpecLookups.get(interval).getShardSpec(inputRow.getTimestampFromEpoch(), inputRow);
                    // One sequence per (interval, version, partition number).
                    final String sequenceName = String.format("index_%s_%s_%d", interval, version, shardSpec.getPartitionNum());
                    if (!sequenceNameToShardSpecMap.containsKey(sequenceName)) {
                        // With forced extendable shard specs or when appending, publish under a
                        // NumberedShardSpec (same partition number, partition count = number of
                        // specs for the interval); otherwise publish under the looked-up spec itself.
                        final ShardSpec shardSpecForPublishing = ingestionSchema.getTuningConfig().isForceExtendableShardSpecs() || ingestionSchema.getIOConfig().isAppendToExisting() ? new NumberedShardSpec(shardSpec.getPartitionNum(), shardSpecs.get(interval).size()) : shardSpec;
                        sequenceNameToShardSpecMap.put(sequenceName, shardSpecForPublishing);
                    }
                    final SegmentIdentifier identifier = driver.add(inputRow, sequenceName, committerSupplier);
                    // A null identifier from the driver is treated as an allocation failure.
                    if (identifier == null) {
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                    fireDepartmentMetrics.incrementProcessed();
                } catch (ParseException e) {
                    // Parse failures abort the task only when the tuning config asks for them to be
                    // reported; otherwise they are counted and the loop moves on to the next row.
                    if (ingestionSchema.getTuningConfig().isReportParseExceptions()) {
                        throw e;
                    } else {
                        fireDepartmentMetrics.incrementUnparseable();
                    }
                }
            }
        } finally {
            // Runs whether the loop finished normally or an exception is propagating.
            driver.persist(committerSupplier.get());
        }
        // The publisher ignores commitMetadata and submits the segments with null for the
        // remaining two SegmentTransactionalInsertAction arguments.
        final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {

            @Override
            public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
                final SegmentTransactionalInsertAction action = new SegmentTransactionalInsertAction(segments, null, null);
                return toolbox.getTaskActionClient().submit(action).isSuccess();
            }
        };
        final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
        // A null result from finish() is reported as a publishing failure.
        if (published == null) {
            log.error("Failed to publish segments, aborting!");
            return false;
        } else {
            // Log the identifiers of all published segments as a comma-separated list.
            log.info("Published segments[%s]", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {

                @Override
                public String apply(DataSegment input) {
                    return input.getIdentifier();
                }
            })));
            return true;
        }
    }
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) SortedSet(java.util.SortedSet) Set(java.util.Set) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) ShardSpecLookup(io.druid.timeline.partition.ShardSpecLookup) SegmentTransactionalInsertAction(io.druid.indexing.common.actions.SegmentTransactionalInsertAction) DataSegment(io.druid.timeline.DataSegment) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) DateTime(org.joda.time.DateTime) FireDepartment(io.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ActionBasedSegmentAllocator(io.druid.indexing.appenderator.ActionBasedSegmentAllocator) ISE(io.druid.java.util.common.ISE) Optional(com.google.common.base.Optional) Firehose(io.druid.data.input.Firehose) SegmentsAndMetadata(io.druid.segment.realtime.appenderator.SegmentsAndMetadata) IOException(java.io.IOException) FireDepartmentMetrics(io.druid.segment.realtime.FireDepartmentMetrics) Appenderator(io.druid.segment.realtime.appenderator.Appenderator) GranularitySpec(io.druid.segment.indexing.granularity.GranularitySpec) SegmentAllocator(io.druid.segment.realtime.appenderator.SegmentAllocator) FiniteAppenderatorDriver(io.druid.segment.realtime.appenderator.FiniteAppenderatorDriver) InputRow(io.druid.data.input.InputRow) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) ParseException(io.druid.java.util.common.parsers.ParseException) Interval(org.joda.time.Interval)

Aggregations

NumberedShardSpec (io.druid.timeline.partition.NumberedShardSpec) 25, Test (org.junit.Test) 17, DataSegment (io.druid.timeline.DataSegment) 11, SegmentIdentifier (io.druid.segment.realtime.appenderator.SegmentIdentifier) 10, Interval (org.joda.time.Interval) 8, NoopTask (io.druid.indexing.common.task.NoopTask) 7, Task (io.druid.indexing.common.task.Task) 7, HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec) 6, DateTime (org.joda.time.DateTime) 6, ShardSpec (io.druid.timeline.partition.ShardSpec) 5, TaskLock (io.druid.indexing.common.TaskLock) 4, Predicate (com.google.common.base.Predicate) 3, File (java.io.File) 3, Path (org.apache.hadoop.fs.Path) 3, ImmutableSegmentLoadInfo (io.druid.client.ImmutableSegmentLoadInfo) 2, CoordinatorClient (io.druid.client.coordinator.CoordinatorClient) 2, SegmentTransactionalInsertAction (io.druid.indexing.common.actions.SegmentTransactionalInsertAction) 2, ISE (io.druid.java.util.common.ISE) 2, SegmentDescriptor (io.druid.query.SegmentDescriptor) 2, NoneShardSpec (io.druid.timeline.partition.NoneShardSpec) 2