Search in sources :

Example 46 with NumberedShardSpec

use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class HdfsDataSegmentPusherTest method shouldMakeHDFSCompliantSegmentOutputPath.

/**
 * Checks the segment paths produced by {@code JobHelper} for an HDFS output location.
 *
 * <p>The tuning config version used here is {@code "some:brand:new:version"}; both expected
 * paths contain {@code "some_brand_new_version"} instead, i.e. the test expects every ':' of
 * the version to show up as '_' in the final index path and in the per-attempt temporary path.
 */
@Test
public void shouldMakeHDFSCompliantSegmentOutputPath() {
    final String specJson = "{\n"
        + "    \"dataSchema\": {\n"
        + "        \"dataSource\": \"source\",\n"
        + "        \"metricsSpec\": [],\n"
        + "        \"granularitySpec\": {\n"
        + "            \"type\": \"uniform\",\n"
        + "            \"segmentGranularity\": \"hour\",\n"
        + "            \"intervals\": [\"2012-07-10/P1D\"]\n"
        + "        }\n"
        + "    },\n"
        + "    \"ioConfig\": {\n"
        + "        \"type\": \"hadoop\",\n"
        + "        \"segmentOutputPath\": \"hdfs://server:9100/tmp/druid/datatest\"\n"
        + "    }\n"
        + "}";

    HadoopIngestionSpec schema;
    try {
        schema = objectMapper.readValue(specJson, HadoopIngestionSpec.class);
    } catch (Exception e) {
        // A spec that cannot be parsed is a broken fixture; fail the test with the cause attached.
        throw new RuntimeException(e);
    }

    HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(
        schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version")));

    // Month written as plain decimal 7: a leading zero makes an octal literal in Java.
    Bucket bucket = new Bucket(4711, new DateTime(2012, 7, 10, 5, 30, ISOChronology.getInstanceUTC()), 4712);

    // Both path helpers are exercised with an identical segment, so describe it once.
    final DataSegment segment = new DataSegment(
        cfg.getSchema().getDataSchema().getDataSource(),
        cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
        cfg.getSchema().getTuningConfig().getVersion(),
        null,
        null,
        null,
        new NumberedShardSpec(bucket.partitionNum, 5000),
        -1,
        0);

    final String expectedVersionDir =
        "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version";

    Path path = JobHelper.makeFileNamePath(
        new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
        new DistributedFileSystem(),
        segment,
        JobHelper.INDEX_ZIP,
        hdfsDataSegmentPusher);
    Assert.assertEquals(expectedVersionDir + "/4712_index.zip", path.toString());

    path = JobHelper.makeTmpPath(
        new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
        new DistributedFileSystem(),
        segment,
        new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0),
        hdfsDataSegmentPusher);
    Assert.assertEquals(expectedVersionDir + "/4712_index.zip.0", path.toString());
}
Also used : HadoopIngestionSpec(org.apache.druid.indexer.HadoopIngestionSpec) Path(org.apache.hadoop.fs.Path) Bucket(org.apache.druid.indexer.Bucket) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) HadoopDruidIndexerConfig(org.apache.druid.indexer.HadoopDruidIndexerConfig) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DataSegment(org.apache.druid.timeline.DataSegment) ExpectedException(org.junit.rules.ExpectedException) IOException(java.io.IOException) DateTime(org.joda.time.DateTime) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)

Example 47 with NumberedShardSpec

use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class SegmentAllocateActionTest method testCannotAddToExistingNumberedShardSpecsWithCoarserQueryGranularity.

/**
 * Announces two {@code NumberedShardSpec} segments (partitions 0 and 1 of 2) for the hour
 * bucket of {@code PARTY_TIME}, then asserts that an allocation passing
 * {@code Granularities.DAY} for both granularity arguments returns {@code null}.
 */
@Test
public void testCannotAddToExistingNumberedShardSpecsWithCoarserQueryGranularity() throws Exception {
    final Task noopTask = NoopTask.create();

    final DataSegment partitionZero = DataSegment.builder()
        .dataSource(DATA_SOURCE)
        .interval(Granularities.HOUR.bucket(PARTY_TIME))
        .version(PARTY_TIME.toString())
        .shardSpec(new NumberedShardSpec(0, 2))
        .size(0)
        .build();
    final DataSegment partitionOne = DataSegment.builder()
        .dataSource(DATA_SOURCE)
        .interval(Granularities.HOUR.bucket(PARTY_TIME))
        .version(PARTY_TIME.toString())
        .shardSpec(new NumberedShardSpec(1, 2))
        .size(0)
        .build();

    taskActionTestKit.getMetadataStorageCoordinator()
                     .announceHistoricalSegments(ImmutableSet.of(partitionZero, partitionOne));
    taskActionTestKit.getTaskLockbox().add(noopTask);

    final SegmentIdWithShardSpec allocated =
        allocate(noopTask, PARTY_TIME, Granularities.DAY, Granularities.DAY, "s1", null);
    Assert.assertNull(allocated);
}
Also used : Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)

Example 48 with NumberedShardSpec

use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class SegmentAllocateActionTest method testMultipleSequences.

/**
 * Allocates segments for two sequence names ("s1" and "s2") across {@code PARTY_TIME} and
 * {@code THE_DISTANT_FUTURE}, then compares each allocated identifier with the expected
 * {@code NumberedShardSpec} partition number for its hour bucket.
 *
 * <p>With {@code LockGranularity.TIME_CHUNK} exactly one lock per interval is expected and its
 * version is used for every identifier in that interval. Otherwise three locks are expected
 * for the party interval and two for the future interval, one per allocated segment. A repeated
 * request for "s1" with no previous id must give back the first identifier again.
 */
@Test
public void testMultipleSequences() {
    final Task noopTask = NoopTask.create();
    taskActionTestKit.getTaskLockbox().add(noopTask);

    final SegmentIdWithShardSpec id1 =
        allocate(noopTask, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", null);
    final SegmentIdWithShardSpec id2 =
        allocate(noopTask, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s2", null);
    final SegmentIdWithShardSpec id3 =
        allocate(noopTask, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
    final SegmentIdWithShardSpec id4 =
        allocate(noopTask, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.HOUR, "s1", id3.toString());
    final SegmentIdWithShardSpec id5 =
        allocate(noopTask, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.HOUR, "s2", id2.toString());
    final SegmentIdWithShardSpec id6 =
        allocate(noopTask, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", null);

    if (lockGranularity == LockGranularity.TIME_CHUNK) {
        // Time-chunk locking: a single lock covers each interval.
        final TaskLock lockForParty = Iterables.getOnlyElement(
            FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(noopTask))
                          .filter(lock -> lock.getInterval().contains(PARTY_TIME)));
        final TaskLock lockForFuture = Iterables.getOnlyElement(
            FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(noopTask))
                          .filter(lock -> lock.getInterval().contains(THE_DISTANT_FUTURE)));

        assertSameIdentifier(
            id1,
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), lockForParty.getVersion(), new NumberedShardSpec(0, 0)));
        assertSameIdentifier(
            id2,
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), lockForParty.getVersion(), new NumberedShardSpec(1, 0)));
        assertSameIdentifier(
            id3,
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), lockForParty.getVersion(), new NumberedShardSpec(2, 0)));
        assertSameIdentifier(
            id4,
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), lockForFuture.getVersion(), new NumberedShardSpec(0, 0)));
        assertSameIdentifier(
            id5,
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), lockForFuture.getVersion(), new NumberedShardSpec(1, 0)));
    } else {
        // Any other lock granularity: one lock per allocated segment is expected.
        final List<TaskLock> locksForParty = taskActionTestKit
            .getTaskLockbox()
            .findLocksForTask(noopTask)
            .stream()
            .filter(lock -> lock.getInterval().contains(PARTY_TIME))
            .collect(Collectors.toList());
        Assert.assertEquals(3, locksForParty.size());
        assertSameIdentifier(
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), locksForParty.get(0).getVersion(), new NumberedShardSpec(0, 0)),
            id1);
        assertSameIdentifier(
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), locksForParty.get(1).getVersion(), new NumberedShardSpec(1, 0)),
            id2);
        assertSameIdentifier(
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), locksForParty.get(2).getVersion(), new NumberedShardSpec(2, 0)),
            id3);

        final List<TaskLock> locksForFuture = taskActionTestKit
            .getTaskLockbox()
            .findLocksForTask(noopTask)
            .stream()
            .filter(lock -> lock.getInterval().contains(THE_DISTANT_FUTURE))
            .collect(Collectors.toList());
        Assert.assertEquals(2, locksForFuture.size());
        assertSameIdentifier(
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), locksForFuture.get(0).getVersion(), new NumberedShardSpec(0, 0)),
            id4);
        assertSameIdentifier(
            new SegmentIdWithShardSpec(
                DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), locksForFuture.get(1).getVersion(), new NumberedShardSpec(1, 0)),
            id5);
    }

    // Asking again for sequence "s1" with no previous id must return the first allocation.
    assertSameIdentifier(id1, id6);
}
Also used : NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) Iterables(com.google.common.collect.Iterables) Granularity(org.apache.druid.java.util.common.granularity.Granularity) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) FluentIterable(com.google.common.collect.FluentIterable) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Task(org.apache.druid.indexing.common.task.Task) Map(java.util.Map) TaskLock(org.apache.druid.indexing.common.TaskLock) ExpectedException(org.junit.rules.ExpectedException) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) DateTimes(org.apache.druid.java.util.common.DateTimes) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) Period(org.joda.time.Period) ImmutableSet(com.google.common.collect.ImmutableSet) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Granularities(org.apache.druid.java.util.common.granularity.Granularities) NoopTask(org.apache.druid.indexing.common.task.NoopTask) List(java.util.List) Rule(org.junit.Rule) Predicate(com.google.common.base.Predicate) 
SegmentLock(org.apache.druid.indexing.common.SegmentLock) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) LinearPartialShardSpec(org.apache.druid.timeline.partition.LinearPartialShardSpec) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) Assert(org.junit.Assert) Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) TaskLock(org.apache.druid.indexing.common.TaskLock) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Predicate(com.google.common.base.Predicate) Test(org.junit.Test)

Example 49 with NumberedShardSpec

use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class CachingClusteredClientCacheKeyManagerTest method makeServerSelector.

/**
 * Builds a {@link SegmentServerSelector} around mocked collaborators.
 *
 * <p>The mocked server reports {@code isHistorical} from {@code isSegmentReplicationTarget()},
 * and the mocked selector hands back a segment whose id and shard spec are derived from
 * {@code partitionNumber}. Using different partition numbers, it's possible to create
 * segments with different ids.
 *
 * @param isHistorical    value the mocked server returns from {@code isSegmentReplicationTarget()}
 * @param partitionNumber partition number used for both the dummy segment id and the shard spec
 * @return a selector paired with the descriptor of the generated segment id
 */
private SegmentServerSelector makeServerSelector(boolean isHistorical, int partitionNumber) {
    final SegmentId id = SegmentId.dummy("data-source", partitionNumber);
    final DataSegment dataSegment = new DataSegment(
        id,
        null,
        null,
        null,
        new NumberedShardSpec(partitionNumber, 10),
        null,
        0,
        0);

    final ServerSelector selector = mock(ServerSelector.class);
    final QueryableDruidServer queryableServer = mock(QueryableDruidServer.class);
    final DruidServer druidServer = mock(DruidServer.class);

    // Every stubbed call may be hit any number of times by the code under test.
    expect(druidServer.isSegmentReplicationTarget()).andReturn(isHistorical).anyTimes();
    expect(selector.pick(query)).andReturn(queryableServer).anyTimes();
    expect(queryableServer.getServer()).andReturn(druidServer).anyTimes();
    expect(selector.getSegment()).andReturn(dataSegment).anyTimes();
    replay(selector, queryableServer, druidServer);

    return new SegmentServerSelector(selector, id.toDescriptor());
}
Also used : ServerSelector(org.apache.druid.client.selector.ServerSelector) SegmentId(org.apache.druid.timeline.SegmentId) QueryableDruidServer(org.apache.druid.client.selector.QueryableDruidServer) DataSegment(org.apache.druid.timeline.DataSegment) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec)

Example 50 with NumberedShardSpec

use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

the class SegmentManagerThreadSafetyTest method createSegment.

/**
 * Creates a test segment for {@code interval} and prepares its directory on disk.
 *
 * <p>The segment's default storage directory is created beneath {@code segmentDeepStorageDir},
 * a {@code factory.json} holding a serialized {@code TestSegmentizerFactory} is written into
 * it, and the segment is returned with a {@code "local"} load spec whose path is that directory.
 *
 * @param interval interval string handed to {@code Intervals.of}
 * @return the segment carrying a load spec that points at the prepared directory
 * @throws IOException if the directory or {@code factory.json} cannot be written
 */
private DataSegment createSegment(String interval) throws IOException {
    final DataSegment segment = new DataSegment(
        "dataSource",
        Intervals.of(interval),
        "version",
        Collections.emptyMap(),
        Collections.emptyList(),
        Collections.emptyList(),
        new NumberedShardSpec(0, 0),
        9,
        100);

    final File dir = new File(segmentDeepStorageDir, DataSegmentPusher.getDefaultStorageDir(segment, false));
    FileUtils.mkdirp(dir);
    objectMapper.writeValue(new File(dir, "factory.json"), new TestSegmentizerFactory());

    return segment.withLoadSpec(ImmutableMap.of("type", "local", "path", dir.getAbsolutePath()));
}
Also used : DataSegment(org.apache.druid.timeline.DataSegment) File(java.io.File) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec)

Aggregations

NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 58, Test (org.junit.Test): 45, DataSegment (org.apache.druid.timeline.DataSegment): 41, HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 26, ImmutableList (com.google.common.collect.ImmutableList): 24, List (java.util.List): 24, ArrayList (java.util.ArrayList): 23, Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 14, Interval (org.joda.time.Interval): 14, NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec): 13, IOException (java.io.IOException): 12, File (java.io.File): 11, Map (java.util.Map): 11, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 10, HashMap (java.util.HashMap): 10, TaskStatus (org.apache.druid.indexer.TaskStatus): 9, Before (org.junit.Before): 9, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 8, NoopTask (org.apache.druid.indexing.common.task.NoopTask): 8, Task (org.apache.druid.indexing.common.task.Task): 8