
Example 1 with DimensionBasedPartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec in the druid project by druid-io.

From the class RangePartitionAdjustingCorePartitionSizeTest, method testLessPartitionsThanBuckets.

@Test
public void testLessPartitionsThanBuckets() throws IOException {
    final File inputDir = temporaryFolder.newFolder();
    // Two input files, one row each, with the value "aaa" for the partition dimension.
    for (int i = 0; i < 2; i++) {
        try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
            writer.write(StringUtils.format("2020-01-01T00:00:00,aaa,b1,10\n"));
        }
    }
    // Three more input files, one row each, with the value "zzz" for the partition dimension.
    for (int i = 0; i < 3; i++) {
        try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + (i + 2)).toPath(), StandardCharsets.UTF_8)) {
            writer.write(StringUtils.format("2020-01-01T00:00:00,zzz,b1,10\n"));
        }
    }
    final List<String> partitionDimensions = Collections.singletonList("dim1");
    final DimensionBasedPartitionsSpec partitionsSpec = new DimensionRangePartitionsSpec(2, null, partitionDimensions, false);
    final List<DataSegment> segments = new ArrayList<>(runTestTask(TIMESTAMP_SPEC, DIMENSIONS_SPEC, INPUT_FORMAT, null, INTERVAL_TO_INDEX, inputDir, "test_*", partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS));
    Assert.assertEquals(1, segments.size());
    final DataSegment segment = segments.get(0);
    Assert.assertSame(SingleDimensionShardSpec.class, segment.getShardSpec().getClass());
    final SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec();
    Assert.assertEquals(1, shardSpec.getNumCorePartitions());
    Assert.assertEquals(0, shardSpec.getPartitionNum());
    Assert.assertEquals(partitionDimensions, shardSpec.getDimensions());
}
Also used : ArrayList(java.util.ArrayList) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) DimensionBasedPartitionsSpec(org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec) File(java.io.File) DataSegment(org.apache.druid.timeline.DataSegment) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Writer(java.io.Writer) Test(org.junit.Test)
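
For orientation, a minimal standalone sketch, assuming only the DimensionRangePartitionsSpec constructor used above (and again in Example 3) and the getters that appear in Example 5, of building the same range spec outside the test harness; the values are printed rather than asserted because the derived maxRowsPerSegment is an implementation detail:

import java.util.Collections;
import java.util.List;

import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;

// Hypothetical sketch, not part of the test above: the same spec as in
// testLessPartitionsThanBuckets, built on its own. targetRowsPerSegment = 2,
// maxRowsPerSegment unset, range partitioning on "dim1", assumeGrouped = false.
public class RangeSpecSketch {
    public static void main(String[] args) {
        final List<String> partitionDimensions = Collections.singletonList("dim1");
        final DimensionBasedPartitionsSpec spec =
                new DimensionRangePartitionsSpec(2, null, partitionDimensions, false);
        // Getters shown in Example 5 below; printed instead of asserted because the
        // value derived for maxRowsPerSegment is implementation-defined.
        System.out.println("targetRowsPerSegment = " + spec.getTargetRowsPerSegment());
        System.out.println("maxRowsPerSegment    = " + spec.getMaxRowsPerSegment());
    }
}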

Example 2 with DimensionBasedPartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec in the druid project by druid-io.

From the class HashPartitionAdjustingCorePartitionSizeTest, method testEqualNumberOfPartitionsToBuckets.

@Test
public void testEqualNumberOfPartitionsToBuckets() throws IOException {
    final File inputDir = temporaryFolder.newFolder();
    // Ten input files, one row each, with ten distinct values ("aa10" through "aa19") for the partition dimension.
    for (int i = 0; i < 10; i++) {
        try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
            writer.write(StringUtils.format("2020-01-01T00:00:00,%s,b1,%d\n", "aa" + (i + 10), 10 * (i + 1)));
        }
    }
    final DimensionBasedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 5, ImmutableList.of("dim1"));
    final Set<DataSegment> segments = runTestTask(TIMESTAMP_SPEC, DIMENSIONS_SPEC, INPUT_FORMAT, null, INTERVAL_TO_INDEX, inputDir, "test_*", partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS);
    Assert.assertEquals(5, segments.size());
    segments.forEach(segment -> {
        Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
        final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
        Assert.assertEquals(5, shardSpec.getNumCorePartitions());
        Assert.assertEquals(5, shardSpec.getNumBuckets());
        Assert.assertEquals(ImmutableList.of("dim1"), shardSpec.getPartitionDimensions());
    });
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) DimensionBasedPartitionsSpec(org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec) File(java.io.File) DataSegment(org.apache.druid.timeline.DataSegment) Writer(java.io.Writer) Test(org.junit.Test)

Example 3 with DimensionBasedPartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec in the druid project by druid-io.

From the class RangePartitionAdjustingCorePartitionSizeTest, method testEqualNumberOfPartitionsToBuckets.

@Test
public void testEqualNumberOfPartitionsToBuckets() throws IOException {
    final File inputDir = temporaryFolder.newFolder();
    for (int i = 0; i < 10; i++) {
        try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
            writer.write(StringUtils.format("2020-01-01T00:00:00,%s,b1,%d\n", "aa" + (i + 10), 10 * (i + 1)));
        }
    }
    final List<String> partitionDimensions = Collections.singletonList("dim1");
    final DimensionBasedPartitionsSpec partitionsSpec = new DimensionRangePartitionsSpec(2, null, partitionDimensions, false);
    final Set<DataSegment> segments = runTestTask(TIMESTAMP_SPEC, DIMENSIONS_SPEC, INPUT_FORMAT, null, INTERVAL_TO_INDEX, inputDir, "test_*", partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS);
    Assert.assertEquals(5, segments.size());
    segments.forEach(segment -> {
        Assert.assertSame(SingleDimensionShardSpec.class, segment.getShardSpec().getClass());
        final SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec();
        Assert.assertEquals(5, shardSpec.getNumCorePartitions());
        Assert.assertTrue(shardSpec.getPartitionNum() < shardSpec.getNumCorePartitions());
        Assert.assertEquals(partitionDimensions, shardSpec.getDimensions());
    });
}
Also used : DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) DimensionBasedPartitionsSpec(org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec) File(java.io.File) DataSegment(org.apache.druid.timeline.DataSegment) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Writer(java.io.Writer) Test(org.junit.Test)

Example 4 with DimensionBasedPartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec in the druid project by druid-io.

From the class ITHadoopIndexTest, method testIndexData.

@Test(dataProvider = "resources")
public void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Exception {
    String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID();
    String reindexDatasource = REINDEX_DATASOURCE + "_" + UUID.randomUUID();
    try (final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
        final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix())) {
        final Function<String, String> specPathsTransform = spec -> {
            try {
                // Fill the ingestion spec template: the input path, the partitionsSpec serialized
                // to JSON, and a zero segment-availability timeout.
                String path = "/batch_index/json";
                spec = StringUtils.replace(spec, "%%INPUT_PATHS%%", path);
                spec = StringUtils.replace(spec, "%%PARTITIONS_SPEC%%", jsonMapper.writeValueAsString(partitionsSpec));
                spec = StringUtils.replace(spec, "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", jsonMapper.writeValueAsString(0));
                return spec;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        };
        doIndexTest(indexDatasource, INDEX_TASK, specPathsTransform, INDEX_QUERIES_RESOURCE, false, true, true, new Pair<>(false, false));
        doReindexTest(indexDatasource, reindexDatasource, REINDEX_TASK, REINDEX_QUERIES_RESOURCE, new Pair<>(false, false));
    }
}
Also used : Logger(org.apache.druid.java.util.common.logger.Logger) DataProvider(org.testng.annotations.DataProvider) ITRetryUtil(org.apache.druid.testing.utils.ITRetryUtil) Inject(com.google.inject.Inject) StringUtils(org.apache.druid.java.util.common.StringUtils) DruidTestModuleFactory(org.apache.druid.testing.guice.DruidTestModuleFactory) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Test(org.testng.annotations.Test) UUID(java.util.UUID) Function(java.util.function.Function) Guice(org.testng.annotations.Guice) Pair(org.apache.druid.java.util.common.Pair) CoordinatorDynamicConfig(org.apache.druid.server.coordinator.CoordinatorDynamicConfig) ImmutableList(com.google.common.collect.ImmutableList) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) CoordinatorResourceTestClient(org.apache.druid.testing.clients.CoordinatorResourceTestClient) TestNGGroup(org.apache.druid.tests.TestNGGroup) Closeable(java.io.Closeable) DimensionBasedPartitionsSpec(org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec) AbstractITBatchIndexTest(org.apache.druid.tests.indexer.AbstractITBatchIndexTest) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction)
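
As a rough sketch of the placeholder substitution performed by specPathsTransform, under the assumptions that a plain Jackson ObjectMapper stands in for the injected jsonMapper and that the one-line template below is made up (the real ingestion spec used by the test is a much larger resource file):

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;

import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.java.util.common.StringUtils;

// Hypothetical illustration of the %%PARTITIONS_SPEC%% substitution above; the
// template string and the ObjectMapper here are stand-ins, not the test's own.
public class SpecTemplateSketch {
    public static void main(String[] args) throws Exception {
        final ObjectMapper jsonMapper = new ObjectMapper();
        String spec = "{\"inputPaths\": \"%%INPUT_PATHS%%\", \"partitionsSpec\": %%PARTITIONS_SPEC%%}";
        spec = StringUtils.replace(spec, "%%INPUT_PATHS%%", "/batch_index/json");
        spec = StringUtils.replace(
                spec,
                "%%PARTITIONS_SPEC%%",
                jsonMapper.writeValueAsString(new HashedPartitionsSpec(null, 5, ImmutableList.of("dim1")))
        );
        System.out.println(spec);
    }
}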

Example 5 with DimensionBasedPartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec in the druid project by druid-io.

From the class HadoopDruidIndexerConfig, method getTargetPartitionSize.

public int getTargetPartitionSize() {
    DimensionBasedPartitionsSpec spec = schema.getTuningConfig().getPartitionsSpec();
    // Prefer the explicit row target; otherwise fall back to maxRowsPerSegment, or -1 if neither is set.
    if (spec.getTargetRowsPerSegment() != null) {
        return spec.getTargetRowsPerSegment();
    }
    final Integer targetPartitionSize = spec.getMaxRowsPerSegment();
    return targetPartitionSize == null ? -1 : targetPartitionSize;
}
Also used : DimensionBasedPartitionsSpec(org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec)
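
To make the fallback order concrete, here is a hedged standalone sketch: the helper below is a local stand-in that mirrors getTargetPartitionSize(), not a Druid API, and it is applied to the two spec constructions that already appear on this page; the results are printed rather than asserted.

import com.google.common.collect.ImmutableList;
import java.util.Collections;

import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;

// Hypothetical stand-alone mirror of getTargetPartitionSize(): prefer the explicit
// target, fall back to maxRowsPerSegment, and return -1 when neither is available.
public class TargetPartitionSizeSketch {
    static int resolveTargetPartitionSize(DimensionBasedPartitionsSpec spec) {
        if (spec.getTargetRowsPerSegment() != null) {
            return spec.getTargetRowsPerSegment();
        }
        final Integer targetPartitionSize = spec.getMaxRowsPerSegment();
        return targetPartitionSize == null ? -1 : targetPartitionSize;
    }

    public static void main(String[] args) {
        // Range spec from Examples 1 and 3: targetRowsPerSegment = 2.
        System.out.println(resolveTargetPartitionSize(
                new DimensionRangePartitionsSpec(2, null, Collections.singletonList("dim1"), false)));
        // Hashed spec from Example 2, sized by numShards rather than an explicit row target.
        System.out.println(resolveTargetPartitionSize(
                new HashedPartitionsSpec(null, 5, ImmutableList.of("dim1"))));
    }
}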

Aggregations

DimensionBasedPartitionsSpec (org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec): 6 usages
File (java.io.File): 4 usages
Writer (java.io.Writer): 4 usages
DataSegment (org.apache.druid.timeline.DataSegment): 4 usages
Test (org.junit.Test): 4 usages
HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec): 3 usages
ImmutableList (com.google.common.collect.ImmutableList): 2 usages
ArrayList (java.util.ArrayList): 2 usages
DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec): 2 usages
StringUtils (org.apache.druid.java.util.common.StringUtils): 2 usages
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 2 usages
SingleDimensionShardSpec (org.apache.druid.timeline.partition.SingleDimensionShardSpec): 2 usages
Inject (com.google.inject.Inject): 1 usage
Closeable (java.io.Closeable): 1 usage
IOException (java.io.IOException): 1 usage
StandardCharsets (java.nio.charset.StandardCharsets): 1 usage
Files (java.nio.file.Files): 1 usage
Arrays (java.util.Arrays): 1 usage
Comparator (java.util.Comparator): 1 usage
List (java.util.List): 1 usage