Use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
From the class IndexTaskTest, method testNumShardsAndPartitionDimensionsProvided.
@Test
public void testNumShardsAndPartitionDimensionsProvided() throws Exception {
  final File tmpDir = temporaryFolder.newFolder();
  final File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,1\n");
    writer.write("2014-01-01T01:00:20Z,b,1\n");
    writer.write("2014-01-01T02:00:30Z,c,1\n");
  }
  final IndexTask indexTask = new IndexTask(
      null,
      null,
      createDefaultIngestionSpec(
          jsonMapper,
          tmpDir,
          null,
          null,
          createTuningConfigWithPartitionsSpec(new HashedPartitionsSpec(null, 2, ImmutableList.of("dim")), true),
          false,
          false
      ),
      null
  );
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(2, segments.size());
  for (DataSegment segment : segments) {
    Assert.assertEquals(DATASOURCE, segment.getDataSource());
    Assert.assertEquals(Intervals.of("2014/P1D"), segment.getInterval());
    Assert.assertEquals(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec hashBasedNumberedShardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, hashBasedNumberedShardSpec.getPartitionFunction());
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
        new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
        segment.getInterval()
    );
    final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
        null,
        segment.getInterval(),
        VirtualColumns.EMPTY,
        Granularities.ALL,
        false,
        null
    );
    final List<Integer> hashes = cursorSequence.map(cursor -> {
      final DimensionSelector selector =
          cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
      final int hash = HashPartitionFunction.MURMUR3_32_ABS.hash(
          HashBasedNumberedShardSpec.serializeGroupKey(jsonMapper, Collections.singletonList(selector.getObject())),
          hashBasedNumberedShardSpec.getNumBuckets()
      );
      cursor.advance();
      return hash;
    }).toList();
    Assert.assertTrue(hashes.stream().allMatch(h -> h.intValue() == hashes.get(0)));
  }
}
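For reference, the three positional arguments passed to HashedPartitionsSpec above appear to play the following roles; the names in the comments are inferred from how the test uses them, not taken from the constructor's signature:

// Sketch only: argument roles inferred from the test above.
HashedPartitionsSpec spec = new HashedPartitionsSpec(
    null,                     // a row-count target, left unset because an explicit shard count is given
    2,                        // numShards: two hash buckets, hence the two segments asserted above
    ImmutableList.of("dim")   // partitionDimensions: rows are hashed on "dim" only
);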
Use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
From the class IndexTaskTest, method testMultipleParseExceptionsFailureAtDeterminePartitions.
@Test
public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exception {
  final File tmpDir = temporaryFolder.newFolder();
  final File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("time,dim,dimLong,dimFloat,val\n");
    // unparseable
    writer.write("unparseable,a,2,3.0,1\n");
    // valid row
    writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n");
    // unparseable
    writer.write("9.0,a,2,3.0,1\n");
    // thrown away
    writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n");
    // unparseable
    writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n");
  }
  // Allow up to 2 parse exceptions, and save up to 5 of them for the ingestion report
  final IndexTuningConfig tuningConfig = new IndexTuningConfig(
      null, null, null, null, null, null, null, null, null, null,
      new HashedPartitionsSpec(2, null, null),
      INDEX_SPEC,
      null,
      null,
      true,
      false,
      null,
      null,
      null,
      true,
      2,
      5,
      null,
      null
  );
  final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      Arrays.asList(
          new StringDimensionSchema("dim"),
          new LongDimensionSchema("dimLong"),
          new FloatDimensionSchema("dimFloat")
      )
  );
  final List<String> columns = Arrays.asList("time", "dim", "dimLong", "dimFloat", "val");
  final IndexIngestionSpec ingestionSpec;
  List<String> expectedMessages;
  if (useInputFormatApi) {
    ingestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        timestampSpec,
        dimensionsSpec,
        new CsvInputFormat(columns, null, null, true, 0),
        null,
        null,
        tuningConfig,
        false,
        false
    );
    expectedMessages = Arrays.asList(
        StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", tmpFile.toURI()),
        StringUtils.format("Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", tmpFile.toURI()),
        StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", tmpFile.toURI())
    );
  } else {
    ingestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0),
        null,
        null,
        tuningConfig,
        false,
        false
    );
    expectedMessages = Arrays.asList(
        "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
        "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}",
        "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
    );
  }
  IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
  TaskStatus status = runTask(indexTask).lhs;
  Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
  checkTaskStatusErrorMsgForParseExceptionsExceeded(status);
  IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
  Map<String, Object> expectedMetrics = ImmutableMap.of(
      RowIngestionMeters.DETERMINE_PARTITIONS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
          RowIngestionMeters.PROCESSED, 1,
          RowIngestionMeters.UNPARSEABLE, 3,
          RowIngestionMeters.THROWN_AWAY, useInputFormatApi ? 1 : 2
      ),
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
          RowIngestionMeters.PROCESSED, 0,
          RowIngestionMeters.UNPARSEABLE, 0,
          RowIngestionMeters.THROWN_AWAY, 0
      )
  );
  Assert.assertEquals(expectedMetrics, reportData.getRowStats());
  List<LinkedHashMap> parseExceptionReports =
      (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.DETERMINE_PARTITIONS);
  List<String> actualMessages = parseExceptionReports.stream()
      .map(r -> ((List<String>) r.get("details")).get(0))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedMessages, actualMessages);
  List<String> expectedInputs = Arrays.asList(
      "{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
      "{time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}",
      "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
  );
  List<String> actualInputs = parseExceptionReports.stream()
      .map(r -> (String) r.get("input"))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedInputs, actualInputs);
}
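The failure path above follows from the two parse-exception limits in the tuning config. Below is a minimal sketch of that accounting, not Druid's actual ingestion loop: rows, parse(...), and the use of ParseException are placeholders for the real input source and parser.

// Simplified model of the two limits asserted above; not Druid's implementation.
int maxParseExceptions = 2;        // task fails once more than this many rows are unparseable
int maxSavedParseExceptions = 5;   // at most this many messages are kept for the task report
int unparseable = 0;
Deque<String> savedMessages = new ArrayDeque<>();   // java.util.Deque / java.util.ArrayDeque
for (String row : rows) {                           // "rows" and "parse" are placeholders for this sketch
  try {
    parse(row);
  } catch (ParseException e) {
    unparseable++;
    savedMessages.addFirst(e.getMessage());          // newest first, matching the expectedMessages order above
    if (savedMessages.size() > maxSavedParseExceptions) {
      savedMessages.removeLast();
    }
    if (unparseable > maxParseExceptions) {
      // the third unparseable row trips the limit, so the task ends in TaskState.FAILED
      throw new RuntimeException("Max parse exceptions exceeded", e);
    }
  }
}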
Use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
From the class IndexTaskTest, method testNumShardsProvided.
@Test
public void testNumShardsProvided() throws Exception {
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,1\n");
    writer.write("2014-01-01T01:00:20Z,b,1\n");
    writer.write("2014-01-01T02:00:30Z,c,1\n");
  }
  IndexTask indexTask = new IndexTask(
      null,
      null,
      createDefaultIngestionSpec(
          jsonMapper,
          tmpDir,
          null,
          null,
          createTuningConfigWithPartitionsSpec(new HashedPartitionsSpec(null, 1, null), true),
          false,
          false
      ),
      null
  );
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());
  Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
  Assert.assertEquals(HashBasedNumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
  Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
  Assert.assertEquals(
      HashPartitionFunction.MURMUR3_32_ABS,
      ((HashBasedNumberedShardSpec) segments.get(0).getShardSpec()).getPartitionFunction()
  );
}
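With numShards = 1 and no partition dimensions, every row necessarily maps to the single hash bucket, which is why exactly one segment with partition number 0 is published. A one-line sketch of the bucket computation, assuming MURMUR3_32_ABS reduces the absolute 32-bit murmur3 hash modulo numBuckets (as its name suggests; groupKeyBytes is a placeholder):

int bucket = HashPartitionFunction.MURMUR3_32_ABS.hash(groupKeyBytes, 1);   // anything % 1 == 0, so always bucket 0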
Use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
From the class HashPartitionAdjustingCorePartitionSizeTest, method testLessPartitionsThanBuckets.
@Test
public void testLessPartitionsThanBuckets() throws IOException {
  final File inputDir = temporaryFolder.newFolder();
  for (int i = 0; i < 3; i++) {
    try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
      writer.write(StringUtils.format("2020-01-01T00:00:00,%s,b1,%d\n", "a" + (i + 1), 10 * (i + 1)));
    }
  }
  final DimensionBasedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 10, ImmutableList.of("dim1"));
  final List<DataSegment> segments = new ArrayList<>(
      runTestTask(
          TIMESTAMP_SPEC,
          DIMENSIONS_SPEC,
          INPUT_FORMAT,
          null,
          INTERVAL_TO_INDEX,
          inputDir,
          "test_*",
          partitionsSpec,
          maxNumConcurrentSubTasks,
          TaskState.SUCCESS
      )
  );
  Assert.assertEquals(3, segments.size());
  segments.sort(Comparator.comparing(segment -> segment.getShardSpec().getPartitionNum()));
  int prevPartitionId = -1;
  for (DataSegment segment : segments) {
    Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(3, shardSpec.getNumCorePartitions());
    Assert.assertEquals(10, shardSpec.getNumBuckets());
    Assert.assertEquals(ImmutableList.of("dim1"), shardSpec.getPartitionDimensions());
    Assert.assertEquals(prevPartitionId + 1, shardSpec.getPartitionNum());
    prevPartitionId = shardSpec.getPartitionNum();
  }
}
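What the assertions amount to: 10 hash buckets are requested, but the three input files hold only three distinct dim1 values, so only three buckets receive data and only three segments are published. A sketch of that relationship follows; the variable names are chosen for illustration and are not taken from Druid's code:

// Sketch only: the relationship the loop above verifies for every published segment.
int requestedBuckets = 10;                // numShards in the HashedPartitionsSpec
int nonEmptyBuckets = 3;                  // one per distinct "dim1" value in the input
int numCorePartitions = nonEmptyBuckets;  // adjusted down to the number of segments actually written
int numBuckets = requestedBuckets;        // kept at 10 so each row's bucket assignment stays well defined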
Use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
From the class ParallelIndexSupervisorTaskSerdeTest, method forceGuaranteedRollupWithHashPartitionsMissingNumShards.
@Test
public void forceGuaranteedRollupWithHashPartitionsMissingNumShards() {
  Integer numShards = null;
  ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTaskBuilder()
      .ingestionSpec(
          new ParallelIndexIngestionSpecBuilder()
              .forceGuaranteedRollup(true)
              .partitionsSpec(new HashedPartitionsSpec(null, numShards, null))
              .inputIntervals(INTERVALS)
              .build()
      )
      .build();
  PartitionsSpec partitionsSpec = task.getIngestionSchema().getTuningConfig().getPartitionsSpec();
  Assert.assertThat(partitionsSpec, CoreMatchers.instanceOf(HashedPartitionsSpec.class));
}
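This serde test only checks that a hashed spec with numShards left null is accepted when forceGuaranteedRollup is enabled; the actual shard count is then left for the parallel task to determine at runtime (presumably during its cardinality-estimation phase). For reference, the same spec written out directly, with argument roles inferred from the other tests above:

// Sketch only: a fully unspecified hashed spec.
PartitionsSpec spec = new HashedPartitionsSpec(
    null,   // no row-count target
    null,   // numShards omitted; the supervisor task picks a value before building segments (assumption)
    null    // no partition dimensions, so all dimensions are used for hashing
);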