use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
the class ITAutoCompactionTest method testAutoCompactionDutyCanUpdateCompactionConfig.
@Test
public void testAutoCompactionDutyCanUpdateCompactionConfig() throws Exception {
loadData(INDEX_TASK);
try (final Closeable ignored = unloader(fullDatasourceName)) {
final List<String> intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName);
intervalsBeforeCompaction.sort(null);
// 4 segments across 2 days (4 total)...
verifySegmentsCount(4);
verifyQuery(INDEX_QUERIES_RESOURCE);
// Dummy compaction config which will be overwritten
submitCompactionConfig(10000, NO_SKIP_OFFSET);
// New compaction config should overwrites the existing compaction config
submitCompactionConfig(1, NO_SKIP_OFFSET);
LOG.info("Auto compaction test with dynamic partitioning");
// Instead of merging segments, the updated config will split segments!
// ...compacted into 10 new segments across 2 days. 5 new segments each day (10 total)
forceTriggerAutoCompaction(10);
verifyQuery(INDEX_QUERIES_RESOURCE);
verifySegmentsCompacted(10, 1);
checkCompactionIntervals(intervalsBeforeCompaction);
LOG.info("Auto compaction test with hash partitioning");
final HashedPartitionsSpec hashedPartitionsSpec = new HashedPartitionsSpec(null, 3, null);
submitCompactionConfig(hashedPartitionsSpec, NO_SKIP_OFFSET, 1, null, null, null, null, false);
// 2 segments published per day after compaction.
forceTriggerAutoCompaction(4);
verifyQuery(INDEX_QUERIES_RESOURCE);
verifySegmentsCompacted(hashedPartitionsSpec, 4);
checkCompactionIntervals(intervalsBeforeCompaction);
LOG.info("Auto compaction test with range partitioning");
final SingleDimensionPartitionsSpec rangePartitionsSpec = new SingleDimensionPartitionsSpec(5, null, "city", false);
submitCompactionConfig(rangePartitionsSpec, NO_SKIP_OFFSET, 1, null, null, null, null, false);
forceTriggerAutoCompaction(2);
verifyQuery(INDEX_QUERIES_RESOURCE);
verifySegmentsCompacted(rangePartitionsSpec, 2);
checkCompactionIntervals(intervalsBeforeCompaction);
}
}
use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
the class ITHadoopIndexTest method testIndexDataAwaitSegmentAvailability.
/**
* Test Hadoop Batch Ingestion with a non-zero value for awaitSegmentAvailabilityTimeoutMillis. This will confirm that
* the report for the task indicates segments were confirmed to be available on the cluster before finishing the job.
*
* @throws Exception
*/
@Test
public void testIndexDataAwaitSegmentAvailability() throws Exception {
String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID();
try (final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix())) {
final Function<String, String> specPathsTransform = spec -> {
try {
String path = "/batch_index/json";
spec = StringUtils.replace(spec, "%%INPUT_PATHS%%", path);
spec = StringUtils.replace(spec, "%%PARTITIONS_SPEC%%", jsonMapper.writeValueAsString(new HashedPartitionsSpec(3, null, null)));
spec = StringUtils.replace(spec, "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", jsonMapper.writeValueAsString(600000));
return spec;
} catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(indexDatasource, INDEX_TASK, specPathsTransform, INDEX_QUERIES_RESOURCE, false, true, true, new Pair<>(true, true));
}
}
use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
the class ITHadoopIndexTest method testIndexDataAwaitSegmentAvailabilityFailsButTaskSucceeds.
/**
* Test Hadoop Batch Indexing with non-zero value for awaitSegmentAvailabilityTimeoutMillis. The coordinator
* is paused when the task runs. This should result in a successful task with a flag in the task report indicating
* that we did not confirm segment availability.
*
* @throws Exception
*/
@Test
public void testIndexDataAwaitSegmentAvailabilityFailsButTaskSucceeds() throws Exception {
String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID();
try (final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix())) {
coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_PAUSED);
final Function<String, String> specPathsTransform = spec -> {
try {
String path = "/batch_index/json";
spec = StringUtils.replace(spec, "%%INPUT_PATHS%%", path);
spec = StringUtils.replace(spec, "%%PARTITIONS_SPEC%%", jsonMapper.writeValueAsString(new HashedPartitionsSpec(3, null, null)));
spec = StringUtils.replace(spec, "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", jsonMapper.writeValueAsString(1));
return spec;
} catch (Exception e) {
throw new RuntimeException(e);
}
};
doIndexTest(indexDatasource, INDEX_TASK, specPathsTransform, INDEX_QUERIES_RESOURCE, false, false, false, new Pair<>(true, false));
coordinatorClient.postDynamicConfig(DYNAMIC_CONFIG_DEFAULT);
ITRetryUtil.retryUntilTrue(() -> coordinatorClient.areSegmentsLoaded(indexDatasource + config.getExtraDatasourceNameSuffix()), "Segment Load For: " + indexDatasource + config.getExtraDatasourceNameSuffix());
}
}
use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
the class IndexTaskTest method testNumShardsAndHashPartitionFunctionProvided.
@Test
public void testNumShardsAndHashPartitionFunctionProvided() throws Exception {
File tmpDir = temporaryFolder.newFolder();
File tmpFile = File.createTempFile("druid", "index", tmpDir);
try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
writer.write("2014-01-01T00:00:10Z,a,1\n");
writer.write("2014-01-01T01:00:20Z,b,1\n");
writer.write("2014-01-01T02:00:30Z,c,1\n");
}
IndexTask indexTask = new IndexTask(null, null, createDefaultIngestionSpec(jsonMapper, tmpDir, null, null, createTuningConfigWithPartitionsSpec(new HashedPartitionsSpec(null, 1, null, HashPartitionFunction.MURMUR3_32_ABS), true), false, false), null);
final List<DataSegment> segments = runTask(indexTask).rhs;
Assert.assertEquals(1, segments.size());
Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
Assert.assertEquals(HashBasedNumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, ((HashBasedNumberedShardSpec) segments.get(0).getShardSpec()).getPartitionFunction());
}
use of org.apache.druid.indexer.partitions.HashedPartitionsSpec in project druid by druid-io.
the class IndexTaskTest method testMultipleParseExceptionsSuccess.
@Test
public void testMultipleParseExceptionsSuccess() throws Exception {
final File tmpDir = temporaryFolder.newFolder();
final File tmpFile = File.createTempFile("druid", "index", tmpDir);
try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
// unparseable time
writer.write("{\"time\":\"unparseable\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// valid row
writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
writer.write(// row with invalid long dimension
"{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":\"notnumber\",\"dimFloat\":3.0,\"val\":1}\n");
writer.write(// row with invalid float dimension
"{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":\"notnumber\",\"val\":1}\n");
writer.write(// row with invalid metric
"{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":4.0,\"val\":\"notnumber\"}\n");
// invalid JSON
writer.write("{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
writer.write(// thrown away
"{\"time\":\"3014-03-01T00:00:10Z\",\"dim\":\"outsideofinterval\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// unparseable time
writer.write("{\"time\":\"99999999999-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// invalid JSON
writer.write("this is not JSON\n");
}
final IndexTuningConfig tuningConfig = new IndexTuningConfig(null, null, null, null, null, null, null, null, null, null, new HashedPartitionsSpec(2, null, null), INDEX_SPEC, null, null, true, false, null, null, null, true, 7, 7, null, null);
final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat")));
final IndexIngestionSpec ingestionSpec;
if (useInputFormatApi) {
ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, timestampSpec, dimensionsSpec, new JsonInputFormat(null, null, null), null, null, tuningConfig, false, false);
} else {
ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, new JSONParseSpec(timestampSpec, dimensionsSpec, null, null, null), null, null, tuningConfig, false, false);
}
IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
TaskStatus status = runTask(indexTask).lhs;
Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode());
Assert.assertEquals(null, status.getErrorMsg());
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(RowIngestionMeters.DETERMINE_PARTITIONS, ImmutableMap.of(RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 4, RowIngestionMeters.UNPARSEABLE, 4, RowIngestionMeters.THROWN_AWAY, 1), RowIngestionMeters.BUILD_SEGMENTS, ImmutableMap.of(RowIngestionMeters.PROCESSED_WITH_ERROR, 3, RowIngestionMeters.PROCESSED, 1, RowIngestionMeters.UNPARSEABLE, 4, RowIngestionMeters.THROWN_AWAY, 1));
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
List<String> expectedMessages;
if (useInputFormatApi) {
expectedMessages = Arrays.asList(StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()), StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", tmpFile.toURI()), StringUtils.format("Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", tmpFile.toURI()), "Unable to parse value[notnumber] for field[val]", "could not convert value [notnumber] to float", "could not convert value [notnumber] to long", StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", tmpFile.toURI()));
} else {
expectedMessages = Arrays.asList("Unable to parse row [this is not JSON]", "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", "Unable to parse value[notnumber] for field[val]", "could not convert value [notnumber] to float", "could not convert value [notnumber] to long", "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}");
}
List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList("this is not JSON", "{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", "{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}", "{time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=4.0, val=notnumber}", "{time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=notnumber, val=1}", "{time=2014-01-01T00:00:10Z, dim=b, dimLong=notnumber, dimFloat=3.0, val=1}", "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}");
List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.DETERMINE_PARTITIONS);
if (useInputFormatApi) {
expectedMessages = Arrays.asList(StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()), StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", tmpFile.toURI()), StringUtils.format("Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", tmpFile.toURI()), StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", tmpFile.toURI()));
} else {
expectedMessages = Arrays.asList("Unable to parse row [this is not JSON]", "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}");
}
actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
expectedInputs = Arrays.asList("this is not JSON", "{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", "{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}", "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}");
actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}
Aggregations