Use of org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec in project druid by druid-io.
The class ParallelIndexSupervisorTask, method runSequential.
private TaskStatus runSequential(TaskToolbox toolbox) throws Exception {
  IndexTask sequentialIndexTask = new IndexTask(
      getId(),
      getGroupId(),
      getTaskResource(),
      getDataSource(),
      baseSubtaskSpecName,
      new IndexIngestionSpec(
          getIngestionSchema().getDataSchema(),
          getIngestionSchema().getIOConfig(),
          convertToIndexTuningConfig(getIngestionSchema().getTuningConfig())
      ),
      getContext(),
      getIngestionSchema().getTuningConfig().getMaxAllowedLockCount()
  );
  if (currentSubTaskHolder.setTask(sequentialIndexTask)
      && sequentialIndexTask.isReady(toolbox.getTaskActionClient())) {
    return sequentialIndexTask.run(toolbox);
  } else {
    String msg = "Task was asked to stop. Finish as failed";
    LOG.info(msg);
    return TaskStatus.failure(getId(), msg);
  }
}
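Both the supervisor's fallback above and the tests below build the spec through the same three-argument constructor, IndexIngestionSpec(dataSchema, ioConfig, tuningConfig), and hand it to an IndexTask. A minimal, hedged sketch of that pairing in isolation; dataSchema, ioConfig and tuningConfig are hypothetical locals standing in for whatever the caller already has, and the four-argument IndexTask constructor is the one the tests below use (presumably id, taskResource, ingestionSpec, context):
// Hypothetical locals; only the constructor shapes are taken from the snippets on this page.
IndexIngestionSpec spec = new IndexIngestionSpec(dataSchema, ioConfig, tuningConfig);
IndexTask task = new IndexTask(null, null, spec, null);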
Use of org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec in project druid by druid-io.
The class IndexTaskTest, method testCSVFileWithHeaderColumnOverride.
@Test
public void testCSVFileWithHeaderColumnOverride() throws Exception {
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("time,d,val\n");
    writer.write("2014-01-01T00:00:10Z,a,1\n");
  }
  final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
  final List<String> columns = Arrays.asList("time", "dim", "val");
  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, true);
  final IndexIngestionSpec ingestionSpec;
  if (useInputFormatApi) {
    ingestionSpec = createIngestionSpec(
        jsonMapper, tmpDir, timestampSpec, DimensionsSpec.EMPTY,
        new CsvInputFormat(columns, null, null, true, 0),
        null, null, tuningConfig, false, false
    );
  } else {
    ingestionSpec = createIngestionSpec(
        jsonMapper, tmpDir,
        new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0),
        null, null, tuningConfig, false, false
    );
  }
  IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());
  Assert.assertEquals(Collections.singletonList("d"), segments.get(0).getDimensions());
  Assert.assertEquals(Collections.singletonList("val"), segments.get(0).getMetrics());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
}
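The segment's only dimension is "d" rather than "dim" because the format is told to take column names from the file header; with that flag set, the header names ("time", "d", "val") override the explicitly supplied columns list. A hedged annotation of the relevant call (parameter names are paraphrased, not quoted from the Druid sources; the CSVParseSpec branch sets the equivalent flag):
// columns = ["time", "dim", "val"], listDelimiter = null, hasHeaderRow = null,
// findColumnsFromHeader = true, skipHeaderRows = 0
new CsvInputFormat(columns, null, null, true, 0);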
Use of org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec in project druid by druid-io.
The class IndexTaskTest, method testCSVFileWithHeader.
@Test
public void testCSVFileWithHeader() throws Exception {
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("time,d,val\n");
    writer.write("2014-01-01T00:00:10Z,a,1\n");
  }
  final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, true);
  final IndexIngestionSpec ingestionSpec;
  if (useInputFormatApi) {
    ingestionSpec = createIngestionSpec(
        jsonMapper, tmpDir, timestampSpec, DimensionsSpec.EMPTY,
        new CsvInputFormat(null, null, null, true, 0),
        null, null, tuningConfig, false, false
    );
  } else {
    ingestionSpec = createIngestionSpec(
        jsonMapper, tmpDir,
        new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, null, true, 0),
        null, null, tuningConfig, false, false
    );
  }
  IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());
  Assert.assertEquals(Collections.singletonList("d"), segments.get(0).getDimensions());
  Assert.assertEquals(Collections.singletonList("val"), segments.get(0).getMetrics());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
}
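Here no columns list is supplied at all, so the column names come entirely from the file header. As a usage note, runTask(indexTask).rhs is the list of published segments; a hedged sketch that also checks the task outcome, assuming the Pair's lhs carries the completed TaskStatus as in other IndexTaskTest methods:
// Assumption: runTask(...) returns Pair<TaskStatus, List<DataSegment>>.
final Pair<TaskStatus, List<DataSegment>> result = runTask(indexTask);
Assert.assertTrue(result.lhs.isSuccess());
Assert.assertEquals(Collections.singletonList("d"), result.rhs.get(0).getDimensions());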
Use of org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec in project druid by druid-io.
The class IndexTaskTest, method testTransformSpec.
@Test
public void testTransformSpec() throws Exception {
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
    writer.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
    writer.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
  }
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(
          Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")
      )
  );
  final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
  final String listDelimiter = "|";
  final TransformSpec transformSpec = new TransformSpec(
      new SelectorDimFilter("dim", "b", null),
      ImmutableList.of(
          new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())
      )
  );
  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);
  final IndexIngestionSpec indexIngestionSpec;
  if (useInputFormatApi) {
    indexIngestionSpec = createIngestionSpec(
        jsonMapper, tmpDir, DEFAULT_TIMESTAMP_SPEC, dimensionsSpec,
        new CsvInputFormat(columns, listDelimiter, null, false, 0),
        transformSpec, null, tuningConfig, false, false
    );
  } else {
    indexIngestionSpec = createIngestionSpec(
        jsonMapper, tmpDir,
        new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0),
        transformSpec, null, tuningConfig, false, false
    );
  }
  IndexTask indexTask = new IndexTask(null, null, indexIngestionSpec, null);
  Assert.assertEquals(indexTask.getId(), indexTask.getGroupId());
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());
  DataSegment segment = segments.get(0);
  final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
  final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
      new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
      segment.getInterval()
  );
  final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
      null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null
  );
  final List<Map<String, Object>> transforms = cursorSequence.map(cursor -> {
    final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimt", "dimt"));
    final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray1", "dimtarray1"));
    final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray2", "dimtarray2"));
    final DimensionSelector selector4 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtnum_array", "dimtnum_array"));
    Map<String, Object> row = new HashMap<>();
    row.put("dimt", selector1.defaultGetObject());
    row.put("dimtarray1", selector2.defaultGetObject());
    row.put("dimtarray2", selector3.defaultGetObject());
    row.put("dimtnum_array", selector4.defaultGetObject());
    cursor.advance();
    return row;
  }).toList();
  Assert.assertEquals(1, transforms.size());
  Assert.assertEquals("bb", transforms.get(0).get("dimt"));
  Assert.assertEquals(ImmutableList.of("b", "b"), transforms.get(0).get("dimtarray1"));
  Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transforms.get(0).get("dimtarray2"));
  Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transforms.get(0).get("dimtnum_array"));
  Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
  Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
  Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
}
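Only the second CSV line ("2014-01-01T01:00:20Z,b,another|array,3|4,1") passes the dim = "b" selector filter, and the asserted values follow mechanically from the transforms: concat(dim,dim) gives "bb", array(dim, dim) gives ["b", "b"], mapping concat(d, 'foo') over dim_array gives ["anotherfoo", "arrayfoo"], and mapping d + 3 over dim_num_array gives ["6.0", "7.0"]. A compact restatement of that expected row (the ImmutableMap literal is illustrative, not part of the original test):
// Expected transformed values for the single row surviving the dim = "b" filter.
Map<String, Object> expected = ImmutableMap.of(
    "dimt", "bb",
    "dimtarray1", ImmutableList.of("b", "b"),
    "dimtarray2", ImmutableList.of("anotherfoo", "arrayfoo"),
    "dimtnum_array", ImmutableList.of("6.0", "7.0")
);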
Use of org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec in project druid by druid-io.
The class IndexIngestionSpecTest, method testFirehoseAndInputFormat.
@Test
public void testFirehoseAndInputFormat() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("Cannot use firehose and inputFormat together.");
  final IndexIngestionSpec spec = new IndexIngestionSpec(
      new DataSchema("dataSource", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY,
          new AggregatorFactory[0], new ArbitraryGranularitySpec(Granularities.NONE, null), null),
      new IndexIOConfig(new NoopFirehoseFactory(), null, new NoopInputFormat(), null, null),
      null
  );
}
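For contrast, a hedged sketch of a construction the spec accepts: the same no-op firehose without an inputFormat (the legacy ingestion path), assuming no other validation in IndexIngestionSpec rejects a firehose-only IOConfig:
// Firehose without inputFormat: the "Cannot use firehose and inputFormat together." check does not fire.
final IndexIngestionSpec validSpec = new IndexIngestionSpec(
    new DataSchema("dataSource", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY,
        new AggregatorFactory[0], new ArbitraryGranularitySpec(Granularities.NONE, null), null),
    new IndexIOConfig(new NoopFirehoseFactory(), null, null, null, null),
    null
);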