Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class FirehoseFactoryToInputSourceAdaptorTest, method testUnimplementedInputFormat:
@Test
public void testUnimplementedInputFormat() throws IOException {
  final List<String> lines = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    lines.add(StringUtils.format("%d,name_%d,%d", 20190101 + i, i, i + 100));
  }
  final TestFirehoseFactory firehoseFactory = new TestFirehoseFactory(lines);
  final StringInputRowParser inputRowParser = new StringInputRowParser(
      new CSVParseSpec(
          new TimestampSpec(null, "yyyyMMdd", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "name", "score"))),
          ",",
          Arrays.asList("timestamp", "name", "score"),
          false,
          0
      ),
      StringUtils.UTF8_STRING
  );
  final FirehoseFactoryToInputSourceAdaptor inputSourceAdaptor =
      new FirehoseFactoryToInputSourceAdaptor(firehoseFactory, inputRowParser);
  final InputSourceReader reader = inputSourceAdaptor.reader(
      new InputRowSchema(
          inputRowParser.getParseSpec().getTimestampSpec(),
          inputRowParser.getParseSpec().getDimensionsSpec(),
          ColumnsFilter.all()
      ),
      null,
      null
  );
  final List<InputRow> result = new ArrayList<>();
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    while (iterator.hasNext()) {
      result.add(iterator.next());
    }
  }
  Assert.assertEquals(10, result.size());
  for (int i = 0; i < 10; i++) {
    Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", 1 + i)), result.get(i).getTimestamp());
    Assert.assertEquals(StringUtils.format("name_%d", i), Iterables.getOnlyElement(result.get(i).getDimension("name")));
    Assert.assertEquals(StringUtils.format("%d", i + 100), Iterables.getOnlyElement(result.get(i).getDimension("score")));
  }
}
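For orientation, the CSV lines generated above map onto InputRows exactly as the assertions expect. This is a minimal sketch (not part of the original test) that reuses the inputRowParser defined above to parse the first generated line directly:

// Illustrative only: parse one generated CSV line with the same parser.
final InputRow firstRow = inputRowParser.parse("20190101,name_0,100");
// Per the assertions above:
//   firstRow.getTimestamp()        -> DateTimes.of("2019-01-01")
//   firstRow.getDimension("name")  -> ["name_0"]
//   firstRow.getDimension("score") -> ["100"]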
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class MapVirtualColumnTestBase, method generateIndex:
static IncrementalIndex generateIndex() throws IOException {
  final CharSource input = CharSource.wrap(
      "2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3\n"
      + "2011-01-12T00:00:00.000Z\tb\tkey4,key5,key6\tvalue4\n"
      + "2011-01-12T00:00:00.000Z\tc\tkey1,key5\tvalue1,value5,value9\n"
  );
  final StringInputRowParser parser = new StringInputRowParser(
      new DelimitedParseSpec(
          new TimestampSpec("ts", "auto", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim", "keys", "values"))),
          "\t",
          ",",
          Arrays.asList("ts", "dim", "keys", "values"),
          false,
          0
      ),
      "utf8"
  );
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
      .build();
  return TestIndex.loadIncrementalIndex(
      () -> new OnheapIncrementalIndex.Builder().setIndexSchema(schema).setMaxRowCount(10000).build(),
      input,
      parser
  );
}
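To make the DelimitedParseSpec above concrete: fields are split on the "\t" delimiter, and the "keys" and "values" fields are further split on the "," list delimiter into multi-value dimensions. A minimal sketch (not in the original test), reusing the parser defined above:

// Illustrative only: parse the first TSV line directly.
final InputRow row = parser.parse("2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3");
// row.getDimension("dim")    -> ["a"]
// row.getDimension("keys")   -> ["key1", "key2", "key3"]
// row.getDimension("values") -> ["value1", "value2", "value3"]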
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class HashPartitionTaskKillTest, method createTestTask:
private ParallelIndexSupervisorTask createTestTask(
    @Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec,
    @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec,
    Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec,
    int maxNumConcurrentSubTasks, boolean appendToExisting, boolean useInputFormatApi,
    int succeedsBeforeFailing
) {
  GranularitySpec granularitySpec = new UniformGranularitySpec(
      SEGMENT_GRANULARITY,
      Granularities.MINUTE,
      interval == null ? null : Collections.singletonList(interval)
  );
  ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);
  final ParallelIndexIngestionSpec ingestionSpec;
  if (useInputFormatApi) {
    // InputSource/InputFormat-based path: the schema is built from explicit timestamp and dimension specs.
    Preconditions.checkArgument(parseSpec == null);
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
        null,
        new LocalInputSource(inputDir, filter),
        inputFormat,
        appendToExisting,
        null
    );
    ingestionSpec = new ParallelIndexIngestionSpec(
        new DataSchema(
            DATASOURCE,
            timestampSpec,
            dimensionsSpec,
            new AggregatorFactory[]{new LongSumAggregatorFactory("val", "val")},
            granularitySpec,
            null
        ),
        ioConfig,
        tuningConfig
    );
  } else {
    // Legacy firehose path: the StringInputRowParser is serialized into the DataSchema as a Map.
    Preconditions.checkArgument(inputFormat == null);
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
    // noinspection unchecked
    ingestionSpec = new ParallelIndexIngestionSpec(
        new DataSchema(
            "dataSource",
            getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class),
            new AggregatorFactory[]{new LongSumAggregatorFactory("val", "val")},
            granularitySpec,
            null,
            getObjectMapper()
        ),
        ioConfig,
        tuningConfig
    );
  }
  return new ParallelIndexSupervisorTaskTest(null, null, null, ingestionSpec, null, Collections.emptyMap(), succeedsBeforeFailing);
}
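A note on the legacy branch above: the StringInputRowParser is not handed to DataSchema as an object but as an untyped Map produced by Jackson, which is why the unchecked warning is suppressed. A minimal sketch of that conversion (illustrative only, assuming the test's ObjectMapper has Druid's standard modules registered):

// Serialize the parser to a Map, as the test does for the legacy DataSchema constructor.
final Map<String, Object> parserMap = getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class);
// Converting back yields an equivalent parser; this is roughly what DataSchema does internally when a parser is requested.
final InputRowParser<?> restored = getObjectMapper().convertValue(parserMap, InputRowParser.class);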
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class GroupByQueryRunnerFactoryTest, method createSegment:
private Segment createSegment() throws Exception {
  IncrementalIndex incrementalIndex = new OnheapIncrementalIndex.Builder()
      .setSimpleTestingIndexSchema(new CountAggregatorFactory("count"))
      .setConcurrentEventAdd(true)
      .setMaxRowCount(5000)
      .build();
  StringInputRowParser parser = new StringInputRowParser(
      new CSVParseSpec(
          new TimestampSpec("timestamp", "iso", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags"))),
          "\t",
          ImmutableList.of("timestamp", "product", "tags"),
          false,
          0
      ),
      "UTF-8"
  );
  String[] rows = new String[]{
      "2011-01-12T00:00:00.000Z,product_1,t1",
      "2011-01-13T00:00:00.000Z,product_2,t2",
      "2011-01-14T00:00:00.000Z,product_3,t2"
  };
  for (String row : rows) {
    incrementalIndex.add(parser.parse(row));
  }
  closerRule.closeLater(incrementalIndex);
  return new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("test"));
}
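One detail worth calling out in the CSVParseSpec above: the "\t" argument is the list delimiter (used to split a single field into a multi-value dimension), while the fields themselves are comma-separated. A minimal sketch (not in the original test) of what a tab inside the tags field would yield, reusing the parser above:

// Illustrative only: a tab inside the tags column produces a multi-value dimension.
final InputRow multiValueRow = parser.parse("2011-01-15T00:00:00.000Z,product_4,t1\tt3");
// multiValueRow.getDimension("product") -> ["product_4"]
// multiValueRow.getDimension("tags")    -> ["t1", "t3"]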
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class AbstractMultiPhaseParallelIndexingTest, method newTask:
protected ParallelIndexSupervisorTask newTask(
    @Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec,
    @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec,
    Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec,
    int maxNumConcurrentSubTasks, boolean appendToExisting
) {
  GranularitySpec granularitySpec = new UniformGranularitySpec(
      SEGMENT_GRANULARITY,
      Granularities.MINUTE,
      interval == null ? null : Collections.singletonList(interval)
  );
  ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);
  final ParallelIndexIngestionSpec ingestionSpec;
  // useInputFormatApi is a field of this test base class, not a parameter of this method.
  if (useInputFormatApi) {
    Preconditions.checkArgument(parseSpec == null);
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
        null,
        new LocalInputSource(inputDir, filter),
        inputFormat,
        appendToExisting,
        null
    );
    ingestionSpec = new ParallelIndexIngestionSpec(
        new DataSchema(
            DATASOURCE,
            timestampSpec,
            dimensionsSpec,
            new AggregatorFactory[]{new LongSumAggregatorFactory("val", "val")},
            granularitySpec,
            null
        ),
        ioConfig,
        tuningConfig
    );
  } else {
    Preconditions.checkArgument(inputFormat == null);
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
    // noinspection unchecked
    ingestionSpec = new ParallelIndexIngestionSpec(
        new DataSchema(
            "dataSource",
            getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class),
            new AggregatorFactory[]{new LongSumAggregatorFactory("val", "val")},
            granularitySpec,
            null,
            getObjectMapper()
        ),
        ioConfig,
        tuningConfig
    );
  }
  // set up test tools
  return new ParallelIndexSupervisorTask(null, null, null, ingestionSpec, Collections.emptyMap());
}