Example 16 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

From the class FirehoseFactoryToInputSourceAdaptorTest, method testUnimplementedInputFormat:

@Test
public void testUnimplementedInputFormat() throws IOException {
    final List<String> lines = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        lines.add(StringUtils.format("%d,name_%d,%d", 20190101 + i, i, i + 100));
    }
    final TestFirehoseFactory firehoseFactory = new TestFirehoseFactory(lines);
    final StringInputRowParser inputRowParser = new StringInputRowParser(
        new CSVParseSpec(
            new TimestampSpec(null, "yyyyMMdd", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "name", "score"))),
            ",",
            Arrays.asList("timestamp", "name", "score"),
            false,
            0
        ),
        StringUtils.UTF8_STRING
    );
    final FirehoseFactoryToInputSourceAdaptor inputSourceAdaptor =
        new FirehoseFactoryToInputSourceAdaptor(firehoseFactory, inputRowParser);
    final InputSourceReader reader = inputSourceAdaptor.reader(
        new InputRowSchema(
            inputRowParser.getParseSpec().getTimestampSpec(),
            inputRowParser.getParseSpec().getDimensionsSpec(),
            ColumnsFilter.all()
        ),
        null,
        null
    );
    final List<InputRow> result = new ArrayList<>();
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        while (iterator.hasNext()) {
            result.add(iterator.next());
        }
    }
    Assert.assertEquals(10, result.size());
    for (int i = 0; i < 10; i++) {
        Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-%02d", 1 + i)), result.get(i).getTimestamp());
        Assert.assertEquals(StringUtils.format("name_%d", i), Iterables.getOnlyElement(result.get(i).getDimension("name")));
        Assert.assertEquals(StringUtils.format("%d", i + 100), Iterables.getOnlyElement(result.get(i).getDimension("score")));
    }
}
Also used : ArrayList(java.util.ArrayList) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
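
For reference, a minimal standalone sketch (hypothetical class name, not part of the test) of how the same parser handles a single CSV line without going through the firehose-to-input-source adaptor. It assumes a null timestamp column in TimestampSpec defaults to "timestamp":

import java.util.Arrays;

import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.CSVParseSpec;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.StringUtils;

public class CsvParseSketch {
    public static void main(String[] args) {
        // Same spec as the test above.
        final StringInputRowParser parser = new StringInputRowParser(
            new CSVParseSpec(
                new TimestampSpec(null, "yyyyMMdd", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "name", "score"))),
                ",",
                Arrays.asList("timestamp", "name", "score"),
                false,
                0
            ),
            StringUtils.UTF8_STRING
        );
        // One line shaped like the test data: yyyyMMdd timestamp, name, score.
        final InputRow row = parser.parse("20190101,name_0,100");
        System.out.println(row.getTimestamp());          // 2019-01-01T00:00:00.000Z
        System.out.println(row.getDimension("name"));    // [name_0]
        System.out.println(row.getDimension("score"));   // [100]
    }
}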

Example 17 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

From the class MapVirtualColumnTestBase, method generateIndex:

static IncrementalIndex generateIndex() throws IOException {
    final CharSource input = CharSource.wrap(
        "2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3\n"
        + "2011-01-12T00:00:00.000Z\tb\tkey4,key5,key6\tvalue4\n"
        + "2011-01-12T00:00:00.000Z\tc\tkey1,key5\tvalue1,value5,value9\n"
    );
    final StringInputRowParser parser = new StringInputRowParser(
        new DelimitedParseSpec(
            new TimestampSpec("ts", "auto", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim", "keys", "values"))),
            "\t",
            ",",
            Arrays.asList("ts", "dim", "keys", "values"),
            false,
            0
        ),
        "utf8"
    );
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
        .build();
    return TestIndex.loadIncrementalIndex(
        () -> new OnheapIncrementalIndex.Builder().setIndexSchema(schema).setMaxRowCount(10000).build(),
        input,
        parser
    );
}
Also used : CharSource(com.google.common.io.CharSource) DelimitedParseSpec(org.apache.druid.data.input.impl.DelimitedParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)
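
A short sketch (hypothetical class name, not part of the test) of what the list delimiter does here: within a tab-separated column, "," splits the value into a multi-value dimension:

import java.util.Arrays;

import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DelimitedParseSpec;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.TimestampSpec;

public class DelimitedParseSketch {
    public static void main(String[] args) {
        final StringInputRowParser parser = new StringInputRowParser(
            new DelimitedParseSpec(
                new TimestampSpec("ts", "auto", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim", "keys", "values"))),
                "\t",   // column delimiter
                ",",    // list delimiter for multi-value dimensions
                Arrays.asList("ts", "dim", "keys", "values"),
                false,
                0
            ),
            "utf8"
        );
        // First row of the test data above.
        final InputRow row = parser.parse("2011-01-12T00:00:00.000Z\ta\tkey1,key2,key3\tvalue1,value2,value3");
        System.out.println(row.getDimension("keys"));    // [key1, key2, key3]
        System.out.println(row.getDimension("values"));  // [value1, value2, value3]
    }
}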

Example 18 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

From the class HashPartitionTaskKillTest, method createTestTask:

private ParallelIndexSupervisorTask createTestTask(
    @Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec,
    @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec,
    Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec,
    int maxNumConcurrentSubTasks, boolean appendToExisting, boolean useInputFormatApi,
    int succeedsBeforeFailing
) {
    GranularitySpec granularitySpec = new UniformGranularitySpec(
        SEGMENT_GRANULARITY, Granularities.MINUTE,
        interval == null ? null : Collections.singletonList(interval)
    );
    ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);
    final ParallelIndexIngestionSpec ingestionSpec;
    if (useInputFormatApi) {
        // Input source/format API: the legacy parse spec must not be set.
        Preconditions.checkArgument(parseSpec == null);
        ParallelIndexIOConfig ioConfig =
            new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, filter), inputFormat, appendToExisting, null);
        ingestionSpec = new ParallelIndexIngestionSpec(
            new DataSchema(
                DATASOURCE, timestampSpec, dimensionsSpec,
                new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
                granularitySpec, null
            ),
            ioConfig, tuningConfig
        );
    } else {
        // Legacy firehose API: the input format must not be set; the parser is
        // serialized into the DataSchema as a Map.
        Preconditions.checkArgument(inputFormat == null);
        ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
        // noinspection unchecked
        ingestionSpec = new ParallelIndexIngestionSpec(
            new DataSchema(
                "dataSource",
                getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class),
                new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
                granularitySpec, null, getObjectMapper()
            ),
            ioConfig, tuningConfig
        );
    }
    return new ParallelIndexSupervisorTaskTest(null, null, null, ingestionSpec, null, Collections.emptyMap(), succeedsBeforeFailing);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) Map(java.util.Map) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource)
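
The else branch relies on Jackson to turn the strongly typed parser into the Map form that the deprecated DataSchema constructor expects. A minimal sketch of that conversion (hypothetical helper class, not part of the test), assuming an ObjectMapper with Druid's serde modules registered, as the test's getObjectMapper() provides:

import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.data.input.impl.ParseSpec;
import org.apache.druid.data.input.impl.StringInputRowParser;

public class ParserMapSketch {
    @SuppressWarnings("unchecked")
    public static Map<String, Object> toParserMap(ObjectMapper mapper, ParseSpec parseSpec) {
        // convertValue renders the parser to its JSON tree and reads it back as a Map,
        // mirroring getObjectMapper().convertValue(...) in createTestTask above.
        return mapper.convertValue(new StringInputRowParser(parseSpec, null), Map.class);
    }
}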

Example 19 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

From the class GroupByQueryRunnerFactoryTest, method createSegment:

private Segment createSegment() throws Exception {
    IncrementalIndex incrementalIndex = new OnheapIncrementalIndex.Builder()
        .setSimpleTestingIndexSchema(new CountAggregatorFactory("count"))
        .setConcurrentEventAdd(true)
        .setMaxRowCount(5000)
        .build();
    StringInputRowParser parser = new StringInputRowParser(
        new CSVParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags"))),
            "\t",
            ImmutableList.of("timestamp", "product", "tags"),
            false,
            0
        ),
        "UTF-8"
    );
    String[] rows = new String[] {
        "2011-01-12T00:00:00.000Z,product_1,t1",
        "2011-01-13T00:00:00.000Z,product_2,t2",
        "2011-01-14T00:00:00.000Z,product_3,t2"
    };
    for (String row : rows) {
        incrementalIndex.add(parser.parse(row));
    }
    closerRule.closeLater(incrementalIndex);
    return new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("test"));
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec)
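
Note that the CSVParseSpec above passes "\t" as the list delimiter, so a tags field containing a tab would parse as a multi-value dimension. A hedged sketch with an assumed input row (the class name and row are not from the test):

import com.google.common.collect.ImmutableList;

import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.CSVParseSpec;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.TimestampSpec;

public class MultiValueTagsSketch {
    public static void main(String[] args) {
        final StringInputRowParser parser = new StringInputRowParser(
            new CSVParseSpec(
                new TimestampSpec("timestamp", "iso", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags"))),
                "\t",  // list delimiter: a tab inside a field yields multiple values
                ImmutableList.of("timestamp", "product", "tags"),
                false,
                0
            ),
            "UTF-8"
        );
        // Assumed row, not in the test: "t1" and "t2" joined by a tab.
        final InputRow row = parser.parse("2011-01-15T00:00:00.000Z,product_4,t1\tt2");
        System.out.println(row.getDimension("tags"));  // [t1, t2]
    }
}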

Example 20 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

From the class AbstractMultiPhaseParallelIndexingTest, method newTask:

protected ParallelIndexSupervisorTask newTask(
    @Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec,
    @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec,
    Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec,
    int maxNumConcurrentSubTasks, boolean appendToExisting
) {
    GranularitySpec granularitySpec = new UniformGranularitySpec(
        SEGMENT_GRANULARITY, Granularities.MINUTE,
        interval == null ? null : Collections.singletonList(interval)
    );
    ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);
    final ParallelIndexIngestionSpec ingestionSpec;
    // useInputFormatApi is a field of the test class, not a parameter of this method.
    if (useInputFormatApi) {
        Preconditions.checkArgument(parseSpec == null);
        ParallelIndexIOConfig ioConfig =
            new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, filter), inputFormat, appendToExisting, null);
        ingestionSpec = new ParallelIndexIngestionSpec(
            new DataSchema(
                DATASOURCE, timestampSpec, dimensionsSpec,
                new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
                granularitySpec, null
            ),
            ioConfig, tuningConfig
        );
    } else {
        Preconditions.checkArgument(inputFormat == null);
        ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
        // noinspection unchecked
        ingestionSpec = new ParallelIndexIngestionSpec(
            new DataSchema(
                "dataSource",
                getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class),
                new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
                granularitySpec, null, getObjectMapper()
            ),
            ioConfig, tuningConfig
        );
    }
    return new ParallelIndexSupervisorTask(null, null, null, ingestionSpec, Collections.emptyMap());
}
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) Map(java.util.Map) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource)
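
Both createTestTask (Example 18) and newTask above follow the same convention: exactly one of inputFormat and parseSpec is non-null, enforced per branch with Preconditions.checkArgument. A compact sketch of that guard as a single check (hypothetical helper, not in the tests):

import javax.annotation.Nullable;

import com.google.common.base.Preconditions;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.impl.ParseSpec;

public class InputSpecGuard {
    public static void checkExclusive(@Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec) {
        // Exactly one of the two may be set: the input format drives the modern
        // input source API, the parse spec drives the legacy firehose API.
        Preconditions.checkArgument(
            (inputFormat == null) != (parseSpec == null),
            "Exactly one of inputFormat or parseSpec must be provided"
        );
    }
}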

Aggregations

StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser): 30 usages
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 22 usages
Test (org.junit.Test): 16 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 15 usages
JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec): 15 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 11 usages
IdUtilsTest (org.apache.druid.common.utils.IdUtilsTest): 9 usages
ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec): 9 usages
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 8 usages
DataSchema (org.apache.druid.segment.indexing.DataSchema): 8 usages
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 8 usages
ArrayList (java.util.ArrayList): 7 usages
CSVParseSpec (org.apache.druid.data.input.impl.CSVParseSpec): 7 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 7 usages
Map (java.util.Map): 6 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 5 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 5 usages
Before (org.junit.Before): 5 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 4 usages
File (java.io.File): 4 usages