Example 1 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

The class AggregationTestHelper, method createIndex.

public void createIndex(InputStream inputDataStream, String parserJson, String aggregators, File outDir, long minTimestamp, Granularity gran, int maxRowCount, boolean rollup) throws Exception {
    try {
        StringInputRowParser parser = mapper.readValue(parserJson, StringInputRowParser.class);
        LineIterator iter = IOUtils.lineIterator(inputDataStream, "UTF-8");
        List<AggregatorFactory> aggregatorSpecs = mapper.readValue(aggregators, new TypeReference<List<AggregatorFactory>>() {
        });
        createIndex(iter, parser, aggregatorSpecs.toArray(new AggregatorFactory[0]), outDir, minTimestamp, gran, true, maxRowCount, rollup);
    } finally {
        Closeables.close(inputDataStream, true);
    }
}
Also used: StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) List(java.util.List) ArrayList(java.util.ArrayList) LineIterator(org.apache.commons.io.LineIterator)
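
For context, a minimal sketch of how this overload might be driven from a test. The helper instance, JSON specs, input file, and tempFolder below are illustrative assumptions, not part of the example above:

    // Illustrative only: parser and aggregator specs supplied as JSON strings.
    String parserJson = "{\"type\": \"string\", \"parseSpec\": {\"format\": \"json\","
            + " \"timestampSpec\": {\"column\": \"time\", \"format\": \"auto\"},"
            + " \"dimensionsSpec\": {\"dimensions\": [\"dimA\"]}}}";
    String aggregatorsJson = "[{\"type\": \"longSum\", \"name\": \"metric1\", \"fieldName\": \"col1\"}]";
    // No try-with-resources needed: createIndex closes the stream in its finally block.
    InputStream inputDataStream = new FileInputStream("rows.json");
    helper.createIndex(
            inputDataStream,
            parserJson,
            aggregatorsJson,
            tempFolder.newFolder(),   // outDir
            0L,                       // minTimestamp
            Granularities.NONE,       // query granularity
            5000,                     // maxRowCount per in-memory index
            true                      // rollup
    );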

Example 2 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

The class AggregationTestHelper, method createIndex.

public void createIndex(Iterator rows, InputRowParser parser, final AggregatorFactory[] metrics, File outDir, long minTimestamp, Granularity gran, boolean deserializeComplexMetrics, int maxRowCount, boolean rollup) throws Exception {
    IncrementalIndex index = null;
    List<File> toMerge = new ArrayList<>();
    try {
        index = new OnheapIncrementalIndex.Builder()
                .setIndexSchema(
                        new IncrementalIndexSchema.Builder()
                                .withMinTimestamp(minTimestamp)
                                .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
                                .withQueryGranularity(gran)
                                .withMetrics(metrics)
                                .withRollup(rollup)
                                .build()
                )
                .setDeserializeComplexMetrics(deserializeComplexMetrics)
                .setMaxRowCount(maxRowCount)
                .build();
        while (rows.hasNext()) {
            Object row = rows.next();
            if (!index.canAppendRow()) {
                File tmp = tempFolder.newFolder();
                toMerge.add(tmp);
                indexMerger.persist(index, tmp, new IndexSpec(), null);
                index.close();
                index = new OnheapIncrementalIndex.Builder()
                        .setIndexSchema(
                                new IncrementalIndexSchema.Builder()
                                        .withMinTimestamp(minTimestamp)
                                        .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
                                        .withQueryGranularity(gran)
                                        .withMetrics(metrics)
                                        .withRollup(rollup)
                                        .build()
                        )
                        .setDeserializeComplexMetrics(deserializeComplexMetrics)
                        .setMaxRowCount(maxRowCount)
                        .build();
            }
            if (row instanceof String && parser instanceof StringInputRowParser) {
                // Note: this is required because StringInputRowParser is InputRowParser<ByteBuffer> as opposed to
                // InputRowParser<String>.
                index.add(((StringInputRowParser) parser).parse((String) row));
            } else {
                index.add(((List<InputRow>) parser.parseBatch(row)).get(0));
            }
        }
        if (toMerge.size() > 0) {
            File tmp = tempFolder.newFolder();
            toMerge.add(tmp);
            indexMerger.persist(index, tmp, new IndexSpec(), null);
            List<QueryableIndex> indexes = new ArrayList<>(toMerge.size());
            for (File file : toMerge) {
                indexes.add(indexIO.loadIndex(file));
            }
            indexMerger.mergeQueryableIndex(indexes, rollup, metrics, outDir, new IndexSpec(), null, -1);
            for (QueryableIndex qi : indexes) {
                qi.close();
            }
        } else {
            indexMerger.persist(index, outDir, new IndexSpec(), null);
        }
    } finally {
        if (index != null) {
            index.close();
        }
    }
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) ArrayList(java.util.ArrayList) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)
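
The overload above spills the in-memory index to a temporary folder whenever canAppendRow() returns false and merges all persisted segments at the end; if nothing was spilled, the single index is persisted straight to outDir. A hedged sketch of feeding it in-memory rows through the Iterator path (the MapInputRowParser spec and row values are illustrative assumptions):

    // Illustrative only: in-memory rows driven through the Iterator-based overload.
    InputRowParser parser = new MapInputRowParser(
            new TimeAndDimsParseSpec(
                    new TimestampSpec("time", "auto", null),
                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dimA")))
            )
    );
    List<Map<String, Object>> rows = ImmutableList.of(
            ImmutableMap.of("time", "2014-01-01", "dimA", "foo", "col1", 1L),
            ImmutableMap.of("time", "2014-01-02", "dimA", "bar", "col1", 2L)
    );
    helper.createIndex(
            rows.iterator(),
            parser,
            new AggregatorFactory[] { new LongSumAggregatorFactory("metric1", "col1") },
            tempFolder.newFolder(),   // outDir
            0L,                       // minTimestamp
            Granularities.NONE,       // query granularity
            true,                     // deserializeComplexMetrics
            5000,                     // maxRowCount before spilling
            true                      // rollup
    );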

Example 3 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

The class DataSchemaTest, method testEmptyDatasource.

@Test
public void testEmptyDatasource() {
    Map<String, Object> parser = jsonMapper.convertValue(
            new StringInputRowParser(
                    new JSONParseSpec(
                            new TimestampSpec("time", "auto", null),
                            DimensionsSpec.builder()
                                    .setDimensions(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "dimA", "dimB", "col2")))
                                    .setDimensionExclusions(ImmutableList.of("dimC"))
                                    .build(),
                            null,
                            null,
                            null
                    ),
                    null
            ),
            JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
    );
    expectedException.expect(CoreMatchers.instanceOf(IllegalArgumentException.class));
    expectedException.expectMessage("dataSource cannot be null or empty. Please provide a dataSource.");
    DataSchema schema = new DataSchema(
            "",
            parser,
            new AggregatorFactory[] {
                    new DoubleSumAggregatorFactory("metric1", "col1"),
                    new DoubleSumAggregatorFactory("metric2", "col2")
            },
            new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
            null,
            jsonMapper
    );
}
Also used: DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IdUtilsTest(org.apache.druid.common.utils.IdUtilsTest)
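
The convertValue round trip also works in reverse; a short sketch, assuming the same jsonMapper, of turning the serialized Map form back into a concrete parser (the sample JSON line is illustrative):

    // Illustrative only: the Map<String, Object> form accepted by DataSchema can be
    // converted back into a concrete StringInputRowParser with the same ObjectMapper.
    StringInputRowParser roundTripped = jsonMapper.convertValue(parser, StringInputRowParser.class);
    InputRow row = roundTripped.parse("{\"time\": \"2014-01-01\", \"dimA\": \"foo\"}");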

Example 4 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

The class DataSchemaTest, method testExplicitInclude.

@Test
public void testExplicitInclude() {
    Map<String, Object> parser = jsonMapper.convertValue(
            new StringInputRowParser(
                    new JSONParseSpec(
                            new TimestampSpec("time", "auto", null),
                            DimensionsSpec.builder()
                                    .setDimensions(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "dimA", "dimB", "col2")))
                                    .setDimensionExclusions(ImmutableList.of("dimC"))
                                    .build(),
                            null,
                            null,
                            null
                    ),
                    null
            ),
            JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
    );
    DataSchema schema = new DataSchema(
            IdUtilsTest.VALID_ID_CHARS,
            parser,
            new AggregatorFactory[] {
                    new DoubleSumAggregatorFactory("metric1", "col1"),
                    new DoubleSumAggregatorFactory("metric2", "col2")
            },
            new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
            null,
            jsonMapper
    );
    Assert.assertEquals(ImmutableSet.of("__time", "dimC", "col1", "metric1", "metric2"), schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions());
}
Also used: DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IdUtilsTest(org.apache.druid.common.utils.IdUtilsTest)
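
A complementary assertion one might add (a sketch, assuming DimensionsSpec.getDimensionNames()): the explicitly declared dimensions stay included, even though col2 also feeds a metric and dimC is excluded.

    // Illustrative only: explicitly declared dimensions remain in the include list;
    // only metric names, unused metric inputs, and declared exclusions are excluded.
    Assert.assertEquals(
            ImmutableList.of("time", "dimA", "dimB", "col2"),
            schema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames()
    );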

Example 5 with StringInputRowParser

Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.

The class DataSchemaTest, method testTransformSpec.

@Test
public void testTransformSpec() {
    Map<String, Object> parserMap = jsonMapper.convertValue(
            new StringInputRowParser(
                    new JSONParseSpec(
                            new TimestampSpec("time", "auto", null),
                            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "dimA", "dimB", "col2"))),
                            null,
                            null,
                            null
                    ),
                    null
            ),
            JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
    );
    DataSchema schema = new DataSchema(
            IdUtilsTest.VALID_ID_CHARS,
            parserMap,
            new AggregatorFactory[] {
                    new DoubleSumAggregatorFactory("metric1", "col1"),
                    new DoubleSumAggregatorFactory("metric2", "col2")
            },
            new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
            new TransformSpec(
                    new SelectorDimFilter("dimA", "foo", null),
                    ImmutableList.of(new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE))
            ),
            jsonMapper
    );
    // Test hack that produces a StringInputRowParser.
    final StringInputRowParser parser = (StringInputRowParser) schema.getParser();
    final InputRow row1bb = parser.parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}".getBytes(StandardCharsets.UTF_8))).get(0);
    Assert.assertEquals(DateTimes.of("2000-01-01"), row1bb.getTimestamp());
    Assert.assertEquals("foo", row1bb.getRaw("dimA"));
    Assert.assertEquals("foofoo", row1bb.getRaw("expr"));
    final InputRow row1string = parser.parse("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}");
    Assert.assertEquals(DateTimes.of("2000-01-01"), row1string.getTimestamp());
    Assert.assertEquals("foo", row1string.getRaw("dimA"));
    Assert.assertEquals("foofoo", row1string.getRaw("expr"));
    final InputRow row2 = parser.parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"x\"}".getBytes(StandardCharsets.UTF_8))).get(0);
    Assert.assertNull(row2);
}
Also used: DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) TransformSpec(org.apache.druid.segment.transform.TransformSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IdUtilsTest(org.apache.druid.common.utils.IdUtilsTest)
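
The null result for row2 comes from the SelectorDimFilter in the TransformSpec: rows where dimA is not "foo" are dropped during parsing. The same spec can also decorate a standalone parser; a sketch, assuming TransformSpec.decorate and a pre-built baseParser:

    // Illustrative only: decorate wraps an existing parser so the filter and the
    // expression transform apply to every parsed row; baseParser is hypothetical.
    TransformSpec transformSpec = new TransformSpec(
            new SelectorDimFilter("dimA", "foo", null),
            ImmutableList.of(new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE))
    );
    InputRowParser<ByteBuffer> decorated = transformSpec.decorate(baseParser);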

Aggregations

StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser): 30 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 22 uses
Test (org.junit.Test): 16 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 15 uses
JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec): 15 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 11 uses
IdUtilsTest (org.apache.druid.common.utils.IdUtilsTest): 9 uses
ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec): 9 uses
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 8 uses
DataSchema (org.apache.druid.segment.indexing.DataSchema): 8 uses
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 8 uses
ArrayList (java.util.ArrayList): 7 uses
CSVParseSpec (org.apache.druid.data.input.impl.CSVParseSpec): 7 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 7 uses
Map (java.util.Map): 6 uses
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 5 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 5 uses
Before (org.junit.Before): 5 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 4 uses
File (java.io.File): 4 uses