
Example 1 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class DataGenerator, method generator:

/**
 * Initialize a Java Stream generator for InputRow from this DataGenerator.
 * The generator will log its progress once every 10,000 rows.
 *
 * @param numOfRows the number of rows to generate
 * @return a generator
 */
private Stream<InputRow> generator(int numOfRows) {
    return Stream.generate(new Supplier<InputRow>() {

        int i = 0;

        @Override
        public InputRow get() {
            InputRow row = DataGenerator.this.nextRow();
            i++;
            if (i % 10_000 == 0) {
                log.info("%,d/%,d rows generated.", i, numOfRows);
            }
            return row;
        }
    }).limit(numOfRows);
}
Also used: MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) Supplier(java.util.function.Supplier)
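The pattern worth noting here is Stream.generate with a stateful anonymous Supplier, truncated by limit(). A minimal, self-contained sketch of the same pattern outside Druid, using only the JDK (the class name and String row type below are illustrative):

import java.util.function.Supplier;
import java.util.stream.Stream;

public class ProgressLoggingGenerator {

    // Same shape as DataGenerator.generator(): an infinite generated stream,
    // capped by limit(), with a progress message every 10,000 elements.
    static Stream<String> rows(int numOfRows) {
        return Stream.generate(new Supplier<String>() {
            int i = 0;

            @Override
            public String get() {
                i++;
                if (i % 10_000 == 0) {
                    System.out.printf("%,d/%,d rows generated.%n", i, numOfRows);
                }
                return "row-" + i;
            }
        }).limit(numOfRows);
    }

    public static void main(String[] args) {
        rows(25_000).forEach(r -> { });  // prints progress at 10,000 and 20,000
    }
}

Note that the Supplier holds mutable state (the counter i), so the stream must stay sequential; calling parallel() on it would make the counter racy.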

Example 2 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class StringColumnAggregationTest, method setup:

@Before
public void setup() throws Exception {
    List<String> dimensions = ImmutableList.of(singleValue, multiValue);
    List<InputRow> inputRows = new ArrayList<>(n);
    for (int i = 1; i <= n; i++) {
        String val = String.valueOf(i * 1.0d);
        inputRows.add(new MapBasedInputRow(
            DateTime.now(DateTimeZone.UTC),
            dimensions,
            ImmutableMap.of(singleValue, val, multiValue, Lists.newArrayList(val, null, val))));
    }
    aggregationTestHelper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
        Collections.emptyList(), new GroupByQueryConfig(), tempFolder);
    IncrementalIndex index = AggregationTestHelper.createIncrementalIndex(
        inputRows.iterator(),
        new NoopInputRowParser(null),
        new AggregatorFactory[] { new CountAggregatorFactory("count") },
        0, Granularities.NONE, false, 100, false);
    this.segments = ImmutableList.of(
        new IncrementalIndexSegment(index, SegmentId.dummy("test")),
        aggregationTestHelper.persistIncrementalIndex(index, null));
    // We ingested the arithmetic progression 1..n (here n = 10), so the expected
    // sums follow from the arithmetic-series formula: 1 + 2 + ... + n = n(n+1)/2.
    // All expected values are doubled because the query runs over the same data
    // twice: once as an incremental-index segment and once as a persisted copy.
    numRows = 2 * n;
    singleValueSum = n * (n + 1);
    multiValueSum = 2 * n * (n + 1);
    singleValueMax = n;
    multiValueMax = n;
    singleValueMin = 1;
    multiValueMin = 1;
}
Also used: GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) ArrayList(java.util.ArrayList) NoopInputRowParser(org.apache.druid.data.input.impl.NoopInputRowParser) Before(org.junit.Before)
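The expected values set at the bottom of setup() are just the arithmetic-series formula applied twice. A small standalone sketch of that arithmetic, taking n = 10 as the inline comment states (the class name is illustrative):

public class ExpectedSums {
    public static void main(String[] args) {
        int n = 10;                              // matches the test's inline comment
        long seriesSum = (long) n * (n + 1) / 2; // 1 + 2 + ... + n = 55
        long numRows = 2L * n;                   // two segments over the same rows
        long singleValueSum = 2 * seriesSum;     // n * (n + 1) = 110
        long multiValueSum = 2 * singleValueSum; // value appears twice per row = 220
        System.out.printf("numRows=%d single=%d multi=%d%n",
                numRows, singleValueSum, multiValueSum);
    }
}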

Example 3 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class AggregationTestHelper, method createIndex:

public void createIndex(Iterator rows, InputRowParser parser, final AggregatorFactory[] metrics, File outDir, long minTimestamp, Granularity gran, boolean deserializeComplexMetrics, int maxRowCount, boolean rollup) throws Exception {
    IncrementalIndex index = null;
    List<File> toMerge = new ArrayList<>();
    try {
        index = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(
                new IncrementalIndexSchema.Builder()
                    .withMinTimestamp(minTimestamp)
                    .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
                    .withQueryGranularity(gran)
                    .withMetrics(metrics)
                    .withRollup(rollup)
                    .build())
            .setDeserializeComplexMetrics(deserializeComplexMetrics)
            .setMaxRowCount(maxRowCount)
            .build();
        while (rows.hasNext()) {
            Object row = rows.next();
            if (!index.canAppendRow()) {
                File tmp = tempFolder.newFolder();
                toMerge.add(tmp);
                indexMerger.persist(index, tmp, new IndexSpec(), null);
                index.close();
                index = new OnheapIncrementalIndex.Builder()
                    .setIndexSchema(
                        new IncrementalIndexSchema.Builder()
                            .withMinTimestamp(minTimestamp)
                            .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
                            .withQueryGranularity(gran)
                            .withMetrics(metrics)
                            .withRollup(rollup)
                            .build())
                    .setDeserializeComplexMetrics(deserializeComplexMetrics)
                    .setMaxRowCount(maxRowCount)
                    .build();
            }
            if (row instanceof String && parser instanceof StringInputRowParser) {
                // Note: this cast is required because StringInputRowParser is an
                // InputRowParser<ByteBuffer>, as opposed to an InputRowParser<String>.
                index.add(((StringInputRowParser) parser).parse((String) row));
            } else {
                index.add(((List<InputRow>) parser.parseBatch(row)).get(0));
            }
        }
        if (toMerge.size() > 0) {
            File tmp = tempFolder.newFolder();
            toMerge.add(tmp);
            indexMerger.persist(index, tmp, new IndexSpec(), null);
            List<QueryableIndex> indexes = new ArrayList<>(toMerge.size());
            for (File file : toMerge) {
                indexes.add(indexIO.loadIndex(file));
            }
            indexMerger.mergeQueryableIndex(indexes, rollup, metrics, outDir, new IndexSpec(), null, -1);
            for (QueryableIndex qi : indexes) {
                qi.close();
            }
        } else {
            indexMerger.persist(index, outDir, new IndexSpec(), null);
        }
    } finally {
        if (index != null) {
            index.close();
        }
    }
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) ArrayList(java.util.ArrayList) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)
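The method follows a classic spill-and-merge shape: fill an in-memory index, persist it to a temp folder and start a fresh one whenever canAppendRow() says no, then merge all persisted pieces into outDir at the end, or persist directly when everything fit in memory. A generic sketch of that control flow using plain-JDK stand-ins (every name below is illustrative; none of it is Druid API):

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class SpillAndMerge {

    // Same control flow as createIndex(): buffer rows in memory, spill full
    // buffers to temp files, and either merge the spills or write directly.
    static void build(Iterator<String> rows, int maxRowCount, File outDir) throws IOException {
        List<String> buffer = new ArrayList<>();
        List<File> spills = new ArrayList<>();
        while (rows.hasNext()) {
            if (buffer.size() >= maxRowCount) {
                spills.add(spill(buffer));   // analogous to indexMerger.persist(index, tmp, ...)
                buffer = new ArrayList<>();  // analogous to rebuilding the OnheapIncrementalIndex
            }
            buffer.add(rows.next());
        }
        if (!spills.isEmpty()) {
            spills.add(spill(buffer));       // the remainder joins the merge set
            merge(spills, outDir);           // analogous to indexMerger.mergeQueryableIndex(...)
        } else {
            writeDirect(buffer, outDir);     // everything fit: persist straight to outDir
        }
    }

    static File spill(List<String> buffer) throws IOException {
        File tmp = Files.createTempFile("spill", ".txt").toFile();
        Files.write(tmp.toPath(), buffer);
        return tmp;
    }

    static void merge(List<File> spills, File outDir) throws IOException {
        List<String> all = new ArrayList<>();
        for (File spillFile : spills) {
            all.addAll(Files.readAllLines(spillFile.toPath()));
        }
        writeDirect(all, outDir);
    }

    static void writeDirect(List<String> rows, File outDir) throws IOException {
        Files.write(new File(outDir, "out.txt").toPath(), rows);
    }
}

The original has the same asymmetry this sketch preserves: the final in-memory index only joins the merge set when earlier spills exist; otherwise it is persisted straight to outDir.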

Example 4 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class GroupByLimitPushDownInsufficientBufferTest, method setup:

@Before
public void setup() throws Exception {
    tmpDir = FileUtils.createTempDir();
    InputRow row;
    List<String> dimNames = Arrays.asList("dimA", "metA");
    Map<String, Object> event;
    final IncrementalIndex indexA = makeIncIndex(false);
    incrementalIndices.add(indexA);
    event = new HashMap<>();
    event.put("dimA", "hello");
    event.put("metA", 100);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexA.add(row);
    event = new HashMap<>();
    event.put("dimA", "mango");
    event.put("metA", 95);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexA.add(row);
    event = new HashMap<>();
    event.put("dimA", "world");
    event.put("metA", 75);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexA.add(row);
    event = new HashMap<>();
    event.put("dimA", "fubaz");
    event.put("metA", 75);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexA.add(row);
    event = new HashMap<>();
    event.put("dimA", "zortaxx");
    event.put("metA", 999);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexA.add(row);
    event = new HashMap<>();
    event.put("dimA", "blarg");
    event.put("metA", 125);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexA.add(row);
    event = new HashMap<>();
    event.put("dimA", "blerg");
    event.put("metA", 130);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexA.add(row);
    final File fileA = INDEX_MERGER_V9.persist(
        indexA, new File(tmpDir, "A"), new IndexSpec(),
        OffHeapMemorySegmentWriteOutMediumFactory.instance());
    QueryableIndex qindexA = INDEX_IO.loadIndex(fileA);
    final IncrementalIndex indexB = makeIncIndex(false);
    incrementalIndices.add(indexB);
    event = new HashMap<>();
    event.put("dimA", "foo");
    event.put("metA", 200);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexB.add(row);
    event = new HashMap<>();
    event.put("dimA", "world");
    event.put("metA", 75);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexB.add(row);
    event = new HashMap<>();
    event.put("dimA", "mango");
    event.put("metA", 95);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexB.add(row);
    event = new HashMap<>();
    event.put("dimA", "zebra");
    event.put("metA", 180);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexB.add(row);
    event = new HashMap<>();
    event.put("dimA", "blorg");
    event.put("metA", 120);
    row = new MapBasedInputRow(1000, dimNames, event);
    indexB.add(row);
    final File fileB = INDEX_MERGER_V9.persist(
        indexB, new File(tmpDir, "B"), new IndexSpec(),
        OffHeapMemorySegmentWriteOutMediumFactory.instance());
    QueryableIndex qindexB = INDEX_IO.loadIndex(fileB);
    groupByIndices = Arrays.asList(qindexA, qindexB);
    resourceCloser = Closer.create();
    setupGroupByFactory();
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRow(org.apache.druid.data.input.InputRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) File(java.io.File) Before(org.junit.Before)
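The twelve near-identical event blocks above could be collapsed with a small helper. A hypothetical sketch (not part of the test class) built on the same MapBasedInputRow constructor the test already uses:

import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.MapBasedInputRow;
import java.util.Arrays;
import java.util.List;

class RowHelper {
    private static final List<String> DIM_NAMES = Arrays.asList("dimA", "metA");

    // Builds one event row at timestamp 1000, mirroring the setup() pattern above.
    static MapBasedInputRow row(String dimA, int metA) {
        return new MapBasedInputRow(1000, DIM_NAMES,
                ImmutableMap.<String, Object>of("dimA", dimA, "metA", metA));
    }
}

With it, each five-line block reduces to a single call such as indexA.add(RowHelper.row("hello", 100)).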

Example 5 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

From the class DataSchemaTest, method testTransformSpec:

@Test
public void testTransformSpec() {
    Map<String, Object> parserMap = jsonMapper.convertValue(
        new StringInputRowParser(
            new JSONParseSpec(
                new TimestampSpec("time", "auto", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(
                    ImmutableList.of("time", "dimA", "dimB", "col2"))),
                null, null, null),
            null),
        JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
    DataSchema schema = new DataSchema(
        IdUtilsTest.VALID_ID_CHARS,
        parserMap,
        new AggregatorFactory[] {
            new DoubleSumAggregatorFactory("metric1", "col1"),
            new DoubleSumAggregatorFactory("metric2", "col2")
        },
        new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
        new TransformSpec(
            new SelectorDimFilter("dimA", "foo", null),
            ImmutableList.of(new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE))),
        jsonMapper);
    // Test hack that produces a StringInputRowParser.
    final StringInputRowParser parser = (StringInputRowParser) schema.getParser();
    final InputRow row1bb = parser.parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}".getBytes(StandardCharsets.UTF_8))).get(0);
    Assert.assertEquals(DateTimes.of("2000-01-01"), row1bb.getTimestamp());
    Assert.assertEquals("foo", row1bb.getRaw("dimA"));
    Assert.assertEquals("foofoo", row1bb.getRaw("expr"));
    final InputRow row1string = parser.parse("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}");
    Assert.assertEquals(DateTimes.of("2000-01-01"), row1string.getTimestamp());
    Assert.assertEquals("foo", row1string.getRaw("dimA"));
    Assert.assertEquals("foofoo", row1string.getRaw("expr"));
    final InputRow row2 = parser.parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"x\"}".getBytes(StandardCharsets.UTF_8))).get(0);
    Assert.assertNull(row2);
}
Also used: DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) TransformSpec(org.apache.druid.segment.transform.TransformSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IdUtilsTest(org.apache.druid.common.utils.IdUtilsTest)
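row2 comes back null because the TransformSpec's SelectorDimFilter keeps only rows where dimA equals "foo", and the ExpressionTransform adds the derived expr column to the survivors. A plain-Java sketch of those semantics (illustrative only, not Druid's implementation):

import java.util.HashMap;
import java.util.Map;

public class TransformSketch {

    // Mirrors the TransformSpec above: filter on dimA == "foo",
    // then add expr = concat(dimA, dimA) to rows that pass.
    static Map<String, Object> transform(Map<String, Object> row) {
        if (!"foo".equals(row.get("dimA"))) {
            return null;                     // filtered out, like row2 in the test
        }
        Map<String, Object> out = new HashMap<>(row);
        String dimA = (String) row.get("dimA");
        out.put("expr", dimA + dimA);        // "foo" -> "foofoo"
        return out;
    }

    public static void main(String[] args) {
        Map<String, Object> foo = new HashMap<>();
        foo.put("dimA", "foo");
        System.out.println(transform(foo));  // {dimA=foo, expr=foofoo}
        Map<String, Object> x = new HashMap<>();
        x.put("dimA", "x");
        System.out.println(transform(x));    // null
    }
}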

Aggregations

InputRow (org.apache.druid.data.input.InputRow): 266
Test (org.junit.Test): 193
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 57
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 52
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 52
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 49
ArrayList (java.util.ArrayList): 46
List (java.util.List): 37
ImmutableList (com.google.common.collect.ImmutableList): 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 33
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 29
File (java.io.File): 27
HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig): 27
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 27
DateTime (org.joda.time.DateTime): 24
Map (java.util.Map): 23
IOException (java.io.IOException): 18
Interval (org.joda.time.Interval): 18