
Example 16 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class SegmentGenerator, method generateIncrementalIndex.

public IncrementalIndex generateIncrementalIndex(final DataSegment dataSegment, final GeneratorSchemaInfo schemaInfo, final Granularity granularity, final int numRows) {
    // In case we need to generate hyperUniques.
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    final String dataHash = Hashing.sha256().newHasher()
        .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
        .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
        .putString(granularity.toString(), StandardCharsets.UTF_8)
        .putInt(numRows)
        .hash()
        .toString();
    final DataGenerator dataGenerator = new DataGenerator(
        schemaInfo.getColumnSchemas(),
        // Use segment identifier hashCode as seed
        dataSegment.getId().hashCode(),
        schemaInfo.getDataInterval(),
        numRows
    );
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(schemaInfo.getDimensionsSpec())
        .withMetrics(schemaInfo.getAggsArray())
        .withRollup(schemaInfo.isWithRollup())
        .withQueryGranularity(granularity)
        .build();
    final List<InputRow> rows = new ArrayList<>();
    for (int i = 0; i < numRows; i++) {
        final InputRow row = dataGenerator.nextRow();
        rows.add(row);
        if ((i + 1) % 20000 == 0) {
            log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
        }
    }
    log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
    return makeIncrementalIndex(dataSegment.getId(), dataHash, 0, rows, indexSchema);
}
Also used : InputRow(org.apache.druid.data.input.InputRow) ArrayList(java.util.ArrayList) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)
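
The rows that DataGenerator.nextRow() produces are plain InputRow instances, so they can be inspected with the same Row accessors used throughout Druid. A minimal sketch; the column names "dimA" and "metA" are hypothetical placeholders, not something generateIncrementalIndex() defines:

import org.apache.druid.data.input.InputRow;

// Dump one generated row. The column names below are illustrative assumptions.
static void printRow(InputRow row) {
    System.out.println(row.getTimestamp());        // row timestamp as an org.joda.time.DateTime
    System.out.println(row.getDimensions());       // dimension names present on the row
    System.out.println(row.getDimension("dimA"));  // values of one dimension, always a List<String>
    System.out.println(row.getMetric("metA"));     // numeric value of one metric column
}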

Example 17 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class SqlFirehoseTest, method testFirehoseStringParser.

@Test
public void testFirehoseStringParser() throws Exception {
    final TestCloseable closeable = new TestCloseable();
    List<Object> expectedResults = new ArrayList<>();
    for (Map<String, Object> map : inputs) {
        expectedResults.add(map.get("x"));
    }
    final List<JsonIterator<Map<String, Object>>> lineIterators = fileList.stream()
        .map(s -> new JsonIterator<Map<String, Object>>(TYPE_REF, s, closeable, objectMapper))
        .collect(Collectors.toList());
    final InputRowParser stringParser = TransformSpec.NONE.decorate(
        new StringInputRowParser(
            new TimeAndDimsParseSpec(
                new TimestampSpec("timestamp", "auto", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x")))
            ),
            Charset.defaultCharset().name()
        )
    );
    try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), stringParser, closeable)) {
        final List<Object> results = new ArrayList<>();
        while (firehose.hasMore()) {
            final InputRow inputRow = firehose.nextRow();
            if (inputRow == null) {
                results.add(null);
            } else {
                results.add(inputRow.getDimension("x").get(0));
            }
        }
        Assert.assertEquals(expectedResults, results);
    }
}
Also used : JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Charset(java.nio.charset.Charset) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) After(org.junit.After) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TypeReference(com.fasterxml.jackson.core.type.TypeReference) FileUtils(org.apache.druid.java.util.common.FileUtils) Before(org.junit.Before) JsonIterator(org.apache.druid.data.input.impl.prefetch.JsonIterator) ImmutableMap(com.google.common.collect.ImmutableMap) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileOutputStream(java.io.FileOutputStream) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) FileInputStream(java.io.FileInputStream) Collectors(java.util.stream.Collectors) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) File(java.io.File) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) TransformingStringInputRowParser(org.apache.druid.segment.transform.TransformingStringInputRowParser) Closeable(java.io.Closeable) Assert(org.junit.Assert) TransformSpec(org.apache.druid.segment.transform.TransformSpec)
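
The same TimeAndDimsParseSpec can also be exercised outside the firehose, which makes the dimension handling behind the test's assertion easier to see. A minimal sketch, assuming MapInputRowParser's ParseSpec constructor and its parseBatch() method; the literal values are made up:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.MapInputRowParser;
import org.apache.druid.data.input.impl.TimeAndDimsParseSpec;
import org.apache.druid.data.input.impl.TimestampSpec;

// Parser for row-shaped maps with a "timestamp" column and one dimension "x".
MapInputRowParser parser = new MapInputRowParser(
    new TimeAndDimsParseSpec(
        new TimestampSpec("timestamp", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x")))
    )
);

// parseBatch returns a list because some parsers may emit several rows per input.
InputRow row = parser
    .parseBatch(ImmutableMap.<String, Object>of("timestamp", "2013-01-01T00:00:00Z", "x", "foo"))
    .get(0);

// Dimension values come back as a List<String>, which is why the test reads getDimension("x").get(0).
String x = row.getDimension("x").get(0); // "foo"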

Example 18 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class SqlFirehoseTest, method testFirehose.

@Test
public void testFirehose() throws Exception {
    final TestCloseable closeable = new TestCloseable();
    List<Object> expectedResults = new ArrayList<>();
    for (Map<String, Object> map : inputs) {
        expectedResults.add(map.get("x"));
    }
    final List<JsonIterator<Map<String, Object>>> lineIterators = fileList.stream()
        .map(s -> new JsonIterator<Map<String, Object>>(TYPE_REF, s, closeable, objectMapper))
        .collect(Collectors.toList());
    try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), parser, closeable)) {
        final List<Object> results = new ArrayList<>();
        while (firehose.hasMore()) {
            final InputRow inputRow = firehose.nextRow();
            if (inputRow == null) {
                results.add(null);
            } else {
                results.add(inputRow.getDimension("x").get(0));
            }
        }
        Assert.assertEquals(expectedResults, results);
    }
}
Also used : JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Charset(java.nio.charset.Charset) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) After(org.junit.After) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TypeReference(com.fasterxml.jackson.core.type.TypeReference) FileUtils(org.apache.druid.java.util.common.FileUtils) Before(org.junit.Before) JsonIterator(org.apache.druid.data.input.impl.prefetch.JsonIterator) ImmutableMap(com.google.common.collect.ImmutableMap) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileOutputStream(java.io.FileOutputStream) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) FileInputStream(java.io.FileInputStream) Collectors(java.util.stream.Collectors) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) File(java.io.File) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) TransformingStringInputRowParser(org.apache.druid.segment.transform.TransformingStringInputRowParser) Closeable(java.io.Closeable) Assert(org.junit.Assert) TransformSpec(org.apache.druid.segment.transform.TransformSpec)
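
The read loop in this test and the previous one is the standard Firehose consumption pattern and could be factored into a small helper. A sketch using only the calls already exercised above; the helper name is made up:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.druid.data.input.Firehose;
import org.apache.druid.data.input.InputRow;

// Drain a firehose into memory; a null row (an unparseable input) is kept, just as in the tests.
static List<InputRow> drain(Firehose firehose) throws IOException
{
    final List<InputRow> rows = new ArrayList<>();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    return rows;
}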

Example 19 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class SinkTest, method testSwap.

@Test
public void testSwap() throws Exception {
    final DataSchema schema = new DataSchema(
        "test",
        new TimestampSpec(null, null, null),
        DimensionsSpec.EMPTY,
        new AggregatorFactory[] { new CountAggregatorFactory("rows") },
        new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
        null
    );
    final Interval interval = Intervals.of("2013-01-01/2013-01-02");
    final String version = DateTimes.nowUtc().toString();
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, 100, null, null, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null, null, null, null);
    final Sink sink = new Sink(
        interval,
        schema,
        tuningConfig.getShardSpec(),
        version,
        tuningConfig.getAppendableIndexSpec(),
        tuningConfig.getMaxRowsInMemory(),
        tuningConfig.getMaxBytesInMemoryOrDefault(),
        true,
        tuningConfig.getDedupColumn()
    );
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return new ArrayList<>();
        }

        @Override
        public long getTimestampFromEpoch() {
            return DateTimes.of("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return DateTimes.of("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return new ArrayList<>();
        }

        @Override
        public Number getMetric(String metric) {
            return 0;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    }, false);
    FireHydrant currHydrant = sink.getCurrHydrant();
    Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());
    FireHydrant swapHydrant = sink.swap();
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return new ArrayList<>();
        }

        @Override
        public long getTimestampFromEpoch() {
            return DateTimes.of("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return DateTimes.of("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return new ArrayList<>();
        }

        @Override
        public Number getMetric(String metric) {
            return 0;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    }, false);
    Assert.assertEquals(currHydrant, swapHydrant);
    Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
    Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
    Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
Also used : Period(org.joda.time.Period) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DateTime(org.joda.time.DateTime) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Row(org.apache.druid.data.input.Row) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Interval(org.joda.time.Interval) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
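
The two anonymous InputRow instances above implement every method by hand. A comparable row can usually be written far more compactly with MapBasedInputRow, which already appears in this test's imports; it is not a drop-in replacement for every detail (for example the hand-written getMetric always returns 0), but it covers the timestamp and dimension behaviour the test relies on. A minimal sketch:

import com.google.common.collect.ImmutableMap;
import java.util.Collections;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;

// A row at 2013-01-01 with no dimensions and an empty event map.
InputRow row = new MapBasedInputRow(
    DateTimes.of("2013-01-01"),
    Collections.emptyList(),  // dimension names
    ImmutableMap.of()         // raw event fields
);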

Example 20 with InputRow

Use of org.apache.druid.data.input.InputRow in project druid by druid-io.

The class OpenAndClosedSegmentsBatchAppenderatorDriverTest, method testIncrementalPush.

@Test(timeout = 5000L)
public void testIncrementalPush() throws Exception {
    Assert.assertNull(driver.startJob(null));
    int i = 0;
    for (InputRow row : ROWS) {
        Assert.assertTrue(driver.add(row, "dummy").isOk());
        checkSegmentStates(1, SegmentState.APPENDING);
        checkSegmentStates(i, SegmentState.PUSHED_AND_DROPPED);
        driver.pushAllAndClear(TIMEOUT);
        checkSegmentStates(0, SegmentState.APPENDING);
        checkSegmentStates(++i, SegmentState.PUSHED_AND_DROPPED);
    }
    final SegmentsAndCommitMetadata published = driver.publishAll(null, null, makeOkPublisher(), Function.identity()).get(TIMEOUT, TimeUnit.MILLISECONDS);
    Assert.assertEquals(
        ImmutableSet.of(
            new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000/PT1H"), VERSION, new NumberedShardSpec(0, 0)),
            new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000T01/PT1H"), VERSION, new NumberedShardSpec(0, 0)),
            new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000T01/PT1H"), VERSION, new NumberedShardSpec(1, 0))
        ),
        published.getSegments().stream().map(SegmentIdWithShardSpec::fromDataSegment).collect(Collectors.toSet())
    );
    Assert.assertNull(published.getCommitMetadata());
}
Also used : MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)
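
ROWS is a fixture defined elsewhere in the test class, typically a list of MapBasedInputRow values whose timestamps fall into the hourly buckets asserted above. A purely hypothetical sketch of such a fixture; the dimension name and values are invented:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.util.List;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;

// Hypothetical rows covering the 2000-01-01T00 and T01 hours that the assertions expect.
static final List<InputRow> ROWS = ImmutableList.of(
    new MapBasedInputRow(DateTimes.of("2000-01-01T00:00:00Z"), ImmutableList.of("dim1"), ImmutableMap.of("dim1", "a")),
    new MapBasedInputRow(DateTimes.of("2000-01-01T01:00:00Z"), ImmutableList.of("dim1"), ImmutableMap.of("dim1", "b")),
    new MapBasedInputRow(DateTimes.of("2000-01-01T01:30:00Z"), ImmutableList.of("dim1"), ImmutableMap.of("dim1", "c"))
);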

Aggregations

InputRow (org.apache.druid.data.input.InputRow) 266
Test (org.junit.Test) 193
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow) 57
InputEntityReader (org.apache.druid.data.input.InputEntityReader) 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema) 52
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) 52
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 49
ArrayList (java.util.ArrayList) 46
List (java.util.List) 37
ImmutableList (com.google.common.collect.ImmutableList) 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec) 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 33
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues) 29
File (java.io.File) 27
HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig) 27
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) 27
DateTime (org.joda.time.DateTime) 24
Map (java.util.Map) 23
IOException (java.io.IOException) 18
Interval (org.joda.time.Interval) 18