
Example 6 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class RealtimeIndexTaskTest, method testRestore.

@Test(timeout = 60_000L)
public void testRestore() throws Exception {
    final File directory = tempFolder.newFolder();
    final RealtimeIndexTask task1 = makeRealtimeTask(null);
    final DataSegment publishedSegment;
    // First run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
        // Trigger graceful shutdown.
        task1.stopGracefully();
        // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
        // Nothing should be published.
        Assert.assertEquals(Sets.newHashSet(), mdc.getPublished());
    }
    // Second run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task2.getFirehose() == null) {
            Thread.sleep(50);
        }
        // Do a query, at this point the previous data should be loaded.
        Assert.assertEquals(1, sumMetric(task2, "rows"));
        final TestFirehose firehose = (TestFirehose) task2.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar"))));
        // Stop the firehose, this will drain out existing events.
        firehose.close();
        // Wait for publish.
        while (mdc.getPublished().isEmpty()) {
            Thread.sleep(50);
        }
        publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
        // Do a query.
        Assert.assertEquals(2, sumMetric(task2, "rows"));
        // Simulate handoff.
        for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
            final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
            Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
            executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
        }
        handOffCallbacks.clear();
        // Wait for the task to finish.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
    }
}
Also used: TaskStatus (io.druid.indexing.common.TaskStatus), DataSegment (io.druid.timeline.DataSegment), TaskToolbox (io.druid.indexing.common.TaskToolbox), Executor (java.util.concurrent.Executor), TestIndexerMetadataStorageCoordinator (io.druid.indexing.test.TestIndexerMetadataStorageCoordinator), SegmentDescriptor (io.druid.query.SegmentDescriptor), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), InputRow (io.druid.data.input.InputRow), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), File (java.io.File), Pair (io.druid.java.util.common.Pair), Test (org.junit.Test)
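
The rows fed to the firehose above are plain MapBasedInputRow instances. As a standalone sketch (not part of the test; the class name and timestamp here are placeholders), building such a row and reading it back could look like this:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import org.joda.time.DateTime;

public class InputRowSketch {
    public static void main(String[] args) {
        // Same shape as the rows added to TestFirehose in testRestore: one dimension, "dim1" = "foo".
        final InputRow row = new MapBasedInputRow(
            DateTime.now(),
            ImmutableList.of("dim1"),
            ImmutableMap.<String, Object>of("dim1", "foo")
        );
        // Dimension values always come back as a list of strings.
        System.out.println(row.getDimensions());      // [dim1]
        System.out.println(row.getDimension("dim1")); // [foo]
        System.out.println(row.getTimestampFromEpoch());
    }
}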

Example 7 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class DatasourceRecordReaderTest, method testSanity.

@Test
public void testSanity() throws Exception {
    // Load the segment descriptor from test resources and point its load spec at the bundled index.zip.
    DataSegment segment = new DefaultObjectMapper().readValue(this.getClass().getClassLoader().getResource("test-segment/descriptor.json"), DataSegment.class).withLoadSpec(ImmutableMap.<String, Object>of("type", "local", "path", this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()));
    // Build an input split over the windowed segment and put the ingestion spec into the Hadoop configuration.
    InputSplit split = new DatasourceInputSplit(Lists.newArrayList(WindowedDataSegment.of(segment)), null);
    Configuration config = new Configuration();
    config.set(DatasourceInputFormat.CONF_DRUID_SCHEMA, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(new DatasourceIngestionSpec(segment.getDataSource(), segment.getInterval(), null, null, null, null, segment.getDimensions(), segment.getMetrics(), false)));
    // Mock a task attempt context that hands back this configuration.
    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
    EasyMock.replay(context);
    DatasourceRecordReader rr = new DatasourceRecordReader();
    rr.initialize(split, context);
    Assert.assertEquals(0, rr.getProgress(), 0.0001);
    // Drain every row out of the reader, then verify the rows and that progress reached 1.
    List<InputRow> rows = Lists.newArrayList();
    while (rr.nextKeyValue()) {
        rows.add(rr.getCurrentValue());
    }
    verifyRows(rows);
    Assert.assertEquals(1, rr.getProgress(), 0.0001);
    rr.close();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), InputRow (io.druid.data.input.InputRow), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), DataSegment (io.druid.timeline.DataSegment), InputSplit (org.apache.hadoop.mapreduce.InputSplit), Test (org.junit.Test)

Example 8 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class IngestSegmentFirehoseFactoryTimelineTest, method testSimple.

@Test
public void testSimple() throws Exception {
    int count = 0;
    long sum = 0;
    // Stream every row out of the firehose, counting rows and summing the first metric column.
    try (final Firehose firehose = factory.connect(ROW_PARSER)) {
        while (firehose.hasMore()) {
            final InputRow row = firehose.nextRow();
            count++;
            sum += row.getLongMetric(METRICS[0]);
        }
    }
    Assert.assertEquals("count", expectedCount, count);
    Assert.assertEquals("sum", expectedSum, sum);
}
Also used: Firehose (io.druid.data.input.Firehose), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), InputRow (io.druid.data.input.InputRow), Test (org.junit.Test)
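
The loop in testSimple relies only on hasMore(), nextRow(), and close(). As a rough sketch (not the factory under test; the Firehose interface methods of this io.druid version are assumed), an in-memory Firehose over a fixed list of rows could look like this:

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import io.druid.data.input.Firehose;
import io.druid.data.input.InputRow;

// Minimal in-memory Firehose over a pre-built list of rows (illustration only).
public class ListFirehose implements Firehose {

    private final Iterator<InputRow> it;

    public ListFirehose(List<InputRow> rows) {
        this.it = rows.iterator();
    }

    @Override
    public boolean hasMore() {
        return it.hasNext();
    }

    @Override
    public InputRow nextRow() {
        return it.next();
    }

    @Override
    public Runnable commit() {
        // Nothing to persist for an in-memory source.
        return new Runnable() {
            @Override
            public void run() {
            }
        };
    }

    @Override
    public void close() throws IOException {
        // No resources to release.
    }
}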

Example 9 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class HyperUniquesSerde, method getExtractor.

@Override
public ComplexMetricExtractor getExtractor() {
    return new ComplexMetricExtractor() {

        @Override
        public Class<HyperLogLogCollector> extractedClass() {
            return HyperLogLogCollector.class;
        }

        @Override
        public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
            Object rawValue = inputRow.getRaw(metricName);
            // If the row already carries a pre-built collector, pass it through unchanged.
            if (rawValue instanceof HyperLogLogCollector) {
                return (HyperLogLogCollector) rawValue;
            } else {
                // Otherwise hash every value of the column into a fresh collector.
                HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                List<String> dimValues = inputRow.getDimension(metricName);
                if (dimValues == null) {
                    return collector;
                }
                for (String dimensionValue : dimValues) {
                    collector.add(hyperLogLogHash.hash(dimensionValue));
                }
                return collector;
            }
        }
    };
}
Also used: HyperLogLogCollector (io.druid.hll.HyperLogLogCollector), ComplexMetricExtractor (io.druid.segment.serde.ComplexMetricExtractor), InputRow (io.druid.data.input.InputRow)
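
A rough usage sketch for the extractor above, assuming the HyperUniquesSerde constructor of this era takes a HyperLogLogHash and that HyperLogLogHash.getDefault() is available; a raw value that is not already a collector falls into the else branch and gets hashed into a fresh one:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.hll.HyperLogLogCollector;
import io.druid.hll.HyperLogLogHash;
import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde;

public class HyperUniquesExtractorSketch {
    public static void main(String[] args) {
        // Constructor argument assumed from the hyperLogLogHash field used in getExtractor() above.
        final HyperUniquesSerde serde = new HyperUniquesSerde(HyperLogLogHash.getDefault());

        // A row whose "user" column holds a plain string rather than a pre-built collector.
        final InputRow row = new MapBasedInputRow(
            System.currentTimeMillis(),
            ImmutableList.of("user"),
            ImmutableMap.<String, Object>of("user", "alice")
        );

        // The raw value is not a HyperLogLogCollector, so the else branch hashes the
        // dimension value into a freshly created collector.
        final HyperLogLogCollector collector =
            (HyperLogLogCollector) serde.getExtractor().extractValue(row, "user");
        System.out.println(collector.estimateCardinality()); // approximately 1.0
    }
}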

Example 10 with InputRow

Use of io.druid.data.input.InputRow in project druid by druid-io.

From the class SpatialDimensionRowTransformer, method apply.

@Override
public InputRow apply(final InputRow row) {
    final Map<String, List<String>> spatialLookup = Maps.newHashMap();
    // remove all spatial dimensions
    final List<String> finalDims = Lists.newArrayList(Iterables.filter(row.getDimensions(), new Predicate<String>() {

        @Override
        public boolean apply(String input) {
            return !spatialDimensionMap.containsKey(input) && !spatialPartialDimNames.contains(input);
        }
    }));
    // Wrap the original row: spatial dimensions resolve through spatialLookup, everything else delegates to the wrapped row.
    InputRow retVal = new InputRow() {

        @Override
        public List<String> getDimensions() {
            return finalDims;
        }

        @Override
        public long getTimestampFromEpoch() {
            return row.getTimestampFromEpoch();
        }

        @Override
        public DateTime getTimestamp() {
            return row.getTimestamp();
        }

        @Override
        public List<String> getDimension(String dimension) {
            List<String> retVal = spatialLookup.get(dimension);
            return (retVal == null) ? row.getDimension(dimension) : retVal;
        }

        @Override
        public Object getRaw(String dimension) {
            List<String> retVal = spatialLookup.get(dimension);
            return (retVal == null) ? row.getRaw(dimension) : retVal;
        }

        @Override
        public long getLongMetric(String metric) {
            try {
                return row.getLongMetric(metric);
            } catch (ParseException e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public float getFloatMetric(String metric) {
            try {
                return row.getFloatMetric(metric);
            } catch (ParseException e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public String toString() {
            return row.toString();
        }

        @Override
        public int compareTo(Row o) {
            return getTimestamp().compareTo(o.getTimestamp());
        }
    };
    // Populate spatialLookup: pass through an already-joined value, or join the partial dimensions into one value.
    for (Map.Entry<String, SpatialDimensionSchema> entry : spatialDimensionMap.entrySet()) {
        final String spatialDimName = entry.getKey();
        final SpatialDimensionSchema spatialDim = entry.getValue();
        List<String> dimVals = row.getDimension(spatialDimName);
        if (dimVals != null && !dimVals.isEmpty()) {
            if (dimVals.size() != 1) {
                throw new ISE("Spatial dimension value must be in an array!");
            }
            if (isJoinedSpatialDimValValid(dimVals.get(0))) {
                spatialLookup.put(spatialDimName, dimVals);
                finalDims.add(spatialDimName);
            }
        } else {
            List<String> spatialDimVals = Lists.newArrayList();
            for (String dim : spatialDim.getDims()) {
                List<String> partialDimVals = row.getDimension(dim);
                if (isSpatialDimValsValid(partialDimVals)) {
                    spatialDimVals.addAll(partialDimVals);
                }
            }
            if (spatialDimVals.size() == spatialDim.getDims().size()) {
                spatialLookup.put(spatialDimName, Arrays.asList(JOINER.join(spatialDimVals)));
                finalDims.add(spatialDimName);
            }
        }
    }
    return retVal;
}
Also used: SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema), InputRow (io.druid.data.input.InputRow), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), ISE (io.druid.java.util.common.ISE), ParseException (io.druid.java.util.common.parsers.ParseException), Row (io.druid.data.input.Row), Map (java.util.Map), Predicate (com.google.common.base.Predicate)
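
In the else branch above, the partial dimension values are concatenated with the class's JOINER and exposed under the spatial dimension name. A standalone illustration of that joining step (a comma separator is assumed here; the transformer itself is not invoked):

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;

public class SpatialJoinSketch {
    public static void main(String[] args) {
        // Incoming row carries the partial dimensions, e.g. separate latitude and longitude columns.
        final InputRow row = new MapBasedInputRow(
            System.currentTimeMillis(),
            ImmutableList.of("lat", "long"),
            ImmutableMap.<String, Object>of("lat", "37.7749", "long", "-122.4194")
        );

        // The transformer collects each partial dimension's single value in schema order...
        final ImmutableList<String> partialVals = ImmutableList.of(
            row.getDimension("lat").get(0),
            row.getDimension("long").get(0)
        );

        // ...and joins them into one value stored under the spatial dimension name.
        final String coord = Joiner.on(",").join(partialVals);
        System.out.println(coord); // 37.7749,-122.4194
    }
}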

Aggregations

InputRow (io.druid.data.input.InputRow): 81
Test (org.junit.Test): 35
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 24
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator): 22
File (java.io.File): 18
Setup (org.openjdk.jmh.annotations.Setup): 15
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde): 14
Firehose (io.druid.data.input.Firehose): 12
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 12
IndexSpec (io.druid.segment.IndexSpec): 11
ArrayList (java.util.ArrayList): 11
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 10
DateTime (org.joda.time.DateTime): 10
QueryableIndex (io.druid.segment.QueryableIndex): 9
IOException (java.io.IOException): 9
BenchmarkColumnSchema (io.druid.benchmark.datagen.BenchmarkColumnSchema): 8
Interval (org.joda.time.Interval): 8
ParseException (io.druid.java.util.common.parsers.ParseException): 7
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 6
DataSegment (io.druid.timeline.DataSegment): 5