Search in sources :

Example 51 with MapBasedInputRow

use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class MapInputRowParser method parse.

@Override
public InputRow parse(Map<String, Object> theMap) {
    final List<String> dimensions = parseSpec.getDimensionsSpec().hasCustomDimensions() ? parseSpec.getDimensionsSpec().getDimensionNames() : Lists.newArrayList(Sets.difference(theMap.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions()));
    final DateTime timestamp;
    try {
        timestamp = parseSpec.getTimestampSpec().extractTimestamp(theMap);
        if (timestamp == null) {
            final String input = theMap.toString();
            throw new NullPointerException(String.format("Null timestamp in input: %s", input.length() < 100 ? input : input.substring(0, 100) + "..."));
        }
    } catch (Exception e) {
        throw new ParseException(e, "Unparseable timestamp found!");
    }
    return new MapBasedInputRow(timestamp.getMillis(), dimensions, theMap);
}
Also used : ParseException(io.druid.java.util.common.parsers.ParseException) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) DateTime(org.joda.time.DateTime) ParseException(io.druid.java.util.common.parsers.ParseException)

Example 52 with MapBasedInputRow

use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class RealtimeIndexTaskTest method testReportParseExceptionsOnBadMetric.

@Test(timeout = 60_000L)
public void testReportParseExceptionsOnBadMetric() throws Exception {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final RealtimeIndexTask task = makeRealtimeTask(null, true);
    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
    // Wait for firehose to show up, it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1")), new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "foo")), new MapBasedInputRow(now.minus(new Period("P1D")), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "foo")), new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar", "met1", 2.0))));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    // Wait for the task to finish.
    expectedException.expect(ExecutionException.class);
    expectedException.expectCause(CoreMatchers.<Throwable>instanceOf(ParseException.class));
    expectedException.expectCause(ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Encountered parse error for aggregator[met1]")));
    expectedException.expect(ThrowableCauseMatcher.hasCause(ThrowableCauseMatcher.hasCause(CoreMatchers.allOf(CoreMatchers.<Throwable>instanceOf(ParseException.class), ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Unable to parse metrics[met1], value[foo]"))))));
    statusFuture.get();
}
Also used : TaskToolbox(io.druid.indexing.common.TaskToolbox) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) Period(org.joda.time.Period) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) ParseException(io.druid.java.util.common.parsers.ParseException) TaskStatus(io.druid.indexing.common.TaskStatus) Test(org.junit.Test)

Example 53 with MapBasedInputRow

use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class RealtimeIndexTaskTest method testRestoreAfterHandoffAttemptDuringShutdown.

@Test(timeout = 60_000L)
public void testRestoreAfterHandoffAttemptDuringShutdown() throws Exception {
    final TaskStorage taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null));
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final File directory = tempFolder.newFolder();
    final RealtimeIndexTask task1 = makeRealtimeTask(null);
    final DataSegment publishedSegment;
    // First run:
    {
        final TaskToolbox taskToolbox = makeToolbox(task1, taskStorage, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
        // Stop the firehose, this will trigger a finishJob.
        firehose.close();
        // Wait for publish.
        while (mdc.getPublished().isEmpty()) {
            Thread.sleep(50);
        }
        publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
        // Do a query.
        Assert.assertEquals(1, sumMetric(task1, "rows"));
        // Trigger graceful shutdown.
        task1.stopGracefully();
        // Wait for the task to finish. The status doesn't really matter.
        while (!statusFuture.isDone()) {
            Thread.sleep(50);
        }
    }
    // Second run:
    {
        final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final TaskToolbox taskToolbox = makeToolbox(task2, taskStorage, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task2.getFirehose() == null) {
            Thread.sleep(50);
        }
        // Stop the firehose again, this will start another handoff.
        final TestFirehose firehose = (TestFirehose) task2.getFirehose();
        // Stop the firehose, this will trigger a finishJob.
        firehose.close();
        // publishedSegment is still published. No reason it shouldn't be.
        Assert.assertEquals(ImmutableSet.of(publishedSegment), mdc.getPublished());
        // Wait for a handoffCallback to show up.
        while (handOffCallbacks.isEmpty()) {
            Thread.sleep(50);
        }
        // Simulate handoff.
        for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
            final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
            Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
            executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
        }
        handOffCallbacks.clear();
        // Wait for the task to finish.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
    }
}
Also used : TaskStorageConfig(io.druid.indexing.common.config.TaskStorageConfig) HeapMemoryTaskStorage(io.druid.indexing.overlord.HeapMemoryTaskStorage) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) TaskToolbox(io.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TaskStorage(io.druid.indexing.overlord.TaskStorage) HeapMemoryTaskStorage(io.druid.indexing.overlord.HeapMemoryTaskStorage) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SegmentDescriptor(io.druid.query.SegmentDescriptor) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) File(java.io.File) Pair(io.druid.java.util.common.Pair) Test(org.junit.Test)

Example 54 with MapBasedInputRow

use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class RealtimeIndexTaskTest method testBasics.

@Test(timeout = 60_000L)
public void testBasics() throws Exception {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final RealtimeIndexTask task = makeRealtimeTask(null);
    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
    final DataSegment publishedSegment;
    // Wait for firehose to show up, it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1")), new MapBasedInputRow(now.minus(new Period("P1D")), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", 2.0)), new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar", "met1", 2.0))));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    // Wait for publish.
    while (mdc.getPublished().isEmpty()) {
        Thread.sleep(50);
    }
    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
    // Check metrics.
    Assert.assertEquals(2, task.getMetrics().processed());
    Assert.assertEquals(1, task.getMetrics().thrownAway());
    Assert.assertEquals(0, task.getMetrics().unparseable());
    // Do some queries.
    Assert.assertEquals(2, sumMetric(task, "rows"));
    Assert.assertEquals(3, sumMetric(task, "met1"));
    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
        final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
        Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
}
Also used : Period(org.joda.time.Period) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) TaskToolbox(io.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SegmentDescriptor(io.druid.query.SegmentDescriptor) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Pair(io.druid.java.util.common.Pair) Test(org.junit.Test)

Example 55 with MapBasedInputRow

use of io.druid.data.input.MapBasedInputRow in project druid by druid-io.

the class RealtimeIndexTaskTest method testNoReportParseExceptions.

@Test(timeout = 60_000L)
public void testNoReportParseExceptions() throws Exception {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final RealtimeIndexTask task = makeRealtimeTask(null, false);
    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
    final DataSegment publishedSegment;
    // Wait for firehose to show up, it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(Arrays.<InputRow>asList(// Good row- will be processed.
    new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1")), // Null row- will be unparseable.
    null, // Bad metric- will count as processed, but that particular metric won't update.
    new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "foo")), // Bad row- will be unparseable.
    new MapBasedInputRow(now, ImmutableList.of("dim1", FAIL_DIM), ImmutableMap.<String, Object>of("dim1", "foo", "met1", 2.0)), // Old row- will be thrownAway.
    new MapBasedInputRow(now.minus(new Period("P1D")), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", 2.0)), // Good row- will be processed.
    new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar", "met1", 2.0))));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    // Wait for publish.
    while (mdc.getPublished().isEmpty()) {
        Thread.sleep(50);
    }
    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
    // Check metrics.
    Assert.assertEquals(3, task.getMetrics().processed());
    Assert.assertEquals(1, task.getMetrics().thrownAway());
    Assert.assertEquals(2, task.getMetrics().unparseable());
    // Do some queries.
    Assert.assertEquals(3, sumMetric(task, "rows"));
    Assert.assertEquals(3, sumMetric(task, "met1"));
    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
        final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
        Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
}
Also used : Period(org.joda.time.Period) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) TaskToolbox(io.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SegmentDescriptor(io.druid.query.SegmentDescriptor) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Pair(io.druid.java.util.common.Pair) Test(org.junit.Test)

Aggregations

MapBasedInputRow (io.druid.data.input.MapBasedInputRow)73 Test (org.junit.Test)51 DateTime (org.joda.time.DateTime)38 OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)32 IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)30 File (java.io.File)19 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)13 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)12 InputRow (io.druid.data.input.InputRow)11 IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest)11 Interval (org.joda.time.Interval)11 IOException (java.io.IOException)10 DimensionsSpec (io.druid.data.input.impl.DimensionsSpec)9 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)8 Row (io.druid.data.input.Row)7 TaskStatus (io.druid.indexing.common.TaskStatus)7 TaskToolbox (io.druid.indexing.common.TaskToolbox)7 TestIndexerMetadataStorageCoordinator (io.druid.indexing.test.TestIndexerMetadataStorageCoordinator)7 SpatialDimensionSchema (io.druid.data.input.impl.SpatialDimensionSchema)6 Pair (io.druid.java.util.common.Pair)6