
Example 71 with InputRow

use of io.druid.data.input.InputRow in project druid by druid-io.

the class RealtimeIndexTaskTest method testRestoreAfterHandoffAttemptDuringShutdown.

@Test(timeout = 60_000L)
public void testRestoreAfterHandoffAttemptDuringShutdown() throws Exception {
    final TaskStorage taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null));
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final File directory = tempFolder.newFolder();
    final RealtimeIndexTask task1 = makeRealtimeTask(null);
    final DataSegment publishedSegment;
    // First run:
    {
        final TaskToolbox taskToolbox = makeToolbox(task1, taskStorage, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
        // Stop the firehose, this will trigger a finishJob.
        firehose.close();
        // Wait for publish.
        while (mdc.getPublished().isEmpty()) {
            Thread.sleep(50);
        }
        publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
        // Do a query.
        Assert.assertEquals(1, sumMetric(task1, "rows"));
        // Trigger graceful shutdown.
        task1.stopGracefully();
        // Wait for the task to finish. The status doesn't really matter.
        while (!statusFuture.isDone()) {
            Thread.sleep(50);
        }
    }
    // Second run:
    {
        final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final TaskToolbox taskToolbox = makeToolbox(task2, taskStorage, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task2.getFirehose() == null) {
            Thread.sleep(50);
        }
        // Stop the firehose again, this will start another handoff.
        final TestFirehose firehose = (TestFirehose) task2.getFirehose();
        // Stop the firehose, this will trigger a finishJob.
        firehose.close();
        // publishedSegment is still published. No reason it shouldn't be.
        Assert.assertEquals(ImmutableSet.of(publishedSegment), mdc.getPublished());
        // Wait for a handoffCallback to show up.
        while (handOffCallbacks.isEmpty()) {
            Thread.sleep(50);
        }
        // Simulate handoff.
        for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
            final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
            Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
            executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
        }
        handOffCallbacks.clear();
        // Wait for the task to finish.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
    }
}
Also used: TaskStorageConfig(io.druid.indexing.common.config.TaskStorageConfig) HeapMemoryTaskStorage(io.druid.indexing.overlord.HeapMemoryTaskStorage) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) TaskToolbox(io.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TaskStorage(io.druid.indexing.overlord.TaskStorage) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SegmentDescriptor(io.druid.query.SegmentDescriptor) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) File(java.io.File) Pair(io.druid.java.util.common.Pair) Test(org.junit.Test)
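
For reference, the MapBasedInputRow pushed into the firehose above is the simplest concrete InputRow: a timestamp, an ordered list of dimension names, and an event map. A minimal standalone sketch of building and reading one (the timestamp and dimension values here are illustrative, not taken from the test):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import org.joda.time.DateTime;

public class MapBasedInputRowSketch {
    public static void main(String[] args) {
        final DateTime now = DateTime.now();
        final InputRow row = new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"));
        // getDimension returns the values of a named dimension as a list of strings.
        System.out.println(row.getDimension("dim1")); // [foo]
        // getTimestampFromEpoch exposes the row timestamp in epoch milliseconds.
        System.out.println(row.getTimestampFromEpoch() == now.getMillis()); // true
    }
}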

Example 72 with InputRow

use of io.druid.data.input.InputRow in project druid by druid-io.

the class RealtimeIndexTaskTest method testRestoreCorruptData.

@Test(timeout = 60_000L)
public void testRestoreCorruptData() throws Exception {
    final File directory = tempFolder.newFolder();
    final RealtimeIndexTask task1 = makeRealtimeTask(null);
    // First run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
        // Trigger graceful shutdown.
        task1.stopGracefully();
        // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
        // Nothing should be published.
        Assert.assertEquals(Sets.newHashSet(), mdc.getPublished());
    }
    // Corrupt the data:
    final File smooshFile = new File(String.format("%s/persistent/task/%s/work/persist/%s/%s_%s/0/00000.smoosh", directory, task1.getId(), task1.getDataSource(), Granularities.DAY.bucketStart(now), Granularities.DAY.bucketEnd(now)));
    Files.write(smooshFile.toPath(), "oops!".getBytes(Charsets.UTF_8));
    // Second run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
        // Wait for the task to finish.
        boolean caught = false;
        try {
            statusFuture.get();
        } catch (Exception e) {
            caught = true;
        }
        Assert.assertTrue("expected exception", caught);
    }
}
Also used: TaskToolbox(io.druid.indexing.common.TaskToolbox) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) TaskStatus(io.druid.indexing.common.TaskStatus) File(java.io.File) EntryExistsException(io.druid.metadata.EntryExistsException) ParseException(io.druid.java.util.common.parsers.ParseException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)
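
The second run fails because the corrupted 00000.smoosh file cannot be reloaded on restore. A hedged variant of the same check that inspects the wrapped failure instead of catching a bare Exception (asserting on the cause is an assumption; the original test only requires that the future fails):

try {
    statusFuture.get();
    Assert.fail("expected the restore to fail on corrupt persisted data");
} catch (ExecutionException e) {
    // The future wraps whatever the task threw while reloading the corrupted
    // persist directory; the exact cause type is not pinned down here.
    Assert.assertNotNull(e.getCause());
}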

Example 73 with InputRow

use of io.druid.data.input.InputRow in project druid by druid-io.

the class HyperUniquesSerdeForTest method getExtractor.

@Override
public ComplexMetricExtractor getExtractor() {
    return new ComplexMetricExtractor() {

        @Override
        public Class<HyperLogLogCollector> extractedClass() {
            return HyperLogLogCollector.class;
        }

        @Override
        public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
            Object rawValue = inputRow.getRaw(metricName);
            if (rawValue instanceof HyperLogLogCollector) {
                return (HyperLogLogCollector) rawValue;
            } else {
                HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                List<String> dimValues = inputRow.getDimension(metricName);
                if (dimValues == null) {
                    return collector;
                }
                for (String dimensionValue : dimValues) {
                    collector.add(hashFn.hashBytes(StringUtils.toUtf8(dimensionValue)).asBytes());
                }
                return collector;
            }
        }
    };
}
Also used: HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) InputRow(io.druid.data.input.InputRow)
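
A hedged sketch of calling this extractor directly, using Guava's ImmutableList/ImmutableMap as in the earlier examples (the serde construction, column name, and row contents are hypothetical; when the raw value is not already a HyperLogLogCollector, each dimension value is hashed into a fresh collector):

// Hypothetical construction: the test helper is assumed to take a Guava HashFunction.
final HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
final InputRow row = new MapBasedInputRow(System.currentTimeMillis(), ImmutableList.of("user_id"), ImmutableMap.<String, Object>of("user_id", "abc"));
final HyperLogLogCollector collector = (HyperLogLogCollector) serde.getExtractor().extractValue(row, "user_id");
// "abc" is a plain string, so it was hashed and added to a new collector.
System.out.println(collector.estimateCardinality()); // ~1.0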

Example 74 with InputRow

use of io.druid.data.input.InputRow in project druid by druid-io.

the class RocketMQFirehoseFactory method connect.

@Override
public Firehose connect(ByteBufferInputRowParser byteBufferInputRowParser) throws IOException, ParseException {
    Set<String> newDimExclus = Sets.union(byteBufferInputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions(), Sets.newHashSet("feed"));
    final ByteBufferInputRowParser theParser = byteBufferInputRowParser.withParseSpec(byteBufferInputRowParser.getParseSpec().withDimensionsSpec(byteBufferInputRowParser.getParseSpec().getDimensionsSpec().withDimensionExclusions(newDimExclus)));
    /**
     * Topic-Queue mapping.
     */
    final ConcurrentHashMap<String, Set<MessageQueue>> topicQueueMap;
    /**
     * Default Pull-style client for RocketMQ.
     */
    final DefaultMQPullConsumer defaultMQPullConsumer;
    final DruidPullMessageService pullMessageService;
    messageQueueTreeSetMap.clear();
    windows.clear();
    try {
        defaultMQPullConsumer = new DefaultMQPullConsumer(this.consumerGroup);
        defaultMQPullConsumer.setMessageModel(MessageModel.CLUSTERING);
        topicQueueMap = new ConcurrentHashMap<>();
        pullMessageService = new DruidPullMessageService(defaultMQPullConsumer);
        for (String topic : feed) {
            Validators.checkTopic(topic);
            topicQueueMap.put(topic, defaultMQPullConsumer.fetchSubscribeMessageQueues(topic));
        }
        DruidMessageQueueListener druidMessageQueueListener = new DruidMessageQueueListener(Sets.newHashSet(feed), topicQueueMap, defaultMQPullConsumer);
        defaultMQPullConsumer.setMessageQueueListener(druidMessageQueueListener);
        defaultMQPullConsumer.start();
        pullMessageService.start();
    } catch (MQClientException e) {
        LOGGER.error("Failed to start DefaultMQPullConsumer", e);
        throw new IOException("Failed to start RocketMQ client", e);
    }
    return new Firehose() {

        @Override
        public boolean hasMore() {
            boolean hasMore = false;
            DruidPullRequest earliestPullRequest = null;
            for (Map.Entry<String, Set<MessageQueue>> entry : topicQueueMap.entrySet()) {
                for (MessageQueue messageQueue : entry.getValue()) {
                    if (JavaCompatUtils.keySet(messageQueueTreeSetMap).contains(messageQueue) && !messageQueueTreeSetMap.get(messageQueue).isEmpty()) {
                        hasMore = true;
                    } else {
                        try {
                            long offset = defaultMQPullConsumer.fetchConsumeOffset(messageQueue, false);
                            int batchSize = (null == pullBatchSize || pullBatchSize.isEmpty()) ? DEFAULT_PULL_BATCH_SIZE : Integer.parseInt(pullBatchSize);
                            DruidPullRequest newPullRequest = new DruidPullRequest(messageQueue, null, offset, batchSize, !hasMessagesPending());
                            // notify pull message service to pull messages from brokers.
                            pullMessageService.putRequest(newPullRequest);
                            // set the earliest pull in case we need to block.
                            if (null == earliestPullRequest) {
                                earliestPullRequest = newPullRequest;
                            }
                        } catch (MQClientException e) {
                            LOGGER.error("Failed to fetch consume offset for queue: {}", entry.getKey());
                        }
                    }
                }
            }
            // Block only when there is no locally pending messages.
            if (!hasMore && null != earliestPullRequest) {
                try {
                    earliestPullRequest.getCountDownLatch().await();
                    hasMore = true;
                } catch (InterruptedException e) {
                    LOGGER.error("CountDownLatch await got interrupted", e);
                }
            }
            return hasMore;
        }

        @Override
        public InputRow nextRow() {
            for (Map.Entry<MessageQueue, ConcurrentSkipListSet<MessageExt>> entry : messageQueueTreeSetMap.entrySet()) {
                if (!entry.getValue().isEmpty()) {
                    MessageExt message = entry.getValue().pollFirst();
                    InputRow inputRow = theParser.parse(ByteBuffer.wrap(message.getBody()));
                    if (!JavaCompatUtils.keySet(windows).contains(entry.getKey())) {
                        windows.put(entry.getKey(), new ConcurrentSkipListSet<Long>());
                    }
                    windows.get(entry.getKey()).add(message.getQueueOffset());
                    return inputRow;
                }
            }
            // should never happen.
            throw new RuntimeException("Unexpected Fatal Error! There should have been one row available.");
        }

        @Override
        public Runnable commit() {
            return new Runnable() {

                @Override
                public void run() {
                    OffsetStore offsetStore = defaultMQPullConsumer.getOffsetStore();
                    Set<MessageQueue> updated = new HashSet<>();
                    // calculate offsets according to consuming windows.
                    for (ConcurrentHashMap.Entry<MessageQueue, ConcurrentSkipListSet<Long>> entry : windows.entrySet()) {
                        while (!entry.getValue().isEmpty()) {
                            long offset = offsetStore.readOffset(entry.getKey(), ReadOffsetType.MEMORY_FIRST_THEN_STORE);
                            if (offset + 1 > entry.getValue().first()) {
                                entry.getValue().pollFirst();
                            } else if (offset + 1 == entry.getValue().first()) {
                                entry.getValue().pollFirst();
                                offsetStore.updateOffset(entry.getKey(), offset + 1, true);
                                updated.add(entry.getKey());
                            } else {
                                break;
                            }
                        }
                    }
                    offsetStore.persistAll(updated);
                }
            };
        }

        @Override
        public void close() throws IOException {
            defaultMQPullConsumer.shutdown();
            pullMessageService.shutdown(false);
        }
    };
}
Also used: HashSet(java.util.HashSet) Set(java.util.Set) ConcurrentSkipListSet(java.util.concurrent.ConcurrentSkipListSet) DefaultMQPullConsumer(com.alibaba.rocketmq.client.consumer.DefaultMQPullConsumer) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) MQClientException(com.alibaba.rocketmq.client.exception.MQClientException) Firehose(io.druid.data.input.Firehose) IOException(java.io.IOException) ByteBufferInputRowParser(io.druid.data.input.ByteBufferInputRowParser) MessageExt(com.alibaba.rocketmq.common.message.MessageExt) MessageQueue(com.alibaba.rocketmq.common.message.MessageQueue) InputRow(io.druid.data.input.InputRow) Map(java.util.Map) OffsetStore(com.alibaba.rocketmq.client.consumer.store.OffsetStore)
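
Whatever the backing queue, a Firehose is consumed the same way: poll hasMore(), read nextRow(), and run the commit Runnable once the processed rows are safely persisted. A minimal consumption sketch under those assumptions (factory, parser, and the process(...) sink are placeholders, not part of the original code):

try (Firehose firehose = factory.connect(parser)) {
    while (firehose.hasMore()) {
        final InputRow row = firehose.nextRow();
        process(row); // hypothetical sink: index the row, hand it to a plumber, etc.
    }
    // Committing persists the consumed offsets back to the broker (see commit() above).
    firehose.commit().run();
}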

Example 75 with InputRow

use of io.druid.data.input.InputRow in project druid by druid-io.

the class DruidParquetInputTest method testBinaryAsString.

@Test
public void testBinaryAsString() throws IOException, InterruptedException {
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromFile(new File("example/impala_hadoop_parquet_job.json"));
    Job job = Job.getInstance(new Configuration());
    config.intoConfiguration(job);
    GenericRecord data = getFirstRecord(job, ((StaticPathSpec) config.getPathSpec()).getPaths());
    InputRow row = config.getParser().parse(data);
    // without binaryAsString: true, the value would be something like "[104, 101, 121, 32, 116, 104, 105, 115, 32, 105, 115, 3.... ]"
    assertEquals(row.getDimension("field").get(0), "hey this is &é(-è_çà)=^$ù*! Ω^^");
    assertEquals(row.getTimestampFromEpoch(), 1471800234);
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) InputRow(io.druid.data.input.InputRow) HadoopDruidIndexerConfig(io.druid.indexer.HadoopDruidIndexerConfig) Job(org.apache.hadoop.mapreduce.Job) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)

Aggregations

InputRow (io.druid.data.input.InputRow) 81
Test (org.junit.Test) 35
MapBasedInputRow (io.druid.data.input.MapBasedInputRow) 24
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator) 22
File (java.io.File) 18
Setup (org.openjdk.jmh.annotations.Setup) 15
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde) 14
Firehose (io.druid.data.input.Firehose) 12
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex) 12
IndexSpec (io.druid.segment.IndexSpec) 11
ArrayList (java.util.ArrayList) 11
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex) 10
DateTime (org.joda.time.DateTime) 10
QueryableIndex (io.druid.segment.QueryableIndex) 9
IOException (java.io.IOException) 9
BenchmarkColumnSchema (io.druid.benchmark.datagen.BenchmarkColumnSchema) 8
Interval (org.joda.time.Interval) 8
ParseException (io.druid.java.util.common.parsers.ParseException) 7
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory) 6
DataSegment (io.druid.timeline.DataSegment) 5