
Example 1 with ByteBufferInputRowParser

Use of io.druid.data.input.ByteBufferInputRowParser in project druid by druid-io.

From the class InputRowParserSerdeTest, method testStringInputRowParserSerde:

@Test
public void testStringInputRowParserSerde() throws Exception {
    final StringInputRowParser parser = new StringInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo", "bar")), null, null),
            null,
            null
        ),
        null
    );
    final ByteBufferInputRowParser parser2 = jsonMapper.readValue(jsonMapper.writeValueAsBytes(parser), ByteBufferInputRowParser.class);
    final InputRow parsed = parser2.parse(ByteBuffer.wrap("{\"foo\":\"x\",\"bar\":\"y\",\"qux\":\"z\",\"timestamp\":\"2000\"}".getBytes(Charsets.UTF_8)));
    Assert.assertEquals(ImmutableList.of("foo", "bar"), parsed.getDimensions());
    Assert.assertEquals(ImmutableList.of("x"), parsed.getDimension("foo"));
    Assert.assertEquals(ImmutableList.of("y"), parsed.getDimension("bar"));
    Assert.assertEquals(new DateTime("2000").getMillis(), parsed.getTimestampFromEpoch());
}
Also used: ByteBufferInputRowParser (io.druid.data.input.ByteBufferInputRowParser), InputRow (io.druid.data.input.InputRow), DateTime (org.joda.time.DateTime), Test (org.junit.Test)
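
The jsonMapper used in this test is a class-level fixture that the excerpt does not show. A minimal sketch of how it might be declared, assuming Druid's DefaultObjectMapper is sufficient to resolve the registered parser type during deserialization (the mapper choice is an assumption, not shown in the source):

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;

public class InputRowParserSerdeTest {

    // Assumed fixture: a Druid-aware Jackson mapper used by the serde round-trips above.
    private final ObjectMapper jsonMapper = new DefaultObjectMapper();

    // ... test methods such as testStringInputRowParserSerde() ...
}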

Example 2 with ByteBufferInputRowParser

Use of io.druid.data.input.ByteBufferInputRowParser in project druid by druid-io.

From the class KafkaEightFirehoseFactory, method connect:

@Override
public Firehose connect(final ByteBufferInputRowParser firehoseParser) throws IOException {
    Set<String> newDimExclus = Sets.union(
        firehoseParser.getParseSpec().getDimensionsSpec().getDimensionExclusions(),
        Sets.newHashSet("feed")
    );
    final ByteBufferInputRowParser theParser = firehoseParser.withParseSpec(
        firehoseParser.getParseSpec().withDimensionsSpec(
            firehoseParser.getParseSpec().getDimensionsSpec().withDimensionExclusions(newDimExclus)
        )
    );
    final ConsumerConnector connector = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProps));
    final Map<String, List<KafkaStream<byte[], byte[]>>> streams = connector.createMessageStreams(ImmutableMap.of(feed, 1));
    final List<KafkaStream<byte[], byte[]>> streamList = streams.get(feed);
    if (streamList == null || streamList.size() != 1) {
        return null;
    }
    final KafkaStream<byte[], byte[]> stream = streamList.get(0);
    final ConsumerIterator<byte[], byte[]> iter = stream.iterator();
    return new Firehose() {

        @Override
        public boolean hasMore() {
            return iter.hasNext();
        }

        @Override
        public InputRow nextRow() {
            try {
                final byte[] message = iter.next().message();
                if (message == null) {
                    return null;
                }
                return theParser.parse(ByteBuffer.wrap(message));
            } catch (InvalidMessageException e) {
                /*
                  If the CRC error was introduced during wire transfer, this is not the best way to handle it.
                  It would probably be better to shut down the firehose without committing and start it again.
                 */
                log.error(e, "Message failed its checksum and it is corrupt, will skip it");
                return null;
            }
        }

        @Override
        public Runnable commit() {
            return new Runnable() {

                @Override
                public void run() {
                    /*
                      This is actually not going to do exactly what we want, because it will be called asynchronously
                      after the persist is complete. So it is going to commit that it has processed more than was
                      actually persisted. This is unfortunate, but good enough for now. Should be revisited along with
                      an upgrade of our Kafka version.
                     */
                    log.info("committing offsets");
                    connector.commitOffsets();
                }
            };
        }

        @Override
        public void close() throws IOException {
            connector.shutdown();
        }
    };
}
Also used: InvalidMessageException (kafka.message.InvalidMessageException), Firehose (io.druid.data.input.Firehose), ConsumerConnector (kafka.javaapi.consumer.ConsumerConnector), KafkaStream (kafka.consumer.KafkaStream), ByteBufferInputRowParser (io.druid.data.input.ByteBufferInputRowParser), ConsumerConfig (kafka.consumer.ConsumerConfig), List (java.util.List)
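
The Firehose returned by connect() is normally driven by Druid's ingestion machinery, which this excerpt does not show. A rough, hypothetical driver loop (simpler than Druid's real ingestion code; factory and parser are assumed to exist, and the surrounding method is assumed to declare throws IOException) illustrating how hasMore(), nextRow(), commit(), and close() fit together:

Firehose firehose = factory.connect(parser);
try {
    while (firehose.hasMore()) {
        InputRow row = firehose.nextRow();
        if (row == null) {
            // e.g. a corrupt Kafka message was skipped by the firehose
            continue;
        }
        // hand the row to the indexer here ...
    }
    // commit() returns a Runnable that should be run only after the indexed rows are persisted
    Runnable commitRunnable = firehose.commit();
    // ... persist the indexed rows ...
    commitRunnable.run();
} finally {
    firehose.close();
}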

Example 3 with ByteBufferInputRowParser

Use of io.druid.data.input.ByteBufferInputRowParser in project druid by druid-io.

From the class InputRowParserSerdeTest, method testCharsetParseHelper:

private InputRow testCharsetParseHelper(Charset charset) throws Exception {
    final StringInputRowParser parser = new StringInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo", "bar")), null, null),
            null,
            null
        ),
        charset.name()
    );
    final ByteBufferInputRowParser parser2 = jsonMapper.readValue(jsonMapper.writeValueAsBytes(parser), ByteBufferInputRowParser.class);
    final InputRow parsed = parser2.parse(ByteBuffer.wrap("{\"foo\":\"x\",\"bar\":\"y\",\"qux\":\"z\",\"timestamp\":\"3000\"}".getBytes(charset)));
    return parsed;
}
Also used: ByteBufferInputRowParser (io.druid.data.input.ByteBufferInputRowParser), InputRow (io.druid.data.input.InputRow)
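
The test method that calls this helper is not part of the excerpt. A plausible sketch of how it might be exercised, in the style of Example 1 (the method name and the list of charsets are assumptions):

@Test
public void testStringInputRowParserSerdeMultiCharset() throws Exception {
    // Hypothetical caller: parse the same JSON row under several charsets and
    // check that dimensions and timestamp come out identically each time.
    Charset[] testCharsets = {
        Charsets.US_ASCII, Charsets.ISO_8859_1, Charsets.UTF_8,
        Charsets.UTF_16BE, Charsets.UTF_16LE, Charsets.UTF_16
    };
    for (Charset testCharset : testCharsets) {
        InputRow parsed = testCharsetParseHelper(testCharset);
        Assert.assertEquals(ImmutableList.of("foo", "bar"), parsed.getDimensions());
        Assert.assertEquals(ImmutableList.of("x"), parsed.getDimension("foo"));
        Assert.assertEquals(ImmutableList.of("y"), parsed.getDimension("bar"));
        Assert.assertEquals(new DateTime("3000").getMillis(), parsed.getTimestampFromEpoch());
    }
}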

Example 4 with ByteBufferInputRowParser

Use of io.druid.data.input.ByteBufferInputRowParser in project druid by druid-io.

From the class RocketMQFirehoseFactory, method connect:

@Override
public Firehose connect(ByteBufferInputRowParser byteBufferInputRowParser) throws IOException, ParseException {
    Set<String> newDimExclus = Sets.union(
        byteBufferInputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions(),
        Sets.newHashSet("feed")
    );
    final ByteBufferInputRowParser theParser = byteBufferInputRowParser.withParseSpec(
        byteBufferInputRowParser.getParseSpec().withDimensionsSpec(
            byteBufferInputRowParser.getParseSpec().getDimensionsSpec().withDimensionExclusions(newDimExclus)
        )
    );
    /**
     * Topic-Queue mapping.
     */
    final ConcurrentHashMap<String, Set<MessageQueue>> topicQueueMap;
    /**
     * Default Pull-style client for RocketMQ.
     */
    final DefaultMQPullConsumer defaultMQPullConsumer;
    final DruidPullMessageService pullMessageService;
    messageQueueTreeSetMap.clear();
    windows.clear();
    try {
        defaultMQPullConsumer = new DefaultMQPullConsumer(this.consumerGroup);
        defaultMQPullConsumer.setMessageModel(MessageModel.CLUSTERING);
        topicQueueMap = new ConcurrentHashMap<>();
        pullMessageService = new DruidPullMessageService(defaultMQPullConsumer);
        for (String topic : feed) {
            Validators.checkTopic(topic);
            topicQueueMap.put(topic, defaultMQPullConsumer.fetchSubscribeMessageQueues(topic));
        }
        DruidMessageQueueListener druidMessageQueueListener = new DruidMessageQueueListener(Sets.newHashSet(feed), topicQueueMap, defaultMQPullConsumer);
        defaultMQPullConsumer.setMessageQueueListener(druidMessageQueueListener);
        defaultMQPullConsumer.start();
        pullMessageService.start();
    } catch (MQClientException e) {
        LOGGER.error("Failed to start DefaultMQPullConsumer", e);
        throw new IOException("Failed to start RocketMQ client", e);
    }
    return new Firehose() {

        @Override
        public boolean hasMore() {
            boolean hasMore = false;
            DruidPullRequest earliestPullRequest = null;
            for (Map.Entry<String, Set<MessageQueue>> entry : topicQueueMap.entrySet()) {
                for (MessageQueue messageQueue : entry.getValue()) {
                    if (JavaCompatUtils.keySet(messageQueueTreeSetMap).contains(messageQueue) && !messageQueueTreeSetMap.get(messageQueue).isEmpty()) {
                        hasMore = true;
                    } else {
                        try {
                            long offset = defaultMQPullConsumer.fetchConsumeOffset(messageQueue, false);
                            int batchSize = (null == pullBatchSize || pullBatchSize.isEmpty()) ? DEFAULT_PULL_BATCH_SIZE : Integer.parseInt(pullBatchSize);
                            DruidPullRequest newPullRequest = new DruidPullRequest(messageQueue, null, offset, batchSize, !hasMessagesPending());
                            // notify pull message service to pull messages from brokers.
                            pullMessageService.putRequest(newPullRequest);
                            // set the earliest pull in case we need to block.
                            if (null == earliestPullRequest) {
                                earliestPullRequest = newPullRequest;
                            }
                        } catch (MQClientException e) {
                            LOGGER.error("Failed to fetch consume offset for queue: {}", entry.getKey());
                        }
                    }
                }
            }
            // Block only when there are no locally pending messages.
            if (!hasMore && null != earliestPullRequest) {
                try {
                    earliestPullRequest.getCountDownLatch().await();
                    hasMore = true;
                } catch (InterruptedException e) {
                    LOGGER.error("CountDownLatch await got interrupted", e);
                }
            }
            return hasMore;
        }

        @Override
        public InputRow nextRow() {
            for (Map.Entry<MessageQueue, ConcurrentSkipListSet<MessageExt>> entry : messageQueueTreeSetMap.entrySet()) {
                if (!entry.getValue().isEmpty()) {
                    MessageExt message = entry.getValue().pollFirst();
                    InputRow inputRow = theParser.parse(ByteBuffer.wrap(message.getBody()));
                    if (!JavaCompatUtils.keySet(windows).contains(entry.getKey())) {
                        windows.put(entry.getKey(), new ConcurrentSkipListSet<Long>());
                    }
                    windows.get(entry.getKey()).add(message.getQueueOffset());
                    return inputRow;
                }
            }
            // should never happen.
            throw new RuntimeException("Unexpected Fatal Error! There should have been one row available.");
        }

        @Override
        public Runnable commit() {
            return new Runnable() {

                @Override
                public void run() {
                    OffsetStore offsetStore = defaultMQPullConsumer.getOffsetStore();
                    Set<MessageQueue> updated = new HashSet<>();
                    // calculate offsets according to consuming windows.
                    for (ConcurrentHashMap.Entry<MessageQueue, ConcurrentSkipListSet<Long>> entry : windows.entrySet()) {
                        while (!entry.getValue().isEmpty()) {
                            long offset = offsetStore.readOffset(entry.getKey(), ReadOffsetType.MEMORY_FIRST_THEN_STORE);
                            if (offset + 1 > entry.getValue().first()) {
                                entry.getValue().pollFirst();
                            } else if (offset + 1 == entry.getValue().first()) {
                                entry.getValue().pollFirst();
                                offsetStore.updateOffset(entry.getKey(), offset + 1, true);
                                updated.add(entry.getKey());
                            } else {
                                break;
                            }
                        }
                    }
                    offsetStore.persistAll(updated);
                }
            };
        }

        @Override
        public void close() throws IOException {
            defaultMQPullConsumer.shutdown();
            pullMessageService.shutdown(false);
        }
    };
}
Also used: HashSet (java.util.HashSet), Set (java.util.Set), ConcurrentSkipListSet (java.util.concurrent.ConcurrentSkipListSet), DefaultMQPullConsumer (com.alibaba.rocketmq.client.consumer.DefaultMQPullConsumer), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), MQClientException (com.alibaba.rocketmq.client.exception.MQClientException), Firehose (io.druid.data.input.Firehose), IOException (java.io.IOException), ByteBufferInputRowParser (io.druid.data.input.ByteBufferInputRowParser), MessageExt (com.alibaba.rocketmq.common.message.MessageExt), MessageQueue (com.alibaba.rocketmq.common.message.MessageQueue), InputRow (io.druid.data.input.InputRow), Map (java.util.Map), OffsetStore (com.alibaba.rocketmq.client.consumer.store.OffsetStore)
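
The commit() implementation above only advances the persisted offset across a contiguous prefix of the offsets recorded in windows. A simplified, self-contained sketch of that idea (plain Java with made-up values; this is not Druid or RocketMQ code):

import java.util.concurrent.ConcurrentSkipListSet;

public class OffsetWindowSketch {

    public static void main(String[] args) {
        ConcurrentSkipListSet<Long> window = new ConcurrentSkipListSet<>();
        long committed = 5L;                             // next offset the (imaginary) offset store expects
        window.add(5L);
        window.add(6L);
        window.add(8L);                                  // offset 7 has not been consumed yet

        // Advance the commit point only over the contiguous prefix of consumed offsets.
        while (!window.isEmpty() && window.first() <= committed) {
            window.pollFirst();
            committed++;
        }

        // Prints "committed offset = 7, pending = [8]": offsets 5 and 6 can be
        // persisted, while 8 must wait until 7 has been handed out.
        System.out.println("committed offset = " + committed + ", pending = " + window);
    }
}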

Aggregations

ByteBufferInputRowParser (io.druid.data.input.ByteBufferInputRowParser): 4 uses
InputRow (io.druid.data.input.InputRow): 3 uses
Firehose (io.druid.data.input.Firehose): 2 uses
DefaultMQPullConsumer (com.alibaba.rocketmq.client.consumer.DefaultMQPullConsumer): 1 use
OffsetStore (com.alibaba.rocketmq.client.consumer.store.OffsetStore): 1 use
MQClientException (com.alibaba.rocketmq.client.exception.MQClientException): 1 use
MessageExt (com.alibaba.rocketmq.common.message.MessageExt): 1 use
MessageQueue (com.alibaba.rocketmq.common.message.MessageQueue): 1 use
IOException (java.io.IOException): 1 use
HashSet (java.util.HashSet): 1 use
List (java.util.List): 1 use
Map (java.util.Map): 1 use
Set (java.util.Set): 1 use
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1 use
ConcurrentSkipListSet (java.util.concurrent.ConcurrentSkipListSet): 1 use
ConsumerConfig (kafka.consumer.ConsumerConfig): 1 use
KafkaStream (kafka.consumer.KafkaStream): 1 use
ConsumerConnector (kafka.javaapi.consumer.ConsumerConnector): 1 use
InvalidMessageException (kafka.message.InvalidMessageException): 1 use
DateTime (org.joda.time.DateTime): 1 use