Example 1 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class InlineSchemaAvroBytesDecoder, method parse.

@Override
public GenericRecord parse(ByteBuffer bytes) {
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schemaObj);
    ByteBufferInputStream inputStream = new ByteBufferInputStream(Collections.singletonList(bytes));
    try {
        return reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null));
    } catch (Exception e) {
        throw new ParseException(e, "Fail to decode avro message!");
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) ByteBufferInputStream(org.apache.avro.util.ByteBufferInputStream) ParseException(io.druid.java.util.common.parsers.ParseException) GenericRecord(org.apache.avro.generic.GenericRecord)
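
For context, parse() here simply deserializes a raw Avro binary payload against a single fixed schema. Below is a minimal, self-contained round-trip sketch using only the Apache Avro API (not the Druid decoder class itself); the Event schema and its field name are hypothetical, chosen purely for illustration.

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.util.Collections;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.util.ByteBufferInputStream;

public class AvroRoundTripSketch {
    // Hypothetical one-field record schema used only for this illustration.
    private static final Schema SCHEMA = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Event\",\"fields\":[{\"name\":\"value\",\"type\":\"string\"}]}");

    public static void main(String[] args) throws Exception {
        // Encode a record to raw Avro binary, as a producer would before
        // handing the bytes to a decoder like the one above.
        GenericRecord record = new GenericData.Record(SCHEMA);
        record.put("value", "hello");
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(SCHEMA);
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        writer.write(record, encoder);
        encoder.flush();

        // Decode the same way parse() does: wrap the ByteBuffer in a
        // ByteBufferInputStream and read with a GenericDatumReader.
        ByteBuffer bytes = ByteBuffer.wrap(out.toByteArray());
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(SCHEMA);
        ByteBufferInputStream inputStream = new ByteBufferInputStream(Collections.singletonList(bytes));
        GenericRecord decoded = reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null));
        System.out.println(decoded); // {"value": "hello"}
    }
}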

Example 2 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class InlineSchemasAvroBytesDecoder, method parse.

// It is assumed that the record has the following format:
// byte 1: version, always 0x1
// bytes 2-5: int schemaId
// remaining bytes: Avro-encoded data
@Override
public GenericRecord parse(ByteBuffer bytes) {
    if (bytes.remaining() < 5) {
        throw new ParseException("record must have at least 5 bytes carrying version and schemaId");
    }
    byte version = bytes.get();
    if (version != V1) {
        throw new ParseException("found record of arbitrary version [%s]", version);
    }
    int schemaId = bytes.getInt();
    Schema schemaObj = schemaObjs.get(schemaId);
    if (schemaObj == null) {
        throw new ParseException("Failed to find schema for id [%s]", schemaId);
    }
    try {
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schemaObj);
        ByteBufferInputStream inputStream = new ByteBufferInputStream(Collections.singletonList(bytes));
        return reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null));
    } catch (Exception e) {
        throw new ParseException(e, "Fail to decode avro message with schemaId [%s].", schemaId);
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) ByteBufferInputStream(org.apache.avro.util.ByteBufferInputStream) ParseException(io.druid.java.util.common.parsers.ParseException) GenericRecord(org.apache.avro.generic.GenericRecord)
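
The comment in this example describes a small framing header: one version byte, a four-byte schema id, then the Avro payload. The sketch below builds and reads that frame with java.nio.ByteBuffer, independently of the Druid class; the schema id and payload bytes are made-up values for illustration only.

import java.nio.ByteBuffer;

public class VersionedAvroFrameSketch {
    private static final byte V1 = 0x1;

    // Prepend the one-byte version and four-byte schema id to an Avro payload,
    // matching the layout InlineSchemasAvroBytesDecoder.parse() expects.
    static ByteBuffer frame(int schemaId, byte[] avroPayload) {
        ByteBuffer buf = ByteBuffer.allocate(1 + 4 + avroPayload.length);
        buf.put(V1);
        buf.putInt(schemaId);
        buf.put(avroPayload);
        buf.flip();
        return buf;
    }

    public static void main(String[] args) {
        ByteBuffer framed = frame(42, new byte[] {1, 2, 3});
        // Read the header back the same way parse() does before handing the
        // remaining bytes to the Avro reader.
        byte version = framed.get();     // 0x1
        int schemaId = framed.getInt();  // 42
        System.out.println("version=" + version + " schemaId=" + schemaId
            + " payloadBytes=" + framed.remaining());
    }
}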

Example 3 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class Plumbers, method addNextRow.

public static void addNextRow(final Supplier<Committer> committerSupplier, final Firehose firehose, final Plumber plumber, final boolean reportParseExceptions, final FireDepartmentMetrics metrics) {
    final InputRow inputRow;
    try {
        inputRow = firehose.nextRow();
    } catch (ParseException e) {
        if (reportParseExceptions) {
            throw e;
        } else {
            log.debug(e, "Discarded row due to exception, considering unparseable.");
            metrics.incrementUnparseable();
            return;
        }
    }
    if (inputRow == null) {
        if (reportParseExceptions) {
            throw new ParseException("null input row");
        } else {
            log.debug("Discarded null input row, considering unparseable.");
            metrics.incrementUnparseable();
            return;
        }
    }
    final int numRows;
    try {
        numRows = plumber.add(inputRow, committerSupplier);
    } catch (IndexSizeExceededException e) {
        // plumber.add should be swapping out indexes before they fill up.
        throw new ISE(e, "WTF?! Index size exceeded, this shouldn't happen. Bad Plumber!");
    }
    if (numRows == -1) {
        metrics.incrementThrownAway();
        log.debug("Discarded row[%s], considering thrownAway.", inputRow);
        return;
    }
    metrics.incrementProcessed();
}
Also used : InputRow(io.druid.data.input.InputRow) ISE(io.druid.java.util.common.ISE) ParseException(io.druid.java.util.common.parsers.ParseException) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException)
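
The pattern in addNextRow is a simple policy toggle: with reportParseExceptions enabled, the ParseException propagates and fails ingestion; otherwise the row is counted as unparseable and processing continues. A minimal sketch of that toggle follows, using plain JDK types (a RuntimeException and an AtomicLong counter stand in for Druid's ParseException and FireDepartmentMetrics); all names are illustrative.

import java.util.concurrent.atomic.AtomicLong;

public class ParseErrorPolicySketch {
    private final boolean reportParseExceptions;
    private final AtomicLong unparseableCount = new AtomicLong();

    ParseErrorPolicySketch(boolean reportParseExceptions) {
        this.reportParseExceptions = reportParseExceptions;
    }

    // Mirrors the structure of Plumbers.addNextRow(): strict mode rethrows,
    // lenient mode counts the row as unparseable and moves on.
    void onParseFailure(RuntimeException e) {
        if (reportParseExceptions) {
            throw e;
        }
        unparseableCount.incrementAndGet();
        System.out.println("Discarded row: " + e.getMessage());
    }

    public static void main(String[] args) {
        ParseErrorPolicySketch lenient = new ParseErrorPolicySketch(false);
        lenient.onParseFailure(new RuntimeException("bad row")); // counted, not thrown
        System.out.println("unparseable=" + lenient.unparseableCount.get());
    }
}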

Example 4 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class RealtimeManagerTest, method setUp.

@Before
public void setUp() throws Exception {
    final List<TestInputRowHolder> rows = Arrays.asList(makeRow(new DateTime("9000-01-01").getMillis()), makeRow(new ParseException("parse error")), null, makeRow(new DateTime().getMillis()));
    ObjectMapper jsonMapper = new DefaultObjectMapper();
    schema = new DataSchema("test", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    schema2 = new DataSchema("testV2", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    RealtimeIOConfig ioConfig = new RealtimeIOConfig(new FirehoseFactory() {

        @Override
        public Firehose connect(InputRowParser parser) throws IOException {
            return new TestFirehose(rows.iterator());
        }
    }, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber;
        }
    }, null);
    RealtimeIOConfig ioConfig2 = new RealtimeIOConfig(null, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber2;
        }
    }, new FirehoseFactoryV2() {

        @Override
        public FirehoseV2 connect(InputRowParser parser, Object arg1) throws IOException, ParseException {
            return new TestFirehoseV2(rows.iterator());
        }
    });
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null);
    plumber = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema, ioConfig, tuningConfig)), null);
    plumber2 = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema2, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager2 = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema2, ioConfig2, tuningConfig)), null);
    tuningConfig_0 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(0), null, null, 0, 0, null, null);
    tuningConfig_1 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(1), null, null, 0, 0, null, null);
    schema3 = new DataSchema("testing", null, new AggregatorFactory[] { new CountAggregatorFactory("ignore") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    FireDepartment department_0 = new FireDepartment(schema3, ioConfig, tuningConfig_0);
    FireDepartment department_1 = new FireDepartment(schema3, ioConfig2, tuningConfig_1);
    QueryRunnerFactoryConglomerate conglomerate = new QueryRunnerFactoryConglomerate() {

        @Override
        public <T, QueryType extends Query<T>> QueryRunnerFactory<T, QueryType> findFactory(QueryType query) {
            return factory;
        }
    };
    chiefStartedLatch = new CountDownLatch(2);
    RealtimeManager.FireChief fireChief_0 = new RealtimeManager.FireChief(department_0, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    RealtimeManager.FireChief fireChief_1 = new RealtimeManager.FireChief(department_1, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    realtimeManager3 = new RealtimeManager(Arrays.asList(department_0, department_1), conglomerate, ImmutableMap.<String, Map<Integer, RealtimeManager.FireChief>>of("testing", ImmutableMap.of(0, fireChief_0, 1, fireChief_1)));
    startFireChiefWithPartitionNum(fireChief_0, 0);
    startFireChiefWithPartitionNum(fireChief_1, 1);
}
Also used : FirehoseV2(io.druid.data.input.FirehoseV2) RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) BaseQuery(io.druid.query.BaseQuery) Query(io.druid.query.Query) GroupByQuery(io.druid.query.groupby.GroupByQuery) FirehoseFactory(io.druid.data.input.FirehoseFactory) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) DateTime(org.joda.time.DateTime) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) QueryRunnerFactoryConglomerate(io.druid.query.QueryRunnerFactoryConglomerate) Sink(io.druid.segment.realtime.plumber.Sink) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Firehose(io.druid.data.input.Firehose) Period(org.joda.time.Period) IOException(java.io.IOException) PlumberSchool(io.druid.segment.realtime.plumber.PlumberSchool) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) DataSchema(io.druid.segment.indexing.DataSchema) Plumber(io.druid.segment.realtime.plumber.Plumber) ParseException(io.druid.java.util.common.parsers.ParseException) InputRowParser(io.druid.data.input.impl.InputRowParser) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) FirehoseFactoryV2(io.druid.data.input.FirehoseFactoryV2) Interval(org.joda.time.Interval) Before(org.junit.Before)
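
The interesting bit of this setUp is the scripted rows list: a valid row, a ParseException, a null, and another valid row, which together exercise every branch of Plumbers.addNextRow. Below is a stand-alone sketch of such a scripted row source using plain JDK types rather than the Druid Firehose interface; class and method names are hypothetical.

import java.util.Arrays;
import java.util.Iterator;

public class MixedRowSourceSketch {
    // Each element is either a row payload (a String here, for simplicity), an
    // exception to throw, or null, mimicking the rows list in setUp().
    private final Iterator<Object> rows = Arrays.<Object>asList(
        "good-row",
        new RuntimeException("parse error"),
        null,
        "another-good-row").iterator();

    boolean hasMore() {
        return rows.hasNext();
    }

    // Throws when the scripted element is an exception, otherwise returns the
    // row (possibly null), just like the TestFirehose used by the test.
    String nextRow() {
        Object next = rows.next();
        if (next instanceof RuntimeException) {
            throw (RuntimeException) next;
        }
        return (String) next;
    }
}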

Example 5 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class KafkaIndexTask, method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    log.info("Starting up!");
    startTime = DateTime.now();
    mapper = toolbox.getObjectMapper();
    status = Status.STARTING;
    if (chatHandlerProvider.isPresent()) {
        log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName());
        chatHandlerProvider.get().register(getId(), this, false);
    } else {
        log.warn("No chat handler detected");
    }
    runThread = Thread.currentThread();
    // Set up FireDepartmentMetrics
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
    fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    toolbox.getMonitorScheduler().addMonitor(new RealtimeMetricsMonitor(ImmutableList.of(fireDepartmentForMetrics), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })));
    try (final Appenderator appenderator0 = newAppenderator(fireDepartmentMetrics, toolbox);
        final FiniteAppenderatorDriver driver = newDriver(appenderator0, toolbox, fireDepartmentMetrics);
        final KafkaConsumer<byte[], byte[]> consumer = newConsumer()) {
        appenderator = appenderator0;
        final String topic = ioConfig.getStartPartitions().getTopic();
        // Start up, set up initial offsets.
        final Object restoredMetadata = driver.startJob();
        if (restoredMetadata == null) {
            nextOffsets.putAll(ioConfig.getStartPartitions().getPartitionOffsetMap());
        } else {
            final Map<String, Object> restoredMetadataMap = (Map) restoredMetadata;
            final KafkaPartitions restoredNextPartitions = toolbox.getObjectMapper().convertValue(restoredMetadataMap.get(METADATA_NEXT_PARTITIONS), KafkaPartitions.class);
            nextOffsets.putAll(restoredNextPartitions.getPartitionOffsetMap());
            // Sanity checks.
            if (!restoredNextPartitions.getTopic().equals(ioConfig.getStartPartitions().getTopic())) {
                throw new ISE("WTF?! Restored topic[%s] but expected topic[%s]", restoredNextPartitions.getTopic(), ioConfig.getStartPartitions().getTopic());
            }
            if (!nextOffsets.keySet().equals(ioConfig.getStartPartitions().getPartitionOffsetMap().keySet())) {
                throw new ISE("WTF?! Restored partitions[%s] but expected partitions[%s]", nextOffsets.keySet(), ioConfig.getStartPartitions().getPartitionOffsetMap().keySet());
            }
        }
        // Set up sequenceNames.
        final Map<Integer, String> sequenceNames = Maps.newHashMap();
        for (Integer partitionNum : nextOffsets.keySet()) {
            sequenceNames.put(partitionNum, String.format("%s_%s", ioConfig.getBaseSequenceName(), partitionNum));
        }
        // Set up committer.
        final Supplier<Committer> committerSupplier = new Supplier<Committer>() {

            @Override
            public Committer get() {
                final Map<Integer, Long> snapshot = ImmutableMap.copyOf(nextOffsets);
                return new Committer() {

                    @Override
                    public Object getMetadata() {
                        return ImmutableMap.of(METADATA_NEXT_PARTITIONS, new KafkaPartitions(ioConfig.getStartPartitions().getTopic(), snapshot));
                    }

                    @Override
                    public void run() {
                    // Do nothing.
                    }
                };
            }
        };
        Set<Integer> assignment = assignPartitionsAndSeekToNext(consumer, topic);
        // Main loop.
        // Could eventually support leader/follower mode (for keeping replicas more in sync)
        boolean stillReading = !assignment.isEmpty();
        status = Status.READING;
        try {
            while (stillReading) {
                if (possiblyPause(assignment)) {
                    // The partition assignments may have changed while paused by a call to setEndOffsets() so reassign
                    // partitions upon resuming. This is safe even if the end offsets have not been modified.
                    assignment = assignPartitionsAndSeekToNext(consumer, topic);
                    if (assignment.isEmpty()) {
                        log.info("All partitions have been fully read");
                        publishOnStop = true;
                        stopRequested = true;
                    }
                }
                if (stopRequested) {
                    break;
                }
                // The retrying business is because the KafkaConsumer throws OffsetOutOfRangeException if the seeked-to
                // offset is not present in the topic-partition. This can happen if we're asking a task to read from data
                // that has not been written yet (which is totally legitimate). So let's wait for it to show up.
                ConsumerRecords<byte[], byte[]> records = ConsumerRecords.empty();
                try {
                    records = consumer.poll(POLL_TIMEOUT);
                } catch (OffsetOutOfRangeException e) {
                    log.warn("OffsetOutOfRangeException with message [%s]", e.getMessage());
                    possiblyResetOffsetsOrWait(e.offsetOutOfRangePartitions(), consumer, toolbox);
                    stillReading = ioConfig.isPauseAfterRead() || !assignment.isEmpty();
                }
                for (ConsumerRecord<byte[], byte[]> record : records) {
                    if (log.isTraceEnabled()) {
                        log.trace("Got topic[%s] partition[%d] offset[%,d].", record.topic(), record.partition(), record.offset());
                    }
                    if (record.offset() < endOffsets.get(record.partition())) {
                        if (record.offset() != nextOffsets.get(record.partition())) {
                            throw new ISE("WTF?! Got offset[%,d] after offset[%,d] in partition[%d].", record.offset(), nextOffsets.get(record.partition()), record.partition());
                        }
                        try {
                            final byte[] valueBytes = record.value();
                            if (valueBytes == null) {
                                throw new ParseException("null value");
                            }
                            final InputRow row = Preconditions.checkNotNull(parser.parse(ByteBuffer.wrap(valueBytes)), "row");
                            if (!ioConfig.getMinimumMessageTime().isPresent() || !ioConfig.getMinimumMessageTime().get().isAfter(row.getTimestamp())) {
                                final SegmentIdentifier identifier = driver.add(row, sequenceNames.get(record.partition()), committerSupplier);
                                if (identifier == null) {
                                    // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                                    throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
                                }
                                fireDepartmentMetrics.incrementProcessed();
                            } else {
                                fireDepartmentMetrics.incrementThrownAway();
                            }
                        } catch (ParseException e) {
                            if (tuningConfig.isReportParseExceptions()) {
                                throw e;
                            } else {
                                log.debug(e, "Dropping unparseable row from partition[%d] offset[%,d].", record.partition(), record.offset());
                                fireDepartmentMetrics.incrementUnparseable();
                            }
                        }
                        nextOffsets.put(record.partition(), record.offset() + 1);
                    }
                    if (nextOffsets.get(record.partition()).equals(endOffsets.get(record.partition())) && assignment.remove(record.partition())) {
                        log.info("Finished reading topic[%s], partition[%,d].", record.topic(), record.partition());
                        assignPartitions(consumer, topic, assignment);
                        stillReading = ioConfig.isPauseAfterRead() || !assignment.isEmpty();
                    }
                }
            }
        } finally {
            // persist pending data
            driver.persist(committerSupplier.get());
        }
        synchronized (statusLock) {
            if (stopRequested && !publishOnStop) {
                throw new InterruptedException("Stopping without publishing");
            }
            status = Status.PUBLISHING;
        }
        final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {

            @Override
            public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
                final KafkaPartitions finalPartitions = toolbox.getObjectMapper().convertValue(((Map) commitMetadata).get(METADATA_NEXT_PARTITIONS), KafkaPartitions.class);
                // Sanity check, we should only be publishing things that match our desired end state.
                if (!endOffsets.equals(finalPartitions.getPartitionOffsetMap())) {
                    throw new ISE("WTF?! Driver attempted to publish invalid metadata[%s].", commitMetadata);
                }
                final SegmentTransactionalInsertAction action;
                if (ioConfig.isUseTransaction()) {
                    action = new SegmentTransactionalInsertAction(segments, new KafkaDataSourceMetadata(ioConfig.getStartPartitions()), new KafkaDataSourceMetadata(finalPartitions));
                } else {
                    action = new SegmentTransactionalInsertAction(segments, null, null);
                }
                log.info("Publishing with isTransaction[%s].", ioConfig.isUseTransaction());
                return toolbox.getTaskActionClient().submit(action).isSuccess();
            }
        };
        final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
        if (published == null) {
            throw new ISE("Transaction failure publishing segments, aborting");
        } else {
            log.info("Published segments[%s] with metadata[%s].", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {

                @Override
                public String apply(DataSegment input) {
                    return input.getIdentifier();
                }
            })), published.getCommitMetadata());
        }
    } catch (InterruptedException | RejectedExecutionException e) {
        // handle the InterruptedException that gets wrapped in a RejectedExecutionException
        if (e instanceof RejectedExecutionException && (e.getCause() == null || !(e.getCause() instanceof InterruptedException))) {
            throw e;
        }
        // if we were interrupted because we were asked to stop, handle the exception and return success, else rethrow
        if (!stopRequested) {
            Thread.currentThread().interrupt();
            throw e;
        }
        log.info("The task was asked to stop before completing");
    } finally {
        if (chatHandlerProvider.isPresent()) {
            chatHandlerProvider.get().unregister(getId());
        }
    }
    return success();
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) Set(java.util.Set) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) SegmentTransactionalInsertAction(io.druid.indexing.common.actions.SegmentTransactionalInsertAction) DataSegment(io.druid.timeline.DataSegment) FireDepartment(io.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ISE(io.druid.java.util.common.ISE) Supplier(com.google.common.base.Supplier) SegmentsAndMetadata(io.druid.segment.realtime.appenderator.SegmentsAndMetadata) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) Appenderator(io.druid.segment.realtime.appenderator.Appenderator) FiniteAppenderatorDriver(io.druid.segment.realtime.appenderator.FiniteAppenderatorDriver) InputRow(io.druid.data.input.InputRow) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) ParseException(io.druid.java.util.common.parsers.ParseException) OffsetOutOfRangeException(org.apache.kafka.clients.consumer.OffsetOutOfRangeException) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)
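
One detail worth calling out in this example is the Supplier<Committer>: every call to get() snapshots the current nextOffsets map, so the metadata persisted or published always reflects exactly the records already handed to the driver. The following sketch shows that snapshot-on-get idea with plain JDK types (a Supplier of an immutable offset map stands in for Druid's Committer); names are illustrative only.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;

public class OffsetCommitterSketch {
    // Live view of the next offset to read per partition, updated as records
    // are processed (a ConcurrentHashMap, as in the task).
    private final Map<Integer, Long> nextOffsets = new ConcurrentHashMap<>();

    // Each call captures an immutable snapshot of the offsets, so the metadata
    // committed alongside persisted rows reflects exactly what was ingested.
    Supplier<Map<Integer, Long>> committerMetadataSupplier() {
        return () -> {
            Map<Integer, Long> snapshot = new HashMap<>(nextOffsets);
            return Collections.unmodifiableMap(snapshot);
        };
    }

    public static void main(String[] args) {
        OffsetCommitterSketch sketch = new OffsetCommitterSketch();
        sketch.nextOffsets.put(0, 10L);
        Map<Integer, Long> committed = sketch.committerMetadataSupplier().get();
        sketch.nextOffsets.put(0, 11L); // later progress does not alter the snapshot
        System.out.println(committed);  // {0=10}
    }
}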

Aggregations

ParseException (io.druid.java.util.common.parsers.ParseException): 15 usages
InputRow (io.druid.data.input.InputRow): 6 usages
ISE (io.druid.java.util.common.ISE): 5 usages
IOException (java.io.IOException): 4 usages
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 4 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 4 usages
Firehose (io.druid.data.input.Firehose): 3 usages
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 3 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3 usages
RealtimeIOConfig (io.druid.segment.indexing.RealtimeIOConfig): 3 usages
Map (java.util.Map): 3 usages
ByteBufferInputStream (org.apache.avro.util.ByteBufferInputStream): 3 usages
DateTime (org.joda.time.DateTime): 3 usages
Supplier (com.google.common.base.Supplier): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
Committer (io.druid.data.input.Committer): 2 usages
SegmentTransactionalInsertAction (io.druid.indexing.common.actions.SegmentTransactionalInsertAction): 2 usages
Aggregator (io.druid.query.aggregation.Aggregator): 2 usages
FireDepartment (io.druid.segment.realtime.FireDepartment): 2 usages
RealtimeMetricsMonitor (io.druid.segment.realtime.RealtimeMetricsMonitor): 2 usages