
Example 6 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class OffheapIncrementalIndex, method addToFacts:

@Override
protected Integer addToFacts(AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, InputRow row, AtomicInteger numEntries, TimeAndDims key, ThreadLocal<InputRow> rowContainer, Supplier<InputRow> rowSupplier) throws IndexSizeExceededException {
    ByteBuffer aggBuffer;
    int bufferIndex;
    int bufferOffset;
    synchronized (this) {
        final Integer priorIndex = facts.getPriorIndex(key);
        if (null != priorIndex) {
            final int[] indexAndOffset = indexAndOffsets.get(priorIndex);
            bufferIndex = indexAndOffset[0];
            bufferOffset = indexAndOffset[1];
            aggBuffer = aggBuffers.get(bufferIndex).get();
        } else {
            if (metrics.length > 0 && getAggs()[0] == null) {
                // Note: aggregators are created lazily, once at least one input row is available,
                // so that FilteredAggregators can be initialized correctly.
                rowContainer.set(row);
                for (int i = 0; i < metrics.length; i++) {
                    final AggregatorFactory agg = metrics[i];
                    getAggs()[i] = agg.factorizeBuffered(makeColumnSelectorFactory(agg, rowSupplier, deserializeComplexMetrics));
                }
                rowContainer.set(null);
            }
            bufferIndex = aggBuffers.size() - 1;
            ByteBuffer lastBuffer = aggBuffers.isEmpty() ? null : aggBuffers.get(aggBuffers.size() - 1).get();
            int[] lastAggregatorsIndexAndOffset = indexAndOffsets.isEmpty() ? null : indexAndOffsets.get(indexAndOffsets.size() - 1);
            if (lastAggregatorsIndexAndOffset != null && lastAggregatorsIndexAndOffset[0] != bufferIndex) {
                throw new ISE("last row's aggregate's buffer and last buffer index must be same");
            }
            bufferOffset = aggsTotalSize + (lastAggregatorsIndexAndOffset != null ? lastAggregatorsIndexAndOffset[1] : 0);
            if (lastBuffer != null && lastBuffer.capacity() - bufferOffset >= aggsTotalSize) {
                aggBuffer = lastBuffer;
            } else {
                ResourceHolder<ByteBuffer> bb = bufferPool.take();
                aggBuffers.add(bb);
                bufferIndex = aggBuffers.size() - 1;
                bufferOffset = 0;
                aggBuffer = bb.get();
            }
            for (int i = 0; i < metrics.length; i++) {
                getAggs()[i].init(aggBuffer, bufferOffset + aggOffsetInBuffer[i]);
            }
            // Last-ditch sanity checks
            if (numEntries.get() >= maxRowCount && facts.getPriorIndex(key) == null) {
                throw new IndexSizeExceededException("Maximum number of rows [%d] reached", maxRowCount);
            }
            final Integer rowIndex = indexIncrement.getAndIncrement();
            // Note: indexAndOffsets must be updated before facts, because as soon as facts is updated,
            // concurrent readers can get hold of the new entry and ask for the newly added row.
            indexAndOffsets.add(new int[] { bufferIndex, bufferOffset });
            final Integer prev = facts.putIfAbsent(key, rowIndex);
            if (null == prev) {
                numEntries.incrementAndGet();
            } else {
                throw new ISE("WTF! we are in sychronized block.");
            }
        }
    }
    rowContainer.set(row);
    for (int i = 0; i < metrics.length; i++) {
        final BufferAggregator agg = getAggs()[i];
        synchronized (agg) {
            try {
                agg.aggregate(aggBuffer, bufferOffset + aggOffsetInBuffer[i]);
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                if (reportParseExceptions) {
                    throw new ParseException(e, "Encountered parse error for aggregator[%s]", getMetricAggs()[i].getName());
                } else {
                    log.debug(e, "Encountered parse error, skipping aggregator[%s].", getMetricAggs()[i].getName());
                }
            }
        }
    }
    rowContainer.set(null);
    return numEntries.get();
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ISE(io.druid.java.util.common.ISE) ParseException(io.druid.java.util.common.parsers.ParseException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) ByteBuffer(java.nio.ByteBuffer) BufferAggregator(io.druid.query.aggregation.BufferAggregator)
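
The trickiest part of this method is the slot arithmetic: each row owns a contiguous aggsTotalSize-byte region, and aggregator i lives at bufferOffset + aggOffsetInBuffer[i] inside it. A minimal standalone sketch of that layout (all names here are illustrative, not Druid API):

import java.nio.ByteBuffer;

public class OffheapLayoutSketch {
    public static void main(String[] args) {
        // Suppose three aggregators need 8, 4 and 8 bytes respectively.
        int[] aggSizes = { 8, 4, 8 };
        int[] aggOffsetInBuffer = new int[aggSizes.length];
        int aggsTotalSize = 0;
        for (int i = 0; i < aggSizes.length; i++) {
            // each aggregator starts where the previous one ended
            aggOffsetInBuffer[i] = aggsTotalSize;
            aggsTotalSize += aggSizes[i];
        }
        ByteBuffer buffer = ByteBuffer.allocateDirect(1024);
        // the first row starts at offset 0; each later row starts aggsTotalSize further in
        int bufferOffset = 0;
        for (int row = 0; row < 3; row++) {
            for (int i = 0; i < aggSizes.length; i++) {
                // absolute position of aggregator i for this row, mirroring
                // agg.init(aggBuffer, bufferOffset + aggOffsetInBuffer[i]) above
                int position = bufferOffset + aggOffsetInBuffer[i];
                System.out.printf("row %d, agg %d -> byte %d of %d%n", row, i, position, buffer.capacity());
            }
            bufferOffset += aggsTotalSize;
        }
    }
}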

Example 7 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class OnheapIncrementalIndex, method doAggregate:

private void doAggregate(AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal<InputRow> rowContainer, InputRow row, boolean reportParseExceptions) {
    rowContainer.set(row);
    for (int i = 0; i < aggs.length; i++) {
        final Aggregator agg = aggs[i];
        synchronized (agg) {
            try {
                agg.aggregate();
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                if (reportParseExceptions) {
                    throw new ParseException(e, "Encountered parse error for aggregator[%s]", metrics[i].getName());
                } else {
                    log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
                }
            }
        }
    }
    rowContainer.set(null);
}
Also used : Aggregator(io.druid.query.aggregation.Aggregator) ParseException(io.druid.java.util.common.parsers.ParseException)
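
Both this doAggregate and the addToFacts above rely on the same handoff: selectors are built once against a ThreadLocal row container, and each aggregate() call reads whatever row was published just before it. A minimal sketch of that pattern (the names are illustrative, not Druid code):

import java.util.function.Supplier;

public class RowContainerSketch {
    public static void main(String[] args) {
        final ThreadLocal<String> rowContainer = new ThreadLocal<>();
        // the "selector" is built once and always reads the current row from the container
        Supplier<String> selector = rowContainer::get;
        for (String row : new String[] { "row-1", "row-2" }) {
            rowContainer.set(row);   // publish the row, mirroring rowContainer.set(row) above
            System.out.println("aggregating over " + selector.get());
            rowContainer.remove();   // clear it again, mirroring rowContainer.set(null)
        }
    }
}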

Example 8 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class SpatialDimensionRowTransformer, method apply:

@Override
public InputRow apply(final InputRow row) {
    final Map<String, List<String>> spatialLookup = Maps.newHashMap();
    // remove all spatial dimensions
    final List<String> finalDims = Lists.newArrayList(Iterables.filter(row.getDimensions(), new Predicate<String>() {

        @Override
        public boolean apply(String input) {
            return !spatialDimensionMap.containsKey(input) && !spatialPartialDimNames.contains(input);
        }
    }));
    InputRow retVal = new InputRow() {

        @Override
        public List<String> getDimensions() {
            return finalDims;
        }

        @Override
        public long getTimestampFromEpoch() {
            return row.getTimestampFromEpoch();
        }

        @Override
        public DateTime getTimestamp() {
            return row.getTimestamp();
        }

        @Override
        public List<String> getDimension(String dimension) {
            List<String> retVal = spatialLookup.get(dimension);
            return (retVal == null) ? row.getDimension(dimension) : retVal;
        }

        @Override
        public Object getRaw(String dimension) {
            List<String> retVal = spatialLookup.get(dimension);
            return (retVal == null) ? row.getRaw(dimension) : retVal;
        }

        @Override
        public long getLongMetric(String metric) {
            try {
                return row.getLongMetric(metric);
            } catch (ParseException e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public float getFloatMetric(String metric) {
            try {
                return row.getFloatMetric(metric);
            } catch (ParseException e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public String toString() {
            return row.toString();
        }

        @Override
        public int compareTo(Row o) {
            return getTimestamp().compareTo(o.getTimestamp());
        }
    };
    for (Map.Entry<String, SpatialDimensionSchema> entry : spatialDimensionMap.entrySet()) {
        final String spatialDimName = entry.getKey();
        final SpatialDimensionSchema spatialDim = entry.getValue();
        List<String> dimVals = row.getDimension(spatialDimName);
        if (dimVals != null && !dimVals.isEmpty()) {
            if (dimVals.size() != 1) {
                throw new ISE("Spatial dimension value must be in an array!");
            }
            if (isJoinedSpatialDimValValid(dimVals.get(0))) {
                spatialLookup.put(spatialDimName, dimVals);
                finalDims.add(spatialDimName);
            }
        } else {
            List<String> spatialDimVals = Lists.newArrayList();
            for (String dim : spatialDim.getDims()) {
                List<String> partialDimVals = row.getDimension(dim);
                if (isSpatialDimValsValid(partialDimVals)) {
                    spatialDimVals.addAll(partialDimVals);
                }
            }
            if (spatialDimVals.size() == spatialDim.getDims().size()) {
                spatialLookup.put(spatialDimName, Arrays.asList(JOINER.join(spatialDimVals)));
                finalDims.add(spatialDimName);
            }
        }
    }
    return retVal;
}
Also used : SpatialDimensionSchema(io.druid.data.input.impl.SpatialDimensionSchema) InputRow(io.druid.data.input.InputRow) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ISE(io.druid.java.util.common.ISE) ParseException(io.druid.java.util.common.parsers.ParseException) Row(io.druid.data.input.Row) Map(java.util.Map) Predicate(com.google.common.base.Predicate)
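
Concretely, the else branch above folds the partial dimensions into one value. Assuming a spatial dimension declared over ["lat", "lon"] and a comma joiner (a hypothetical configuration, for illustration only), a row carrying lat=["37.77"] and lon=["-122.42"] ends up with a single joined spatial dimension value. A tiny sketch of just the join step:

import com.google.common.base.Joiner;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class SpatialJoinSketch {
    private static final Joiner JOINER = Joiner.on(",");

    public static void main(String[] args) {
        // partial dimension values as they might appear on an input row
        List<String> lat = Arrays.asList("37.77");
        List<String> lon = Arrays.asList("-122.42");
        // mirror the else branch: gather each partial value, then join
        List<String> spatialDimVals = new ArrayList<>();
        spatialDimVals.addAll(lat);
        spatialDimVals.addAll(lon);
        // the joined value becomes the single entry of the new spatial dimension
        System.out.println(JOINER.join(spatialDimVals)); // prints 37.77,-122.42
    }
}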

Example 9 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class StringArrayWritable, method toBytes:

public static final byte[] toBytes(final InputRow row, AggregatorFactory[] aggs, boolean reportParseExceptions) {
    try {
        ByteArrayDataOutput out = ByteStreams.newDataOutput();
        // write the timestamp
        out.writeLong(row.getTimestampFromEpoch());
        // write all dimensions (guard against a null dimension list before taking its size)
        List<String> dimList = row.getDimensions();
        WritableUtils.writeVInt(out, dimList == null ? 0 : dimList.size());
        if (dimList != null) {
            for (String dim : dimList) {
                List<String> dimValues = row.getDimension(dim);
                writeString(dim, out);
                writeStringArray(dimValues, out);
            }
        }
        // write all metrics
        Supplier<InputRow> supplier = new Supplier<InputRow>() {

            @Override
            public InputRow get() {
                return row;
            }
        };
        WritableUtils.writeVInt(out, aggs.length);
        for (AggregatorFactory aggFactory : aggs) {
            String k = aggFactory.getName();
            writeString(k, out);
            Aggregator agg = aggFactory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true));
            try {
                agg.aggregate();
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                if (reportParseExceptions) {
                    throw new ParseException(e, "Encountered parse error for aggregator[%s]", k);
                }
                log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
            }
            String t = aggFactory.getTypeName();
            if (t.equals("float")) {
                out.writeFloat(agg.getFloat());
            } else if (t.equals("long")) {
                WritableUtils.writeVLong(out, agg.getLong());
            } else {
                // it's a complex metric
                Object val = agg.get();
                ComplexMetricSerde serde = getComplexMetricSerde(t);
                writeBytes(serde.toBytes(val), out);
            }
        }
        return out.toByteArray();
    } catch (IOException ex) {
        throw Throwables.propagate(ex);
    }
}
Also used : ComplexMetricSerde(io.druid.segment.serde.ComplexMetricSerde) ByteArrayDataOutput(com.google.common.io.ByteArrayDataOutput) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) Aggregator(io.druid.query.aggregation.Aggregator) Supplier(com.google.common.base.Supplier) ParseException(io.druid.java.util.common.parsers.ParseException) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
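
To make the resulting byte layout explicit: the writer emits [timestamp][dimension count][name, values]* [metric count][name, float | vlong | length-prefixed complex bytes]*. Below is a sketch of a reader that walks the same framing, assuming readString/readStringArray helpers symmetric to the writers; this is illustrative only, not Druid's actual deserializer.

import com.google.common.io.ByteArrayDataInput;
import com.google.common.io.ByteStreams;
import org.apache.hadoop.io.WritableUtils;
import java.io.IOException;

public class RowBytesReaderSketch {
    static void describe(byte[] bytes) throws IOException {
        ByteArrayDataInput in = ByteStreams.newDataInput(bytes);
        long timestamp = in.readLong();              // written first by out.writeLong(...)
        int dimCount = WritableUtils.readVInt(in);   // then the dimension count
        System.out.printf("timestamp=%d, %d dimensions follow%n", timestamp, dimCount);
        // for each dimension: a readString(in) for the name, then a
        // readStringArray(in) for the values, in exactly the write order;
        // after the dimensions, WritableUtils.readVInt(in) metrics follow,
        // each as [name][float | vlong | length-prefixed complex bytes].
    }
}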

Example 10 with ParseException

Use of io.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class IndexTask, method generateAndPublishSegments:

private boolean generateAndPublishSegments(final TaskToolbox toolbox, final DataSchema dataSchema, final Map<Interval, List<ShardSpec>> shardSpecs, final String version, final FirehoseFactory firehoseFactory) throws IOException, InterruptedException {
    final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
    final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    final Map<String, ShardSpec> sequenceNameToShardSpecMap = Maps.newHashMap();
    if (toolbox.getMonitorScheduler() != null) {
        toolbox.getMonitorScheduler().addMonitor(new RealtimeMetricsMonitor(ImmutableList.of(fireDepartmentForMetrics), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })));
    }
    final SegmentAllocator segmentAllocator;
    if (ingestionSchema.getIOConfig().isAppendToExisting()) {
        segmentAllocator = new ActionBasedSegmentAllocator(toolbox.getTaskActionClient(), dataSchema);
    } else {
        segmentAllocator = new SegmentAllocator() {

            @Override
            public SegmentIdentifier allocate(DateTime timestamp, String sequenceName, String previousSegmentId) throws IOException {
                Optional<Interval> interval = granularitySpec.bucketInterval(timestamp);
                if (!interval.isPresent()) {
                    throw new ISE("Could not find interval for timestamp [%s]", timestamp);
                }
                ShardSpec shardSpec = sequenceNameToShardSpecMap.get(sequenceName);
                if (shardSpec == null) {
                    throw new ISE("Could not find ShardSpec for sequenceName [%s]", sequenceName);
                }
                return new SegmentIdentifier(getDataSource(), interval.get(), version, shardSpec);
            }
        };
    }
    try (final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema);
        final FiniteAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator, fireDepartmentMetrics);
        final Firehose firehose = firehoseFactory.connect(dataSchema.getParser())) {
        final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
        final Map<Interval, ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
        if (driver.startJob() != null) {
            driver.clear();
        }
        try {
            while (firehose.hasMore()) {
                try {
                    final InputRow inputRow = firehose.nextRow();
                    final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
                    if (!optInterval.isPresent()) {
                        fireDepartmentMetrics.incrementThrownAway();
                        continue;
                    }
                    final Interval interval = optInterval.get();
                    if (!shardSpecLookups.containsKey(interval)) {
                        final List<ShardSpec> intervalShardSpecs = shardSpecs.get(interval);
                        if (intervalShardSpecs == null || intervalShardSpecs.isEmpty()) {
                            throw new ISE("Failed to get shardSpec for interval[%s]", interval);
                        }
                        shardSpecLookups.put(interval, intervalShardSpecs.get(0).getLookup(intervalShardSpecs));
                    }
                    final ShardSpec shardSpec = shardSpecLookups.get(interval).getShardSpec(inputRow.getTimestampFromEpoch(), inputRow);
                    final String sequenceName = String.format("index_%s_%s_%d", interval, version, shardSpec.getPartitionNum());
                    if (!sequenceNameToShardSpecMap.containsKey(sequenceName)) {
                        final ShardSpec shardSpecForPublishing = ingestionSchema.getTuningConfig().isForceExtendableShardSpecs() || ingestionSchema.getIOConfig().isAppendToExisting() ? new NumberedShardSpec(shardSpec.getPartitionNum(), shardSpecs.get(interval).size()) : shardSpec;
                        sequenceNameToShardSpecMap.put(sequenceName, shardSpecForPublishing);
                    }
                    final SegmentIdentifier identifier = driver.add(inputRow, sequenceName, committerSupplier);
                    if (identifier == null) {
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                    fireDepartmentMetrics.incrementProcessed();
                } catch (ParseException e) {
                    if (ingestionSchema.getTuningConfig().isReportParseExceptions()) {
                        throw e;
                    } else {
                        fireDepartmentMetrics.incrementUnparseable();
                    }
                }
            }
        } finally {
            driver.persist(committerSupplier.get());
        }
        final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {

            @Override
            public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
                final SegmentTransactionalInsertAction action = new SegmentTransactionalInsertAction(segments, null, null);
                return toolbox.getTaskActionClient().submit(action).isSuccess();
            }
        };
        final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
        if (published == null) {
            log.error("Failed to publish segments, aborting!");
            return false;
        } else {
            log.info("Published segments[%s]", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {

                @Override
                public String apply(DataSegment input) {
                    return input.getIdentifier();
                }
            })));
            return true;
        }
    }
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) SortedSet(java.util.SortedSet) Set(java.util.Set) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) ShardSpecLookup(io.druid.timeline.partition.ShardSpecLookup) SegmentTransactionalInsertAction(io.druid.indexing.common.actions.SegmentTransactionalInsertAction) DataSegment(io.druid.timeline.DataSegment) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) DateTime(org.joda.time.DateTime) FireDepartment(io.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ActionBasedSegmentAllocator(io.druid.indexing.appenderator.ActionBasedSegmentAllocator) ISE(io.druid.java.util.common.ISE) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) Optional(com.google.common.base.Optional) Firehose(io.druid.data.input.Firehose) SegmentsAndMetadata(io.druid.segment.realtime.appenderator.SegmentsAndMetadata) IOException(java.io.IOException) FireDepartmentMetrics(io.druid.segment.realtime.FireDepartmentMetrics) Appenderator(io.druid.segment.realtime.appenderator.Appenderator) GranularitySpec(io.druid.segment.indexing.granularity.GranularitySpec) ActionBasedSegmentAllocator(io.druid.indexing.appenderator.ActionBasedSegmentAllocator) SegmentAllocator(io.druid.segment.realtime.appenderator.SegmentAllocator) FiniteAppenderatorDriver(io.druid.segment.realtime.appenderator.FiniteAppenderatorDriver) InputRow(io.druid.data.input.InputRow) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) ParseException(io.druid.java.util.common.parsers.ParseException) Interval(org.joda.time.Interval)
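
The catch block in the ingestion loop is the task-level version of the policy seen in the index classes: with isReportParseExceptions() set, the first bad row fails the task; otherwise it is only counted. A condensed, runnable sketch of that control flow (all names illustrative):

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

public class IngestLoopSketch {
    public static void main(String[] args) {
        List<String> rows = Arrays.asList("1", "oops", "3");
        AtomicLong processed = new AtomicLong();
        AtomicLong unparseable = new AtomicLong();
        boolean reportParseExceptions = false; // lenient, as when the tuning flag is off
        for (String row : rows) {
            try {
                Long.parseLong(row);            // stand-in for driver.add(inputRow, ...)
                processed.incrementAndGet();    // mirrors fireDepartmentMetrics.incrementProcessed()
            } catch (NumberFormatException e) {
                if (reportParseExceptions) {
                    throw e;                    // strict: fail the task on the first bad row
                }
                unparseable.incrementAndGet();  // lenient: mirrors incrementUnparseable()
            }
        }
        System.out.printf("processed=%d unparseable=%d%n", processed.get(), unparseable.get());
    }
}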

Aggregations

ParseException (io.druid.java.util.common.parsers.ParseException): 15 uses
InputRow (io.druid.data.input.InputRow): 6 uses
ISE (io.druid.java.util.common.ISE): 5 uses
IOException (java.io.IOException): 4 uses
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 4 uses
GenericRecord (org.apache.avro.generic.GenericRecord): 4 uses
Firehose (io.druid.data.input.Firehose): 3 uses
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 3 uses
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3 uses
RealtimeIOConfig (io.druid.segment.indexing.RealtimeIOConfig): 3 uses
Map (java.util.Map): 3 uses
ByteBufferInputStream (org.apache.avro.util.ByteBufferInputStream): 3 uses
DateTime (org.joda.time.DateTime): 3 uses
Supplier (com.google.common.base.Supplier): 2 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 2 uses
Committer (io.druid.data.input.Committer): 2 uses
SegmentTransactionalInsertAction (io.druid.indexing.common.actions.SegmentTransactionalInsertAction): 2 uses
Aggregator (io.druid.query.aggregation.Aggregator): 2 uses
FireDepartment (io.druid.segment.realtime.FireDepartment): 2 uses
RealtimeMetricsMonitor (io.druid.segment.realtime.RealtimeMetricsMonitor): 2 uses