
Example 11 with Supplier

Use of com.google.common.base.Supplier in project druid by druid-io.

The class InDimFilter, method getLongPredicateSupplier.

// As the set of filtered values can be large, parsing them as longs should be done only if needed, and only once.
// Pass in a common long predicate supplier to all filters created by .toFilter(), so that
// we only compute the long hashset/array once per query.
// This supplier must be thread-safe, since this DimFilter will be accessed in the query runners.
private Supplier<DruidLongPredicate> getLongPredicateSupplier() {
    return new Supplier<DruidLongPredicate>() {

        private final Object initLock = new Object();

        private DruidLongPredicate predicate;

        private void initLongValues() {
            if (predicate != null) {
                return;
            }
            synchronized (initLock) {
                if (predicate != null) {
                    return;
                }
                LongArrayList longs = new LongArrayList(values.size());
                for (String value : values) {
                    Long longValue = GuavaUtils.tryParseLong(value);
                    if (longValue != null) {
                        longs.add(longValue);
                    }
                }
                if (longs.size() > NUMERIC_HASHING_THRESHOLD) {
                    final LongOpenHashSet longHashSet = new LongOpenHashSet(longs);
                    predicate = new DruidLongPredicate() {

                        @Override
                        public boolean applyLong(long input) {
                            return longHashSet.contains(input);
                        }
                    };
                } else {
                    final long[] longArray = longs.toLongArray();
                    Arrays.sort(longArray);
                    predicate = new DruidLongPredicate() {

                        @Override
                        public boolean applyLong(long input) {
                            return Arrays.binarySearch(longArray, input) >= 0;
                        }
                    };
                }
            }
        }

        @Override
        public DruidLongPredicate get() {
            initLongValues();
            return predicate;
        }
    };
}
Also used : LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Supplier(com.google.common.base.Supplier) LongOpenHashSet(it.unimi.dsi.fastutil.longs.LongOpenHashSet)
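
The anonymous Supplier above hand-rolls double-checked locking so that the long values are parsed at most once and the resulting predicate is shared safely between query runners. Below is a minimal sketch of the same lazy, thread-safe initialization using Guava's Suppliers.memoize; LongPredicate and longPredicateSupplier are hypothetical stand-ins for DruidLongPredicate and the method above, not Druid APIs.

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class LazyLongPredicateExample {

    // Hypothetical stand-in for DruidLongPredicate.
    interface LongPredicate {
        boolean applyLong(long input);
    }

    // Parse the string values once, on the first get(), and share the result across threads.
    static Supplier<LongPredicate> longPredicateSupplier(final Iterable<String> values) {
        return Suppliers.memoize(new Supplier<LongPredicate>() {
            @Override
            public LongPredicate get() {
                final Set<Long> longs = new TreeSet<>();
                for (String value : values) {
                    try {
                        longs.add(Long.parseLong(value));
                    } catch (NumberFormatException e) {
                        // Non-numeric values simply never match the predicate.
                    }
                }
                return new LongPredicate() {
                    @Override
                    public boolean applyLong(long input) {
                        return longs.contains(input);
                    }
                };
            }
        });
    }

    public static void main(String[] args) {
        Supplier<LongPredicate> supplier = longPredicateSupplier(Arrays.asList("1", "2", "abc"));
        System.out.println(supplier.get().applyLong(2L)); // true
        System.out.println(supplier.get().applyLong(3L)); // false; the values were parsed only once
    }
}

Suppliers.memoize takes care of the synchronization and visibility that the hand-written initLock/initLongValues pair manages explicitly.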

Example 12 with Supplier

Use of com.google.common.base.Supplier in project druid by druid-io.

The class ExpressionObjectSelector, method createBindings.

private static Expr.ObjectBinding createBindings(ColumnSelectorFactory columnSelectorFactory, Expr expression) {
    final Map<String, Supplier<Number>> suppliers = Maps.newHashMap();
    for (String columnName : Parser.findRequiredBindings(expression)) {
        final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(columnName);
        final ValueType nativeType = columnCapabilities != null ? columnCapabilities.getType() : null;
        final Supplier<Number> supplier;
        if (nativeType == ValueType.FLOAT) {
            supplier = supplierFromFloatSelector(columnSelectorFactory.makeFloatColumnSelector(columnName));
        } else if (nativeType == ValueType.LONG) {
            supplier = supplierFromLongSelector(columnSelectorFactory.makeLongColumnSelector(columnName));
        } else if (nativeType == null) {
            // Unknown ValueType. Try making an Object selector and see if that gives us anything useful.
            supplier = supplierFromObjectSelector(columnSelectorFactory.makeObjectColumnSelector(columnName));
        } else {
            // Unhandleable ValueType (possibly STRING or COMPLEX).
            supplier = null;
        }
        if (supplier != null) {
            suppliers.put(columnName, supplier);
        }
    }
    return Parser.withSuppliers(suppliers);
}
Also used : ValueType(io.druid.segment.column.ValueType) Supplier(com.google.common.base.Supplier) ColumnCapabilities(io.druid.segment.column.ColumnCapabilities)
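
For reference, a minimal, self-contained sketch of the supplier-per-column idea, assuming simplified FloatColumnSelector and LongColumnSelector stand-ins rather than Druid's real selector interfaces: each typed selector is wrapped in a Supplier<Number>, and an expression evaluator would read the current value through the map on every evaluation.

import com.google.common.base.Supplier;

import java.util.HashMap;
import java.util.Map;

public class NumericBindingSketch {

    // Hypothetical, simplified stand-ins for Druid's column selectors.
    interface FloatColumnSelector {
        float get();
    }

    interface LongColumnSelector {
        long get();
    }

    // Wrap a typed selector in a Supplier<Number>, mirroring supplierFromFloatSelector
    // and supplierFromLongSelector in the method above.
    static Supplier<Number> fromFloat(final FloatColumnSelector selector) {
        return new Supplier<Number>() {
            @Override
            public Number get() {
                return selector.get();
            }
        };
    }

    static Supplier<Number> fromLong(final LongColumnSelector selector) {
        return new Supplier<Number>() {
            @Override
            public Number get() {
                return selector.get();
            }
        };
    }

    public static void main(String[] args) {
        Map<String, Supplier<Number>> suppliers = new HashMap<>();
        suppliers.put("price", fromFloat(new FloatColumnSelector() {
            @Override
            public float get() {
                return 9.99f;
            }
        }));
        suppliers.put("count", fromLong(new LongColumnSelector() {
            @Override
            public long get() {
                return 42L;
            }
        }));
        // An expression evaluator would look up each required binding and read the
        // current value through its supplier.
        System.out.println(suppliers.get("price").get() + " / " + suppliers.get("count").get());
    }
}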

Example 13 with Supplier

Use of com.google.common.base.Supplier in project druid by druid-io.

The class AggregationTestHelper, method createTopNQueryAggregationTestHelper.

public static final AggregationTestHelper createTopNQueryAggregationTestHelper(List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder) {
    ObjectMapper mapper = new DefaultObjectMapper();
    TopNQueryQueryToolChest toolchest = new TopNQueryQueryToolChest(new TopNQueryConfig(), QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator());
    TopNQueryRunnerFactory factory = new TopNQueryRunnerFactory(new StupidPool<>("TopNQueryRunnerFactory-bufferPool", new Supplier<ByteBuffer>() {

        @Override
        public ByteBuffer get() {
            return ByteBuffer.allocate(10 * 1024 * 1024);
        }
    }), toolchest, QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {

        @Override
        public int columnCacheSizeBytes() {
            return 0;
        }
    });
    return new AggregationTestHelper(mapper, new IndexMerger(mapper, indexIO), indexIO, toolchest, factory, tempFolder, jsonModulesToRegister);
}
Also used : IndexMerger(io.druid.segment.IndexMerger) TopNQueryConfig(io.druid.query.topn.TopNQueryConfig) IndexIO(io.druid.segment.IndexIO) ColumnConfig(io.druid.segment.column.ColumnConfig) TopNQueryRunnerFactory(io.druid.query.topn.TopNQueryRunnerFactory) TopNQueryQueryToolChest(io.druid.query.topn.TopNQueryQueryToolChest) Supplier(com.google.common.base.Supplier) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)
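
The Supplier<ByteBuffer> handed to StupidPool defers buffer allocation until a query actually needs one and lets the pool reuse buffers afterwards. Below is a minimal, hypothetical pool sketch (SimpleBufferPool is not a Druid class) illustrating that supplier-backed behaviour.

import com.google.common.base.Supplier;

import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.Deque;

// Hypothetical, minimal supplier-backed buffer pool; not Druid's StupidPool.
public class SimpleBufferPool {

    private final Supplier<ByteBuffer> generator;
    private final Deque<ByteBuffer> idle = new ArrayDeque<>();

    public SimpleBufferPool(Supplier<ByteBuffer> generator) {
        this.generator = generator;
    }

    // Hand out an idle buffer if one exists, otherwise ask the supplier for a new one.
    public synchronized ByteBuffer take() {
        ByteBuffer buf = idle.pollFirst();
        return buf != null ? buf : generator.get();
    }

    // Return a buffer to the pool so later take() calls can reuse it.
    public synchronized void giveBack(ByteBuffer buf) {
        buf.clear();
        idle.addFirst(buf);
    }

    public static void main(String[] args) {
        SimpleBufferPool pool = new SimpleBufferPool(new Supplier<ByteBuffer>() {
            @Override
            public ByteBuffer get() {
                // Same sizing as the test helper above: 10 MiB per buffer, allocated lazily.
                return ByteBuffer.allocate(10 * 1024 * 1024);
            }
        });
        ByteBuffer first = pool.take();
        pool.giveBack(first);
        ByteBuffer second = pool.take();
        System.out.println("reused: " + (first == second)); // true: the returned buffer is reused
    }
}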

Example 14 with Supplier

Use of com.google.common.base.Supplier in project druid by druid-io.

The class StringArrayWritable, method toBytes.

public static final byte[] toBytes(final InputRow row, AggregatorFactory[] aggs, boolean reportParseExceptions) {
    try {
        ByteArrayDataOutput out = ByteStreams.newDataOutput();
        //write timestamp
        out.writeLong(row.getTimestampFromEpoch());
        //writing all dimensions
        List<String> dimList = row.getDimensions();
        // Guard against a null dimension list before dereferencing it for its size.
        WritableUtils.writeVInt(out, dimList == null ? 0 : dimList.size());
        if (dimList != null) {
            for (String dim : dimList) {
                List<String> dimValues = row.getDimension(dim);
                writeString(dim, out);
                writeStringArray(dimValues, out);
            }
        }
        //writing all metrics
        Supplier<InputRow> supplier = new Supplier<InputRow>() {

            @Override
            public InputRow get() {
                return row;
            }
        };
        WritableUtils.writeVInt(out, aggs.length);
        for (AggregatorFactory aggFactory : aggs) {
            String k = aggFactory.getName();
            writeString(k, out);
            Aggregator agg = aggFactory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true));
            try {
                agg.aggregate();
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                if (reportParseExceptions) {
                    throw new ParseException(e, "Encountered parse error for aggregator[%s]", k);
                }
                log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
            }
            String t = aggFactory.getTypeName();
            if (t.equals("float")) {
                out.writeFloat(agg.getFloat());
            } else if (t.equals("long")) {
                WritableUtils.writeVLong(out, agg.getLong());
            } else {
                //it's a complex metric
                Object val = agg.get();
                ComplexMetricSerde serde = getComplexMetricSerde(t);
                writeBytes(serde.toBytes(val), out);
            }
        }
        return out.toByteArray();
    } catch (IOException ex) {
        throw Throwables.propagate(ex);
    }
}
Also used : ComplexMetricSerde(io.druid.segment.serde.ComplexMetricSerde) ByteArrayDataOutput(com.google.common.io.ByteArrayDataOutput) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) Aggregator(io.druid.query.aggregation.Aggregator) Supplier(com.google.common.base.Supplier) ParseException(io.druid.java.util.common.parsers.ParseException) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
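
The anonymous Supplier<InputRow> here always returns the same captured row, so every aggregator built against it reads from that row. Guava's Suppliers.ofInstance expresses the same constant-supplier intent directly; a minimal sketch with a hypothetical Row class standing in for InputRow:

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;

public class ConstantRowSupplierSketch {

    // Hypothetical stand-in for Druid's InputRow.
    static class Row {
        final long timestamp;

        Row(long timestamp) {
            this.timestamp = timestamp;
        }
    }

    public static void main(String[] args) {
        final Row row = new Row(1000L);

        // The anonymous Supplier in the method above always returns the same captured row;
        // Suppliers.ofInstance expresses that intent in one call.
        Supplier<Row> supplier = Suppliers.ofInstance(row);

        // Anything built against the supplier (such as a column selector factory)
        // reads the row through get().
        System.out.println(supplier.get().timestamp); // 1000
    }
}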

Example 15 with Supplier

Use of com.google.common.base.Supplier in project druid by druid-io.

The class YeOldePlumberSchool, method findPlumber.

@Override
public Plumber findPlumber(final DataSchema schema, final RealtimeTuningConfig config, final FireDepartmentMetrics metrics) {
    // There can be only one.
    final Sink theSink = new Sink(interval, schema, config.getShardSpec(), version, config.getMaxRowsInMemory(), config.isReportParseExceptions());
    // Temporary directory to hold spilled segments.
    final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());
    // Set of spilled segments. Will be merged at the end.
    final Set<File> spilled = Sets.newHashSet();
    // IndexMerger implementation.
    final IndexMerger theIndexMerger = config.getBuildV9Directly() ? indexMergerV9 : indexMerger;
    return new Plumber() {

        @Override
        public Object startJob() {
            return null;
        }

        @Override
        public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
            Sink sink = getSink(row.getTimestampFromEpoch());
            if (sink == null) {
                return -1;
            }
            final int numRows = sink.add(row);
            if (!sink.canAppendRow()) {
                persist(committerSupplier.get());
            }
            return numRows;
        }

        private Sink getSink(long timestamp) {
            if (theSink.getInterval().contains(timestamp)) {
                return theSink;
            } else {
                return null;
            }
        }

        @Override
        public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
            throw new UnsupportedOperationException("Don't query me, bro.");
        }

        @Override
        public void persist(Committer committer) {
            spillIfSwappable();
            committer.run();
        }

        @Override
        public void finishJob() {
            // The segment we will upload
            File fileToUpload = null;
            try {
                // User should have persisted everything by now.
                Preconditions.checkState(!theSink.swappable(), "All data must be persisted before finishing the job!");
                if (spilled.size() == 0) {
                    throw new IllegalStateException("Nothing indexed?");
                } else if (spilled.size() == 1) {
                    fileToUpload = Iterables.getOnlyElement(spilled);
                } else {
                    List<QueryableIndex> indexes = Lists.newArrayList();
                    for (final File oneSpill : spilled) {
                        indexes.add(indexIO.loadIndex(oneSpill));
                    }
                    fileToUpload = new File(tmpSegmentDir, "merged");
                    theIndexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec());
                }
                // Map merged segment so we can extract dimensions
                final QueryableIndex mappedSegment = indexIO.loadIndex(fileToUpload);
                final DataSegment segmentToUpload = theSink.getSegment().withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions())).withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
                dataSegmentPusher.push(fileToUpload, segmentToUpload);
                log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
            } catch (Exception e) {
                log.warn(e, "Failed to merge and upload");
                throw Throwables.propagate(e);
            } finally {
                try {
                    if (fileToUpload != null) {
                        log.info("Deleting Index File[%s]", fileToUpload);
                        FileUtils.deleteDirectory(fileToUpload);
                    }
                } catch (IOException e) {
                    log.warn(e, "Error deleting directory[%s]", fileToUpload);
                }
            }
        }

        private void spillIfSwappable() {
            if (theSink.swappable()) {
                final FireHydrant indexToPersist = theSink.swap();
                final int rowsToPersist = indexToPersist.getIndex().size();
                final File dirToPersist = getSpillDir(indexToPersist.getCount());
                log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);
                try {
                    theIndexMerger.persist(indexToPersist.getIndex(), dirToPersist, config.getIndexSpec());
                    indexToPersist.swapSegment(null);
                    metrics.incrementRowOutputCount(rowsToPersist);
                    spilled.add(dirToPersist);
                } catch (Exception e) {
                    log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
                    throw Throwables.propagate(e);
                }
            }
        }

        private File getSpillDir(final int n) {
            return new File(persistDir, String.format("spill%d", n));
        }
    };
}
Also used : IndexMerger(io.druid.segment.IndexMerger) Query(io.druid.query.Query) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) Sink(io.druid.segment.realtime.plumber.Sink) QueryableIndex(io.druid.segment.QueryableIndex) InputRow(io.druid.data.input.InputRow) Plumber(io.druid.segment.realtime.plumber.Plumber) Supplier(com.google.common.base.Supplier) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Committer(io.druid.data.input.Committer) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File)
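
Plumber.add accepts a Supplier<Committer> rather than a Committer, so commit metadata is only materialized when a persist is actually triggered. A minimal sketch of that deferral, using hypothetical names (DeferredCommitSketch, its add method, and its Committer interface are illustrative, not Druid APIs):

import com.google.common.base.Supplier;

import java.util.ArrayList;
import java.util.List;

public class DeferredCommitSketch {

    // Hypothetical stand-in for io.druid.data.input.Committer.
    interface Committer extends Runnable {
    }

    static final List<String> events = new ArrayList<>();

    // Mirrors the shape of Plumber.add above: only when the in-memory row count reaches the
    // limit is the committer materialized and run, as in persist(committerSupplier.get()).
    static int add(int rowsSoFar, int maxRowsInMemory, Supplier<Committer> committerSupplier) {
        int numRows = rowsSoFar + 1;
        if (numRows >= maxRowsInMemory) {
            committerSupplier.get().run();
        }
        return numRows;
    }

    public static void main(String[] args) {
        Supplier<Committer> committerSupplier = new Supplier<Committer>() {
            @Override
            public Committer get() {
                events.add("committer created");
                return new Committer() {
                    @Override
                    public void run() {
                        events.add("committed");
                    }
                };
            }
        };

        int rows = 0;
        rows = add(rows, 3, committerSupplier); // no commit yet
        rows = add(rows, 3, committerSupplier); // no commit yet
        rows = add(rows, 3, committerSupplier); // limit reached: committer built and run
        System.out.println(events); // [committer created, committed]
    }
}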

Aggregations

Supplier (com.google.common.base.Supplier): 51 usages
IOException (java.io.IOException): 14 usages
Test (org.junit.Test): 11 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 8 usages
Map (java.util.Map): 8 usages
Path (java.nio.file.Path): 5 usages
ArrayList (java.util.ArrayList): 5 usages
List (java.util.List): 5 usages
Set (java.util.Set): 5 usages
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 4 usages
ImmutableList (com.google.common.collect.ImmutableList): 4 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 4 usages
Committer (io.druid.data.input.Committer): 4 usages
ByteBuffer (java.nio.ByteBuffer): 4 usages
Optional (java.util.Optional): 4 usages
SourcePath (com.facebook.buck.rules.SourcePath): 3 usages
InputRow (io.druid.data.input.InputRow): 3 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3 usages
File (java.io.File): 3 usages
HashMap (java.util.HashMap): 3 usages