Search in sources:

Example 31 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class SegmentPublisherHelper, the method annotateShardSpec:

/**
 * This method fills missing information in the shard spec if necessary when publishing segments.
 *
 * - When time chunk lock is used, the non-appending task should set the proper size of the core partitions for
 *   dynamically-partitioned segments. See {@link #annotateCorePartitionSetSizeFn}.
 * - When segment lock is used, the overwriting task should set the proper size of the atomic update group.
 *   See {@link #annotateAtomicUpdateGroupFn}.
 *
 * @param segments segments to publish; all segments sharing an interval must use the same shardSpec class
 * @return the input segments, annotated where necessary
 * @throws ISE if shardSpec classes are mismatched within an interval, if a time chunk is missing core
 *             partitions, or if any segment still carries a BucketNumberedShardSpec (never publishable)
 */
static Set<DataSegment> annotateShardSpec(Set<DataSegment> segments) {
    // Group segments by interval; annotation decisions are made per time chunk.
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    segments.forEach(segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment));
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final Interval interval = entry.getKey();
        final List<DataSegment> segmentsPerInterval = entry.getValue();
        final ShardSpec firstShardSpec = segmentsPerInterval.get(0).getShardSpec();
        // All segments in one interval must share a shardSpec type; a mix indicates an upstream bug.
        final boolean anyMismatch = segmentsPerInterval.stream().anyMatch(segment -> segment.getShardSpec().getClass() != firstShardSpec.getClass());
        if (anyMismatch) {
            throw new ISE("Mismatched shardSpecs in interval[%s] for segments[%s]", interval, segmentsPerInterval);
        }
        final Function<DataSegment, DataSegment> annotateFn;
        if (firstShardSpec instanceof OverwriteShardSpec) {
            annotateFn = annotateAtomicUpdateGroupFn(segmentsPerInterval.size());
        } else if (firstShardSpec instanceof BuildingShardSpec) {
            // sanity check
            // BuildingShardSpec is used in non-appending mode. In this mode,
            // the segments in each interval should have contiguous partitionIds,
            // so that they can be queryable (see PartitionHolder.isComplete()).
            int expectedCorePartitionSetSize = segmentsPerInterval.size();
            int actualCorePartitionSetSize = Math.toIntExact(segmentsPerInterval.stream().filter(segment -> segment.getShardSpec().getPartitionNum() < expectedCorePartitionSetSize).count());
            if (expectedCorePartitionSetSize != actualCorePartitionSetSize) {
                LOG.errorSegments(segmentsPerInterval, "Cannot publish segments due to incomplete time chunk");
                // Fixed format string: "[%]" was not a valid placeholder, so actualCorePartitionSetSize
                // was never rendered into the message. It must be "[%s]".
                throw new ISE("Cannot publish segments due to incomplete time chunk for interval[%s]. " + "Expected [%s] segments in the core partition, but only [%s] segments are found. " + "See task logs for more details about these segments.", interval, expectedCorePartitionSetSize, actualCorePartitionSetSize);
            }
            annotateFn = annotateCorePartitionSetSizeFn(expectedCorePartitionSetSize);
        } else if (firstShardSpec instanceof BucketNumberedShardSpec) {
            // BucketNumberedShardSpec is a transient, build-time spec and must never reach publishing.
            throw new ISE("Cannot publish segments with shardSpec[%s]", firstShardSpec);
        } else {
            annotateFn = null;
        }
        if (annotateFn != null) {
            intervalToSegments.put(interval, segmentsPerInterval.stream().map(annotateFn).collect(Collectors.toList()));
        }
    }
    // Flatten the per-interval lists back into a single result set.
    return intervalToSegments.values().stream().flatMap(Collection::stream).collect(Collectors.toSet());
}
Also used : Logger(org.apache.druid.java.util.common.logger.Logger) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) BuildingShardSpec(org.apache.druid.timeline.partition.BuildingShardSpec) Collection(java.util.Collection) OverwriteShardSpec(org.apache.druid.timeline.partition.OverwriteShardSpec) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) HashMap(java.util.HashMap) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) List(java.util.List) Map(java.util.Map) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec) HashMap(java.util.HashMap) DataSegment(org.apache.druid.timeline.DataSegment) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) BuildingShardSpec(org.apache.druid.timeline.partition.BuildingShardSpec) OverwriteShardSpec(org.apache.druid.timeline.partition.OverwriteShardSpec) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec) ArrayList(java.util.ArrayList) List(java.util.List) ISE(org.apache.druid.java.util.common.ISE) BuildingShardSpec(org.apache.druid.timeline.partition.BuildingShardSpec) OverwriteShardSpec(org.apache.druid.timeline.partition.OverwriteShardSpec) Interval(org.joda.time.Interval)

Example 32 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class DumpSegment, the method runBitmaps:

/**
 * Dumps the bitmap indexes of the given segment as a JSON object written to the configured output.
 *
 * Output shape: {"bitmapSerdeFactory": ..., "bitmaps": {column: {value: bitmap, ...}, ...}}.
 * Columns without a bitmap index are emitted as JSON null. Each bitmap is emitted either as an
 * array of row numbers (when {@code decompressBitmaps} is set) or as the serde factory's
 * base64-encoded binary form.
 *
 * @param injector used to obtain the JSON ObjectMapper
 * @param index    the segment index whose bitmaps are dumped
 * @throws IOException if opening the output stream fails
 */
private void runBitmaps(final Injector injector, final QueryableIndex index) throws IOException {
    final ObjectMapper objectMapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
    final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions();
    // Choose the serde matching the segment's bitmap implementation; only concise and roaring are known.
    final BitmapSerdeFactory bitmapSerdeFactory;
    if (bitmapFactory instanceof ConciseBitmapFactory) {
        bitmapSerdeFactory = new ConciseBitmapSerdeFactory();
    } else if (bitmapFactory instanceof RoaringBitmapFactory) {
        bitmapSerdeFactory = new RoaringBitmapSerdeFactory(null);
    } else {
        throw new ISE("Don't know which BitmapSerdeFactory to use for BitmapFactory[%s]!", bitmapFactory.getClass().getName());
    }
    final List<String> columnNames = getColumnsToInclude(index);
    withOutputStream(new Function<OutputStream, Object>() {

        @Override
        public Object apply(final OutputStream out) {
            try (final JsonGenerator jg = objectMapper.getFactory().createGenerator(out)) {
                // Top-level object: serde factory descriptor plus the per-column bitmap map.
                jg.writeStartObject();
                {
                    jg.writeObjectField("bitmapSerdeFactory", bitmapSerdeFactory);
                    jg.writeFieldName("bitmaps");
                    jg.writeStartObject();
                    {
                        for (final String columnName : columnNames) {
                            final ColumnHolder columnHolder = index.getColumnHolder(columnName);
                            final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
                            if (bitmapIndex == null) {
                                // Column has no bitmap index (e.g. numeric column): emit null.
                                jg.writeNullField(columnName);
                            } else {
                                jg.writeFieldName(columnName);
                                jg.writeStartObject();
                                // One field per dictionary value, keyed by the value itself.
                                for (int i = 0; i < bitmapIndex.getCardinality(); i++) {
                                    String val = bitmapIndex.getValue(i);
                                    // respect nulls if they are present in the dictionary
                                    jg.writeFieldName(val == null ? "null" : val);
                                    final ImmutableBitmap bitmap = bitmapIndex.getBitmap(i);
                                    if (decompressBitmaps) {
                                        // Human-readable form: explicit array of matching row numbers.
                                        jg.writeStartArray();
                                        final IntIterator iterator = bitmap.iterator();
                                        while (iterator.hasNext()) {
                                            final int rowNum = iterator.next();
                                            jg.writeNumber(rowNum);
                                        }
                                        jg.writeEndArray();
                                    } else {
                                        // Compact form: serde's binary representation (Jackson base64-encodes it).
                                        byte[] bytes = bitmapSerdeFactory.getObjectStrategy().toBytes(bitmap);
                                        if (bytes != null) {
                                            jg.writeBinary(bytes);
                                        }
                                    }
                                }
                                jg.writeEndObject();
                            }
                        }
                    }
                    jg.writeEndObject();
                }
                jg.writeEndObject();
            } catch (IOException e) {
                // Function<> cannot throw checked exceptions; rethrow unchecked.
                throw new RuntimeException(e);
            }
            return null;
        }
    });
}
Also used : ConciseBitmapFactory(org.apache.druid.collections.bitmap.ConciseBitmapFactory) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) IntIterator(org.roaringbitmap.IntIterator) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) Json(org.apache.druid.guice.annotations.Json) IOException(java.io.IOException) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) ISE(org.apache.druid.java.util.common.ISE) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory) ConciseBitmapFactory(org.apache.druid.collections.bitmap.ConciseBitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory)

Example 33 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class DumpSegment, the method run:

/**
 * Entry point for the dump-segment tool: parses the requested dump type, loads the
 * segment at {@code directory}, and dispatches to the matching dump routine.
 * Any failure during loading or dumping is rethrown as a RuntimeException.
 */
@Override
public void run() {
    final Injector injector = makeInjector();
    final IndexIO indexIO = injector.getInstance(IndexIO.class);
    // Resolve the dump type from the (case-insensitive) CLI string; unknown values are an IAE.
    final DumpType dumpType;
    try {
        dumpType = DumpType.valueOf(StringUtils.toUpperCase(dumpTypeString));
    } catch (Exception e) {
        throw new IAE("Not a valid dump type: %s", dumpTypeString);
    }
    // try-with-resources closes the index once the selected dump completes.
    try (final QueryableIndex index = indexIO.loadIndex(new File(directory))) {
        if (dumpType == DumpType.ROWS) {
            runDump(injector, index);
        } else if (dumpType == DumpType.METADATA) {
            runMetadata(injector, index);
        } else if (dumpType == DumpType.BITMAPS) {
            runBitmaps(injector, index);
        } else {
            // Defensive: a new enum constant without a handler would land here.
            throw new ISE("dumpType[%s] has no handler", dumpType);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : IndexIO(org.apache.druid.segment.IndexIO) Injector(com.google.inject.Injector) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) IAE(org.apache.druid.java.util.common.IAE) File(java.io.File) IOException(java.io.IOException)

Example 34 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class ColumnProcessors, the method makeProcessorInternal:

/**
 * Creates a "column processor": an object that wraps a single input column and layers some
 * functionality on top of it.
 *
 * The column's type (from {@code inputCapabilitiesFn}, or {@code processorFactory.defaultType()}
 * when capabilities are unknown) decides which selector is built: string columns go through the
 * dimension-selector path, all other supported types through the value-selector path.
 *
 * @param inputCapabilitiesFn   returns capabilities of the column being processed; may return null,
 *                              in which case the factory's default type is assumed
 * @param dimensionSelectorFn   creates a DimensionSelector; used only for string columns
 * @param valueSelectorFunction creates a ColumnValueSelector; used for long, float, double, and
 *                              complex columns
 * @param processorFactory      encapsulates the knowledge about how to create processors
 * @param selectorFactory       column selector factory used for creating the processor
 * @throws ISE if the effective type has no processor
 */
private static <T> T makeProcessorInternal(
    final Function<ColumnSelectorFactory, ColumnCapabilities> inputCapabilitiesFn,
    final Function<ColumnSelectorFactory, DimensionSelector> dimensionSelectorFn,
    final Function<ColumnSelectorFactory, ColumnValueSelector<?>> valueSelectorFunction,
    final ColumnProcessorFactory<T> processorFactory,
    final ColumnSelectorFactory selectorFactory
) {
    final ColumnCapabilities capabilities = inputCapabilitiesFn.apply(selectorFactory);
    // Unknown capabilities fall back to whatever type the processor factory assumes by default.
    final TypeSignature<ValueType> effectiveType = capabilities != null ? capabilities : processorFactory.defaultType();
    final ValueType columnType = effectiveType.getType();
    switch (columnType) {
        case STRING:
            return processorFactory.makeDimensionProcessor(dimensionSelectorFn.apply(selectorFactory), mayBeMultiValue(capabilities));
        case LONG:
            return processorFactory.makeLongProcessor(valueSelectorFunction.apply(selectorFactory));
        case FLOAT:
            return processorFactory.makeFloatProcessor(valueSelectorFunction.apply(selectorFactory));
        case DOUBLE:
            return processorFactory.makeDoubleProcessor(valueSelectorFunction.apply(selectorFactory));
        case COMPLEX:
            return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory));
        default:
            throw new ISE("Unsupported type[%s]", effectiveType.asTypeString());
    }
}
Also used : ValueType(org.apache.druid.segment.column.ValueType) ISE(org.apache.druid.java.util.common.ISE) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)

Example 35 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class RegisteredLookupExtractionFn, the method ensureDelegate:

/**
 * Lazily initializes and returns the delegate {@link LookupExtractionFn}, resolving the
 * named lookup from the manager on first use.
 *
 * @return the (possibly freshly constructed) delegate
 * @throws ISE if the configured lookup is not registered with the manager
 */
private LookupExtractionFn ensureDelegate() {
    if (null == delegate) {
        // http://www.javamex.com/tutorials/double_checked_locking.shtml
        // Double-checked locking: the unsynchronized read above skips the lock on the hot path.
        // NOTE(review): this is only safe if `delegate` is declared volatile (or otherwise safely
        // published) — the field declaration is not visible here; confirm it.
        synchronized (delegateLock) {
            if (null == delegate) {
                // Resolve the lookup by name; fail fast if it is not registered.
                final LookupExtractor factory = manager.get(getLookup()).orElseThrow(() -> new ISE("Lookup [%s] not found", getLookup())).getLookupExtractorFactory().get();
                delegate = new LookupExtractionFn(factory, retainMissingValue, replaceMissingValueWith, injective, optimize);
            }
        }
    }
    return delegate;
}
Also used : ISE(org.apache.druid.java.util.common.ISE)

Aggregations

ISE (org.apache.druid.java.util.common.ISE)354 IOException (java.io.IOException)95 ArrayList (java.util.ArrayList)90 Map (java.util.Map)68 List (java.util.List)60 File (java.io.File)48 Interval (org.joda.time.Interval)48 DataSegment (org.apache.druid.timeline.DataSegment)44 HashMap (java.util.HashMap)43 Nullable (javax.annotation.Nullable)43 URL (java.net.URL)36 StatusResponseHolder (org.apache.druid.java.util.http.client.response.StatusResponseHolder)33 Request (org.apache.druid.java.util.http.client.Request)30 ExecutionException (java.util.concurrent.ExecutionException)29 ImmutableMap (com.google.common.collect.ImmutableMap)28 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)28 VisibleForTesting (com.google.common.annotations.VisibleForTesting)27 Collectors (java.util.stream.Collectors)27 IAE (org.apache.druid.java.util.common.IAE)27 ImmutableList (com.google.common.collect.ImmutableList)26