
Example 6 with ColumnCapabilitiesImpl

use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.

the class IndexMergerTest method testCloser.

@Test(expected = IllegalArgumentException.class)
public void testCloser() throws Exception {
    final long timestamp = System.currentTimeMillis();
    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
    IncrementalIndexTest.populateIndex(timestamp, toPersist);
    ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1");
    capabilities.setHasSpatialIndexes(true);
    final File tempDir = temporaryFolder.newFolder();
    final File v8TmpDir = new File(tempDir, "v8-tmp");
    final File v9TmpDir = new File(tempDir, "v9-tmp");
    try {
        INDEX_MERGER.persist(toPersist, tempDir, indexSpec);
    } finally {
        if (v8TmpDir.exists()) {
            Assert.fail("v8-tmp dir not clean.");
        }
        if (v9TmpDir.exists()) {
            Assert.fail("v9-tmp dir not clean.");
        }
    }
}
Also used: IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) File(java.io.File) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl) IncrementalIndexTest(io.druid.segment.data.IncrementalIndexTest) Test(org.junit.Test)
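
The failure above is driven entirely by the capabilities flags on the column: persist rejects a column that claims spatial indexes. A minimal standalone sketch of the flag-toggling pattern, using only setters and getters that appear in these examples (the class and its main method are ours, not from the Druid tests):

import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;

public class CapabilitiesFlagSketch {
    public static void main(String[] args) {
        // Start from the schemaless defaults used in Example 7 below.
        ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
        capabilities.setType(ValueType.STRING);
        capabilities.setDictionaryEncoded(true);
        capabilities.setHasBitmapIndexes(true);
        // Flipping this flag is what makes the persist call in testCloser fail:
        // the merger path exercised there does not support spatial indexes.
        capabilities.setHasSpatialIndexes(true);
        System.out.println("hasSpatialIndexes = " + capabilities.hasSpatialIndexes());
    }
}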

Example 7 with ColumnCapabilitiesImpl

use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.

the class IncrementalIndex method toTimeAndDims.

@VisibleForTesting
TimeAndDims toTimeAndDims(InputRow row) throws IndexSizeExceededException {
    row = formatRow(row);
    if (row.getTimestampFromEpoch() < minTimestamp) {
        throw new IAE("Cannot add row[%s] because it is below the minTimestamp[%s]", row, new DateTime(minTimestamp));
    }
    final List<String> rowDimensions = row.getDimensions();
    Object[] dims;
    List<Object> overflow = null;
    synchronized (dimensionDescs) {
        dims = new Object[dimensionDescs.size()];
        for (String dimension : rowDimensions) {
            boolean wasNewDim = false;
            ColumnCapabilitiesImpl capabilities;
            DimensionDesc desc = dimensionDescs.get(dimension);
            if (desc != null) {
                capabilities = desc.getCapabilities();
            } else {
                wasNewDim = true;
                capabilities = columnCapabilities.get(dimension);
                if (capabilities == null) {
                    capabilities = new ColumnCapabilitiesImpl();
                    // For schemaless type discovery, assume everything is a String for now, can change later.
                    capabilities.setType(ValueType.STRING);
                    capabilities.setDictionaryEncoded(true);
                    capabilities.setHasBitmapIndexes(true);
                    columnCapabilities.put(dimension, capabilities);
                }
                DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
                desc = addNewDimension(dimension, capabilities, handler);
            }
            DimensionHandler handler = desc.getHandler();
            DimensionIndexer indexer = desc.getIndexer();
            Object dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension));
            // Set column capabilities as data is coming in
            if (!capabilities.hasMultipleValues() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
                capabilities.setHasMultipleValues(true);
            }
            if (wasNewDim) {
                if (overflow == null) {
                    overflow = Lists.newArrayList();
                }
                overflow.add(dimsKey);
            } else if (desc.getIndex() > dims.length || dims[desc.getIndex()] != null) {
                /*
                 * index > dims.length requires that we saw this dimension and added it to the dimensionOrder map,
                 * otherwise index is null. Since dims is initialized based on the size of dimensionOrder on each
                 * call to add, it must have been added to dimensionOrder during this InputRow.
                 *
                 * If we found an index for this dimension it means we've seen it already. If !(index > dims.length)
                 * then we saw it on a previous input row (thus it's safe to index into dims). If we found a value in
                 * the dims array for this index, it means we have seen this dimension already on this input row.
                 */
                throw new ISE("Dimension[%s] occurred more than once in InputRow", dimension);
            } else {
                dims[desc.getIndex()] = dimsKey;
            }
        }
    }
    if (overflow != null) {
        // Merge overflow and non-overflow
        Object[] newDims = new Object[dims.length + overflow.size()];
        System.arraycopy(dims, 0, newDims, 0, dims.length);
        for (int i = 0; i < overflow.size(); ++i) {
            newDims[dims.length + i] = overflow.get(i);
        }
        dims = newDims;
    }
    long truncated = 0;
    if (row.getTimestamp() != null) {
        truncated = gran.bucketStart(row.getTimestamp()).getMillis();
    }
    return new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList);
}
Also used: DimensionHandler(io.druid.segment.DimensionHandler) IAE(io.druid.java.util.common.IAE) DateTime(org.joda.time.DateTime) DimensionIndexer(io.druid.segment.DimensionIndexer) ISE(io.druid.java.util.common.ISE) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
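
The overflow-merge step at the end of toTimeAndDims is plain array bookkeeping and can be read in isolation: values for dimensions first seen on this row are appended after the already-known ones. A self-contained sketch of just that step (the class and helper name mergeOverflow are ours):

import java.util.Arrays;
import java.util.List;

public class OverflowMergeSketch {
    // Append values for newly discovered dimensions after the known ones,
    // mirroring the tail of toTimeAndDims above.
    static Object[] mergeOverflow(Object[] dims, List<Object> overflow) {
        Object[] newDims = new Object[dims.length + overflow.size()];
        System.arraycopy(dims, 0, newDims, 0, dims.length);
        for (int i = 0; i < overflow.size(); ++i) {
            newDims[dims.length + i] = overflow.get(i);
        }
        return newDims;
    }

    public static void main(String[] args) {
        Object[] known = {"a", "b"};
        List<Object> overflow = Arrays.asList("newDim1", "newDim2");
        System.out.println(Arrays.toString(mergeOverflow(known, overflow)));
        // prints: [a, b, newDim1, newDim2]
    }
}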

Example 8 with ColumnCapabilitiesImpl

use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.

the class IncrementalIndex method makeCapabilitesFromValueType.

private ColumnCapabilitiesImpl makeCapabilitesFromValueType(ValueType type) {
    ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
    capabilities.setDictionaryEncoded(type == ValueType.STRING);
    capabilities.setHasBitmapIndexes(type == ValueType.STRING);
    capabilities.setType(type);
    return capabilities;
}
Also used: ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl)
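
As a usage note, the rule here is simply that STRING columns get dictionary encoding and bitmap indexes while numeric columns get bare typed capabilities. A hedged standalone sketch that replays the same rule and prints the result per type (the class and method names are ours; the original method is private to IncrementalIndex):

import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;

public class CapabilitiesByTypeSketch {
    // Standalone copy of the rule in makeCapabilitesFromValueType above.
    static ColumnCapabilitiesImpl makeCapabilities(ValueType type) {
        ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
        capabilities.setDictionaryEncoded(type == ValueType.STRING);
        capabilities.setHasBitmapIndexes(type == ValueType.STRING);
        capabilities.setType(type);
        return capabilities;
    }

    public static void main(String[] args) {
        for (ValueType type : new ValueType[]{ValueType.STRING, ValueType.LONG, ValueType.FLOAT}) {
            ColumnCapabilitiesImpl c = makeCapabilities(type);
            System.out.println(type + ": dictionaryEncoded=" + c.isDictionaryEncoded()
                    + " bitmapIndexes=" + c.hasBitmapIndexes());
        }
    }
}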

Example 9 with ColumnCapabilitiesImpl

use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.

the class Sink method makeNewCurrIndex.

private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) {
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(minTimestamp)
            .withTimestampSpec(schema.getParser())
            .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
            .withDimensionsSpec(schema.getParser())
            .withMetrics(schema.getAggregators())
            .withRollup(schema.getGranularitySpec().isRollup())
            .build();
    final IncrementalIndex newIndex = new OnheapIncrementalIndex(indexSchema, reportParseExceptions, maxRowsInMemory);
    final FireHydrant old;
    synchronized (hydrantLock) {
        if (writable) {
            old = currHydrant;
            int newCount = 0;
            int numHydrants = hydrants.size();
            if (numHydrants > 0) {
                FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
                newCount = lastHydrant.getCount() + 1;
                if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
                    Map<String, ColumnCapabilitiesImpl> oldCapabilities;
                    if (lastHydrant.hasSwapped()) {
                        oldCapabilities = Maps.newHashMap();
                        QueryableIndex oldIndex = lastHydrant.getSegment().asQueryableIndex();
                        for (String dim : oldIndex.getAvailableDimensions()) {
                            dimOrder.add(dim);
                            oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumn(dim).getCapabilities());
                        }
                    } else {
                        IncrementalIndex oldIndex = lastHydrant.getIndex();
                        dimOrder.addAll(oldIndex.getDimensionOrder());
                        oldCapabilities = oldIndex.getColumnCapabilities();
                    }
                    newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
                }
            }
            currHydrant = new FireHydrant(newIndex, newCount, getSegment().getIdentifier());
            if (old != null) {
                numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
            }
            hydrants.add(currHydrant);
        } else {
            // Oops, someone called finishWriting while we were making this new index.
            newIndex.close();
            throw new ISE("finishWriting() called during swap");
        }
    }
    return old;
}
Also used: IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) QueryableIndex(io.druid.segment.QueryableIndex) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl)
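
The synchronized block above is a guarded swap: the replacement index is built outside the lock, then either published or disposed of under it. A minimal non-Druid sketch of the same shape (all names here are ours):

import java.io.Closeable;
import java.io.IOException;

public class GuardedSwapSketch {
    private final Object lock = new Object();
    private boolean writable = true;
    private Closeable current;

    // Analogue of makeNewCurrIndex: build the replacement first, then
    // publish it under the lock, or close it if writing has finished.
    Closeable swapIn(Closeable replacement) throws IOException {
        synchronized (lock) {
            if (!writable) {
                // Mirrors Sink closing newIndex before throwing ISE.
                replacement.close();
                throw new IllegalStateException("finishWriting() called during swap");
            }
            Closeable old = current;
            current = replacement;
            return old;
        }
    }
}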

Example 10 with ColumnCapabilitiesImpl

use of io.druid.segment.column.ColumnCapabilitiesImpl in project druid by druid-io.

the class IndexMergerV9 method makeIndexFiles.

@Override
protected File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn,
    final IndexSpec indexSpec
) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, new Function<IndexableAdapter, Metadata>() {

        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
    Metadata segmentMetadata = null;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        final File v9TmpDir = new File(outDir, "v9-tmp");
        FileUtils.forceMkdir(v9TmpDir);
        registerDeleteDirectory(closer, v9TmpDir);
        log.info("Start making v9 index files, outDir:%s", outDir);
        File tmpPeonFilesDir = new File(v9TmpDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, false);
        closer.register(ioPeon);
        long startTime = System.currentTimeMillis();
        ByteStreams.write(Ints.toByteArray(IndexIO.V9_VERSION), Files.newOutputStreamSupplier(new File(outDir, "version.bin")));
        log.info("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
        }
        log.info("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        final Map<String, ValueType> metricsValueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
        final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
        final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            mergers.add(handlers[i].makeMerger(indexSpec, v9TmpDir, ioPeon, dimCapabilities.get(i), progress));
        }
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
        final ArrayList<Boolean> dimensionSkipFlag = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
        writeDimValueAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.info("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /************* Walk through data sets, merge them, and write merged columns *************/
        progress.progress();
        final Iterable<Rowboat> theRows = makeRowIterable(adapters, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        final LongColumnSerializer timeWriter = setupTimeWriter(ioPeon, indexSpec);
        final ArrayList<GenericColumnSerializer> metWriters = setupMetricsWriters(ioPeon, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());
        mergeIndexesAndWriteColumns(adapters, progress, theRows, timeWriter, metWriters, rowNumConversions, mergers);
        /************ Create Inverted Indexes and Finalize Build Columns *************/
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metWriters);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = (DimensionMergerV9) mergers.get(i);
            merger.writeIndexes(rowNumConversions, closer);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /************* Make index.drd & metadata.drd files **************/
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used: ArrayList(java.util.ArrayList) IOPeon(io.druid.segment.data.IOPeon) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) FileSmoosher(io.druid.java.util.common.io.smoosh.FileSmoosher) Closer(com.google.common.io.Closer) ValueType(io.druid.segment.column.ValueType) MMappedQueryableSegmentizerFactory(io.druid.segment.loading.MMappedQueryableSegmentizerFactory) ColumnDescriptor(io.druid.segment.column.ColumnDescriptor) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File) Map(java.util.Map) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl)
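
Example 10 leans on Guava's Closer for cleanup: resources are registered as they are created, any failure is funneled through rethrow so close-time exceptions don't mask the original, and close() always runs. A minimal sketch of that idiom on its own, assuming only Guava and a throwaway file name:

import com.google.common.io.Closer;

import java.io.FileOutputStream;
import java.io.IOException;

public class CloserIdiomSketch {
    public static void main(String[] args) throws IOException {
        Closer closer = Closer.create();
        try {
            // register() returns its argument; Closer closes in reverse order.
            FileOutputStream out = closer.register(new FileOutputStream("example.bin"));
            out.write(new byte[]{1, 2, 3});
        } catch (Throwable t) {
            // rethrow() records t so close() attaches later failures as
            // suppressed exceptions instead of replacing the original.
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}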

Aggregations

ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl) 10
ISE (io.druid.java.util.common.ISE) 4
ColumnCapabilities (io.druid.segment.column.ColumnCapabilities) 3
ValueType (io.druid.segment.column.ValueType) 3
File (java.io.File) 3
Closer (com.google.common.io.Closer) 2
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory) 2
DimensionHandler (io.druid.segment.DimensionHandler) 2
QueryableIndex (io.druid.segment.QueryableIndex) 2
IOPeon (io.druid.segment.data.IOPeon) 2
TmpFileIOPeon (io.druid.segment.data.TmpFileIOPeon) 2
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex) 2
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex) 2
FileOutputStream (java.io.FileOutputStream) 2
IntBuffer (java.nio.IntBuffer) 2
ArrayList (java.util.ArrayList) 2
Map (java.util.Map) 2
Nullable (javax.annotation.Nullable) 2
DateTime (org.joda.time.DateTime) 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 1