Search in sources :

Example 1 with BTreeFieldPrefixNSMLeafFrame

use of org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame in project asterixdb by apache.

Method compress of the class FieldPrefixCompressor.

/**
 * Attempts to re-compress a field-prefix leaf frame by extracting shared leading key
 * fields into prefix tuples and rewriting the page contents.
 *
 * <p>The method returns {@code false} without rewriting tuple data when:
 * <ul>
 *   <li>the frame has no tuples (the prefix tuple count and free-space fields are reset
 *       to their original values first),</li>
 *   <li>the comparator has exactly one key field,</li>
 *   <li>the fraction of uncompressed tuples is below {@code ratioThreshold},</li>
 *   <li>the analysis pass ({@code getKeyPartitions}) yields no key partitions, or</li>
 *   <li>the re-compressed layout would not fit into the page buffer.</li>
 * </ul>
 * Otherwise the new tuples and slots are built in a temporary buffer, copied into the
 * page, and the frame's space/count fields are updated.
 *
 * @param indexFrame the frame to compress; it is cast to {@code BTreeFieldPrefixNSMLeafFrame}
 *                   and its buffer is accessed through {@code ByteBuffer.array()}
 * @param cmp        supplies the key field count and the per-field comparators used to
 *                   detect matching prefixes
 * @return {@code true} if the page was rewritten, {@code false} if it was left as is
 * @throws Exception if the number of prefix bytes written differs from the number
 *                   computed during the planning phase
 */
@Override
public boolean compress(ITreeIndexFrame indexFrame, MultiComparator cmp) throws Exception {
    BTreeFieldPrefixNSMLeafFrame frame = (BTreeFieldPrefixNSMLeafFrame) indexFrame;
    int tupleCount = frame.getTupleCount();
    if (tupleCount <= 0) {
        // empty frame: drop all prefix tuples and restore the original free-space fields
        frame.setPrefixTupleCount(0);
        frame.setFreeSpaceOff(frame.getOrigFreeSpaceOff());
        frame.setTotalFreeSpace(frame.getOrigTotalFreeSpace());
        return false;
    }
    // no compression is attempted for single-field keys
    if (cmp.getKeyFieldCount() == 1) {
        return false;
    }
    // only compress when the share of uncompressed tuples reaches ratioThreshold
    int uncompressedTupleCount = frame.getUncompressedTupleCount();
    float ratio = (float) uncompressedTupleCount / (float) tupleCount;
    if (ratio < ratioThreshold)
        return false;
    IBinaryComparator[] cmps = cmp.getComparators();
    int fieldCount = typeTraits.length;
    ByteBuffer buf = frame.getBuffer();
    byte[] pageArray = buf.array();
    IPrefixSlotManager slotManager = (IPrefixSlotManager) frame.getSlotManager();
    // perform analysis pass
    ArrayList<KeyPartition> keyPartitions = getKeyPartitions(frame, cmp, occurrenceThreshold);
    if (keyPartitions.size() == 0)
        return false;
    // for each keyPartition, determine the best prefix length for
    // compression, and count how many prefix tuples we would need in total
    int totalSlotsNeeded = 0;
    int totalPrefixBytes = 0;
    for (KeyPartition kp : keyPartitions) {
        // pick the pmi entry with the largest (spaceBenefit - spaceCost)
        for (int j = 0; j < kp.pmi.length; j++) {
            int benefitMinusCost = kp.pmi[j].spaceBenefit - kp.pmi[j].spaceCost;
            if (benefitMinusCost > kp.maxBenefitMinusCost) {
                kp.maxBenefitMinusCost = benefitMinusCost;
                kp.maxPmiIndex = j;
            }
        }
        // ignore keyPartitions with no benefit and don't count bytes and slots needed
        if (kp.maxBenefitMinusCost <= 0)
            continue;
        totalPrefixBytes += kp.pmi[kp.maxPmiIndex].prefixBytes;
        totalSlotsNeeded += kp.pmi[kp.maxPmiIndex].prefixSlotsNeeded;
    }
    // we use a greedy heuristic to solve this "knapsack"-like problem
    // (every keyPartition has a space savings and a number of slots
    // required, but the number of slots are constrained by MAX_PREFIX_SLOTS)
    // we sort the keyPartitions by maxBenefitMinusCost / prefixSlotsNeeded
    // and later choose the top MAX_PREFIX_SLOTS
    int[] newPrefixSlots;
    if (totalSlotsNeeded > FieldPrefixSlotManager.MAX_PREFIX_SLOTS) {
        // order keyPartitions by the heuristic function
        SortByHeuristic heuristicComparator = new SortByHeuristic();
        Collections.sort(keyPartitions, heuristicComparator);
        int slotsUsed = 0;
        // 1-based position (in heuristic order) of the first partition that no longer fits
        int numberKeyPartitions = -1;
        for (int i = 0; i < keyPartitions.size(); i++) {
            KeyPartition kp = keyPartitions.get(i);
            slotsUsed += kp.pmi[kp.maxPmiIndex].prefixSlotsNeeded;
            if (slotsUsed > FieldPrefixSlotManager.MAX_PREFIX_SLOTS) {
                numberKeyPartitions = i + 1;
                // undo the last addition so slotsUsed only covers the partitions that are kept
                slotsUsed -= kp.pmi[kp.maxPmiIndex].prefixSlotsNeeded;
                break;
            }
        }
        newPrefixSlots = new int[slotsUsed];
        // remove irrelevant keyPartitions and adjust total prefix bytes
        // (partitions are dropped from the end until fewer than numberKeyPartitions remain)
        while (keyPartitions.size() >= numberKeyPartitions) {
            int lastIndex = keyPartitions.size() - 1;
            KeyPartition kp = keyPartitions.get(lastIndex);
            // only partitions with a positive benefit were counted in totalPrefixBytes above
            if (kp.maxBenefitMinusCost > 0)
                totalPrefixBytes -= kp.pmi[kp.maxPmiIndex].prefixBytes;
            keyPartitions.remove(lastIndex);
        }
        // re-order keyPartitions by prefix (corresponding to original order)
        SortByOriginalRank originalRankComparator = new SortByOriginalRank();
        Collections.sort(keyPartitions, originalRankComparator);
    } else {
        newPrefixSlots = new int[totalSlotsNeeded];
    }
    int[] newTupleSlots = new int[tupleCount];
    // WARNING: our hope is that compression is infrequent
    // here we allocate a big chunk of memory to temporarily hold the new, re-compressed tuples
    // in general it is very hard to avoid this step
    // layout in the temporary buffer: prefix tuples start at the original free-space offset,
    // regular tuples start right after the space reserved for all prefix bytes
    int prefixFreeSpace = frame.getOrigFreeSpaceOff();
    int tupleFreeSpace = prefixFreeSpace + totalPrefixBytes;
    byte[] buffer = new byte[buf.capacity()];
    ByteBuffer byteBuffer = ByteBuffer.wrap(buffer);
    // perform compression, and reorg
    // we assume that the keyPartitions are sorted by the prefixes
    // (i.e., in the logical target order)
    int kpIndex = 0;
    int tupleIndex = 0;
    int prefixTupleIndex = 0;
    // recounted from scratch while the tuples are rewritten
    uncompressedTupleCount = 0;
    TypeAwareTupleWriter tupleWriter = new TypeAwareTupleWriter(typeTraits);
    FieldPrefixTupleReference tupleToWrite = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
    tupleToWrite.setFieldCount(fieldCount);
    while (tupleIndex < tupleCount) {
        if (kpIndex < keyPartitions.size()) {
            // beginning of keyPartition found, compress entire keyPartition
            if (tupleIndex == keyPartitions.get(kpIndex).firstTupleIndex) {
                // number of fields we decided to use for compression of this keyPartition
                int fieldCountToCompress = keyPartitions.get(kpIndex).maxPmiIndex + 1;
                // a segment is a run of consecutive tuples sharing the same prefix
                int segmentStart = keyPartitions.get(kpIndex).firstTupleIndex;
                int tuplesInSegment = 1;
                FieldPrefixTupleReference prevTuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
                prevTuple.setFieldCount(fieldCount);
                FieldPrefixTupleReference tuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
                tuple.setFieldCount(fieldCount);
                // compare each tuple of the partition with its predecessor
                for (int i = tupleIndex + 1; i <= keyPartitions.get(kpIndex).lastTupleIndex; i++) {
                    prevTuple.resetByTupleIndex(frame, i - 1);
                    tuple.resetByTupleIndex(frame, i);
                    // check if tuples match in fieldCountToCompress of their first fields
                    int prefixFieldsMatch = 0;
                    for (int j = 0; j < fieldCountToCompress; j++) {
                        if (cmps[j].compare(pageArray, prevTuple.getFieldStart(j), prevTuple.getFieldLength(j), pageArray, tuple.getFieldStart(j), tuple.getFieldLength(j)) == 0)
                            prefixFieldsMatch++;
                        else
                            break;
                    }
                    // the two tuples must match in exactly the number of fields we decided
                    // to compress for this keyPartition
                    // processSegments is the number of segments to flush now (0, 1 or 2):
                    // one when tuple i starts a new segment, plus one when tuple i is the
                    // last tuple of the partition and the final segment must be written too
                    int processSegments = 0;
                    if (prefixFieldsMatch == fieldCountToCompress)
                        tuplesInSegment++;
                    else
                        processSegments++;
                    if (i == keyPartitions.get(kpIndex).lastTupleIndex)
                        processSegments++;
                    for (int r = 0; r < processSegments; r++) {
                        // compress current segment and then start new segment
                        if (tuplesInSegment < occurrenceThreshold || fieldCountToCompress <= 0) {
                            // write tuples uncompressed
                            for (int j = 0; j < tuplesInSegment; j++) {
                                int slotNum = segmentStart + j;
                                tupleToWrite.resetByTupleIndex(frame, slotNum);
                                // slot arrays are filled back-to-front (index tupleCount - 1 - slotNum)
                                newTupleSlots[tupleCount - 1 - slotNum] = slotManager.encodeSlotFields(FieldPrefixSlotManager.TUPLE_UNCOMPRESSED, tupleFreeSpace);
                                tupleFreeSpace += tupleWriter.writeTuple(tupleToWrite, byteBuffer, tupleFreeSpace);
                            }
                            uncompressedTupleCount += tuplesInSegment;
                        } else {
                            // segment has enough tuples: compress segment, extract prefix,
                            // write prefix tuple to buffer, and set prefix slot
                            // NOTE(review): the prefix is taken from prevTuple (tuple i - 1); in the
                            // second pass of a two-pass flush the segment consists of tuple i only,
                            // so this appears to rely on occurrenceThreshold > 1 sending such
                            // single-tuple segments to the uncompressed branch - confirm
                            newPrefixSlots[newPrefixSlots.length - 1 - prefixTupleIndex] = slotManager.encodeSlotFields(fieldCountToCompress, prefixFreeSpace);
                            prefixFreeSpace += tupleWriter.writeTupleFields(prevTuple, 0, fieldCountToCompress, byteBuffer.array(), prefixFreeSpace);
                            // truncate tuples, write them to buffer, and set tuple slots
                            for (int j = 0; j < tuplesInSegment; j++) {
                                int currTupleIndex = segmentStart + j;
                                tupleToWrite.resetByTupleIndex(frame, currTupleIndex);
                                // the slot's first field refers to the prefix tuple of this segment
                                newTupleSlots[tupleCount - 1 - currTupleIndex] = slotManager.encodeSlotFields(prefixTupleIndex, tupleFreeSpace);
                                // only the fields after the compressed prefix are written
                                tupleFreeSpace += tupleWriter.writeTupleFields(tupleToWrite, fieldCountToCompress, fieldCount - fieldCountToCompress, byteBuffer.array(), tupleFreeSpace);
                            }
                            prefixTupleIndex++;
                        }
                        // begin new segment
                        segmentStart = i;
                        tuplesInSegment = 1;
                    }
                }
                // jump to the partition's last tuple; the increment at the end of the
                // outer loop then moves past the partition
                tupleIndex = keyPartitions.get(kpIndex).lastTupleIndex;
                kpIndex++;
            } else {
                // just write the tuple uncompressed
                tupleToWrite.resetByTupleIndex(frame, tupleIndex);
                newTupleSlots[tupleCount - 1 - tupleIndex] = slotManager.encodeSlotFields(FieldPrefixSlotManager.TUPLE_UNCOMPRESSED, tupleFreeSpace);
                tupleFreeSpace += tupleWriter.writeTuple(tupleToWrite, byteBuffer, tupleFreeSpace);
                uncompressedTupleCount++;
            }
        } else {
            // no key partitions left: just write the tuple uncompressed
            tupleToWrite.resetByTupleIndex(frame, tupleIndex);
            newTupleSlots[tupleCount - 1 - tupleIndex] = slotManager.encodeSlotFields(FieldPrefixSlotManager.TUPLE_UNCOMPRESSED, tupleFreeSpace);
            tupleFreeSpace += tupleWriter.writeTuple(tupleToWrite, byteBuffer, tupleFreeSpace);
            uncompressedTupleCount++;
        }
        tupleIndex++;
    }
    // sanity check to see if we have written exactly as many prefix bytes as computed before
    if (prefixFreeSpace != frame.getOrigFreeSpaceOff() + totalPrefixBytes) {
        throw new Exception("ERROR: Number of prefix bytes written don't match computed number");
    }
    // in some rare instances our procedure could even increase the space requirement which is very dangerous
    // this can happen due to the greedy solution of the knapsack-like problem
    // therefore, we check if the new space exceeds the page size to avoid the only danger of
    // an increasing space
    // totalSpace = end of the tuple data plus the space for all tuple slots and prefix slots
    int totalSpace = tupleFreeSpace + newTupleSlots.length * slotManager.getSlotSize() + newPrefixSlots.length * slotManager.getSlotSize();
    if (totalSpace > buf.capacity())
        // just leave the page as is
        return false;
    // copy new tuple and new slots into original page
    // (only the region between the original free-space offset and the end of the new tuple data)
    int freeSpaceAfterInit = frame.getOrigFreeSpaceOff();
    System.arraycopy(buffer, freeSpaceAfterInit, pageArray, freeSpaceAfterInit, tupleFreeSpace - freeSpaceAfterInit);
    // copy prefix slots
    // slots are written from the end of the page towards lower offsets; the arrays are read
    // back-to-front, so prefix 0 lands in the last slot of the page
    int slotOffRunner = buf.capacity() - slotManager.getSlotSize();
    for (int i = 0; i < newPrefixSlots.length; i++) {
        buf.putInt(slotOffRunner, newPrefixSlots[newPrefixSlots.length - 1 - i]);
        slotOffRunner -= slotManager.getSlotSize();
    }
    // copy tuple slots (they continue directly below the prefix slots)
    for (int i = 0; i < newTupleSlots.length; i++) {
        buf.putInt(slotOffRunner, newTupleSlots[newTupleSlots.length - 1 - i]);
        slotOffRunner -= slotManager.getSlotSize();
    }
    // update space fields, TODO: we need to update more fields
    frame.setFreeSpaceOff(tupleFreeSpace);
    frame.setPrefixTupleCount(newPrefixSlots.length);
    frame.setUncompressedTupleCount(uncompressedTupleCount);
    // free space is whatever remains after the tuple data and all slots
    int totalFreeSpace = buf.capacity() - tupleFreeSpace - ((newTupleSlots.length + newPrefixSlots.length) * slotManager.getSlotSize());
    frame.setTotalFreeSpace(totalFreeSpace);
    return true;
}
Also used : IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator) ByteBuffer(java.nio.ByteBuffer) BTreeFieldPrefixNSMLeafFrame(org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IPrefixSlotManager(org.apache.hyracks.storage.am.btree.api.IPrefixSlotManager) FieldPrefixTupleReference(org.apache.hyracks.storage.am.btree.impls.FieldPrefixTupleReference) TypeAwareTupleWriter(org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter)

Example 2 with BTreeFieldPrefixNSMLeafFrame

use of org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame in project asterixdb by apache.

Method resetByTupleIndex of the class FieldPrefixPrefixTupleReference.

/**
 * Points this reference at the prefix tuple stored in the given prefix slot.
 *
 * <p>Assumes {@code tupleIndex} refers to a prefix tuple (not a regular tuple). The
 * slot's first encoded field is used as the field count and its second encoded field
 * as the start offset of the prefix tuple inside the frame's backing array.
 *
 * @param frame      the frame to read from; cast to {@code BTreeFieldPrefixNSMLeafFrame}
 * @param tupleIndex index of the prefix slot to resolve
 */
@Override
public void resetByTupleIndex(ITreeIndexFrame frame, int tupleIndex) {
    final BTreeFieldPrefixNSMLeafFrame prefixFrame = (BTreeFieldPrefixNSMLeafFrame) frame;
    final IPrefixSlotManager prefixSlots = prefixFrame.getSlotManager();

    // locate the slot and read its encoded (fieldCount, offset) pair
    final int slotOffset = prefixSlots.getPrefixSlotOff(tupleIndex);
    final int encodedSlot = prefixFrame.getBuffer().getInt(slotOffset);

    setFieldCount(prefixSlots.decodeFirstSlotField(encodedSlot));
    tupleStartOff = prefixSlots.decodeSecondSlotField(encodedSlot);

    // re-anchor this reference on the frame's backing array at the decoded offset
    buf = prefixFrame.getBuffer().array();
    resetByTupleOffset(buf, tupleStartOff);
}
Also used : IPrefixSlotManager(org.apache.hyracks.storage.am.btree.api.IPrefixSlotManager) BTreeFieldPrefixNSMLeafFrame(org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame)

Example 3 with BTreeFieldPrefixNSMLeafFrame

use of org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame in project asterixdb by apache.

Method test01 of the class FieldPrefixNSMTest.

/**
 * Inserts and then deletes tuples of three integer fields on a single field-prefix leaf
 * frame, interleaved with randomly triggered {@code compact()} and {@code compress()}
 * calls. After each such call the printed frame contents must equal the contents
 * printed immediately before it.
 */
@Test
public void test01() throws Exception {
    // three integer fields per tuple
    final int fieldCount = 3;
    final ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        typeTraits[f] = IntegerPointable.TYPE_TRAITS;
    }

    // all three fields are keys, each compared as an integer
    final int keyFieldCount = 3;
    final IBinaryComparator[] cmps = new IBinaryComparator[keyFieldCount];
    for (int k = 0; k < keyFieldCount; k++) {
        cmps[k] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY).createBinaryComparator();
    }
    final MultiComparator cmp = new MultiComparator(cmps);

    // serdes are only needed to print the frame contents
    @SuppressWarnings("rawtypes") ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };

    // fixed seed keeps the sequence of random decisions reproducible
    final Random rnd = new Random(50);

    final IBufferCache bufferCache = harness.getBufferCache();
    final IFileMapProvider fileMapProvider = harness.getFileMapProvider();
    bufferCache.createFile(harness.getFileReference());
    final int btreeFileId = fileMapProvider.lookupFileId(harness.getFileReference());
    bufferCache.openFile(btreeFileId);
    final IHyracksTaskContext ctx = harness.getHyracksTaskContext();
    final ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(btreeFileId, 0), true);
    try {
        final ITreeIndexTupleWriter tupleWriter = new TypeAwareTupleWriter(typeTraits);
        final BTreeFieldPrefixNSMLeafFrame frame = new BTreeFieldPrefixNSMLeafFrame(tupleWriter);
        frame.setPage(page);
        frame.initBuffer((byte) 0);
        frame.setMultiComparator(cmp);
        frame.setPrefixTupleCount(0);

        final int compactFreq = 5;
        final int compressFreq = 5;
        final int smallMax = 10;
        final int numRecords = 1000;
        final int[][] savedFields = new int[numRecords][3];

        // phase 1: insert records, randomly compacting and compressing in between
        for (int rec = 0; rec < numRecords; rec++) {
            final int ordinal = rec + 1;
            if (LOGGER.isLoggable(Level.INFO) && ordinal % 100 == 0) {
                LOGGER.info("INSERTING " + ordinal + " / " + numRecords);
            }
            final int a = rnd.nextInt() % smallMax;
            final int b = rnd.nextInt() % smallMax;
            final ITupleReference tuple = createTuple(ctx, a, b, rec, false);
            try {
                final int insertAt = frame.findInsertTupleIndex(tuple);
                frame.insert(tuple, insertAt);
            } catch (Exception e) {
                e.printStackTrace();
            }
            // remember the fields so the same tuple can be deleted in phase 2
            final int[] saved = savedFields[rec];
            saved[0] = a;
            saved[1] = b;
            saved[2] = rec;
            if (rnd.nextInt() % compactFreq == 0) {
                final String before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                frame.compact();
                final String after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                Assert.assertEquals(before, after);
            }
            if (rnd.nextInt() % compressFreq == 0) {
                final String before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                frame.compress();
                final String after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                Assert.assertEquals(before, after);
            }
        }

        // phase 2: delete the saved records, again with random compact/compress calls
        for (int rec = 0; rec < numRecords; rec++) {
            final int ordinal = rec + 1;
            if (LOGGER.isLoggable(Level.INFO) && ordinal % 100 == 0) {
                LOGGER.info("DELETING " + ordinal + " / " + numRecords);
            }
            final int[] saved = savedFields[rec];
            final ITupleReference tuple = createTuple(ctx, saved[0], saved[1], saved[2], false);
            try {
                final int deleteAt = frame.findDeleteTupleIndex(tuple);
                frame.delete(tuple, deleteAt);
            } catch (Exception e) {
                // NOTE(review): exceptions from the delete path are silently ignored here
            }
            if (rnd.nextInt() % compactFreq == 0) {
                final String before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                frame.compact();
                final String after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                Assert.assertEquals(before, after);
            }
            if (rnd.nextInt() % compressFreq == 0) {
                final String before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                frame.compress();
                final String after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
                Assert.assertEquals(before, after);
            }
        }
    } finally {
        bufferCache.unpin(page);
        bufferCache.closeFile(btreeFileId);
        bufferCache.close();
    }
}
Also used : ICachedPage(org.apache.hyracks.storage.common.buffercache.ICachedPage) ITreeIndexTupleWriter(org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) MultiComparator(org.apache.hyracks.storage.common.MultiComparator) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) BTreeFieldPrefixNSMLeafFrame(org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IFileMapProvider(org.apache.hyracks.storage.common.file.IFileMapProvider) Random(java.util.Random) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IBufferCache(org.apache.hyracks.storage.common.buffercache.IBufferCache) TypeAwareTupleWriter(org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter) Test(org.junit.Test) AbstractBTreeTest(org.apache.hyracks.storage.am.btree.util.AbstractBTreeTest)

Aggregations

BTreeFieldPrefixNSMLeafFrame (org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame)3 IBinaryComparator (org.apache.hyracks.api.dataflow.value.IBinaryComparator)2 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)2 IPrefixSlotManager (org.apache.hyracks.storage.am.btree.api.IPrefixSlotManager)2 TypeAwareTupleWriter (org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter)2 ByteBuffer (java.nio.ByteBuffer)1 Random (java.util.Random)1 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)1 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)1 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)1 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)1 FieldPrefixTupleReference (org.apache.hyracks.storage.am.btree.impls.FieldPrefixTupleReference)1 AbstractBTreeTest (org.apache.hyracks.storage.am.btree.util.AbstractBTreeTest)1 ITreeIndexTupleWriter (org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter)1 MultiComparator (org.apache.hyracks.storage.common.MultiComparator)1 IBufferCache (org.apache.hyracks.storage.common.buffercache.IBufferCache)1 ICachedPage (org.apache.hyracks.storage.common.buffercache.ICachedPage)1 IFileMapProvider (org.apache.hyracks.storage.common.file.IFileMapProvider)1 Test (org.junit.Test)1