use of org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame in project asterixdb by apache.
the class FieldPrefixCompressor method compress.
@Override
public boolean compress(ITreeIndexFrame indexFrame, MultiComparator cmp) throws Exception {
BTreeFieldPrefixNSMLeafFrame frame = (BTreeFieldPrefixNSMLeafFrame) indexFrame;
int tupleCount = frame.getTupleCount();
if (tupleCount <= 0) {
frame.setPrefixTupleCount(0);
frame.setFreeSpaceOff(frame.getOrigFreeSpaceOff());
frame.setTotalFreeSpace(frame.getOrigTotalFreeSpace());
return false;
}
if (cmp.getKeyFieldCount() == 1) {
return false;
}
int uncompressedTupleCount = frame.getUncompressedTupleCount();
float ratio = (float) uncompressedTupleCount / (float) tupleCount;
if (ratio < ratioThreshold)
return false;
IBinaryComparator[] cmps = cmp.getComparators();
int fieldCount = typeTraits.length;
ByteBuffer buf = frame.getBuffer();
byte[] pageArray = buf.array();
IPrefixSlotManager slotManager = (IPrefixSlotManager) frame.getSlotManager();
// perform analysis pass
ArrayList<KeyPartition> keyPartitions = getKeyPartitions(frame, cmp, occurrenceThreshold);
if (keyPartitions.size() == 0)
return false;
// for each keyPartition, determine the best prefix length for
// compression, and count how many prefix tuple we would need in total
int totalSlotsNeeded = 0;
int totalPrefixBytes = 0;
for (KeyPartition kp : keyPartitions) {
for (int j = 0; j < kp.pmi.length; j++) {
int benefitMinusCost = kp.pmi[j].spaceBenefit - kp.pmi[j].spaceCost;
if (benefitMinusCost > kp.maxBenefitMinusCost) {
kp.maxBenefitMinusCost = benefitMinusCost;
kp.maxPmiIndex = j;
}
}
// ignore keyPartitions with no benefit and don't count bytes and slots needed
if (kp.maxBenefitMinusCost <= 0)
continue;
totalPrefixBytes += kp.pmi[kp.maxPmiIndex].prefixBytes;
totalSlotsNeeded += kp.pmi[kp.maxPmiIndex].prefixSlotsNeeded;
}
// we use a greedy heuristic to solve this "knapsack"-like problem
// (every keyPartition has a space savings and a number of slots
// required, but the number of slots are constrained by MAX_PREFIX_SLOTS)
// we sort the keyPartitions by maxBenefitMinusCost / prefixSlotsNeeded
// and later choose the top MAX_PREFIX_SLOTS
int[] newPrefixSlots;
if (totalSlotsNeeded > FieldPrefixSlotManager.MAX_PREFIX_SLOTS) {
// order keyPartitions by the heuristic function
SortByHeuristic heuristicComparator = new SortByHeuristic();
Collections.sort(keyPartitions, heuristicComparator);
int slotsUsed = 0;
int numberKeyPartitions = -1;
for (int i = 0; i < keyPartitions.size(); i++) {
KeyPartition kp = keyPartitions.get(i);
slotsUsed += kp.pmi[kp.maxPmiIndex].prefixSlotsNeeded;
if (slotsUsed > FieldPrefixSlotManager.MAX_PREFIX_SLOTS) {
numberKeyPartitions = i + 1;
slotsUsed -= kp.pmi[kp.maxPmiIndex].prefixSlotsNeeded;
break;
}
}
newPrefixSlots = new int[slotsUsed];
// remove irrelevant keyPartitions and adjust total prefix bytes
while (keyPartitions.size() >= numberKeyPartitions) {
int lastIndex = keyPartitions.size() - 1;
KeyPartition kp = keyPartitions.get(lastIndex);
if (kp.maxBenefitMinusCost > 0)
totalPrefixBytes -= kp.pmi[kp.maxPmiIndex].prefixBytes;
keyPartitions.remove(lastIndex);
}
// re-order keyPartitions by prefix (corresponding to original order)
SortByOriginalRank originalRankComparator = new SortByOriginalRank();
Collections.sort(keyPartitions, originalRankComparator);
} else {
newPrefixSlots = new int[totalSlotsNeeded];
}
int[] newTupleSlots = new int[tupleCount];
// WARNING: our hope is that compression is infrequent
// here we allocate a big chunk of memory to temporary hold the new, re-compressed tuple
// in general it is very hard to avoid this step
int prefixFreeSpace = frame.getOrigFreeSpaceOff();
int tupleFreeSpace = prefixFreeSpace + totalPrefixBytes;
byte[] buffer = new byte[buf.capacity()];
ByteBuffer byteBuffer = ByteBuffer.wrap(buffer);
// perform compression, and reorg
// we assume that the keyPartitions are sorted by the prefixes
// (i.e., in the logical target order)
int kpIndex = 0;
int tupleIndex = 0;
int prefixTupleIndex = 0;
uncompressedTupleCount = 0;
TypeAwareTupleWriter tupleWriter = new TypeAwareTupleWriter(typeTraits);
FieldPrefixTupleReference tupleToWrite = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
tupleToWrite.setFieldCount(fieldCount);
while (tupleIndex < tupleCount) {
if (kpIndex < keyPartitions.size()) {
// beginning of keyPartition found, compress entire keyPartition
if (tupleIndex == keyPartitions.get(kpIndex).firstTupleIndex) {
// number of fields we decided to use for compression of this keyPartition
int fieldCountToCompress = keyPartitions.get(kpIndex).maxPmiIndex + 1;
int segmentStart = keyPartitions.get(kpIndex).firstTupleIndex;
int tuplesInSegment = 1;
FieldPrefixTupleReference prevTuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
prevTuple.setFieldCount(fieldCount);
FieldPrefixTupleReference tuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
tuple.setFieldCount(fieldCount);
for (int i = tupleIndex + 1; i <= keyPartitions.get(kpIndex).lastTupleIndex; i++) {
prevTuple.resetByTupleIndex(frame, i - 1);
tuple.resetByTupleIndex(frame, i);
// check if tuples match in fieldCountToCompress of their first fields
int prefixFieldsMatch = 0;
for (int j = 0; j < fieldCountToCompress; j++) {
if (cmps[j].compare(pageArray, prevTuple.getFieldStart(j), prevTuple.getFieldLength(j), pageArray, tuple.getFieldStart(j), tuple.getFieldLength(j)) == 0)
prefixFieldsMatch++;
else
break;
}
// the two tuples must match in exactly the number of fields we decided
// to compress for this keyPartition
int processSegments = 0;
if (prefixFieldsMatch == fieldCountToCompress)
tuplesInSegment++;
else
processSegments++;
if (i == keyPartitions.get(kpIndex).lastTupleIndex)
processSegments++;
for (int r = 0; r < processSegments; r++) {
// compress current segment and then start new segment
if (tuplesInSegment < occurrenceThreshold || fieldCountToCompress <= 0) {
// write tuples uncompressed
for (int j = 0; j < tuplesInSegment; j++) {
int slotNum = segmentStart + j;
tupleToWrite.resetByTupleIndex(frame, slotNum);
newTupleSlots[tupleCount - 1 - slotNum] = slotManager.encodeSlotFields(FieldPrefixSlotManager.TUPLE_UNCOMPRESSED, tupleFreeSpace);
tupleFreeSpace += tupleWriter.writeTuple(tupleToWrite, byteBuffer, tupleFreeSpace);
}
uncompressedTupleCount += tuplesInSegment;
} else {
// segment has enough tuples: compress segment, extract prefix,
// write prefix tuple to buffer, and set prefix slot
newPrefixSlots[newPrefixSlots.length - 1 - prefixTupleIndex] = slotManager.encodeSlotFields(fieldCountToCompress, prefixFreeSpace);
prefixFreeSpace += tupleWriter.writeTupleFields(prevTuple, 0, fieldCountToCompress, byteBuffer.array(), prefixFreeSpace);
// truncate tuples, write them to buffer, and set tuple slots
for (int j = 0; j < tuplesInSegment; j++) {
int currTupleIndex = segmentStart + j;
tupleToWrite.resetByTupleIndex(frame, currTupleIndex);
newTupleSlots[tupleCount - 1 - currTupleIndex] = slotManager.encodeSlotFields(prefixTupleIndex, tupleFreeSpace);
tupleFreeSpace += tupleWriter.writeTupleFields(tupleToWrite, fieldCountToCompress, fieldCount - fieldCountToCompress, byteBuffer.array(), tupleFreeSpace);
}
prefixTupleIndex++;
}
// begin new segment
segmentStart = i;
tuplesInSegment = 1;
}
}
tupleIndex = keyPartitions.get(kpIndex).lastTupleIndex;
kpIndex++;
} else {
// just write the tuple uncompressed
tupleToWrite.resetByTupleIndex(frame, tupleIndex);
newTupleSlots[tupleCount - 1 - tupleIndex] = slotManager.encodeSlotFields(FieldPrefixSlotManager.TUPLE_UNCOMPRESSED, tupleFreeSpace);
tupleFreeSpace += tupleWriter.writeTuple(tupleToWrite, byteBuffer, tupleFreeSpace);
uncompressedTupleCount++;
}
} else {
// just write the tuple uncompressed
tupleToWrite.resetByTupleIndex(frame, tupleIndex);
newTupleSlots[tupleCount - 1 - tupleIndex] = slotManager.encodeSlotFields(FieldPrefixSlotManager.TUPLE_UNCOMPRESSED, tupleFreeSpace);
tupleFreeSpace += tupleWriter.writeTuple(tupleToWrite, byteBuffer, tupleFreeSpace);
uncompressedTupleCount++;
}
tupleIndex++;
}
// sanity check to see if we have written exactly as many prefix bytes as computed before
if (prefixFreeSpace != frame.getOrigFreeSpaceOff() + totalPrefixBytes) {
throw new Exception("ERROR: Number of prefix bytes written don't match computed number");
}
// in some rare instances our procedure could even increase the space requirement which is very dangerous
// this can happen to to the greedy solution of the knapsack-like problem
// therefore, we check if the new space exceeds the page size to avoid the only danger of
// an increasing space
int totalSpace = tupleFreeSpace + newTupleSlots.length * slotManager.getSlotSize() + newPrefixSlots.length * slotManager.getSlotSize();
if (totalSpace > buf.capacity())
// just leave the page as is
return false;
// copy new tuple and new slots into original page
int freeSpaceAfterInit = frame.getOrigFreeSpaceOff();
System.arraycopy(buffer, freeSpaceAfterInit, pageArray, freeSpaceAfterInit, tupleFreeSpace - freeSpaceAfterInit);
// copy prefix slots
int slotOffRunner = buf.capacity() - slotManager.getSlotSize();
for (int i = 0; i < newPrefixSlots.length; i++) {
buf.putInt(slotOffRunner, newPrefixSlots[newPrefixSlots.length - 1 - i]);
slotOffRunner -= slotManager.getSlotSize();
}
// copy tuple slots
for (int i = 0; i < newTupleSlots.length; i++) {
buf.putInt(slotOffRunner, newTupleSlots[newTupleSlots.length - 1 - i]);
slotOffRunner -= slotManager.getSlotSize();
}
// update space fields, TODO: we need to update more fields
frame.setFreeSpaceOff(tupleFreeSpace);
frame.setPrefixTupleCount(newPrefixSlots.length);
frame.setUncompressedTupleCount(uncompressedTupleCount);
int totalFreeSpace = buf.capacity() - tupleFreeSpace - ((newTupleSlots.length + newPrefixSlots.length) * slotManager.getSlotSize());
frame.setTotalFreeSpace(totalFreeSpace);
return true;
}
use of org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame in project asterixdb by apache.
the class FieldPrefixPrefixTupleReference method resetByTupleIndex.
// assumes tuple index refers to prefix tuples
@Override
public void resetByTupleIndex(ITreeIndexFrame frame, int tupleIndex) {
BTreeFieldPrefixNSMLeafFrame concreteFrame = (BTreeFieldPrefixNSMLeafFrame) frame;
IPrefixSlotManager slotManager = concreteFrame.getSlotManager();
int prefixSlotOff = slotManager.getPrefixSlotOff(tupleIndex);
int prefixSlot = concreteFrame.getBuffer().getInt(prefixSlotOff);
setFieldCount(slotManager.decodeFirstSlotField(prefixSlot));
tupleStartOff = slotManager.decodeSecondSlotField(prefixSlot);
buf = concreteFrame.getBuffer().array();
resetByTupleOffset(buf, tupleStartOff);
}
use of org.apache.hyracks.storage.am.btree.frames.BTreeFieldPrefixNSMLeafFrame in project asterixdb by apache.
the class FieldPrefixNSMTest method test01.
@Test
public void test01() throws Exception {
// declare fields
int fieldCount = 3;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = IntegerPointable.TYPE_TRAITS;
typeTraits[1] = IntegerPointable.TYPE_TRAITS;
typeTraits[2] = IntegerPointable.TYPE_TRAITS;
// declare keys
int keyFieldCount = 3;
IBinaryComparator[] cmps = new IBinaryComparator[keyFieldCount];
cmps[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY).createBinaryComparator();
cmps[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY).createBinaryComparator();
cmps[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY).createBinaryComparator();
MultiComparator cmp = new MultiComparator(cmps);
// just for printing
@SuppressWarnings("rawtypes") ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
Random rnd = new Random();
rnd.setSeed(50);
IBufferCache bufferCache = harness.getBufferCache();
IFileMapProvider fileMapProvider = harness.getFileMapProvider();
bufferCache.createFile(harness.getFileReference());
int btreeFileId = fileMapProvider.lookupFileId(harness.getFileReference());
bufferCache.openFile(btreeFileId);
IHyracksTaskContext ctx = harness.getHyracksTaskContext();
ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(btreeFileId, 0), true);
try {
ITreeIndexTupleWriter tupleWriter = new TypeAwareTupleWriter(typeTraits);
BTreeFieldPrefixNSMLeafFrame frame = new BTreeFieldPrefixNSMLeafFrame(tupleWriter);
frame.setPage(page);
frame.initBuffer((byte) 0);
frame.setMultiComparator(cmp);
frame.setPrefixTupleCount(0);
String before = new String();
String after = new String();
int compactFreq = 5;
int compressFreq = 5;
int smallMax = 10;
int numRecords = 1000;
int[][] savedFields = new int[numRecords][3];
// insert records with random calls to compact and compress
for (int i = 0; i < numRecords; i++) {
if (LOGGER.isLoggable(Level.INFO)) {
if ((i + 1) % 100 == 0) {
LOGGER.info("INSERTING " + (i + 1) + " / " + numRecords);
}
}
int a = rnd.nextInt() % smallMax;
int b = rnd.nextInt() % smallMax;
int c = i;
ITupleReference tuple = createTuple(ctx, a, b, c, false);
try {
int targetTupleIndex = frame.findInsertTupleIndex(tuple);
frame.insert(tuple, targetTupleIndex);
} catch (Exception e) {
e.printStackTrace();
}
savedFields[i][0] = a;
savedFields[i][1] = b;
savedFields[i][2] = c;
if (rnd.nextInt() % compactFreq == 0) {
before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
frame.compact();
after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
Assert.assertEquals(before, after);
}
if (rnd.nextInt() % compressFreq == 0) {
before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
frame.compress();
after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
Assert.assertEquals(before, after);
}
}
// delete records with random calls to compact and compress
for (int i = 0; i < numRecords; i++) {
if (LOGGER.isLoggable(Level.INFO)) {
if ((i + 1) % 100 == 0) {
LOGGER.info("DELETING " + (i + 1) + " / " + numRecords);
}
}
ITupleReference tuple = createTuple(ctx, savedFields[i][0], savedFields[i][1], savedFields[i][2], false);
try {
int tupleIndex = frame.findDeleteTupleIndex(tuple);
frame.delete(tuple, tupleIndex);
} catch (Exception e) {
}
if (rnd.nextInt() % compactFreq == 0) {
before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
frame.compact();
after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
Assert.assertEquals(before, after);
}
if (rnd.nextInt() % compressFreq == 0) {
before = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
frame.compress();
after = TreeIndexUtils.printFrameTuples(frame, fieldSerdes);
Assert.assertEquals(before, after);
}
}
} finally {
bufferCache.unpin(page);
bufferCache.closeFile(btreeFileId);
bufferCache.close();
}
}
Aggregations