use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class CompactSketch method wrap.
/**
 * Wraps the sketch image held by the given Memory so that the returned sketch refers to that
 * image directly; no data is copied onto the Java heap.
 * Wrapping enables fast read-only merging and access to all of the public read-only API.
 *
 * <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that were explicitly
 * stored as direct sketches can be wrapped.
 * Images written with an earlier serialization version are heapified instead, because those
 * early versions were never designed to "wrap".</p>
 *
 * <p>An image of any subclass of this class that is empty, or that holds only a single item,
 * is returned as the heapified empty or single-item sketch respectively.
 * This is actually faster and consumes less overall memory.</p>
 *
 * <p>This method checks whether the given expectedSeed was used to create the source Memory
 * image. SerialVersion 1 sketches cannot be checked because they have no seedHash field,
 * so the resulting heapified CompactSketch is given the hash of the expectedSeed.</p>
 *
 * @param srcMem an image of a Sketch that was created using the given expectedSeed.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a CompactSketch backed by the given Memory except as above.
 */
public static CompactSketch wrap(final Memory srcMem, final long expectedSeed) {
  final int serialVersion = srcMem.getByte(SER_VER_BYTE) & 0XFF;
  final int famId = srcMem.getByte(FAMILY_BYTE) & 0XFF;
  final Family fam = Family.idToFamily(famId);
  if (fam != Family.COMPACT) {
    throw new IllegalArgumentException("Corrupted: " + fam + " is not Compact!");
  }
  final short seedHash = Util.computeSeedHash(expectedSeed);

  // Dispatch on the serialization version; only version 3 continues past the switch.
  switch (serialVersion) {
    case 1:
      return ForwardCompatibility.heapify1to3(srcMem, seedHash);
    case 2:
      return ForwardCompatibility.heapify2to3(srcMem, seedHash);
    case 3:
      break;
    default:
      throw new SketchesArgumentException("Corrupted: Serialization Version " + serialVersion + " not recognized.");
  }

  // Serialization Version 3: empty and single-item images are returned in heap form.
  if (PreambleUtil.isEmptyFlag(srcMem)) {
    return EmptyCompactSketch.getHeapInstance(srcMem);
  }
  if (otherCheckForSingleItem(srcMem)) {
    return SingleItemSketch.heapify(srcMem, seedHash);
  }

  // Neither empty nor single item: the image must be flagged both compact and read-only.
  final int flagBits = srcMem.getByte(FLAGS_BYTE);
  if ((flagBits & COMPACT_FLAG_MASK) <= 0) {
    throw new SketchesArgumentException("Corrupted: COMPACT family sketch image must have compact flag set");
  }
  if ((flagBits & READ_ONLY_FLAG_MASK) <= 0) {
    throw new SketchesArgumentException("Corrupted: COMPACT family sketch image must have Read-Only flag set");
  }
  return DirectCompactSketch.wrapInstance(srcMem, seedHash);
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class CompactSketch method heapify.
/**
 * Heapify takes a CompactSketch image in Memory and instantiates an on-heap CompactSketch.
 *
 * <p>The resulting sketch will not retain any link to the source Memory and all of its data will be
 * copied to the heap CompactSketch.</p>
 *
 * <p>This method checks if the given expectedSeed was used to create the source Memory image.
 * For Serial Version 3 images the check is skipped when the image has its empty flag set.
 * SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field,
 * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.</p>
 *
 * @param srcMem an image of a CompactSketch that was created using the given expectedSeed.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a CompactSketch on the heap.
 * @throws IllegalArgumentException if the family byte of the image is not COMPACT.
 * @throws SketchesArgumentException if the serialization version is not 1, 2 or 3.
 */
public static CompactSketch heapify(final Memory srcMem, final long expectedSeed) {
  // Mask with 0XFF so the single-byte header fields are interpreted as unsigned values;
  // without the mask a byte >= 0x80 is sign-extended and reported as a negative number.
  final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
  final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
  final Family family = Family.idToFamily(familyID);
  if (family != Family.COMPACT) {
    throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!");
  }
  if (serVer == 3) {
    final int flags = PreambleUtil.extractFlags(srcMem);
    final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0;
    final boolean empty = (flags & EMPTY_FLAG_MASK) != 0;
    if (!empty) {
      // The seed hash is only validated for non-empty images.
      PreambleUtil.checkMemorySeedHash(srcMem, expectedSeed);
    }
    return CompactOperations.memoryToCompact(srcMem, srcOrdered, null);
  }
  // not SerVer 3, assume compact stored form
  final short seedHash = Util.computeSeedHash(expectedSeed);
  if (serVer == 1) {
    return ForwardCompatibility.heapify1to3(srcMem, seedHash);
  }
  if (serVer == 2) {
    return ForwardCompatibility.heapify2to3(srcMem, seedHash);
  }
  throw new SketchesArgumentException("Unknown Serialization Version: " + serVer);
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class CompactSketch method heapify.
/**
 * Builds an on-heap CompactSketch from a CompactSketch image stored in Memory.
 *
 * <p>The returned sketch keeps no link to the source Memory; all of its data is copied to the
 * heap CompactSketch.</p>
 *
 * <p>The image is assumed to have been created with the correct hash seed, so the seed is not
 * checked. The on-heap CompactSketch receives the seedHash derived from the given sketch image.
 * Serial Version 1 images have no seedHash field, so for those the heapified CompactSketch is
 * given the hash of the DEFAULT_UPDATE_SEED.</p>
 *
 * @param srcMem an image of a CompactSketch.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
 * @return a CompactSketch on the heap.
 */
public static CompactSketch heapify(final Memory srcMem) {
  final int serialVersion = srcMem.getByte(SER_VER_BYTE) & 0XFF;
  final int famId = srcMem.getByte(FAMILY_BYTE) & 0XFF;
  final Family fam = Family.idToFamily(famId);
  if (fam != Family.COMPACT) {
    throw new IllegalArgumentException("Corrupted: " + fam + " is not Compact!");
  }

  switch (serialVersion) {
    case 3: {
      // Current format: no seed check is performed.
      final int flagBits = PreambleUtil.extractFlags(srcMem);
      final boolean ordered = (flagBits & ORDERED_FLAG_MASK) != 0;
      return CompactOperations.memoryToCompact(srcMem, ordered, null);
    }
    case 1:
      // Version 1 images carry no seed hash, so the default seed hash is used.
      return ForwardCompatibility.heapify1to3(srcMem, defaultSeedHash);
    case 2: {
      // Version 2 images carry their own seed hash; reuse it as-is.
      final short imageSeedHash = (short) extractSeedHash(srcMem);
      return ForwardCompatibility.heapify2to3(srcMem, imageSeedHash);
    }
    default:
      throw new SketchesArgumentException("Unknown Serialization Version: " + serialVersion);
  }
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class HeapAlphaSketch method checkAlphaFamily.
/**
 * Validates that the given Memory image identifies itself as an ALPHA family sketch and that
 * the supplied preamble size and nominal-entries exponent are acceptable for that family.
 *
 * @param mem the sketch image whose family ID is extracted and checked
 * @param preambleLongs the preamble size to compare against the ALPHA family's minimum
 *     preamble longs
 * @param lgNomLongs the log-base-2 of the nominal entries; must not be less than
 *     ALPHA_MIN_LG_NOM_LONGS
 * @throws SketchesArgumentException if the family is not ALPHA, if preambleLongs differs from
 *     the ALPHA minimum preamble longs, or if lgNomLongs is too small
 */
static void checkAlphaFamily(final Memory mem, final int preambleLongs, final int lgNomLongs) {
  // Family ID lives in byte 2 of the preamble.
  final Family fam = Family.idToFamily(extractFamilyID(mem));
  if (!fam.equals(Family.ALPHA)) {
    throw new SketchesArgumentException("Possible corruption: Invalid Family: " + fam.toString());
  }
  if (preambleLongs != Family.ALPHA.getMinPreLongs()) {
    throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs);
  }

  // The nominal entries must meet the ALPHA minimum.
  if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) {
    throw new SketchesArgumentException("Possible corruption: This sketch requires a minimum nominal entries of " + (1 << ALPHA_MIN_LG_NOM_LONGS));
  }
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class PreambleUtil method toString.
/**
 * Renders the preamble of the HLL sketch image in the given Memory as a multi-line,
 * human-readable string.
 *
 * <p>The first eight bytes are always reported. The hash-set count is read and reported only
 * when the current mode is SET; the HIP accumulator, KxQ0, KxQ1, number-at-current-minimum and
 * aux count are read and reported only when the current mode is HLL.</p>
 *
 * @param mem the sketch image to describe
 * @return the formatted preamble summary
 */
static String toString(final Memory mem) {
  // First 8 bytes
  final int preInts = mem.getByte(PREAMBLE_INTS_BYTE);
  final int serVer = mem.getByte(SER_VER_BYTE);
  // Mask so a family byte >= 0x80 is looked up by its unsigned value rather than a
  // sign-extended negative ID.
  final Family family = Family.idToFamily(mem.getByte(FAMILY_BYTE) & 0XFF);
  final int lgK = mem.getByte(LG_K_BYTE);
  final int lgArr = mem.getByte(LG_ARR_BYTE);
  // Mask so the flags are always an 8-bit value: Integer.toBinaryString of a sign-extended
  // negative int would otherwise produce a 32-character string and a negative decimal.
  final int flags = mem.getByte(FLAGS_BYTE) & 0XFF;
  // Flags
  final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
  final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
  final String nativeOrder = ByteOrder.nativeOrder().toString();
  final boolean compact = (flags & COMPACT_FLAG_MASK) > 0;
  final boolean oooFlag = (flags & OUT_OF_ORDER_FLAG_MASK) > 0;
  final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
  final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
  final boolean rebuildKxQ = (flags & REBUILD_CURMIN_NUM_KXQ_MASK) > 0;
  // Byte 6 is reported as the list count in LIST mode and as the current minimum in HLL mode.
  final int hllCurMin = mem.getByte(HLL_CUR_MIN_BYTE);
  final int listCount = hllCurMin;
  // Byte 7: the low two bits select the current mode, the next two bits the target HLL type.
  final int modeByte = mem.getByte(MODE_BYTE);
  final CurMode curMode = CurMode.fromOrdinal(modeByte & 3);
  final TgtHllType tgtHllType = TgtHllType.fromOrdinal((modeByte >>> 2) & 3);
  double hipAccum = 0;
  double kxq0 = 0;
  double kxq1 = 0;
  int hashSetCount = 0;
  int curMinCount = 0;
  int exceptionCount = 0;
  // Mode-specific fields are only read for the mode that uses them.
  if (curMode == CurMode.SET) {
    hashSetCount = mem.getInt(HASH_SET_COUNT_INT);
  } else if (curMode == CurMode.HLL) {
    hipAccum = mem.getDouble(HIP_ACCUM_DOUBLE);
    kxq0 = mem.getDouble(KXQ0_DOUBLE);
    kxq1 = mem.getDouble(KXQ1_DOUBLE);
    curMinCount = mem.getInt(CUR_MIN_COUNT_INT);
    exceptionCount = mem.getInt(AUX_COUNT_INT);
  }
  final StringBuilder sb = new StringBuilder();
  sb.append(LS);
  sb.append("### HLL SKETCH PREAMBLE:").append(LS);
  sb.append("Byte 0: Preamble Ints : ").append(preInts).append(LS);
  sb.append("Byte 1: SerVer : ").append(serVer).append(LS);
  sb.append("Byte 2: Family : ").append(family).append(LS);
  sb.append("Byte 3: lgK : ").append(lgK).append(LS);
  // expand byte 4: LgArr
  if (curMode == CurMode.LIST) {
    sb.append("Byte 4: LgArr: List Arr : ").append(lgArr).append(LS);
  }
  if (curMode == CurMode.SET) {
    sb.append("Byte 4: LgArr: Hash Set Arr : ").append(lgArr).append(LS);
  }
  if (curMode == CurMode.HLL) {
    sb.append("Byte 4: LgArr or Aux LgArr : ").append(lgArr).append(LS);
  }
  // expand byte 5: Flags
  sb.append("Byte 5: Flags: : ").append(flagsStr).append(LS);
  sb.append(" BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS);
  sb.append(" (Native Byte Order) : ").append(nativeOrder).append(LS);
  sb.append(" READ_ONLY : ").append(readOnly).append(LS);
  sb.append(" EMPTY : ").append(empty).append(LS);
  sb.append(" COMPACT : ").append(compact).append(LS);
  sb.append(" OUT_OF_ORDER : ").append(oooFlag).append(LS);
  sb.append(" REBUILD_KXQ : ").append(rebuildKxQ).append(LS);
  // expand byte 6: ListCount, CurMin
  if (curMode == CurMode.LIST) {
    sb.append("Byte 6: List Count/CurMin : ").append(listCount).append(LS);
  }
  if (curMode == CurMode.SET) {
    sb.append("Byte 6: (not used) : ").append(LS);
  }
  if (curMode == CurMode.HLL) {
    sb.append("Byte 6: Cur Min : ").append(hllCurMin).append(LS);
  }
  final String modes = curMode.toString() + ", " + tgtHllType.toString();
  sb.append("Byte 7: Mode : ").append(modes).append(LS);
  if (curMode == CurMode.SET) {
    sb.append("Hash Set Count : ").append(hashSetCount).append(LS);
  }
  if (curMode == CurMode.HLL) {
    sb.append("HIP Accum : ").append(hipAccum).append(LS);
    sb.append("KxQ0 : ").append(kxq0).append(LS);
    sb.append("KxQ1 : ").append(kxq1).append(LS);
    sb.append("Num At Cur Min : ").append(curMinCount).append(LS);
    sb.append("Aux Count : ").append(exceptionCount).append(LS);
  }
  sb.append("### END HLL SKETCH PREAMBLE").append(LS);
  return sb.toString();
}
Aggregations