use of com.yahoo.sketches.Family in project sketches-core by DataSketches.
the class HeapQuickSelectSketch method heapifyInstance.
/**
 * Heapify a sketch from a Memory UpdateSketch or Union object
 * containing sketch data.
 *
 * <p>The preamble fields are read first and then validated in this order: serialization
 * version, family and its preamble size, lgNomLongs, flags, seed hash, Memory capacity, and
 * the theta / p relationship. Only after all checks pass is the hash table copied out of
 * the Memory into a new heap array.</p>
 *
 * @param srcMem The source Memory object.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a>
 * @return instance of this sketch
 * @throws SketchesArgumentException if one of the preamble checks performed directly by this
 * method fails. The seed hash comparison is delegated to Util.checkSeedHashes.
 */
static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long seed) {
  //byte 0
  // Read once and convert to an unsigned value. Without the 0XFF mask a byte whose high bit
  // is set is sign-extended to int, and the unsigned shift below would then produce a large
  // int instead of the small resize-factor field.
  final int byte0 = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0XFF;
  final int preambleLongs = byte0 & 0X3F;
  final ResizeFactor myRF = ResizeFactor.getRF(byte0 >>> LG_RESIZE_FACTOR_BIT);
  //byte 1
  final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
  //byte 2
  final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
  //byte 3
  final int lgNomLongs = srcMem.getByte(LG_NOM_LONGS_BYTE) & 0XFF;
  //byte 4
  final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF;
  //byte 5
  final int flags = srcMem.getByte(FLAGS_BYTE) & 0XFF;
  //byte 6,7
  final short seedHash = srcMem.getShort(SEED_HASH_SHORT);
  //bytes 8-11
  final int curCount = srcMem.getInt(RETAINED_ENTRIES_INT);
  //bytes 12-15
  final float p = srcMem.getFloat(P_FLOAT);
  //bytes 16-23
  final long thetaLong = srcMem.getLong(THETA_LONG);

  if (serVer != SER_VER) {
    throw new SketchesArgumentException("Possible corruption: Invalid Serialization Version: " + serVer);
  }

  // Only UNION and QUICKSELECT images are accepted, each with its own required preamble size.
  final Family family = Family.idToFamily(familyID);
  if (family.equals(Family.UNION)) {
    if (preambleLongs != Family.UNION.getMinPreLongs()) {
      throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs);
    }
  } else if (family.equals(Family.QUICKSELECT)) {
    if (preambleLongs != Family.QUICKSELECT.getMinPreLongs()) {
      throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs);
    }
  } else {
    throw new SketchesArgumentException("Possible corruption: Invalid Family: " + family.toString());
  }

  if (lgNomLongs < MIN_LG_NOM_LONGS) {
    throw new SketchesArgumentException("Possible corruption: Current Memory lgNomLongs < min required size: " + lgNomLongs + " < " + MIN_LG_NOM_LONGS);
  }

  // None of these four flag bits may be set in the source image.
  final int flagsMask = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK;
  if ((flags & flagsMask) > 0) {
    throw new SketchesArgumentException("Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only");
  }

  Util.checkSeedHashes(seedHash, Util.computeSeedHash(seed));

  // The Memory must be large enough for the preamble plus the full hash table.
  final long curCapBytes = srcMem.getCapacity();
  final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs);
  if (curCapBytes < minReqBytes) {
    throw new SketchesArgumentException("Possible corruption: Current Memory size < min required size: " + curCapBytes + " < " + minReqBytes);
  }

  final double theta = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
  if ((lgArrLongs <= lgNomLongs) && (theta < p)) {
    throw new SketchesArgumentException("Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. " + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
  }

  final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, myRF, preambleLongs, family);
  hqss.lgArrLongs_ = lgArrLongs;
  hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
  hqss.curCount_ = curCount;
  hqss.thetaLong_ = thetaLong;
  hqss.empty_ = (flags & EMPTY_FLAG_MASK) > 0;
  hqss.cache_ = new long[1 << lgArrLongs];
  //read in as hash table; it starts immediately after the preamble (preambleLongs * 8 bytes)
  srcMem.getLongArray(preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs);
  return hqss;
}
use of com.yahoo.sketches.Family in project sketches-core by DataSketches.
the class PreambleUtil method preambleToString.
/**
 * Returns a human readable string summary of the preamble state of the given Memory.
 * Note: other than making sure that the given Memory size is large
 * enough for just the preamble, this does not do much value checking of the contents of the
 * preamble as this is primarily a tool for debugging the preamble visually.
 *
 * <p>Fields that are not physically present for the detected number of preamble longs are
 * printed with assumed values (count 0, p 1.0, theta derived from p) under an
 * "ABSENT, ASSUMED" marker.</p>
 *
 * @param mem the given Memory.
 * @return the summary preamble string.
 */
public static String preambleToString(final WritableMemory mem) {
  //may be null
  final Object memObj = mem.getArray();
  final long memAdd = mem.getCumulativeOffset(0L);
  final int preLongs = getAndCheckPreLongs(memObj, memAdd, mem);
  final ResizeFactor rf = ResizeFactor.getRF(extractLgResizeFactor(memObj, memAdd));
  final int serVer = extractSerVer(memObj, memAdd);
  final Family family = Family.idToFamily(extractFamilyID(memObj, memAdd));
  final int lgNomLongs = extractLgNomLongs(memObj, memAdd);
  final int lgArrLongs = extractLgArrLongs(memObj, memAdd);

  //Flags
  final int flags = extractFlags(memObj, memAdd);
  final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
  final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
  final String nativeOrder = ByteOrder.nativeOrder().toString();
  final boolean compact = (flags & COMPACT_FLAG_MASK) > 0;
  final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0;
  final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
  final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;

  final int seedHash = extractSeedHash(memObj, memAdd);

  //Count and p are only stored when preLongs is 2, 3 or 4; otherwise assumed 0 and 1.0
  int curCount = 0;
  float p = (float) 1.0;
  if ((preLongs >= 2) && (preLongs <= 4)) {
    curCount = extractCurCount(memObj, memAdd);
    p = extractP(memObj, memAdd);
  }
  //Theta is only stored when preLongs is 3 or 4; otherwise it is derived from p
  final boolean thetaStored = (preLongs == 3) || (preLongs == 4);
  final long thetaLong = thetaStored
      ? extractThetaLong(memObj, memAdd)
      : (long) (p * MAX_THETA_LONG_AS_DOUBLE);
  //The union theta is only stored when preLongs is 4; otherwise it mirrors theta
  final long thetaULong = (preLongs == 4)
      ? extractUnionThetaLong(memObj, memAdd)
      : thetaLong;

  final double thetaDbl = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
  final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
  final double thetaUDbl = thetaULong / MAX_THETA_LONG_AS_DOUBLE;
  final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16);

  final StringBuilder sb = new StringBuilder();
  sb.append(LS)
      .append("### SKETCH PREAMBLE SUMMARY:").append(LS)
      .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS)
      .append("Byte 0: ResizeFactor : ").append(rf.toString()).append(LS)
      .append("Byte 1: Serialization Version: ").append(serVer).append(LS)
      .append("Byte 2: Family : ").append(family.toString()).append(LS)
      .append("Byte 3: LgNomLongs : ").append(lgNomLongs).append(LS)
      .append("Byte 4: LgArrLongs : ").append(lgArrLongs).append(LS)
      .append("Byte 5: Flags Field : ").append(flagsStr).append(LS)
      .append(" BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS)
      .append(" (Native Byte Order) : ").append(nativeOrder).append(LS)
      .append(" READ_ONLY : ").append(readOnly).append(LS)
      .append(" EMPTY : ").append(empty).append(LS)
      .append(" COMPACT : ").append(compact).append(LS)
      .append(" ORDERED : ").append(ordered).append(LS)
      .append("Bytes 6-7 : Seed Hash : ").append(Integer.toHexString(seedHash)).append(LS);

  //The count/p/theta section is printed only for preLongs 1..4. The "ABSENT, ASSUMED" marker
  //is placed directly in front of the first field that is not physically stored.
  if ((preLongs >= 1) && (preLongs <= 4)) {
    if (preLongs == 1) {
      sb.append(" --ABSENT, ASSUMED:").append(LS);
    }
    sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS)
        .append("Bytes 12-15: P : ").append(p).append(LS);
    if (preLongs == 2) {
      sb.append(" --ABSENT, ASSUMED:").append(LS);
    }
    sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS)
        .append(" Theta (long) : ").append(thetaLong).append(LS)
        .append(" Theta (long,hex) : ").append(thetaHex).append(LS);
    if (preLongs == 4) {
      //The fourth 8-byte preamble long follows Theta (bytes 16-23), i.e. bytes 24-31.
      sb.append("Bytes 24-31: ThetaU (double) : ").append(thetaUDbl).append(LS)
          .append(" ThetaU (long) : ").append(thetaULong).append(LS)
          .append(" ThetaU (long,hex): ").append(thetaUHex).append(LS);
    }
  }

  sb.append("Preamble Bytes : ").append(preLongs * 8).append(LS);
  sb.append("Data Bytes : ").append(curCount * 8).append(LS);
  sb.append("TOTAL Sketch Bytes : ").append(mem.getCapacity()).append(LS)
      .append("### END SKETCH PREAMBLE SUMMARY").append(LS);
  return sb.toString();
}
use of com.yahoo.sketches.Family in project sketches-core by DataSketches.
the class SetOperation method isValidSetOpID.
/**
 * Tests whether the given Family id denotes one of the set operations.
 * @param id the Family id to look up via Family.idToFamily
 * @return true if the id maps to UNION, INTERSECTION or A_NOT_B; false for any other Family
 * returned by the lookup
 */
static boolean isValidSetOpID(final int id) {
  final Family fam = Family.idToFamily(id);
  if (fam == Family.UNION) {
    return true;
  }
  if (fam == Family.INTERSECTION) {
    return true;
  }
  return fam == Family.A_NOT_B;
}
use of com.yahoo.sketches.Family in project sketches-core by DataSketches.
the class SetOperation method wrap.
/**
 * Wraps the SetOperation image held in the given Memory and refers to it directly;
 * no data is copied onto the java heap.
 * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
 *
 * <p>The family byte selects the implementation: UNION images are handed to
 * UnionImpl.wrapInstance and INTERSECTION images to IntersectionImplR.wrapInstance.</p>
 *
 * @param srcMem an image of a SetOperation where the hash of the given seed matches the image seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a SetOperation backed by the given Memory
 * @throws SketchesArgumentException if the serialization version is not 3 or the family is
 * neither UNION nor INTERSECTION
 */
public static SetOperation wrap(final Memory srcMem, final long seed) {
  final Family family = idToFamily(srcMem.getByte(FAMILY_BYTE));

  // Reject any serialization version other than 3 before dispatching.
  final int serVer = srcMem.getByte(SER_VER_BYTE);
  if (serVer != 3) {
    throw new SketchesArgumentException("SerVer must be 3: " + serVer);
  }

  if (family == Family.UNION) {
    return UnionImpl.wrapInstance(srcMem, seed);
  }
  if (family == Family.INTERSECTION) {
    return IntersectionImplR.wrapInstance(srcMem, seed);
  }
  throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString());
}
use of com.yahoo.sketches.Family in project sketches-core by DataSketches.
the class SetOperation method wrap.
/**
 * Wraps the SetOperation image held in the given WritableMemory and refers to it directly;
 * no data is copied onto the java heap.
 * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
 *
 * <p>The family byte selects the implementation: UNION images are handed to
 * UnionImpl.wrapInstance and INTERSECTION images to IntersectionImpl.wrapInstance.</p>
 *
 * @param srcMem an image of a SetOperation where the hash of the given seed matches the image seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a SetOperation backed by the given Memory
 * @throws SketchesArgumentException if the serialization version is not 3 or the family is
 * neither UNION nor INTERSECTION
 */
public static SetOperation wrap(final WritableMemory srcMem, final long seed) {
  final Family family = idToFamily(srcMem.getByte(FAMILY_BYTE));

  // Reject any serialization version other than 3 before dispatching.
  final int serVer = srcMem.getByte(SER_VER_BYTE);
  if (serVer != 3) {
    throw new SketchesArgumentException("SerVer must be 3: " + serVer);
  }

  if (family == Family.UNION) {
    return UnionImpl.wrapInstance(srcMem, seed);
  }
  if (family == Family.INTERSECTION) {
    return IntersectionImpl.wrapInstance(srcMem, seed);
  }
  throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString());
}
Aggregations