use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class Sketch method heapify.
/**
 * Builds an on-heap Sketch from the sketch image held in the given Memory.
 *
 * <p>The returned sketch does not retain any link to the source Memory.</p>
 *
 * <p>For Update and Compact Sketches the given expectedSeed is checked against the seed that was
 * used to create the source Memory image. SerialVersion 1 sketches cannot be checked.</p>
 *
 * <p>The family byte of the image selects the path: a COMPACT image is delegated to
 * {@code CompactSketch.heapify}; every other family is delegated to
 * {@code heapifyUpdateFromMemory}.</p>
 *
 * @param srcMem an image of a Sketch that was created using the given expectedSeed.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * Compact sketches store a 16-bit hash of the seed, but not the seed itself.
 * @return a Sketch on the heap.
 */
public static Sketch heapify(final Memory srcMem, final long expectedSeed) {
  final Family imageFamily = idToFamily(srcMem.getByte(FAMILY_BYTE));
  return (imageFamily == Family.COMPACT)
      ? CompactSketch.heapify(srcMem, expectedSeed)
      : heapifyUpdateFromMemory(srcMem, expectedSeed);
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class Sketch method heapifyUpdateFromMemory.
/**
 * Creates a Heap Update Sketch from a Memory image. Only SerVer3; SerVer 1 and 2 are already
 * handled before this method is reached.
 *
 * <p>Only the ALPHA and QUICKSELECT families are accepted. An ALPHA image that has the compact
 * flag set is rejected as corrupted.</p>
 *
 * @param srcMem <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a Sketch
 * @throws SketchesArgumentException if the image is smaller than 8 bytes, if an ALPHA image is
 * flagged compact, or if the family is neither ALPHA nor QUICKSELECT.
 */
private static final Sketch heapifyUpdateFromMemory(final Memory srcMem, final long expectedSeed) {
  // The capacity check comes first so the family byte is only read from a plausible image.
  if (srcMem.getCapacity() < 8) {
    throw new SketchesArgumentException("Corrupted: valid sketch must be at least 8 bytes.");
  }
  final Family imageFamily = idToFamily(srcMem.getByte(FAMILY_BYTE));

  if (imageFamily == Family.QUICKSELECT) {
    return HeapQuickSelectSketch.heapifyInstance(srcMem, expectedSeed);
  }
  if (imageFamily != Family.ALPHA) {
    throw new SketchesArgumentException("Sketch cannot heapify family: " + imageFamily + " as a Sketch");
  }

  // ALPHA family from here on: the image must not carry the compact flag.
  final boolean isCompact = (PreambleUtil.extractFlags(srcMem) & COMPACT_FLAG_MASK) != 0;
  if (isCompact) {
    throw new SketchesArgumentException("Corrupted: ALPHA family image: cannot be compact");
  }
  return HeapAlphaSketch.heapifyInstance(srcMem, expectedSeed);
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class PreambleUtil method preambleToString.
/**
 * Returns a human readable string summary of the preamble state of the given Memory.
 * Note: other than the check performed by {@code getAndCheckPreLongs}, this does not do much
 * value checking of the contents of the preamble, as this is primarily a tool for debugging
 * the preamble visually.
 *
 * <p>Fields that are not physically present for the reported number of preamble longs are
 * printed with assumed defaults (count 0, or 1 when the single-item flag is set; p = 1.0;
 * theta = Long.MAX_VALUE) and are marked as absent in the output.</p>
 *
 * @param mem the given Memory.
 * @return the summary preamble string.
 */
static String preambleToString(final Memory mem) {
  final int preLongs = getAndCheckPreLongs(mem);
  final int rfId = extractLgResizeFactor(mem);
  final ResizeFactor rf = ResizeFactor.getRF(rfId);
  final int serVer = extractSerVer(mem);
  final int familyId = extractFamilyID(mem);
  final Family family = Family.idToFamily(familyId);
  final int lgNomLongs = extractLgNomLongs(mem);
  final int lgArrLongs = extractLgArrLongs(mem);

  // Flags: shown as decimal, hex and an 8-character zero-padded binary string.
  final int flags = extractFlags(mem);
  final String flagsStr = flags + ", 0x" + Integer.toHexString(flags) + ", "
      + zeroPad(Integer.toBinaryString(flags), 8);
  final String nativeOrder = ByteOrder.nativeOrder().toString();
  final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) != 0;
  final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) != 0;
  final boolean empty = (flags & EMPTY_FLAG_MASK) != 0;
  final boolean compact = (flags & COMPACT_FLAG_MASK) != 0;
  final boolean ordered = (flags & ORDERED_FLAG_MASK) != 0;
  final boolean singleItem = (flags & SINGLEITEM_FLAG_MASK) != 0;
  final int seedHash = extractSeedHash(mem);

  // Defaults used when preLongs == 1 (empty or single item); overwritten below when the
  // corresponding fields are actually present in the preamble.
  int curCount = singleItem ? 1 : 0;
  float p = 1.0f;
  long thetaLong = Long.MAX_VALUE;
  long thetaULong = thetaLong;

  if (preLongs == 2) {
    // exact (non-estimating) CompactSketch: count and p are present, theta is not
    curCount = extractCurCount(mem);
    p = extractP(mem);
  } else if (preLongs == 3) {
    // Update Sketch: theta is present as well
    curCount = extractCurCount(mem);
    p = extractP(mem);
    thetaLong = extractThetaLong(mem);
    thetaULong = thetaLong;
  } else if (preLongs == 4) {
    // Union: carries a second theta value
    curCount = extractCurCount(mem);
    p = extractP(mem);
    thetaLong = extractThetaLong(mem);
    thetaULong = extractUnionThetaLong(mem);
  }
  // else the same as an empty sketch or singleItem

  final double thetaDbl = thetaLong / Util.LONG_MAX_VALUE_AS_DOUBLE;
  final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
  final double thetaUDbl = thetaULong / Util.LONG_MAX_VALUE_AS_DOUBLE;
  final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16);

  final StringBuilder sb = new StringBuilder();
  sb.append(LS);
  sb.append("### SKETCH PREAMBLE SUMMARY:").append(LS);
  sb.append("Native Byte Order : ").append(nativeOrder).append(LS);
  sb.append("Byte 0: Preamble Longs : ").append(preLongs).append(LS);
  sb.append("Byte 0: ResizeFactor : ").append(rfId + ", " + rf.toString()).append(LS);
  sb.append("Byte 1: Serialization Version: ").append(serVer).append(LS);
  sb.append("Byte 2: Family : ").append(familyId + ", " + family.toString()).append(LS);
  sb.append("Byte 3: LgNomLongs : ").append(lgNomLongs).append(LS);
  sb.append("Byte 4: LgArrLongs : ").append(lgArrLongs).append(LS);
  sb.append("Byte 5: Flags Field : ").append(flagsStr).append(LS);
  sb.append(" Bit Flag Name : State:").append(LS);
  sb.append(" 0 BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS);
  sb.append(" 1 READ_ONLY : ").append(readOnly).append(LS);
  sb.append(" 2 EMPTY : ").append(empty).append(LS);
  sb.append(" 3 COMPACT : ").append(compact).append(LS);
  sb.append(" 4 ORDERED : ").append(ordered).append(LS);
  sb.append(" 5 SINGLE_ITEM : ").append(singleItem).append(LS);
  sb.append("Bytes 6-7 : Seed Hash Hex : ").append(Integer.toHexString(seedHash)).append(LS);

  // With a single preamble long, every field from byte 8 onward is absent and assumed.
  if (preLongs == 1) {
    sb.append(" --ABSENT FIELDS, ASSUMED:").append(LS);
  }
  sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
  sb.append("Bytes 12-15: P : ").append(p).append(LS);
  // With two preamble longs, count and p are present but theta is absent and assumed.
  if (preLongs == 2) {
    sb.append(" --ABSENT, ASSUMED:").append(LS);
  }
  sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
  sb.append(" Theta (long) : ").append(thetaLong).append(LS);
  sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
  // Any value other than 1, 2 or 3 is reported as a Union preamble (normally preLongs == 4).
  if ((preLongs < 1) || (preLongs > 3)) {
    // The long that follows bytes 16-23 occupies bytes 24-31.
    sb.append("Bytes 24-31: ThetaU (double) : ").append(thetaUDbl).append(LS);
    sb.append(" ThetaU (long) : ").append(thetaULong).append(LS);
    sb.append(" ThetaU (long,hex): ").append(thetaUHex).append(LS);
  }

  sb.append("Preamble Bytes : ").append(preLongs * 8).append(LS);
  sb.append("Data Bytes : ").append(curCount * 8).append(LS);
  sb.append("TOTAL Sketch Bytes : ").append((preLongs + curCount) * 8).append(LS);
  sb.append("TOTAL Capacity Bytes : ").append(mem.getCapacity()).append(LS);
  sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS);
  return sb.toString();
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class SetOperation method wrap.
/**
 * Wraps the SetOperation image in the given Memory and refers to it directly.
 * No data is copied onto the java heap.
 *
 * <p>Note: Only certain set operators during stateful operations can be serialized and thus
 * wrapped. This method accepts the UNION and INTERSECTION families only, and only images with
 * serialization version 3.</p>
 *
 * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a SetOperation backed by the given Memory
 * @throws SketchesArgumentException if the serialization version is not 3 or the family is
 * neither UNION nor INTERSECTION.
 */
public static SetOperation wrap(final WritableMemory srcMem, final long expectedSeed) {
  // The family is resolved before the serialization version is validated.
  final Family imageFamily = idToFamily(srcMem.getByte(FAMILY_BYTE));
  final int serialVersion = srcMem.getByte(SER_VER_BYTE);
  if (serialVersion != 3) {
    throw new SketchesArgumentException("SerVer must be 3: " + serialVersion);
  }
  if (imageFamily == Family.UNION) {
    return UnionImpl.wrapInstance(srcMem, expectedSeed);
  }
  if (imageFamily == Family.INTERSECTION) {
    return IntersectionImpl.wrapInstance(srcMem, expectedSeed, false);
  }
  throw new SketchesArgumentException("SetOperation cannot wrap family: " + imageFamily.toString());
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class SetOperation method wrap.
/**
 * Wraps the SetOperation image in the given Memory and refers to it directly.
 * No data is copied onto the java heap.
 *
 * <p>Note: Only certain set operators during stateful operations can be serialized and thus
 * wrapped. This method accepts the UNION and INTERSECTION families only, and only images with
 * serialization version 3.</p>
 *
 * <p>For an INTERSECTION image the given Memory is cast to WritableMemory and {@code true} is
 * passed as the last argument of {@code IntersectionImpl.wrapInstance} (presumably a read-only
 * indicator; confirm against that method).</p>
 *
 * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a SetOperation backed by the given Memory
 * @throws SketchesArgumentException if the serialization version is not 3 or the family is
 * neither UNION nor INTERSECTION.
 */
public static SetOperation wrap(final Memory srcMem, final long expectedSeed) {
  // The family is resolved before the serialization version is validated.
  final Family imageFamily = idToFamily(srcMem.getByte(FAMILY_BYTE));
  final int serialVersion = srcMem.getByte(SER_VER_BYTE);
  if (serialVersion != 3) {
    throw new SketchesArgumentException("SerVer must be 3: " + serialVersion);
  }
  if (imageFamily == Family.UNION) {
    return UnionImpl.wrapInstance(srcMem, expectedSeed);
  }
  if (imageFamily == Family.INTERSECTION) {
    final WritableMemory writableView = (WritableMemory) srcMem;
    return IntersectionImpl.wrapInstance(writableView, expectedSeed, true);
  }
  throw new SketchesArgumentException("SetOperation cannot wrap family: " + imageFamily.toString());
}
Aggregations