use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class PreambleUtil method toString.
/**
 * Builds a human-readable dump of a CPC sketch image held in the given Memory.
 *
 * <p>The low preamble (the first 8 bytes: preInts, serVer, family, lgK, first interesting column,
 * flags and seed hash) is always listed. The Format decoded from bits 2..4 of the flags byte then
 * selects which high-preamble fields are read and listed. All 32-bit count and length fields are
 * read as unsigned values.</p>
 *
 * @param mem the Memory holding the sketch image
 * @param detail if true, the window stream and the surprising-value stream are also listed
 *     (each only when its length in ints is greater than zero)
 * @return the formatted summary
 */
static String toString(final Memory mem, final boolean detail) {
  final long capBytes = mem.getCapacity();

  // Lo Fields Preamble, first 7 fields, first 8 bytes
  final int preInts = mem.getByte(getLoFieldOffset(LoField.PRE_INTS)) & 0xFF;
  final int serVer = mem.getByte(getLoFieldOffset(LoField.SER_VERSION)) & 0xFF;
  final Family family = Family.idToFamily(mem.getByte(getLoFieldOffset(LoField.FAMILY)) & 0xFF);
  final int lgK = mem.getByte(getLoFieldOffset(LoField.LG_K)) & 0xFF;
  final int fiCol = mem.getByte(getLoFieldOffset(LoField.FI_COL)) & 0xFF;
  final int flags = mem.getByte(getLoFieldOffset(LoField.FLAGS)) & 0XFF;
  final int seedHash = mem.getShort(getLoFieldOffset(LoField.SEED_HASH)) & 0XFFFF;
  final String seedHashStr = Integer.toHexString(seedHash);

  // Flags of the Flags byte
  final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
  final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
  final boolean compressed = (flags & COMPRESSED_FLAG_MASK) > 0;
  final boolean hasHip = (flags & HIP_FLAG_MASK) > 0;
  final boolean hasSV = (flags & SUP_VAL_FLAG_MASK) > 0;
  final boolean hasWindow = (flags & WINDOW_FLAG_MASK) > 0;

  // The format ordinal occupies bits 2..4 of the flags byte.
  final int formatOrdinal = (flags >>> 2) & 0x7;
  final Format format = Format.ordinalToFormat(formatOrdinal);

  final String nativeOrderStr = ByteOrder.nativeOrder().toString();

  // High-preamble values; each stays at zero unless the format's case below reads it.
  long numCoupons = 0;
  long numSv = 0;
  long winOffset = 0;
  long svLengthInts = 0;
  long wLengthInts = 0;
  double kxp = 0;
  double hipAccum = 0;
  long svStreamStart = 0;
  long wStreamStart = 0;
  long reqBytes = 0;

  final StringBuilder sb = new StringBuilder();
  sb.append(LS);
  sb.append("### CPC SKETCH IMAGE - PREAMBLE:").append(LS);
  sb.append("Format : ").append(format.name()).append(LS);
  sb.append("Byte 0: Preamble Ints : ").append(preInts).append(LS);
  sb.append("Byte 1: SerVer : ").append(serVer).append(LS);
  sb.append("Byte 2: Family : ").append(family).append(LS);
  sb.append("Byte 3: lgK : ").append(lgK).append(LS);
  sb.append("Byte 4: First Interesting Col : ").append(fiCol).append(LS);
  sb.append("Byte 5: Flags : ").append(flagsStr).append(LS);
  sb.append(" BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS);
  sb.append(" (Native Byte Order) : ").append(nativeOrderStr).append(LS);
  sb.append(" Compressed : ").append(compressed).append(LS);
  sb.append(" Has HIP : ").append(hasHip).append(LS);
  sb.append(" Has Surprising Values : ").append(hasSV).append(LS);
  sb.append(" Has Window Values : ").append(hasWindow).append(LS);
  sb.append("Byte 6, 7: Seed Hash : ").append(seedHashStr).append(LS);

  final Flavor flavor;

  switch (format) {
    case EMPTY_MERGED:
    case EMPTY_HIP: {
      // Nothing to read: numCoupons is still zero here.
      flavor = CpcUtil.determineFlavor(lgK, numCoupons);
      sb.append("Flavor : ").append(flavor).append(LS);
      break;
    }
    case SPARSE_HYBRID_MERGED: {
      numCoupons = readUnsignedHiField(mem, format, HiField.NUM_COUPONS);
      // This format has no separate NUM_SV read; the SV count is reported as the coupon count.
      numSv = numCoupons;
      svLengthInts = readUnsignedHiField(mem, format, HiField.SV_LENGTH_INTS);
      svStreamStart = getSvStreamOffset(mem);
      reqBytes = svStreamStart + (svLengthInts << 2); // ints -> bytes
      flavor = CpcUtil.determineFlavor(lgK, numCoupons);
      sb.append("Flavor : ").append(flavor).append(LS);
      sb.append("Num Coupons : ").append(numCoupons).append(LS);
      sb.append("Num SV : ").append(numSv).append(LS);
      sb.append("SV Length Ints : ").append(svLengthInts).append(LS);
      sb.append("SV Stream Start : ").append(svStreamStart).append(LS);
      break;
    }
    case SPARSE_HYBRID_HIP: {
      numCoupons = readUnsignedHiField(mem, format, HiField.NUM_COUPONS);
      numSv = numCoupons;
      svLengthInts = readUnsignedHiField(mem, format, HiField.SV_LENGTH_INTS);
      svStreamStart = getSvStreamOffset(mem);
      kxp = mem.getDouble(getHiFieldOffset(format, HiField.KXP));
      hipAccum = mem.getDouble(getHiFieldOffset(format, HiField.HIP_ACCUM));
      reqBytes = svStreamStart + (svLengthInts << 2);
      flavor = CpcUtil.determineFlavor(lgK, numCoupons);
      sb.append("Flavor : ").append(flavor).append(LS);
      sb.append("Num Coupons : ").append(numCoupons).append(LS);
      sb.append("Num SV : ").append(numSv).append(LS);
      sb.append("SV Length Ints : ").append(svLengthInts).append(LS);
      sb.append("SV Stream Start : ").append(svStreamStart).append(LS);
      sb.append("KxP : ").append(kxp).append(LS);
      sb.append("HipAccum : ").append(hipAccum).append(LS);
      break;
    }
    case PINNED_SLIDING_MERGED_NOSV: {
      numCoupons = readUnsignedHiField(mem, format, HiField.NUM_COUPONS);
      winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
      wLengthInts = readUnsignedHiField(mem, format, HiField.W_LENGTH_INTS);
      wStreamStart = getWStreamOffset(mem);
      reqBytes = wStreamStart + (wLengthInts << 2);
      flavor = CpcUtil.determineFlavor(lgK, numCoupons);
      sb.append("Flavor : ").append(flavor).append(LS);
      sb.append("Num Coupons : ").append(numCoupons).append(LS);
      sb.append("Window Offset : ").append(winOffset).append(LS);
      sb.append("Window Length Ints : ").append(wLengthInts).append(LS);
      sb.append("Window Stream Start : ").append(wStreamStart).append(LS);
      break;
    }
    case PINNED_SLIDING_HIP_NOSV: {
      numCoupons = readUnsignedHiField(mem, format, HiField.NUM_COUPONS);
      winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
      wLengthInts = readUnsignedHiField(mem, format, HiField.W_LENGTH_INTS);
      wStreamStart = getWStreamOffset(mem);
      kxp = mem.getDouble(getHiFieldOffset(format, HiField.KXP));
      hipAccum = mem.getDouble(getHiFieldOffset(format, HiField.HIP_ACCUM));
      reqBytes = wStreamStart + (wLengthInts << 2);
      flavor = CpcUtil.determineFlavor(lgK, numCoupons);
      sb.append("Flavor : ").append(flavor).append(LS);
      sb.append("Num Coupons : ").append(numCoupons).append(LS);
      sb.append("Window Offset : ").append(winOffset).append(LS);
      sb.append("Window Length Ints : ").append(wLengthInts).append(LS);
      sb.append("Window Stream Start : ").append(wStreamStart).append(LS);
      sb.append("KxP : ").append(kxp).append(LS);
      sb.append("HipAccum : ").append(hipAccum).append(LS);
      break;
    }
    case PINNED_SLIDING_MERGED: {
      // The unsigned mask must apply to the value read, not to the offset passed to getInt.
      numCoupons = readUnsignedHiField(mem, format, HiField.NUM_COUPONS);
      winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
      wLengthInts = readUnsignedHiField(mem, format, HiField.W_LENGTH_INTS);
      numSv = readUnsignedHiField(mem, format, HiField.NUM_SV);
      svLengthInts = readUnsignedHiField(mem, format, HiField.SV_LENGTH_INTS);
      wStreamStart = getWStreamOffset(mem);
      svStreamStart = getSvStreamOffset(mem);
      // Required size is computed from the end of the SV stream, as in the original code.
      reqBytes = svStreamStart + (svLengthInts << 2);
      flavor = CpcUtil.determineFlavor(lgK, numCoupons);
      sb.append("Flavor : ").append(flavor).append(LS);
      sb.append("Num Coupons : ").append(numCoupons).append(LS);
      sb.append("Num SV : ").append(numSv).append(LS);
      sb.append("SV Length Ints : ").append(svLengthInts).append(LS);
      sb.append("SV Stream Start : ").append(svStreamStart).append(LS);
      sb.append("Window Offset : ").append(winOffset).append(LS);
      sb.append("Window Length Ints : ").append(wLengthInts).append(LS);
      sb.append("Window Stream Start : ").append(wStreamStart).append(LS);
      break;
    }
    case PINNED_SLIDING_HIP: {
      // The unsigned mask must apply to the value read, not to the offset passed to getInt.
      numCoupons = readUnsignedHiField(mem, format, HiField.NUM_COUPONS);
      winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
      wLengthInts = readUnsignedHiField(mem, format, HiField.W_LENGTH_INTS);
      numSv = readUnsignedHiField(mem, format, HiField.NUM_SV);
      svLengthInts = readUnsignedHiField(mem, format, HiField.SV_LENGTH_INTS);
      wStreamStart = getWStreamOffset(mem);
      svStreamStart = getSvStreamOffset(mem);
      kxp = mem.getDouble(getHiFieldOffset(format, HiField.KXP));
      hipAccum = mem.getDouble(getHiFieldOffset(format, HiField.HIP_ACCUM));
      reqBytes = svStreamStart + (svLengthInts << 2);
      flavor = CpcUtil.determineFlavor(lgK, numCoupons);
      sb.append("Flavor : ").append(flavor).append(LS);
      sb.append("Num Coupons : ").append(numCoupons).append(LS);
      sb.append("Num SV : ").append(numSv).append(LS);
      sb.append("SV Length Ints : ").append(svLengthInts).append(LS);
      sb.append("SV Stream Start : ").append(svStreamStart).append(LS);
      sb.append("Window Offset : ").append(winOffset).append(LS);
      sb.append("Window Length Ints : ").append(wLengthInts).append(LS);
      sb.append("Window Stream Start : ").append(wStreamStart).append(LS);
      sb.append("KxP : ").append(kxp).append(LS);
      sb.append("HipAccum : ").append(hipAccum).append(LS);
      break;
    }
  }

  sb.append("Actual Bytes : ").append(capBytes).append(LS);
  sb.append("Required Bytes : ").append(reqBytes).append(LS);

  if (detail) {
    sb.append(LS).append("### CPC SKETCH IMAGE - DATA").append(LS);
    if (wLengthInts > 0) {
      sb.append(LS).append("Window Stream:").append(LS);
      listData(mem, wStreamStart, wLengthInts, sb);
    }
    if (svLengthInts > 0) {
      sb.append(LS).append("SV Stream:").append(LS);
      listData(mem, svStreamStart, svLengthInts, sb);
    }
  }

  sb.append("### END CPC SKETCH IMAGE").append(LS);
  return sb.toString();
}

/**
 * Reads the 32-bit high-preamble field at the offset defined for the given format and returns it
 * as an unsigned value widened to a long.
 *
 * @param mem the Memory holding the sketch image
 * @param format the format that determines the field's offset
 * @param field the high-preamble field to read
 * @return the field value in the range [0, 2^32 - 1]
 */
private static long readUnsignedHiField(final Memory mem, final Format format,
    final HiField field) {
  return mem.getInt(getHiFieldOffset(format, field)) & 0xFFFF_FFFFL;
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class PreambleUtil method checkLoPreamble.
/**
 * Runs the basic low-preamble checks on a sketch image, in this order: SerVer, the preInts
 * defined for the image's Format, Family, lgK, and fiCol. Each check is delegated to
 * rtAssertEquals or rtAssert.
 *
 * @param mem the Memory image to check
 */
static void checkLoPreamble(final Memory mem) {
  // Serialization version must match the expected constant.
  final int expectedSerVer = SER_VER & 0XFF;
  rtAssertEquals(getSerVer(mem), expectedSerVer);

  // The number of preamble ints is fixed by the format.
  final int expectedPreInts = getDefinedPreInts(getFormat(mem)) & 0XFF;
  rtAssertEquals(getPreInts(mem), expectedPreInts);

  // Only CPC images are accepted.
  rtAssert(getFamily(mem) == Family.CPC);

  // lgK must lie in [4, 26].
  final int lgConfigK = getLgK(mem);
  rtAssert((4 <= lgConfigK) && (lgConfigK <= 26));

  // fiCol must lie in [0, 63].
  final int firstCol = getFiCol(mem);
  rtAssert((firstCol <= 63) && (firstCol >= 0));
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class SetOperation method heapify.
/**
 * Heapify takes the SetOperation image in Memory and instantiates an on-heap
 * SetOperation using the given expectedSeed.
 * The resulting SetOperation will not retain any link to the source Memory.
 *
 * <p>Note: Only certain set operators during stateful operations can be serialized and thus
 * heapified.</p>
 *
 * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a Heap-based SetOperation from the given Memory
 * @throws SketchesArgumentException if the family stored in the image is neither UNION nor
 *     INTERSECTION
 */
public static SetOperation heapify(final Memory srcMem, final long expectedSeed) {
  // Mask to an unsigned value so that a family byte >= 0x80 is not sign-extended into a
  // negative ID before the lookup.
  final int famID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
  final Family family = idToFamily(famID);
  switch (family) {
    case UNION: {
      return UnionImpl.heapifyInstance(srcMem, expectedSeed);
    }
    case INTERSECTION: {
      return IntersectionImpl.heapifyInstance(srcMem, expectedSeed);
    }
    default: {
      throw new SketchesArgumentException("SetOperation cannot heapify family: " + family.toString());
    }
  }
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class UpdateSketch method checkUnionQuickSelectFamily.
/**
 * Validates that the image's family is UNION or QUICKSELECT, that preambleLongs equals that
 * family's minimum preamble longs, and that lgNomLongs is not below MIN_LG_NOM_LONGS.
 *
 * @param mem the Memory image from which the family ID is extracted
 * @param preambleLongs the preamble-longs value to validate against the family
 * @param lgNomLongs the lgNomLongs value to validate against the minimum
 * @throws SketchesArgumentException if any of the checks fails
 */
static void checkUnionQuickSelectFamily(final Memory mem, final int preambleLongs, final int lgNomLongs) {
  // Family check first; only UNION and QUICKSELECT are accepted.
  final Family fam = Family.idToFamily(extractFamilyID(mem));
  switch (fam) {
    case UNION: {
      final boolean preLongsOk = preambleLongs == Family.UNION.getMinPreLongs();
      if (!preLongsOk) {
        throw new SketchesArgumentException(
            "Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs);
      }
      break;
    }
    case QUICKSELECT: {
      final boolean preLongsOk = preambleLongs == Family.QUICKSELECT.getMinPreLongs();
      if (!preLongsOk) {
        throw new SketchesArgumentException(
            "Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs);
      }
      break;
    }
    default: {
      throw new SketchesArgumentException(
          "Possible corruption: Invalid Family: " + fam.toString());
    }
  }

  // Then the nominal-size check.
  if (lgNomLongs >= MIN_LG_NOM_LONGS) {
    return;
  }
  throw new SketchesArgumentException(
      "Possible corruption: Current Memory lgNomLongs < min required size: "
      + lgNomLongs + " < " + MIN_LG_NOM_LONGS);
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class UpdateSketch method wrap.
/**
 * Wraps the sketch image in the given Memory and refers to it directly, without copying data
 * onto the Java heap.
 *
 * <p>As implemented here, only images whose family is QUICKSELECT and whose header reports
 * SerVer = 3 and preLongs = 3 are accepted; anything else is rejected with an exception.</p>
 *
 * @param srcMem an image of a Sketch where the image seed hash matches the given seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * Compact sketches store a 16-bit hash of the seed, but not the seed itself.
 * @return an UpdateSketch backed by the given Memory
 * @throws SketchesArgumentException if the family is not QUICKSELECT, or if SerVer or preLongs
 *     is not 3
 */
public static UpdateSketch wrap(final WritableMemory srcMem, final long expectedSeed) {
  // Header bytes are read in the same order as before: preLongs, serVer, family.
  final int preambleLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; // low 6 bits only
  final int serialVersion = srcMem.getByte(SER_VER_BYTE) & 0XFF;
  final Family family = Family.idToFamily(srcMem.getByte(FAMILY_BYTE) & 0XFF);

  if (family != Family.QUICKSELECT) {
    throw new SketchesArgumentException("A " + family + " sketch cannot be wrapped as an UpdateSketch.");
  }

  final boolean wrappable = (serialVersion == 3) && (preambleLongs == 3);
  if (!wrappable) {
    throw new SketchesArgumentException("Corrupted: An UpdateSketch image: must have SerVer = 3 and preLongs = 3");
  }
  return DirectQuickSelectSketch.writableWrap(srcMem, expectedSeed);
}
Aggregations