use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class PreambleUtil method preambleToString.
/**
 * Returns a human-readable string summary of the preamble state of the given Memory.
 *
 * <p>Note: other than making sure that the given Memory size is large enough for just the
 * preamble, this does not do much value checking of the contents of the preamble, as this is
 * primarily a tool for debugging the preamble visually. Because the contents are not validated,
 * the reported total sketch size is computed with {@code long} arithmetic so that an
 * implausibly large active-item count is shown as-is rather than wrapping around.</p>
 *
 * @param srcMem the given Memory.
 * @return the summary preamble string.
 */
public static String preambleToString(final Memory srcMem) {
  // make sure we can get the assumed preamble; all byte fields below are taken from pre0
  final long pre0 = checkPreambleSize(srcMem);
  final int preLongs = extractPreLongs(pre0);                            // byte 0
  final int serVer = extractSerVer(pre0);                                // byte 1
  final Family family = Family.idToFamily(extractFamilyID(pre0));        // byte 2
  final int lgMaxMapSize = extractLgMaxMapSize(pre0);                    // byte 3
  final int lgCurMapSize = extractLgCurMapSize(pre0);                    // byte 4
  final int flags = extractFlags(pre0);                                  // byte 5
  final int type = extractSerDeId(pre0);                                 // byte 6

  // flags rendered as an 8-character zero-padded binary string followed by the decimal value
  final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
  final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
  final int maxMapSize = 1 << lgMaxMapSize;
  final int curMapSize = 1 << lgCurMapSize;
  final int maxPreLongs = Family.FREQUENCY.getMaxPreLongs();

  // Assumed if preLongs == 1
  int activeItems = 0;
  long streamLength = 0;
  long offset = 0;
  if (preLongs == maxPreLongs) {
    // get full preamble
    final long[] preArr = new long[preLongs];
    srcMem.getLongArray(0, preArr, 0, preLongs);
    activeItems = extractActiveItems(preArr[1]);
    streamLength = preArr[2];
    offset = preArr[3];
  }

  // Widen before multiplying and shifting: activeItems comes straight from the unvalidated
  // image, and (preLongs + 2 * activeItems) * 8 overflows int for large counts.
  final long totalSketchBytes = ((long) preLongs + (2L * activeItems)) << 3;

  final StringBuilder sb = new StringBuilder();
  sb.append(LS)
    .append("### FREQUENCY SKETCH PREAMBLE SUMMARY:").append(LS)
    .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS)
    .append("Byte 1: Serialization Version: ").append(serVer).append(LS)
    .append("Byte 2: Family : ").append(family.toString()).append(LS)
    .append("Byte 3: MaxMapSize : ").append(maxMapSize).append(LS)
    .append("Byte 4: CurMapSize : ").append(curMapSize).append(LS)
    .append("Byte 5: Flags Field : ").append(flagsStr).append(LS)
    .append(" EMPTY : ").append(empty).append(LS)
    .append("Byte 6: Freq Sketch Type : ").append(type).append(LS);
  if (preLongs == 1) {
    sb.append(" --ABSENT, ASSUMED:").append(LS);
  } else {
    // Intended for preLongs == maxPreLongs; any other preLongs value also lands here and
    // reports the zero defaults assigned above.
    sb.append("Bytes 8-11 : ActiveItems : ").append(activeItems).append(LS);
    sb.append("Bytes 16-23: StreamLength : ").append(streamLength).append(LS)
      .append("Bytes 24-31: Offset : ").append(offset).append(LS);
  }
  sb.append("Preamble Bytes : ").append(preLongs * 8).append(LS);
  sb.append("TOTAL Sketch Bytes : ").append(totalSketchBytes).append(LS)
    .append("### END FREQUENCY SKETCH PREAMBLE SUMMARY").append(LS);
  return sb.toString();
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class UpdateSketchTest method checkBuilder.
/**
 * Verifies that every value set on an {@code UpdateSketchBuilder} is returned unchanged by the
 * matching getter, then prints the builder's string form.
 */
@Test
public void checkBuilder() {
  final UpdateSketchBuilder builder = UpdateSketch.builder();

  // seed
  final long expectedSeed = 12345L;
  builder.setSeed(expectedSeed);
  assertEquals(builder.getSeed(), expectedSeed);

  // p
  final float expectedP = 0.5f;
  builder.setP(expectedP);
  assertEquals(builder.getP(), expectedP);

  // resize factor
  final ResizeFactor expectedResizeFactor = ResizeFactor.X4;
  builder.setResizeFactor(expectedResizeFactor);
  assertEquals(builder.getResizeFactor(), expectedResizeFactor);

  // family
  final Family expectedFamily = Family.ALPHA;
  builder.setFamily(expectedFamily);
  assertEquals(builder.getFamily(), expectedFamily);

  // nominal entries are set as a count and read back as log-base-2
  final int expectedLgK = 10;
  final int nominalEntries = 1 << expectedLgK;
  builder.setNominalEntries(nominalEntries);
  assertEquals(builder.getLgNominalEntries(), expectedLgK);

  // memory request server
  final MemoryRequestServer expectedServer = new DefaultMemoryRequestServer();
  builder.setMemoryRequestServer(expectedServer);
  assertEquals(builder.getMemoryRequestServer(), expectedServer);

  println(builder.toString());
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class SketchTest method checkBuilder.
/**
 * Builds one sketch with explicit settings and one with no settings, and checks the resulting
 * concrete class name and configuration values of each.
 */
@Test
public void checkBuilder() {
  final long seed = 1021L;
  final float p = 0.5f;
  final int nominalEntries = 2048;
  final int expectedLgK = Integer.numberOfTrailingZeros(nominalEntries);
  final ResizeFactor resizeFactor = X4;
  final Family family = Family.ALPHA;

  // explicitly configured sketch
  final UpdateSketch configured = UpdateSketch.builder()
      .setSeed(seed)
      .setP(p)
      .setResizeFactor(resizeFactor)
      .setFamily(family)
      .setNominalEntries(nominalEntries)
      .build();
  assertEquals(configured.getClass().getSimpleName(), "HeapAlphaSketch");
  assertEquals(configured.getLgNomLongs(), expectedLgK);
  assertEquals(configured.getSeed(), seed);
  assertEquals(configured.getP(), p);

  // check reset of defaults
  final UpdateSketch defaulted = UpdateSketch.builder().build();
  assertEquals(defaulted.getClass().getSimpleName(), "HeapQuickSelectSketch");
  assertEquals(defaulted.getLgNomLongs(), Integer.numberOfTrailingZeros(DEFAULT_NOMINAL_ENTRIES));
  assertEquals(defaulted.getSeed(), DEFAULT_UPDATE_SEED);
  assertEquals(defaulted.getP(), 1.0f);
  assertEquals(defaulted.getResizeFactor(), ResizeFactor.X8);
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class CompactOperations method memoryToCompact.
/**
 * Converts a source Theta Sketch Memory image into a CompactSketch, produced either on the heap
 * or inside the supplied target Memory.
 * The hash seed is assumed to be OK and the image is assumed to be serVer = 3.
 *
 * <p>Empty and single-item sources are always emitted in the ordered form, regardless of
 * {@code dstOrdered}.</p>
 *
 * @param srcMem the given input source Memory image
 * @param dstOrdered the desired ordering of the resulting CompactSketch
 * @param dstMem the target for the CompactSketch if it is to be Direct; if null the result is
 * created on the heap.
 * @return a CompactSketch of the correct form.
 */
@SuppressWarnings("unused")
static CompactSketch memoryToCompact(final Memory srcMem, final boolean dstOrdered, final WritableMemory dstMem) {
  final boolean toDirect = dstMem != null;

  // --- Pre0 fields and flags of the source image ---
  final int srcPreLongs = extractPreLongs(srcMem);
  final int srcSerVer = extractSerVer(srcMem);
  final int srcFamId = extractFamilyID(srcMem);
  // result not used below; the lookup itself is retained
  final Family srcFamily = Family.idToFamily(srcFamId);
  final int srcLgArrLongs = extractLgArrLongs(srcMem);
  final int srcFlags = extractFlags(srcMem);
  final short srcSeedHash = (short) extractSeedHash(srcMem);

  // --- decode srcFlags ---
  final boolean srcReadOnlyFlag = (srcFlags & READ_ONLY_FLAG_MASK) > 0;
  final boolean srcEmptyFlag = (srcFlags & EMPTY_FLAG_MASK) > 0;
  final boolean srcCompactFlag = (srcFlags & COMPACT_FLAG_MASK) > 0;
  final boolean srcOrderedFlag = (srcFlags & ORDERED_FLAG_MASK) > 0; // not used
  final boolean srcSingleFlag = (srcFlags & SINGLEITEM_FLAG_MASK) > 0;
  final boolean single = srcSingleFlag
      || SingleItemSketch.otherCheckForSingleItem(srcPreLongs, srcSerVer, srcFamId, srcFlags);

  // --- pre1 and pre2 fields, with defaults when the preamble is too short to hold them ---
  final int curCount;
  if (single) {
    curCount = 1;
  } else if (srcPreLongs > 1) {
    curCount = extractCurCount(srcMem);
  } else {
    curCount = 0;
  }
  final long thetaLong;
  if (srcPreLongs > 2) {
    thetaLong = extractThetaLong(srcMem);
  } else {
    thetaLong = Long.MAX_VALUE;
  }

  // --- basic sanity checks ---
  if (srcEmptyFlag) {
    assert (curCount == 0) && (thetaLong == Long.MAX_VALUE);
  }
  if (single) {
    assert (curCount == 1) && (thetaLong == Long.MAX_VALUE);
  }
  checkFamilyAndFlags(srcFamId, srcCompactFlag, srcReadOnlyFlag);

  // Note: for empty and single we always output the ordered form.
  final boolean dstOrderedOut = srcEmptyFlag || single || dstOrdered;

  // byte offset of the first hash, immediately after the preamble longs
  final int dataOffsetBytes = srcPreLongs << 3;

  // --- empty source ---
  if (srcEmptyFlag) {
    if (!toDirect) {
      return EmptyCompactSketch.getInstance();
    }
    dstMem.putByteArray(0, EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, 8);
    return new DirectCompactSketch(dstMem);
  }

  // --- single-item source ---
  if (single) {
    final long hash = srcMem.getLong(dataOffsetBytes);
    final SingleItemSketch sis = new SingleItemSketch(hash, srcSeedHash);
    if (!toDirect) {
      return sis; // heap
    }
    dstMem.putByteArray(0, sis.toByteArray(), 0, 16);
    return new DirectCompactSketch(dstMem);
  }

  // --- more than one item: obtain the hash array ---
  final long[] hashArr;
  if (srcCompactFlag) {
    // already compact: the hashes are copied out directly
    hashArr = new long[curCount];
    srcMem.getLongArray(dataOffsetBytes, hashArr, 0, curCount);
  } else {
    // update sketch, thus hashTable form: read the whole table, then compact it
    final int srcCacheLen = 1 << srcLgArrLongs;
    final long[] hashTable = new long[srcCacheLen];
    srcMem.getLongArray(dataOffsetBytes, hashTable, 0, srcCacheLen);
    hashArr = compactCache(hashTable, curCount, thetaLong, dstOrderedOut);
  }

  int flagsOut = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK;
  if (dstOrderedOut) {
    flagsOut |= ORDERED_FLAG_MASK;
  }

  // --- load the destination ---
  if (!toDirect) {
    // heap
    return new HeapCompactSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong, dstOrderedOut);
  }
  final Memory tgtMem =
      loadCompactMemory(hashArr, srcSeedHash, curCount, thetaLong, dstMem, (byte) flagsOut, srcPreLongs);
  return new DirectCompactSketch(tgtMem);
}
use of org.apache.datasketches.Family in project sketches-core by DataSketches.
the class CompactSketch method wrap.
/**
 * Wraps the CompactSketch image in the given Memory and refers to it directly.
 * There is no data copying onto the java heap.
 * The wrap operation enables fast read-only merging and access to all the public read-only API.
 *
 * <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have
 * been explicitly stored as direct sketches can be wrapped.
 * Wrapping earlier serial version sketches will result in a heapify operation.
 * These early versions were never designed to "wrap".</p>
 *
 * <p>Wrapping any subclass of this class that is empty or contains only a single item will
 * result in heapified forms of empty and single item sketch respectively.
 * This is actually faster and consumes less overall memory.</p>
 *
 * <p>This method assumes that the sketch image was created with the correct hash seed, so it is
 * not checked. However, Serial Version 1 sketch images do not have a seedHash field,
 * so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.</p>
 *
 * @param srcMem an image of a Sketch.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
 * @return a CompactSketch backed by the given Memory except as above.
 * @throws IllegalArgumentException if the family recorded in the image is not COMPACT.
 * @throws SketchesArgumentException if a Serial Version 3 image lacks the compact or Read-Only
 * flag, or if the Serial Version is not 1, 2 or 3.
 */
public static CompactSketch wrap(final Memory srcMem) {
  final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
  final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
  final Family family = Family.idToFamily(familyID);
  if (family != Family.COMPACT) {
    throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!");
  }

  // Earlier serial versions are heapified rather than wrapped.
  if (serVer == 1) {
    return ForwardCompatibility.heapify1to3(srcMem, defaultSeedHash);
  }
  if (serVer == 2) {
    final short seedHashV2 = (short) extractSeedHash(srcMem);
    return ForwardCompatibility.heapify2to3(srcMem, seedHashV2);
  }
  if (serVer != 3) {
    throw new SketchesArgumentException("Corrupted: Serialization Version " + serVer + " not recognized.");
  }

  // serVer == 3 from here on
  if (PreambleUtil.isEmptyFlag(srcMem)) {
    return EmptyCompactSketch.getHeapInstance(srcMem);
  }
  final short seedHashV3 = (short) extractSeedHash(srcMem);
  if (otherCheckForSingleItem(srcMem)) {
    // SINGLEITEM?
    return SingleItemSketch.heapify(srcMem, seedHashV3);
  }

  // not empty & not singleItem: both flags must be present before wrapping
  final int flags = srcMem.getByte(FLAGS_BYTE);
  final boolean hasCompactFlag = (flags & COMPACT_FLAG_MASK) > 0;
  if (!hasCompactFlag) {
    throw new SketchesArgumentException("Corrupted: COMPACT family sketch image must have compact flag set");
  }
  final boolean hasReadOnlyFlag = (flags & READ_ONLY_FLAG_MASK) > 0;
  if (!hasReadOnlyFlag) {
    throw new SketchesArgumentException("Corrupted: COMPACT family sketch image must have Read-Only flag set");
  }
  return DirectCompactSketch.wrapInstance(srcMem, seedHashV3);
}
Aggregations