use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.
the class PreambleUtil method sketchPreambleToString.
/**
 * Builds a human-readable, multi-line summary of the preamble of a serialized sketch.
 *
 * <p>k is decoded from the encoded reservoir size when the serialization version is 1 and is
 * read directly otherwise. n is only read when the EMPTY flag is clear, and the H/R region
 * fields are only reported for non-empty {@code Family.VAROPT} images.
 *
 * @param mem the Memory holding the serialized sketch
 * @param family the sketch family, used for the banner text and to select VAROPT-only fields
 * @param preLongs the number of 8-byte preamble longs
 * @return the formatted preamble summary
 */
private static String sketchPreambleToString(final Memory mem, final Family family, final int preLongs) {
  final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(mem));
  final int serVer = extractSerVer(mem);

  // Flags
  final int flags = extractFlags(mem);
  final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
  final boolean isEmpty = (flags & EMPTY_FLAG_MASK) != 0;
  final boolean isGadget = (flags & GADGET_FLAG_MASK) != 0;
  final boolean isVarOpt = (family == Family.VAROPT);

  final int k;
  if (serVer == 1) {
    // version 1 stores k as an encoded short
    final short encK = extractEncodedReservoirSize(mem);
    k = ReservoirSize.decodeValue(encK);
  } else {
    k = extractK(mem);
  }

  final int preambleBytes = preLongs << 3;
  final long dataBytes = mem.getCapacity() - preambleBytes;

  // Locale.ROOT keeps the banner independent of the JVM default locale
  final String familyName = family.getFamilyName().toUpperCase(java.util.Locale.ROOT);

  final StringBuilder sb = new StringBuilder();
  // The opening banner must not say "END"; only the closing banner does.
  sb.append(LS)
    .append("### ").append(familyName).append(" PREAMBLE SUMMARY").append(LS)
    .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS)
    .append("Byte 0: ResizeFactor : ").append(rf.toString()).append(LS)
    .append("Byte 1: Serialization Version: ").append(serVer).append(LS)
    .append("Byte 2: Family : ").append(family.toString()).append(LS)
    .append("Byte 3: Flags Field : ").append(flagsStr).append(LS)
    .append(" EMPTY : ").append(isEmpty).append(LS);

  if (isVarOpt) {
    sb.append(" GADGET : ").append(isGadget).append(LS);
  }

  sb.append("Bytes 4-7: Sketch Size (k) : ").append(k).append(LS);

  if (!isEmpty) {
    // n is only present in a non-empty image
    final long n = extractN(mem);
    sb.append("Bytes 8-15: Items Seen (n) : ").append(n).append(LS);
  }

  if (isVarOpt && !isEmpty) {
    final int hCount = extractHRegionItemCount(mem);
    final int rCount = extractRRegionItemCount(mem);
    sb.append("Bytes 16-19: H region count : ").append(hCount).append(LS)
      .append("Bytes 20-23: R region count : ").append(rCount).append(LS);
    if (rCount > 0) {
      // read the R-region weight only when it is actually reported
      final double totalRWeight = extractTotalRWeight(mem);
      sb.append("Bytes 24-31: R region weight : ").append(totalRWeight).append(LS);
    }
  }

  sb.append("TOTAL Sketch Bytes : ").append(mem.getCapacity()).append(LS)
    .append(" Preamble Bytes : ").append(preambleBytes).append(LS)
    .append(" Data Bytes : ").append(dataBytes).append(LS)
    .append("### END ").append(familyName).append(" PREAMBLE SUMMARY").append(LS);

  return sb.toString();
}
use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.
the class PreambleUtil method unionPreambleToString.
/**
 * Builds a human-readable, multi-line summary of the preamble of a serialized union.
 *
 * <p>The maximum k is decoded from the encoded reservoir size when the serialization version
 * is 1 and is read directly otherwise.
 *
 * @param mem the Memory holding the serialized union
 * @param family the family, used for the banner text
 * @param preLongs the number of 8-byte preamble longs
 * @return the formatted preamble summary
 */
private static String unionPreambleToString(final Memory mem, final Family family, final int preLongs) {
  final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(mem));
  final int serVer = extractSerVer(mem);

  // Flags
  final int flags = extractFlags(mem);
  final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
  final boolean isEmpty = (flags & EMPTY_FLAG_MASK) != 0;

  final int k;
  if (serVer == 1) {
    // version 1 stores k as an encoded short
    final short encK = extractEncodedReservoirSize(mem);
    k = ReservoirSize.decodeValue(encK);
  } else {
    k = extractK(mem);
  }

  final int preambleBytes = preLongs << 3;
  final long dataBytes = mem.getCapacity() - preambleBytes;

  // Locale.ROOT keeps the banner independent of the JVM default locale
  final String familyName = family.getFamilyName().toUpperCase(java.util.Locale.ROOT);

  final StringBuilder sb = new StringBuilder();
  // The opening banner must not say "END"; only the closing banner does.
  sb.append(LS)
    .append("### ").append(familyName).append(" PREAMBLE SUMMARY").append(LS)
    .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS)
    .append("Byte 0: ResizeFactor : ").append(rf.toString()).append(LS)
    .append("Byte 1: Serialization Version : ").append(serVer).append(LS)
    .append("Byte 2: Family : ").append(family.toString()).append(LS)
    .append("Byte 3: Flags Field : ").append(flagsStr).append(LS)
    .append(" EMPTY : ").append(isEmpty).append(LS)
    .append("Bytes 4-7: Max Sketch Size (maxK): ").append(k).append(LS)
    .append("TOTAL Sketch Bytes : ").append(mem.getCapacity()).append(LS)
    .append(" Preamble Bytes : ").append(preambleBytes).append(LS)
    .append(" Sketch Bytes : ").append(dataBytes).append(LS)
    .append("### END ").append(familyName).append(" PREAMBLE SUMMARY").append(LS);

  return sb.toString();
}
use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.
the class ReservoirItemsSketch method heapify.
/**
 * Returns a sketch instance of this class from the given srcMem,
 * which must be a Memory representation of this sketch class.
 *
 * <p>Images written with serialization version 1 are accepted; for those, k is decoded from
 * the encoded reservoir size instead of being read directly.
 *
 * @param <T> The type of item this sketch contains
 * @param srcMem a Memory representation of a sketch of this class.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param serDe An instance of ArrayOfItemsSerDe
 * @return a sketch instance of this class
 * @throws SketchesArgumentException if the preamble long count is neither the family minimum
 *     nor maximum, if a non-empty image carries fewer than the maximum preamble longs, or if
 *     the serialization version is not supported
 */
public static <T> ReservoirItemsSketch<T> heapify(final Memory srcMem, final ArrayOfItemsSerDe<T> serDe) {
  Family.RESERVOIR.checkFamilyID(srcMem.getByte(FAMILY_BYTE));

  final int numPreLongs = extractPreLongs(srcMem);
  final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
  final int serVer = extractSerVer(srcMem);
  final boolean isEmpty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) != 0;

  // Check values
  final int minPreLongs = Family.RESERVOIR.getMinPreLongs();
  final int maxPreLongs = Family.RESERVOIR.getMaxPreLongs();
  final boolean preLongsEqMin = (numPreLongs == minPreLongs);
  final boolean preLongsEqMax = (numPreLongs == maxPreLongs);

  if (!preLongsEqMin && !preLongsEqMax) {
    throw new SketchesArgumentException("Possible corruption: preLongs must be " + minPreLongs + " or " + maxPreLongs + ": " + numPreLongs);
  }
  if (!isEmpty && !preLongsEqMax) {
    // a non-empty image stores n after the first preamble long, so the short preamble is invalid
    throw new SketchesArgumentException("Possible corruption: Non-empty sketch with only " + minPreLongs + " preLong(s)");
  }

  int k = extractK(srcMem);
  if (serVer != SER_VER) {
    if (serVer == 1) {
      final short encK = extractEncodedReservoirSize(srcMem);
      k = ReservoirSize.decodeValue(encK);
    } else {
      throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
    }
  }

  if (isEmpty) {
    return new ReservoirItemsSketch<>(k, rf);
  }

  // n is read only after the preamble has been validated
  final long itemsSeen = extractN(srcMem);
  final int preLongBytes = numPreLongs << 3;

  // default to full reservoir
  int allocatedItems = k;
  if (itemsSeen < k) {
    // under-full so determine size to allocate, using ceilingLog2(totalSeen) as minimum
    // casts to int are safe since under-full
    final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
    final int minLgSize = Util.toLog2(Util.ceilingPowerOf2((int) itemsSeen), "heapify");
    final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(), Math.max(minLgSize, MIN_LG_ARR_ITEMS));
    allocatedItems = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
  }

  final int itemsToRead = (int) Math.min(k, itemsSeen);
  final T[] data = serDe.deserializeFromMemory(srcMem.region(preLongBytes, srcMem.getCapacity() - preLongBytes), itemsToRead);
  final ArrayList<T> dataList = new ArrayList<>(Arrays.asList(data));

  final ReservoirItemsSketch<T> ris = new ReservoirItemsSketch<>(dataList, itemsSeen, rf, k);
  ris.data_.ensureCapacity(allocatedItems);
  ris.currItemsAlloc_ = allocatedItems;
  return ris;
}
use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.
the class ReservoirLongsSketch method heapify.
/**
 * Returns a sketch instance of this class from the given srcMem, which must be a Memory
 * representation of this sketch class.
 *
 * <p>Images written with serialization version 1 are accepted; for those, k is decoded from
 * the encoded reservoir size instead of being read directly.
 *
 * @param srcMem a Memory representation of a sketch of this class. <a href=
 * "{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @return a sketch instance of this class
 * @throws SketchesArgumentException if the preamble long count is neither the family minimum
 *     nor maximum, if a non-empty image carries fewer than the maximum preamble longs, or if
 *     the serialization version is not supported
 */
public static ReservoirLongsSketch heapify(final Memory srcMem) {
  Family.RESERVOIR.checkFamilyID(srcMem.getByte(FAMILY_BYTE));

  final int numPreLongs = extractPreLongs(srcMem);
  final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
  final int serVer = extractSerVer(srcMem);
  final boolean isEmpty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) != 0;

  // Check values
  final int minPreLongs = Family.RESERVOIR.getMinPreLongs();
  final int maxPreLongs = Family.RESERVOIR.getMaxPreLongs();
  final boolean preLongsEqMin = (numPreLongs == minPreLongs);
  final boolean preLongsEqMax = (numPreLongs == maxPreLongs);

  if (!preLongsEqMin && !preLongsEqMax) {
    throw new SketchesArgumentException("Possible corruption: preLongs must be " + minPreLongs + " or " + maxPreLongs + ": " + numPreLongs);
  }
  if (!isEmpty && !preLongsEqMax) {
    // a non-empty image stores n after the first preamble long, so the short preamble is invalid
    throw new SketchesArgumentException("Possible corruption: Non-empty sketch with only " + minPreLongs + " preLong(s)");
  }

  int k = extractK(srcMem);
  if (serVer != SER_VER) {
    if (serVer == 1) {
      final short encK = extractEncodedReservoirSize(srcMem);
      k = ReservoirSize.decodeValue(encK);
    } else {
      throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
    }
  }

  if (isEmpty) {
    return new ReservoirLongsSketch(k, rf);
  }

  // n is read only after the preamble has been validated
  final long itemsSeen = extractN(srcMem);
  final int preLongBytes = numPreLongs << 3;
  final int numSketchLongs = (int) Math.min(itemsSeen, k);

  // default to full reservoir
  int allocatedSize = k;
  if (itemsSeen < k) {
    // under-full so determine size to allocate, using ceilingLog2(totalSeen) as minimum
    // casts to int are safe since under-full
    final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
    final int minLgSize = Util.toLog2(Util.ceilingPowerOf2((int) itemsSeen), "heapify");
    final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(), Math.max(minLgSize, MIN_LG_ARR_LONGS));
    allocatedSize = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
  }

  // the array is sized for growth; only the first numSketchLongs entries come from srcMem
  final long[] data = new long[allocatedSize];
  srcMem.getLongArray(preLongBytes, data, 0, numSketchLongs);

  return new ReservoirLongsSketch(data, itemsSeen, rf, k);
}
use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.
the class VarOptItemsSketch method heapify.
/**
 * Returns a sketch instance of this class from the given srcMem,
 * which must be a Memory representation of this sketch class.
 *
 * <p>The image is read in this order: preamble, {@code hCount} weights, the mark bits (only
 * when the GADGET flag is set), then the {@code hCount + rCount} items.
 *
 * @param <T> The type of item this sketch contains
 * @param srcMem a Memory representation of a sketch of this class.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param serDe An instance of ArrayOfItemsSerDe
 * @return a sketch instance of this class
 * @throws SketchesArgumentException if the preamble is inconsistent (preamble long count,
 *     serialization version, family ID, k, n, region counts) or a stored weight is not positive
 */
@SuppressWarnings("null")
public static <T> VarOptItemsSketch<T> heapify(final Memory srcMem, final ArrayOfItemsSerDe<T> serDe) {
  final int numPreLongs = getAndCheckPreLongs(srcMem);
  final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
  final int serVer = extractSerVer(srcMem);
  final int familyId = extractFamilyID(srcMem);
  final int flags = extractFlags(srcMem);
  final boolean isEmpty = (flags & EMPTY_FLAG_MASK) != 0;
  final boolean isGadget = (flags & GADGET_FLAG_MASK) != 0;

  // Check values
  if (isEmpty) {
    if (numPreLongs != VO_PRELONGS_EMPTY) {
      throw new SketchesArgumentException("Possible corruption: Must be " + VO_PRELONGS_EMPTY + " for an empty sketch. Found: " + numPreLongs);
    }
  } else {
    if ((numPreLongs != VO_PRELONGS_WARMUP) && (numPreLongs != VO_PRELONGS_FULL)) {
      throw new SketchesArgumentException("Possible corruption: Must be " + VO_PRELONGS_WARMUP + " or " + VO_PRELONGS_FULL + " for a non-empty sketch. Found: " + numPreLongs);
    }
  }

  if (serVer != SER_VER) {
    throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
  }

  final int reqFamilyId = Family.VAROPT.getID();
  if (familyId != reqFamilyId) {
    throw new SketchesArgumentException("Possible Corruption: FamilyID must be " + reqFamilyId + ": " + familyId);
  }

  final int k = extractK(srcMem);
  if (k < 1) {
    throw new SketchesArgumentException("Possible Corruption: k must be at least 1: " + k);
  }

  if (isEmpty) {
    assert numPreLongs == Family.VAROPT.getMinPreLongs();
    return new VarOptItemsSketch<>(k, rf);
  }

  final long n = extractN(srcMem);
  if (n < 0) {
    throw new SketchesArgumentException("Possible Corruption: n cannot be negative: " + n);
  }

  // get rest of preamble
  final int hCount = extractHRegionItemCount(srcMem);
  final int rCount = extractRRegionItemCount(srcMem);

  if (hCount < 0) {
    throw new SketchesArgumentException("Possible Corruption: H region count cannot be negative: " + hCount);
  }
  if (rCount < 0) {
    throw new SketchesArgumentException("Possible Corruption: R region count cannot be negative: " + rCount);
  }

  double totalRWeight = 0.0;
  if (numPreLongs == Family.VAROPT.getMaxPreLongs()) {
    if (rCount > 0) {
      totalRWeight = extractTotalRWeight(srcMem);
    } else {
      throw new SketchesArgumentException("Possible Corruption: " + Family.VAROPT.getMaxPreLongs() + " preLongs but no items in R region");
    }
  } else if (rCount > 0) {
    // The weight offset below skips the R-region weight whenever rCount > 0, so a shorter
    // preamble would make the weight and mark/data offsets disagree.
    throw new SketchesArgumentException("Possible Corruption: " + numPreLongs + " preLongs but " + rCount + " items in R region");
  }

  final int preLongBytes = numPreLongs << 3;
  final int totalItems = hCount + rCount;

  // default to full
  int allocatedItems = k + 1;
  if (rCount == 0) {
    // Not in sampling mode, so determine size to allocate, using ceilingLog2(hCount) as minimum
    final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
    final int minLgSize = Util.toLog2(Util.ceilingPowerOf2(hCount), "heapify");
    final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(), Math.max(minLgSize, MIN_LG_ARR_ITEMS));
    allocatedItems = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
    if (allocatedItems == k) {
      ++allocatedItems;
    }
  }

  // allocate full-sized ArrayLists, but we store only hCount weights at any moment
  final long weightOffsetBytes = TOTAL_WEIGHT_R_DOUBLE + (rCount > 0 ? Double.BYTES : 0);
  final ArrayList<Double> weightList = new ArrayList<>(allocatedItems);
  final double[] wts = new double[allocatedItems];
  srcMem.getDoubleArray(weightOffsetBytes, wts, 0, hCount);

  // can't use Arrays.asList(wts) since double[] rather than Double[]
  for (int i = 0; i < hCount; ++i) {
    if (wts[i] <= 0.0) {
      throw new SketchesArgumentException("Possible Corruption: Non-positive weight in heapify(): " + wts[i]);
    }
    weightList.add(wts[i]);
  }

  // marks, if we have a gadget
  long markBytes = 0;
  int markCount = 0;
  ArrayList<Boolean> markList = null;
  if (isGadget) {
    final long markOffsetBytes = preLongBytes + ((long) hCount * Double.BYTES);
    markBytes = ArrayOfBooleansSerDe.computeBytesNeeded(hCount);
    markList = new ArrayList<>(allocatedItems);

    final ArrayOfBooleansSerDe booleansSerDe = new ArrayOfBooleansSerDe();
    // Size the region with markBytes, the same length used for the data offset below;
    // (hCount >>> 3) + 1 asks for one byte too many when hCount is a multiple of 8.
    final Boolean[] markArray = booleansSerDe.deserializeFromMemory(srcMem.region(markOffsetBytes, markBytes), hCount);
    for (Boolean mark : markArray) {
      if (mark) {
        ++markCount;
      }
    }
    markList.addAll(Arrays.asList(markArray));
  }

  final long offsetBytes = preLongBytes + ((long) hCount * Double.BYTES) + markBytes;
  final T[] data = serDe.deserializeFromMemory(srcMem.region(offsetBytes, srcMem.getCapacity() - offsetBytes), totalItems);
  final List<T> wrappedData = Arrays.asList(data);
  final ArrayList<T> dataList = new ArrayList<>(allocatedItems);
  dataList.addAll(wrappedData.subList(0, hCount));

  // Load items in R as needed
  if (rCount > 0) {
    // the gap
    weightList.add(-1.0);
    if (isGadget) {
      markList.add(false);
    }

    // R-region entries carry a placeholder weight of -1.0 and an unset mark
    for (int i = 0; i < rCount; ++i) {
      weightList.add(-1.0);
      if (isGadget) {
        markList.add(false);
      }
    }

    // the gap
    dataList.add(null);
    dataList.addAll(wrappedData.subList(hCount, totalItems));
  }

  final VarOptItemsSketch<T> sketch = new VarOptItemsSketch<>(dataList, weightList, k, n, allocatedItems, rf, hCount, rCount, totalRWeight);

  if (isGadget) {
    sketch.marks_ = markList;
    sketch.numMarksInH_ = markCount;
  }

  return sketch;
}
Aggregations