use of com.yahoo.sketches.SketchesArgumentException in project sketches-core by DataSketches.
the class DirectQuickSelectSketch method initNewDirectInstance.
/**
 * Creates a new, empty sketch whose state is stored in the supplied Memory.
 *
 * <p>The preamble fields are written into dstMem, the hash table area that follows the
 * preamble is zeroed, and a sketch object wrapping dstMem is returned.</p>
 *
 * @param lgNomLongs <a href="{@docRoot}/resources/dictionary.html#lgNomLongs">See lgNomLongs</a>.
 * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @param p
 * <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability, <i>p</i></a>
 * @param rf the resize factor; its lg value is stored in the preamble. When that lg value is 0
 * the hash table is sized at lgNomLongs + 1 from the start, otherwise it starts at
 * MIN_LG_ARR_LONGS.
 * <a href="{@docRoot}/resources/dictionary.html#resizeFactor">See Resize Factor</a>
 * @param dstMem the destination Memory. It cannot be null and its capacity must be at least
 * the number of bytes required for the preamble plus the initial hash table. The preamble
 * fields are overwritten and the hash table area is cleared.
 * @param unionGadget true if this sketch is implementing the Union gadget function
 * (family UNION). Otherwise, it is behaving as a normal QuickSelectSketch (family QUICKSELECT).
 * @return instance of this sketch
 * @throws SketchesArgumentException if the capacity of dstMem is smaller than required
 */
static DirectQuickSelectSketch initNewDirectInstance(final int lgNomLongs, final long seed,
    final float p, final ResizeFactor rf, final WritableMemory dstMem,
    final boolean unionGadget) {
  // The family determines the preamble size.
  final Family family = unionGadget ? Family.UNION : Family.QUICKSELECT;
  final int preambleLongs = family.getMinPreLongs();

  // The resize factor determines the initial hash table size.
  final int lgRF = rf.lg();
  final int lgArrLongs;
  if (lgRF == 0) {
    lgArrLongs = lgNomLongs + 1;
  } else {
    lgArrLongs = MIN_LG_ARR_LONGS;
  }

  // Refuse a Memory that cannot hold the preamble plus the initial hash table.
  final int requiredBytes = getMemBytes(lgArrLongs, preambleLongs);
  final long availableBytes = dstMem.getCapacity();
  if (availableBytes < requiredBytes) {
    throw new SketchesArgumentException(
        "Memory capacity is too small: " + availableBytes + " < " + requiredBytes);
  }

  //@formatter:off
  // Write the preamble.
  final Object memObj = dstMem.getArray();              //may be null
  final long memAdd = dstMem.getCumulativeOffset(0L);

  insertPreLongs(memObj, memAdd, preambleLongs);        //byte 0
  insertLgResizeFactor(memObj, memAdd, lgRF);           //byte 0
  insertSerVer(memObj, memAdd, SER_VER);                //byte 1
  insertFamilyID(memObj, memAdd, family.getID());       //byte 2
  insertLgNomLongs(memObj, memAdd, lgNomLongs);         //byte 3
  insertLgArrLongs(memObj, memAdd, lgArrLongs);         //byte 4
  //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4
  insertFlags(memObj, memAdd, EMPTY_FLAG_MASK);         //byte 5
  insertSeedHash(memObj, memAdd, Util.computeSeedHash(seed)); //bytes 6,7
  insertCurCount(memObj, memAdd, 0);                    //bytes 8-11
  insertP(memObj, memAdd, p);                           //bytes 12-15
  insertThetaLong(memObj, memAdd, (long) (p * MAX_THETA_LONG_AS_DOUBLE)); //bytes 16-23
  //@formatter:on

  // Zero the hash table area, which starts right after the preamble.
  final int hashTableOffsetBytes = preambleLongs << 3;
  final int hashTableLengthBytes = 8 << lgArrLongs;
  dstMem.clear(hashTableOffsetBytes, hashTableLengthBytes);

  final DirectQuickSelectSketch sketch =
      new DirectQuickSelectSketch(lgNomLongs, seed, preambleLongs, dstMem);
  sketch.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
  return sketch;
}
use of com.yahoo.sketches.SketchesArgumentException in project sketches-core by DataSketches.
the class DirectQuickSelectSketchR method checkIntegrity.
/**
 * Validates preamble values that were read from srcMem and throws if any of them indicates
 * that the Memory image is not a usable UNION or QUICKSELECT sketch.
 *
 * <p>The checks run in this order: serialization version, family and its preamble size,
 * lgNomLongs lower bound, disallowed flags, seed hash, Memory capacity, and finally the
 * relationship between theta, p and the table size.</p>
 *
 * @param srcMem the source Memory; only its capacity is examined here
 * @param seed the update hash seed whose computed hash must agree with seedHash
 * @param preambleLongs the preamble size in longs as read from the Memory
 * @param serVer the serialization version as read from the Memory
 * @param familyID the family ID as read from the Memory
 * @param lgNomLongs log-base-2 of the nominal entries as read from the Memory
 * @param lgArrLongs log-base-2 of the hash table size as read from the Memory
 * @param flags the flags byte as read from the Memory
 * @param seedHash the seed hash as read from the Memory
 * @param p the sampling probability as read from the Memory
 * @param thetaLong theta, as a long, as read from the Memory
 * @throws SketchesArgumentException if any check fails
 */
static void checkIntegrity(final Memory srcMem, final long seed, final int preambleLongs,
    final int serVer, final int familyID, final int lgNomLongs, final int lgArrLongs,
    final int flags, final short seedHash, final float p, final long thetaLong) {
  if (serVer != SER_VER) {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid Serialization Version: " + serVer);
  }

  // Only the UNION and QUICKSELECT families are accepted, each with its minimum preamble size.
  final Family family = Family.idToFamily(familyID);
  final boolean isUnion = family.equals(Family.UNION);
  final boolean isQuickSelect = !isUnion && family.equals(Family.QUICKSELECT);
  if (!isUnion && !isQuickSelect) {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid Family: " + family.toString());
  }
  if (isUnion && (preambleLongs != Family.UNION.getMinPreLongs())) {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs);
  }
  if (isQuickSelect && (preambleLongs != Family.QUICKSELECT.getMinPreLongs())) {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs);
  }

  if (lgNomLongs < MIN_LG_NOM_LONGS) {
    throw new SketchesArgumentException(
        "Possible corruption: Current Memory lgNomLongs < min required size: "
        + lgNomLongs + " < " + MIN_LG_NOM_LONGS);
  }

  // None of these flags may be set on the source image.
  final int disallowedFlags =
      ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK;
  final int offendingFlags = flags & disallowedFlags;
  if (offendingFlags > 0) {
    throw new SketchesArgumentException(
        "Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only");
  }

  Util.checkSeedHashes(seedHash, Util.computeSeedHash(seed));

  // The Memory must be able to hold the preamble plus the declared hash table.
  final long capacityBytes = srcMem.getCapacity();
  final int requiredBytes = getMemBytes(lgArrLongs, preambleLongs);
  if (capacityBytes < requiredBytes) {
    throw new SketchesArgumentException(
        "Possible corruption: Current Memory size < min required size: "
        + capacityBytes + " < " + requiredBytes);
  }

  // Theta below p is rejected while the table is no larger than the nominal size.
  final double theta = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
  if (lgArrLongs <= lgNomLongs) {
    if (theta < p) {
      throw new SketchesArgumentException(
          "Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. "
          + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
    }
  }
}
use of com.yahoo.sketches.SketchesArgumentException in project sketches-core by DataSketches.
the class HeapAlphaSketch method heapifyInstance.
/**
 * Heapify a sketch from a Memory object containing sketch data.
 *
 * <p>The preamble is read and validated (family, preamble size, serialization version, flags,
 * seed hash, Memory capacity, and the theta / p relationship), then the hash table is copied
 * from the Memory into a new on-heap array.</p>
 *
 * @param srcMem The source Memory object.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a>
 * @return instance of this sketch
 * @throws SketchesArgumentException if any of the preamble checks fails
 */
static HeapAlphaSketch heapifyInstance(final Memory srcMem, final long seed) {
  //byte 0 holds both preambleLongs (low 6 bits) and the lg resize factor (upper bits).
  //getByte returns a signed byte, so mask to 0..255 BEFORE shifting; otherwise a set top bit
  //sign-extends and the unsigned shift yields a value far outside the resize-factor range.
  final int preByte = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0XFF;
  final int preambleLongs = preByte & 0X3F;
  final ResizeFactor myRF = ResizeFactor.getRF(preByte >>> LG_RESIZE_FACTOR_BIT);
  //byte 1
  final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
  //byte 2
  final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
  //byte 3
  final int lgNomLongs = srcMem.getByte(LG_NOM_LONGS_BYTE) & 0XFF;
  //byte 4
  final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF;
  //byte 5
  final int flags = srcMem.getByte(FLAGS_BYTE) & 0XFF;
  //byte 6,7
  final short seedHash = srcMem.getShort(SEED_HASH_SHORT);
  //bytes 8-11
  final int curCount = srcMem.getInt(RETAINED_ENTRIES_INT);
  //bytes 12-15
  final float p = srcMem.getFloat(P_FLOAT);
  //bytes 16-23
  final long thetaLong = srcMem.getLong(THETA_LONG);

  //Only the ALPHA family, with its minimum preamble size, is accepted.
  final Family family = Family.idToFamily(familyID);
  if (family.equals(Family.ALPHA)) {
    if (preambleLongs != Family.ALPHA.getMinPreLongs()) {
      throw new SketchesArgumentException(
          "Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs);
    }
  } else {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid Family: " + family.toString());
  }

  if (serVer != SER_VER) {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid Serialization Version: " + serVer);
  }

  //None of these flags may be set on the source image.
  final int flagsMask =
      ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK;
  if ((flags & flagsMask) > 0) {
    throw new SketchesArgumentException(
        "Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only");
  }

  Util.checkSeedHashes(seedHash, Util.computeSeedHash(seed));

  //The Memory must be able to hold the preamble plus the declared hash table.
  final long curCapBytes = srcMem.getCapacity();
  final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs);
  if (curCapBytes < minReqBytes) {
    throw new SketchesArgumentException(
        "Possible corruption: Current Memory size < min required size: "
        + curCapBytes + " < " + minReqBytes);
  }

  final double theta = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
  if ((lgArrLongs <= lgNomLongs) && (theta < p)) {
    throw new SketchesArgumentException(
        "Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. "
        + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
  }

  //Derive the constructor arguments alpha and split1 from lgNomLongs and p.
  final double nomLongs = (1L << lgNomLongs);
  final double alpha = nomLongs / (nomLongs + 1.0);
  final long split1 = (long) ((p * (alpha + 1.0) / 2.0) * MAX_THETA_LONG_AS_DOUBLE);

  //Build the heap sketch and restore its state from the values read above.
  final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, seed, p, myRF, alpha, split1);
  has.lgArrLongs_ = lgArrLongs;
  has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
  has.curCount_ = curCount;
  has.thetaLong_ = thetaLong;
  has.empty_ = (flags & EMPTY_FLAG_MASK) > 0;
  has.cache_ = new long[1 << lgArrLongs];
  //read in as hash table
  srcMem.getLongArray(preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs);
  return has;
}
use of com.yahoo.sketches.SketchesArgumentException in project sketches-core by DataSketches.
the class ReservoirItemsSketch method heapify.
/**
 * Returns a sketch instance of this class from the given srcMem,
 * which must be a Memory representation of this sketch class.
 *
 * <p>Serialization version 1 images are accepted; for those, k is decoded from the encoded
 * reservoir size field instead of being read directly.</p>
 *
 * @param <T> The type of item this sketch contains
 * @param srcMem a Memory representation of a sketch of this class.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param serDe An instance of ArrayOfItemsSerDe
 * @return a sketch instance of this class
 * @throws SketchesArgumentException if the preamble size is neither the minimum nor the
 * maximum for the RESERVOIR family, if a non-empty sketch has only the minimum preamble size,
 * or if the serialization version is not supported
 */
public static <T> ReservoirItemsSketch<T> heapify(final Memory srcMem,
    final ArrayOfItemsSerDe<T> serDe) {
  Family.RESERVOIR.checkFamilyID(srcMem.getByte(FAMILY_BYTE));

  final int numPreLongs = extractPreLongs(srcMem);
  final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
  final int serVer = extractSerVer(srcMem);
  final boolean isEmpty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) != 0;

  // Check the preamble size before reading any field that depends on it.
  final int minPreLongs = Family.RESERVOIR.getMinPreLongs();
  final int maxPreLongs = Family.RESERVOIR.getMaxPreLongs();
  final boolean preLongsEqMin = (numPreLongs == minPreLongs);
  final boolean preLongsEqMax = (numPreLongs == maxPreLongs);

  if (!preLongsEqMin && !preLongsEqMax) {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid PreambleLongs value for RESERVOIR: " + numPreLongs
        + ". Must be " + minPreLongs + " or " + maxPreLongs);
  }
  // A non-empty sketch must carry the full preamble.
  if (!isEmpty && preLongsEqMin && !preLongsEqMax) {
    throw new SketchesArgumentException(
        "Possible corruption: Non-empty sketch with only " + minPreLongs + " preLong(s)");
  }

  // The item count is only read for a non-empty sketch.
  final long itemsSeen = (isEmpty ? 0 : extractN(srcMem));
  int k = extractK(srcMem);

  if (serVer != SER_VER) {
    if (serVer == 1) {
      // Version 1 stores the reservoir size in encoded form.
      final short encK = extractEncodedReservoirSize(srcMem);
      k = ReservoirSize.decodeValue(encK);
    } else {
      throw new SketchesArgumentException(
          "Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
    }
  }

  if (isEmpty) {
    return new ReservoirItemsSketch<>(k, rf);
  }

  final int preLongBytes = numPreLongs << 3;

  // default to full reservoir
  int allocatedItems = k;
  if (itemsSeen < k) {
    // under-full so determine size to allocate, using ceilingLog2(totalSeen) as minimum
    // casts to int are safe since under-full
    final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
    final int minLgSize = Util.toLog2(Util.ceilingPowerOf2((int) itemsSeen), "heapify");
    final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(),
        Math.max(minLgSize, MIN_LG_ARR_ITEMS));
    allocatedItems = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
  }

  // Items are stored immediately after the preamble; read at most k of them.
  final int itemsToRead = (int) Math.min(k, itemsSeen);
  final T[] data = serDe.deserializeFromMemory(
      srcMem.region(preLongBytes, srcMem.getCapacity() - preLongBytes), itemsToRead);
  final ArrayList<T> dataList = new ArrayList<>(Arrays.asList(data));

  final ReservoirItemsSketch<T> ris = new ReservoirItemsSketch<>(dataList, itemsSeen, rf, k);
  ris.data_.ensureCapacity(allocatedItems);
  ris.currItemsAlloc_ = allocatedItems;
  return ris;
}
use of com.yahoo.sketches.SketchesArgumentException in project sketches-core by DataSketches.
the class ReservoirLongsUnion method heapify.
/**
 * Instantiates a Union from Memory.
 *
 * <p>Serialization version 1 images are accepted; for those, maxK is decoded from the encoded
 * reservoir size field instead of being read directly. When the image is not flagged empty,
 * the bytes following the preamble are passed to the new union as a serialized sketch.</p>
 *
 * @param srcMem Memory object containing a serialized union
 * @return A ReservoirLongsUnion created from the provided Memory
 * @throws SketchesArgumentException if the preamble size is neither the minimum nor the
 * maximum for the RESERVOIR_UNION family, or if the serialization version is not supported
 */
public static ReservoirLongsUnion heapify(final Memory srcMem) {
  Family.RESERVOIR_UNION.checkFamilyID(srcMem.getByte(FAMILY_BYTE));

  final int numPreLongs = extractPreLongs(srcMem);
  final int serVer = extractSerVer(srcMem);
  final boolean isEmpty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) != 0;
  int maxK = extractMaxK(srcMem);

  // The preamble size must be one of the two sizes defined for this family.
  final int minPreLongs = Family.RESERVOIR_UNION.getMinPreLongs();
  final int maxPreLongs = Family.RESERVOIR_UNION.getMaxPreLongs();
  final boolean preLongsEqMin = (numPreLongs == minPreLongs);
  final boolean preLongsEqMax = (numPreLongs == maxPreLongs);
  if (!preLongsEqMin && !preLongsEqMax) {
    throw new SketchesArgumentException(
        "Possible corruption: Invalid PreambleLongs value for RESERVOIR_UNION: " + numPreLongs
        + ". Must be " + minPreLongs + " or " + maxPreLongs);
  }

  if (serVer != SER_VER) {
    if (serVer == 1) {
      // Version 1 stores the reservoir size in encoded form.
      final short encMaxK = extractEncodedReservoirSize(srcMem);
      maxK = ReservoirSize.decodeValue(encMaxK);
    } else {
      throw new SketchesArgumentException(
          "Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
    }
  }

  final ReservoirLongsUnion rlu = new ReservoirLongsUnion(maxK);

  if (!isEmpty) {
    // Everything after the preamble is handed to the union as a serialized sketch.
    final int preLongBytes = numPreLongs << 3;
    final Memory sketchMem = srcMem.region(preLongBytes, srcMem.getCapacity() - preLongBytes);
    rlu.update(sketchMem);
  }

  return rlu;
}
Aggregations