Search in sources :

Example 6 with ResizeFactor

use of com.yahoo.sketches.ResizeFactor in project sketches-core by DataSketches.

the class PreambleUtil method preambleToString.

/**
   * Returns a human readable string summary of the preamble state of the given Memory.
   * Note: other than making sure that the given Memory size is large
   * enough for just the preamble, this does not do much value checking of the contents of the
   * preamble as this is primarily a tool for debugging the preamble visually.
   *
   * <p>Fields that are not physically present for the given number of preamble longs are
   * reported with assumed defaults (curCount = 0, p = 1.0, theta derived from p) and are
   * listed under an " --ABSENT, ASSUMED:" marker.
   *
   * @param mem the given Memory.
   * @return the summary preamble string.
   */
public static String preambleToString(final WritableMemory mem) {
    //may be null
    final Object memObj = mem.getArray();
    final long memAdd = mem.getCumulativeOffset(0L);
    final int preLongs = getAndCheckPreLongs(memObj, memAdd, mem);
    final ResizeFactor rf = ResizeFactor.getRF(extractLgResizeFactor(memObj, memAdd));
    final int serVer = extractSerVer(memObj, memAdd);
    final Family family = Family.idToFamily(extractFamilyID(memObj, memAdd));
    final int lgNomLongs = extractLgNomLongs(memObj, memAdd);
    final int lgArrLongs = extractLgArrLongs(memObj, memAdd);

    //Flags
    final int flags = extractFlags(memObj, memAdd);
    final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
    final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
    final String nativeOrder = ByteOrder.nativeOrder().toString();
    final boolean compact = (flags & COMPACT_FLAG_MASK) > 0;
    final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0;
    final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
    final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
    final int seedHash = extractSeedHash(memObj, memAdd);

    //Assumed if preLongs == 1
    int curCount = 0;
    float p = (float) 1.0;
    if ((preLongs >= 2) && (preLongs <= 4)) {
        curCount = extractCurCount(memObj, memAdd);
        p = extractP(memObj, memAdd);
    }
    //Assumed (derived from p) if preLongs == 1 or 2
    long thetaLong = (long) (p * MAX_THETA_LONG_AS_DOUBLE);
    if ((preLongs == 3) || (preLongs == 4)) {
        thetaLong = extractThetaLong(memObj, memAdd);
    }
    //Assumed (same as theta) if preLongs == 1 or 2 or 3
    final long thetaULong = (preLongs == 4) ? extractUnionThetaLong(memObj, memAdd) : thetaLong;

    final double thetaDbl = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
    final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
    final double thetaUDbl = thetaULong / MAX_THETA_LONG_AS_DOUBLE;
    final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16);

    final StringBuilder sb = new StringBuilder();
    sb.append(LS);
    sb.append("### SKETCH PREAMBLE SUMMARY:").append(LS);
    sb.append("Byte  0: Preamble Longs       : ").append(preLongs).append(LS);
    sb.append("Byte  0: ResizeFactor         : ").append(rf.toString()).append(LS);
    sb.append("Byte  1: Serialization Version: ").append(serVer).append(LS);
    sb.append("Byte  2: Family               : ").append(family.toString()).append(LS);
    sb.append("Byte  3: LgNomLongs           : ").append(lgNomLongs).append(LS);
    sb.append("Byte  4: LgArrLongs           : ").append(lgArrLongs).append(LS);
    sb.append("Byte  5: Flags Field          : ").append(flagsStr).append(LS);
    sb.append("  BIG_ENDIAN_STORAGE          : ").append(bigEndian).append(LS);
    sb.append("  (Native Byte Order)         : ").append(nativeOrder).append(LS);
    sb.append("  READ_ONLY                   : ").append(readOnly).append(LS);
    sb.append("  EMPTY                       : ").append(empty).append(LS);
    sb.append("  COMPACT                     : ").append(compact).append(LS);
    sb.append("  ORDERED                     : ").append(ordered).append(LS);
    sb.append("Bytes 6-7  : Seed Hash        : ").append(Integer.toHexString(seedHash)).append(LS);

    //Only preLongs of 1 through 4 have a detailed layout section; any other value prints none.
    if ((preLongs >= 1) && (preLongs <= 4)) {
        if (preLongs == 1) {
            //everything below is assumed
            sb.append(" --ABSENT, ASSUMED:").append(LS);
        }
        sb.append("Bytes 8-11 : CurrentCount     : ").append(curCount).append(LS);
        sb.append("Bytes 12-15: P                : ").append(p).append(LS);
        if (preLongs == 2) {
            //count and p are present, theta is assumed
            sb.append(" --ABSENT, ASSUMED:").append(LS);
        }
        sb.append("Bytes 16-23: Theta (double)   : ").append(thetaDbl).append(LS);
        sb.append("             Theta (long)     : ").append(thetaLong).append(LS);
        sb.append("             Theta (long,hex) : ").append(thetaHex).append(LS);
        if (preLongs == 4) {
            //the 8-byte union theta follows theta, i.e., bytes 24-31
            sb.append("Bytes 24-31: ThetaU (double)  : ").append(thetaUDbl).append(LS);
            sb.append("             ThetaU (long)    : ").append(thetaULong).append(LS);
            sb.append("             ThetaU (long,hex): ").append(thetaUHex).append(LS);
        }
    }

    sb.append("Preamble Bytes                : ").append(preLongs * 8).append(LS);
    //long arithmetic so that a large (or corrupt) count cannot wrap around
    sb.append("Data Bytes                    : ").append(curCount * 8L).append(LS);
    sb.append("TOTAL Sketch Bytes            : ").append(mem.getCapacity()).append(LS);
    sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS);
    return sb.toString();
}
Also used : Family(com.yahoo.sketches.Family) ResizeFactor(com.yahoo.sketches.ResizeFactor)

Example 7 with ResizeFactor

use of com.yahoo.sketches.ResizeFactor in project sketches-core by DataSketches.

the class HeapAlphaSketch method heapifyInstance.

/**
   * Heapify a sketch from a Memory object containing sketch data.
   * The preamble is read and validated (family, preamble longs, serialization version,
   * flags, seed hash, capacity, theta versus p) before the hash table is copied onto the heap.
   *
   * @param srcMem The source Memory object.
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
   * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a>
   * @return instance of this sketch
   * @throws SketchesArgumentException if any of the preamble checks fail.
   */
static HeapAlphaSketch heapifyInstance(final Memory srcMem, final long seed) {
    //byte 0: read once and mask to an unsigned value. Without the mask the byte is
    // sign-extended to int, and the unsigned shift below would produce a huge value
    // instead of the small resize-factor field whenever the top bit is set.
    final int byte0 = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0XFF;
    final int preambleLongs = byte0 & 0X3F;
    final ResizeFactor myRF = ResizeFactor.getRF(byte0 >>> LG_RESIZE_FACTOR_BIT);
    //byte 1
    final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
    //byte 2
    final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
    //byte 3
    final int lgNomLongs = srcMem.getByte(LG_NOM_LONGS_BYTE) & 0XFF;
    //byte 4
    final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF;
    //byte 5
    final int flags = srcMem.getByte(FLAGS_BYTE) & 0XFF;
    //byte 6,7
    final short seedHash = srcMem.getShort(SEED_HASH_SHORT);
    //bytes 8-11
    final int curCount = srcMem.getInt(RETAINED_ENTRIES_INT);
    //bytes 12-15
    final float p = srcMem.getFloat(P_FLOAT);
    //bytes 16-23
    final long thetaLong = srcMem.getLong(THETA_LONG);

    final Family family = Family.idToFamily(familyID);
    if (!family.equals(Family.ALPHA)) {
        throw new SketchesArgumentException("Possible corruption: Invalid Family: " + family.toString());
    }
    if (preambleLongs != Family.ALPHA.getMinPreLongs()) {
        throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs);
    }
    if (serVer != SER_VER) {
        throw new SketchesArgumentException("Possible corruption: Invalid Serialization Version: " + serVer);
    }
    //none of these flags may be set in the source image
    final int flagsMask = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK;
    if ((flags & flagsMask) > 0) {
        throw new SketchesArgumentException("Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only");
    }
    Util.checkSeedHashes(seedHash, Util.computeSeedHash(seed));

    final long curCapBytes = srcMem.getCapacity();
    final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs);
    if (curCapBytes < minReqBytes) {
        throw new SketchesArgumentException("Possible corruption: Current Memory size < min required size: " + curCapBytes + " < " + minReqBytes);
    }
    final double theta = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
    if ((lgArrLongs <= lgNomLongs) && (theta < p)) {
        throw new SketchesArgumentException("Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. " + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
    }

    final double nomLongs = (1L << lgNomLongs);
    final double alpha = nomLongs / (nomLongs + 1.0);
    final long split1 = (long) ((p * (alpha + 1.0) / 2.0) * MAX_THETA_LONG_AS_DOUBLE);

    //construct, then overwrite the state that came from the serialized image
    final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, seed, p, myRF, alpha, split1);
    has.lgArrLongs_ = lgArrLongs;
    has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
    has.curCount_ = curCount;
    has.thetaLong_ = thetaLong;
    has.empty_ = (flags & EMPTY_FLAG_MASK) > 0;
    has.cache_ = new long[1 << lgArrLongs];
    //read in as hash table; the data starts right after the preamble (8 bytes per preamble long)
    srcMem.getLongArray(preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs);
    return has;
}
Also used : SketchesArgumentException(com.yahoo.sketches.SketchesArgumentException) Family(com.yahoo.sketches.Family) ResizeFactor(com.yahoo.sketches.ResizeFactor)

Example 8 with ResizeFactor

use of com.yahoo.sketches.ResizeFactor in project sketches-core by DataSketches.

the class HeapAlphaSketch method resizeClean.

//Invoked when curCount > hashTableThreshold.
//Checks for rare lockup condition.
// Used by hashUpdate()
private final void resizeClean() {
    //target size is one power of two above the nominal size
    final int lgTargetLongs = lgNomLongs_ + 1;
    if (lgTargetLongs <= lgArrLongs_) {
        //at tgt size or larger, no dirty values, must take drastic measures, very rare.
        forceResizeCleanCache(1);
        return;
    }
    //not yet at tgt size, so the remaining growth is > 0
    final int lgRemaining = lgTargetLongs - lgArrLongs_;
    //the resize factor's lg() could be 0, so grow by at least 1 and at most lgRemaining
    final int lgGrowBy = max(min(getResizeFactor().lg(), lgRemaining), 1);
    forceResizeCleanCache(lgGrowBy);
}
Also used : ResizeFactor(com.yahoo.sketches.ResizeFactor)

Example 9 with ResizeFactor

use of com.yahoo.sketches.ResizeFactor in project sketches-core by DataSketches.

the class ReservoirItemsSketch method heapify.

/**
   * Returns a sketch instance of this class from the given srcMem,
   * which must be a Memory representation of this sketch class.
   *
   * @param <T>    The type of item this sketch contains
   * @param srcMem a Memory representation of a sketch of this class.
   *               <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
   * @param serDe  An instance of ArrayOfItemsSerDe
   * @return a sketch instance of this class
   * @throws SketchesArgumentException if the number of preamble longs or the serialization
   *         version is not an accepted value.
   */
public static <T> ReservoirItemsSketch<T> heapify(final Memory srcMem, final ArrayOfItemsSerDe<T> serDe) {
    Family.RESERVOIR.checkFamilyID(srcMem.getByte(FAMILY_BYTE));
    final int numPreLongs = extractPreLongs(srcMem);
    final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
    final int serVer = extractSerVer(srcMem);
    final boolean isEmpty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) != 0;
    final long itemsSeen = (isEmpty ? 0 : extractN(srcMem));
    int k = extractK(srcMem);

    // Check values: preLongs must be either the family minimum or the family maximum
    final int minPreLongs = Family.RESERVOIR.getMinPreLongs();
    final int maxPreLongs = Family.RESERVOIR.getMaxPreLongs();
    if ((numPreLongs != minPreLongs) && (numPreLongs != maxPreLongs)) {
        throw new SketchesArgumentException("Possible corruption: Must have " + minPreLongs + " or " + maxPreLongs + " preLongs. Found: " + numPreLongs);
    }
    if (serVer != SER_VER) {
        if (serVer == 1) {
            // serialization version 1 stores the reservoir size in an encoded form
            final short encK = extractEncodedReservoirSize(srcMem);
            k = ReservoirSize.decodeValue(encK);
        } else {
            throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
        }
    }
    if (isEmpty) {
        return new ReservoirItemsSketch<>(k, rf);
    }

    final int preLongBytes = numPreLongs << 3;
    // default to full reservoir
    int allocatedItems = k;
    if (itemsSeen < k) {
        // under-full so determine size to allocate, using ceilingLog2(totalSeen) as minimum
        // casts to int are safe since under-full
        final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
        final int minLgSize = Util.toLog2(Util.ceilingPowerOf2((int) itemsSeen), "heapify");
        final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(), Math.max(minLgSize, MIN_LG_ARR_ITEMS));
        allocatedItems = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
    }

    // the items are deserialized from everything that follows the preamble
    final int itemsToRead = (int) Math.min(k, itemsSeen);
    final T[] data = serDe.deserializeFromMemory(srcMem.region(preLongBytes, srcMem.getCapacity() - preLongBytes), itemsToRead);
    final ArrayList<T> dataList = new ArrayList<>(Arrays.asList(data));
    final ReservoirItemsSketch<T> ris = new ReservoirItemsSketch<>(dataList, itemsSeen, rf, k);
    ris.data_.ensureCapacity(allocatedItems);
    ris.currItemsAlloc_ = allocatedItems;
    return ris;
}
Also used : SketchesArgumentException(com.yahoo.sketches.SketchesArgumentException) ArrayList(java.util.ArrayList) ResizeFactor(com.yahoo.sketches.ResizeFactor) PreambleUtil.extractResizeFactor(com.yahoo.sketches.sampling.PreambleUtil.extractResizeFactor)

Example 10 with ResizeFactor

use of com.yahoo.sketches.ResizeFactor in project sketches-core by DataSketches.

the class VarOptItemsSketch method heapify.

/**
   * Returns a sketch instance of this class from the given srcMem,
   * which must be a Memory representation of this sketch class.
   *
   * <p>After the preamble, the image is read as: hCount weights (doubles), then, only if the
   * gadget flag is set, the serialized marks for the H region, then the serialized items
   * (hCount + rCount of them).
   *
   * @param <T>    The type of item this sketch contains
   * @param srcMem a Memory representation of a sketch of this class.
   *               <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
   * @param serDe  An instance of ArrayOfItemsSerDe
   * @return a sketch instance of this class
   * @throws SketchesArgumentException if a preamble value or a stored weight fails validation.
   */
@SuppressWarnings("null")
public static <T> VarOptItemsSketch<T> heapify(final Memory srcMem, final ArrayOfItemsSerDe<T> serDe) {
    final int numPreLongs = getAndCheckPreLongs(srcMem);
    final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
    final int serVer = extractSerVer(srcMem);
    final int familyId = extractFamilyID(srcMem);
    final int flags = extractFlags(srcMem);
    final boolean isEmpty = (flags & EMPTY_FLAG_MASK) != 0;
    final boolean isGadget = (flags & GADGET_FLAG_MASK) != 0;
    // Check values
    if (numPreLongs != Family.VAROPT.getMinPreLongs() && numPreLongs != Family.VAROPT.getMaxPreLongs() && numPreLongs != PreambleUtil.VO_WARMUP_PRELONGS) {
        throw new SketchesArgumentException("Possible corruption: Must have " + Family.VAROPT.getMinPreLongs() + ", " + PreambleUtil.VO_WARMUP_PRELONGS + ", or " + Family.VAROPT.getMaxPreLongs() + " preLongs. Found: " + numPreLongs);
    }
    if (serVer != SER_VER) {
        throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
    }
    final int reqFamilyId = Family.VAROPT.getID();
    if (familyId != reqFamilyId) {
        throw new SketchesArgumentException("Possible Corruption: FamilyID must be " + reqFamilyId + ": " + familyId);
    }
    final int k = extractK(srcMem);
    if (k < 2) {
        throw new SketchesArgumentException("Possible Corruption: k must be at least 2: " + k);
    }
    if (isEmpty) {
        assert numPreLongs == Family.VAROPT.getMinPreLongs();
        return new VarOptItemsSketch<>(k, rf);
    }
    final long n = extractN(srcMem);
    if (n < 0) {
        throw new SketchesArgumentException("Possible Corruption: n cannot be negative: " + n);
    }
    // get rest of preamble
    final int hCount = extractHRegionItemCount(srcMem);
    final int rCount = extractRRegionItemCount(srcMem);
    if (hCount < 0) {
        throw new SketchesArgumentException("Possible Corruption: H region count cannot be " + "negative: " + hCount);
    }
    if (rCount < 0) {
        throw new SketchesArgumentException("Possible Corruption: R region count cannot be " + "negative: " + rCount);
    }
    double totalRWeight = 0.0;
    if (numPreLongs == Family.VAROPT.getMaxPreLongs()) {
        if (rCount > 0) {
            totalRWeight = extractTotalRWeight(srcMem);
        } else {
            throw new SketchesArgumentException("Possible Corruption: " + Family.VAROPT.getMaxPreLongs() + " preLongs but no items in R region");
        }
    }
    final int preLongBytes = numPreLongs << 3;
    final int totalItems = hCount + rCount;
    // size of the stored H-region weights; computed in long so that a large hCount
    // cannot overflow int before being used as a byte offset
    final long weightBytes = (long) hCount * Double.BYTES;
    // default to full
    int allocatedItems = k + 1;
    if (rCount == 0) {
        // Not in sampling mode, so determine size to allocate, using ceilingLog2(hCount) as minimum
        final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
        final int minLgSize = Util.toLog2(Util.ceilingPowerOf2(hCount), "heapify");
        final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(), Math.max(minLgSize, MIN_LG_ARR_ITEMS));
        allocatedItems = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
        if (allocatedItems == k) {
            ++allocatedItems;
        }
    }
    // allocate full-sized ArrayLists, but we store only hCount weights at any moment
    final long weightOffsetBytes = TOTAL_WEIGHT_R_DOUBLE + (rCount > 0 ? Double.BYTES : 0);
    final ArrayList<Double> weightList = new ArrayList<>(allocatedItems);
    final double[] wts = new double[allocatedItems];
    srcMem.getDoubleArray(weightOffsetBytes, wts, 0, hCount);
    // can't use Arrays.asList(wts) since double[] rather than Double[]
    for (int i = 0; i < hCount; ++i) {
        if (wts[i] <= 0.0) {
            throw new SketchesArgumentException("Possible Corruption: " + "Non-positive weight in heapify(): " + wts[i]);
        }
        weightList.add(wts[i]);
    }
    // marks, if we have a gadget
    long markBytes = 0;
    int markCount = 0;
    ArrayList<Boolean> markList = null;
    if (isGadget) {
        final long markOffsetBytes = preLongBytes + weightBytes;
        markBytes = ArrayOfBooleansSerDe.computeBytesNeeded(hCount);
        markList = new ArrayList<>(allocatedItems);
        final ArrayOfBooleansSerDe booleansSerDe = new ArrayOfBooleansSerDe();
        // NOTE(review): the region length below is computed independently of markBytes;
        // confirm the two agree for every hCount (e.g., multiples of 8).
        final Boolean[] markArray = booleansSerDe.deserializeFromMemory(srcMem.region(markOffsetBytes, (hCount >> 3) + 1), hCount);
        for (Boolean mark : markArray) {
            if (mark) {
                ++markCount;
            }
        }
        markList.addAll(Arrays.asList(markArray));
    }
    // items follow the weights and (for a gadget) the marks
    final long offsetBytes = preLongBytes + weightBytes + markBytes;
    final T[] data = serDe.deserializeFromMemory(srcMem.region(offsetBytes, srcMem.getCapacity() - offsetBytes), totalItems);
    final List<T> wrappedData = Arrays.asList(data);
    final ArrayList<T> dataList = new ArrayList<>(allocatedItems);
    dataList.addAll(wrappedData.subList(0, hCount));
    // Load items in R as needed
    if (rCount > 0) {
        // the gap
        weightList.add(-1.0);
        // the gap
        if (isGadget) {
            markList.add(false);
        }
        // R-region entries carry a placeholder weight of -1.0 and an unset mark
        for (int i = 0; i < rCount; ++i) {
            weightList.add(-1.0);
            if (isGadget) {
                markList.add(false);
            }
        }
        // the gap
        dataList.add(null);
        dataList.addAll(wrappedData.subList(hCount, totalItems));
    }
    final VarOptItemsSketch<T> sketch = new VarOptItemsSketch<>(dataList, weightList, k, n, allocatedItems, rf, hCount, rCount, totalRWeight);
    if (isGadget) {
        sketch.marks_ = markList;
        sketch.numMarksInH_ = markCount;
    }
    return sketch;
}
Also used : ArrayOfBooleansSerDe(com.yahoo.sketches.ArrayOfBooleansSerDe) ArrayList(java.util.ArrayList) ResizeFactor(com.yahoo.sketches.ResizeFactor) PreambleUtil.extractResizeFactor(com.yahoo.sketches.sampling.PreambleUtil.extractResizeFactor) SketchesArgumentException(com.yahoo.sketches.SketchesArgumentException)

Aggregations

ResizeFactor (com.yahoo.sketches.ResizeFactor): 22, Test (org.testng.annotations.Test): 11, SketchesArgumentException (com.yahoo.sketches.SketchesArgumentException): 6, Family (com.yahoo.sketches.Family): 5, PreambleUtil.extractResizeFactor (com.yahoo.sketches.sampling.PreambleUtil.extractResizeFactor): 3, ArrayList (java.util.ArrayList): 3, SketchesException (com.yahoo.sketches.SketchesException): 2, WritableMemory (com.yahoo.memory.WritableMemory): 1, ArrayOfBooleansSerDe (com.yahoo.sketches.ArrayOfBooleansSerDe): 1, Family.objectToFamily (com.yahoo.sketches.Family.objectToFamily): 1