Search in sources :

Example 6 with Family

use of com.yahoo.sketches.Family in project sketches-core by DataSketches.

the class DirectQuickSelectSketch method initNewDirectInstance.

/**
   * Writes a fresh, empty sketch preamble into the given Memory, zeroes the hash table region
   * that follows the preamble, and returns a sketch instance backed by that Memory.
   *
   * @param lgNomLongs <a href="{@docRoot}/resources/dictionary.html#lgNomLongs">See lgNomLongs</a>.
   * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
   * @param p
   * <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability, <i>p</i></a>
   * @param rf the resize factor whose lg value is stored in the preamble. When that lg value is 0
   * the hash table is sized up front at lgNomLongs + 1; otherwise it starts at MIN_LG_ARR_LONGS.
   * <a href="{@docRoot}/resources/dictionary.html#resizeFactor">See Resize Factor</a>
   * @param dstMem the given Memory object destination. It cannot be null.
   * Its preamble is overwritten and its hash table region is cleared.
   * @param unionGadget true if this sketch is implementing the Union gadget function
   * (family UNION). Otherwise, the family is QUICKSELECT.
   * @return instance of this sketch
   * @throws SketchesArgumentException if dstMem has less capacity than the preamble plus the
   * initial hash table require.
   */
static DirectQuickSelectSketch initNewDirectInstance(final int lgNomLongs, final long seed, final float p, final ResizeFactor rf, final WritableMemory dstMem, final boolean unionGadget) {
    // The family alone decides how many preamble longs are written.
    final Family family = unionGadget ? Family.UNION : Family.QUICKSELECT;
    final int preambleLongs = family.getMinPreLongs();

    // Initial table size: full size when no resizing is possible, minimum size otherwise.
    final int lgRF = rf.lg();
    final int lgArrLongs;
    if (lgRF == 0) {
        lgArrLongs = lgNomLongs + 1;
    } else {
        lgArrLongs = MIN_LG_ARR_LONGS;
    }

    // Refuse to touch the Memory unless it can hold the preamble and the initial table.
    final int requiredBytes = getMemBytes(lgArrLongs, preambleLongs);
    final long availableBytes = dstMem.getCapacity();
    if (availableBytes < requiredBytes) {
        throw new SketchesArgumentException("Memory capacity is too small: " + availableBytes + " < " + requiredBytes);
    }

    //@formatter:off
    // Preamble fields are written in ascending byte order; the backing array may be null.
    final Object backing = dstMem.getArray();
    final long baseAddr = dstMem.getCumulativeOffset(0L);
    insertPreLongs(backing, baseAddr, preambleLongs);                 //byte 0
    insertLgResizeFactor(backing, baseAddr, lgRF);                    //byte 0
    insertSerVer(backing, baseAddr, SER_VER);                         //byte 1
    insertFamilyID(backing, baseAddr, family.getID());                //byte 2
    insertLgNomLongs(backing, baseAddr, lgNomLongs);                  //byte 3
    insertLgArrLongs(backing, baseAddr, lgArrLongs);                  //byte 4
    // flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4
    insertFlags(backing, baseAddr, EMPTY_FLAG_MASK);                  //byte 5
    insertSeedHash(backing, baseAddr, Util.computeSeedHash(seed));    //bytes 6,7
    insertCurCount(backing, baseAddr, 0);                             //bytes 8-11
    insertP(backing, baseAddr, p);                                    //bytes 12-15
    insertThetaLong(backing, baseAddr, (long) (p * MAX_THETA_LONG_AS_DOUBLE)); //bytes 16-23
    //@formatter:on

    // Zero the hash table area that starts right after the preamble.
    dstMem.clear(preambleLongs << 3, 8 << lgArrLongs);

    final DirectQuickSelectSketch sketch = new DirectQuickSelectSketch(lgNomLongs, seed, preambleLongs, dstMem);
    sketch.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
    return sketch;
}
Also used : SketchesArgumentException(com.yahoo.sketches.SketchesArgumentException) Family(com.yahoo.sketches.Family)

Example 7 with Family

use of com.yahoo.sketches.Family in project sketches-core by DataSketches.

the class DirectQuickSelectSketchR method checkIntegrity.

/**
   * Validates preamble values that were already extracted from srcMem, throwing on the first
   * inconsistency found. Checks run in this order: serialization version, family and its
   * preamble size, minimum lgNomLongs, disallowed flags, seed hash, Memory capacity, and
   * finally the relationship between theta, p and the table size.
   *
   * @param srcMem the source Memory; only its capacity is read here.
   * @param seed the update seed whose computed hash must match seedHash.
   * @param preambleLongs the preamble size in longs read from the Memory.
   * @param serVer the serialization version read from the Memory.
   * @param familyID the family ID read from the Memory; must map to UNION or QUICKSELECT.
   * @param lgNomLongs log-base-2 of the nominal entries read from the Memory.
   * @param lgArrLongs log-base-2 of the hash table size read from the Memory.
   * @param flags the flags byte read from the Memory.
   * @param seedHash the seed hash read from the Memory.
   * @param p the sampling probability read from the Memory.
   * @param thetaLong theta, as a long, read from the Memory.
   * @throws SketchesArgumentException if any of the checks fails.
   */
static void checkIntegrity(final Memory srcMem, final long seed, final int preambleLongs, final int serVer, final int familyID, final int lgNomLongs, final int lgArrLongs, final int flags, final short seedHash, final float p, final long thetaLong) {
    if (serVer != SER_VER) {
        throw new SketchesArgumentException("Possible corruption: Invalid Serialization Version: " + serVer);
    }

    // Only UNION and QUICKSELECT images are acceptable, each with its own fixed preamble size.
    final Family family = Family.idToFamily(familyID);
    switch (family) {
        case UNION:
            if (preambleLongs != Family.UNION.getMinPreLongs()) {
                throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs);
            }
            break;
        case QUICKSELECT:
            if (preambleLongs != Family.QUICKSELECT.getMinPreLongs()) {
                throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs);
            }
            break;
        default:
            throw new SketchesArgumentException("Possible corruption: Invalid Family: " + family.toString());
    }

    if (lgNomLongs < MIN_LG_NOM_LONGS) {
        throw new SketchesArgumentException("Possible corruption: Current Memory lgNomLongs < min required size: " + lgNomLongs + " < " + MIN_LG_NOM_LONGS);
    }

    // None of these four flags may be set on the source image.
    final int disallowed = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK;
    final int offending = flags & disallowed;
    if (offending > 0) {
        throw new SketchesArgumentException("Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only");
    }

    Util.checkSeedHashes(seedHash, Util.computeSeedHash(seed));

    // The Memory must be able to hold the preamble plus the declared hash table.
    final long capacityBytes = srcMem.getCapacity();
    final int neededBytes = getMemBytes(lgArrLongs, preambleLongs);
    if (capacityBytes < neededBytes) {
        throw new SketchesArgumentException("Possible corruption: Current Memory size < min required size: " + capacityBytes + " < " + neededBytes);
    }

    // Theta below p is rejected while the table is not larger than the nominal size.
    final double theta = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
    if (lgArrLongs <= lgNomLongs) {
        if (theta < p) {
            throw new SketchesArgumentException("Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. " + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
        }
    }
}
Also used : SketchesArgumentException(com.yahoo.sketches.SketchesArgumentException) Family(com.yahoo.sketches.Family)

Example 8 with Family

use of com.yahoo.sketches.Family in project sketches-core by DataSketches.

the class HeapAlphaSketch method heapifyInstance.

/**
   * Heapify a sketch from a Memory object containing sketch data.
   *
   * <p>The preamble is read and validated (family must be ALPHA with its fixed preamble size,
   * serialization version, flags, seed hash, Memory capacity, and the theta/p relationship),
   * then the hash table is copied from the Memory into a new on-heap array.</p>
   *
   * @param srcMem The source Memory object.
   * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
   * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a>
   * @return instance of this sketch
   * @throws SketchesArgumentException if any preamble check fails.
   */
static HeapAlphaSketch heapifyInstance(final Memory srcMem, final long seed) {
    //byte 0 holds two fields: preamble longs in the low 6 bits, lg resize factor above them.
    //Mask to an unsigned value BEFORE shifting: a Java byte is signed, so when bit 7 is set the
    //promoted int is negative and an unmasked >>> would yield a huge value instead of 2 or 3.
    final int preByte = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0XFF;
    final int preambleLongs = preByte & 0X3F;
    final ResizeFactor myRF = ResizeFactor.getRF(preByte >>> LG_RESIZE_FACTOR_BIT);
    //byte 1
    final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF;
    //byte 2
    final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF;
    //byte 3
    final int lgNomLongs = srcMem.getByte(LG_NOM_LONGS_BYTE) & 0XFF;
    //byte 4
    final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF;
    //byte 5
    final int flags = srcMem.getByte(FLAGS_BYTE) & 0XFF;
    //byte 6,7
    final short seedHash = srcMem.getShort(SEED_HASH_SHORT);
    //bytes 8-11
    final int curCount = srcMem.getInt(RETAINED_ENTRIES_INT);
    //bytes 12-15
    final float p = srcMem.getFloat(P_FLOAT);
    //bytes 16-23
    final long thetaLong = srcMem.getLong(THETA_LONG);

    //Only an ALPHA image with the ALPHA preamble size can be heapified here.
    final Family family = Family.idToFamily(familyID);
    if (family.equals(Family.ALPHA)) {
        if (preambleLongs != Family.ALPHA.getMinPreLongs()) {
            throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs);
        }
    } else {
        throw new SketchesArgumentException("Possible corruption: Invalid Family: " + family.toString());
    }
    if (serVer != SER_VER) {
        throw new SketchesArgumentException("Possible corruption: Invalid Serialization Version: " + serVer);
    }
    //None of these four flags may be set on the source image.
    final int flagsMask = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK;
    if ((flags & flagsMask) > 0) {
        throw new SketchesArgumentException("Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only");
    }
    Util.checkSeedHashes(seedHash, Util.computeSeedHash(seed));
    //The Memory must be able to hold the preamble plus the declared hash table.
    final long curCapBytes = srcMem.getCapacity();
    final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs);
    if (curCapBytes < minReqBytes) {
        throw new SketchesArgumentException("Possible corruption: Current Memory size < min required size: " + curCapBytes + " < " + minReqBytes);
    }
    final double theta = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
    if ((lgArrLongs <= lgNomLongs) && (theta < p)) {
        throw new SketchesArgumentException("Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. " + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p);
    }

    //Derive the constructor arguments that are not stored in the preamble.
    final double nomLongs = (1L << lgNomLongs);
    final double alpha = nomLongs / (nomLongs + 1.0);
    final long split1 = (long) ((p * (alpha + 1.0) / 2.0) * MAX_THETA_LONG_AS_DOUBLE);

    //Build the heap sketch and restore the state recorded in the preamble.
    final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, seed, p, myRF, alpha, split1);
    has.lgArrLongs_ = lgArrLongs;
    has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
    has.curCount_ = curCount;
    has.thetaLong_ = thetaLong;
    has.empty_ = (flags & EMPTY_FLAG_MASK) > 0;
    has.cache_ = new long[1 << lgArrLongs];
    //read in as hash table, starting right after the preamble
    srcMem.getLongArray(preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs);
    return has;
}
Also used : SketchesArgumentException(com.yahoo.sketches.SketchesArgumentException) Family(com.yahoo.sketches.Family) ResizeFactor(com.yahoo.sketches.ResizeFactor)

Example 9 with Family

use of com.yahoo.sketches.Family in project sketches-core by DataSketches.

the class PreambleUtil method toString.

/**
   * Returns a human readable summary of the HLL sketch preamble held in the given Memory.
   * The first 8 bytes are always reported; the fields that follow are read and reported only
   * for the modes that use them (SET: hash set count; HLL: HIP accumulator, KxQ0, KxQ1,
   * cur-min count and exception count).
   *
   * @param mem the given Memory containing an HLL sketch image.
   * @return the summary preamble string.
   */
public static String toString(final Memory mem) {
    //First 8 bytes
    final int preInts = mem.getByte(PREAMBLE_INTS_BYTE);
    final int serVer = mem.getByte(SER_VER_BYTE);
    final Family family = Family.idToFamily(mem.getByte(FAMILY_BYTE));
    final int lgK = mem.getByte(LG_K_BYTE);
    final int lgArr = mem.getByte(LG_ARR_BYTE);
    //Flags are a bit field: mask to unsigned so a set high bit is not sign-extended into a
    //32-bit binary string and a negative decimal value.
    final int flags = mem.getByte(FLAGS_BYTE) & 0XFF;
    //Flags
    final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
    final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
    final String nativeOrder = ByteOrder.nativeOrder().toString();
    final boolean compact = (flags & COMPACT_FLAG_MASK) > 0;
    final boolean oooFlag = (flags & OUT_OF_ORDER_FLAG_MASK) > 0;
    final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
    final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
    //Byte 6 is shared: it is reported as the list count in LIST mode and as cur-min in HLL mode.
    final int hllCurMin = mem.getByte(HLL_CUR_MIN_BYTE);
    final int listCount = hllCurMin;
    //Byte 7 packs the current mode (low 2 bits) and the target HLL type (next 2 bits).
    final int modeByte = mem.getByte(MODE_BYTE);
    final CurMode curMode = CurMode.fromOrdinal(modeByte & 3);
    final TgtHllType tgtHllType = TgtHllType.fromOrdinal((modeByte >> 2) & 3);
    double hipAccum = 0;
    double kxq0 = 0;
    double kxq1 = 0;
    int hashSetCount = 0;
    int curMinCount = 0;
    int exceptionCount = 0;
    //Fields beyond the first 8 bytes are read only for the modes that report them.
    if (curMode == CurMode.SET) {
        hashSetCount = mem.getInt(HASH_SET_COUNT_INT);
    } else if (curMode == CurMode.HLL) {
        hipAccum = mem.getDouble(HIP_ACCUM_DOUBLE);
        kxq0 = mem.getDouble(KXQ0_DOUBLE);
        kxq1 = mem.getDouble(KXQ1_DOUBLE);
        curMinCount = mem.getInt(CUR_MIN_COUNT_INT);
        exceptionCount = mem.getInt(AUX_COUNT_INT);
    }
    final StringBuilder sb = new StringBuilder();
    sb.append(LS);
    sb.append("### HLL SKETCH PREAMBLE:").append(LS);
    sb.append("Byte  0: Preamble Ints        : ").append(preInts).append(LS);
    sb.append("Byte  1: SerVer               : ").append(serVer).append(LS);
    sb.append("Byte  2: Family               : ").append(family).append(LS);
    sb.append("Byte  3: lgK                  : ").append(lgK).append(LS);
    //expand byte 4: LgArr
    if (curMode == CurMode.LIST) {
        sb.append("Byte  4: LgArr: List Arr      : ").append(lgArr).append(LS);
    }
    if (curMode == CurMode.SET) {
        sb.append("Byte  4: LgArr: Hash Set Arr  : ").append(lgArr).append(LS);
    }
    if (curMode == CurMode.HLL) {
        sb.append("Byte  4: LgArr: Except Arr    : ").append(lgArr).append(LS);
    }
    //expand byte 5: Flags
    sb.append("Byte  5: Flags:               : ").append(flagsStr).append(LS);
    sb.append("  BIG_ENDIAN_STORAGE          : ").append(bigEndian).append(LS);
    sb.append("  (Native Byte Order)         : ").append(nativeOrder).append(LS);
    sb.append("  READ_ONLY                   : ").append(readOnly).append(LS);
    sb.append("  EMPTY                       : ").append(empty).append(LS);
    sb.append("  COMPACT                     : ").append(compact).append(LS);
    sb.append("  OUT_OF_ORDER                : ").append(oooFlag).append(LS);
    //expand byte 6: ListCount, CurMin
    if (curMode == CurMode.LIST) {
        sb.append("Byte  6: List Count           : ").append(listCount).append(LS);
    }
    if (curMode == CurMode.SET) {
        sb.append("Byte  6: (not used)           : ").append(LS);
    }
    if (curMode == CurMode.HLL) {
        //Report the value actually stored in byte 6; the separate cur-min count is listed below.
        sb.append("Byte  6: Cur Min              : ").append(hllCurMin).append(LS);
    }
    final String modes = curMode.toString() + ", " + tgtHllType.toString();
    sb.append("Byte 7: Mode                  : ").append(modes).append(LS);
    if (curMode == CurMode.SET) {
        sb.append("Hash Set Count                : ").append(hashSetCount).append(LS);
    }
    if (curMode == CurMode.HLL) {
        sb.append("HIP Accum                     : ").append(hipAccum).append(LS);
        sb.append("KxQ0                          : ").append(kxq0).append(LS);
        sb.append("KxQ1                          : ").append(kxq1).append(LS);
        sb.append("Cur Min Count                 : ").append(curMinCount).append(LS);
        sb.append("Exception Count               : ").append(exceptionCount).append(LS);
    }
    sb.append("### END HLL SKETCH PREAMBLE").append(LS);
    return sb.toString();
}
Also used : Family(com.yahoo.sketches.Family)

Example 10 with Family

use of com.yahoo.sketches.Family in project sketches-core by DataSketches.

the class PreambleUtil method preambleToString.

/**
   * Produces a human readable summary of the preamble stored in the given Memory, dispatching
   * on the family recorded in that preamble. Apart from confirming that the Memory is large
   * enough to hold the preamble, the contents are not validated much: this is primarily a
   * visual debugging aid.
   *
   * @param mem the given Memory.
   * @return the summary preamble string.
   * @throws SketchesArgumentException if the family is not RESERVOIR, VAROPT, RESERVOIR_UNION
   * or VAROPT_UNION.
   */
public static String preambleToString(final Memory mem) {
    // Checked first so that the family byte read below is known to be available.
    final int preLongs = getAndCheckPreLongs(mem);
    final Family family = Family.idToFamily(mem.getByte(FAMILY_BYTE));

    // Sketch families share one formatter.
    final boolean samplingSketch = (family == Family.RESERVOIR) || (family == Family.VAROPT);
    if (samplingSketch) {
        return sketchPreambleToString(mem, family, preLongs);
    }

    // Union families share the other formatter.
    final boolean samplingUnion = (family == Family.RESERVOIR_UNION) || (family == Family.VAROPT_UNION);
    if (samplingUnion) {
        return unionPreambleToString(mem, family, preLongs);
    }

    // Anything else does not belong to the Sampling family.
    throw new SketchesArgumentException("Inspecting preamble with Sampling family's " + "PreambleUtil with object of family " + family.getFamilyName());
}
Also used : SketchesArgumentException(com.yahoo.sketches.SketchesArgumentException) Family(com.yahoo.sketches.Family)

Aggregations

Family (com.yahoo.sketches.Family): 18, SketchesArgumentException (com.yahoo.sketches.SketchesArgumentException): 11, Family.idToFamily (com.yahoo.sketches.Family.idToFamily): 6, ResizeFactor (com.yahoo.sketches.ResizeFactor): 5, Test (org.testng.annotations.Test): 2, Family.objectToFamily (com.yahoo.sketches.Family.objectToFamily): 1