Search in sources :

Example 11 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class PreambleUtil method toString.

/**
 * Builds a human-readable, multi-line description of the CPC sketch image held in the given
 * Memory: the low preamble fields, the decoded flag bits, the format-dependent high preamble
 * fields, and the actual versus required byte counts.
 *
 * @param mem the Memory containing the sketch image to describe.
 * @param detail if true, the window stream and the surprising-value stream are also appended
 * (via listData) whenever their recorded lengths are greater than zero.
 * @return the formatted description.
 */
static String toString(final Memory mem, final boolean detail) {
    final long capBytes = mem.getCapacity();
    // Lo Fields Preamble, first 7 fields, first 8 bytes
    final int preInts = mem.getByte(getLoFieldOffset(LoField.PRE_INTS)) & 0xFF;
    final int serVer = mem.getByte(getLoFieldOffset(LoField.SER_VERSION)) & 0xFF;
    final Family family = Family.idToFamily(mem.getByte(getLoFieldOffset(LoField.FAMILY)) & 0xFF);
    final int lgK = mem.getByte(getLoFieldOffset(LoField.LG_K)) & 0xFF;
    final int fiCol = mem.getByte(getLoFieldOffset(LoField.FI_COL)) & 0xFF;
    final int flags = mem.getByte(getLoFieldOffset(LoField.FLAGS)) & 0XFF;
    final int seedHash = mem.getShort(getLoFieldOffset(LoField.SEED_HASH)) & 0XFFFF;
    final String seedHashStr = Integer.toHexString(seedHash);
    // Flags of the Flags byte
    final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
    final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
    final boolean compressed = (flags & COMPRESSED_FLAG_MASK) > 0;
    final boolean hasHip = (flags & HIP_FLAG_MASK) > 0;
    final boolean hasSV = (flags & SUP_VAL_FLAG_MASK) > 0;
    final boolean hasWindow = (flags & WINDOW_FLAG_MASK) > 0;
    // Bits 2..4 of the flags byte select the Format, which determines the Hi field layout.
    final int formatOrdinal = (flags >>> 2) & 0x7;
    final Format format = Format.ordinalToFormat(formatOrdinal);
    final String nativeOrderStr = ByteOrder.nativeOrder().toString();
    // Hi fields default to zero; only the fields defined for the format are read below.
    long numCoupons = 0;
    long numSv = 0;
    long winOffset = 0;
    long svLengthInts = 0;
    long wLengthInts = 0;
    double kxp = 0;
    double hipAccum = 0;
    long svStreamStart = 0;
    long wStreamStart = 0;
    long reqBytes = 0;
    final StringBuilder sb = new StringBuilder();
    sb.append(LS);
    sb.append("### CPC SKETCH IMAGE - PREAMBLE:").append(LS);
    sb.append("Format                          : ").append(format.name()).append(LS);
    sb.append("Byte 0: Preamble Ints           : ").append(preInts).append(LS);
    sb.append("Byte 1: SerVer                  : ").append(serVer).append(LS);
    sb.append("Byte 2: Family                  : ").append(family).append(LS);
    sb.append("Byte 3: lgK                     : ").append(lgK).append(LS);
    sb.append("Byte 4: First Interesting Col   : ").append(fiCol).append(LS);
    sb.append("Byte 5: Flags                   : ").append(flagsStr).append(LS);
    sb.append("  BIG_ENDIAN_STORAGE            : ").append(bigEndian).append(LS);
    sb.append("  (Native Byte Order)           : ").append(nativeOrderStr).append(LS);
    sb.append("  Compressed                    : ").append(compressed).append(LS);
    sb.append("  Has HIP                       : ").append(hasHip).append(LS);
    sb.append("  Has Surprising Values         : ").append(hasSV).append(LS);
    sb.append("  Has Window Values             : ").append(hasWindow).append(LS);
    sb.append("Byte 6, 7: Seed Hash            : ").append(seedHashStr).append(LS);
    final Flavor flavor;
    switch(format) {
        case EMPTY_MERGED:
        case EMPTY_HIP:
            {
                // Empty formats carry no Hi fields; numCoupons is still zero here.
                flavor = CpcUtil.determineFlavor(lgK, numCoupons);
                sb.append("Flavor                          : ").append(flavor).append(LS);
                break;
            }
        case SPARSE_HYBRID_MERGED:
            {
                numCoupons = mem.getInt(getHiFieldOffset(format, HiField.NUM_COUPONS)) & 0xFFFF_FFFFL;
                // In this format the SV count is taken to be the coupon count.
                numSv = numCoupons;
                svLengthInts = mem.getInt(getHiFieldOffset(format, HiField.SV_LENGTH_INTS)) & 0xFFFF_FFFFL;
                svStreamStart = getSvStreamOffset(mem);
                // Stream lengths are in ints; shift by 2 to convert to bytes.
                reqBytes = svStreamStart + (svLengthInts << 2);
                flavor = CpcUtil.determineFlavor(lgK, numCoupons);
                sb.append("Flavor                          : ").append(flavor).append(LS);
                sb.append("Num Coupons                     : ").append(numCoupons).append(LS);
                sb.append("Num SV                          : ").append(numSv).append(LS);
                sb.append("SV Length Ints                  : ").append(svLengthInts).append(LS);
                sb.append("SV Stream Start                 : ").append(svStreamStart).append(LS);
                break;
            }
        case SPARSE_HYBRID_HIP:
            {
                numCoupons = mem.getInt(getHiFieldOffset(format, HiField.NUM_COUPONS)) & 0xFFFF_FFFFL;
                // In this format the SV count is taken to be the coupon count.
                numSv = numCoupons;
                svLengthInts = mem.getInt(getHiFieldOffset(format, HiField.SV_LENGTH_INTS)) & 0xFFFF_FFFFL;
                svStreamStart = getSvStreamOffset(mem);
                kxp = mem.getDouble(getHiFieldOffset(format, HiField.KXP));
                hipAccum = mem.getDouble(getHiFieldOffset(format, HiField.HIP_ACCUM));
                reqBytes = svStreamStart + (svLengthInts << 2);
                flavor = CpcUtil.determineFlavor(lgK, numCoupons);
                sb.append("Flavor                          : ").append(flavor).append(LS);
                sb.append("Num Coupons                     : ").append(numCoupons).append(LS);
                sb.append("Num SV                          : ").append(numSv).append(LS);
                sb.append("SV Length Ints                  : ").append(svLengthInts).append(LS);
                sb.append("SV Stream Start                 : ").append(svStreamStart).append(LS);
                sb.append("KxP                             : ").append(kxp).append(LS);
                sb.append("HipAccum                        : ").append(hipAccum).append(LS);
                break;
            }
        case PINNED_SLIDING_MERGED_NOSV:
            {
                numCoupons = mem.getInt(getHiFieldOffset(format, HiField.NUM_COUPONS)) & 0xFFFF_FFFFL;
                winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
                wLengthInts = mem.getInt(getHiFieldOffset(format, HiField.W_LENGTH_INTS)) & 0xFFFF_FFFFL;
                wStreamStart = getWStreamOffset(mem);
                reqBytes = wStreamStart + (wLengthInts << 2);
                flavor = CpcUtil.determineFlavor(lgK, numCoupons);
                sb.append("Flavor                          : ").append(flavor).append(LS);
                sb.append("Num Coupons                     : ").append(numCoupons).append(LS);
                sb.append("Window Offset                   : ").append(winOffset).append(LS);
                sb.append("Window Length Ints              : ").append(wLengthInts).append(LS);
                sb.append("Window Stream Start             : ").append(wStreamStart).append(LS);
                break;
            }
        case PINNED_SLIDING_HIP_NOSV:
            {
                numCoupons = mem.getInt(getHiFieldOffset(format, HiField.NUM_COUPONS)) & 0xFFFF_FFFFL;
                winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
                wLengthInts = mem.getInt(getHiFieldOffset(format, HiField.W_LENGTH_INTS)) & 0xFFFF_FFFFL;
                wStreamStart = getWStreamOffset(mem);
                kxp = mem.getDouble(getHiFieldOffset(format, HiField.KXP));
                hipAccum = mem.getDouble(getHiFieldOffset(format, HiField.HIP_ACCUM));
                reqBytes = wStreamStart + (wLengthInts << 2);
                flavor = CpcUtil.determineFlavor(lgK, numCoupons);
                sb.append("Flavor                          : ").append(flavor).append(LS);
                sb.append("Num Coupons                     : ").append(numCoupons).append(LS);
                sb.append("Window Offset                   : ").append(winOffset).append(LS);
                sb.append("Window Length Ints              : ").append(wLengthInts).append(LS);
                sb.append("Window Stream Start             : ").append(wStreamStart).append(LS);
                sb.append("KxP                             : ").append(kxp).append(LS);
                sb.append("HipAccum                        : ").append(hipAccum).append(LS);
                break;
            }
        case PINNED_SLIDING_MERGED:
            {
                // The mask must apply to the int that was read, not to the offset, so that the
                // value is widened as an unsigned 32-bit quantity like in the other cases.
                numCoupons = mem.getInt(getHiFieldOffset(format, HiField.NUM_COUPONS)) & 0xFFFF_FFFFL;
                winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
                wLengthInts = mem.getInt(getHiFieldOffset(format, HiField.W_LENGTH_INTS)) & 0xFFFF_FFFFL;
                numSv = mem.getInt(getHiFieldOffset(format, HiField.NUM_SV)) & 0xFFFF_FFFFL;
                svLengthInts = mem.getInt(getHiFieldOffset(format, HiField.SV_LENGTH_INTS)) & 0xFFFF_FFFFL;
                wStreamStart = getWStreamOffset(mem);
                svStreamStart = getSvStreamOffset(mem);
                reqBytes = svStreamStart + (svLengthInts << 2);
                flavor = CpcUtil.determineFlavor(lgK, numCoupons);
                sb.append("Flavor                          : ").append(flavor).append(LS);
                sb.append("Num Coupons                     : ").append(numCoupons).append(LS);
                sb.append("Num SV                          : ").append(numSv).append(LS);
                sb.append("SV Length Ints                  : ").append(svLengthInts).append(LS);
                sb.append("SV Stream Start                 : ").append(svStreamStart).append(LS);
                sb.append("Window Offset                   : ").append(winOffset).append(LS);
                sb.append("Window Length Ints              : ").append(wLengthInts).append(LS);
                sb.append("Window Stream Start             : ").append(wStreamStart).append(LS);
                break;
            }
        case PINNED_SLIDING_HIP:
            {
                // The mask must apply to the int that was read, not to the offset, so that the
                // value is widened as an unsigned 32-bit quantity like in the other cases.
                numCoupons = mem.getInt(getHiFieldOffset(format, HiField.NUM_COUPONS)) & 0xFFFF_FFFFL;
                winOffset = CpcUtil.determineCorrectOffset(lgK, numCoupons);
                wLengthInts = mem.getInt(getHiFieldOffset(format, HiField.W_LENGTH_INTS)) & 0xFFFF_FFFFL;
                numSv = mem.getInt(getHiFieldOffset(format, HiField.NUM_SV)) & 0xFFFF_FFFFL;
                svLengthInts = mem.getInt(getHiFieldOffset(format, HiField.SV_LENGTH_INTS)) & 0xFFFF_FFFFL;
                wStreamStart = getWStreamOffset(mem);
                svStreamStart = getSvStreamOffset(mem);
                kxp = mem.getDouble(getHiFieldOffset(format, HiField.KXP));
                hipAccum = mem.getDouble(getHiFieldOffset(format, HiField.HIP_ACCUM));
                reqBytes = svStreamStart + (svLengthInts << 2);
                flavor = CpcUtil.determineFlavor(lgK, numCoupons);
                sb.append("Flavor                          : ").append(flavor).append(LS);
                sb.append("Num Coupons                     : ").append(numCoupons).append(LS);
                sb.append("Num SV                          : ").append(numSv).append(LS);
                sb.append("SV Length Ints                  : ").append(svLengthInts).append(LS);
                sb.append("SV Stream Start                 : ").append(svStreamStart).append(LS);
                sb.append("Window Offset                   : ").append(winOffset).append(LS);
                sb.append("Window Length Ints              : ").append(wLengthInts).append(LS);
                sb.append("Window Stream Start             : ").append(wStreamStart).append(LS);
                sb.append("KxP                             : ").append(kxp).append(LS);
                sb.append("HipAccum                        : ").append(hipAccum).append(LS);
                break;
            }
    }
    sb.append("Actual Bytes                    : ").append(capBytes).append(LS);
    sb.append("Required Bytes                  : ").append(reqBytes).append(LS);
    if (detail) {
        sb.append(LS).append("### CPC SKETCH IMAGE - DATA").append(LS);
        // Streams whose length was never read for this format remain zero and are skipped.
        if (wLengthInts > 0) {
            sb.append(LS).append("Window Stream:").append(LS);
            listData(mem, wStreamStart, wLengthInts, sb);
        }
        if (svLengthInts > 0) {
            sb.append(LS).append("SV Stream:").append(LS);
            listData(mem, svStreamStart, svLengthInts, sb);
        }
    }
    sb.append("### END CPC SKETCH IMAGE").append(LS);
    return sb.toString();
}
Also used : Family(org.apache.datasketches.Family)

Example 12 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class PreambleUtil method checkLoPreamble.

/**
 * Performs the basic low-preamble checks on the given image, in this order:
 * SerVer, preInts (against the value defined for the image's Format), Family, lgK, fiCol.
 * Each check is delegated to rtAssert / rtAssertEquals.
 *
 * @param mem the Memory image to check.
 */
static void checkLoPreamble(final Memory mem) {
    // Serialization version must equal the SER_VER constant (compared as an unsigned byte).
    rtAssertEquals(getSerVer(mem), SER_VER & 0XFF);

    // The number of preamble ints must be the one defined for this image's format.
    final Format imageFormat = getFormat(mem);
    final int expectedPreInts = getDefinedPreInts(imageFormat) & 0XFF;
    rtAssertEquals(getPreInts(mem), expectedPreInts);

    // Only CPC images are accepted.
    final Family imageFamily = getFamily(mem);
    rtAssert(imageFamily == Family.CPC);

    // lgK must lie in [4, 26].
    final int lgKValue = getLgK(mem);
    final boolean lgKInRange = (4 <= lgKValue) && (26 >= lgKValue);
    rtAssert(lgKInRange);

    // First interesting column must lie in [0, 63].
    final int firstInterestingCol = getFiCol(mem);
    final boolean fiColInRange = (0 <= firstInterestingCol) && (63 >= firstInterestingCol);
    rtAssert(fiColInRange);
}
Also used : Family(org.apache.datasketches.Family)

Example 13 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class SetOperation method heapify.

/**
 * Instantiates an on-heap SetOperation from the SetOperation image in the given Memory,
 * validating it with the given expectedSeed. The Family byte of the image selects the
 * implementation: UNION and INTERSECTION images are supported.
 * The resulting SetOperation will not retain any link to the source Memory.
 *
 * <p>Note: Only certain set operators during stateful operations can be serialized and thus
 * heapified.</p>
 *
 * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a Heap-based SetOperation from the given Memory
 * @throws SketchesArgumentException if the image's family is neither UNION nor INTERSECTION.
 */
public static SetOperation heapify(final Memory srcMem, final long expectedSeed) {
    final byte familyByte = srcMem.getByte(FAMILY_BYTE);
    final Family imageFamily = idToFamily(familyByte);
    if (imageFamily == Family.UNION) {
        return UnionImpl.heapifyInstance(srcMem, expectedSeed);
    }
    if (imageFamily == Family.INTERSECTION) {
        return IntersectionImpl.heapifyInstance(srcMem, expectedSeed);
    }
    // Any other family has no heapifiable SetOperation form here.
    throw new SketchesArgumentException("SetOperation cannot heapify family: " + imageFamily.toString());
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) Family.idToFamily(org.apache.datasketches.Family.idToFamily) Family(org.apache.datasketches.Family)

Example 14 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class UpdateSketch method checkUnionQuickSelectFamily.

/**
 * Validates that the image in the given Memory belongs to the UNION or QUICKSELECT family,
 * that preambleLongs equals that family's minimum preamble longs, and that lgNomLongs is
 * not below MIN_LG_NOM_LONGS.
 *
 * @param mem the Memory image from which the family ID is extracted.
 * @param preambleLongs the preamble longs value to validate against the family.
 * @param lgNomLongs the lgNomLongs value to validate against the minimum.
 * @throws SketchesArgumentException if any of the checks fails.
 */
static void checkUnionQuickSelectFamily(final Memory mem, final int preambleLongs, final int lgNomLongs) {
    // Family check (family ID is extracted from the image)
    final Family imageFamily = Family.idToFamily(extractFamilyID(mem));
    switch (imageFamily) {
        case UNION:
            if (Family.UNION.getMinPreLongs() != preambleLongs) {
                throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs);
            }
            break;
        case QUICKSELECT:
            if (Family.QUICKSELECT.getMinPreLongs() != preambleLongs) {
                throw new SketchesArgumentException("Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs);
            }
            break;
        default:
            throw new SketchesArgumentException("Possible corruption: Invalid Family: " + imageFamily.toString());
    }

    // lgNomLongs check
    final boolean lgNomLongsTooSmall = lgNomLongs < MIN_LG_NOM_LONGS;
    if (lgNomLongsTooSmall) {
        throw new SketchesArgumentException("Possible corruption: Current Memory lgNomLongs < min required size: " + lgNomLongs + " < " + MIN_LG_NOM_LONGS);
    }
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) Family(org.apache.datasketches.Family)

Example 15 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class UpdateSketch method wrap.

/**
 * Wrap takes the sketch image in Memory and refers to it directly; the image is handed to
 * DirectQuickSelectSketch.writableWrap rather than being rebuilt by this method.
 * Only images whose family is QUICKSELECT, whose Serialization Version is 3 and whose
 * preamble longs value is 3 are accepted; any other image is rejected with an exception.
 * @param srcMem an image of a Sketch where the image seed hash matches the given seed hash.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param expectedSeed the seed used to validate the given Memory image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * Compact sketches store a 16-bit hash of the seed, but not the seed itself.
 * @return a UpdateSketch backed by the given Memory
 * @throws SketchesArgumentException if the family is not QUICKSELECT, or if SerVer or
 * preLongs is not 3.
 */
public static UpdateSketch wrap(final WritableMemory srcMem, final long expectedSeed) {
    // Only the low 6 bits of the first preamble byte carry the preamble-longs count.
    final int preambleLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F;
    final int serialVersion = srcMem.getByte(SER_VER_BYTE) & 0XFF;
    final int famId = srcMem.getByte(FAMILY_BYTE) & 0XFF;
    final Family imageFamily = Family.idToFamily(famId);

    if (imageFamily != Family.QUICKSELECT) {
        throw new SketchesArgumentException("A " + imageFamily + " sketch cannot be wrapped as an UpdateSketch.");
    }
    if ((serialVersion != 3) || (preambleLongs != 3)) {
        throw new SketchesArgumentException("Corrupted: An UpdateSketch image: must have SerVer = 3 and preLongs = 3");
    }
    return DirectQuickSelectSketch.writableWrap(srcMem, expectedSeed);
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) Family(org.apache.datasketches.Family)

Aggregations

Family (org.apache.datasketches.Family): 25
SketchesArgumentException (org.apache.datasketches.SketchesArgumentException): 14
Family.idToFamily (org.apache.datasketches.Family.idToFamily): 12
ResizeFactor (org.apache.datasketches.ResizeFactor): 4
WritableMemory (org.apache.datasketches.memory.WritableMemory): 2
Test (org.testng.annotations.Test): 2
DefaultMemoryRequestServer (org.apache.datasketches.memory.DefaultMemoryRequestServer): 1
Memory (org.apache.datasketches.memory.Memory): 1
MemoryRequestServer (org.apache.datasketches.memory.MemoryRequestServer): 1
PreambleUtil.extractLgResizeFactor (org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor): 1
PreambleUtil.insertLgResizeFactor (org.apache.datasketches.theta.PreambleUtil.insertLgResizeFactor): 1