Search in sources :

Example 16 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class PreambleUtil method preambleToString.

/**
 * Produces a human readable, multi-line summary of the preamble found in the given Memory.
 *
 * <p>Note: beyond confirming that the given Memory is large enough to hold the preamble,
 * very little validation of the preamble contents is performed, because this is primarily
 * a tool for inspecting the preamble visually while debugging.</p>
 *
 * @param srcMem the given Memory.
 * @return the summary preamble string.
 */
public static String preambleToString(final Memory srcMem) {
    // confirm the assumed preamble is available; the returned long feeds every extractor below
    final long firstLong = checkPreambleSize(srcMem);
    final int numPreLongs = extractPreLongs(firstLong); // byte 0
    final int serialVersion = extractSerVer(firstLong); // byte 1
    final Family sketchFamily = Family.idToFamily(extractFamilyID(firstLong)); // byte 2
    final int lgMaxMap = extractLgMaxMapSize(firstLong); // byte 3
    final int lgCurMap = extractLgCurMapSize(firstLong); // byte 4
    final int flagBits = extractFlags(firstLong); // byte 5
    final int sketchType = extractSerDeId(firstLong); // byte 6

    // flags are shown both as an 8 character binary string and as a decimal value
    final String flagsText = zeroPad(Integer.toBinaryString(flagBits), 8) + ", " + flagBits;
    final boolean isEmpty = (flagBits & EMPTY_FLAG_MASK) > 0;

    // The remaining fields are only read when the full-size preamble is present;
    // otherwise they are assumed to be zero.
    final int numActive;
    final long streamLen;
    final long offsetVal;
    if (numPreLongs == Family.FREQUENCY.getMaxPreLongs()) {
        final long[] preamble = new long[numPreLongs];
        srcMem.getLongArray(0, preamble, 0, numPreLongs);
        numActive = extractActiveItems(preamble[1]);
        streamLen = preamble[2];
        offsetVal = preamble[3];
    } else {
        numActive = 0;
        streamLen = 0;
        offsetVal = 0;
    }

    // one statement per line of output
    final StringBuilder out = new StringBuilder();
    out.append(LS);
    out.append("### FREQUENCY SKETCH PREAMBLE SUMMARY:").append(LS);
    out.append("Byte  0: Preamble Longs       : ").append(numPreLongs).append(LS);
    out.append("Byte  1: Serialization Version: ").append(serialVersion).append(LS);
    out.append("Byte  2: Family               : ").append(sketchFamily.toString()).append(LS);
    out.append("Byte  3: MaxMapSize           : ").append(1 << lgMaxMap).append(LS);
    out.append("Byte  4: CurMapSize           : ").append(1 << lgCurMap).append(LS);
    out.append("Byte  5: Flags Field          : ").append(flagsText).append(LS);
    out.append("  EMPTY                       : ").append(isEmpty).append(LS);
    out.append("Byte  6: Freq Sketch Type     : ").append(sketchType).append(LS);

    if (numPreLongs != 1) {
        // any preLongs value other than 1 is reported as if the full preamble were present
        out.append("Bytes 8-11 : ActiveItems      : ").append(numActive).append(LS);
        out.append("Bytes 16-23: StreamLength     : ").append(streamLen).append(LS);
        out.append("Bytes 24-31: Offset           : ").append(offsetVal).append(LS);
    } else {
        out.append(" --ABSENT, ASSUMED:").append(LS);
    }

    // sizes are reported in bytes: 8 bytes per long
    out.append("Preamble Bytes                : ").append(numPreLongs * 8).append(LS);
    out.append("TOTAL Sketch Bytes            : ").append((numPreLongs + (numActive * 2)) << 3).append(LS);
    out.append("### END FREQUENCY SKETCH PREAMBLE SUMMARY").append(LS);
    return out.toString();
}
Also used : Family(org.apache.datasketches.Family)

Example 17 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class UpdateSketchTest method checkBuilder.

/**
 * Sets each configurable property on an UpdateSketchBuilder and confirms that the
 * matching getter reports the value that was just set.
 */
@Test
public void checkBuilder() {
    final UpdateSketchBuilder builder = UpdateSketch.builder();

    // seed
    final long expectedSeed = 12345L;
    builder.setSeed(expectedSeed);
    assertEquals(builder.getSeed(), expectedSeed);

    // sampling probability
    final float expectedP = 0.5f;
    builder.setP(expectedP);
    assertEquals(builder.getP(), expectedP);

    // resize factor
    final ResizeFactor expectedRf = ResizeFactor.X4;
    builder.setResizeFactor(expectedRf);
    assertEquals(builder.getResizeFactor(), expectedRf);

    // family
    final Family expectedFamily = Family.ALPHA;
    builder.setFamily(expectedFamily);
    assertEquals(builder.getFamily(), expectedFamily);

    // nominal entries are set as a count but read back as a log-base-2 value
    final int expectedLgK = 10;
    builder.setNominalEntries(1 << expectedLgK);
    assertEquals(builder.getLgNominalEntries(), expectedLgK);

    // memory request server
    final MemoryRequestServer expectedMrs = new DefaultMemoryRequestServer();
    builder.setMemoryRequestServer(expectedMrs);
    assertEquals(builder.getMemoryRequestServer(), expectedMrs);

    println(builder.toString());
}
Also used : Family(org.apache.datasketches.Family) DefaultMemoryRequestServer(org.apache.datasketches.memory.DefaultMemoryRequestServer) ResizeFactor(org.apache.datasketches.ResizeFactor) PreambleUtil.insertLgResizeFactor(org.apache.datasketches.theta.PreambleUtil.insertLgResizeFactor) DefaultMemoryRequestServer(org.apache.datasketches.memory.DefaultMemoryRequestServer) MemoryRequestServer(org.apache.datasketches.memory.MemoryRequestServer) Test(org.testng.annotations.Test)

Example 18 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class SketchTest method checkBuilder.

/**
 * Builds one sketch from a fully configured builder and one from a fresh builder,
 * then checks the concrete class and the reported configuration of each.
 */
@Test
public void checkBuilder() {
    final int nomEntries = 2048;
    final int lgNomEntries = Integer.numberOfTrailingZeros(nomEntries);
    final long customSeed = 1021;
    final float customP = 0.5f;
    final ResizeFactor customRf = X4;
    final Family customFamily = Family.ALPHA;

    // explicitly configured sketch
    final UpdateSketch configured = UpdateSketch.builder()
        .setSeed(customSeed)
        .setP(customP)
        .setResizeFactor(customRf)
        .setFamily(customFamily)
        .setNominalEntries(nomEntries)
        .build();
    final String configuredName = configured.getClass().getSimpleName();
    assertEquals(configuredName, "HeapAlphaSketch");
    assertEquals(configured.getLgNomLongs(), lgNomEntries);
    assertEquals(configured.getSeed(), customSeed);
    assertEquals(configured.getP(), customP);

    // a fresh builder must fall back to the defaults
    final UpdateSketch defaults = UpdateSketch.builder().build();
    final String defaultsName = defaults.getClass().getSimpleName();
    assertEquals(defaultsName, "HeapQuickSelectSketch");
    assertEquals(defaults.getLgNomLongs(), Integer.numberOfTrailingZeros(DEFAULT_NOMINAL_ENTRIES));
    assertEquals(defaults.getSeed(), DEFAULT_UPDATE_SEED);
    assertEquals(defaults.getP(), 1.0f);
    assertEquals(defaults.getResizeFactor(), ResizeFactor.X8);
}
Also used : Family(org.apache.datasketches.Family) ResizeFactor(org.apache.datasketches.ResizeFactor) Test(org.testng.annotations.Test)

Example 19 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class CompactOperations method memoryToCompact.

/**
 * Heapify or convert a source Theta Sketch Memory image into a heap or target Memory CompactSketch.
 * This assumes hashSeed is OK; serVer = 3.
 *
 * <p>Empty and single-item sources are handled first and are always produced in ordered form.
 * For every case, a non-null <i>dstMem</i> yields a DirectCompactSketch written into
 * <i>dstMem</i>; a null <i>dstMem</i> yields an on-heap sketch.</p>
 *
 * @param srcMem the given input source Memory image
 * @param dstOrdered the desired ordering of the resulting CompactSketch
 * @param dstMem Used for the target CompactSketch if it is Direct.
 * @return a CompactSketch of the correct form.
 */
@SuppressWarnings("unused")
static CompactSketch memoryToCompact(final Memory srcMem, final boolean dstOrdered, final WritableMemory dstMem) {
    // pull the first-long fields and the flags out of the source image
    final int preLongs = extractPreLongs(srcMem);
    // the serial version is only consulted by the secondary single-item check below
    final int serVer = extractSerVer(srcMem);
    final int famId = extractFamilyID(srcMem);
    // result is not used further in this method; the lookup itself is retained
    final Family fam = Family.idToFamily(famId);
    final int lgArrLongs = extractLgArrLongs(srcMem);
    final int flags = extractFlags(srcMem);
    final short seedHash = (short) extractSeedHash(srcMem);

    // decode the individual flag bits
    final boolean readOnlyFlag = (flags & READ_ONLY_FLAG_MASK) > 0;
    final boolean emptyFlag = (flags & EMPTY_FLAG_MASK) > 0;
    final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0;
    final boolean orderedFlag = (flags & ORDERED_FLAG_MASK) > 0;
    final boolean singleFlag = (flags & SINGLEITEM_FLAG_MASK) > 0;
    final boolean isSingle =
        singleFlag || SingleItemSketch.otherCheckForSingleItem(preLongs, serVer, famId, flags);

    // fields from the second and third preamble longs, with defaults when those longs are absent
    final int count;
    if (isSingle) {
        count = 1;
    } else if (preLongs > 1) {
        count = extractCurCount(srcMem);
    } else {
        count = 0;
    }
    final long theta;
    if (preLongs > 2) {
        theta = extractThetaLong(srcMem);
    } else {
        theta = Long.MAX_VALUE;
    }

    // basic consistency checks (only active when assertions are enabled)
    assert !emptyFlag || ((count == 0) && (theta == Long.MAX_VALUE));
    assert !isSingle || ((count == 1) && (theta == Long.MAX_VALUE));
    checkFamilyAndFlags(famId, compactFlag, readOnlyFlag);

    // Note: for empty and single we always output the ordered form.
    final boolean orderedOut = emptyFlag || isSingle || dstOrdered;

    // empty source
    if (emptyFlag) {
        if (dstMem == null) {
            return EmptyCompactSketch.getInstance();
        }
        dstMem.putByteArray(0, EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, 8);
        return new DirectCompactSketch(dstMem);
    }

    // single-item source: the one hash sits immediately after the preamble
    if (isSingle) {
        final long onlyHash = srcMem.getLong(preLongs << 3);
        final SingleItemSketch singleSketch = new SingleItemSketch(onlyHash, seedHash);
        if (dstMem == null) {
            return singleSketch; // heap
        }
        dstMem.putByteArray(0, singleSketch.toByteArray(), 0, 16);
        return new DirectCompactSketch(dstMem);
    }

    // general case: gather the retained hashes
    final long[] hashes;
    if (compactFlag) {
        // compact source: the hashes can be copied out directly
        hashes = new long[count];
        srcMem.getLongArray(preLongs << 3, hashes, 0, count);
    } else {
        // update sketch, thus hashTable form: read the whole table, then compact it
        final int tableLen = 1 << lgArrLongs;
        final long[] table = new long[tableLen];
        srcMem.getLongArray(preLongs << 3, table, 0, tableLen);
        hashes = compactCache(table, count, theta, orderedOut);
    }

    final int orderedBit = orderedOut ? ORDERED_FLAG_MASK : 0;
    final int flagsOut = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK | orderedBit;

    // load the destination
    if (dstMem == null) {
        // heap
        return new HeapCompactSketch(hashes, emptyFlag, seedHash, count, theta, orderedOut);
    }
    final Memory loadedMem =
        loadCompactMemory(hashes, seedHash, count, theta, dstMem, (byte) flagsOut, preLongs);
    return new DirectCompactSketch(loadedMem);
}
Also used : Memory(org.apache.datasketches.memory.Memory) WritableMemory(org.apache.datasketches.memory.WritableMemory) Family(org.apache.datasketches.Family)

Example 20 with Family

use of org.apache.datasketches.Family in project sketches-core by DataSketches.

the class CompactSketch method wrap.

/**
 * Wrap takes the CompactSketch image in the given Memory and refers to it directly,
 * without copying data onto the java heap.
 * The wrap operation enables fast read-only merging and access to all the public read-only API.
 *
 * <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have
 * been explicitly stored as direct sketches can be wrapped.
 * Wrapping earlier serial version sketches will result in a heapify operation,
 * because those early versions were never designed to "wrap".</p>
 *
 * <p>Wrapping any subclass of this class that is empty or contains only a single item will
 * result in heapified forms of empty and single item sketch respectively.
 * This is actually faster and consumes less overall memory.</p>
 *
 * <p>This method assumes that the sketch image was created with the correct hash seed, so it is
 * not checked. However, Serial Version 1 sketch images do not have a seedHash field,
 * so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.</p>
 *
 * @param srcMem an image of a Sketch.
 * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>.
 * @return a CompactSketch backed by the given Memory except as above.
 */
public static CompactSketch wrap(final Memory srcMem) {
    final int version = srcMem.getByte(SER_VER_BYTE) & 0XFF;
    final int famId = srcMem.getByte(FAMILY_BYTE) & 0XFF;
    final Family fam = Family.idToFamily(famId);
    if (fam != Family.COMPACT) {
        throw new IllegalArgumentException("Corrupted: " + fam + " is not Compact!");
    }

    switch (version) {
        case 3: {
            // empty and single-item images are returned in heapified form
            if (PreambleUtil.isEmptyFlag(srcMem)) {
                return EmptyCompactSketch.getHeapInstance(srcMem);
            }
            final short seedHashV3 = (short) extractSeedHash(srcMem);
            if (otherCheckForSingleItem(srcMem)) {
                return SingleItemSketch.heapify(srcMem, seedHashV3);
            }
            // not empty & not singleItem: both the compact and read-only flags are mandatory
            final int flagBits = srcMem.getByte(FLAGS_BYTE);
            final boolean isCompact = (flagBits & COMPACT_FLAG_MASK) > 0;
            if (!isCompact) {
                throw new SketchesArgumentException("Corrupted: COMPACT family sketch image must have compact flag set");
            }
            final boolean isReadOnly = (flagBits & READ_ONLY_FLAG_MASK) > 0;
            if (!isReadOnly) {
                throw new SketchesArgumentException("Corrupted: COMPACT family sketch image must have Read-Only flag set");
            }
            return DirectCompactSketch.wrapInstance(srcMem, seedHashV3);
        }
        case 1: {
            // version 1 images carry no seedHash field, so the default seed hash is supplied
            return ForwardCompatibility.heapify1to3(srcMem, defaultSeedHash);
        }
        case 2: {
            final short seedHashV2 = (short) extractSeedHash(srcMem);
            return ForwardCompatibility.heapify2to3(srcMem, seedHashV2);
        }
        default: {
            throw new SketchesArgumentException("Corrupted: Serialization Version " + version + " not recognized.");
        }
    }
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) Family.idToFamily(org.apache.datasketches.Family.idToFamily) Family(org.apache.datasketches.Family)

Aggregations

Family (org.apache.datasketches.Family): 25; SketchesArgumentException (org.apache.datasketches.SketchesArgumentException): 14; Family.idToFamily (org.apache.datasketches.Family.idToFamily): 12; ResizeFactor (org.apache.datasketches.ResizeFactor): 4; WritableMemory (org.apache.datasketches.memory.WritableMemory): 2; Test (org.testng.annotations.Test): 2; DefaultMemoryRequestServer (org.apache.datasketches.memory.DefaultMemoryRequestServer): 1; Memory (org.apache.datasketches.memory.Memory): 1; MemoryRequestServer (org.apache.datasketches.memory.MemoryRequestServer): 1; PreambleUtil.extractLgResizeFactor (org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor): 1; PreambleUtil.insertLgResizeFactor (org.apache.datasketches.theta.PreambleUtil.insertLgResizeFactor): 1