Search in sources :

Example 1 with ResizeFactor

use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.

the class PreambleUtil method sketchPreambleToString.

/**
 * Builds a human-readable, multi-line summary of the preamble of a serialized sampling sketch.
 *
 * <p>The resize factor, serialization version, flags and sketch size (k) are always reported.
 * The number of items seen (n) is reported only when the EMPTY flag is not set. For a non-empty
 * {@code Family.VAROPT} image the H and R region counts are reported as well, plus the total
 * R region weight when the R region count is positive.</p>
 *
 * @param mem      the Memory holding the serialized sketch image
 * @param family   the Family used for the summary title and to select the VarOpt-only fields
 * @param preLongs the number of 8-byte preamble longs; used to split total bytes into preamble
 *                 bytes and data bytes
 * @return the formatted preamble summary
 */
private static String sketchPreambleToString(final Memory mem, final Family family, final int preLongs) {
    final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(mem));
    final int serVer = extractSerVer(mem);
    // Flags
    final int flags = extractFlags(mem);
    final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
    // "!= 0" rather than "> 0" so a mask that occupies the sign bit would still be detected
    final boolean isEmpty = (flags & EMPTY_FLAG_MASK) != 0;
    final boolean isGadget = (flags & GADGET_FLAG_MASK) != 0;
    final int k;
    if (serVer == 1) {
        // serialization version 1 keeps the size as an encoded short that has to be decoded
        final short encK = extractEncodedReservoirSize(mem);
        k = ReservoirSize.decodeValue(encK);
    } else {
        k = extractK(mem);
    }
    final long dataBytes = mem.getCapacity() - (preLongs << 3);
    // Locale.ROOT keeps the title stable regardless of the JVM default locale
    final String title = family.getFamilyName().toUpperCase(java.util.Locale.ROOT) + " PREAMBLE SUMMARY";

    final StringBuilder sb = new StringBuilder();
    // opening banner: must not carry the "END" marker, which belongs to the closing banner only
    sb.append(LS).append("### ").append(title).append(LS)
      .append("Byte  0: Preamble Longs       : ").append(preLongs).append(LS)
      .append("Byte  0: ResizeFactor         : ").append(rf.toString()).append(LS)
      .append("Byte  1: Serialization Version: ").append(serVer).append(LS)
      .append("Byte  2: Family               : ").append(family.toString()).append(LS)
      .append("Byte  3: Flags Field          : ").append(flagsStr).append(LS)
      .append("  EMPTY                       : ").append(isEmpty).append(LS);
    if (family == Family.VAROPT) {
        sb.append("  GADGET                      : ").append(isGadget).append(LS);
    }
    sb.append("Bytes  4-7: Sketch Size (k)   : ").append(k).append(LS);
    if (!isEmpty) {
        // n is only read for a non-empty image
        sb.append("Bytes 8-15: Items Seen (n)    : ").append(extractN(mem)).append(LS);
    }
    if ((family == Family.VAROPT) && !isEmpty) {
        final int hCount = extractHRegionItemCount(mem);
        final int rCount = extractRRegionItemCount(mem);
        sb.append("Bytes 16-19: H region count   : ").append(hCount).append(LS)
          .append("Bytes 20-23: R region count   : ").append(rCount).append(LS);
        if (rCount > 0) {
            // the R weight is only reported when the R region holds items, so only read it then
            sb.append("Bytes 24-31: R region weight  : ").append(extractTotalRWeight(mem)).append(LS);
        }
    }
    sb.append("TOTAL Sketch Bytes            : ").append(mem.getCapacity()).append(LS)
      .append("  Preamble Bytes              : ").append(preLongs << 3).append(LS)
      .append("  Data Bytes                  : ").append(dataBytes).append(LS)
      .append("### END ").append(title).append(LS);
    return sb.toString();
}
Also used : ResizeFactor(org.apache.datasketches.ResizeFactor)

Example 2 with ResizeFactor

use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.

the class PreambleUtil method unionPreambleToString.

/**
 * Builds a human-readable, multi-line summary of the preamble of a serialized sampling union.
 *
 * <p>Reports the resize factor, serialization version, family, flags, the EMPTY flag, the
 * maximum sketch size (maxK) and the split of the total bytes into preamble and sketch bytes.</p>
 *
 * @param mem      the Memory holding the serialized union image
 * @param family   the Family used for the summary title
 * @param preLongs the number of 8-byte preamble longs; used to split total bytes into preamble
 *                 bytes and sketch bytes
 * @return the formatted preamble summary
 */
private static String unionPreambleToString(final Memory mem, final Family family, final int preLongs) {
    final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(mem));
    final int serVer = extractSerVer(mem);
    // Flags
    final int flags = extractFlags(mem);
    final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
    // "!= 0" rather than "> 0" so a mask that occupies the sign bit would still be detected
    final boolean isEmpty = (flags & EMPTY_FLAG_MASK) != 0;
    final int k;
    if (serVer == 1) {
        // serialization version 1 keeps the size as an encoded short that has to be decoded
        final short encK = extractEncodedReservoirSize(mem);
        k = ReservoirSize.decodeValue(encK);
    } else {
        k = extractK(mem);
    }
    final long dataBytes = mem.getCapacity() - (preLongs << 3);
    // Locale.ROOT keeps the title stable regardless of the JVM default locale
    final String title = family.getFamilyName().toUpperCase(java.util.Locale.ROOT) + " PREAMBLE SUMMARY";
    // opening banner: must not carry the "END" marker, which belongs to the closing banner only
    return LS + "### " + title + LS
        + "Byte  0: Preamble Longs           : " + preLongs + LS
        + "Byte  0: ResizeFactor             : " + rf.toString() + LS
        + "Byte  1: Serialization Version    : " + serVer + LS
        + "Byte  2: Family                   : " + family.toString() + LS
        + "Byte  3: Flags Field              : " + flagsStr + LS
        + "  EMPTY                           : " + isEmpty + LS
        + "Bytes  4-7: Max Sketch Size (maxK): " + k + LS
        + "TOTAL Sketch Bytes                : " + mem.getCapacity() + LS
        + "  Preamble Bytes                  : " + (preLongs << 3) + LS
        + "  Sketch Bytes                    : " + dataBytes + LS
        + "### END " + title + LS;
}
Also used : ResizeFactor(org.apache.datasketches.ResizeFactor)

Example 3 with ResizeFactor

use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.

the class ReservoirItemsSketch method heapify.

/**
 * Returns a sketch instance of this class from the given srcMem,
 * which must be a Memory representation of this sketch class.
 *
 * <p>An image whose serialization version is 1 is still accepted: its reservoir size is stored
 * in an encoded form and is decoded here. Any other version that differs from the current
 * serialization version is rejected.</p>
 *
 * @param <T>    The type of item this sketch contains
 * @param srcMem a Memory representation of a sketch of this class.
 *               <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param serDe  An instance of ArrayOfItemsSerDe
 * @return a sketch instance of this class
 * @throws SketchesArgumentException if the preamble-longs count is neither the minimum nor the
 *                                   maximum defined for the RESERVOIR family, or if the
 *                                   serialization version is not supported
 */
public static <T> ReservoirItemsSketch<T> heapify(final Memory srcMem, final ArrayOfItemsSerDe<T> serDe) {
    Family.RESERVOIR.checkFamilyID(srcMem.getByte(FAMILY_BYTE));
    final int numPreLongs = extractPreLongs(srcMem);
    final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
    final int serVer = extractSerVer(srcMem);
    final boolean isEmpty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) != 0;
    final long itemsSeen = (isEmpty ? 0 : extractN(srcMem));
    int k = extractK(srcMem);
    // Check values
    final int minPreLongs = Family.RESERVOIR.getMinPreLongs();
    final int maxPreLongs = Family.RESERVOIR.getMaxPreLongs();
    if ((numPreLongs != minPreLongs) && (numPreLongs != maxPreLongs)) {
        // report what was actually checked (an unexpected preLongs count) and the value found
        throw new SketchesArgumentException("Possible corruption: preLongs must be " + minPreLongs + " or " + maxPreLongs + ". Found: " + numPreLongs);
    }
    if (serVer != SER_VER) {
        if (serVer == 1) {
            // version 1 keeps k as an encoded short, replacing the value read above
            final short encK = extractEncodedReservoirSize(srcMem);
            k = ReservoirSize.decodeValue(encK);
        } else {
            throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
        }
    }
    if (isEmpty) {
        return new ReservoirItemsSketch<>(k, rf);
    }
    final int preLongBytes = numPreLongs << 3;
    // default to full reservoir
    int allocatedItems = k;
    if (itemsSeen < k) {
        // under-full so determine size to allocate, using ceilingLog2(totalSeen) as minimum
        // casts to int are safe since under-full
        final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
        final int minLgSize = Util.toLog2(Util.ceilingPowerOf2((int) itemsSeen), "heapify");
        final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(), Math.max(minLgSize, MIN_LG_ARR_ITEMS));
        allocatedItems = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
    }
    // only min(k, itemsSeen) items are stored after the preamble
    final int itemsToRead = (int) Math.min(k, itemsSeen);
    final T[] data = serDe.deserializeFromMemory(srcMem.region(preLongBytes, srcMem.getCapacity() - preLongBytes), itemsToRead);
    final ArrayList<T> dataList = new ArrayList<>(Arrays.asList(data));
    final ReservoirItemsSketch<T> ris = new ReservoirItemsSketch<>(dataList, itemsSeen, rf, k);
    ris.data_.ensureCapacity(allocatedItems);
    ris.currItemsAlloc_ = allocatedItems;
    return ris;
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) ArrayList(java.util.ArrayList) PreambleUtil.extractResizeFactor(org.apache.datasketches.sampling.PreambleUtil.extractResizeFactor) ResizeFactor(org.apache.datasketches.ResizeFactor)

Example 4 with ResizeFactor

use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.

the class ReservoirLongsSketch method heapify.

/**
 * Returns a sketch instance of this class from the given srcMem, which must be a Memory
 * representation of this sketch class.
 *
 * <p>An image whose serialization version is 1 is still accepted: its reservoir size is stored
 * in an encoded form and is decoded here. Any other version that differs from the current
 * serialization version is rejected.</p>
 *
 * @param srcMem a Memory representation of a sketch of this class. <a href=
 *        "{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @return a sketch instance of this class
 * @throws SketchesArgumentException if the preamble-longs count is neither the minimum nor the
 *                                   maximum defined for the RESERVOIR family, or if the
 *                                   serialization version is not supported
 */
public static ReservoirLongsSketch heapify(final Memory srcMem) {
    Family.RESERVOIR.checkFamilyID(srcMem.getByte(FAMILY_BYTE));
    final int numPreLongs = extractPreLongs(srcMem);
    final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(srcMem));
    final int serVer = extractSerVer(srcMem);
    final boolean isEmpty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) != 0;
    final long itemsSeen = (isEmpty ? 0 : extractN(srcMem));
    int k = extractK(srcMem);
    // Check values
    final int minPreLongs = Family.RESERVOIR.getMinPreLongs();
    final int maxPreLongs = Family.RESERVOIR.getMaxPreLongs();
    if ((numPreLongs != minPreLongs) && (numPreLongs != maxPreLongs)) {
        // report what was actually checked (an unexpected preLongs count) and the value found
        throw new SketchesArgumentException("Possible corruption: preLongs must be " + minPreLongs + " or " + maxPreLongs + ". Found: " + numPreLongs);
    }
    if (serVer != SER_VER) {
        if (serVer == 1) {
            // version 1 keeps k as an encoded short, replacing the value read above
            final short encK = extractEncodedReservoirSize(srcMem);
            k = ReservoirSize.decodeValue(encK);
        } else {
            throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + serVer);
        }
    }
    if (isEmpty) {
        return new ReservoirLongsSketch(k, rf);
    }
    final int preLongBytes = numPreLongs << 3;
    // only min(itemsSeen, k) longs are stored after the preamble
    final int numSketchLongs = (int) Math.min(itemsSeen, k);
    // default to full reservoir
    int allocatedSize = k;
    if (itemsSeen < k) {
        // under-full so determine size to allocate, using ceilingLog2(totalSeen) as minimum
        // casts to int are safe since under-full
        final int ceilingLgK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
        final int minLgSize = Util.toLog2(Util.ceilingPowerOf2((int) itemsSeen), "heapify");
        final int initialLgSize = SamplingUtil.startingSubMultiple(ceilingLgK, rf.lg(), Math.max(minLgSize, MIN_LG_ARR_LONGS));
        allocatedSize = SamplingUtil.getAdjustedSize(k, 1 << initialLgSize);
    }
    final long[] data = new long[allocatedSize];
    srcMem.getLongArray(preLongBytes, data, 0, numSketchLongs);
    return new ReservoirLongsSketch(data, itemsSeen, rf, k);
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) PreambleUtil.extractResizeFactor(org.apache.datasketches.sampling.PreambleUtil.extractResizeFactor) ResizeFactor(org.apache.datasketches.ResizeFactor)

Example 5 with ResizeFactor

use of org.apache.datasketches.ResizeFactor in project sketches-core by DataSketches.

the class VarOptItemsSketch method heapify.

/**
 * Rebuilds a heap-resident VarOptItemsSketch from the serialized image held in srcMem.
 *
 * <p>The preamble is read and validated first (preLongs versus the EMPTY flag, serialization
 * version, family ID, k, n, and the H and R region counts). The H region weights, the optional
 * marks (present only when the GADGET flag is set) and finally the items are then read from
 * the bytes that follow the preamble.</p>
 *
 * @param <T>    The type of item this sketch contains
 * @param srcMem a Memory representation of a sketch of this class.
 *               <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
 * @param serDe  An instance of ArrayOfItemsSerDe
 * @return a sketch instance of this class
 */
@SuppressWarnings("null")
public static <T> VarOptItemsSketch<T> heapify(final Memory srcMem, final ArrayOfItemsSerDe<T> serDe) {
    final int preLongs = getAndCheckPreLongs(srcMem);
    final ResizeFactor resizeFactor = ResizeFactor.getRF(extractResizeFactor(srcMem));
    final int version = extractSerVer(srcMem);
    final int foundFamilyId = extractFamilyID(srcMem);
    final int flagBits = extractFlags(srcMem);
    final boolean empty = (flagBits & EMPTY_FLAG_MASK) != 0;
    final boolean gadget = (flagBits & GADGET_FLAG_MASK) != 0;

    // the preLongs count has to agree with the EMPTY flag
    if (empty) {
        if (preLongs != VO_PRELONGS_EMPTY) {
            throw new SketchesArgumentException("Possible corruption: Must be " + VO_PRELONGS_EMPTY
                + " for an empty sketch. Found: " + preLongs);
        }
    } else if ((preLongs != VO_PRELONGS_WARMUP) && (preLongs != VO_PRELONGS_FULL)) {
        throw new SketchesArgumentException("Possible corruption: Must be " + VO_PRELONGS_WARMUP + " or "
            + VO_PRELONGS_FULL + " for a non-empty sketch. Found: " + preLongs);
    }

    if (version != SER_VER) {
        throw new SketchesArgumentException("Possible Corruption: Ser Ver must be " + SER_VER + ": " + version);
    }

    final int expectedFamilyId = Family.VAROPT.getID();
    if (foundFamilyId != expectedFamilyId) {
        throw new SketchesArgumentException("Possible Corruption: FamilyID must be " + expectedFamilyId + ": "
            + foundFamilyId);
    }

    final int k = extractK(srcMem);
    if (k < 1) {
        throw new SketchesArgumentException("Possible Corruption: k must be at least 1: " + k);
    }

    // an empty image carries nothing beyond k and the resize factor
    if (empty) {
        assert preLongs == Family.VAROPT.getMinPreLongs();
        return new VarOptItemsSketch<>(k, resizeFactor);
    }

    final long n = extractN(srcMem);
    if (n < 0) {
        throw new SketchesArgumentException("Possible Corruption: n cannot be negative: " + n);
    }

    // remaining preamble fields: region counts and, for the largest preamble, the R weight
    final int hCount = extractHRegionItemCount(srcMem);
    final int rCount = extractRRegionItemCount(srcMem);
    if (hCount < 0) {
        throw new SketchesArgumentException("Possible Corruption: H region count cannot be negative: " + hCount);
    }
    if (rCount < 0) {
        throw new SketchesArgumentException("Possible Corruption: R region count cannot be negative: " + rCount);
    }

    double totalRWeight = 0.0;
    if (preLongs == Family.VAROPT.getMaxPreLongs()) {
        if (rCount <= 0) {
            throw new SketchesArgumentException("Possible Corruption: " + Family.VAROPT.getMaxPreLongs()
                + " preLongs but no items in R region");
        }
        totalRWeight = extractTotalRWeight(srcMem);
    }

    final int preambleBytes = preLongs << 3;
    final int itemCount = hCount + rCount;

    // k + 1 slots unless the R region is empty, in which case the size is derived from hCount
    final int capacity;
    if (rCount == 0) {
        final int lgCeilingK = Util.toLog2(Util.ceilingPowerOf2(k), "heapify");
        final int lgCeilingH = Util.toLog2(Util.ceilingPowerOf2(hCount), "heapify");
        final int lgStart = SamplingUtil.startingSubMultiple(lgCeilingK, resizeFactor.lg(),
            Math.max(lgCeilingH, MIN_LG_ARR_ITEMS));
        final int adjusted = SamplingUtil.getAdjustedSize(k, 1 << lgStart);
        // an adjusted size of exactly k is bumped by one
        capacity = (adjusted == k) ? adjusted + 1 : adjusted;
    } else {
        capacity = k + 1;
    }

    // the lists are sized to capacity, but only hCount weights are read from the image
    final long weightOffsetBytes = TOTAL_WEIGHT_R_DOUBLE + (rCount > 0 ? Double.BYTES : 0);
    final ArrayList<Double> weights = new ArrayList<>(capacity);
    final double[] rawWeights = new double[capacity];
    srcMem.getDoubleArray(weightOffsetBytes, rawWeights, 0, hCount);
    // boxed one at a time: Arrays.asList cannot wrap a double[] as a List<Double>
    for (int idx = 0; idx < hCount; ++idx) {
        final double weight = rawWeights[idx];
        if (weight <= 0.0) {
            throw new SketchesArgumentException("Possible Corruption: Non-positive weight in heapify(): " + weight);
        }
        weights.add(weight);
    }

    // byte offset just past the hCount weights, measured from the start of the image
    final long weightsEndBytes = preambleBytes + ((long) hCount * Double.BYTES);

    // marks are present only for a gadget
    long markBytes = 0;
    int marksInH = 0;
    ArrayList<Boolean> marks = null;
    if (gadget) {
        markBytes = ArrayOfBooleansSerDe.computeBytesNeeded(hCount);
        marks = new ArrayList<>(capacity);
        final ArrayOfBooleansSerDe markSerDe = new ArrayOfBooleansSerDe();
        final Boolean[] rawMarks = markSerDe.deserializeFromMemory(
            srcMem.region(weightsEndBytes, (hCount >>> 3) + 1), hCount);
        for (int idx = 0; idx < rawMarks.length; ++idx) {
            final Boolean mark = rawMarks[idx];
            if (mark) {
                ++marksInH;
            }
            marks.add(mark);
        }
    }

    // items follow the weights and, when present, the marks
    final long itemsOffsetBytes = weightsEndBytes + markBytes;
    final T[] rawItems = serDe.deserializeFromMemory(
        srcMem.region(itemsOffsetBytes, srcMem.getCapacity() - itemsOffsetBytes), itemCount);
    final List<T> itemView = Arrays.asList(rawItems);
    final ArrayList<T> items = new ArrayList<>(capacity);
    items.addAll(itemView.subList(0, hCount));

    if (rCount > 0) {
        // one placeholder for the gap plus one per R item; long counter so "<=" cannot overflow
        for (long slot = 0; slot <= rCount; ++slot) {
            weights.add(-1.0);
            if (gadget) {
                marks.add(false);
            }
        }
        // the gap, then the R region items
        items.add(null);
        items.addAll(itemView.subList(hCount, itemCount));
    }

    final VarOptItemsSketch<T> result = new VarOptItemsSketch<>(items, weights, k, n, capacity, resizeFactor,
        hCount, rCount, totalRWeight);
    if (gadget) {
        result.marks_ = marks;
        result.numMarksInH_ = marksInH;
    }
    return result;
}
Also used : ArrayOfBooleansSerDe(org.apache.datasketches.ArrayOfBooleansSerDe) ArrayList(java.util.ArrayList) PreambleUtil.extractResizeFactor(org.apache.datasketches.sampling.PreambleUtil.extractResizeFactor) ResizeFactor(org.apache.datasketches.ResizeFactor) SketchesArgumentException(org.apache.datasketches.SketchesArgumentException)

Aggregations

ResizeFactor (org.apache.datasketches.ResizeFactor): 24, Test (org.testng.annotations.Test): 13, PreambleUtil.insertLgResizeFactor (org.apache.datasketches.theta.PreambleUtil.insertLgResizeFactor): 6, SketchesArgumentException (org.apache.datasketches.SketchesArgumentException): 5, PreambleUtil.extractLgResizeFactor (org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor): 5, Family (org.apache.datasketches.Family): 4, ArrayList (java.util.ArrayList): 3, WritableMemory (org.apache.datasketches.memory.WritableMemory): 3, PreambleUtil.extractResizeFactor (org.apache.datasketches.sampling.PreambleUtil.extractResizeFactor): 3, SketchesException (org.apache.datasketches.SketchesException): 2, DefaultMemoryRequestServer (org.apache.datasketches.memory.DefaultMemoryRequestServer): 2, MemoryRequestServer (org.apache.datasketches.memory.MemoryRequestServer): 2, ArrayOfBooleansSerDe (org.apache.datasketches.ArrayOfBooleansSerDe): 1