use of org.apache.datasketches.memory.WritableMemory in project sketches-core by DataSketches.
the class VarOptItemsSketch method toByteArray.
/**
 * Serializes this sketch into a newly allocated byte array. The retained items are first copied
 * into an array of the given class so that polymorphic item types can be handed to the serDe.
 *
 * @param serDe An instance of ArrayOfItemsSerDe
 * @param clazz The class represented by {@code <T>}
 * @return a byte array representation of this sketch
 */
// serializedItems is left null only when the sketch is empty
@SuppressWarnings("null")
public byte[] toByteArray(final ArrayOfItemsSerDe<? super T> serDe, final Class<?> clazz) {
  final boolean hasGadget = marks_ != null;
  final boolean isEmpty = (h_ == 0) && (r_ == 0);

  final int preLongs;
  final int totalBytes;
  int flagBits = hasGadget ? GADGET_FLAG_MASK : 0;
  // holds the items as serialized by serDe
  byte[] serializedItems = null;

  if (!isEmpty) {
    // no R region: shorter warmup preamble; otherwise the full-size preamble
    preLongs = (r_ != 0) ? Family.VAROPT.getMaxPreLongs() : PreambleUtil.VO_PRELONGS_WARMUP;
    serializedItems = serDe.serializeToByteArray(getDataSamples(clazz));
    final int markBytesLen = hasGadget ? ArrayOfBooleansSerDe.computeBytesNeeded(h_) : 0;
    // preamble + h_ weights + optional marks + items
    totalBytes = (preLongs << 3) + (h_ * Double.BYTES) + markBytesLen + serializedItems.length;
  } else {
    // an empty sketch carries nothing beyond the minimum header
    preLongs = Family.VAROPT.getMinPreLongs();
    totalBytes = preLongs << 3;
    flagBits |= EMPTY_FLAG_MASK;
  }

  final byte[] result = new byte[totalBytes];
  final WritableMemory mem = WritableMemory.writableWrap(result);

  // first preLong
  PreambleUtil.insertPreLongs(mem, preLongs);                // Byte 0
  PreambleUtil.insertLgResizeFactor(mem, rf_.lg());          // Byte 0
  PreambleUtil.insertSerVer(mem, SER_VER);                   // Byte 1
  PreambleUtil.insertFamilyID(mem, Family.VAROPT.getID());   // Byte 2
  PreambleUtil.insertFlags(mem, flagBits);                   // Byte 3
  PreambleUtil.insertK(mem, k_);                             // Bytes 4-7

  if (isEmpty) {
    return result;
  }

  PreambleUtil.insertN(mem, n_);                             // Bytes 8-15
  PreambleUtil.insertHRegionItemCount(mem, h_);              // Bytes 16-19
  PreambleUtil.insertRRegionItemCount(mem, r_);              // Bytes 20-23
  if (r_ > 0) {
    PreambleUtil.insertTotalRWeight(mem, totalWtR_);         // Bytes 24-31
  }

  // the first h_ weights follow the preamble
  int pos = preLongs << 3;
  for (int idx = 0; idx < h_; idx++, pos += Double.BYTES) {
    mem.putDouble(pos, weights_.get(idx));
  }

  // the first h_ marks are written iff we have a gadget
  if (hasGadget) {
    final Boolean[] leadingMarks = marks_.subList(0, h_).toArray(new Boolean[0]);
    final byte[] packedMarks = MARK_SERDE.serializeToByteArray(leadingMarks);
    mem.putByteArray(pos, packedMarks, 0, packedMarks.length);
    pos += packedMarks.length;
  }

  // finally the sample items, continuing from the running position
  mem.putByteArray(pos, serializedItems, 0, serializedItems.length);
  return result;
}
use of org.apache.datasketches.memory.WritableMemory in project sketches-core by DataSketches.
the class DirectQuickSelectSketchR method toByteArray.
/**
 * Returns a byte array image of this sketch: a copy of the first getCurrentBytes() bytes of the
 * backing memory, with the theta field of the copy replaced by the value that
 * correctThetaOnCompact produces for the current empty state and entry count.
 *
 * <p>The adjusted theta is written to the returned copy only; the sketch's own backing memory
 * is not written to by this method.
 *
 * @return a byte array image of this sketch
 */
@Override
public byte[] toByteArray() {
  // MY_FAMILY is stored in wmem_
  checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wmem_));
  final int lengthBytes = getCurrentBytes();
  final byte[] byteArray = new byte[lengthBytes];
  final WritableMemory mem = WritableMemory.writableWrap(byteArray);
  wmem_.copyTo(0, mem, 0, lengthBytes);
  final long thetaLong = correctThetaOnCompact(isEmpty(), extractCurCount(wmem_), extractThetaLong(wmem_));
  // The copy above has already been taken, so the adjusted theta has to go into the output
  // image (mem). Writing it to wmem_ would leave the returned bytes unadjusted and would
  // modify the source memory as a side effect of serialization.
  insertThetaLong(mem, thetaLong);
  return byteArray;
}
use of org.apache.datasketches.memory.WritableMemory in project sketches-core by DataSketches.
the class DoublesMergeImpl method mergeInto.
/**
 * Merges the source sketch into the target sketch that can have a smaller value of K.
 * However, it is required that the ratio of the two K values be a power of 2.
 * I.e., source.getK() = target.getK() * 2^(nonnegative integer).
 * The source is not modified. When the K values differ the work is delegated to
 * {@code downSamplingMergeInto}. When they are equal and the source is empty, the target is
 * left untouched.
 *
 * <p>Note: It is easy to prove that the following simplified code which launches multiple waves of
 * carry propagation does exactly the same amount of merging work (including the work of
 * allocating fresh buffers) as the more complicated and seemingly more efficient approach that
 * tracks a single carry propagation wave through both sketches.
 *
 * <p>This simplified code probably does do slightly more "outer loop" work, but I am pretty
 * sure that even that is within a constant factor of the more complicated code, plus the
 * total amount of "outer loop" work is at least a factor of K smaller than the total amount of
 * merging work, which is identical in the two approaches.
 *
 * <p>Note: a two-way merge that doesn't modify either of its two inputs could be implemented
 * by making a deep copy of the larger sketch and then merging the smaller one into it.
 * However, it was decided not to do this.
 *
 * @param src The source sketch
 * @param tgt The target sketch
 */
static void mergeInto(final DoublesSketch src, final UpdateDoublesSketch tgt) {
  final int srcK = src.getK();
  final int tgtK = tgt.getK();
  final long srcN = src.getN();
  final long tgtN = tgt.getN();
  if (srcK != tgtK) {
    downSamplingMergeInto(src, tgt);
    return;
  }
  // The remainder of this code is for the case where the k's are equal

  // An empty source contributes nothing. Returning here matches downSamplingMergeInto and keeps
  // the NaN -> +/-infinity sentinels used at the end of this method from being stored into a
  // target that is itself empty.
  if (src.isEmpty()) {
    return;
  }

  final DoublesSketchAccessor srcSketchBuf = DoublesSketchAccessor.wrap(src);
  final long nFinal = tgtN + srcN;
  for (int i = 0; i < srcSketchBuf.numItems(); i++) {
    // update only the base buffer
    tgt.update(srcSketchBuf.get(i));
  }

  // make sure the target's combined buffer can hold the merged item count
  final int spaceNeeded = DoublesUpdateImpl.getRequiredItemCapacity(tgtK, nFinal);
  final int tgtCombBufItemCap = tgt.getCombinedBufferItemCapacity();
  if (spaceNeeded > tgtCombBufItemCap) {
    // copies base buffer plus current levels
    tgt.growCombinedBuffer(tgtCombBufItemCap, spaceNeeded);
  }

  final DoublesArrayAccessor scratch2KAcc = DoublesArrayAccessor.initialize(2 * tgtK);
  long srcBitPattern = src.getBitPattern();
  assert srcBitPattern == (srcN / (2L * srcK));
  final DoublesSketchAccessor tgtSketchBuf = DoublesSketchAccessor.wrap(tgt, true);
  long newTgtBitPattern = tgt.getBitPattern();

  // walk the source bit pattern from the lowest bit up; every set bit marks a source level
  // that is carried into the target starting at the same level
  for (int srcLvl = 0; srcBitPattern != 0L; srcLvl++, srcBitPattern >>>= 1) {
    if ((srcBitPattern & 1L) > 0L) {
      newTgtBitPattern = DoublesUpdateImpl.inPlacePropagateCarry(
          srcLvl,
          srcSketchBuf.setLevel(srcLvl),
          scratch2KAcc,
          false,
          tgtK,
          tgtSketchBuf,
          newTgtBitPattern);
    }
  }

  // a direct target that now holds data must no longer carry the empty flag
  if (tgt.isDirect() && (nFinal > 0)) {
    final WritableMemory mem = tgt.getMemory();
    mem.clearBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK);
  }
  tgt.putN(nFinal);
  // no-op if direct
  tgt.putBitPattern(newTgtBitPattern);
  // internal consistency check
  assert (tgt.getN() / (2L * tgtK)) == tgt.getBitPattern();

  // NaN extrema are replaced by neutral sentinels so that max/min pick the other operand
  double srcMax = src.getMaxValue();
  srcMax = Double.isNaN(srcMax) ? Double.NEGATIVE_INFINITY : srcMax;
  double srcMin = src.getMinValue();
  srcMin = Double.isNaN(srcMin) ? Double.POSITIVE_INFINITY : srcMin;
  double tgtMax = tgt.getMaxValue();
  tgtMax = Double.isNaN(tgtMax) ? Double.NEGATIVE_INFINITY : tgtMax;
  double tgtMin = tgt.getMinValue();
  tgtMin = Double.isNaN(tgtMin) ? Double.POSITIVE_INFINITY : tgtMin;
  tgt.putMaxValue(Math.max(srcMax, tgtMax));
  tgt.putMinValue(Math.min(srcMin, tgtMin));
}
use of org.apache.datasketches.memory.WritableMemory in project sketches-core by DataSketches.
the class DoublesMergeImpl method downSamplingMergeInto.
/**
 * Folds the source sketch into a target sketch whose K may be smaller than the source's.
 * The two K values must differ by a power-of-2 factor, i.e.
 * source.getK() = target.getK() * 2^(nonnegative integer).
 * The source is not modified.
 *
 * @param src The source sketch
 * @param tgt The target sketch
 */
// also used by DoublesSketch, DoublesUnionImpl and HeapDoublesSketchTest
static void downSamplingMergeInto(final DoublesSketch src, final UpdateDoublesSketch tgt) {
  final int sourceK = src.getK();
  final int targetK = tgt.getK();
  final long targetNBefore = tgt.getN();

  if ((sourceK % targetK) != 0) {
    throw new SketchesArgumentException("source.getK() must equal target.getK() * 2^(nonnegative integer).");
  }
  final int ratio = sourceK / targetK;
  checkIfPowerOf2(ratio, "source.getK()/target.getK() ratio");
  final int lgRatio = Integer.numberOfTrailingZeros(ratio);

  if (src.isEmpty()) {
    return;
  }

  final DoublesSketchAccessor srcAcc = DoublesSketchAccessor.wrap(src);
  final long mergedN = targetNBefore + src.getN();

  // update only the base buffer
  int idx = 0;
  while (idx < srcAcc.numItems()) {
    tgt.update(srcAcc.get(idx));
    idx++;
  }

  final int requiredCap = DoublesUpdateImpl.getRequiredItemCapacity(targetK, mergedN);
  final int currentCap = tgt.getCombinedBufferItemCapacity();
  if (currentCap < requiredCap) {
    // copies base buffer plus current levels
    tgt.growCombinedBuffer(currentCap, requiredCap);
  }

  // working scratch buffers
  final DoublesArrayAccessor twoKScratch = DoublesArrayAccessor.initialize(2 * targetK);
  final DoublesArrayAccessor kScratch = DoublesArrayAccessor.initialize(targetK);
  final DoublesSketchAccessor tgtAcc = DoublesSketchAccessor.wrap(tgt, true);

  long remainingSrcBits = src.getBitPattern();
  long tgtBits = tgt.getBitPattern();
  int level = 0;
  while (remainingSrcBits != 0L) {
    if ((remainingSrcBits & 1L) != 0L) {
      justZipWithStride(srcAcc.setLevel(level), kScratch, targetK, ratio);
      tgtBits = DoublesUpdateImpl.inPlacePropagateCarry(
          level + lgRatio, // starting level
          kScratch,        // optSrcKBuf
          twoKScratch,     // size2KBuf
          false,           // do mergeInto version
          targetK,
          tgtAcc,
          tgtBits);
      // off-heap is a no-op
      tgt.putBitPattern(tgtBits);
    }
    level++;
    remainingSrcBits >>>= 1;
  }

  if (tgt.isDirect() && (mergedN > 0)) {
    final WritableMemory tgtMem = tgt.getMemory();
    tgtMem.clearBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK);
  }
  tgt.putN(mergedN);
  // internal consistency check
  assert (tgt.getN() / (2L * targetK)) == tgtBits;

  // NaN extrema are mapped to neutral sentinels before comparing
  final double rawSrcMax = src.getMaxValue();
  final double srcMax = Double.isNaN(rawSrcMax) ? Double.NEGATIVE_INFINITY : rawSrcMax;
  final double rawSrcMin = src.getMinValue();
  final double srcMin = Double.isNaN(rawSrcMin) ? Double.POSITIVE_INFINITY : rawSrcMin;
  final double rawTgtMax = tgt.getMaxValue();
  final double tgtMax = Double.isNaN(rawTgtMax) ? Double.NEGATIVE_INFINITY : rawTgtMax;
  final double rawTgtMin = tgt.getMinValue();
  final double tgtMin = Double.isNaN(rawTgtMin) ? Double.POSITIVE_INFINITY : rawTgtMin;

  // only overwrite the target's extrema when the source actually extends them
  if (srcMax > tgtMax) {
    tgt.putMaxValue(srcMax);
  }
  if (srcMin < tgtMin) {
    tgt.putMinValue(srcMin);
  }
}
use of org.apache.datasketches.memory.WritableMemory in project sketches-core by DataSketches.
the class DoublesSketch method putMemory.
/**
 * Writes the current sketch into the given Memory. When this sketch is direct and already in
 * the requested form, its backing memory is copied across as-is; otherwise the sketch is
 * serialized to a byte array first, and an exception is thrown if the destination cannot
 * hold that array.
 *
 * @param dstMem the given memory.
 * @param compact if true, compacts and sorts the base buffer, which optimizes merge
 * performance at the cost of slightly increased serialization time.
 */
public void putMemory(final WritableMemory dstMem, final boolean compact) {
  // fast path: the backing memory is already in the requested form
  final boolean copyBackingMemory = isDirect() && (isCompact() == compact);
  if (copyBackingMemory) {
    getMemory().copyTo(0, dstMem, 0, getStorageBytes());
    return;
  }

  final byte[] serialized = toByteArray(compact);
  final long capacity = dstMem.getCapacity();
  if (serialized.length > capacity) {
    throw new SketchesArgumentException("Destination Memory not large enough: " + capacity + " < " + serialized.length);
  }
  dstMem.putByteArray(0, serialized, 0, serialized.length);
}
Aggregations