use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.
the class MiscTest method checkById.
/**
 * Prints a tab-separated table describing the corner case behind each generated case id.
 *
 * <p>Every id is assembled from two values taken from {0, 1, 2, 5, 6}: the first is shifted left
 * by three bits and OR-ed with the second. For each id one row is printed holding the id in
 * octal, the corner case itself, its case description, and the descriptions of its intersection
 * and AnotB actions.</p>
 */
// @Test
public void checkById() {
  final int[] fieldValues = { 0, 1, 2, 5, 6 };
  for (final int upper : fieldValues) {
    for (final int lower : fieldValues) {
      // upper occupies the bits above the low three; lower fills the low three bits
      final int caseId = (upper << 3) | lower;
      final CornerCase cornerCase = CornerCase.caseIdToCornerCase(caseId);
      final String intersectDesc = cornerCase.getIntersectAction().getActionDescription();
      final String anotbDesc = cornerCase.getAnotbAction().getActionDescription();

      final StringBuilder row = new StringBuilder();
      row.append(Integer.toOctalString(caseId)).append('\t');
      row.append(cornerCase).append('\t');
      row.append(cornerCase.getCaseDescription()).append('\t');
      row.append(intersectDesc).append('\t');
      row.append(anotbDesc);
      println(row.toString());
    }
  }
}
use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.
the class AnotB method notB.
/**
 * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the
 * second (or <i>n+1</i>th) argument <i>B</i> of <i>A-AND-NOT-B</i>.
 * Performs an <i>AND NOT</i> operation with the existing internal state of this AnotB operator.
 * Calls to this method can be intermingled with calls to {@link #notB(Sketch)}.
 *
 * <p>An input argument of null or empty is ignored.</p>
 *
 * <p>Rationale: A <i>null</i> for the second or following arguments is more tolerable because
 * <i>A NOT null</i> is still <i>A</i> even if we don't know exactly what the null represents. It
 * clearly does not have any content that overlaps with <i>A</i>. Also, because this can be part of
 * a multistep operation with multiple <i>notB</i> steps, other following steps can still produce
 * a valid result.</p>
 *
 * <p>The action taken is selected from the corner-case id built from the current internal state
 * (theta, retained count, empty flag) and the same three properties of <i>skB</i>.</p>
 *
 * <p>Use {@link #getResult(boolean)} to obtain the result.</p>
 *
 * @param skB The incoming Theta sketch for the second (or following) argument <i>B</i>.
 */
@SuppressWarnings("unchecked")
public void notB(final org.apache.datasketches.theta.Sketch skB) {
  if (skB == null) {
    return; // ignore
  }
  final long thetaLongB = skB.getThetaLong();
  final int countB = skB.getRetainedEntries();
  final boolean emptyB = skB.isEmpty();
  final int id = SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB);
  final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
  final AnotbAction anotbAction = cCase.getAnotbAction();
  switch (anotbAction) {
    case EMPTY_1_0_T: {
      reset();
      break;
    }
    case DEGEN_MIN_0_F: {
      // Capture the minimum BEFORE reset(). reset() is the entire EMPTY_1_0_T action, so it
      // presumably restores theta to 1.0 (Long.MAX_VALUE); taking the min afterwards would
      // drop A's theta and always yield thetaLongB, unlike the stateless aNotB.
      final long minThetaLong = min(thetaLong_, thetaLongB);
      reset();
      thetaLong_ = minThetaLong;
      empty_ = false;
      break;
    }
    case DEGEN_THA_0_F: {
      empty_ = false;
      curCount_ = 0;
      // thetaLong_ is ok
      break;
    }
    case TRIM_A: {
      thetaLong_ = min(thetaLong_, thetaLongB);
      final DataArrays<S> da = trimAndCopyDataArrays(hashArr_, summaryArr_, thetaLong_, true);
      hashArr_ = da.hashArr;
      curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
      summaryArr_ = da.summaryArr;
      break;
    }
    case SKETCH_A: {
      // result is already in A
      break;
    }
    case FULL_ANOTB: {
      // both A and B should have valid entries.
      thetaLong_ = min(thetaLong_, thetaLongB);
      final DataArrays<S> daB = getCopyOfResultArraysTheta(thetaLong_, curCount_, hashArr_, summaryArr_, skB);
      hashArr_ = daB.hashArr;
      curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
      summaryArr_ = daB.summaryArr;
      // empty_ is deliberately left as it was for A
      break;
    }
  }
}
use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.
the class ArrayOfDoublesAnotBImpl method update.
/**
 * Computes <i>A-AND-NOT-B</i> of the two given sketches and stores the outcome (keys, values,
 * theta, empty flag and count) in this operator.
 *
 * <p>Both inputs are validated before any field of this operator is modified, so a rejected
 * call leaves the previously stored configuration and result untouched.</p>
 *
 * @param skA the first argument <i>A</i>; must not be null
 * @param skB the second argument <i>B</i>; must not be null and must have the same number of
 * values and the same seed hash as <i>skA</i>
 * @throws SketchesArgumentException if either argument is null, or if the two sketches differ in
 * numValues or seed hash
 */
@Override
public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) {
  if (skA == null || skB == null) {
    throw new SketchesArgumentException("Neither argument may be null.");
  }
  // Validate first, assign afterwards: a failed check must not leave numValues_/seedHash_
  // describing skA while keys_/values_ still belong to an earlier update.
  if (skA.getNumValues() != skB.getNumValues()) {
    throw new SketchesArgumentException("Inputs cannot have different numValues");
  }
  if (skA.getSeedHash() != skB.getSeedHash()) {
    throw new SketchesArgumentException("Inputs cannot have different seedHashes");
  }
  numValues_ = skA.getNumValues();
  seedHash_ = skA.getSeedHash();

  final long thetaLongA = skA.getThetaLong();
  final int countA = skA.getRetainedEntries();
  final boolean emptyA = skA.isEmpty();
  final long thetaLongB = skB.getThetaLong();
  final int countB = skB.getRetainedEntries();
  final boolean emptyB = skB.isEmpty();
  final int id = SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
  final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
  final AnotbAction anotbAction = cCase.getAnotbAction();
  final long minThetaLong = min(thetaLongA, thetaLongB);
  switch (anotbAction) {
    case EMPTY_1_0_T: {
      reset();
      break;
    }
    case DEGEN_MIN_0_F: {
      // no entries, not empty, theta is the smaller of the two
      keys_ = null;
      values_ = null;
      thetaLong_ = minThetaLong;
      empty_ = false;
      count_ = 0;
      break;
    }
    case DEGEN_THA_0_F: {
      // no entries, not empty, theta taken from A
      keys_ = null;
      values_ = null;
      thetaLong_ = thetaLongA;
      empty_ = false;
      count_ = 0;
      break;
    }
    case TRIM_A: {
      // keep A's data, trimmed against the smaller theta
      final DataArrays daA = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), countA);
      final DataArrays da = trimDataArrays(daA, minThetaLong, numValues_);
      keys_ = da.hashArr;
      values_ = da.valuesArr;
      thetaLong_ = minThetaLong;
      empty_ = emptyA;
      count_ = da.count;
      break;
    }
    case SKETCH_A: {
      // result is A itself, taken from its compact form
      final ArrayOfDoublesCompactSketch csk = skA.compact();
      keys_ = csk.getKeys();
      values_ = csk.getValuesAsOneDimension();
      thetaLong_ = csk.thetaLong_;
      empty_ = csk.isEmpty();
      count_ = csk.getRetainedEntries();
      break;
    }
    case FULL_ANOTB: {
      // both A and B should have valid entries.
      final long[] keysA = skA.getKeys();
      final double[] valuesA = skA.getValuesAsOneDimension();
      final DataArrays daR = getResultArrays(minThetaLong, countA, keysA, valuesA, skB);
      count_ = daR.count;
      keys_ = (count_ == 0) ? null : daR.hashArr;
      values_ = (count_ == 0) ? null : daR.valuesArr;
      thetaLong_ = minThetaLong;
      // only an entry-free result at theta == 1.0 is reported as empty
      empty_ = (minThetaLong == Long.MAX_VALUE) && (count_ == 0);
      break;
    }
  }
}
use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.
the class AnotB method aNotB.
/**
 * Computes the A-and-not-B set operation for two Tuple sketches and returns it as a new sketch.
 *
 * <p>This is a stateless operation: it neither reads nor changes the internal state of an AnotB
 * operator. It is therefore not an accumulating update and is independent of the
 * {@link #setA(Sketch)}, {@link #notB(Sketch)},
 * {@link #notB(org.apache.datasketches.theta.Sketch)}, and {@link #getResult(boolean)}
 * methods.</p>
 *
 * <p>A null for either argument causes an exception.</p>
 *
 * <p>Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
 * That is distinctly different from the java <i>null</i>, which represents a nonexistent object
 * and in most cases indicates a programming error such as an object that was never initialized.
 * Given a null first argument we cannot know the user's intent. Given a null second argument we
 * cannot simply ignore it, because a result must be returned and no further second argument can
 * follow. Since a <i>null</i> is very likely a programming error, an exception is thrown.</p>
 *
 * @param skA The incoming Tuple sketch for the first argument
 * @param skB The incoming Tuple sketch for the second argument
 * @param <S> Type of Summary
 * @return the result as an unordered {@link CompactSketch}
 */
@SuppressWarnings("unchecked")
public static <S extends Summary> CompactSketch<S> aNotB(final Sketch<S> skA, final Sketch<S> skB) {
  if (skA == null || skB == null) {
    throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
  }

  final long thetaA = skA.getThetaLong();
  final int entriesA = skA.getRetainedEntries();
  final boolean isEmptyA = skA.isEmpty();
  final long thetaB = skB.getThetaLong();
  final int entriesB = skB.getRetainedEntries();
  final boolean isEmptyB = skB.isEmpty();

  final int caseId =
      SetOperationCornerCases.createCornerCaseId(thetaA, entriesA, isEmptyA, thetaB, entriesB, isEmptyB);
  final AnotbAction action = CornerCase.caseIdToCornerCase(caseId).getAnotbAction();

  // shared by every branch that needs the smaller of the two thetas
  final long minTheta = min(thetaA, thetaB);

  switch (action) {
    case EMPTY_1_0_T:
      return new CompactSketch<>(null, null, Long.MAX_VALUE, true);

    case DEGEN_MIN_0_F:
      return new CompactSketch<>(null, null, minTheta, false);

    case DEGEN_THA_0_F:
      return new CompactSketch<>(null, null, thetaA, false);

    case TRIM_A: {
      final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
      final DataArrays<S> trimmed =
          trimAndCopyDataArrays(copyOfA.hashArr, copyOfA.summaryArr, minTheta, false);
      return new CompactSketch<>(trimmed.hashArr, trimmed.summaryArr, minTheta, skA.empty_);
    }

    case SKETCH_A: {
      final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
      return new CompactSketch<>(copyOfA.hashArr, copyOfA.summaryArr, thetaA, skA.empty_);
    }

    case FULL_ANOTB: {
      // both A and B should have valid entries.
      final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
      final DataArrays<S> remaining = getCopyOfResultArraysTuple(
          minTheta, copyOfA.hashArr.length, copyOfA.hashArr, copyOfA.summaryArr, skB);
      final boolean nothingLeft = (remaining.hashArr == null) || (remaining.hashArr.length == 0);
      if (nothingLeft) {
        // with no survivors the result is empty only when theta is still 1.0
        return new CompactSketch<>(null, null, minTheta, minTheta == Long.MAX_VALUE);
      }
      return new CompactSketch<>(remaining.hashArr, remaining.summaryArr, minTheta, false);
    }

    default:
      // no action matched: nothing was computed
      return null;
  }
}
use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.
the class AnotB method aNotB.
/**
 * Computes the A-and-not-B set operation for a Tuple sketch and a Theta sketch and returns it
 * as a new sketch.
 *
 * <p>This is a stateless operation: it neither reads nor changes the internal state of an AnotB
 * operator. It is therefore not an accumulating update and is independent of the
 * {@link #setA(Sketch)}, {@link #notB(Sketch)},
 * {@link #notB(org.apache.datasketches.theta.Sketch)}, and {@link #getResult(boolean)}
 * methods.</p>
 *
 * <p>A null for either argument causes an exception.</p>
 *
 * <p>Rationale: In mathematics a "null set" is a set with no members, which we call an empty set.
 * That is distinctly different from the java <i>null</i>, which represents a nonexistent object
 * and in most cases indicates a programming error such as an object that was never initialized.
 * Given a null first argument we cannot know the user's intent. Given a null second argument we
 * cannot simply ignore it, because a result must be returned and no further second argument can
 * follow. Since a <i>null</i> is very likely a programming error for either argument, an
 * exception is thrown.</p>
 *
 * @param skA The incoming Tuple sketch for the first argument
 * @param skB The incoming Theta sketch for the second argument
 * @param <S> Type of Summary
 * @return the result as an unordered {@link CompactSketch}
 */
@SuppressWarnings("unchecked")
public static <S extends Summary> CompactSketch<S> aNotB(final Sketch<S> skA, final org.apache.datasketches.theta.Sketch skB) {
  if (skA == null || skB == null) {
    throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
  }

  final long thetaA = skA.getThetaLong();
  final int entriesA = skA.getRetainedEntries();
  final boolean isEmptyA = skA.isEmpty();
  final long thetaB = skB.getThetaLong();
  final int entriesB = skB.getRetainedEntries();
  final boolean isEmptyB = skB.isEmpty();

  final int caseId =
      SetOperationCornerCases.createCornerCaseId(thetaA, entriesA, isEmptyA, thetaB, entriesB, isEmptyB);
  final AnotbAction action = CornerCase.caseIdToCornerCase(caseId).getAnotbAction();

  // shared by every branch that needs the smaller of the two thetas
  final long minTheta = min(thetaA, thetaB);

  switch (action) {
    case EMPTY_1_0_T:
      return new CompactSketch<>(null, null, Long.MAX_VALUE, true);

    case DEGEN_MIN_0_F:
      return new CompactSketch<>(null, null, minTheta, false);

    case DEGEN_THA_0_F:
      return new CompactSketch<>(null, null, thetaA, false);

    case TRIM_A: {
      final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
      final DataArrays<S> trimmed =
          trimAndCopyDataArrays(copyOfA.hashArr, copyOfA.summaryArr, minTheta, false);
      return new CompactSketch<>(trimmed.hashArr, trimmed.summaryArr, minTheta, skA.empty_);
    }

    case SKETCH_A: {
      final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
      return new CompactSketch<>(copyOfA.hashArr, copyOfA.summaryArr, thetaA, skA.empty_);
    }

    case FULL_ANOTB: {
      // both A and B should have valid entries.
      final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
      final DataArrays<S> remaining = getCopyOfResultArraysTheta(
          minTheta, copyOfA.hashArr.length, copyOfA.hashArr, copyOfA.summaryArr, skB);
      final boolean nothingLeft = (remaining.hashArr == null) || (remaining.hashArr.length == 0);
      if (nothingLeft) {
        // with no survivors the result is empty only when theta is still 1.0
        return new CompactSketch<>(null, null, minTheta, minTheta == Long.MAX_VALUE);
      }
      return new CompactSketch<>(remaining.hashArr, remaining.summaryArr, minTheta, false);
    }

    default:
      // no action matched: nothing was computed
      return null;
  }
}
Aggregations