Search in sources :

Example 1 with CornerCase

use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.

the class MiscTest method checkById.

/**
 * Prints one tab-separated row for every corner-case id formed by pairing the octal digits
 * {0, 1, 2, 5, 6}: the first digit is shifted into the upper three bits and the second digit
 * occupies the lower three bits. Each row lists the id in octal, the corner case, its
 * description, and the descriptions of its intersection and A-not-B actions.
 */
// @Test
public void checkById() {
    final int[] digits = { 0, 1, 2, 5, 6 };
    for (final int upper : digits) {
        for (final int lower : digits) {
            final int caseId = (upper << 3) | lower;
            final CornerCase cornerCase = CornerCase.caseIdToCornerCase(caseId);
            final String intersectDesc = cornerCase.getIntersectAction().getActionDescription();
            final String anotbDesc = cornerCase.getAnotbAction().getActionDescription();
            final StringBuilder row = new StringBuilder();
            row.append(Integer.toOctalString(caseId)).append("\t");
            row.append(cornerCase).append("\t");
            row.append(cornerCase.getCaseDescription()).append("\t");
            row.append(intersectDesc).append("\t");
            row.append(anotbDesc);
            println(row.toString());
        }
    }
}
Also used : CornerCase(org.apache.datasketches.SetOperationCornerCases.CornerCase)

Example 2 with CornerCase

use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.

the class AnotB method notB.

/**
 * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the
 * second (or <i>n+1</i>th) argument <i>B</i> of <i>A-AND-NOT-B</i>.
 * Performs an <i>AND NOT</i> operation with the existing internal state of this AnotB operator.
 * Calls to this method can be intermingled with calls to {@link #notB(Sketch)}.
 *
 * <p>An input argument of null or empty is ignored.</p>
 *
 * <p>Rationale: A <i>null</i> for the second or following arguments is more tolerable because
 * <i>A NOT null</i> is still <i>A</i> even if we don't know exactly what the null represents. It
 * clearly does not have any content that overlaps with <i>A</i>. Also, because this can be part of
 * a multistep operation with multiple <i>notB</i> steps, other following steps can still produce
 * a valid result.</p>
 *
 * <p>Use {@link #getResult(boolean)} to obtain the result.</p>
 *
 * @param skB The incoming Theta sketch for the second (or following) argument <i>B</i>.
 */
@SuppressWarnings("unchecked")
public void notB(final org.apache.datasketches.theta.Sketch skB) {
    // A null B is ignored: the current internal state remains the result.
    if (skB == null) {
        return;
    }
    final long thetaLongB = skB.getThetaLong();
    final int countB = skB.getRetainedEntries();
    final boolean emptyB = skB.isEmpty();
    // The current internal state plays the role of sketch A when classifying the corner case.
    final int id = SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB);
    final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
    final AnotbAction anotbAction = cCase.getAnotbAction();
    switch(anotbAction) {
        case EMPTY_1_0_T:
            {
                reset();
                break;
            }
        case DEGEN_MIN_0_F:
            {
                // Capture the minimum theta BEFORE calling reset(). reset() is defined outside
                // this view; if it reinitializes thetaLong_, taking the minimum afterwards
                // would silently discard A's theta and keep only B's.
                final long minThetaLong = min(thetaLong_, thetaLongB);
                reset();
                thetaLong_ = minThetaLong;
                empty_ = false;
                break;
            }
        case DEGEN_THA_0_F:
            {
                empty_ = false;
                curCount_ = 0;
                // thetaLong_ is ok
                break;
            }
        case TRIM_A:
            {
                thetaLong_ = min(thetaLong_, thetaLongB);
                final DataArrays<S> da = trimAndCopyDataArrays(hashArr_, summaryArr_, thetaLong_, true);
                hashArr_ = da.hashArr;
                curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
                summaryArr_ = da.summaryArr;
                break;
            }
        case SKETCH_A:
            {
                // result is already in A
                break;
            }
        case FULL_ANOTB:
            {
                // both A and B should have valid entries.
                thetaLong_ = min(thetaLong_, thetaLongB);
                final DataArrays<S> daB = getCopyOfResultArraysTheta(thetaLong_, curCount_, hashArr_, summaryArr_, skB);
                hashArr_ = daB.hashArr;
                curCount_ = (hashArr_ == null) ? 0 : hashArr_.length;
                summaryArr_ = daB.summaryArr;
                // empty_ is left untouched: it stays whatever it was for A.
                break;
            }
    }
}
Also used : CornerCase(org.apache.datasketches.SetOperationCornerCases.CornerCase) AnotbAction(org.apache.datasketches.SetOperationCornerCases.AnotbAction)

Example 3 with CornerCase

use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.

the class ArrayOfDoublesAnotBImpl method update.

/**
 * Computes A-and-not-B for the two given sketches and stores the outcome in this operator's
 * fields (keys_, values_, thetaLong_, empty_, count_).
 *
 * <p>numValues_ and seedHash_ are taken from skA before the compatibility checks against skB
 * are made, so they are overwritten even when one of those checks throws.</p>
 *
 * @param skA the first argument <i>A</i>; must not be null
 * @param skB the second argument <i>B</i>; must not be null
 * @throws SketchesArgumentException if either argument is null, or if the two sketches differ
 * in their number of values or in their seed hash
 */
@Override
public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) {
    if (skA == null || skB == null) {
        throw new SketchesArgumentException("Neither argument may be null.");
    }

    // Adopt A's configuration first, then verify that B is compatible with it.
    numValues_ = skA.getNumValues();
    seedHash_ = skA.getSeedHash();
    if (numValues_ != skB.getNumValues()) {
        throw new SketchesArgumentException("Inputs cannot have different numValues");
    }
    if (seedHash_ != skB.getSeedHash()) {
        throw new SketchesArgumentException("Inputs cannot have different seedHashes");
    }

    final long thetaA = skA.getThetaLong();
    final int entriesA = skA.getRetainedEntries();
    final boolean isEmptyA = skA.isEmpty();
    final long thetaB = skB.getThetaLong();
    final int entriesB = skB.getRetainedEntries();
    final boolean isEmptyB = skB.isEmpty();

    // Classify the (A, B) pair and look up which A-not-B action applies to it.
    final int caseId = SetOperationCornerCases.createCornerCaseId(thetaA, entriesA, isEmptyA, thetaB, entriesB, isEmptyB);
    final AnotbAction action = CornerCase.caseIdToCornerCase(caseId).getAnotbAction();
    final long lesserTheta = min(thetaA, thetaB);

    switch (action) {
        case EMPTY_1_0_T:
            reset();
            break;

        case DEGEN_MIN_0_F:
            // No entries retained, not empty, theta is the smaller of the two.
            thetaLong_ = lesserTheta;
            empty_ = false;
            count_ = 0;
            keys_ = null;
            values_ = null;
            break;

        case DEGEN_THA_0_F:
            // No entries retained, not empty, theta is A's.
            thetaLong_ = thetaA;
            empty_ = false;
            count_ = 0;
            keys_ = null;
            values_ = null;
            break;

        case TRIM_A: {
            final DataArrays untrimmed = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), entriesA);
            final DataArrays trimmed = trimDataArrays(untrimmed, lesserTheta, numValues_);
            thetaLong_ = lesserTheta;
            empty_ = skA.isEmpty();
            keys_ = trimmed.hashArr;
            values_ = trimmed.valuesArr;
            count_ = trimmed.count;
            break;
        }

        case SKETCH_A: {
            // The result is A itself, taken from its compact form.
            final ArrayOfDoublesCompactSketch compactA = skA.compact();
            keys_ = compactA.getKeys();
            values_ = compactA.getValuesAsOneDimension();
            thetaLong_ = compactA.thetaLong_;
            empty_ = compactA.isEmpty();
            count_ = compactA.getRetainedEntries();
            break;
        }

        case FULL_ANOTB: {
            // both A and B should have valid entries.
            final long[] keysOfA = skA.getKeys();
            final double[] valuesOfA = skA.getValuesAsOneDimension();
            final DataArrays difference = getResultArrays(lesserTheta, entriesA, keysOfA, valuesOfA, skB);
            count_ = difference.count;
            if (count_ == 0) {
                keys_ = null;
                values_ = null;
            } else {
                keys_ = difference.hashArr;
                values_ = difference.valuesArr;
            }
            thetaLong_ = lesserTheta;
            empty_ = (count_ == 0) && (lesserTheta == Long.MAX_VALUE);
            break;
        }
    }
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) CornerCase(org.apache.datasketches.SetOperationCornerCases.CornerCase) AnotbAction(org.apache.datasketches.SetOperationCornerCases.AnotbAction)

Example 4 with CornerCase

use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.

the class AnotB method aNotB.

/**
 * Returns the A-and-not-B set operation on the two given Tuple sketches.
 *
 * <p>This is a stateless operation and has no impact on the internal state of this operator.
 * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)},
 * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta.Sketch)}, and
 * {@link #getResult(boolean)} methods.</p>
 *
 * <p>If either argument is null an exception is thrown.</p>
 *
 * <p>Rationale: In mathematics a "null set" is a set with no members, which we call an empty
 * set. That is distinctly different from the java <i>null</i>, which represents a nonexistent
 * object, and which in most cases is a programming error due to some object that was not
 * properly initialized. With a null as the first argument, we cannot know what the user's
 * intent is. With a null as the second argument, we can't ignore it because we must return a
 * result and no further second arguments can follow. Since it is very likely that a
 * <i>null</i> is a programming error, we throw an exception.</p>
 *
 * @param skA The incoming Tuple sketch for the first argument
 * @param skB The incoming Tuple sketch for the second argument
 * @param <S> Type of Summary
 * @return the result as an unordered {@link CompactSketch}
 */
@SuppressWarnings("unchecked")
public static <S extends Summary> CompactSketch<S> aNotB(final Sketch<S> skA, final Sketch<S> skB) {
    if (skA == null || skB == null) {
        throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
    }

    final long thetaA = skA.getThetaLong();
    final int entriesA = skA.getRetainedEntries();
    final boolean isEmptyA = skA.isEmpty();
    final long thetaB = skB.getThetaLong();
    final int entriesB = skB.getRetainedEntries();
    final boolean isEmptyB = skB.isEmpty();

    // Classify the (A, B) pair and look up which A-not-B action applies to it.
    final int caseId = SetOperationCornerCases.createCornerCaseId(thetaA, entriesA, isEmptyA, thetaB, entriesB, isEmptyB);
    final AnotbAction action = CornerCase.caseIdToCornerCase(caseId).getAnotbAction();
    final long lesserTheta = min(thetaA, thetaB);

    switch (action) {
        case EMPTY_1_0_T:
            return new CompactSketch<>(null, null, Long.MAX_VALUE, true);

        case DEGEN_MIN_0_F:
            return new CompactSketch<>(null, null, lesserTheta, false);

        case DEGEN_THA_0_F:
            return new CompactSketch<>(null, null, thetaA, false);

        case TRIM_A: {
            final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
            final DataArrays<S> trimmed = trimAndCopyDataArrays(copyOfA.hashArr, copyOfA.summaryArr, lesserTheta, false);
            return new CompactSketch<>(trimmed.hashArr, trimmed.summaryArr, lesserTheta, skA.empty_);
        }

        case SKETCH_A: {
            final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
            return new CompactSketch<>(copyOfA.hashArr, copyOfA.summaryArr, thetaA, skA.empty_);
        }

        case FULL_ANOTB: {
            // both A and B should have valid entries.
            final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
            final DataArrays<S> difference = getCopyOfResultArraysTuple(lesserTheta, copyOfA.hashArr.length, copyOfA.hashArr, copyOfA.summaryArr, skB);
            final boolean nothingLeft = (difference.hashArr == null) || (difference.hashArr.length == 0);
            if (nothingLeft) {
                return new CompactSketch<>(null, null, lesserTheta, lesserTheta == Long.MAX_VALUE);
            }
            return new CompactSketch<>(difference.hashArr, difference.summaryArr, lesserTheta, false);
        }

        default:
            // Any action not listed above yields no result.
            return null;
    }
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) CornerCase(org.apache.datasketches.SetOperationCornerCases.CornerCase) AnotbAction(org.apache.datasketches.SetOperationCornerCases.AnotbAction)

Example 5 with CornerCase

use of org.apache.datasketches.SetOperationCornerCases.CornerCase in project sketches-core by DataSketches.

the class AnotB method aNotB.

/**
 * Returns the A-and-not-B set operation on a Tuple sketch and a Theta sketch.
 *
 * <p>This is a stateless operation and has no impact on the internal state of this operator.
 * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)},
 * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta.Sketch)}, and
 * {@link #getResult(boolean)} methods.</p>
 *
 * <p>If either argument is null an exception is thrown.</p>
 *
 * <p>Rationale: In mathematics a "null set" is a set with no members, which we call an empty
 * set. That is distinctly different from the java <i>null</i>, which represents a nonexistent
 * object, and which in most cases is a programming error due to some object that was not
 * properly initialized. With a null as the first argument, we cannot know what the user's
 * intent is. With a null as the second argument, we can't ignore it because we must return a
 * result and no further second arguments can follow. Since it is very likely that a
 * <i>null</i> is a programming error for either argument, we throw an exception.</p>
 *
 * @param skA The incoming Tuple sketch for the first argument
 * @param skB The incoming Theta sketch for the second argument
 * @param <S> Type of Summary
 * @return the result as an unordered {@link CompactSketch}
 */
@SuppressWarnings("unchecked")
public static <S extends Summary> CompactSketch<S> aNotB(final Sketch<S> skA, final org.apache.datasketches.theta.Sketch skB) {
    if (skA == null || skB == null) {
        throw new SketchesArgumentException("Neither argument may be null for this stateless operation.");
    }

    final long thetaA = skA.getThetaLong();
    final int entriesA = skA.getRetainedEntries();
    final boolean isEmptyA = skA.isEmpty();
    final long thetaB = skB.getThetaLong();
    final int entriesB = skB.getRetainedEntries();
    final boolean isEmptyB = skB.isEmpty();

    // Classify the (A, B) pair and look up which A-not-B action applies to it.
    final int caseId = SetOperationCornerCases.createCornerCaseId(thetaA, entriesA, isEmptyA, thetaB, entriesB, isEmptyB);
    final AnotbAction action = CornerCase.caseIdToCornerCase(caseId).getAnotbAction();
    final long lesserTheta = min(thetaA, thetaB);

    switch (action) {
        case EMPTY_1_0_T:
            return new CompactSketch<>(null, null, Long.MAX_VALUE, true);

        case DEGEN_MIN_0_F:
            return new CompactSketch<>(null, null, lesserTheta, false);

        case DEGEN_THA_0_F:
            return new CompactSketch<>(null, null, thetaA, false);

        case TRIM_A: {
            final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
            final DataArrays<S> trimmed = trimAndCopyDataArrays(copyOfA.hashArr, copyOfA.summaryArr, lesserTheta, false);
            return new CompactSketch<>(trimmed.hashArr, trimmed.summaryArr, lesserTheta, skA.empty_);
        }

        case SKETCH_A: {
            final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
            return new CompactSketch<>(copyOfA.hashArr, copyOfA.summaryArr, thetaA, skA.empty_);
        }

        case FULL_ANOTB: {
            // both A and B should have valid entries.
            final DataArrays<S> copyOfA = getCopyOfDataArraysTuple(skA);
            final DataArrays<S> difference = getCopyOfResultArraysTheta(lesserTheta, copyOfA.hashArr.length, copyOfA.hashArr, copyOfA.summaryArr, skB);
            final boolean nothingLeft = (difference.hashArr == null) || (difference.hashArr.length == 0);
            if (nothingLeft) {
                return new CompactSketch<>(null, null, lesserTheta, lesserTheta == Long.MAX_VALUE);
            }
            return new CompactSketch<>(difference.hashArr, difference.summaryArr, lesserTheta, false);
        }

        default:
            // Any action not listed above yields no result.
            return null;
    }
}
Also used : SketchesArgumentException(org.apache.datasketches.SketchesArgumentException) CornerCase(org.apache.datasketches.SetOperationCornerCases.CornerCase) AnotbAction(org.apache.datasketches.SetOperationCornerCases.AnotbAction)

Aggregations

CornerCase (org.apache.datasketches.SetOperationCornerCases.CornerCase): 6; AnotbAction (org.apache.datasketches.SetOperationCornerCases.AnotbAction): 5; SketchesArgumentException (org.apache.datasketches.SketchesArgumentException): 3