Search in sources :

Example 46 with RoaringBitmap

use of org.roaringbitmap.RoaringBitmap in project tez by apache.

the class TestUnorderedPartitionedKVWriter method getPartitionStats.

/**
 * Decodes the partition statistics carried in a vertex manager event payload into an array of
 * sizes in MB.
 * <p>
 * When stats reporting is disabled, asserts that the payload carries no stats and returns
 * {@code null}. In precise mode the detailed size list is copied out as-is. Otherwise the
 * compressed bitmap is decoded: each set bit {@code b} contributes the size of range
 * {@code b % RANGES} to slot {@code b / RANGES}, where {@code RANGES} is the number of
 * {@code DATA_RANGE_IN_MB} values.
 *
 * @param vme the event whose user payload holds the serialized stats
 * @return the decoded sizes in MB, or {@code null} when reporting is disabled
 * @throws IOException if the payload cannot be parsed, decompressed or deserialized
 */
private int[] getPartitionStats(VertexManagerEvent vme) throws IOException {
    VertexManagerEventPayloadProto payload = VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vme.getUserPayload()));
    if (!reportPartitionStats.isEnabled()) {
        // nothing should have been reported at all
        assertFalse(payload.hasPartitionStats());
        assertFalse(payload.hasDetailedPartitionStats());
        return null;
    }
    if (reportPartitionStats.isPrecise()) {
        assertTrue(payload.hasDetailedPartitionStats());
        List<Integer> detailedSizes = payload.getDetailedPartitionStats().getSizeInMbList();
        int[] preciseStats = new int[detailedSizes.size()];
        int slot = 0;
        for (Integer sizeInMb : detailedSizes) {
            preciseStats[slot++] = sizeInMb;
        }
        return preciseStats;
    }
    // bucketed mode: the stats travel as a compressed, serialized bitmap
    assertTrue(payload.hasPartitionStats());
    byte[] decompressed = TezCommonUtils.decompressByteStringToByteArray(payload.getPartitionStats());
    RoaringBitmap bitmap = new RoaringBitmap();
    bitmap.deserialize(new DataInputStream(new ByteArrayInputStream(decompressed)));
    int[] bucketStats = new int[bitmap.getCardinality()];
    final DATA_RANGE_IN_MB[] ranges = DATA_RANGE_IN_MB.values();
    for (int bit : bitmap) {
        int sizeInMb = ranges[bit % ranges.length].getSizeInMB();
        if (sizeInMb > 0) {
            bucketStats[bit / ranges.length] += sizeInMb;
        }
    }
    return bucketStats;
}
Also used : ByteString(com.google.protobuf.ByteString) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) DataInputStream(java.io.DataInputStream) DATA_RANGE_IN_MB(org.apache.tez.runtime.library.utils.DATA_RANGE_IN_MB) RoaringBitmap(org.roaringbitmap.RoaringBitmap) ByteArrayInputStream(java.io.ByteArrayInputStream) VertexManagerEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto)

Example 47 with RoaringBitmap

use of org.roaringbitmap.RoaringBitmap in project narchy by automenta.

the class MatchConstraint method combineConstraints.

/**
 * Collapses a contiguous run of {@link MatchConstraint} conditions inside an {@link AndCondition}
 * into the conditions produced by {@code CompoundConstraint.the(...)}, leaving the conditions
 * before and after the run in their original order.
 * <p>
 * The condition is returned unchanged when it holds fewer than two constraints, or when no two
 * constraints are adjacent (there is nothing to combine). More than one separate run is not yet
 * supported and raises {@link TODO}.
 *
 * @param a the conjunction of conditions to inspect
 * @return a new condition with the run combined, or {@code a} itself if nothing was combined
 */
public static PrediTerm<Derivation> combineConstraints(AndCondition a) {
    // positions in a.cond that hold a MatchConstraint
    RoaringBitmap constraints = new RoaringBitmap();
    @NotNull PrediTerm[] cond1 = a.cond;
    for (int i = 0, cl = cond1.length; i < cl; i++) {
        Term x = cond1[i];
        if (x instanceof MatchConstraint) {
            constraints.add(i);
        }
    }
    if (constraints.getCardinality() < 2) {
        return a;
    }
    // identify contiguous runs of constraints; both bounds of a recorded range are inclusive
    List<IntIntPair> ranges = new FasterList<>(1);
    int start = -1, end = -1;
    PeekableIntIterator ii = constraints.getIntIterator();
    while (ii.hasNext()) {
        int next = ii.next();
        if (start != -1 && next == end + 1) {
            // extends the current run
            end = next;
        } else {
            // the current run (if any) is broken: record it when it spans at least two conditions
            if (start != -1 && end > start) {
                ranges.add(pair(start, end));
            }
            // the index that broke the run begins the next candidate run
            start = end = next;
        }
    }
    if (start != -1 && end > start) {
        ranges.add(pair(start, end));
    }
    if (ranges.isEmpty()) {
        // constraints exist but none are adjacent, so there is nothing to combine
        return a;
    }
    if (ranges.size() > 1)
        throw new TODO();
    IntIntPair rr = ranges.get(0);
    // use the recorded range, not the scan variables, which may describe a later, unrecorded run
    int from = rr.getOne();
    int to = rr.getTwo();
    List<PrediTerm<Derivation>> l = new FasterList();
    for (int i = 0; i < from; i++) {
        l.add(a.cond[i]);
    }
    CompoundConstraint.the(Util.map(MatchConstraint.class::cast, MatchConstraint[]::new, ArrayUtils.subarray(a.cond, from, to + 1))).forEach(l::add);
    for (int i = to + 1; i < a.cond.length; i++) {
        l.add(a.cond[i]);
    }
    return AndCondition.the((List) l);
}
Also used : TODO(jcog.TODO) PrediTerm(nars.term.pred.PrediTerm) FasterList(jcog.list.FasterList) PeekableIntIterator(org.roaringbitmap.PeekableIntIterator) Term(nars.term.Term) Util(jcog.Util) NotNull(org.jetbrains.annotations.NotNull) RoaringBitmap(org.roaringbitmap.RoaringBitmap) IntIntPair(org.eclipse.collections.api.tuple.primitive.IntIntPair)

Example 48 with RoaringBitmap

use of org.roaringbitmap.RoaringBitmap in project narchy by automenta.

the class Int method intersect.

// public static Term[] intersect(Term[] u) {
// 
// TreeSet<Intlike> integers = new TreeSet();
// for (Term x : u) {
// if (x.op() == INT) {
// integers.add((Intlike) x);
// }
// }
// 
// int ii = integers.size();
// if (ii < 2)
// return u; //one or less integers, do nothing about it
// 
// 
// TreeSet<Term> v = new TreeSet<>();
// for (Term uu : u)
// v.add(uu);
// 
// Intlike a = integers.pollFirst();
// Intlike b = integers.pollFirst();
// Range ar = a.range();
// Range br = b.range();
// if (ar.isConnected(br)) {
// Intlike combined = Int.the(ar.span(br));
// v.remove(a);
// v.remove(b);
// v.add(combined);
// }
// 
// 
// return v.toArray(new Term[v.size()]);
// 
// }
/**
 * Attempts to combine the integer content of the given terms into fewer terms.
 * <p>
 * Subterms that contain {@code INT} and share the same volume and structure are selected by
 * index into a bitmap. If fewer than two are selected, {@code sorted(_subs)} is returned.
 * For exactly three selected terms, pairwise combinations are tried first. Otherwise, either
 * the integers of the selected terms are collected directly (when the pure-INT count equals the
 * selection size), or the first selected term is walked path-by-path and the integers found at
 * each path across all selected terms are collected. The final array is assembled by
 * {@code intersectResult}.
 * <p>
 * TODO permute other arrangements
 *
 * @param _subs the candidate terms
 * @return the resulting terms; several bail-out paths return the input array {@code _subs} itself
 */
public static Term[] intersect(final Term... _subs) {
    // indices into _subs of the terms selected for combination
    RoaringBitmap factoring = new RoaringBitmap();
    // volume/structure of the most recently selected term; -1 until the first selection
    int equalVolume = -1, equalStructure = -1;
    int pureInts = 0;
    for (int i = 0, subsLength = _subs.length; i < subsLength; i++) {
        Term x = _subs[i];
        if (!x.hasAny(Op.INT)) {
            continue;
        }
        // NOTE(review): counted before the volume/structure filters below, so a pure INT that is
        // later skipped still increments pureInts - confirm this is intended
        if (x.op() == INT)
            pureInts++;
        if (equalVolume != -1) {
            if (x.volume() != equalVolume) {
                // a term with non-matching volume
                continue;
            }
        }
        if (equalStructure != -1) {
            if (x.structure() != equalStructure) {
                continue;
            }
        }
        equalVolume = x.volume();
        equalStructure = x.structure();
        factoring.add(i);
    }
    // number of selected terms
    int ff = factoring.getCardinality();
    if (ff < 2)
        return sorted(_subs);
    // subs = only the selected terms (or the input array itself when everything was selected)
    Term[] subs;
    if (ff < _subs.length) {
        subs = new Term[ff];
        int j = 0;
        for (int i = 0; i < _subs.length; i++) {
            if (factoring.contains(i))
                subs[j++] = _subs[i];
        }
        assert (j == ff);
    } else {
        subs = _subs;
    }
    if (subs.length == 3) {
        Term[] rr;
        // HACK try permutations to combine some but not all
        // each pair is tried in turn; the first pair that collapses to a single term is then
        // combined with the remaining third term
        Term[] ab = intersect(subs[0], subs[1]);
        if (ab.length == 1) {
            rr = intersect(ab[0], subs[2]);
        } else {
            Term[] bc = intersect(subs[1], subs[2]);
            if (bc.length == 1) {
                rr = intersect(bc[0], subs[0]);
            } else {
                Term[] ac = intersect(subs[0], subs[2]);
                if (ac.length == 1) {
                    rr = intersect(ac[0], subs[1]);
                } else {
                    rr = null;
                }
            }
        }
        if (rr != null) {
            return intersectResult(factoring, ff, new FasterList(rr), _subs);
        }
    }
    // terms produced by the combination, handed to intersectResult at the end
    FasterList<Term> yAux = new FasterList(0);
    if (pureInts == ff) {
        // avoid the path stuff, just merge the int terms
        // NOTE(review): the cast below assumes every selected term is Intlike; see the note on
        // pureInts above
        SimpleIntSet s = new SimpleIntSet(ff);
        for (Term x : subs) {
            ((Intlike) x).forEachInt(s::add);
        }
        int ns = s.size();
        assert (ns > 1);
        if (ns == 2) {
            // simple case
            Iterator<Integer> si = s.iterator();
            int a = si.next();
            int b = si.next();
            if (Math.abs(a - b) > 1) {
                // not adjacent: keep the two integers separate
                yAux.add(Int.the(a));
                yAux.add(Int.the(b));
            } else {
                // adjacent: order the pair ascending and emit one range
                if (a > b) {
                    int c = b;
                    b = a;
                    a = c;
                }
                yAux.add(Int.range(a, b));
            }
        } else {
            features(s.iterator(), -1).forEachRemaining(yAux::add);
        }
    } else {
        // paths * extracted sequence of numbers at given path for each subterm
        // values start as SimpleIntSet and may later be replaced by an Intlike (see setValue below)
        Map<ByteList, Object> /*SimpleIntSet*/
        data = new LinkedHashMap<>(subs.length);
        // if a subterm is not an integer, check for equality of atoms (structure already compared above)
        // single-element array so the lambda can report failure
        final boolean[] valid = { true };
        subs[0].pathsTo(x -> x, d -> true, (p, x) -> {
            // if (p.isEmpty())
            // return true; //continue past root, of course the root term will be unique
            ImmutableByteList path = null;
            SimpleIntSet c = null;
            int xVol = x.volume();
            int xStruct = x.structure();
            // compare the subterm x of subs[0] at path p against the same path in every other term
            for (int others = 1; others < subs.length; others++) {
                Term y = subs[others].subPath(p);
                if (x.equals(y))
                    continue;
                if (!x.hasAny(INT)) {
                    // and we expect differences but only in the INT
                    if (!y.equals(x)) {
                        valid[0] = false;
                        return false;
                    }
                } else if (x.op() == INT) {
                    if (y.op() != INT) {
                        valid[0] = false;
                        return false;
                    }
                    if (path == null)
                        path = p.toImmutable();
                    // store the path to the integer(s)
                    if (c == null)
                        c = (SimpleIntSet) data.computeIfAbsent(path, (pp) -> new SimpleIntSet(2));
                    ((Intlike) y).forEachInt(c::add);
                } else {
                    // this is a term containing an INT but is not an INT; the structure must still match
                    if (xVol != y.volume() || xStruct != y.structure()) {
                        valid[0] = false;
                        return false;
                    }
                }
            }
            if (x.op() == INT) {
                // also record x's own integers, creating the set if no differing term created it
                if (c == null) {
                    if (path == null)
                        path = p.toImmutable();
                    data.put(path, c = new SimpleIntSet(1));
                }
                ((Intlike) x).forEachInt(c::add);
            }
            return true;
        });
        if (!valid[0])
            return _subs;
        Iterator<Map.Entry<ByteList, Object>> entries = data.entrySet().iterator();
        while (entries.hasNext()) {
            Map.Entry<ByteList, Object> /*SimpleIntSet*/
            e = entries.next();
            SimpleIntSet s = (SimpleIntSet) e.getValue();
            if (s.size() < 2) {
                entries.remove();
                // same integer value in each
                continue;
            }
            // for each path where the other numerics are uniformly equal (only one unique value)
            /*if (new HashSet(nn).size()==1)*/
            Iterator<Integer> si = s.iterator();
            if (e.getKey().isEmpty()) {
                // root level, get as many as possible
                features(si, -1).forEachRemaining(yAux::add);
                entries.remove();
            } else {
                Iterator<Intlike> iii = features(si, 1);
                if (iii == null || !iii.hasNext())
                    // discontiguous or otherwise ununifiable
                    return _subs;
                // replace the integer set with the single Intlike to substitute at this path
                e.setValue(iii.next());
            }
        }
        Term y;
        if (!data.isEmpty()) {
            // rewrite subs[0], substituting the computed Intlike at each remaining path
            y = subs[0];
            for (Map.Entry<ByteList, Object> /*Intlike*/
            e : data.entrySet()) {
                Object v = e.getValue();
                y = y.transform(e.getKey(), (Term) v);
            }
        } else {
            y = null;
        }
        if (subs.length == _subs.length && yAux.isEmpty()) {
            if (y == null)
                // ??
                return _subs;
            return new Term[] { y };
        } else {
            // NOTE(review): y can be null here (data was empty), in which case null is added to
            // yAux - confirm intersectResult tolerates that
            yAux.add(y);
        }
    }
    return intersectResult(factoring, ff, yAux, _subs);
// int ffs = ff.size();
// if (ffs == 0 || ffs >= numInvolved) {
// //nothing would be gained; dont bother
// continue;
// }
// 
// for (Intlike f : ff) {
// byte j = 0;
// for (Term x : subs) {
// 
// if (!involved.contains(j)) {
// result.add(x);
// //System.out.println("1: " + result);
// } else {
// 
// 
// //x is contained within range expression p
// Term xpp = x.subs() > 0 ? x.subPath(pp) : x;
// 
// boolean connected;
// if (xpp.op() == INT) {
// connected = (f.range().isConnected(((Intlike) xpp).range()));
// } else {
// connected = false;
// }
// 
// if (connected) {
// Term y = x instanceof Compound ?
// x.transform(pp, f)
// : f;
// //if (!y.equals(x)) {
// 
// if (!x.equals(y)) {
// subsumed.add(x);
// }
// result.add(y);
// //System.out.println(x + " 3: " + result + "\t + " + y);
// //}
// } else {
// result.add(x);
// }
// }
// j++;
// }
// 
// 
// }
// 
// int results = result.size();
// if ((results == 1) /*|| (results > resultLimit * subCount)*/) {
// break; //reduced to one or exploded, go no further
// }
// }
// result.removeAll(subsumed);
// 
// if (result.isEmpty()) {
// return subs;
// } else {
// 
// Term[] rr = result.toArray(new Term[result.size()]);
// if (Arrays.equals(rr, subs))
// return rr;
// else
// return intersect(rr); //changed, recompress
// 
// }
}
Also used : SimpleIntSet(jcog.data.SimpleIntSet) ImmutableByteList(org.eclipse.collections.api.list.primitive.ImmutableByteList) ByteList(org.eclipse.collections.api.list.primitive.ByteList) FasterList(jcog.list.FasterList) Term(nars.term.Term) RoaringBitmap(org.roaringbitmap.RoaringBitmap)

Example 49 with RoaringBitmap

use of org.roaringbitmap.RoaringBitmap in project narchy by automenta.

the class Conj method term.

// private byte id(long w) {
// 
// int i = times.indexOf(w);
// if (i!=-1) {
// return (byte) i;
// } else {
// int s = times.size();
// assert(s < Byte.MAX_VALUE);
// times.add(w);
// return (byte)s;
// }
// }
// 
// short id(Term t, long w) {
// byte tb = id(t);
// byte wb = id(w);
// return (short) ((tb << 8) | wb);
// }
// 
// byte termIndex(short s) {
// return (byte) ((s >> 8) & 0xff);
// }
// byte timeIndex(short s) {
// return (byte) (s & 0xff);
// }
/**
 * Builds the term represented by the collected events.
 * <p>
 * A previously stored {@code term} is returned directly. With no events the result is
 * {@code Null}. Otherwise the eternal component (if any) is built first; a {@link Bool} eternal
 * result is stored and returned immediately. Each remaining time's component is then built,
 * passing a validator derived from the eternal component: {@code True} components are dropped,
 * while {@code False} and {@code Null} are stored and returned at once. Several temporal
 * components are ordered by time and joined with {@code conjSeq}; the result is finally
 * conjoined with the eternal component when one exists.
 * <p>
 * Only the short-circuit {@code Bool} results above are stored in {@code term}; the other
 * results are recomputed on each call.
 *
 * @return the constructed term
 */
public Term term() {
    if (term != null)
        return term;
    int numTimes = event.size();
    if (numTimes == 0)
        return Null;
    event.compact();
    // predicate over event ids, handed to term(when, what, validator) for the non-eternal times
    IntPredicate validator = null;
    Object eternalWhat = event.get(ETERNAL);
    Term eternal = term(ETERNAL, eternalWhat);
    if (eternal != null) {
        if (eternal instanceof Bool)
            // override and terminates
            return this.term = eternal;
        if (numTimes > 1) {
            if (eternal.op() == CONJ) {
                // the eternal ids are stored either as a zero-terminated byte[] or as a bitmap;
                // an id passes when its negation is not among the eternal ids
                if (eternalWhat instanceof byte[]) {
                    byte[] b = (byte[]) eternalWhat;
                    validator = (i) -> indexOfZeroTerminated(b, (byte) -i) == -1;
                } else {
                    RoaringBitmap b = (RoaringBitmap) eternalWhat;
                    validator = (i) -> !b.contains(-i);
                }
            } else {
                Term finalEternal = eternal;
                // ids are signed and non-zero (zero terminates the byte[] form, and a negative id
                // is negated via negIf), so the term index is |t| - 1 for both polarities.
                // The previous Math.abs(t - 1) resolved a negative id -k to index k + 1.
                validator = (t) -> !finalEternal.equalsNeg(termsIndex.get(Math.abs(t) - 1).negIf(t < 0));
            }
        }
    }
    if (eternal != null && numTimes == 1)
        // done
        return eternal;
    FasterList<LongObjectPair<Term>> e = new FasterList(numTimes - (eternal != null ? 1 : 0));
    Iterator<LongObjectPair> ii = event.keyValuesView().iterator();
    while (ii.hasNext()) {
        LongObjectPair next = ii.next();
        long when = next.getOne();
        if (when == ETERNAL)
            // already handled above
            continue;
        Term wt = term(when, next.getTwo(), validator);
        if (wt == True) {
            // canceled out
            continue;
        } else if (wt == False) {
            // short-circuit false
            return this.term = False;
        } else if (wt == Null) {
            // short-circuit null
            return this.term = Null;
        }
        e.add(pair(when, wt));
    }
    // NOTE(review): if every temporal component cancelled to True this list is empty and, with
    // assertions disabled, e.get(0) below would throw - confirm that cannot happen
    assert (!e.isEmpty());
    Term temporal;
    if (e.size() > 1) {
        e.sortThisBy(LongObjectPair::getOne);
        temporal = conjSeq(e);
        if (temporal instanceof Bool)
            return temporal;
    } else {
        temporal = e.get(0).getTwo();
    }
    return eternal != null ? // Op.instance(CONJ, DTERNAL, sorted(eternal, temporal))
    CONJ.the(DTERNAL, sorted(eternal, temporal)) : temporal;
}
Also used : LongObjectPair(org.eclipse.collections.api.tuple.primitive.LongObjectPair) IntPredicate(java.util.function.IntPredicate) Bool(nars.term.atom.Bool) FasterList(jcog.list.FasterList) Term(nars.term.Term) RoaringBitmap(org.roaringbitmap.RoaringBitmap)

Example 50 with RoaringBitmap

use of org.roaringbitmap.RoaringBitmap in project RoaringBitmap by RoaringBitmap.

the class RunContainerRealDataBenchmarkRunOptimize method serializeToBAOSNoClonePreOpti.

/**
 * Serializes every bitmap held in {@code benchmarkState.rc}, in index order, into a single
 * in-memory byte stream and reports how many bytes were written.
 *
 * @param benchmarkState supplies the bitmaps to serialize
 * @return the total number of bytes produced
 * @throws IOException if serialization or flushing fails
 */
@Benchmark
public int serializeToBAOSNoClonePreOpti(BenchmarkState benchmarkState) throws IOException {
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final DataOutputStream out = new DataOutputStream(sink);
    int idx = 0;
    while (idx < benchmarkState.rc.size()) {
        RoaringBitmap current = benchmarkState.rc.get(idx);
        current.serialize(out);
        idx++;
    }
    // push any buffered bytes through before measuring
    out.flush();
    return sink.size();
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) RoaringBitmap(org.roaringbitmap.RoaringBitmap) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Aggregations

RoaringBitmap (org.roaringbitmap.RoaringBitmap)81 Benchmark (org.openjdk.jmh.annotations.Benchmark)14 Test (org.junit.jupiter.api.Test)10 Test (org.junit.Test)9 DataOutputStream (java.io.DataOutputStream)8 MutableRoaringBitmap (org.roaringbitmap.buffer.MutableRoaringBitmap)8 ByteArrayOutputStream (java.io.ByteArrayOutputStream)7 DataInputStream (java.io.DataInputStream)7 IOException (java.io.IOException)7 BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode)7 ByteArrayInputStream (java.io.ByteArrayInputStream)6 OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit)6 BitmapDataProvider (org.roaringbitmap.BitmapDataProvider)6 ByteString (com.google.protobuf.ByteString)5 ByteBuffer (java.nio.ByteBuffer)5 Setup (org.openjdk.jmh.annotations.Setup)5 SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 FasterList (jcog.list.FasterList)4 Term (nars.term.Term)4