Search in sources :

Example 1 with IndexMatch

use of datawave.query.index.lookup.IndexMatch in project datawave by NationalSecurityAgency.

the class AncestorUidIntersector method reduce.

/**
 * Folds {@code currentMatch} into an existing set of matches so that no retained match sits beneath another retained match.
 * <p>
 * An existing match is dropped when its uid extends the uid of {@code currentMatch} by the uid separator (i.e. it is a descendant of
 * {@code currentMatch}). {@code currentMatch} itself is only added when no existing match has the same uid or a uid that
 * {@code currentMatch} extends by the separator.
 *
 * @param matches
 *            the matches accumulated so far; this set is not modified
 * @param currentMatch
 *            the candidate match to merge in
 * @return a new set holding the surviving matches, plus {@code currentMatch} when it did not conflict with any of them
 */
private Set<IndexMatch> reduce(Set<IndexMatch> matches, IndexMatch currentMatch) {
    Set<IndexMatch> survivors = Sets.newHashSet();
    boolean coveredByExisting = false;
    for (IndexMatch existing : matches) {
        String existingUid = existing.getUid();
        String candidateUid = currentMatch.getUid();
        boolean sameUid = existingUid.equals(candidateUid);

        // keep the existing match unless it lies strictly below the candidate
        boolean belowCandidate = existingUid.startsWith(candidateUid + UIDConstants.DEFAULT_SEPARATOR);
        if (sameUid || !belowCandidate) {
            survivors.add(existing);
        }

        // the candidate is redundant when an existing match is the same uid or one of its ancestors
        if (sameUid || candidateUid.startsWith(existingUid + UIDConstants.DEFAULT_SEPARATOR)) {
            coveredByExisting = true;
        }
    }
    if (coveredByExisting) {
        return survivors;
    }
    survivors.add(currentMatch);
    return survivors;
}
Also used : IndexMatch(datawave.query.index.lookup.IndexMatch)

Example 2 with IndexMatch

use of datawave.query.index.lookup.IndexMatch in project datawave by NationalSecurityAgency.

the class AncestorUidIntersector method intersect.

/**
 * Intersects two sets of index matches by ancestry rather than by exact uid.
 * <p>
 * Matches from both sets are grouped by the root uid returned from {@link TLD#parseRootPointerFromId}. Within a group, every pair
 * (one match from each set) whose uids are equal, or where one uid extends the other by the uid separator, yields an
 * {@link IndexMatchType#AND} match located at the descendant uid. That match carries the nodes of both inputs plus all
 * {@code delayedNodes}. Each produced match is merged into the result through {@code reduce}, which discards matches lying beneath
 * another retained match.
 *
 * @param uids1
 *            the first set of matches
 * @param uids2
 *            the second set of matches
 * @param delayedNodes
 *            nodes that must be propagated onto every produced match
 * @return the reduced set of AND matches; empty when no pair of uids is related
 */
@Override
public Set<IndexMatch> intersect(Set<IndexMatch> uids1, Set<IndexMatch> uids2, List<JexlNode> delayedNodes) {
    // correlate uids by their root uid; first() holds the matches from uids1, second() those from uids2
    Map<String, Tuple2<ArrayList<IndexMatch>, ArrayList<IndexMatch>>> correlatedUids = new HashMap<>();
    for (IndexMatch match1 : uids1) {
        String baseUid = TLD.parseRootPointerFromId(match1.getUid());
        correlatedUids.computeIfAbsent(baseUid, key -> new Tuple2<>(new ArrayList<>(), new ArrayList<>())).first().add(match1);
    }
    for (IndexMatch match2 : uids2) {
        String baseUid = TLD.parseRootPointerFromId(match2.getUid());
        correlatedUids.computeIfAbsent(baseUid, key -> new Tuple2<>(new ArrayList<>(), new ArrayList<>())).second().add(match2);
    }

    // for each root uid seen on both sides, pair up related uids and remap each pair to the descendant of the two
    Set<IndexMatch> matches = new HashSet<>();
    for (Tuple2<ArrayList<IndexMatch>, ArrayList<IndexMatch>> indexMatchLists : correlatedUids.values()) {
        if (indexMatchLists.first().isEmpty() || indexMatchLists.second().isEmpty()) {
            continue;
        }
        for (IndexMatch uid1 : indexMatchLists.first()) {
            for (IndexMatch uid2 : indexMatchLists.second()) {
                String id1 = uid1.getUid();
                String id2 = uid2.getUid();

                String descendantUid;
                if (id1.equals(id2) || id1.startsWith(id2 + UIDConstants.DEFAULT_SEPARATOR)) {
                    // uid1 is uid2 itself or a descendant of uid2
                    descendantUid = id1;
                } else if (id2.startsWith(id1 + UIDConstants.DEFAULT_SEPARATOR)) {
                    // uid2 is a descendant of uid1
                    descendantUid = id2;
                } else {
                    // unrelated branches under the same root: nothing to intersect
                    continue;
                }

                // the delayed nodes must be propagated along with the nodes of both matches
                JexlNodeSet nodeSet = new JexlNodeSet();
                nodeSet.add(uid1.getNode());
                nodeSet.add(uid2.getNode());
                nodeSet.addAll(delayedNodes);
                IndexMatch currentMatch = new IndexMatch(Sets.newHashSet(nodeSet.getNodes()), descendantUid, IndexMatchType.AND);
                matches = reduce(matches, currentMatch);
            }
        }
    }
    return matches;
}
Also used : HashMap(java.util.HashMap) Tuple2(datawave.query.util.Tuple2) IndexMatch(datawave.query.index.lookup.IndexMatch) ArrayList(java.util.ArrayList) JexlNodeSet(datawave.query.language.parser.jexl.JexlNodeSet) HashSet(java.util.HashSet)

Example 3 with IndexMatch

use of datawave.query.index.lookup.IndexMatch in project datawave by NationalSecurityAgency.

the class AncestorUidIntersectorTest method testMultipleBranchesCommonAncestor.

/**
 * Both sides contain the common ancestor {@code a.b.c} plus matches on several branches beneath it; the intersection is expected to
 * collapse to the single match at {@code a.b.c}.
 */
@Test
public void testMultipleBranchesCommonAncestor() {
    uids2.add(new IndexMatch("a.b.c", node1));
    uids2.add(new IndexMatch("a.b.c.2", node1));
    uids2.add(new IndexMatch("a.b.c.1", node1));
    uids1.add(new IndexMatch("a.b.c.1", node2));
    uids1.add(new IndexMatch("a.b.c.2.1", node2));
    uids1.add(new IndexMatch("a.b.c", node2));
    // Collections.emptyList() is the type-safe form of the raw Collections.EMPTY_LIST constant
    Set<IndexMatch> result = intersector.intersect(uids1, uids2, Collections.emptyList());
    Assert.assertNotNull(result);
    Assert.assertEquals("expected size 1, got " + result.size(), 1, result.size());
    Assert.assertEquals("a.b.c", result.iterator().next().getUid());
}
Also used : IndexMatch(datawave.query.index.lookup.IndexMatch) Test(org.junit.Test)

Example 4 with IndexMatch

use of datawave.query.index.lookup.IndexMatch in project datawave by NationalSecurityAgency.

the class AncestorUidIntersectorTest method testReduceOverlapUids1.

/**
 * {@code uids1} holds a single match at {@code a.b.c.1} while {@code uids2} holds two nested descendants of it; the overlapping
 * results are expected to reduce to the single match at {@code a.b.c.1.1}.
 */
@Test
public void testReduceOverlapUids1() {
    uids1.add(new IndexMatch("a.b.c.1", node1));
    uids2.add(new IndexMatch("a.b.c.1.1", node2));
    uids2.add(new IndexMatch("a.b.c.1.1.2", node2));
    // Collections.emptyList() is the type-safe form of the raw Collections.EMPTY_LIST constant
    Set<IndexMatch> result = intersector.intersect(uids1, uids2, Collections.emptyList());
    Assert.assertNotNull(result);
    Assert.assertEquals("expected size 1, got " + result.size(), 1, result.size());
    Assert.assertEquals("a.b.c.1.1", result.iterator().next().getUid());
}
Also used : IndexMatch(datawave.query.index.lookup.IndexMatch) Test(org.junit.Test)

Example 5 with IndexMatch

use of datawave.query.index.lookup.IndexMatch in project datawave by NationalSecurityAgency.

the class AncestorUidIntersectorTest method testReduceSingleUids2.

/**
 * {@code uids2} holds two nested ancestors while {@code uids1} holds two sibling descendants beneath them; two matches are expected
 * in the result.
 */
@Test
public void testReduceSingleUids2() {
    uids2.add(new IndexMatch("a.b.c.1", node1));
    uids2.add(new IndexMatch("a.b.c.1.1", node1));
    uids1.add(new IndexMatch("a.b.c.1.1.1", node2));
    uids1.add(new IndexMatch("a.b.c.1.1.2", node2));
    // Collections.emptyList() is the type-safe form of the raw Collections.EMPTY_LIST constant
    Set<IndexMatch> result = intersector.intersect(uids1, uids2, Collections.emptyList());
    Assert.assertNotNull(result);
    Assert.assertEquals("expected size 2, got " + result.size(), 2, result.size());
}
Also used : IndexMatch(datawave.query.index.lookup.IndexMatch) Test(org.junit.Test)

Aggregations

IndexMatch (datawave.query.index.lookup.IndexMatch)17 Test (org.junit.Test)14 ArrayList (java.util.ArrayList)2 DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)1 IndexInfo (datawave.query.index.lookup.IndexInfo)1 JexlNodeSet (datawave.query.language.parser.jexl.JexlNodeSet)1 Tuple2 (datawave.query.util.Tuple2)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1