Search in sources :

Example 6 with FieldValueCardinality

use of datawave.query.attributes.FieldValueCardinality in project datawave by NationalSecurityAgency.

The class FacetedGrouping, method adjustAttributeGrouping.

/**
 * Combines the known buckets for a field with the cardinalities of the current document and, when the
 * combined count exceeds {@code config.getMaximumFacetGroupCount()}, merges neighbouring entries into
 * wider buckets.
 *
 * @param cardList
 *            buckets already known for the field; copied, never modified
 * @param attributes
 *            attributes of the current document; each element is cast to {@link Cardinality}
 * @return the combined list when it fits within the configured maximum, otherwise a new list of merged
 *         buckets built from the sorted combined list
 */
private List<FieldValueCardinality> adjustAttributeGrouping(Collection<FieldValueCardinality> cardList, Set<Attribute<? extends Comparable<?>>> attributes) {
    List<FieldValueCardinality> fixNewList = Lists.newArrayList(cardList);
    for (Attribute<? extends Comparable<?>> attribute : attributes) {
        FieldValueCardinality candidate = ((Cardinality) attribute).getContent();
        boolean found = false;
        for (FieldValueCardinality fvcCard : cardList) {
            // The result of isWithin must drive the decision; previously it was discarded and the
            // first bucket always counted as a match. The bucket-tests-candidate orientation is the
            // same one the bucket lookup in apply() uses.
            if (fvcCard.isWithin(candidate)) {
                found = true;
                break;
            }
        }
        if (!found) {
            fixNewList.add(candidate);
        }
    }

    final int maxGroups = config.getMaximumFacetGroupCount();
    if (fixNewList.size() <= maxGroups) {
        return fixNewList;
    }

    // we've exceeded, so let's get a minimum adjustment factor
    final int groupAdjustmentFactor = (int) Math.ceil((double) fixNewList.size() / maxGroups);
    Collections.sort(fixNewList);

    List<FieldValueCardinality> newCardList = Lists.newArrayList();
    final int lastIndex = fixNewList.size() - 1;
    for (int i = 0; i <= lastIndex; i += groupAdjustmentFactor) {
        FieldValueCardinality first = fixNewList.get(i);
        // The final group may be shorter than groupAdjustmentFactor; clamp so we never index past the end.
        FieldValueCardinality last = fixNewList.get(Math.min(i + (groupAdjustmentFactor - 1), lastIndex));
        FieldValueCardinality newCard = new FieldValueCardinality();
        newCard.setContent(first.getFloorValue());
        newCard.setCeiling(last.getCeilingValue());
        if (log.isTraceEnabled()) {
            log.trace("Creating new bucket " + first.getFloorValue() + " " + last.getCeilingValue());
        }
        newCardList.add(newCard);
        if (newCardList.size() + 1 > maxGroups) {
            // Bucket budget is used up: stretch this bucket to the overall ceiling and stop.
            newCard.setCeiling(Iterables.getLast(fixNewList).getCeilingValue());
            break;
        }
    }
    return newCardList;
}
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) FieldValueCardinality(datawave.query.attributes.FieldValueCardinality)

Example 7 with FieldValueCardinality

use of datawave.query.attributes.FieldValueCardinality in project datawave by NationalSecurityAgency.

The class FacetedGrouping, method apply.

/**
 * Regroups the cardinality attributes of a document into facet buckets.
 * <p>
 * For every dictionary entry other than {@code Document.DOCKEY_FIELD_NAME} whose value is an
 * {@link Attributes}, the cached buckets for that field are looked up (and coarsened through
 * {@code adjustAttributeGrouping} when cached buckets plus incoming attributes exceed the configured
 * maximum), each incoming {@link Cardinality} is merged into the first bucket that contains it or
 * appended as a bucket of its own, and the result replaces the field in the document and is written back
 * to {@code cachedAttributeRanges}. Entries whose value is not an {@link Attributes} are left untouched.
 *
 * @param input
 *            the key/document pair to process
 * @return an immutable entry of the input key and the updated document (the same instance as
 *         {@code input.getValue()})
 * @throws RuntimeException
 *             wrapping a {@link CardinalityMergeException} raised while merging into a bucket
 * @see com.google.common.base.Function#apply(java.lang.Object)
 */
@Override
public Entry<Key, Document> apply(Entry<Key, Document> input) {
    final Key topKey = input.getKey();
    final Document currentDoc = input.getValue();
    // list of document attributes to update.
    TreeMultimap<String, Attribute<?>> newDocumentAttributes = TreeMultimap.create();

    Map<?, ?> currentAttr = currentDoc.getDictionary();
    for (Entry<?, ?> attrE : currentAttr.entrySet()) {
        @SuppressWarnings("unchecked")
        Entry<String, Attribute<?>> attr = (Entry<String, Attribute<?>>) attrE;
        if (attr.getKey().equals(Document.DOCKEY_FIELD_NAME)) {
            continue;
        }
        if (!(attr.getValue() instanceof Attributes)) {
            // ignore non-Attributes attributes
            if (log.isTraceEnabled()) {
                log.trace(attr.getKey() + " is " + attr.getValue().getClass());
            }
            continue;
        }

        Attributes newAttrs = new Attributes(attr.getValue().isToKeep());
        Set<Attribute<? extends Comparable<?>>> attributes = ((Attributes) attr.getValue()).getAttributes();
        if (log.isTraceEnabled()) {
            log.trace(attr.getKey() + " is attributes, size is " + attributes.size());
        }

        Collection<FieldValueCardinality> cardList = cachedAttributeRanges.get(attr.getKey());
        // we already know that we will exceed the list size
        if (cardList.size() + attributes.size() > config.getMaximumFacetGroupCount()) {
            if (log.isTraceEnabled()) {
                log.trace("cardinality exceeds maximum facet count");
            }
            cardList = adjustAttributeGrouping(cardList, attributes);
        }

        // Start from fresh copies of the bucket ranges so the merges below do not touch the cached objects.
        List<Cardinality> newCardList = Lists.newArrayList();
        for (FieldValueCardinality fvcBucket : cardList) {
            FieldValueCardinality fvc = new FieldValueCardinality();
            fvc.setContent(fvcBucket.getFloorValue());
            fvc.setCeiling(fvcBucket.getCeilingValue());
            // for cardinalities, only use the visibility metadata
            // (this key was previously built but never passed on; the attribute's full metadata was used instead)
            Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, attr.getValue().getColumnVisibility(), -1);
            newCardList.add(new Cardinality(fvc, metadata, newAttrs.isToKeep()));
        }

        for (Attribute<? extends Comparable<?>> myAttributeList : attributes) {
            Cardinality card = (Cardinality) myAttributeList;
            boolean foundBucket = false;
            for (Cardinality fvcBucket : newCardList) {
                if (fvcBucket.getContent().isWithin(card.getContent())) {
                    try {
                        fvcBucket.getContent().merge(card.getContent());
                        foundBucket = true;
                    } catch (CardinalityMergeException e) {
                        throw new RuntimeException(e);
                    }
                    // only the first containing bucket receives the merge
                    break;
                }
            }
            if (!foundBucket) {
                // no bucket contains it: the cardinality becomes a bucket of its own
                newCardList.add(card);
            }
        }

        for (Cardinality cardBucket : newCardList) {
            newAttrs.add(cardBucket);
            cachedAttributeRanges.put(attr.getKey(), cardBucket.getContent());
        }
        newDocumentAttributes.put(attr.getKey(), newAttrs);
    }

    if (log.isTraceEnabled()) {
        log.trace("entries" + newDocumentAttributes.entries());
    }
    // Replacements are applied after the loop so the dictionary is not changed while it is being iterated.
    for (Entry<String, Attribute<?>> newAttr : newDocumentAttributes.entries()) {
        currentDoc.replace(newAttr.getKey(), newAttr.getValue(), false, false);
    }
    if (log.isTraceEnabled()) {
        log.trace("currentDoc" + currentDoc);
    }
    return Maps.immutableEntry(topKey, currentDoc);
}
Also used : FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Cardinality(datawave.query.attributes.Cardinality) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) CardinalityMergeException(com.clearspring.analytics.stream.cardinality.CardinalityMergeException) Entry(java.util.Map.Entry) FieldValueCardinality(datawave.query.attributes.FieldValueCardinality) Key(org.apache.accumulo.core.data.Key)

Aggregations

Cardinality (datawave.query.attributes.Cardinality): 7; FieldValueCardinality (datawave.query.attributes.FieldValueCardinality): 7; Key (org.apache.accumulo.core.data.Key): 5; Document (datawave.query.attributes.Document): 4; Attributes (datawave.query.attributes.Attributes): 3; Attribute (datawave.query.attributes.Attribute): 2; DatawaveKey (datawave.query.data.parsers.DatawaveKey): 2; IOException (java.io.IOException): 2; Entry (java.util.Map.Entry): 2; CardinalityMergeException (com.clearspring.analytics.stream.cardinality.CardinalityMergeException): 1; StringType (datawave.data.type.StringType): 1; EmptyObjectException (datawave.webservice.query.exception.EmptyObjectException): 1; FieldCardinalityBase (datawave.webservice.query.result.event.FieldCardinalityBase): 1; HashSet (java.util.HashSet): 1; Map (java.util.Map): 1; TreeMap (java.util.TreeMap): 1; ByteSequence (org.apache.accumulo.core.data.ByteSequence): 1; Text (org.apache.hadoop.io.Text): 1