Search in sources:

Example 61 with ITupleReference

use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.

the class AbstractTOccurrenceSearcher method tokenizeQuery.

/**
 * Tokenizes the query field of the given search predicate and appends each
 * serialized token to the query token frame.
 *
 * <p>For full-text queries every keyword must be a single word: a STRING
 * tokenizer may yield at most one token, and a token produced by a LIST
 * tokenizer must not contain a separator character.
 *
 * @param searchPred supplies the query tuple, the index of the query field,
 *                   the tokenizer, and the full-text flag
 * @throws HyracksDataException if a phrase is found in a full-text query, or
 *                              if serializing/appending a token fails
 */
protected void tokenizeQuery(InvertedIndexSearchPredicate searchPred) throws HyracksDataException {
    final ITupleReference tuple = searchPred.getQueryTuple();
    final int fieldIdx = searchPred.getQueryFieldIndex();
    final IBinaryTokenizer tokenizer = searchPred.getQueryTokenizer();
    // Full-text queries carry a conjunctive/disjunctive search option in addition to the query text
    // and impose the "no phrases" restriction enforced below.
    final boolean fullTextQuery = searchPred.getIsFullTextSearchQuery();
    final TokenizerType tokenizerType = tokenizer.getTokenizerType();

    queryTokenAppender.reset(queryTokenFrame, true);
    tokenizer.reset(tuple.getFieldData(fieldIdx), tuple.getFieldStart(fieldIdx), tuple.getFieldLength(fieldIdx));

    // tokensSeen is the 1-based position of the token handled in the current iteration.
    for (int tokensSeen = 1; tokenizer.hasNext(); tokensSeen++) {
        tokenizer.next();
        queryTokenBuilder.reset();
        try {
            IToken current = tokenizer.getToken();

            // A second token from a STRING tokenizer means the query text was a phrase.
            if (fullTextQuery && tokenizerType == TokenizerType.STRING && tokensSeen > 1) {
                throw HyracksDataException.create(ErrorCode.FULLTEXT_PHRASE_FOUND);
            }
            // A list may hold several keywords, but none of them may itself be a phrase.
            if (fullTextQuery && tokenizerType == TokenizerType.LIST) {
                final byte[] tokenBytes = current.getData();
                final int tokenStart = current.getStartOffset();
                final int tokenLength = current.getTokenLength();
                // Inspection starts at index 1 — presumably byte 0 is not keyword text; TODO confirm.
                for (int pos = 1; pos < tokenLength; pos++) {
                    char candidate = (char) tokenBytes[tokenStart + pos];
                    if (DelimitedUTF8StringBinaryTokenizer.isSeparator(candidate)) {
                        throw HyracksDataException.create(ErrorCode.FULLTEXT_PHRASE_FOUND);
                    }
                }
            }

            current.serializeToken(queryTokenBuilder.getFieldData());
            queryTokenBuilder.addFieldEndOffset();
            // WARNING: assuming one frame is big enough to hold all tokens
            queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
                    queryTokenBuilder.getSize());
        } catch (IOException e) {
            // NOTE(review): if HyracksDataException is itself an IOException, the FULLTEXT_PHRASE_FOUND
            // errors raised above are also caught and re-wrapped here — confirm that is intended.
            throw new HyracksDataException(e);
        }
    }
}
Also used : IToken(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) TokenizerType(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.TokenizerInfo.TokenizerType) IBinaryTokenizer(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer) IOException(java.io.IOException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 62 with ITupleReference

use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.

the class AbstractLSMRTree method modify.

/**
 * Applies an insert or a (non-physical) delete of {@code tuple} to the
 * current mutable component of the LSM-RTree.
 *
 * <p>An insert goes to the mutable RTree. Any other supported operation
 * deletes the tuple from the mutable RTree and then inserts it into the
 * mutable BTree of deleted keys. The filter is updated with the caller's
 * original tuple in both cases.
 *
 * @param ictx  operation context; must be an {@code LSMRTreeOpContext}
 * @param tuple the tuple to insert or delete
 * @throws HyracksDataException          if an underlying index operation fails
 *                                       (a duplicate key in the deleted-keys BTree is tolerated)
 * @throws UnsupportedOperationException if the operation is a physical delete
 */
@Override
public void modify(IIndexOperationContext ictx, ITupleReference tuple) throws HyracksDataException {
    final LSMRTreeOpContext opCtx = (LSMRTreeOpContext) ictx;
    if (opCtx.getOperation() == IndexOperation.PHYSICALDELETE) {
        throw new UnsupportedOperationException("Physical delete not supported in the LSM-RTree");
    }

    // Use the caller's tuple as-is unless the context provides an index tuple to reset onto it.
    ITupleReference target = tuple;
    if (opCtx.getIndexTuple() != null) {
        opCtx.getIndexTuple().reset(tuple);
        target = opCtx.getIndexTuple();
    }

    opCtx.getModificationCallback().before(target);
    opCtx.getModificationCallback().found(null, target);

    if (opCtx.getOperation() != IndexOperation.INSERT) {
        // Drop any matching entries from the in-memory RTree first.
        opCtx.getCurrentMutableRTreeAccessor().delete(target);
        // Then record the key in the deleted-keys BTree.
        try {
            opCtx.getCurrentMutableBTreeAccessor().insert(target);
        } catch (HyracksDataException hde) {
            // A duplicate key is ignored: presumably one delete entry is enough to indicate
            // that all the corresponding insert tuples are deleted. Anything else propagates.
            if (hde.getErrorCode() != ErrorCode.DUPLICATE_KEY) {
                throw hde;
            }
        }
    } else {
        opCtx.getCurrentMutableRTreeAccessor().insert(target);
    }

    updateFilter(opCtx, tuple);
}
Also used : ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 63 with ITupleReference

use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.

the class PartitionedInMemoryInvertedIndex method openInvertedListPartitionCursors.

/**
 * Opens one inverted-list cursor per candidate partition and registers each
 * with {@code invListPartitions}.
 *
 * <p>The candidate range is the index's current [min, max] partition range,
 * narrowed by {@code numTokensLowerBound} / {@code numTokensUpperBound} when
 * those are non-negative (a negative bound leaves that side unrestricted).
 *
 * @param searcher               must be a {@code PartitionedTOccurrenceSearcher}
 * @param ictx                   must be a {@code PartitionedInMemoryInvertedIndexOpContext}
 * @param numTokensLowerBound    lower bound on the partition index, or negative for none
 * @param numTokensUpperBound    upper bound on the partition index, or negative for none
 * @param invListPartitions      receives the opened cursors, keyed by partition index
 * @param cursorsOrderedByTokens not used by this implementation
 * @return {@code false} if the index is empty (no partition recorded yet), {@code true} otherwise
 * @throws HyracksDataException if preparing or resetting a cursor fails
 */
@Override
public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx, short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions, List<IInvertedListCursor> cursorsOrderedByTokens) throws HyracksDataException {
    // Snapshot the partition range under the read lock; release it in finally so the lock
    // can never be left held.
    short minPartitionIndex;
    short maxPartitionIndex;
    partitionIndexLock.readLock().lock();
    try {
        minPartitionIndex = this.minPartitionIndex;
        maxPartitionIndex = this.maxPartitionIndex;
    } finally {
        partitionIndexLock.readLock().unlock();
    }
    if (minPartitionIndex == Short.MAX_VALUE && maxPartitionIndex == Short.MIN_VALUE) {
        // Index must be empty.
        return false;
    }
    short partitionStartIndex = minPartitionIndex;
    short partitionEndIndex = maxPartitionIndex;
    if (numTokensLowerBound >= 0) {
        partitionStartIndex = (short) Math.max(minPartitionIndex, numTokensLowerBound);
    }
    if (numTokensUpperBound >= 0) {
        partitionEndIndex = (short) Math.min(maxPartitionIndex, numTokensUpperBound);
    }
    PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
    PartitionedInMemoryInvertedIndexOpContext ctx = (PartitionedInMemoryInvertedIndexOpContext) ictx;
    ctx.setOperation(IndexOperation.SEARCH);
    // We can pick either of the full low or high search key, since they should be identical here.
    ITupleReference searchKey = partSearcher.getFullLowSearchKey();
    ctx.getBtreePred().setLowKey(searchKey, true);
    ctx.getBtreePred().setHighKey(searchKey, true);
    // Every candidate partition gets its own cursor. A possible optimization (not implemented here)
    // is to skip ahead using the last existing partition and re-search the BTree with an open
    // interval as low key.
    //
    // The counter is an int on purpose: with a short counter, i++ wraps from Short.MAX_VALUE to
    // Short.MIN_VALUE, so the loop would never terminate when partitionEndIndex == Short.MAX_VALUE.
    for (int i = partitionStartIndex; i <= partitionEndIndex; i++) {
        // Lossless narrowing: i always lies within [partitionStartIndex, partitionEndIndex].
        short partition = (short) i;
        partSearcher.setNumTokensBoundsInSearchKeys(partition, partition);
        InMemoryInvertedListCursor inMemListCursor = (InMemoryInvertedListCursor) partSearcher.getCachedInvertedListCursor();
        inMemListCursor.prepare(ctx.getBtreeAccessor(), ctx.getBtreePred(), ctx.getTokenFieldsCmp(), ctx.getBtreeCmp());
        inMemListCursor.reset(searchKey);
        invListPartitions.addInvertedListCursor(inMemListCursor, partition);
    }
    return true;
}
Also used : ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) PartitionedTOccurrenceSearcher(org.apache.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher)

Example 64 with ITupleReference

use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.

the class BTreeNSMInteriorFrame method getChildPageId.

/**
 * Returns the id of the child page to follow for the low key of {@code pred}.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>frame without keys: the single (rightmost) child pointer;</li>
 *   <li>no low key in the predicate: the leftmost child;</li>
 *   <li>low key greater than every key in the frame: the rightmost child;</li>
 *   <li>otherwise: the left child of the last slot, scanning forward in slot
 *       offset from the found tuple, whose key still compares equal to the
 *       found tuple under the low-key comparator.</li>
 * </ol>
 *
 * <p>Side effect: {@code frameTuple} and {@code cmpFrameTuple} are repositioned.
 *
 * @param pred range predicate providing the low key, its comparator and inclusiveness
 * @return the page id of the child to descend into
 * @throws HyracksDataException if a tuple lookup or comparison fails
 */
@Override
public int getChildPageId(RangePredicate pred) throws HyracksDataException {
    // Trivial case: only a child pointer, no key.
    if (buf.getInt(Constants.TUPLE_COUNT_OFFSET) == 0) {
        return buf.getInt(RIGHT_LEAF_OFFSET);
    }

    // The low-key comparator may be defined on a prefix of the BTree key fields.
    MultiComparator lowKeyCmp = pred.getLowKeyComparator();
    ITupleReference lowKey = pred.getLowKey();
    if (lowKey == null) {
        // No low key given (e.g. during an index scan).
        return getLeftmostChildPageId();
    }
    FindTupleMode findMode = pred.isLowKeyInclusive() ? FindTupleMode.INCLUSIVE : FindTupleMode.EXCLUSIVE;

    // Locate a key matching the low key, or the next higher key if none matches exactly.
    int matchIndex = slotManager.findTupleIndex(lowKey, frameTuple, lowKeyCmp, findMode,
            FindTupleNoExactMatchPolicy.HIGHER_KEY);
    int matchSlotOff = slotManager.getSlotOff(matchIndex);
    if (matchIndex == slotManager.getGreatestKeyIndicator()) {
        // The low key is beyond every key in this frame: follow the rightmost child pointer.
        return buf.getInt(RIGHT_LEAF_OFFSET);
    }

    // Prefix searches: findTupleIndex() may land on an arbitrary tuple among those sharing the
    // prefix, so pin the found tuple and walk over its equal neighbours to reach the boundary one.
    cmpFrameTuple.resetByTupleOffset(buf.array(), slotManager.getTupleOff(matchSlotOff));

    // Walk slots at increasing offsets while their keys still equal the pinned tuple.
    // NOTE(review): this is meant to reach the lowest (leftmost) matching key, which presumably
    // relies on slots for smaller keys sitting at higher offsets — confirm against the slot manager.
    int slotLimit = buf.capacity();
    int probeSlotOff = matchSlotOff + slotManager.getSlotSize();
    for (; probeSlotOff < slotLimit; probeSlotOff += slotManager.getSlotSize()) {
        frameTuple.resetByTupleOffset(buf.array(), slotManager.getTupleOff(probeSlotOff));
        if (lowKeyCmp.compare(cmpFrameTuple, frameTuple) != 0) {
            break;
        }
    }

    // Step back to the last slot that still matched and follow its left child pointer.
    int chosenSlotOff = probeSlotOff - slotManager.getSlotSize();
    frameTuple.resetByTupleOffset(buf.array(), slotManager.getTupleOff(chosenSlotOff));
    return buf.getInt(getLeftChildPageOff(frameTuple));
}
Also used : MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) FindTupleMode(org.apache.hyracks.storage.am.common.ophelpers.FindTupleMode)

Example 65 with ITupleReference

use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.

the class BTree method upsertLeaf.

/**
 * Performs the leaf-level step of an upsert.
 *
 * <p>The tuple currently matching the key (if any) is offered to the
 * context's acceptor. When accepted, the new tuple is inserted at
 * {@code targetTupleIndex} if no match exists, or replaces the match
 * otherwise. When rejected, the new tuple is inserted at the index computed
 * by the leaf frame's {@code findInsertTupleIndex}.
 *
 * @param tuple            the tuple to upsert
 * @param targetTupleIndex the tuple index located for {@code tuple} in the leaf
 * @param pageId           id of the leaf page
 * @param ctx              the operation context
 * @return the restart flag reported by the delegated insert or update
 * @throws Exception if the delegated leaf operation fails
 */
private boolean upsertLeaf(ITupleReference tuple, int targetTupleIndex, int pageId, BTreeOpContext ctx) throws Exception {
    ITupleReference existing = ctx.getLeafFrame().getMatchingKeyTuple(tuple, targetTupleIndex);

    // Rejected by the acceptor: insert at a freshly computed position.
    if (!ctx.getAcceptor().accept(existing)) {
        return insertLeaf(tuple, ctx.getLeafFrame().findInsertTupleIndex(tuple), pageId, ctx);
    }

    // Accepted: plain insert when no tuple matches the key, in-place update otherwise.
    return existing == null
            ? insertLeaf(tuple, targetTupleIndex, pageId, ctx)
            : updateLeaf(tuple, targetTupleIndex, pageId, ctx);
}
Also used : ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)

Aggregations

ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)149 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)80 ArrayList (java.util.ArrayList)40 ACIDException (org.apache.asterix.common.exceptions.ACIDException)31 MetadataEntityValueExtractor (org.apache.asterix.metadata.valueextractors.MetadataEntityValueExtractor)26 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)20 Test (org.junit.Test)20 RangePredicate (org.apache.hyracks.storage.am.btree.impls.RangePredicate)18 IIndexCursor (org.apache.hyracks.storage.common.IIndexCursor)18 Dataset (org.apache.asterix.metadata.entities.Dataset)10 MultiComparator (org.apache.hyracks.storage.common.MultiComparator)10 CheckTuple (org.apache.hyracks.storage.am.common.CheckTuple)8 ITreeIndexAccessor (org.apache.hyracks.storage.am.common.api.ITreeIndexAccessor)8 ITreeIndexCursor (org.apache.hyracks.storage.am.common.api.ITreeIndexCursor)8 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)7 ILSMDiskComponentBulkLoader (org.apache.hyracks.storage.am.lsm.common.api.ILSMDiskComponentBulkLoader)7 SearchPredicate (org.apache.hyracks.storage.am.rtree.impls.SearchPredicate)7 ExtensionMetadataDataset (org.apache.asterix.metadata.api.ExtensionMetadataDataset)6 Datatype (org.apache.asterix.metadata.entities.Datatype)6 ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)6