Search in sources :

Example 1 with ColumnIndex

use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.

the class SASIIndexBuilder method build.

/**
 * Builds the SASI index files for every sstable registered in {@code sstables}.
 *
 * For each sstable the data file is opened once, every partition key is visited through a
 * {@link KeyIterator}, and the partition's static row (when the table has static columns) and
 * remaining unfiltered entries are fed to a {@link PerSSTableIndexWriter}. Once all keys of an
 * sstable have been visited, {@code completeSSTable} is invoked for it.
 *
 * @throws CompactionInterruptedException if a stop was requested before the next key is processed
 * @throws FSReadError if reading a partition from the data file fails with an {@link IOException}
 */
public void build() {
    AbstractType<?> partitionKeyType = cfs.metadata().partitionKeyType;
    for (Map.Entry<SSTableReader, Map<ColumnMetadata, ColumnIndex>> entry : sstables.entrySet()) {
        SSTableReader reader = entry.getKey();
        Map<ColumnMetadata, ColumnIndex> columnIndexes = entry.getValue();
        try (RandomAccessReader data = reader.openDataReader()) {
            PerSSTableIndexWriter writer = SASIIndex.newWriter(partitionKeyType, reader.descriptor, columnIndexes, OperationType.COMPACTION);
            long lastKeyPosition = 0;
            try (KeyIterator keyIterator = new KeyIterator(reader.descriptor, cfs.metadata())) {
                while (keyIterator.hasNext()) {
                    // honour a stop request between partitions
                    if (isStopRequested()) {
                        throw new CompactionInterruptedException(getCompactionInfo());
                    }
                    DecoratedKey partitionKey = keyIterator.next();
                    long currentKeyPosition = keyIterator.getKeyPosition();
                    writer.startPartition(partitionKey, currentKeyPosition);
                    indexPartition(reader, data, partitionKey, writer);
                    // progress is tracked as the distance travelled between consecutive key positions
                    bytesProcessed += currentKeyPosition - lastKeyPosition;
                    lastKeyPosition = currentKeyPosition;
                }
                completeSSTable(writer, reader, columnIndexes.values());
            }
        }
    }
}

/**
 * Seeks to the given partition in the data file and hands its rows to the index writer.
 * Any {@link IOException} raised while doing so is rethrown as an {@link FSReadError}
 * carrying the sstable's file name.
 */
private void indexPartition(SSTableReader reader, RandomAccessReader data, DecoratedKey partitionKey, PerSSTableIndexWriter writer) {
    try {
        RowIndexEntry entryForKey = reader.getPosition(partitionKey, SSTableReader.Operator.EQ);
        data.seek(entryForKey.position);
        // read past the short-length-prefixed key; its value is not needed here
        ByteBufferUtil.readWithShortLength(data);
        try (SSTableIdentityIterator rows = SSTableIdentityIterator.create(reader, data, partitionKey)) {
            // a static row, when the table defines static columns, is indexed on its own first
            if (cfs.metadata().hasStaticColumns()) {
                writer.nextUnfilteredCluster(rows.staticRow());
            }
            while (rows.hasNext()) {
                writer.nextUnfilteredCluster(rows.next());
            }
        }
    } catch (IOException ioe) {
        throw new FSReadError(ioe, reader.getFilename());
    }
}
Also used : ColumnMetadata(org.apache.cassandra.schema.ColumnMetadata) SSTableIdentityIterator(org.apache.cassandra.io.sstable.SSTableIdentityIterator) KeyIterator(org.apache.cassandra.io.sstable.KeyIterator) CompactionInterruptedException(org.apache.cassandra.db.compaction.CompactionInterruptedException) DecoratedKey(org.apache.cassandra.db.DecoratedKey) IOException(java.io.IOException) RowIndexEntry(org.apache.cassandra.db.RowIndexEntry) PerSSTableIndexWriter(org.apache.cassandra.index.sasi.disk.PerSSTableIndexWriter) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) ColumnIndex(org.apache.cassandra.index.sasi.conf.ColumnIndex) RandomAccessReader(org.apache.cassandra.io.util.RandomAccessReader) FSReadError(org.apache.cassandra.io.FSReadError)

Example 2 with ColumnIndex

use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.

the class Operation method analyzeGroup.

/**
 * Converts the raw row-filter expressions of one operation group into SASI {@link Expression}s,
 * grouped by the column they apply to.
 *
 * The input list is sorted in place (by column, then by descending operator priority) before
 * being processed.
 *
 * @param controller  source of the per-expression {@link ColumnIndex} and of the key validator
 * @param op          logical operation of the group; only {@code AND} lets bounds be merged
 *                    into an already existing expression of the same column
 * @param expressions expressions to analyze; reordered by this call
 * @return multimap from column to the expressions produced for it
 */
@VisibleForTesting
protected static ListMultimap<ColumnMetadata, Expression> analyzeGroup(QueryController controller, OperationType op, List<RowFilter.Expression> expressions) {
    ListMultimap<ColumnMetadata, Expression> byColumn = ArrayListMultimap.create();
    // Ordering by column name and then by operator priority lets inequalities be folded
    // into ranges in a single pass, without converting expressions between types.
    expressions.sort((left, right) -> {
        int byName = left.column().compareTo(right.column());
        if (byName != 0) {
            return byName;
        }
        return -Integer.compare(getPriority(left.operator()), getPriority(right.operator()));
    });
    for (final RowFilter.Expression filter : expressions) {
        ColumnIndex index = controller.getIndex(filter);
        // everything produced for this column is appended through this list;
        // the multimap itself is never written to directly
        List<Expression> columnExpressions = byColumn.get(filter.column());
        if (index == null) {
            index = new ColumnIndex(controller.getKeyValidator(), filter.column(), null);
        }
        AbstractAnalyzer tokens = index.getAnalyzer();
        tokens.reset(filter.getIndexValue());
        // LIKE_* always yields one expression per analyzed token. NOT_EQ becomes an independent
        // expression only when the column has no expression yet, already has several, or its single
        // existing expression is itself a NOT_EQ: since NOT_EQ has the lowest priority, no further
        // EQ/RANGE expression can follow for that column. Everything else (EQ included) goes
        // through the range path below.
        final boolean onePerToken;
        switch (filter.operator()) {
            case LIKE_PREFIX:
            case LIKE_SUFFIX:
            case LIKE_CONTAINS:
            case LIKE_MATCHES:
                onePerToken = true;
                break;
            case NEQ:
                onePerToken = columnExpressions.size() != 1 || columnExpressions.get(0).getOp() == Op.NOT_EQ;
                break;
            default:
                onePerToken = false;
                break;
        }
        if (onePerToken) {
            while (tokens.hasNext()) {
                final ByteBuffer token = tokens.next();
                columnExpressions.add(new Expression(controller, index).add(filter.operator(), token));
            }
            continue;
        }
        // "range" or not-equals operator: both bounds are combined into a single expression
        // iff the group operation is AND; otherwise a separate expression has to be created.
        Expression range;
        if (op == OperationType.AND && !columnExpressions.isEmpty()) {
            range = Iterables.getLast(columnExpressions);
        } else {
            range = new Expression(controller, index);
            columnExpressions.add(range);
        }
        while (tokens.hasNext()) {
            range.add(filter.operator(), tokens.next());
        }
    }
    return byColumn;
}
Also used : ColumnMetadata(org.apache.cassandra.schema.ColumnMetadata) RowFilter(org.apache.cassandra.db.filter.RowFilter) ColumnIndex(org.apache.cassandra.index.sasi.conf.ColumnIndex) AbstractAnalyzer(org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer) ByteBuffer(java.nio.ByteBuffer) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with ColumnIndex

use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.

the class SASIIndexTest method testSettingIsLiteralOption.

/**
 * Checks how {@code ColumnIndex.isLiteral()} reacts to the column type and to the
 * {@code is_literal} index option:
 * a custom comparator without the option is not literal, the option set to "true" makes it
 * literal, the built-in UTF-8/ASCII types are literal without the option, and the option set to
 * "false" overrides the built-in type.
 */
@Test
public void testSettingIsLiteralOption() {
    // special type which is UTF-8 but is only on the inside
    AbstractType<?> stringType = new AbstractType<String>(AbstractType.ComparisonType.CUSTOM) {

        public ByteBuffer fromString(String source) throws MarshalException {
            return UTF8Type.instance.fromString(source);
        }

        public Term fromJSONObject(Object parsed) throws MarshalException {
            throw new UnsupportedOperationException();
        }

        public TypeSerializer<String> getSerializer() {
            return UTF8Type.instance.getSerializer();
        }

        public int compareCustom(ByteBuffer a, ByteBuffer b) {
            return UTF8Type.instance.compare(a, b);
        }
    };
    // first let's check that we get 'false' for 'isLiteral' if we don't set the option with special comparator
    ColumnIndex indexA = newSasiColumnIndex("special-A", stringType, "special-index-A", null);
    Assert.assertTrue(indexA.isIndexed());
    Assert.assertFalse(indexA.isLiteral());
    // now let's double-check that we do get 'true' when we set it
    ColumnIndex indexB = newSasiColumnIndex("special-B", stringType, "special-index-B", "true");
    Assert.assertTrue(indexB.isIndexed());
    Assert.assertTrue(indexB.isLiteral());
    // and finally we should also get a 'true' if it's built-in UTF-8/ASCII comparator
    ColumnIndex indexC = newSasiColumnIndex("special-C", UTF8Type.instance, "special-index-C", null);
    Assert.assertTrue(indexC.isIndexed());
    Assert.assertTrue(indexC.isLiteral());
    ColumnIndex indexD = newSasiColumnIndex("special-D", AsciiType.instance, "special-index-D", null);
    Assert.assertTrue(indexD.isIndexed());
    Assert.assertTrue(indexD.isLiteral());
    // and the option should supersede the comparator type
    ColumnIndex indexE = newSasiColumnIndex("special-E", UTF8Type.instance, "special-index-E", "false");
    Assert.assertTrue(indexE.isIndexed());
    Assert.assertFalse(indexE.isLiteral());
}

/**
 * Creates a {@link ColumnIndex} over a regular column of the test table, backed by a custom
 * SASI index definition.
 *
 * @param columnName name of the regular column to create
 * @param columnType type of that column
 * @param indexName  name given to the index metadata
 * @param isLiteral  value for the {@code is_literal} option, or {@code null} to leave it unset
 */
private ColumnIndex newSasiColumnIndex(String columnName, AbstractType<?> columnType, String indexName, String isLiteral) {
    // plain map instead of double-brace initialisation: no anonymous HashMap subclass per call site
    HashMap<String, String> options = new HashMap<String, String>();
    options.put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
    if (isLiteral != null)
        options.put("is_literal", isLiteral);
    ColumnMetadata column = ColumnMetadata.regularColumn(KS_NAME, CF_NAME, columnName, columnType);
    return new ColumnIndex(UTF8Type.instance, column, IndexMetadata.fromSchemaMetadata(indexName, IndexMetadata.Kind.CUSTOM, options));
}
Also used : ColumnMetadata(org.apache.cassandra.schema.ColumnMetadata) ColumnIndex(org.apache.cassandra.index.sasi.conf.ColumnIndex) ByteBuffer(java.nio.ByteBuffer)

Example 4 with ColumnIndex

use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.

the class SASIIndexTest method testIndexMemtableSwitching.

/**
 * Exercises the index memtable life cycle of a {@link ColumnIndex}: data written without a flush
 * is searchable in the current memtable, a blocking flush replaces the current memtable and leaves
 * no pending ones, a manual switch keeps the previous memtable searchable as "pending" until it is
 * discarded, and a parameterless switch drops the in-memory data from search results.
 */
@Test
public void testIndexMemtableSwitching() {
    // write some data but don't flush
    ColumnFamilyStore store = loadData(new HashMap<String, Pair<String, Integer>>() {

        {
            put("key1", Pair.create("Pavel", 14));
        }
    }, false);
    ColumnIndex index = ((SASIIndex) store.indexManager.getIndexByName(store.name + "_first_name")).getIndex();
    IndexMemtable beforeFlushMemtable = index.getCurrentMemtable();
    PartitionRangeReadCommand command = new PartitionRangeReadCommand(store.metadata(), FBUtilities.nowInSeconds(), ColumnFilter.all(store.metadata()), RowFilter.NONE, DataLimits.NONE, DataRange.allData(store.getPartitioner()), Optional.empty());
    QueryController controller = new QueryController(store, command, Integer.MAX_VALUE);
    org.apache.cassandra.index.sasi.plan.Expression expression = new org.apache.cassandra.index.sasi.plan.Expression(controller, index).add(Operator.LIKE_MATCHES, UTF8Type.instance.fromString("Pavel"));
    Assert.assertTrue(beforeFlushMemtable.search(expression).getCount() > 0);
    store.forceBlockingFlush();
    // after the flush the index must expose a fresh, empty memtable and nothing pending
    IndexMemtable afterFlushMemtable = index.getCurrentMemtable();
    Assert.assertNotSame(afterFlushMemtable, beforeFlushMemtable);
    // expected value goes first so that a failure message reports expected/actual correctly
    Assert.assertEquals(0, afterFlushMemtable.search(expression).getCount());
    Assert.assertEquals(0, index.getPendingMemtables().size());
    loadData(new HashMap<String, Pair<String, Integer>>() {

        {
            put("key2", Pair.create("Sam", 15));
        }
    }, false);
    expression = new org.apache.cassandra.index.sasi.plan.Expression(controller, index).add(Operator.LIKE_MATCHES, UTF8Type.instance.fromString("Sam"));
    beforeFlushMemtable = index.getCurrentMemtable();
    Assert.assertTrue(beforeFlushMemtable.search(expression).getCount() > 0);
    // let's emulate switching memtable and see if we can still read-data in "pending"
    index.switchMemtable(store.getTracker().getView().getCurrentMemtable());
    Assert.assertNotSame(index.getCurrentMemtable(), beforeFlushMemtable);
    Assert.assertEquals(1, index.getPendingMemtables().size());
    Assert.assertTrue(index.searchMemtable(expression).getCount() > 0);
    // emulate "everything is flushed" notification
    index.discardMemtable(store.getTracker().getView().getCurrentMemtable());
    Assert.assertEquals(0, index.getPendingMemtables().size());
    Assert.assertEquals(0, index.searchMemtable(expression).getCount());
    // test discarding data from memtable
    loadData(new HashMap<String, Pair<String, Integer>>() {

        {
            put("key3", Pair.create("Jonathan", 16));
        }
    }, false);
    expression = new org.apache.cassandra.index.sasi.plan.Expression(controller, index).add(Operator.LIKE_MATCHES, UTF8Type.instance.fromString("Jonathan"));
    Assert.assertTrue(index.searchMemtable(expression).getCount() > 0);
    index.switchMemtable();
    Assert.assertEquals(0, index.searchMemtable(expression).getCount());
}
Also used : ColumnIndex(org.apache.cassandra.index.sasi.conf.ColumnIndex) QueryController(org.apache.cassandra.index.sasi.plan.QueryController) IndexMemtable(org.apache.cassandra.index.sasi.memory.IndexMemtable) Pair(org.apache.cassandra.utils.Pair)

Example 5 with ColumnIndex

use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.

the class SASIIndexBuilder method completeSSTable.

/**
 * Finalises the index writer for an sstable and registers the sstable with every column index
 * whose index component file is present on disk.
 *
 * @param indexWriter writer to complete
 * @param sstable     sstable whose index files were just written
 * @param indexes     column indexes that may have produced a component for this sstable
 */
private void completeSSTable(PerSSTableIndexWriter indexWriter, SSTableReader sstable, Collection<ColumnIndex> indexes) {
    indexWriter.complete();
    for (ColumnIndex columnIndex : indexes) {
        String componentPath = sstable.descriptor.filenameFor(columnIndex.getComponent());
        // a missing file means no data was inserted into this index for the given sstable
        if (new File(componentPath).exists()) {
            columnIndex.update(Collections.<SSTableReader>emptyList(), Collections.singletonList(sstable));
        }
    }
}
Also used : ColumnIndex(org.apache.cassandra.index.sasi.conf.ColumnIndex) File(java.io.File)

Aggregations

ColumnIndex (org.apache.cassandra.index.sasi.conf.ColumnIndex)6 ByteBuffer (java.nio.ByteBuffer)3 ColumnMetadata (org.apache.cassandra.schema.ColumnMetadata)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 File (java.io.File)1 IOException (java.io.IOException)1 DecoratedKey (org.apache.cassandra.db.DecoratedKey)1 RowIndexEntry (org.apache.cassandra.db.RowIndexEntry)1 CompactionInterruptedException (org.apache.cassandra.db.compaction.CompactionInterruptedException)1 RowFilter (org.apache.cassandra.db.filter.RowFilter)1 Row (org.apache.cassandra.db.rows.Row)1 AbstractAnalyzer (org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer)1 PerSSTableIndexWriter (org.apache.cassandra.index.sasi.disk.PerSSTableIndexWriter)1 IndexMemtable (org.apache.cassandra.index.sasi.memory.IndexMemtable)1 QueryController (org.apache.cassandra.index.sasi.plan.QueryController)1 FSReadError (org.apache.cassandra.io.FSReadError)1 KeyIterator (org.apache.cassandra.io.sstable.KeyIterator)1 SSTableIdentityIterator (org.apache.cassandra.io.sstable.SSTableIdentityIterator)1 SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader)1 RandomAccessReader (org.apache.cassandra.io.util.RandomAccessReader)1