use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.
the class SASIIndexBuilder method build.
/**
 * Builds the SASI index data for every sstable held in {@code sstables}.
 *
 * For each sstable the partition keys are walked with a {@link KeyIterator}; every partition is
 * located in the data file, its static row (when the table declares static columns) and all of its
 * unfiltered clusterings are handed to a {@link PerSSTableIndexWriter}, and once the keys are
 * exhausted the sstable is finalised through {@code completeSSTable}.
 *
 * @throws CompactionInterruptedException if a stop was requested before the next partition is read
 * @throws FSReadError if reading a partition from the data file fails with an {@link IOException}
 */
public void build() {
    final AbstractType<?> partitionKeyType = cfs.metadata().partitionKeyType;

    for (Map.Entry<SSTableReader, Map<ColumnMetadata, ColumnIndex>> entry : sstables.entrySet()) {
        final SSTableReader sstable = entry.getKey();
        final Map<ColumnMetadata, ColumnIndex> indexes = entry.getValue();

        try (RandomAccessReader dataFile = sstable.openDataReader()) {
            final PerSSTableIndexWriter writer =
                    SASIIndex.newWriter(partitionKeyType, sstable.descriptor, indexes, OperationType.COMPACTION);

            // position of the previously visited key, used to derive the progress delta
            long lastKeyPosition = 0;

            try (KeyIterator keyIterator = new KeyIterator(sstable.descriptor, cfs.metadata())) {
                while (keyIterator.hasNext()) {
                    // honour a stop request before touching the next partition
                    if (isStopRequested()) {
                        throw new CompactionInterruptedException(getCompactionInfo());
                    }

                    final DecoratedKey partitionKey = keyIterator.next();
                    final long currentKeyPosition = keyIterator.getKeyPosition();

                    writer.startPartition(partitionKey, currentKeyPosition);

                    try {
                        RowIndexEntry entryForKey = sstable.getPosition(partitionKey, SSTableReader.Operator.EQ);
                        dataFile.seek(entryForKey.position);

                        // skip over the serialized partition key
                        ByteBufferUtil.readWithShortLength(dataFile);

                        try (SSTableIdentityIterator rows = SSTableIdentityIterator.create(sstable, dataFile, partitionKey)) {
                            // a static row, when the table has static columns, is indexed separately
                            if (cfs.metadata().hasStaticColumns()) {
                                writer.nextUnfilteredCluster(rows.staticRow());
                            }

                            while (rows.hasNext()) {
                                writer.nextUnfilteredCluster(rows.next());
                            }
                        }
                    } catch (IOException ioe) {
                        throw new FSReadError(ioe, sstable.getFilename());
                    }

                    // progress is measured as the distance between consecutive key positions
                    bytesProcessed += currentKeyPosition - lastKeyPosition;
                    lastKeyPosition = currentKeyPosition;
                }

                completeSSTable(writer, sstable, indexes.values());
            }
        }
    }
}
use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.
the class Operation method analyzeGroup.
/**
 * Converts the row-filter expressions of one operation group into SASI {@link Expression}s,
 * grouped by the column they target.
 *
 * Note that {@code expressions} is sorted in place (by column, then by descending operator
 * priority) before it is processed.
 *
 * @param controller  query controller used to look up the index and key validator
 * @param op          logical operation of the group; bounds are merged into one expression only for AND
 * @param expressions row-filter expressions to analyze (reordered by this call)
 * @return multimap from column to the expressions produced for it
 */
@VisibleForTesting
protected static ListMultimap<ColumnMetadata, Expression> analyzeGroup(QueryController controller, OperationType op, List<RowFilter.Expression> expressions) {
    ListMultimap<ColumnMetadata, Expression> byColumn = ArrayListMultimap.create();

    // Order by column first and, within a column, by descending operator priority; this lets
    // inequalities be folded into ranges in a single pass without converting expression types.
    expressions.sort((left, right) -> {
        int columnOrder = left.column().compareTo(right.column());
        if (columnOrder != 0) {
            return columnOrder;
        }
        return Integer.compare(getPriority(right.operator()), getPriority(left.operator()));
    });

    for (final RowFilter.Expression rowExpression : expressions) {
        final ColumnIndex lookedUp = controller.getIndex(rowExpression);
        final List<Expression> perColumn = byColumn.get(rowExpression.column());

        // fall back to a ColumnIndex without index metadata when the controller has none for this expression
        final ColumnIndex columnIndex = lookedUp != null
                ? lookedUp
                : new ColumnIndex(controller.getKeyValidator(), rowExpression.column(), null);

        AbstractAnalyzer analyzer = columnIndex.getAnalyzer();
        analyzer.reset(rowExpression.getIndexValue());

        // LIKE_* always yields one expression per analyzer token. NEQ does so only when the column
        // has no expression yet, already has several, or has a single NOT_EQ one. Every other
        // operator (EQ included) goes through the combined branch below.
        final boolean onePerToken;
        switch (rowExpression.operator()) {
            case LIKE_PREFIX:
            case LIKE_SUFFIX:
            case LIKE_CONTAINS:
            case LIKE_MATCHES:
                onePerToken = true;
                break;
            case NEQ:
                onePerToken = perColumn.size() != 1 || perColumn.get(0).getOp() == Op.NOT_EQ;
                break;
            default:
                onePerToken = false;
                break;
        }

        if (onePerToken) {
            while (analyzer.hasNext()) {
                final ByteBuffer token = analyzer.next();
                perColumn.add(new Expression(controller, columnIndex).add(rowExpression.operator(), token));
            }
            continue;
        }

        // Combined branch: reuse the last expression of the column only when the group is AND and
        // one already exists; otherwise a separate expression has to be created.
        final Expression target;
        if (perColumn.isEmpty() || op != OperationType.AND) {
            target = new Expression(controller, columnIndex);
            perColumn.add(target);
        } else {
            target = Iterables.getLast(perColumn);
        }

        while (analyzer.hasNext()) {
            target.add(rowExpression.operator(), analyzer.next());
        }
    }

    return byColumn;
}
use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.
the class SASIIndexTest method testSettingIsLiteralOption.
/**
 * Verifies how {@code ColumnIndex.isLiteral()} is derived: from the explicit "is_literal" index
 * option when it is present, otherwise from the column's type (UTF8Type / AsciiType report true,
 * the custom comparator type used here reports false).
 */
@Test
public void testSettingIsLiteralOption() {
    // custom-comparison type that delegates to UTF8Type internally but is not UTF8Type itself
    AbstractType<?> stringType = new AbstractType<String>(AbstractType.ComparisonType.CUSTOM) {
        @Override
        public ByteBuffer fromString(String source) throws MarshalException {
            return UTF8Type.instance.fromString(source);
        }

        @Override
        public Term fromJSONObject(Object parsed) throws MarshalException {
            throw new UnsupportedOperationException();
        }

        @Override
        public TypeSerializer<String> getSerializer() {
            return UTF8Type.instance.getSerializer();
        }

        @Override
        public int compareCustom(ByteBuffer a, ByteBuffer b) {
            return UTF8Type.instance.compare(a, b);
        }
    };

    // case A: custom comparator, option not set -> not literal
    ColumnMetadata columnA = ColumnMetadata.regularColumn(KS_NAME, CF_NAME, "special-A", stringType);
    HashMap<String, String> optionsA = new HashMap<>();
    optionsA.put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
    IndexMetadata metadataA = IndexMetadata.fromSchemaMetadata("special-index-A", IndexMetadata.Kind.CUSTOM, optionsA);
    ColumnIndex indexA = new ColumnIndex(UTF8Type.instance, columnA, metadataA);
    Assert.assertEquals(true, indexA.isIndexed());
    Assert.assertEquals(false, indexA.isLiteral());

    // case B: custom comparator, option explicitly "true" -> literal
    ColumnMetadata columnB = ColumnMetadata.regularColumn(KS_NAME, CF_NAME, "special-B", stringType);
    HashMap<String, String> optionsB = new HashMap<>();
    optionsB.put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
    optionsB.put("is_literal", "true");
    IndexMetadata metadataB = IndexMetadata.fromSchemaMetadata("special-index-B", IndexMetadata.Kind.CUSTOM, optionsB);
    ColumnIndex indexB = new ColumnIndex(UTF8Type.instance, columnB, metadataB);
    Assert.assertEquals(true, indexB.isIndexed());
    Assert.assertEquals(true, indexB.isLiteral());

    // case C: built-in UTF-8 type, option not set -> literal
    ColumnMetadata columnC = ColumnMetadata.regularColumn(KS_NAME, CF_NAME, "special-C", UTF8Type.instance);
    HashMap<String, String> optionsC = new HashMap<>();
    optionsC.put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
    IndexMetadata metadataC = IndexMetadata.fromSchemaMetadata("special-index-C", IndexMetadata.Kind.CUSTOM, optionsC);
    ColumnIndex indexC = new ColumnIndex(UTF8Type.instance, columnC, metadataC);
    Assert.assertEquals(true, indexC.isIndexed());
    Assert.assertEquals(true, indexC.isLiteral());

    // case D: built-in ASCII type, option not set -> literal
    ColumnMetadata columnD = ColumnMetadata.regularColumn(KS_NAME, CF_NAME, "special-D", AsciiType.instance);
    HashMap<String, String> optionsD = new HashMap<>();
    optionsD.put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
    IndexMetadata metadataD = IndexMetadata.fromSchemaMetadata("special-index-D", IndexMetadata.Kind.CUSTOM, optionsD);
    ColumnIndex indexD = new ColumnIndex(UTF8Type.instance, columnD, metadataD);
    Assert.assertEquals(true, indexD.isIndexed());
    Assert.assertEquals(true, indexD.isLiteral());

    // case E: the explicit option supersedes the column type: UTF-8 with "false" -> not literal
    ColumnMetadata columnE = ColumnMetadata.regularColumn(KS_NAME, CF_NAME, "special-E", UTF8Type.instance);
    HashMap<String, String> optionsE = new HashMap<>();
    optionsE.put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName());
    optionsE.put("is_literal", "false");
    IndexMetadata metadataE = IndexMetadata.fromSchemaMetadata("special-index-E", IndexMetadata.Kind.CUSTOM, optionsE);
    ColumnIndex indexE = new ColumnIndex(UTF8Type.instance, columnE, metadataE);
    Assert.assertEquals(true, indexE.isIndexed());
    Assert.assertEquals(false, indexE.isLiteral());
}
use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.
the class SASIIndexTest method testIndexMemtableSwitching.
/**
 * Exercises the life cycle of the index memtable of the "_first_name" SASI index: data is
 * searchable in the current memtable, a flush installs a fresh (empty) memtable, a switched-out
 * memtable stays searchable while it is pending, and discarding it removes its data from
 * memtable searches.
 */
@Test
public void testIndexMemtableSwitching() {
    // write some data but don't flush
    ColumnFamilyStore store = loadData(new HashMap<String, Pair<String, Integer>>() {
        {
            put("key1", Pair.create("Pavel", 14));
        }
    }, false);
    ColumnIndex index = ((SASIIndex) store.indexManager.getIndexByName(store.name + "_first_name")).getIndex();
    IndexMemtable beforeFlushMemtable = index.getCurrentMemtable();
    PartitionRangeReadCommand command = new PartitionRangeReadCommand(store.metadata(), FBUtilities.nowInSeconds(), ColumnFilter.all(store.metadata()), RowFilter.NONE, DataLimits.NONE, DataRange.allData(store.getPartitioner()), Optional.empty());
    QueryController controller = new QueryController(store, command, Integer.MAX_VALUE);
    org.apache.cassandra.index.sasi.plan.Expression expression = new org.apache.cassandra.index.sasi.plan.Expression(controller, index).add(Operator.LIKE_MATCHES, UTF8Type.instance.fromString("Pavel"));
    Assert.assertTrue(beforeFlushMemtable.search(expression).getCount() > 0);

    // after a blocking flush the index must expose a new memtable that no longer holds the row
    store.forceBlockingFlush();
    IndexMemtable afterFlushMemtable = index.getCurrentMemtable();
    Assert.assertNotSame(afterFlushMemtable, beforeFlushMemtable);
    // JUnit's contract is assertEquals(expected, actual); keep that order so failure messages are accurate
    Assert.assertEquals(0, afterFlushMemtable.search(expression).getCount());
    Assert.assertEquals(0, index.getPendingMemtables().size());

    loadData(new HashMap<String, Pair<String, Integer>>() {
        {
            put("key2", Pair.create("Sam", 15));
        }
    }, false);
    expression = new org.apache.cassandra.index.sasi.plan.Expression(controller, index).add(Operator.LIKE_MATCHES, UTF8Type.instance.fromString("Sam"));
    beforeFlushMemtable = index.getCurrentMemtable();
    Assert.assertTrue(beforeFlushMemtable.search(expression).getCount() > 0);

    // let's emulate switching memtable and see if we can still read-data in "pending"
    index.switchMemtable(store.getTracker().getView().getCurrentMemtable());
    Assert.assertNotSame(index.getCurrentMemtable(), beforeFlushMemtable);
    Assert.assertEquals(1, index.getPendingMemtables().size());
    Assert.assertTrue(index.searchMemtable(expression).getCount() > 0);

    // emulate "everything is flushed" notification
    index.discardMemtable(store.getTracker().getView().getCurrentMemtable());
    Assert.assertEquals(0, index.getPendingMemtables().size());
    Assert.assertEquals(0, index.searchMemtable(expression).getCount());

    // test discarding data from memtable
    loadData(new HashMap<String, Pair<String, Integer>>() {
        {
            put("key3", Pair.create("Jonathan", 16));
        }
    }, false);
    expression = new org.apache.cassandra.index.sasi.plan.Expression(controller, index).add(Operator.LIKE_MATCHES, UTF8Type.instance.fromString("Jonathan"));
    Assert.assertTrue(index.searchMemtable(expression).getCount() > 0);
    // switching without a pending memtable is expected to drop the current contents from searches
    index.switchMemtable();
    Assert.assertEquals(0, index.searchMemtable(expression).getCount());
}
use of org.apache.cassandra.index.sasi.conf.ColumnIndex in project cassandra by apache.
the class SASIIndexBuilder method completeSSTable.
/**
 * Finalises index writing for one sstable and registers the sstable with every column index
 * that actually produced an index file for it.
 *
 * @param indexWriter writer whose output is completed first
 * @param sstable     sstable the index files belong to
 * @param indexes     column indexes that were being built for the sstable
 */
private void completeSSTable(PerSSTableIndexWriter indexWriter, SSTableReader sstable, Collection<ColumnIndex> indexes) {
    indexWriter.complete();

    for (ColumnIndex columnIndex : indexes) {
        File indexFile = new File(sstable.descriptor.filenameFor(columnIndex.getComponent()));

        // a missing file means no data was inserted into this index for the sstable
        if (indexFile.exists()) {
            columnIndex.update(Collections.<SSTableReader>emptyList(), Collections.singletonList(sstable));
        }
    }
}
Aggregations