use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method createWordInvIndexTestContext.
public static LSMInvertedIndexTestContext createWordInvIndexTestContext(LSMInvertedIndexTestHarness harness, InvertedIndexType invIndexType) throws IOException, HyracksDataException {
ISerializerDeserializer[] fieldSerdes = getNonHashedIndexFieldSerdes(invIndexType);
ITokenFactory tokenFactory = new UTF8WordTokenFactory();
IBinaryTokenizerFactory tokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false, tokenFactory);
LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes, fieldSerdes.length - 1, tokenizerFactory, invIndexType, null, null, null, null, null, null);
return testCtx;
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method createNGramInvIndexTestContext.
public static LSMInvertedIndexTestContext createNGramInvIndexTestContext(LSMInvertedIndexTestHarness harness, InvertedIndexType invIndexType) throws IOException, HyracksDataException {
ISerializerDeserializer[] fieldSerdes = getNonHashedIndexFieldSerdes(invIndexType);
ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
IBinaryTokenizerFactory tokenizerFactory = new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH, true, true, false, tokenFactory);
LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes, fieldSerdes.length - 1, tokenizerFactory, invIndexType, null, null, null, null, null, null);
return testCtx;
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method createHashedWordInvIndexTestContext.
public static LSMInvertedIndexTestContext createHashedWordInvIndexTestContext(LSMInvertedIndexTestHarness harness, InvertedIndexType invIndexType) throws IOException, HyracksDataException {
ISerializerDeserializer[] fieldSerdes = getHashedIndexFieldSerdes(invIndexType);
ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
IBinaryTokenizerFactory tokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false, tokenFactory);
LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes, fieldSerdes.length - 1, tokenizerFactory, invIndexType, null, null, null, null, null, null);
return testCtx;
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory in project asterixdb by apache.
the class CountHashedGramTokensDescriptor method createEvaluatorFactory.
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) throws AlgebricksException {
return new IScalarEvaluatorFactory() {
private static final long serialVersionUID = 1L;
@Override
public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
ITokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(3, true, false, true, tokenFactory);
return new GramTokensEvaluator(args, ctx, tokenizer, BuiltinType.AINT32);
}
};
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory in project asterixdb by apache.
the class CountHashedWordTokensDescriptor method createEvaluatorFactory.
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
return new IScalarEvaluatorFactory() {
private static final long serialVersionUID = 1L;
@Override
public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
IBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, true, tokenFactory);
return new WordTokensEvaluator(args, ctx, tokenizer, BuiltinType.AINT32);
}
};
}
Aggregations