Use of org.apache.hyracks.data.std.util.GrowableArray in the Apache AsterixDB project.
From class UTF8StringPointableTest, method testLowercase.
/**
 * Lowercases {@code STRING_UTF8_MIX} into a fresh buffer and verifies that the
 * produced string compares equal to {@code STRING_UTF8_MIX_LOWERCASE}.
 */
@Test
public void testLowercase() throws Exception {
    final GrowableArray buffer = new GrowableArray();
    final UTF8StringBuilder utf8Builder = new UTF8StringBuilder();
    STRING_UTF8_MIX.lowercase(utf8Builder, buffer);

    // Point at exactly the bytes the builder wrote into the buffer.
    final UTF8StringPointable lowered = new UTF8StringPointable();
    lowered.set(buffer.getByteArray(), 0, buffer.getLength());

    final int comparison = STRING_UTF8_MIX_LOWERCASE.compareTo(lowered);
    assertEquals(0, comparison);
}
Use of org.apache.hyracks.data.std.util.GrowableArray in the Apache AsterixDB project.
From class UTF8StringPointableTest, method testSubstrAfter.
/**
 * Exercises {@code substrAfter} in two scenarios that share one builder and one buffer:
 * first {@code STRING_LEN_128} after {@code STRING_LEN_127}, then a mixed ASCII/CJK
 * literal after a CJK pattern.
 */
@Test
public void testSubstrAfter() throws Exception {
    final UTF8StringBuilder utf8Builder = new UTF8StringBuilder();
    final GrowableArray buffer = new GrowableArray();
    final UTF8StringPointable actual = new UTF8StringPointable();

    // Scenario 1: the expected remainder is the single sample ASCII character.
    STRING_LEN_128.substrAfter(STRING_LEN_127, utf8Builder, buffer);
    actual.set(buffer.getByteArray(), 0, buffer.getLength());
    UTF8StringPointable expected =
            generateUTF8Pointable(Character.toString(UTF8StringSample.ONE_ASCII_CHAR));
    assertEquals(0, expected.compareTo(actual));

    // Scenario 2: clear the buffer before reusing it for the next substring.
    buffer.reset();
    final UTF8StringPointable source = generateUTF8Pointable("Mix中文123");
    final UTF8StringPointable delimiter = generateUTF8Pointable("文");
    expected = generateUTF8Pointable("123");
    source.substrAfter(delimiter, utf8Builder, buffer);
    actual.set(buffer.getByteArray(), 0, buffer.getLength());
    assertEquals(0, expected.compareTo(actual));
}
Use of org.apache.hyracks.data.std.util.GrowableArray in the Apache AsterixDB project.
From class UTF8StringPointableTest, method testConcat.
/**
 * Verifies {@code concat} twice with a shared builder and buffer: concatenating
 * {@code STRING_LEN_127} with {@code STRING_LEN_128} must equal the sample character
 * repeated 127 + 128 times, and concatenating {@code STRING_LEN_127} with
 * {@code STRING_EMPTY} must equal {@code STRING_LEN_127} itself.
 */
@Test
public void testConcat() throws Exception {
    final int combinedLength = 127 + 128;
    final UTF8StringPointable expectedCombined = generateUTF8Pointable(
            UTF8StringSample.generateStringRepeatBy(UTF8StringSample.ONE_ASCII_CHAR, combinedLength));

    final GrowableArray buffer = new GrowableArray();
    final UTF8StringBuilder utf8Builder = new UTF8StringBuilder();
    final UTF8StringPointable concatenated = new UTF8StringPointable();

    // Non-empty + non-empty.
    STRING_LEN_127.concat(STRING_LEN_128, utf8Builder, buffer);
    concatenated.set(buffer.getByteArray(), 0, buffer.getLength());
    assertEquals(0, expectedCombined.compareTo(concatenated));

    // Non-empty + empty; the buffer is cleared before it is reused.
    buffer.reset();
    STRING_LEN_127.concat(STRING_EMPTY, utf8Builder, buffer);
    concatenated.set(buffer.getByteArray(), 0, buffer.getLength());
    assertEquals(0, STRING_LEN_127.compareTo(concatenated));
}
Use of org.apache.hyracks.data.std.util.GrowableArray in the Apache AsterixDB project.
From class AStringConstructorDescriptor, method createEvaluatorFactory.
/**
 * Creates the evaluator factory for this function.
 *
 * <p>The produced evaluator evaluates {@code args[0]} and inspects the type tag stored in
 * the first byte of the result. A STRING value is passed through unchanged. Values of type
 * TINYINT, SMALLINT, INTEGER, BIGINT, DOUBLE, FLOAT and BOOLEAN are rendered with
 * {@link String#valueOf} and emitted as a string type tag followed by the bytes produced
 * by the string builder. Every other type tag is rejected with an
 * {@code UnsupportedTypeException}.
 *
 * @param args the argument evaluator factories; only {@code args[0]} is used
 * @return a factory whose evaluators perform the conversion described above
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                // Holds the tagged output value handed back through `result`.
                private final ArrayBackedValueStorage outputStorage = new ArrayBackedValueStorage();
                private final DataOutput output = outputStorage.getDataOutput();
                // Receives the evaluated argument (type tag byte followed by the payload).
                private final IPointable argValue = new VoidPointable();
                private final IScalarEvaluator argEvaluator = args[0].createScalarEvaluator(ctx);
                // The builder writes the converted string into `stringBytes`.
                private final UTF8StringBuilder utf8Builder = new UTF8StringBuilder();
                private final GrowableArray stringBytes = new GrowableArray();

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    try {
                        outputStorage.reset();
                        stringBytes.reset();
                        argEvaluator.evaluate(tuple, argValue);

                        final byte[] data = argValue.getByteArray();
                        final int tagOffset = argValue.getStartOffset();
                        final int length = argValue.getLength();
                        final ATypeTag tag = ATypeTag.VALUE_TYPE_MAPPING[data[tagOffset]];

                        // Already a string: hand the input back untouched.
                        if (tag == ATypeTag.STRING) {
                            result.set(argValue);
                            return;
                        }

                        utf8Builder.reset(stringBytes, length);
                        // The payload starts right after the one-byte type tag.
                        final int valueOffset = tagOffset + 1;
                        final String text;
                        switch (tag) {
                            case TINYINT: {
                                int value = AInt8SerializerDeserializer.getByte(data, valueOffset);
                                text = String.valueOf(value);
                                break;
                            }
                            case SMALLINT: {
                                int value = AInt16SerializerDeserializer.getShort(data, valueOffset);
                                text = String.valueOf(value);
                                break;
                            }
                            case INTEGER: {
                                int value = AInt32SerializerDeserializer.getInt(data, valueOffset);
                                text = String.valueOf(value);
                                break;
                            }
                            case BIGINT: {
                                long value = AInt64SerializerDeserializer.getLong(data, valueOffset);
                                text = String.valueOf(value);
                                break;
                            }
                            case DOUBLE: {
                                double value = ADoubleSerializerDeserializer.getDouble(data, valueOffset);
                                text = String.valueOf(value);
                                break;
                            }
                            case FLOAT: {
                                float value = AFloatSerializerDeserializer.getFloat(data, valueOffset);
                                text = String.valueOf(value);
                                break;
                            }
                            case BOOLEAN: {
                                boolean value = ABooleanSerializerDeserializer.getBoolean(data, valueOffset);
                                text = String.valueOf(value);
                                break;
                            }
                            // Not yet implemented: these tags fall through to the rejection below.
                            case CIRCLE:
                            case DATE:
                            case DATETIME:
                            case LINE:
                            case TIME:
                            case DURATION:
                            case YEARMONTHDURATION:
                            case DAYTIMEDURATION:
                            case INTERVAL:
                            case ARRAY:
                            case POINT:
                            case POINT3D:
                            case RECTANGLE:
                            case POLYGON:
                            case OBJECT:
                            case MULTISET:
                            case UUID:
                            default:
                                throw new UnsupportedTypeException(getIdentifier(), data[tagOffset]);
                        }

                        utf8Builder.appendString(text);
                        utf8Builder.finish();

                        // Emit the string type tag, then the bytes the builder produced.
                        output.write(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        output.write(stringBytes.getByteArray(), 0, stringBytes.getLength());
                        result.set(outputStorage);
                    } catch (IOException e) {
                        throw new InvalidDataFormatException(getIdentifier(), e, ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }
                }
            };
        }
    };
}
Use of org.apache.hyracks.data.std.util.GrowableArray in the Apache AsterixDB project.
From class NGramTokenizerTest, method runTestNGramTokenizerWithUTF8Tokens.
/**
 * Tokenizes {@code inputBuffer} into n-grams and checks, token by token, that each
 * serialized token reads back as the expected gram at the same position.
 *
 * @param prePost forwarded both to the tokenizer and to {@code getExpectedGrams}
 * @throws IOException if a serialized token cannot be read back
 */
void runTestNGramTokenizerWithUTF8Tokens(boolean prePost) throws IOException {
    UTF8NGramTokenFactory tokenFactory = new UTF8NGramTokenFactory();
    NGramUTF8StringBinaryTokenizer tokenizer =
            new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false, tokenFactory);
    tokenizer.reset(inputBuffer, 0, inputBuffer.length);

    ArrayList<String> expectedGrams = new ArrayList<String>();
    getExpectedGrams(str, gramLength, expectedGrams, prePost);

    int tokenCount = 0;
    while (tokenizer.hasNext()) {
        tokenizer.next();

        // Serialize the current token into a fresh buffer.
        GrowableArray tokenData = new GrowableArray();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenData);

        // Deserialize the token. The stream is limited to the bytes actually written:
        // getByteArray() exposes the whole backing array, which may be longer than getLength().
        ByteArrayInputStream bais =
                new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength());
        DataInput in = new DataInputStream(bais);
        UTF8StringReader reader = new UTF8StringReader();
        String strGram = reader.readUTF(in);

        Assert.assertEquals(expectedGrams.get(tokenCount), strGram);
        tokenCount++;
    }
}
Aggregations