Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
From the class SerializationDeserializationTest, the method serdeser01:
@Test
public void serdeser01() throws Exception {
    RecordDescriptor rDes = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    LineProcessor processor = new LineProcessor() {
        @Override
        public void process(String line, IDataWriter<Object[]> writer) throws Exception {
            String[] splits = line.split(" ");
            for (String s : splits) {
                writer.writeData(new Object[] { s, Integer.valueOf(1) });
            }
        }
    };
    run(rDes, processor);
}
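The test drives a word-count-style pipeline: each input line is tokenized and emitted as (word, 1) records shaped by rDes, then pushed through the run() harness. For context, the serializer itself simply round-trips Java strings through a DataOutput/DataInput pair; a minimal standalone sketch of that round trip (the stream plumbing and class name below are ours, not part of the test harness):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

public class Utf8RoundTrip {
    public static void main(String[] args) throws Exception {
        UTF8StringSerializerDeserializer serde = new UTF8StringSerializerDeserializer();
        // Write the string in Hyracks' UTF-8 binary format...
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        serde.serialize("hello world", new DataOutputStream(bos));
        // ...then read it back; a correct serde must return an equal value.
        String back = serde.deserialize(
                new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        if (!"hello world".equals(back)) {
            throw new AssertionError("round trip failed: " + back);
        }
    }
}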
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
From the class LSMBTreeMergeTestDriver, the method runTest:
@Override
protected void runTest(ISerializerDeserializer[] fieldSerdes, int numKeys, BTreeLeafFrameType leafType,
        ITupleReference lowKey, ITupleReference highKey, ITupleReference prefixLowKey,
        ITupleReference prefixHighKey) throws Exception {
    OrderedIndexTestContext ctx = createTestContext(fieldSerdes, numKeys, leafType, false);
    ctx.getIndex().create();
    ctx.getIndex().activate();
    // We assume all fieldSerdes are of the same type; check the first one
    // to determine which field types to generate.
    if (fieldSerdes[0] instanceof IntegerSerializerDeserializer) {
        orderedIndexTestUtils.bulkLoadIntTuples(ctx, numTuplesToInsert, getRandom());
    } else if (fieldSerdes[0] instanceof UTF8StringSerializerDeserializer) {
        orderedIndexTestUtils.bulkLoadStringTuples(ctx, numTuplesToInsert, getRandom());
    }
    int maxTreesToMerge = AccessMethodTestsConfig.LSM_BTREE_MAX_TREES_TO_MERGE;
    for (int i = 0; i < maxTreesToMerge; i++) {
        for (int j = 0; j < i; j++) {
            if (fieldSerdes[0] instanceof IntegerSerializerDeserializer) {
                orderedIndexTestUtils.insertIntTuples(ctx, numTuplesToInsert, getRandom());
                // Deactivate and then re-activate the index to force it to flush its in-memory component.
                ctx.getIndex().deactivate();
                ctx.getIndex().activate();
            } else if (fieldSerdes[0] instanceof UTF8StringSerializerDeserializer) {
                orderedIndexTestUtils.insertStringTuples(ctx, numTuplesToInsert, getRandom());
                // Deactivate and then re-activate the index to force it to flush its in-memory component.
                ctx.getIndex().deactivate();
                ctx.getIndex().activate();
            }
        }
        ILSMIndexAccessor accessor = (ILSMIndexAccessor) ctx.getIndexAccessor();
        accessor.scheduleMerge(NoOpIOOperationCallbackFactory.INSTANCE.createIoOpCallback(),
                ((LSMBTree) ctx.getIndex()).getImmutableComponents());
        orderedIndexTestUtils.checkPointSearches(ctx);
        orderedIndexTestUtils.checkScan(ctx);
        orderedIndexTestUtils.checkDiskOrderScan(ctx);
        orderedIndexTestUtils.checkRangeSearch(ctx, lowKey, highKey, true, true);
        if (prefixLowKey != null && prefixHighKey != null) {
            orderedIndexTestUtils.checkRangeSearch(ctx, prefixLowKey, prefixHighKey, true, true);
        }
    }
    ctx.getIndex().deactivate();
    ctx.getIndex().destroy();
}
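The deactivate/activate cycle is how the test forces a flush: deactivating an LSM index writes its in-memory component to disk, so outer-loop iteration i leaves i disk components behind before a merge is scheduled and the search checks run. A condensed sketch of that idiom (this helper is ours, not part of the driver; import paths for IIndex and HyracksDataException can vary across Hyracks versions):

// Hypothetical helper illustrating the flush-by-lifecycle idiom used above.
static void forceFlush(IIndex index) throws HyracksDataException {
    index.deactivate(); // writes the mutable in-memory component out as a new disk component
    index.activate();   // reopens the index with a fresh, empty in-memory component
}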
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
From the class LSMInvertedIndexTestUtils, the method createPersonNamesTupleGen:
public static TupleGenerator createPersonNamesTupleGen(Random rnd) throws IOException {
    IFieldValueGenerator[] fieldGens = new IFieldValueGenerator[2];
    fieldGens[0] = new PersonNameFieldValueGenerator(rnd, 0.5f);
    fieldGens[1] = new SortedIntegerFieldValueGenerator(0);
    ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE };
    TupleGenerator tupleGen = new TupleGenerator(fieldGens, fieldSerdes, 0);
    return tupleGen;
}
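The resulting generator emits two-field tuples: a randomly generated person name (field 0, serialized as a UTF-8 string) and a monotonically increasing integer starting at 0 (field 1). A hedged usage sketch, assuming the generator's next() method yields the next tuple as in the Hyracks data-generation utilities (the seed and loop bound are arbitrary):

TupleGenerator gen = LSMInvertedIndexTestUtils.createPersonNamesTupleGen(new Random(17));
for (int i = 0; i < 10; i++) {
    ITupleReference tuple = gen.next(); // field 0: person name, field 1: i-th sorted integer
    // hand the tuple to the index bulk-load or insert path under test
}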
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
From the class MinMaxStringFieldAggregatorFactory, the method createAggregator:
/*
* (non-Javadoc)
*
* @see
* org.apache.hyracks.dataflow.std.aggregators.IAggregatorDescriptorFactory
* #createAggregator(org.apache.hyracks.api.context.IHyracksTaskContext,
* org.apache.hyracks.api.dataflow.value.RecordDescriptor,
* org.apache.hyracks.api.dataflow.value.RecordDescriptor, int[])
*/
@Override
public IFieldAggregateDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
        RecordDescriptor outRecordDescriptor) throws HyracksDataException {
    return new IFieldAggregateDescriptor() {

        UTF8StringSerializerDeserializer utf8SerializerDeserializer = new UTF8StringSerializerDeserializer();

        @Override
        public void reset() {
        }

        @Override
        public void outputPartialResult(DataOutput fieldOutput, byte[] data, int offset, AggregateState state)
                throws HyracksDataException {
            try {
                if (hasBinaryState) {
                    // The binary field holds an index into the object state array.
                    int stateIdx = IntegerPointable.getInteger(data, offset);
                    Object[] storedState = (Object[]) state.state;
                    fieldOutput.writeUTF((String) storedState[stateIdx]);
                } else {
                    fieldOutput.writeUTF((String) state.state);
                }
            } catch (IOException e) {
                throw new HyracksDataException(
                        "I/O exception when writing a string to the output writer in MinMaxStringAggregatorFactory.");
            }
        }

        @Override
        public void outputFinalResult(DataOutput fieldOutput, byte[] data, int offset, AggregateState state)
                throws HyracksDataException {
            try {
                if (hasBinaryState) {
                    int stateIdx = IntegerPointable.getInteger(data, offset);
                    Object[] storedState = (Object[]) state.state;
                    fieldOutput.writeUTF((String) storedState[stateIdx]);
                } else {
                    fieldOutput.writeUTF((String) state.state);
                }
            } catch (IOException e) {
                throw new HyracksDataException(
                        "I/O exception when writing a string to the output writer in MinMaxStringAggregatorFactory.");
            }
        }

        @Override
        public void init(IFrameTupleAccessor accessor, int tIndex, DataOutput fieldOutput, AggregateState state)
                throws HyracksDataException {
            int tupleOffset = accessor.getTupleStartOffset(tIndex);
            int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
            int fieldLength = accessor.getFieldLength(tIndex, aggField);
            String strField = utf8SerializerDeserializer.deserialize(new DataInputStream(new ByteArrayInputStream(
                    accessor.getBuffer().array(), tupleOffset + accessor.getFieldSlotsLength() + fieldStart,
                    fieldLength)));
            if (hasBinaryState) {
                // Object-binary state: slot 0 of the array holds the count of used slots;
                // the int written to fieldOutput below is this group's slot index.
                Object[] storedState;
                if (state.state == null) {
                    storedState = new Object[8];
                    storedState[0] = new Integer(0);
                    state.state = storedState;
                } else {
                    storedState = (Object[]) state.state;
                }
                int stateCount = (Integer) (storedState[0]);
                if (stateCount + 1 >= storedState.length) {
                    storedState = Arrays.copyOf(storedState, storedState.length * 2);
                    state.state = storedState;
                }
                stateCount++;
                storedState[0] = stateCount;
                storedState[stateCount] = strField;
                try {
                    fieldOutput.writeInt(stateCount);
                } catch (IOException e) {
                    throw new HyracksDataException(e.fillInStackTrace());
                }
            } else {
                // Object-only state: keep the current min/max string directly.
                state.state = strField;
            }
        }

        @Override
        public void close() {
            // TODO Auto-generated method stub
        }

        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, byte[] data, int offset, AggregateState state)
                throws HyracksDataException {
            int tupleOffset = accessor.getTupleStartOffset(tIndex);
            int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
            int fieldLength = accessor.getFieldLength(tIndex, aggField);
            String strField = utf8SerializerDeserializer.deserialize(new DataInputStream(new ByteArrayInputStream(
                    accessor.getBuffer().array(), tupleOffset + accessor.getFieldSlotsLength() + fieldStart,
                    fieldLength)));
            // Note: min/max here is decided by string length, not lexicographic order.
            if (hasBinaryState) {
                int stateIdx = IntegerPointable.getInteger(data, offset);
                Object[] storedState = (Object[]) state.state;
                if (isMax) {
                    if (strField.length() > ((String) (storedState[stateIdx])).length()) {
                        storedState[stateIdx] = strField;
                    }
                } else {
                    if (strField.length() < ((String) (storedState[stateIdx])).length()) {
                        storedState[stateIdx] = strField;
                    }
                }
            } else {
                if (isMax) {
                    if (strField.length() > ((String) (state.state)).length()) {
                        state.state = strField;
                    }
                } else {
                    if (strField.length() < ((String) (state.state)).length()) {
                        state.state = strField;
                    }
                }
            }
        }

        public boolean needsObjectState() {
            return true;
        }

        public boolean needsBinaryState() {
            return hasBinaryState;
        }

        public AggregateState createState() {
            return new AggregateState();
        }
    };
}
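One detail worth calling out: despite the name, this aggregator compares strings by length(), not lexicographically, so the "max" of a group is its longest string. A two-line illustration of the rule aggregate() applies (hypothetical values, not from the source):

String current = "zz";   // current max state for the group
String incoming = "aaa"; // next value deserialized from the frame
if (incoming.length() > current.length()) {
    current = incoming;  // "aaa" replaces "zz" because 3 > 2, even though "zz" > "aaa" alphabetically
}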
Use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.
From the class PushRuntimeTest, the method scanMicroSortWrite:
@Test
public void scanMicroSortWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] fileSplits = new FileSplit[1];
    fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID,
            "data" + File.separator + "tpch0.001" + File.separator + "nation.tbl");
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
    RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider,
            new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // the algebricks op.
    InMemorySortRuntimeFactory sort = new InMemorySortRuntimeFactory(new int[] { 1 }, null,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            null);
    RecordDescriptor sortDesc = scannerDesc;
    String fileName = "scanMicroSortWrite.out";
    String filePath = PATH_ACTUAL + SEPARATOR + fileName;
    String resultFilePath = PATH_EXPECTED + SEPARATOR + fileName;
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0, 1, 2, 3 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, UTF8StringPrinterFactory.INSTANCE,
                    IntegerPrinterFactory.INSTANCE, UTF8StringPrinterFactory.INSTANCE },
            outFile, PrinterBasedWriterFactory.INSTANCE, sortDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0,
            new IPushRuntimeFactory[] { sort, writer }, new RecordDescriptor[] { sortDesc, null });
    PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    compareFiles(filePath, resultFilePath);
    outFile.delete();
}
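The job wires a file scanner into a micro-operator pipeline: '|'-delimited lines of TPC-H's nation table are parsed into [int, string, int, string] records, the in-memory sort orders them on field 1 (the nation name) with a UTF-8 string comparator, and the sink writer prints the sorted rows to a file that is compared against the expected output. An illustrative input line in the format the parser expects (the values shown are hypothetical, not copied from the data file):

0|SAMPLELAND|2|a free-text comment field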