Search in sources:

Example 76 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class SerializationDeserializationTest, method serdeser01:

@Test
public void serdeser01() throws Exception {
    // Record layout under test: a UTF-8 string field followed by an int field.
    RecordDescriptor recordDescriptor = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    // Tokenize each input line on single spaces and emit a (token, 1) record per token.
    LineProcessor tokenEmitter = (String line, IDataWriter<Object[]> writer) -> {
        for (String token : line.split(" ")) {
            writer.writeData(new Object[] { token, Integer.valueOf(1) });
        }
    };
    run(recordDescriptor, tokenEmitter);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IDataWriter(org.apache.hyracks.api.dataflow.IDataWriter) Test(org.junit.Test)

Example 77 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class LSMBTreeMergeTestDriver, method runTest:

@Override
protected void runTest(ISerializerDeserializer[] fieldSerdes, int numKeys, BTreeLeafFrameType leafType, ITupleReference lowKey, ITupleReference highKey, ITupleReference prefixLowKey, ITupleReference prefixHighKey) throws Exception {
    OrderedIndexTestContext ctx = createTestContext(fieldSerdes, numKeys, leafType, false);
    ctx.getIndex().create();
    ctx.getIndex().activate();
    // The first field's serde determines which field types to generate.
    if (fieldSerdes[0] instanceof IntegerSerializerDeserializer) {
        orderedIndexTestUtils.bulkLoadIntTuples(ctx, numTuplesToInsert, getRandom());
    } else if (fieldSerdes[0] instanceof UTF8StringSerializerDeserializer) {
        orderedIndexTestUtils.bulkLoadStringTuples(ctx, numTuplesToInsert, getRandom());
    }
    int maxTreesToMerge = AccessMethodTestsConfig.LSM_BTREE_MAX_TREES_TO_MERGE;
    for (int i = 0; i < maxTreesToMerge; i++) {
        // Build i disk components, then merge and re-verify the index contents.
        for (int j = 0; j < i; j++) {
            // Insert a batch of tuples matching the key type, if it is one we recognize.
            boolean inserted = false;
            if (fieldSerdes[0] instanceof IntegerSerializerDeserializer) {
                orderedIndexTestUtils.insertIntTuples(ctx, numTuplesToInsert, getRandom());
                inserted = true;
            } else if (fieldSerdes[0] instanceof UTF8StringSerializerDeserializer) {
                orderedIndexTestUtils.insertStringTuples(ctx, numTuplesToInsert, getRandom());
                inserted = true;
            }
            if (inserted) {
                // Deactivate and then re-activate the index to force it to flush its in-memory component.
                ctx.getIndex().deactivate();
                ctx.getIndex().activate();
            }
        }
        ILSMIndexAccessor accessor = (ILSMIndexAccessor) ctx.getIndexAccessor();
        accessor.scheduleMerge(NoOpIOOperationCallbackFactory.INSTANCE.createIoOpCallback(), ((LSMBTree) ctx.getIndex()).getImmutableComponents());
        orderedIndexTestUtils.checkPointSearches(ctx);
        orderedIndexTestUtils.checkScan(ctx);
        orderedIndexTestUtils.checkDiskOrderScan(ctx);
        orderedIndexTestUtils.checkRangeSearch(ctx, lowKey, highKey, true, true);
        if (prefixLowKey != null && prefixHighKey != null) {
            orderedIndexTestUtils.checkRangeSearch(ctx, prefixLowKey, prefixHighKey, true, true);
        }
    }
    ctx.getIndex().deactivate();
    ctx.getIndex().destroy();
}
Also used : IntegerSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer) OrderedIndexTestContext(org.apache.hyracks.storage.am.btree.OrderedIndexTestContext) ILSMIndexAccessor(org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)

Example 78 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class LSMInvertedIndexTestUtils, method createPersonNamesTupleGen:

/**
 * Builds a tuple generator producing (person-name string, sorted int) tuples.
 */
public static TupleGenerator createPersonNamesTupleGen(Random rnd) throws IOException {
    // Field 0: generated person names; field 1: monotonically increasing ints starting at 0.
    IFieldValueGenerator[] fieldGens = new IFieldValueGenerator[] { new PersonNameFieldValueGenerator(rnd, 0.5f),
            new SortedIntegerFieldValueGenerator(0) };
    ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE };
    return new TupleGenerator(fieldGens, fieldSerdes, 0);
}
Also used : SortedIntegerFieldValueGenerator(org.apache.hyracks.storage.am.common.datagen.SortedIntegerFieldValueGenerator) PersonNameFieldValueGenerator(org.apache.hyracks.storage.am.common.datagen.PersonNameFieldValueGenerator) IFieldValueGenerator(org.apache.hyracks.storage.am.common.datagen.IFieldValueGenerator) TupleGenerator(org.apache.hyracks.storage.am.common.datagen.TupleGenerator) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)

Example 79 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class MinMaxStringFieldAggregatorFactory, method createAggregator:

/**
 * Creates a field aggregator that keeps the min or max string value of the
 * aggregated field. NOTE(review): values are compared by {@code String.length()},
 * not lexicographically — confirm this is the intended ordering.
 * State is either a plain object state, or (when {@code hasBinaryState}) an index
 * into a growable object array whose slot 0 holds the current entry count.
 *
 * @see org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory
 */
@Override
public IFieldAggregateDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor) throws HyracksDataException {
    return new IFieldAggregateDescriptor() {

        UTF8StringSerializerDeserializer utf8SerializerDeserializer = new UTF8StringSerializerDeserializer();

        /**
         * Deserializes the aggregated string field of the given tuple.
         */
        private String readStringField(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int tupleOffset = accessor.getTupleStartOffset(tIndex);
            int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
            int fieldLength = accessor.getFieldLength(tIndex, aggField);
            return utf8SerializerDeserializer.deserialize(new DataInputStream(new ByteArrayInputStream(
                    accessor.getBuffer().array(), tupleOffset + accessor.getFieldSlotsLength() + fieldStart,
                    fieldLength)));
        }

        /**
         * Writes the current aggregate string, resolving it through the binary
         * state index when binary state is used. Shared by both partial and
         * final result output (they are identical for min/max).
         */
        private void writeState(DataOutput fieldOutput, byte[] data, int offset, AggregateState state)
                throws HyracksDataException {
            try {
                if (hasBinaryState) {
                    int stateIdx = IntegerPointable.getInteger(data, offset);
                    Object[] storedState = (Object[]) state.state;
                    fieldOutput.writeUTF((String) storedState[stateIdx]);
                } else {
                    fieldOutput.writeUTF((String) state.state);
                }
            } catch (IOException e) {
                // Chain the cause so the underlying I/O failure is not lost.
                throw new HyracksDataException("I/O exception when writing a string to the output writer in MinMaxStringAggregatorFactory.", e);
            }
        }

        @Override
        public void reset() {
            // All state lives in AggregateState instances; nothing to reset here.
        }

        @Override
        public void outputPartialResult(DataOutput fieldOutput, byte[] data, int offset, AggregateState state) throws HyracksDataException {
            writeState(fieldOutput, data, offset, state);
        }

        @Override
        public void outputFinalResult(DataOutput fieldOutput, byte[] data, int offset, AggregateState state) throws HyracksDataException {
            writeState(fieldOutput, data, offset, state);
        }

        @Override
        public void init(IFrameTupleAccessor accessor, int tIndex, DataOutput fieldOutput, AggregateState state) throws HyracksDataException {
            String strField = readStringField(accessor, tIndex);
            if (hasBinaryState) {
                // Object-binary-state: append the string to the shared object array
                // and write its index into the binary aggregate field.
                Object[] storedState;
                if (state.state == null) {
                    storedState = new Object[8];
                    storedState[0] = Integer.valueOf(0);
                    state.state = storedState;
                } else {
                    storedState = (Object[]) state.state;
                }
                int stateCount = (Integer) (storedState[0]);
                if (stateCount + 1 >= storedState.length) {
                    // Grow geometrically; slot 0 stays the count.
                    storedState = Arrays.copyOf(storedState, storedState.length * 2);
                    state.state = storedState;
                }
                stateCount++;
                storedState[0] = stateCount;
                storedState[stateCount] = strField;
                try {
                    fieldOutput.writeInt(stateCount);
                } catch (IOException e) {
                    throw new HyracksDataException(e);
                }
            } else {
                // Only object-state: the string itself is the aggregate.
                state.state = strField;
            }
        }

        @Override
        public void close() {
            // Nothing to release.
        }

        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, byte[] data, int offset, AggregateState state) throws HyracksDataException {
            String strField = readStringField(accessor, tIndex);
            // Comparison is by string length (see class-level note).
            if (hasBinaryState) {
                int stateIdx = IntegerPointable.getInteger(data, offset);
                Object[] storedState = (Object[]) state.state;
                if (isMax) {
                    if (strField.length() > ((String) (storedState[stateIdx])).length()) {
                        storedState[stateIdx] = strField;
                    }
                } else {
                    if (strField.length() < ((String) (storedState[stateIdx])).length()) {
                        storedState[stateIdx] = strField;
                    }
                }
            } else {
                if (isMax) {
                    if (strField.length() > ((String) (state.state)).length()) {
                        state.state = strField;
                    }
                } else {
                    if (strField.length() < ((String) (state.state)).length()) {
                        state.state = strField;
                    }
                }
            }
        }

        @Override
        public boolean needsObjectState() {
            return true;
        }

        @Override
        public boolean needsBinaryState() {
            return hasBinaryState;
        }

        @Override
        public AggregateState createState() {
            return new AggregateState();
        }
    };
}
Also used : DataOutput(java.io.DataOutput) IOException(java.io.IOException) IFieldAggregateDescriptor(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) DataInputStream(java.io.DataInputStream) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) AggregateState(org.apache.hyracks.dataflow.std.group.AggregateState) ByteArrayInputStream(java.io.ByteArrayInputStream) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor)

Example 80 with UTF8StringSerializerDeserializer

use of org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer in project asterixdb by apache.

From the class PushRuntimeTest, method scanMicroSortWrite:

@Test
public void scanMicroSortWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // Source: scan the '|'-delimited TPC-H 0.001 nation table as (int, string, int, string).
    FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID,
            "data" + File.separator + "tpch0.001" + File.separator + "nation.tbl") };
    IFileSplitProvider inputSplitProvider = new ConstantFileSplitProvider(inputSplits);
    RecordDescriptor nationDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    IValueParserFactory[] fieldParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, inputSplitProvider,
            new DelimitedDataTupleParserFactory(fieldParsers, '|'), nationDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner,
            new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
    // The Algebricks micro-op pipeline: in-memory sort on field 1 (a string), then a sink writer.
    InMemorySortRuntimeFactory sort = new InMemorySortRuntimeFactory(new int[] { 1 }, null,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, null);
    RecordDescriptor sortDesc = nationDesc;
    String fileName = "scanMicroSortWrite.out";
    String filePath = PATH_ACTUAL + SEPARATOR + fileName;
    String resultFilePath = PATH_EXPECTED + SEPARATOR + fileName;
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0, 1, 2, 3 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, UTF8StringPrinterFactory.INSTANCE,
                    IntegerPrinterFactory.INSTANCE, UTF8StringPrinterFactory.INSTANCE },
            outFile, PrinterBasedWriterFactory.INSTANCE, sortDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0,
            new IPushRuntimeFactory[] { sort, writer }, new RecordDescriptor[] { sortDesc, null });
    PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    // Compare the produced file against the expected output, then clean up.
    compareFiles(filePath, resultFilePath);
    outFile.delete();
}
Also used : InMemorySortRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.sort.InMemorySortRuntimeFactory) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) File(java.io.File) Test(org.junit.Test)

Aggregations

UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)94 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)74 Test (org.junit.Test)69 JobSpecification (org.apache.hyracks.api.job.JobSpecification)67 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)62 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)51 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)48 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)45 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)37 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)37 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)36 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)34 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)34 FileSplit (org.apache.hyracks.api.io.FileSplit)33 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)33 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)31 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)28 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)26 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)24 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)24