Search in sources :

Example 1 with ITupleParserFactory

use of org.apache.hyracks.dataflow.std.file.ITupleParserFactory in project asterixdb by apache.

the class PigletMetadataProvider method getScannerRuntime.

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<String> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
    PigletFileDataSource ds = (PigletFileDataSource) dataSource;
    FileSplit[] fileSplits = ds.getFileSplits();
    String[] locations = new String[fileSplits.length];
    for (int i = 0; i < fileSplits.length; ++i) {
        locations[i] = fileSplits[i].getNodeName();
    }
    IFileSplitProvider fsp = new ConstantFileSplitProvider(fileSplits);
    Object[] colTypes = ds.getSchemaTypes();
    IValueParserFactory[] vpfs = new IValueParserFactory[colTypes.length];
    ISerializerDeserializer[] serDesers = new ISerializerDeserializer[colTypes.length];
    for (int i = 0; i < colTypes.length; ++i) {
        Type colType = (Type) colTypes[i];
        IValueParserFactory vpf;
        ISerializerDeserializer serDeser;
        switch(colType.getTag()) {
            case INTEGER:
                vpf = IntegerParserFactory.INSTANCE;
                serDeser = IntegerSerializerDeserializer.INSTANCE;
                break;
            case CHAR_ARRAY:
                vpf = UTF8StringParserFactory.INSTANCE;
                serDeser = new UTF8StringSerializerDeserializer();
                break;
            case FLOAT:
                vpf = FloatParserFactory.INSTANCE;
                serDeser = FloatSerializerDeserializer.INSTANCE;
                break;
            default:
                throw new UnsupportedOperationException();
        }
        vpfs[i] = vpf;
        serDesers[i] = serDeser;
    }
    ITupleParserFactory tpf = new DelimitedDataTupleParserFactory(vpfs, ',');
    RecordDescriptor rDesc = new RecordDescriptor(serDesers);
    IOperatorDescriptor scanner = new FileScanOperatorDescriptor(jobSpec, fsp, tpf, rDesc);
    AlgebricksAbsolutePartitionConstraint constraint = new AlgebricksAbsolutePartitionConstraint(locations);
    return new Pair<>(scanner, constraint);
}
Also used : IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) Type(org.apache.hyracks.algebricks.examples.piglet.types.Type) ITupleParserFactory(org.apache.hyracks.dataflow.std.file.ITupleParserFactory) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 2 with ITupleParserFactory

use of org.apache.hyracks.dataflow.std.file.ITupleParserFactory in project asterixdb by apache.

the class TestTypedAdapterFactory method createAdapter.

@Override
public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws HyracksDataException {
    final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId();
    final ITupleParserFactory tupleParserFactory = new ITupleParserFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public ITupleParser createTupleParser(IHyracksTaskContext ctx) throws HyracksDataException {
            ADMDataParser parser;
            ITupleForwarder forwarder;
            ArrayTupleBuilder tb;
            IApplicationContext appCtx = (IApplicationContext) ctx.getJobletContext().getServiceContext().getApplicationContext();
            ClusterPartition nodePartition = appCtx.getMetadataProperties().getNodePartitions().get(nodeId)[0];
            parser = new ADMDataParser(outputType, true);
            forwarder = DataflowUtils.getTupleForwarder(configuration, FeedUtils.getFeedLogManager(ctx, FeedUtils.splitsForAdapter(ExternalDataUtils.getDataverse(configuration), ExternalDataUtils.getFeedName(configuration), nodeId, nodePartition)));
            tb = new ArrayTupleBuilder(1);
            return new ITupleParser() {

                @Override
                public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
                    try {
                        parser.setInputStream(in);
                        forwarder.initialize(ctx, writer);
                        while (true) {
                            tb.reset();
                            if (!parser.parse(tb.getDataOutput())) {
                                break;
                            }
                            tb.addFieldEndOffset();
                            forwarder.addTuple(tb);
                        }
                        forwarder.close();
                    } catch (Exception e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
    try {
        return new TestTypedAdapter(tupleParserFactory, outputType, ctx, configuration, partition);
    } catch (IOException e) {
        throw new HyracksDataException(e);
    }
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) ITupleParser(org.apache.hyracks.dataflow.std.file.ITupleParser) InputStream(java.io.InputStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IApplicationContext(org.apache.asterix.common.api.IApplicationContext) IOException(java.io.IOException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IOException(java.io.IOException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ADMDataParser(org.apache.asterix.external.parser.ADMDataParser) ITupleForwarder(org.apache.asterix.external.api.ITupleForwarder) ITupleParserFactory(org.apache.hyracks.dataflow.std.file.ITupleParserFactory) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Aggregations

ITupleParserFactory (org.apache.hyracks.dataflow.std.file.ITupleParserFactory)2 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 IApplicationContext (org.apache.asterix.common.api.IApplicationContext)1 ClusterPartition (org.apache.asterix.common.cluster.ClusterPartition)1 ITupleForwarder (org.apache.asterix.external.api.ITupleForwarder)1 ADMDataParser (org.apache.asterix.external.parser.ADMDataParser)1 AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)1 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)1 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)1 Pair (org.apache.hyracks.algebricks.common.utils.Pair)1 Type (org.apache.hyracks.algebricks.examples.piglet.types.Type)1 IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)1 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)1 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)1 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)1 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)1 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)1 FileSplit (org.apache.hyracks.api.io.FileSplit)1 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)1