Search in sources :

Example 1 with Type

Use of org.apache.hyracks.algebricks.examples.piglet.types.Type in the asterixdb project by Apache.

The class PigletMetadataProvider, method getScannerRuntime.

/**
 * Builds the file-scan operator for a Piglet file data source, together with the
 * absolute partition constraint that lists the node name of every file split.
 *
 * <p>Each schema column is mapped, by its type tag, to a value parser factory and a
 * serializer/deserializer; the files are parsed as ','-delimited tuples. Only
 * {@code dataSource} and {@code jobSpec} are read here; the remaining parameters
 * are not referenced by this implementation.
 *
 * @param dataSource cast to {@code PigletFileDataSource}; supplies the file splits and schema types
 * @param jobSpec job specification handed to the scan operator's constructor
 * @return the scan operator paired with its absolute partition constraint
 * @throws UnsupportedOperationException if a column's type tag is not INTEGER, CHAR_ARRAY or FLOAT
 * @throws AlgebricksException declared by the signature; not thrown directly by this body
 */
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<String> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
    final PigletFileDataSource fileSource = (PigletFileDataSource) dataSource;
    final FileSplit[] splits = fileSource.getFileSplits();

    // Collect one node name per split, in split order, for the partition constraint.
    final String[] nodeNames = new String[splits.length];
    int slot = 0;
    for (FileSplit split : splits) {
        nodeNames[slot++] = split.getNodeName();
    }
    final IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);

    // Pick a parser factory and a serializer/deserializer for every schema column.
    final Object[] schemaTypes = fileSource.getSchemaTypes();
    final int columnCount = schemaTypes.length;
    final IValueParserFactory[] parserFactories = new IValueParserFactory[columnCount];
    final ISerializerDeserializer[] serdes = new ISerializerDeserializer[columnCount];
    for (int col = 0; col < columnCount; col++) {
        final Type columnType = (Type) schemaTypes[col];
        switch (columnType.getTag()) {
            case INTEGER:
                parserFactories[col] = IntegerParserFactory.INSTANCE;
                serdes[col] = IntegerSerializerDeserializer.INSTANCE;
                break;
            case CHAR_ARRAY:
                parserFactories[col] = UTF8StringParserFactory.INSTANCE;
                serdes[col] = new UTF8StringSerializerDeserializer();
                break;
            case FLOAT:
                parserFactories[col] = FloatParserFactory.INSTANCE;
                serdes[col] = FloatSerializerDeserializer.INSTANCE;
                break;
            default:
                // Any other type tag is not handled by this scanner.
                throw new UnsupportedOperationException();
        }
    }

    final ITupleParserFactory tupleParserFactory = new DelimitedDataTupleParserFactory(parserFactories, ',');
    final RecordDescriptor recordDescriptor = new RecordDescriptor(serdes);
    final IOperatorDescriptor scanOperator =
            new FileScanOperatorDescriptor(jobSpec, splitProvider, tupleParserFactory, recordDescriptor);
    final AlgebricksAbsolutePartitionConstraint locationConstraint =
            new AlgebricksAbsolutePartitionConstraint(nodeNames);
    return new Pair<>(scanOperator, locationConstraint);
}
Also used : IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) Type(org.apache.hyracks.algebricks.examples.piglet.types.Type) ITupleParserFactory(org.apache.hyracks.dataflow.std.file.ITupleParserFactory) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)1 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)1 Pair (org.apache.hyracks.algebricks.common.utils.Pair)1 Type (org.apache.hyracks.algebricks.examples.piglet.types.Type)1 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)1 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)1 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)1 FileSplit (org.apache.hyracks.api.io.FileSplit)1 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)1 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)1 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)1 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)1 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)1 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)1 ITupleParserFactory (org.apache.hyracks.dataflow.std.file.ITupleParserFactory)1