Use of org.apache.hyracks.dataflow.std.file.ITupleParserFactory in project asterixdb by apache.
The class PigletMetadataProvider, method getScannerRuntime:
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<String> dataSource,
        List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
        List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema,
        IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig)
        throws AlgebricksException {
    PigletFileDataSource ds = (PigletFileDataSource) dataSource;
    FileSplit[] fileSplits = ds.getFileSplits();
    // One partition location per file split.
    String[] locations = new String[fileSplits.length];
    for (int i = 0; i < fileSplits.length; ++i) {
        locations[i] = fileSplits[i].getNodeName();
    }
    IFileSplitProvider fsp = new ConstantFileSplitProvider(fileSplits);
    // Build an index-aligned value parser and serializer/deserializer per column.
    Object[] colTypes = ds.getSchemaTypes();
    IValueParserFactory[] vpfs = new IValueParserFactory[colTypes.length];
    ISerializerDeserializer[] serDesers = new ISerializerDeserializer[colTypes.length];
    for (int i = 0; i < colTypes.length; ++i) {
        Type colType = (Type) colTypes[i];
        IValueParserFactory vpf;
        ISerializerDeserializer serDeser;
        switch (colType.getTag()) {
            case INTEGER:
                vpf = IntegerParserFactory.INSTANCE;
                serDeser = IntegerSerializerDeserializer.INSTANCE;
                break;
            case CHAR_ARRAY:
                vpf = UTF8StringParserFactory.INSTANCE;
                serDeser = new UTF8StringSerializerDeserializer();
                break;
            case FLOAT:
                vpf = FloatParserFactory.INSTANCE;
                serDeser = FloatSerializerDeserializer.INSTANCE;
                break;
            default:
                throw new UnsupportedOperationException();
        }
        vpfs[i] = vpf;
        serDesers[i] = serDeser;
    }
    // Comma-delimited text input, parsed into tuples described by rDesc.
    ITupleParserFactory tpf = new DelimitedDataTupleParserFactory(vpfs, ',');
    RecordDescriptor rDesc = new RecordDescriptor(serDesers);
    IOperatorDescriptor scanner = new FileScanOperatorDescriptor(jobSpec, fsp, tpf, rDesc);
    // Pin the scanner to the nodes that hold the splits.
    AlgebricksAbsolutePartitionConstraint constraint = new AlgebricksAbsolutePartitionConstraint(locations);
    return new Pair<>(scanner, constraint);
}
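The pattern here is a one-to-one mapping from schema column types to a value parser (text to binary) and a serializer/deserializer (the binary layout consumed downstream), with the two arrays feeding DelimitedDataTupleParserFactory and RecordDescriptor respectively. As a minimal sketch (not taken from the project) of the same wiring for a fixed (INTEGER, CHAR_ARRAY) schema, where jobSpec and splits are assumed to be supplied by the surrounding job-construction code:

    // Hedged sketch: build a comma-delimited file scanner for a fixed two-column schema.
    // `jobSpec` and `splits` are assumed to come from the caller; every class used here
    // appears in the snippet above.
    static IOperatorDescriptor buildCsvScanner(JobSpecification jobSpec, FileSplit[] splits) {
        // One value parser and one serializer/deserializer per column, index-aligned.
        IValueParserFactory[] vpfs = { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
        ISerializerDeserializer[] serDesers =
                { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
        ITupleParserFactory tpf = new DelimitedDataTupleParserFactory(vpfs, ',');
        RecordDescriptor rDesc = new RecordDescriptor(serDesers);
        return new FileScanOperatorDescriptor(jobSpec, new ConstantFileSplitProvider(splits), tpf, rDesc);
    }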
Use of org.apache.hyracks.dataflow.std.file.ITupleParserFactory in project asterixdb by apache.
The class TestTypedAdapterFactory, method createAdapter:
@Override
public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws HyracksDataException {
    final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId();
    final ITupleParserFactory tupleParserFactory = new ITupleParserFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public ITupleParser createTupleParser(IHyracksTaskContext ctx) throws HyracksDataException {
            ADMDataParser parser;
            ITupleForwarder forwarder;
            ArrayTupleBuilder tb;
            IApplicationContext appCtx =
                    (IApplicationContext) ctx.getJobletContext().getServiceContext().getApplicationContext();
            ClusterPartition nodePartition = appCtx.getMetadataProperties().getNodePartitions().get(nodeId)[0];
            parser = new ADMDataParser(outputType, true);
            forwarder = DataflowUtils.getTupleForwarder(configuration,
                    FeedUtils.getFeedLogManager(ctx, FeedUtils.splitsForAdapter(
                            ExternalDataUtils.getDataverse(configuration),
                            ExternalDataUtils.getFeedName(configuration), nodeId, nodePartition)));
            tb = new ArrayTupleBuilder(1);
            return new ITupleParser() {
                @Override
                public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
                    try {
                        parser.setInputStream(in);
                        forwarder.initialize(ctx, writer);
                        // Parse one record at a time until the input is exhausted.
                        while (true) {
                            tb.reset();
                            if (!parser.parse(tb.getDataOutput())) {
                                break;
                            }
                            tb.addFieldEndOffset();
                            forwarder.addTuple(tb);
                        }
                        forwarder.close();
                    } catch (Exception e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
    try {
        return new TestTypedAdapter(tupleParserFactory, outputType, ctx, configuration, partition);
    } catch (IOException e) {
        throw new HyracksDataException(e);
    }
}
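The factory above is tied to the feed infrastructure (ADMDataParser, DataflowUtils, FeedUtils), but the core loop is generic: reset the tuple builder, parse one record into its DataOutput, mark the field end, forward, and repeat. A stripped-down, self-contained sketch of the same loop might look like the following; it is not part of the test, it emits one single-field tuple per input line (the line's length), and the FrameTupleAppender/VSizeFrame forwarding is an assumed stand-in for the test's feed-specific ITupleForwarder:

    // Hedged sketch, not from TestTypedAdapterFactory. Assumed imports:
    // java.io.{BufferedReader, InputStreamReader}, org.apache.hyracks.api.comm.VSizeFrame,
    // org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender.
    ITupleParserFactory lineLengthParserFactory = new ITupleParserFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public ITupleParser createTupleParser(IHyracksTaskContext ctx) throws HyracksDataException {
            final ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
            final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
            return new ITupleParser() {
                @Override
                public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
                    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            tb.reset();
                            // Single integer field: the length of the line.
                            IntegerSerializerDeserializer.INSTANCE.serialize(line.length(), tb.getDataOutput());
                            tb.addFieldEndOffset();
                            // Append to the current frame; flush and retry when the frame is full.
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                appender.write(writer, true);
                                appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
                            }
                        }
                        // Flush whatever remains in the last partial frame.
                        appender.write(writer, true);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };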