use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.
the class PushRuntimeTest method scanAggregateWrite.
@Test
public void scanAggregateWrite() throws Exception {
JobSpecification spec = new JobSpecification(FRAME_SIZE);
// the scanner
FileSplit[] fileSplits = new FileSplit[1];
fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl");
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
// the algebricks op.
AggregateRuntimeFactory agg = new AggregateRuntimeFactory(new IAggregateEvaluatorFactory[] { new TupleCountAggregateFunctionFactory() });
RecordDescriptor aggDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
String filePath = PATH_ACTUAL + SEPARATOR + "scanAggregateWrite.out";
File outFile = new File(filePath);
SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, aggDesc);
AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { agg, writer }, new RecordDescriptor[] { aggDesc, null });
PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
spec.addRoot(algebricksOp);
AlgebricksHyracksIntegrationUtil.runJob(spec);
StringBuilder buf = new StringBuilder();
readFileToString(outFile, buf);
Assert.assertEquals("75", buf.toString());
outFile.delete();
}
use of org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory in project asterixdb by apache.
the class PushRuntimeTest method scanReplicateWrite.
@Test
public void scanReplicateWrite() throws Exception {
final int outputArity = 2;
JobSpecification spec = new JobSpecification(FRAME_SIZE);
String inputFileName = "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl";
FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, inputFileName) };
DelimitedDataTupleParserFactory stringParser = new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '