Examples with FileSplit - org.apache.hyracks.api.io.FileSplit

Example 31 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

the class TPCHCustomerOrderNestedLoopJoinTest method customerOrderCIDOuterJoinMulti.

@Test
public void customerOrderCIDOuterJoinMulti() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part2.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
    IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[ordersDesc.getFieldCount()];
    for (int j = 0; j < nonMatchWriterFactories.length; j++) {
        nonMatchWriterFactories[j] = NoopMissingWriterFactory.INSTANCE;
    }
    NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 5, true, nonMatchWriterFactories);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordScanner, 0, join, 0);
    IConnectorDescriptor custJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(custJoinConn, custScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IMissingWriterFactory(org.apache.hyracks.api.dataflow.value.IMissingWriterFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) NestedLoopJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 32 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

the class FrameFileWriterOperatorDescriptor method createPushRuntime.

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
    final FileSplit[] splits = fileSplitProvider.getFileSplits();
    final IIOManager ioManager = ctx.getIoManager();
    return new AbstractUnaryInputSinkOperatorNodePushable() {

        private OutputStream out;

        @Override
        public void open() throws HyracksDataException {
            try {
                out = new FileOutputStream(splits[partition].getFile(ioManager));
            } catch (FileNotFoundException e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            try {
                out.write(buffer.array());
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void fail() throws HyracksDataException {
        }

        @Override
        public void close() throws HyracksDataException {
            try {
                out.close();
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}

Also used : AbstractUnaryInputSinkOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) FileSplit(org.apache.hyracks.api.io.FileSplit) IIOManager(org.apache.hyracks.api.io.IIOManager) ByteBuffer(java.nio.ByteBuffer) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 33 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

the class FileRemoveOperatorDescriptor method createPushRuntime.

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FileSplit split = fileSplitProvider.getFileSplits()[partition];
    final IIOManager ioManager = ctx.getIoManager();
    return new AbstractOperatorNodePushable() {

        @Override
        public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
            throw new IllegalStateException();
        }

        @Override
        public void initialize() throws HyracksDataException {
            // will only work for files inside the io devices
            File f = split.getFile(ioManager);
            if (quietly) {
                FileUtils.deleteQuietly(f);
            } else {
                try {
                    FileUtils.deleteDirectory(f);
                } catch (IOException e) {
                    throw new HyracksDataException(e);
                }
            }
        }

        @Override
        public IFrameWriter getInputFrameWriter(int index) {
            throw new IllegalStateException();
        }

        @Override
        public int getInputArity() {
            return 0;
        }

        @Override
        public void deinitialize() throws HyracksDataException {
        }
    };
}

Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) AbstractOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOException(java.io.IOException) FileSplit(org.apache.hyracks.api.io.FileSplit) IIOManager(org.apache.hyracks.api.io.IIOManager) File(java.io.File) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 34 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

the class FileScanOperatorDescriptor method createPushRuntime.

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FileSplit split = fileSplitProvider.getFileSplits()[partition];
    final ITupleParser tp = tupleParserFactory.createTupleParser(ctx);
    final IIOManager ioManager = ctx.getIoManager();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            File f = split.getFile(ioManager);
            try {
                writer.open();
                InputStream in;
                try {
                    in = new FileInputStream(f);
                } catch (FileNotFoundException e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                }
                tp.parse(in, writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
            }
        }
    };
}

Also used : AbstractUnaryOutputSourceOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) FileSplit(org.apache.hyracks.api.io.FileSplit) IIOManager(org.apache.hyracks.api.io.IIOManager) File(java.io.File) FileInputStream(java.io.FileInputStream) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 35 with FileSplit

use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

the class FilePartition method formResponseObject.

private void formResponseObject(ObjectNode jsonResponse, FileSplit[] fileSplits, ARecordType recordType, String primaryKeys, boolean temp, Map<String, NodeControllerInfo> nodeMap) {
    ObjectMapper om = new ObjectMapper();
    ArrayNode partititons = om.createArrayNode();
    // Whether the dataset is temp or not
    jsonResponse.put("temp", temp);
    // Adds a primary key.
    jsonResponse.put("keys", primaryKeys);
    // Adds record type.
    jsonResponse.set("type", recordType.toJSON());
    // Generates file partitions.
    for (FileSplit split : fileSplits) {
        String ipAddress = nodeMap.get(split.getNodeName()).getNetworkAddress().getAddress();
        String path = split.getPath();
        FilePartition partition = new FilePartition(ipAddress, path);
        partititons.add(partition.toObjectNode());
    }
    // Generates the response object which contains the splits.
    jsonResponse.set("splits", partititons);
}

Also used : ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) FileSplit(org.apache.hyracks.api.io.FileSplit) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Aggregations

FileSplit (org.apache.hyracks.api.io.FileSplit)63 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)43 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)42 JobSpecification (org.apache.hyracks.api.job.JobSpecification)40 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)39 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)39 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)39 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)38 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)33 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)33 Test (org.junit.Test)33 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)32 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)28 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)23 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)21 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)21 File (java.io.File)18 MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor)18 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)14 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)10