Search in sources :

Example 1 with ITupleParser

use of org.apache.hyracks.dataflow.std.file.ITupleParser in project asterixdb by apache.

From the class TestTypedAdapterFactory, method createAdapter.

/**
 * Creates a {@code TestTypedAdapter} for the given partition, backed by a tuple parser factory
 * that reads records with an {@code ADMDataParser} and pushes them through an
 * {@code ITupleForwarder}.
 *
 * @param ctx       task context; used to look up the id of the node this adapter runs on and
 *                  passed on to the adapter
 * @param partition partition number handed to the adapter
 * @return the newly constructed adapter
 * @throws HyracksDataException if constructing the adapter fails with an {@code IOException}
 */
@Override
public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws HyracksDataException {
    final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId();
    final ITupleParserFactory tupleParserFactory = new ITupleParserFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public ITupleParser createTupleParser(IHyracksTaskContext taskCtx) throws HyracksDataException {
            final IApplicationContext appCtx =
                    (IApplicationContext) taskCtx.getJobletContext().getServiceContext().getApplicationContext();
            // Only the first partition registered for this node is used.
            final ClusterPartition nodePartition =
                    appCtx.getMetadataProperties().getNodePartitions().get(nodeId)[0];
            final ADMDataParser admParser = new ADMDataParser(outputType, true);
            final ITupleForwarder tupleForwarder = DataflowUtils.getTupleForwarder(
                    configuration,
                    FeedUtils.getFeedLogManager(
                            taskCtx,
                            FeedUtils.splitsForAdapter(
                                    ExternalDataUtils.getDataverse(configuration),
                                    ExternalDataUtils.getFeedName(configuration),
                                    nodeId,
                                    nodePartition)));
            // Each record is emitted as a single-field tuple.
            final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(1);
            return new ITupleParser() {

                @Override
                public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
                    try {
                        admParser.setInputStream(in);
                        tupleForwarder.initialize(taskCtx, writer);
                        // Reset the builder before every parse attempt; stop as soon as the
                        // parser reports that no further record was produced.
                        tupleBuilder.reset();
                        while (admParser.parse(tupleBuilder.getDataOutput())) {
                            tupleBuilder.addFieldEndOffset();
                            tupleForwarder.addTuple(tupleBuilder);
                            tupleBuilder.reset();
                        }
                        tupleForwarder.close();
                    } catch (Exception e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
    try {
        return new TestTypedAdapter(tupleParserFactory, outputType, ctx, configuration, partition);
    } catch (IOException e) {
        throw new HyracksDataException(e);
    }
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) ITupleParser(org.apache.hyracks.dataflow.std.file.ITupleParser) InputStream(java.io.InputStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IApplicationContext(org.apache.asterix.common.api.IApplicationContext) IOException(java.io.IOException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IOException(java.io.IOException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ADMDataParser(org.apache.asterix.external.parser.ADMDataParser) ITupleForwarder(org.apache.asterix.external.api.ITupleForwarder) ITupleParserFactory(org.apache.hyracks.dataflow.std.file.ITupleParserFactory) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 2 with ITupleParser

use of org.apache.hyracks.dataflow.std.file.ITupleParser in project asterixdb by apache.

From the class WordTupleParserFactory, method createTupleParser.

/**
 * Creates a tuple parser that splits the input stream into words and writes each word as a
 * single-field tuple to the supplied frame writer.
 *
 * @param ctx task context used to allocate the output frame
 * @return a parser that emits one tuple per word
 */
@Override
public ITupleParser createTupleParser(final IHyracksTaskContext ctx) {
    return new ITupleParser() {

        @Override
        public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
            try {
                final FrameTupleAppender frameAppender = new FrameTupleAppender(new VSizeFrame(ctx));
                final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(1);
                final DataOutput fieldOut = tupleBuilder.getDataOutput();
                final IValueParser wordParser = UTF8StringParserFactory.INSTANCE.createValueParser();
                final WordCursor words = new WordCursor(new InputStreamReader(in));

                while (words.nextWord()) {
                    tupleBuilder.reset();
                    // The cursor exposes the current word as the range [fStart, fEnd) of its buffer.
                    final int wordLength = words.fEnd - words.fStart;
                    wordParser.parse(words.buffer, words.fStart, wordLength, fieldOut);
                    tupleBuilder.addFieldEndOffset();
                    FrameUtils.appendToWriter(
                            writer,
                            frameAppender,
                            tupleBuilder.getFieldEndOffsets(),
                            tupleBuilder.getByteArray(),
                            0,
                            tupleBuilder.getSize());
                }
                // Push out whatever is still buffered in the appender.
                frameAppender.write(writer, true);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) DataOutput(java.io.DataOutput) InputStreamReader(java.io.InputStreamReader) ITupleParser(org.apache.hyracks.dataflow.std.file.ITupleParser) InputStream(java.io.InputStream) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IValueParser(org.apache.hyracks.dataflow.common.data.parsers.IValueParser) IOException(java.io.IOException) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 3 with ITupleParser

use of org.apache.hyracks.dataflow.std.file.ITupleParser in project asterixdb by apache.

From the class StringStreamingRuntimeFactory, method createOneOutputPushRuntime.

/**
 * Creates a push runtime that pipes incoming tuples through an external process.
 *
 * <p>On {@code open()} the runtime launches {@code command}, wraps the process's standard input
 * in a {@link PrintStream}, and starts two threads: one feeds the process's standard output to a
 * tuple parser that writes to the downstream {@code writer}, the other copies the process's
 * standard error to {@code System.err}. Each incoming tuple is printed to the process one field
 * at a time, every field followed by {@code fieldDelimiter} and the last one additionally by a
 * newline. {@code close()} closes the process's input, waits for the process and both threads,
 * and fails if the process exits with a non-zero status.
 *
 * @param ctx task context used to create the tuple parser and to initialise frame access
 * @return the runtime described above
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public AbstractOneInputOneOutputOneFramePushRuntime createOneOutputPushRuntime(final IHyracksTaskContext ctx) throws HyracksDataException {
    final IPrinter[] printers = new IPrinter[printerFactories.length];
    for (int i = 0; i < printerFactories.length; i++) {
        printers[i] = printerFactories[i].createPrinter();
    }
    return new AbstractOneInputOneOutputOneFramePushRuntime() {

        /** Parses the process's standard output and forwards the resulting tuples to {@code writer}. */
        final class ForwardScriptOutput implements Runnable {

            private final InputStream inStream;

            private final ITupleParser parser;

            public ForwardScriptOutput(ITupleParser parser, InputStream inStream) {
                this.parser = parser;
                this.inStream = inStream;
            }

            @Override
            public void run() {
                try {
                    parser.parse(inStream, writer);
                } catch (HyracksDataException e) {
                    throw new RuntimeException(e);
                } finally {
                    try {
                        inStream.close();
                    } catch (Exception ignored) {
                        // Best-effort close: parsing has already finished (or failed), and a
                        // failure to close the stream must not mask the parse outcome.
                    }
                }
            }
        }

        /** Copies an input stream line by line to a print stream owned by the caller. */
        final class DumpInStreamToPrintStream implements Runnable {

            private final BufferedReader reader;

            private final PrintStream printStream;

            public DumpInStreamToPrintStream(InputStream inStream, PrintStream printStream) {
                this.reader = new BufferedReader(new InputStreamReader(inStream));
                this.printStream = printStream;
            }

            @Override
            public void run() {
                String s;
                try {
                    while ((s = reader.readLine()) != null) {
                        printStream.println(s);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    try {
                        reader.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    // The target stream is supplied by the caller (System.err in open()), so it
                    // must not be closed here: closing it would silence stderr for the whole
                    // JVM. Flushing is enough to make the copied output visible.
                    printStream.flush();
                }
            }
        }

        private Process process;

        private PrintStream ps;

        private boolean first = true;

        private Thread outputPipe;

        private Thread dumpStderr;

        @Override
        public void open() throws HyracksDataException {
            if (first) {
                first = false;
                initAccessAppendRef(ctx);
            }
            try {
                ITupleParser parser = parserFactory.createTupleParser(ctx);
                process = Runtime.getRuntime().exec(command);
                // Tuples are written to the process through its standard input.
                ps = new PrintStream(process.getOutputStream());
                ForwardScriptOutput fso = new ForwardScriptOutput(parser, process.getInputStream());
                outputPipe = new Thread(fso);
                outputPipe.start();
                DumpInStreamToPrintStream disps = new DumpInStreamToPrintStream(process.getErrorStream(), System.err);
                dumpStderr = new Thread(disps);
                dumpStderr.start();
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            tAccess.reset(buffer);
            int nTuple = tAccess.getTupleCount();
            for (int t = 0; t < nTuple; t++) {
                tRef.reset(tAccess, t);
                for (int i = 0; i < printers.length; i++) {
                    printers[i].print(buffer.array(), tRef.getFieldStart(i), tRef.getFieldLength(i), ps);
                    ps.print(fieldDelimiter);
                    // Terminate the record after its last field.
                    if (i == printers.length - 1) {
                        ps.print('\n');
                    }
                }
            }
        }

        @Override
        public void close() throws HyracksDataException {
            // first close the printer printing to the process
            ps.close();
            int ret = 0;
            try {
                ret = process.waitFor();
                outputPipe.join();
                dumpStderr.join();
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can still observe
                // that this thread was interrupted.
                Thread.currentThread().interrupt();
                throw new HyracksDataException(e);
            }
            if (ret != 0) {
                throw new HyracksDataException("Process exit value: " + ret);
            }
            // close the following operator in the chain
            super.close();
        }

        @Override
        public void flush() throws HyracksDataException {
            ps.flush();
        }
    };
}
Also used : PrintStream(java.io.PrintStream) InputStreamReader(java.io.InputStreamReader) AbstractOneInputOneOutputOneFramePushRuntime(org.apache.hyracks.algebricks.runtime.operators.base.AbstractOneInputOneOutputOneFramePushRuntime) InputStream(java.io.InputStream) ITupleParser(org.apache.hyracks.dataflow.std.file.ITupleParser) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) IPrinter(org.apache.hyracks.algebricks.data.IPrinter)

Aggregations

IOException (java.io.IOException)3 InputStream (java.io.InputStream)3 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)3 ITupleParser (org.apache.hyracks.dataflow.std.file.ITupleParser)3 InputStreamReader (java.io.InputStreamReader)2 IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)2 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)2 BufferedReader (java.io.BufferedReader)1 DataOutput (java.io.DataOutput)1 PrintStream (java.io.PrintStream)1 ByteBuffer (java.nio.ByteBuffer)1 IApplicationContext (org.apache.asterix.common.api.IApplicationContext)1 ClusterPartition (org.apache.asterix.common.cluster.ClusterPartition)1 ITupleForwarder (org.apache.asterix.external.api.ITupleForwarder)1 ADMDataParser (org.apache.asterix.external.parser.ADMDataParser)1 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)1 IPrinter (org.apache.hyracks.algebricks.data.IPrinter)1 AbstractOneInputOneOutputOneFramePushRuntime (org.apache.hyracks.algebricks.runtime.operators.base.AbstractOneInputOneOutputOneFramePushRuntime)1 VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame)1 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)1