Search in sources :

Example 1 with ADMDataParser

use of org.apache.asterix.external.parser.ADMDataParser in project asterixdb by apache.

the class TestTypedAdapterFactory method createAdapter.

/**
 * Builds a {@code TestTypedAdapter} for the given task context and partition.
 *
 * <p>The adapter is handed an anonymous {@link ITupleParserFactory} whose parsers read
 * records from an input stream with an {@link ADMDataParser} and push each one, as a
 * single-field tuple, through an {@link ITupleForwarder}.
 *
 * @param ctx the task context; the node id is read from its service context
 * @param partition the partition passed through to the adapter constructor
 * @return the newly constructed adapter
 * @throws HyracksDataException wrapping any {@link IOException} raised while constructing the adapter
 */
@Override
public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws HyracksDataException {
    final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId();
    final ITupleParserFactory tupleParserFactory = new ITupleParserFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public ITupleParser createTupleParser(IHyracksTaskContext taskCtx) throws HyracksDataException {
            // Resolve the first partition registered for this node; it is used to locate the feed log.
            final IApplicationContext applicationContext =
                    (IApplicationContext) taskCtx.getJobletContext().getServiceContext().getApplicationContext();
            final ClusterPartition firstPartition =
                    applicationContext.getMetadataProperties().getNodePartitions().get(nodeId)[0];
            final ADMDataParser admParser = new ADMDataParser(outputType, true);
            final ITupleForwarder tupleForwarder = DataflowUtils.getTupleForwarder(
                    configuration,
                    FeedUtils.getFeedLogManager(
                            taskCtx,
                            FeedUtils.splitsForAdapter(
                                    ExternalDataUtils.getDataverse(configuration),
                                    ExternalDataUtils.getFeedName(configuration),
                                    nodeId,
                                    firstPartition)));
            // One field per tuple: the parsed record.
            final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(1);
            return new ITupleParser() {

                @Override
                public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
                    try {
                        admParser.setInputStream(in);
                        tupleForwarder.initialize(taskCtx, writer);
                        // The builder is reset before every parse attempt, including the final
                        // one that reports the end of the input.
                        tupleBuilder.reset();
                        while (admParser.parse(tupleBuilder.getDataOutput())) {
                            tupleBuilder.addFieldEndOffset();
                            tupleForwarder.addTuple(tupleBuilder);
                            tupleBuilder.reset();
                        }
                        tupleForwarder.close();
                    } catch (Exception e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
    try {
        return new TestTypedAdapter(tupleParserFactory, outputType, ctx, configuration, partition);
    } catch (IOException e) {
        throw new HyracksDataException(e);
    }
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) ITupleParser(org.apache.hyracks.dataflow.std.file.ITupleParser) InputStream(java.io.InputStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IApplicationContext(org.apache.asterix.common.api.IApplicationContext) IOException(java.io.IOException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IOException(java.io.IOException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ADMDataParser(org.apache.asterix.external.parser.ADMDataParser) ITupleForwarder(org.apache.asterix.external.api.ITupleForwarder) ITupleParserFactory(org.apache.hyracks.dataflow.std.file.ITupleParserFactory) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 2 with ADMDataParser

use of org.apache.asterix.external.parser.ADMDataParser in project asterixdb by apache.

the class ADMDataParserTest method test.

/**
 * Runs the date, time and datetime parsing routines of {@link ADMDataParser} concurrently
 * from 16 threads and checks that every parsed value matches its expected counterpart.
 *
 * <p>Each thread owns its own parser and output stream and repeats the full set of inputs
 * 10000 times. Any failure inside a worker (including a failed assertion) is counted, and
 * the test fails if the count is non-zero once all workers have finished.
 *
 * @throws IOException declared by the signature; nothing in this method body is seen to raise it directly
 */
@Test
public void test() throws IOException {
    String[] dates = { "-9537-08-04", "9656-06-03", "-9537-04-04", "9656-06-04", "-9537-10-04", "9626-09-05" };
    AMutableDate[] parsedDates = new AMutableDate[] { new AMutableDate(-4202630), new AMutableDate(2807408), new AMutableDate(-4202752), new AMutableDate(2807409), new AMutableDate(-4202569), new AMutableDate(2796544) };
    String[] times = { "12:04:45.689Z", "12:41:59.002Z", "12:10:45.169Z", "15:37:48.736Z", "04:16:42.321Z", "12:22:56.816Z" };
    AMutableTime[] parsedTimes = new AMutableTime[] { new AMutableTime(43485689), new AMutableTime(45719002), new AMutableTime(43845169), new AMutableTime(56268736), new AMutableTime(15402321), new AMutableTime(44576816) };
    String[] dateTimes = { "-2640-10-11T17:32:15.675Z", "4104-02-01T05:59:11.902Z", "0534-12-08T08:20:31.487Z", "6778-02-16T22:40:21.653Z", "2129-12-12T13:18:35.758Z", "8647-07-01T13:10:19.691Z" };
    AMutableDateTime[] parsedDateTimes = new AMutableDateTime[] { new AMutableDateTime(-145452954464325L), new AMutableDateTime(67345192751902L), new AMutableDateTime(-45286270768513L), new AMutableDateTime(151729886421653L), new AMutableDateTime(5047449515758L), new AMutableDateTime(210721439419691L) };
    Thread[] threads = new Thread[16];
    AtomicInteger errorCount = new AtomicInteger(0);
    for (int i = 0; i < threads.length; ++i) {
        threads[i] = new Thread(new Runnable() {

            // Per-thread state: every worker gets its own parser and sink.
            ADMDataParser parser = new ADMDataParser(null, true);

            ByteArrayOutputStream bos = new ByteArrayOutputStream();

            DataOutput dos = new DataOutputStream(bos);

            @Override
            public void run() {
                try {
                    int round = 0;
                    while (round++ < 10000) {
                        // Tests parseDate.
                        for (int index = 0; index < dates.length; ++index) {
                            PA.invokeMethod(parser, "parseDate(java.lang.String, java.io.DataOutput)", dates[index], dos);
                            AMutableDate aDate = (AMutableDate) PA.getValue(parser, "aDate");
                            Assert.assertTrue(aDate.equals(parsedDates[index]));
                        }
                        // Tests parseTime.
                        for (int index = 0; index < times.length; ++index) {
                            PA.invokeMethod(parser, "parseTime(java.lang.String, java.io.DataOutput)", times[index], dos);
                            AMutableTime aTime = (AMutableTime) PA.getValue(parser, "aTime");
                            Assert.assertTrue(aTime.equals(parsedTimes[index]));
                        }
                        // Tests parseDateTime.
                        for (int index = 0; index < dateTimes.length; ++index) {
                            PA.invokeMethod(parser, "parseDateTime(java.lang.String, java.io.DataOutput)", dateTimes[index], dos);
                            AMutableDateTime aDateTime = (AMutableDateTime) PA.getValue(parser, "aDateTime");
                            Assert.assertTrue(aDateTime.equals(parsedDateTimes[index]));
                        }
                    }
                } catch (Throwable th) {
                    // Catch Throwable, not Exception: a failed Assert raises AssertionError,
                    // which is an Error and would otherwise escape uncounted, letting the
                    // test pass despite wrong parse results.
                    errorCount.incrementAndGet();
                    th.printStackTrace();
                }
            }
        });
        // Kicks off test threads.
        threads[i].start();
    }
    // Joins all the threads.
    try {
        for (int i = 0; i < threads.length; ++i) {
            threads[i].join();
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt status for whoever runs this test before failing.
        Thread.currentThread().interrupt();
        throw new IllegalStateException(e);
    }
    // Asserts no failure.
    Assert.assertEquals(0, errorCount.get());
}
Also used : DataOutput(java.io.DataOutput) DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) ADMDataParser(org.apache.asterix.external.parser.ADMDataParser) AMutableDateTime(org.apache.asterix.om.base.AMutableDateTime) AMutableDate(org.apache.asterix.om.base.AMutableDate) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AMutableTime(org.apache.asterix.om.base.AMutableTime) Test(org.junit.Test)

Example 3 with ADMDataParser

use of org.apache.asterix.external.parser.ADMDataParser in project asterixdb by apache.

the class RecordWithMetaTest method runTest.

/**
 * Converts the {@code /openbeerdb/beer.csv} resource into record-with-metadata tuples,
 * prints them to {@code target/beer.adm}, and compares that file with the expected
 * {@code /openbeerdb/beer.txt} resource.
 *
 * <p>The output stream and the line reader are released even when the run fails, so the
 * output file is no longer held open when it is deleted in the {@code finally} block.
 *
 * @throws Exception any failure raised while reading, parsing, printing or comparing;
 *         it is logged to {@code System.err} and rethrown
 */
// @Test commented out due to ASTERIXDB-1881
@SuppressWarnings({ "unchecked", "rawtypes" })
public void runTest() throws Exception {
    File file = new File("target/beer.adm");
    File expected = new File(getClass().getResource("/openbeerdb/beer.txt").toURI().getPath());
    try {
        FileUtils.deleteQuietly(file);
        try (PrintStream printStream = new PrintStream(Files.newOutputStream(Paths.get(file.toURI())))) {
            // create key type
            IAType[] keyTypes = { BuiltinType.ASTRING };
            String keyName = "id";
            List<String> keyNameAsList = new ArrayList<>(1);
            keyNameAsList.add(keyName);
            // create record type
            String[] recordFieldNames = {};
            IAType[] recordFieldTypes = {};
            recordType = new ARecordType("value", recordFieldNames, recordFieldTypes, true);
            // create the meta type
            String[] metaFieldNames = { keyName, "flags", "expiration", "cas", "rev", "vbid", "dtype" };
            IAType[] metaFieldTypes = { BuiltinType.ASTRING, BuiltinType.AINT32, BuiltinType.AINT64, BuiltinType.AINT64, BuiltinType.AINT32, BuiltinType.AINT32, BuiltinType.AINT32 };
            ARecordType metaType = new ARecordType("meta", metaFieldNames, metaFieldTypes, true);
            int valueIndex = 4;
            char delimiter = ',';
            int numOfTupleFields = 3;
            int[] pkIndexes = { 0 };
            int[] pkIndicators = { 1 };
            List<Path> paths = new ArrayList<>();
            paths.add(Paths.get(getClass().getResource("/openbeerdb/beer.csv").toURI()));
            FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
            // create input stream
            LocalFSInputStream inputStream = new LocalFSInputStream(watcher);
            // create reader record reader
            Map<String, String> config = new HashMap<>();
            config.put(ExternalDataConstants.KEY_HEADER, "true");
            config.put(ExternalDataConstants.KEY_QUOTE, ExternalDataConstants.DEFAULT_QUOTE);
            LineRecordReader lineReader = new LineRecordReader();
            lineReader.configure(inputStream, config);
            try {
                // create csv with json record reader
                CSVToRecordWithMetadataAndPKConverter recordConverter = new CSVToRecordWithMetadataAndPKConverter(valueIndex, delimiter, metaType, recordType, pkIndicators, pkIndexes, keyTypes);
                // create the value parser <ADM in this case>
                ADMDataParser valueParser = new ADMDataParser(recordType, false);
                // create parser.
                RecordWithMetadataParser parser = new RecordWithMetadataParser(metaType, valueParser, recordConverter);
                // create serializer deserializer and printer factories
                // slots 0 and 1 hold the record and meta entries; key entries follow from index 2
                ISerializerDeserializer[] serdes = new ISerializerDeserializer[keyTypes.length + 2];
                IPrinterFactory[] printerFactories = new IPrinterFactory[keyTypes.length + 2];
                for (int i = 0; i < keyTypes.length; i++) {
                    serdes[i + 2] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(keyTypes[i]);
                    printerFactories[i + 2] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(keyTypes[i]);
                }
                serdes[0] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(recordType);
                serdes[1] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaType);
                printerFactories[0] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(recordType);
                printerFactories[1] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(metaType);
                // create output descriptor
                IPrinter[] printers = new IPrinter[printerFactories.length];
                for (int i = 0; i < printerFactories.length; i++) {
                    printers[i] = printerFactories[i].createPrinter();
                }
                ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
                while (lineReader.hasNext()) {
                    IRawRecord<char[]> record = lineReader.next();
                    tb.reset();
                    // field 0: the record value
                    parser.parse(record, tb.getDataOutput());
                    tb.addFieldEndOffset();
                    // field 1: the record metadata
                    parser.parseMeta(tb.getDataOutput());
                    tb.addFieldEndOffset();
                    parser.appendLastParsedPrimaryKeyToTuple(tb);
                    //print tuple
                    printTuple(tb, printers, printStream);
                }
            } finally {
                // Release the reader on failure too; it is closed before the print stream,
                // matching the original success-path order.
                lineReader.close();
            }
        }
        // The print stream is closed at this point, so the output file is complete.
        Assert.assertTrue(FileUtils.contentEquals(file, expected));
    } catch (Throwable th) {
        System.err.println("TEST FAILED");
        th.printStackTrace();
        throw th;
    } finally {
        FileUtils.deleteQuietly(file);
    }
    System.err.println("TEST PASSED.");
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ADMDataParser(org.apache.asterix.external.parser.ADMDataParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) RecordWithMetadataParser(org.apache.asterix.external.parser.RecordWithMetadataParser) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream) Path(java.nio.file.Path) PrintStream(java.io.PrintStream) CSVToRecordWithMetadataAndPKConverter(org.apache.asterix.external.input.record.converter.CSVToRecordWithMetadataAndPKConverter) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IPrinterFactory(org.apache.hyracks.algebricks.data.IPrinterFactory) LineRecordReader(org.apache.asterix.external.input.record.reader.stream.LineRecordReader) File(java.io.File) ARecordType(org.apache.asterix.om.types.ARecordType) IPrinter(org.apache.hyracks.algebricks.data.IPrinter) IAType(org.apache.asterix.om.types.IAType)

Aggregations

ADMDataParser (org.apache.asterix.external.parser.ADMDataParser)3 IOException (java.io.IOException)2 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataOutput (java.io.DataOutput)1 DataOutputStream (java.io.DataOutputStream)1 File (java.io.File)1 InputStream (java.io.InputStream)1 PrintStream (java.io.PrintStream)1 Path (java.nio.file.Path)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 IApplicationContext (org.apache.asterix.common.api.IApplicationContext)1 ClusterPartition (org.apache.asterix.common.cluster.ClusterPartition)1 ITupleForwarder (org.apache.asterix.external.api.ITupleForwarder)1 CSVToRecordWithMetadataAndPKConverter (org.apache.asterix.external.input.record.converter.CSVToRecordWithMetadataAndPKConverter)1 LineRecordReader (org.apache.asterix.external.input.record.reader.stream.LineRecordReader)1 LocalFSInputStream (org.apache.asterix.external.input.stream.LocalFSInputStream)1 RecordWithMetadataParser (org.apache.asterix.external.parser.RecordWithMetadataParser)1