Search in sources :

Example 1 with LocalFSInputStream

use of org.apache.asterix.external.input.stream.LocalFSInputStream in project asterixdb by apache.

the class ClassAdToADMTest method testEscaping.

/**
 * Reads every {@code [ ... ]}-delimited record from {@code /classad/escapes.txt}, parses it
 * into a ClassAd and compares the ClassAd's string form with the expected text.
 *
 * Any exception raised while reading or parsing fails the test, and the exception is named
 * in the failure message.
 */
public void testEscaping() {
    try {
        ClassAdObjectPool objectPool = new ClassAdObjectPool();
        ClassAd pAd = new ClassAd(objectPool);
        String[] files = new String[] { "/classad/escapes.txt" };
        ClassAdParser parser = new ClassAdParser(objectPool);
        CharArrayLexerSource lexerSource = new CharArrayLexerSource();
        for (String path : files) {
            List<Path> paths = new ArrayList<>();
            // Records in the input are delimited by square brackets.
            Map<String, String> config = new HashMap<>();
            config.put(ExternalDataConstants.KEY_RECORD_START, "[");
            config.put(ExternalDataConstants.KEY_RECORD_END, "]");
            paths.add(Paths.get(getClass().getResource(path).toURI()));
            FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
            LocalFSInputStream in = new LocalFSInputStream(watcher);
            SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader();
            recordReader.configure(in, config);
            try {
                // NOTE(review): val is reset on every record but never read afterwards;
                // presumably removable - confirm Value has no side effects on the pool.
                Value val = new Value(objectPool);
                while (recordReader.hasNext()) {
                    val.reset();
                    IRawRecord<char[]> record = recordReader.next();
                    lexerSource.setNewSource(record.get());
                    parser.setLexerSource(lexerSource);
                    parser.parseNext(pAd);
                    Assert.assertEquals("[ Args = \"“-1 0.1 0.1 0.5 2e-07 0.001 10 -1”\"; GlobalJobId = \"submit-4.chtc.wisc.edu#3724038.0#1462893042\" ]", pAd.toString());
                }
            } finally {
                // Close the reader whether or not parsing or the assertion succeeded.
                recordReader.close();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        // Name the cause in the failure itself instead of failing with a message-less
        // assertTrue(false), so the test report shows what went wrong.
        Assert.fail("testEscaping failed with " + e);
    }
}
Also used : Path(java.nio.file.Path) ClassAdObjectPool(org.apache.asterix.external.classad.object.pool.ClassAdObjectPool) ClassAd(org.apache.asterix.external.classad.ClassAd) HashMap(java.util.HashMap) CharArrayLexerSource(org.apache.asterix.external.classad.CharArrayLexerSource) ArrayList(java.util.ArrayList) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ClassAdParser(org.apache.asterix.external.library.ClassAdParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) Value(org.apache.asterix.external.classad.Value) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream)

Example 2 with LocalFSInputStream

use of org.apache.asterix.external.input.stream.LocalFSInputStream in project asterixdb by apache.

the class ByteBufUTF8DecodeTest method testDecodingJsonRecords.

/**
 * Streams the {@code { ... }}-delimited records of {@code /record.json} through a
 * {@link SemiStructuredRecordReader} and hands the text of each record to {@code process}.
 * Anything thrown while reading or processing a record is printed and fails the test.
 */
@Test
public void testDecodingJsonRecords() throws URISyntaxException, IOException {
    // Locate the JSON fixture on the classpath; it is the only path handed to the watcher.
    final Path jsonFixture = Paths.get(getClass().getResource("/record.json").toURI());
    final List<Path> watchedPaths = new ArrayList<>();
    watchedPaths.add(jsonFixture);

    // Records are delimited by curly braces.
    final Map<String, String> readerConfig = new HashMap<>();
    readerConfig.put(ExternalDataConstants.KEY_RECORD_START, "{");
    readerConfig.put(ExternalDataConstants.KEY_RECORD_END, "}");

    final LocalFSInputStream stream = new LocalFSInputStream(new FileSystemWatcher(watchedPaths, null, false));
    try (SemiStructuredRecordReader reader = new SemiStructuredRecordReader()) {
        reader.configure(stream, readerConfig);
        while (reader.hasNext()) {
            try {
                final IRawRecord<char[]> rawRecord = reader.next();
                final String recordText = rawRecord.toString();
                process(recordText);
            } catch (Throwable failure) {
                failure.printStackTrace();
                Assert.fail(failure.getMessage());
            }
        }
    }
}
Also used : Path(java.nio.file.Path) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) HashMap(java.util.HashMap) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) ArrayList(java.util.ArrayList) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream) Test(org.junit.Test)

Example 3 with LocalFSInputStream

use of org.apache.asterix.external.input.stream.LocalFSInputStream in project asterixdb by apache.

the class RecordWithMetaTest method runTest.

/**
 * Reads {@code /openbeerdb/beer.csv} line by line, parses each line into a record, its meta
 * record and its primary key, prints the resulting tuples to {@code target/beer.adm} and
 * compares that file with {@code /openbeerdb/beer.txt}.
 *
 * The output file is deleted before and after the run. The output stream and the line
 * reader are closed on every path, so a parsing failure no longer leaves them open while
 * the file is being deleted.
 *
 * @throws Exception whatever the read/parse/print pipeline threw, rethrown after logging
 */
// @Test commented out due to ASTERIXDB-1881
@SuppressWarnings({ "unchecked", "rawtypes" })
public void runTest() throws Exception {
    File file = new File("target/beer.adm");
    File expected = new File(getClass().getResource("/openbeerdb/beer.txt").toURI().getPath());
    try {
        FileUtils.deleteQuietly(file);
        // try-with-resources releases the output file even when parsing throws.
        try (PrintStream printStream = new PrintStream(Files.newOutputStream(Paths.get(file.toURI())))) {
            // create key type
            IAType[] keyTypes = { BuiltinType.ASTRING };
            String keyName = "id";
            List<String> keyNameAsList = new ArrayList<>(1);
            keyNameAsList.add(keyName);
            // create record type
            String[] recordFieldNames = {};
            IAType[] recordFieldTypes = {};
            recordType = new ARecordType("value", recordFieldNames, recordFieldTypes, true);
            // create the meta type
            String[] metaFieldNames = { keyName, "flags", "expiration", "cas", "rev", "vbid", "dtype" };
            IAType[] metaFieldTypes = { BuiltinType.ASTRING, BuiltinType.AINT32, BuiltinType.AINT64, BuiltinType.AINT64, BuiltinType.AINT32, BuiltinType.AINT32, BuiltinType.AINT32 };
            ARecordType metaType = new ARecordType("meta", metaFieldNames, metaFieldTypes, true);
            int valueIndex = 4;
            char delimiter = ',';
            // One tuple field each for the record, the meta record and the primary key
            // (see the three appends in the loop below).
            int numOfTupleFields = 3;
            int[] pkIndexes = { 0 };
            int[] pkIndicators = { 1 };
            List<Path> paths = new ArrayList<>();
            paths.add(Paths.get(getClass().getResource("/openbeerdb/beer.csv").toURI()));
            FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
            // create input stream
            LocalFSInputStream inputStream = new LocalFSInputStream(watcher);
            // create reader record reader
            Map<String, String> config = new HashMap<>();
            config.put(ExternalDataConstants.KEY_HEADER, "true");
            config.put(ExternalDataConstants.KEY_QUOTE, ExternalDataConstants.DEFAULT_QUOTE);
            LineRecordReader lineReader = new LineRecordReader();
            lineReader.configure(inputStream, config);
            try {
                // create csv with json record reader
                CSVToRecordWithMetadataAndPKConverter recordConverter = new CSVToRecordWithMetadataAndPKConverter(valueIndex, delimiter, metaType, recordType, pkIndicators, pkIndexes, keyTypes);
                // create the value parser <ADM in this case>
                ADMDataParser valueParser = new ADMDataParser(recordType, false);
                // create parser.
                RecordWithMetadataParser parser = new RecordWithMetadataParser(metaType, valueParser, recordConverter);
                // create serializer deserializer and printer factories
                // slots 0 and 1 hold the record and meta types; key types follow from index 2
                ISerializerDeserializer[] serdes = new ISerializerDeserializer[keyTypes.length + 2];
                IPrinterFactory[] printerFactories = new IPrinterFactory[keyTypes.length + 2];
                for (int i = 0; i < keyTypes.length; i++) {
                    serdes[i + 2] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(keyTypes[i]);
                    printerFactories[i + 2] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(keyTypes[i]);
                }
                serdes[0] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(recordType);
                serdes[1] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaType);
                printerFactories[0] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(recordType);
                printerFactories[1] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(metaType);
                // create output descriptor
                IPrinter[] printers = new IPrinter[printerFactories.length];
                for (int i = 0; i < printerFactories.length; i++) {
                    printers[i] = printerFactories[i].createPrinter();
                }
                ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
                while (lineReader.hasNext()) {
                    IRawRecord<char[]> record = lineReader.next();
                    tb.reset();
                    parser.parse(record, tb.getDataOutput());
                    tb.addFieldEndOffset();
                    parser.parseMeta(tb.getDataOutput());
                    tb.addFieldEndOffset();
                    parser.appendLastParsedPrimaryKeyToTuple(tb);
                    //print tuple
                    printTuple(tb, printers, printStream);
                }
            } finally {
                // Close the configured reader on both the success and the failure path.
                lineReader.close();
            }
        }
        // The output stream is closed at this point, so the file content is complete.
        Assert.assertTrue(FileUtils.contentEquals(file, expected));
    } catch (Throwable th) {
        System.err.println("TEST FAILED");
        th.printStackTrace();
        throw th;
    } finally {
        FileUtils.deleteQuietly(file);
    }
    System.err.println("TEST PASSED.");
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ADMDataParser(org.apache.asterix.external.parser.ADMDataParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) RecordWithMetadataParser(org.apache.asterix.external.parser.RecordWithMetadataParser) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream) Path(java.nio.file.Path) PrintStream(java.io.PrintStream) CSVToRecordWithMetadataAndPKConverter(org.apache.asterix.external.input.record.converter.CSVToRecordWithMetadataAndPKConverter) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IPrinterFactory(org.apache.hyracks.algebricks.data.IPrinterFactory) LineRecordReader(org.apache.asterix.external.input.record.reader.stream.LineRecordReader) File(java.io.File) ARecordType(org.apache.asterix.om.types.ARecordType) IPrinter(org.apache.hyracks.algebricks.data.IPrinter) IAType(org.apache.asterix.om.types.IAType)

Example 4 with LocalFSInputStream

use of org.apache.asterix.external.input.stream.LocalFSInputStream in project asterixdb by apache.

the class ClassAdToADMTest method testSchemaful.

/**
 * Parses the {@code [ ... ]}-delimited records of
 * {@code /classad/classad-with-temporals.classads} against a fixed record type, prints each
 * parsed tuple to a file under {@code target/} and compares that file with
 * {@code /classad/results/classad-with-temporals.adm}.
 *
 * The output stream is closed and compared once, after all input files have been processed.
 * Both the stream and each record reader are closed on failure as well. Any throwable fails
 * the test with the cause named in the failure message.
 */
@SuppressWarnings("rawtypes")
public void testSchemaful() {
    try {
        // NOTE(review): "wtih" is misspelled in the output file name; left unchanged because
        // the name is only used as a scratch file here.
        File file = new File("target/classad-wtih-temporals.adm");
        File expected = new File(getClass().getResource("/classad/results/classad-with-temporals.adm").toURI().getPath());
        FileUtils.deleteQuietly(file);
        String[] recordFieldNames = { "GlobalJobId", "Owner", "ClusterId", "ProcId", "RemoteWallClockTime", "CompletionDate", "QDate", "JobCurrentStartDate", "JobStartDate", "JobCurrentStartExecutingDate" };
        IAType[] recordFieldTypes = { BuiltinType.ASTRING, BuiltinType.ASTRING, BuiltinType.AINT32, BuiltinType.AINT32, BuiltinType.ADURATION, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME };
        ARecordType recordType = new ARecordType("value", recordFieldNames, recordFieldTypes, true);
        int numOfTupleFields = 1;
        ISerializerDeserializer[] serdes = new ISerializerDeserializer[1];
        serdes[0] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(recordType);
        IPrinterFactory[] printerFactories = new IPrinterFactory[1];
        printerFactories[0] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(recordType);
        // create output descriptor
        IPrinter[] printers = new IPrinter[printerFactories.length];
        for (int i = 0; i < printerFactories.length; i++) {
            printers[i] = printerFactories[i].createPrinter();
        }
        ClassAdObjectPool objectPool = new ClassAdObjectPool();
        String[] files = new String[] { "/classad/classad-with-temporals.classads" };
        ClassAdParser parser = new ClassAdParser(recordType, false, false, false, null, null, null, objectPool);
        ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
        // One output stream shared by all input files; closing it inside the loop would
        // break as soon as a second file is listed.
        try (PrintStream printStream = new PrintStream(Files.newOutputStream(Paths.get(file.toURI())))) {
            for (String path : files) {
                List<Path> paths = new ArrayList<>();
                Map<String, String> config = new HashMap<>();
                config.put(ExternalDataConstants.KEY_RECORD_START, "[");
                config.put(ExternalDataConstants.KEY_RECORD_END, "]");
                paths.add(Paths.get(getClass().getResource(path).toURI()));
                FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
                LocalFSInputStream in = new LocalFSInputStream(watcher);
                SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader();
                recordReader.configure(in, config);
                try {
                    while (recordReader.hasNext()) {
                        tb.reset();
                        IRawRecord<char[]> record = recordReader.next();
                        parser.parse(record, tb.getDataOutput());
                        tb.addFieldEndOffset();
                        printTuple(tb, printers, printStream);
                    }
                } finally {
                    // Close the reader even when parsing or printing throws.
                    recordReader.close();
                }
            }
        }
        // Compare only after the stream is closed so the file content is complete.
        Assert.assertTrue(FileUtils.contentEquals(file, expected));
    } catch (Throwable th) {
        System.err.println("TEST FAILED");
        th.printStackTrace();
        // Name the cause in the failure instead of a message-less assertTrue(false).
        Assert.fail("testSchemaful failed with " + th);
    }
    System.err.println("TEST PASSED");
}
Also used : ClassAdObjectPool(org.apache.asterix.external.classad.object.pool.ClassAdObjectPool) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) ClassAdParser(org.apache.asterix.external.library.ClassAdParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream) Path(java.nio.file.Path) PrintStream(java.io.PrintStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IPrinterFactory(org.apache.hyracks.algebricks.data.IPrinterFactory) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) File(java.io.File) ARecordType(org.apache.asterix.om.types.ARecordType) IPrinter(org.apache.hyracks.algebricks.data.IPrinter) IAType(org.apache.asterix.om.types.IAType)

Example 5 with LocalFSInputStream

use of org.apache.asterix.external.input.stream.LocalFSInputStream in project asterixdb by apache.

the class ClassAdToADMTest method testSchemaless.

/**
 * Reads the {@code [ ... ]}-delimited records of {@code /classad/jobads.txt}, parses each one
 * into a ClassAd without a schema and walks the attributes of the parsed ad. An attribute
 * whose expression-tree kind is not one of the listed kinds only produces a console message.
 * Any exception raised while reading or parsing fails the test.
 */
public void testSchemaless() {
    try {
        final ClassAdObjectPool pool = new ClassAdObjectPool();
        final ClassAd parsedAd = new ClassAd(pool);
        final String[] resources = { "/classad/jobads.txt" };
        final ClassAdParser adParser = new ClassAdParser(pool);
        final CharArrayLexerSource source = new CharArrayLexerSource();
        for (String resource : resources) {
            // Records are delimited by square brackets.
            final Map<String, String> readerConfig = new HashMap<>();
            readerConfig.put(ExternalDataConstants.KEY_RECORD_START, "[");
            readerConfig.put(ExternalDataConstants.KEY_RECORD_END, "]");

            final List<Path> watchedPaths = new ArrayList<>();
            watchedPaths.add(Paths.get(getClass().getResource(resource).toURI()));
            final LocalFSInputStream stream = new LocalFSInputStream(new FileSystemWatcher(watchedPaths, null, false));

            final SemiStructuredRecordReader reader = new SemiStructuredRecordReader();
            reader.configure(stream, readerConfig);
            try {
                final Value scratch = new Value(pool);
                while (reader.hasNext()) {
                    scratch.reset();
                    final IRawRecord<char[]> rawRecord = reader.next();
                    source.setNewSource(rawRecord.get());
                    adParser.setLexerSource(source);
                    adParser.parseNext(parsedAd);
                    final Map<CaseInsensitiveString, ExprTree> attributes = parsedAd.getAttrList();
                    for (ExprTree tree : attributes.values()) {
                        switch (tree.getKind()) {
                            case ATTRREF_NODE:
                            case CLASSAD_NODE:
                            case EXPR_ENVELOPE:
                            case EXPR_LIST_NODE:
                            case FN_CALL_NODE:
                            case OP_NODE:
                            case LITERAL_NODE:
                                // every listed node kind is accepted without further checks
                                break;
                            default:
                                System.out.println("Something is wrong");
                                break;
                        }
                    }
                }
            } finally {
                reader.close();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        assertTrue(false);
    }
}
Also used : Path(java.nio.file.Path) ClassAdObjectPool(org.apache.asterix.external.classad.object.pool.ClassAdObjectPool) ClassAd(org.apache.asterix.external.classad.ClassAd) HashMap(java.util.HashMap) CharArrayLexerSource(org.apache.asterix.external.classad.CharArrayLexerSource) ArrayList(java.util.ArrayList) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ClassAdParser(org.apache.asterix.external.library.ClassAdParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) Value(org.apache.asterix.external.classad.Value) ExprTree(org.apache.asterix.external.classad.ExprTree) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream)

Aggregations

Path (java.nio.file.Path)6 ArrayList (java.util.ArrayList)6 LocalFSInputStream (org.apache.asterix.external.input.stream.LocalFSInputStream)6 FileSystemWatcher (org.apache.asterix.external.util.FileSystemWatcher)6 HashMap (java.util.HashMap)5 SemiStructuredRecordReader (org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader)4 CaseInsensitiveString (org.apache.asterix.external.classad.CaseInsensitiveString)3 ClassAdObjectPool (org.apache.asterix.external.classad.object.pool.ClassAdObjectPool)3 ClassAdParser (org.apache.asterix.external.library.ClassAdParser)3 File (java.io.File)2 PrintStream (java.io.PrintStream)2 CharArrayLexerSource (org.apache.asterix.external.classad.CharArrayLexerSource)2 ClassAd (org.apache.asterix.external.classad.ClassAd)2 Value (org.apache.asterix.external.classad.Value)2 ARecordType (org.apache.asterix.om.types.ARecordType)2 IAType (org.apache.asterix.om.types.IAType)2 IPrinter (org.apache.hyracks.algebricks.data.IPrinter)2 IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory)2 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)2 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)2