Search in sources:

Example 1 with SemiStructuredRecordReader

use of org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader in project asterixdb by apache.

Class ClassAdToADMTest, method testEscaping.

/**
 * Reads every record of {@code /classad/escapes.txt} (records delimited by {@code [} and
 * {@code ]}), parses it into a {@link ClassAd} and asserts that the ad's string form equals
 * the expected text, including the escaped quotes around the {@code Args} value.
 *
 * Any exception raised while reading or parsing is printed and fails the test.
 */
public void testEscaping() {
    try {
        ClassAdObjectPool objectPool = new ClassAdObjectPool();
        ClassAd pAd = new ClassAd(objectPool);
        String[] files = new String[] { "/classad/escapes.txt" };
        ClassAdParser parser = new ClassAdParser(objectPool);
        CharArrayLexerSource lexerSource = new CharArrayLexerSource();
        for (String path : files) {
            List<Path> paths = new ArrayList<>();
            Map<String, String> config = new HashMap<>();
            config.put(ExternalDataConstants.KEY_RECORD_START, "[");
            config.put(ExternalDataConstants.KEY_RECORD_END, "]");
            paths.add(Paths.get(getClass().getResource(path).toURI()));
            FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
            LocalFSInputStream in = new LocalFSInputStream(watcher);
            // configure() is called inside the resource scope so the reader is closed
            // even when configuration itself throws.
            try (SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader()) {
                recordReader.configure(in, config);
                // NOTE(review): val is never read; kept in case constructing it against the
                // shared object pool matters to the parser - confirm before removing.
                Value val = new Value(objectPool);
                while (recordReader.hasNext()) {
                    val.reset();
                    IRawRecord<char[]> record = recordReader.next();
                    lexerSource.setNewSource(record.get());
                    parser.setLexerSource(lexerSource);
                    parser.parseNext(pAd);
                    Assert.assertEquals("[ Args = \"“-1 0.1 0.1 0.5 2e-07 0.001 10 -1”\"; GlobalJobId = \"submit-4.chtc.wisc.edu#3724038.0#1462893042\" ]", pAd.toString());
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        // Report the cause in the failure itself instead of a bare assertTrue(false).
        Assert.fail("testEscaping failed: " + e);
    }
}
Also used : Path(java.nio.file.Path) ClassAdObjectPool(org.apache.asterix.external.classad.object.pool.ClassAdObjectPool) ClassAd(org.apache.asterix.external.classad.ClassAd) HashMap(java.util.HashMap) CharArrayLexerSource(org.apache.asterix.external.classad.CharArrayLexerSource) ArrayList(java.util.ArrayList) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ClassAdParser(org.apache.asterix.external.library.ClassAdParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) Value(org.apache.asterix.external.classad.Value) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream)

Example 2 with SemiStructuredRecordReader

use of org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader in project asterixdb by apache.

Class ByteBufUTF8DecodeTest, method testDecodingJsonRecords.

/**
 * Streams the records of {@code /record.json} (delimited by <code>{</code> and
 * <code>}</code>) through a {@link SemiStructuredRecordReader} and passes the string form of
 * each record to {@code process}.
 *
 * Anything thrown while fetching or processing a record is printed and turned into a test
 * failure carrying the throwable's message.
 */
@Test
public void testDecodingJsonRecords() throws URISyntaxException, IOException {
    final String resourceName = "/record.json";

    // Record delimiters for the reader.
    Map<String, String> readerConfig = new HashMap<>();
    readerConfig.put(ExternalDataConstants.KEY_RECORD_START, "{");
    readerConfig.put(ExternalDataConstants.KEY_RECORD_END, "}");

    // Single input file, resolved from the test classpath.
    Path jsonPath = Paths.get(getClass().getResource(resourceName).toURI());
    List<Path> inputPaths = new ArrayList<>();
    inputPaths.add(jsonPath);

    LocalFSInputStream stream = new LocalFSInputStream(new FileSystemWatcher(inputPaths, null, false));
    try (SemiStructuredRecordReader reader = new SemiStructuredRecordReader()) {
        reader.configure(stream, readerConfig);
        while (reader.hasNext()) {
            try {
                IRawRecord<char[]> current = reader.next();
                String text = current.toString();
                process(text);
            } catch (Throwable failure) {
                failure.printStackTrace();
                Assert.fail(failure.getMessage());
            }
        }
    }
}
Also used : Path(java.nio.file.Path) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) HashMap(java.util.HashMap) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) ArrayList(java.util.ArrayList) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream) Test(org.junit.Test)

Example 3 with SemiStructuredRecordReader

use of org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader in project asterixdb by apache.

Class ClassAdToADMTest, method testSchemaful.

/**
 * Parses {@code /classad/classad-with-temporals.classads} against a fixed ten-field record
 * type, prints every parsed tuple to a file under {@code target/}, and asserts that the file
 * matches {@code /classad/results/classad-with-temporals.adm}.
 *
 * Anything thrown along the way, including a failed comparison, is printed after a
 * "TEST FAILED" line and fails the test.
 */
@SuppressWarnings("rawtypes")
public void testSchemaful() {
    try {
        // NOTE(review): the output name is spelled "wtih"; left unchanged in case other
        // tooling refers to this path.
        File file = new File("target/classad-wtih-temporals.adm");
        File expected = new File(getClass().getResource("/classad/results/classad-with-temporals.adm").toURI().getPath());
        FileUtils.deleteQuietly(file);
        String[] recordFieldNames = { "GlobalJobId", "Owner", "ClusterId", "ProcId", "RemoteWallClockTime", "CompletionDate", "QDate", "JobCurrentStartDate", "JobStartDate", "JobCurrentStartExecutingDate" };
        IAType[] recordFieldTypes = { BuiltinType.ASTRING, BuiltinType.ASTRING, BuiltinType.AINT32, BuiltinType.AINT32, BuiltinType.ADURATION, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME };
        ARecordType recordType = new ARecordType("value", recordFieldNames, recordFieldTypes, true);
        int numOfTupleFields = 1;
        // NOTE(review): serdes is never read afterwards; kept in case the provider lookup
        // has a side effect the test relies on - confirm before removing.
        ISerializerDeserializer[] serdes = new ISerializerDeserializer[1];
        serdes[0] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(recordType);
        IPrinterFactory[] printerFactories = new IPrinterFactory[1];
        printerFactories[0] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(recordType);
        // create output descriptor
        IPrinter[] printers = new IPrinter[printerFactories.length];
        for (int i = 0; i < printerFactories.length; i++) {
            printers[i] = printerFactories[i].createPrinter();
        }
        ClassAdObjectPool objectPool = new ClassAdObjectPool();
        String[] files = new String[] { "/classad/classad-with-temporals.classads" };
        ClassAdParser parser = new ClassAdParser(recordType, false, false, false, null, null, null, objectPool);
        ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
        // The output stream spans all input files and is closed on every exit path.
        try (PrintStream printStream = new PrintStream(Files.newOutputStream(Paths.get(file.toURI())))) {
            for (String path : files) {
                List<Path> paths = new ArrayList<>();
                Map<String, String> config = new HashMap<>();
                config.put(ExternalDataConstants.KEY_RECORD_START, "[");
                config.put(ExternalDataConstants.KEY_RECORD_END, "]");
                paths.add(Paths.get(getClass().getResource(path).toURI()));
                FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
                LocalFSInputStream in = new LocalFSInputStream(watcher);
                // The reader is closed even when configure() or parsing throws.
                try (SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader()) {
                    recordReader.configure(in, config);
                    while (recordReader.hasNext()) {
                        tb.reset();
                        IRawRecord<char[]> record = recordReader.next();
                        parser.parse(record, tb.getDataOutput());
                        tb.addFieldEndOffset();
                        printTuple(tb, printers, printStream);
                    }
                }
            }
        }
        // Compare only after the stream is closed so all output has reached the file.
        Assert.assertTrue(FileUtils.contentEquals(file, expected));
    } catch (Throwable th) {
        System.err.println("TEST FAILED");
        th.printStackTrace();
        // Carry the cause in the failure rather than a bare assertTrue(false).
        Assert.fail("testSchemaful failed: " + th);
    }
    System.err.println("TEST PASSED");
}
Also used : ClassAdObjectPool(org.apache.asterix.external.classad.object.pool.ClassAdObjectPool) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) ClassAdParser(org.apache.asterix.external.library.ClassAdParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream) Path(java.nio.file.Path) PrintStream(java.io.PrintStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IPrinterFactory(org.apache.hyracks.algebricks.data.IPrinterFactory) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) File(java.io.File) ARecordType(org.apache.asterix.om.types.ARecordType) IPrinter(org.apache.hyracks.algebricks.data.IPrinter) IAType(org.apache.asterix.om.types.IAType)

Example 4 with SemiStructuredRecordReader

use of org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader in project asterixdb by apache.

Class ClassAdToADMTest, method testSchemaless.

/**
 * Reads every record of {@code /classad/jobads.txt} (records delimited by {@code [} and
 * {@code ]}), parses it into a {@link ClassAd} without a schema, and walks the ad's
 * attributes, checking the node kind of each expression tree.
 *
 * Any exception raised while reading or parsing is printed and fails the test.
 */
public void testSchemaless() {
    try {
        ClassAdObjectPool objectPool = new ClassAdObjectPool();
        ClassAd pAd = new ClassAd(objectPool);
        String[] files = new String[] { "/classad/jobads.txt" };
        ClassAdParser parser = new ClassAdParser(objectPool);
        CharArrayLexerSource lexerSource = new CharArrayLexerSource();
        for (String path : files) {
            List<Path> paths = new ArrayList<>();
            Map<String, String> config = new HashMap<>();
            config.put(ExternalDataConstants.KEY_RECORD_START, "[");
            config.put(ExternalDataConstants.KEY_RECORD_END, "]");
            paths.add(Paths.get(getClass().getResource(path).toURI()));
            FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
            LocalFSInputStream in = new LocalFSInputStream(watcher);
            // configure() is called inside the resource scope so the reader is closed
            // even when configuration itself throws.
            try (SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader()) {
                recordReader.configure(in, config);
                // NOTE(review): val is never read; kept in case constructing it against the
                // shared object pool matters to the parser - confirm before removing.
                Value val = new Value(objectPool);
                while (recordReader.hasNext()) {
                    val.reset();
                    IRawRecord<char[]> record = recordReader.next();
                    lexerSource.setNewSource(record.get());
                    parser.setLexerSource(lexerSource);
                    parser.parseNext(pAd);
                    Map<CaseInsensitiveString, ExprTree> attrs = pAd.getAttrList();
                    for (Entry<CaseInsensitiveString, ExprTree> entry : attrs.entrySet()) {
                        ExprTree tree = entry.getValue();
                        switch (tree.getKind()) {
                            case ATTRREF_NODE:
                            case CLASSAD_NODE:
                            case EXPR_ENVELOPE:
                            case EXPR_LIST_NODE:
                            case FN_CALL_NODE:
                            case OP_NODE:
                            case LITERAL_NODE:
                                // Expected node kinds: nothing to verify.
                                break;
                            default:
                                // NOTE(review): an unexpected kind is only logged, so the
                                // test still passes - consider failing here instead.
                                System.out.println("Something is wrong");
                                break;
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        // Attach the cause to the failure instead of a message-less assertTrue(false).
        assertTrue("testSchemaless failed: " + e, false);
    }
}
Also used : Path(java.nio.file.Path) ClassAdObjectPool(org.apache.asterix.external.classad.object.pool.ClassAdObjectPool) ClassAd(org.apache.asterix.external.classad.ClassAd) HashMap(java.util.HashMap) CharArrayLexerSource(org.apache.asterix.external.classad.CharArrayLexerSource) ArrayList(java.util.ArrayList) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) CaseInsensitiveString(org.apache.asterix.external.classad.CaseInsensitiveString) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ClassAdParser(org.apache.asterix.external.library.ClassAdParser) FileSystemWatcher(org.apache.asterix.external.util.FileSystemWatcher) SemiStructuredRecordReader(org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader) Value(org.apache.asterix.external.classad.Value) ExprTree(org.apache.asterix.external.classad.ExprTree) LocalFSInputStream(org.apache.asterix.external.input.stream.LocalFSInputStream)

Aggregations

Path (java.nio.file.Path)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 SemiStructuredRecordReader (org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader)4 LocalFSInputStream (org.apache.asterix.external.input.stream.LocalFSInputStream)4 FileSystemWatcher (org.apache.asterix.external.util.FileSystemWatcher)4 CaseInsensitiveString (org.apache.asterix.external.classad.CaseInsensitiveString)3 ClassAdObjectPool (org.apache.asterix.external.classad.object.pool.ClassAdObjectPool)3 ClassAdParser (org.apache.asterix.external.library.ClassAdParser)3 CharArrayLexerSource (org.apache.asterix.external.classad.CharArrayLexerSource)2 ClassAd (org.apache.asterix.external.classad.ClassAd)2 Value (org.apache.asterix.external.classad.Value)2 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)2 File (java.io.File)1 PrintStream (java.io.PrintStream)1 ExprTree (org.apache.asterix.external.classad.ExprTree)1 ARecordType (org.apache.asterix.om.types.ARecordType)1 IAType (org.apache.asterix.om.types.IAType)1 IPrinter (org.apache.hyracks.algebricks.data.IPrinter)1 IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory)1