
Example 1 with IPrinterFactory

Use of org.apache.hyracks.algebricks.data.IPrinterFactory in the apache/asterixdb project.

From the class JobGenHelper, the method mkPrinterFactories:

public static IPrinterFactory[] mkPrinterFactories(IOperatorSchema opSchema, IVariableTypeEnvironment env, JobGenContext context, int[] printColumns) throws AlgebricksException {
    IPrinterFactory[] pf = new IPrinterFactory[printColumns.length];
    IPrinterFactoryProvider pff = context.getPrinterFactoryProvider();
    try {
        for (int i = 0; i < pf.length; i++) {
            LogicalVariable v = opSchema.getVariable(printColumns[i]);
            Object t = env.getVarType(v);
            pf[i] = pff.getPrinterFactory(t);
        }
        return pf;
    } catch (HyracksDataException e) {
        throw new AlgebricksException(e);
    }
}
Also used: LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable), IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory), IPrinterFactoryProvider (org.apache.hyracks.algebricks.data.IPrinterFactoryProvider), AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)
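
For context, here is a minimal sketch of what a printer factory returned by getPrinterFactory might look like. It assumes only the contracts visible on this page: IPrinterFactory exposes createPrinter(), and IPrinter exposes init() and print(byte[], int, int, PrintStream). The class name and the hex-dump behavior are hypothetical, not from the AsterixDB sources.

import java.io.PrintStream;
import org.apache.hyracks.algebricks.data.IPrinter;
import org.apache.hyracks.algebricks.data.IPrinterFactory;
import org.apache.hyracks.api.exceptions.HyracksDataException;

// Hypothetical factory whose printers dump a field's serialized bytes as hex.
public class HexPrinterFactory implements IPrinterFactory {

    private static final long serialVersionUID = 1L;

    @Override
    public IPrinter createPrinter() {
        return new IPrinter() {

            @Override
            public void init() throws HyracksDataException {
                // stateless printer: nothing to set up
            }

            @Override
            public void print(byte[] b, int s, int l, PrintStream ps) throws HyracksDataException {
                // render the field's bytes [s, s + l) as two hex digits each
                for (int i = s; i < s + l; i++) {
                    ps.printf("%02x", b[i]);
                }
            }
        };
    }
}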

Example 2 with IPrinterFactory

Use of org.apache.hyracks.algebricks.data.IPrinterFactory in the apache/asterixdb project.

From the class SinkWritePOperator, the method contributeRuntimeOperator:

@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    WriteOperator write = (WriteOperator) op;
    int[] columns = new int[write.getExpressions().size()];
    int i = 0;
    for (Mutable<ILogicalExpression> exprRef : write.getExpressions()) {
        ILogicalExpression expr = exprRef.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new NotImplementedException("Only writing variable expressions is supported.");
        }
        VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
        LogicalVariable v = varRef.getVariableReference();
        columns[i++] = inputSchemas[0].findVariable(v);
    }
    RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
    IPrinterFactory[] pf = JobGenHelper.mkPrinterFactories(inputSchemas[0], context.getTypeEnvironment(op), context, columns);
    IMetadataProvider<?, ?> mp = context.getMetadataProvider();
    Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> runtime = mp.getWriteFileRuntime(write.getDataSink(), columns, pf, inputDesc);
    builder.contributeMicroOperator(write, runtime.first, recDesc, runtime.second);
    ILogicalOperator src = write.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, write, 0);
}
Also used: LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), NotImplementedException (org.apache.hyracks.algebricks.common.exceptions.NotImplementedException), ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator), IPushRuntimeFactory (org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression), IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory), VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression), WriteOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.WriteOperator)

Example 3 with IPrinterFactory

Use of org.apache.hyracks.algebricks.data.IPrinterFactory in the apache/asterixdb project.

From the class ClassAdToADMTest, the method testSchemaful:

@SuppressWarnings("rawtypes")
public void testSchemaful() {
    try {
        File file = new File("target/classad-wtih-temporals.adm");
        File expected = new File(getClass().getResource("/classad/results/classad-with-temporals.adm").toURI().getPath());
        FileUtils.deleteQuietly(file);
        PrintStream printStream = new PrintStream(Files.newOutputStream(Paths.get(file.toURI())));
        String[] recordFieldNames = { "GlobalJobId", "Owner", "ClusterId", "ProcId", "RemoteWallClockTime", "CompletionDate", "QDate", "JobCurrentStartDate", "JobStartDate", "JobCurrentStartExecutingDate" };
        IAType[] recordFieldTypes = { BuiltinType.ASTRING, BuiltinType.ASTRING, BuiltinType.AINT32, BuiltinType.AINT32, BuiltinType.ADURATION, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME, BuiltinType.ADATETIME };
        ARecordType recordType = new ARecordType("value", recordFieldNames, recordFieldTypes, true);
        int numOfTupleFields = 1;
        ISerializerDeserializer[] serdes = new ISerializerDeserializer[1];
        serdes[0] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(recordType);
        IPrinterFactory[] printerFactories = new IPrinterFactory[1];
        printerFactories[0] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(recordType);
        // create output descriptor
        IPrinter[] printers = new IPrinter[printerFactories.length];
        for (int i = 0; i < printerFactories.length; i++) {
            printers[i] = printerFactories[i].createPrinter();
        }
        ClassAdObjectPool objectPool = new ClassAdObjectPool();
        String[] files = new String[] { "/classad/classad-with-temporals.classads" };
        ClassAdParser parser = new ClassAdParser(recordType, false, false, false, null, null, null, objectPool);
        ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
        for (String path : files) {
            List<Path> paths = new ArrayList<>();
            Map<String, String> config = new HashMap<>();
            config.put(ExternalDataConstants.KEY_RECORD_START, "[");
            config.put(ExternalDataConstants.KEY_RECORD_END, "]");
            paths.add(Paths.get(getClass().getResource(path).toURI()));
            FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
            LocalFSInputStream in = new LocalFSInputStream(watcher);
            SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader();
            recordReader.configure(in, config);
            while (recordReader.hasNext()) {
                tb.reset();
                IRawRecord<char[]> record = recordReader.next();
                parser.parse(record, tb.getDataOutput());
                tb.addFieldEndOffset();
                printTuple(tb, printers, printStream);
            }
            recordReader.close();
            printStream.close();
            Assert.assertTrue(FileUtils.contentEquals(file, expected));
        }
    } catch (Throwable th) {
        System.err.println("TEST FAILED");
        th.printStackTrace();
        Assert.fail();
    }
    System.err.println("TEST PASSED");
}
Also used: ClassAdObjectPool (org.apache.asterix.external.classad.object.pool.ClassAdObjectPool), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), CaseInsensitiveString (org.apache.asterix.external.classad.CaseInsensitiveString), ClassAdParser (org.apache.asterix.external.library.ClassAdParser), FileSystemWatcher (org.apache.asterix.external.util.FileSystemWatcher), LocalFSInputStream (org.apache.asterix.external.input.stream.LocalFSInputStream), Path (java.nio.file.Path), PrintStream (java.io.PrintStream), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory), SemiStructuredRecordReader (org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader), File (java.io.File), ARecordType (org.apache.asterix.om.types.ARecordType), IPrinter (org.apache.hyracks.algebricks.data.IPrinter), IAType (org.apache.asterix.om.types.IAType)
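
Both this test and Example 5 call a printTuple helper that the excerpts omit. A plausible reconstruction under two assumptions: ArrayTupleBuilder exposes the serialized tuple via getByteArray() and the cumulative field boundaries via getFieldEndOffsets().

private void printTuple(ArrayTupleBuilder tb, IPrinter[] printers, PrintStream printStream) throws HyracksDataException {
    int[] offsets = tb.getFieldEndOffsets();
    for (int i = 0; i < printers.length; i++) {
        // field i spans [previous field's end offset, its own end offset)
        int start = i == 0 ? 0 : offsets[i - 1];
        int length = offsets[i] - start;
        printers[i].print(tb.getByteArray(), start, length, printStream);
        printStream.println();
    }
}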

Example 4 with IPrinterFactory

Use of org.apache.hyracks.algebricks.data.IPrinterFactory in the apache/asterixdb project.

From the class DistributeResultPOperator, the method contributeRuntimeOperator. The logic mirrors Example 2, except that the result runtime is contributed as a full Hyracks operator (contributeHyracksOperator) rather than as a micro operator:

@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    DistributeResultOperator resultOp = (DistributeResultOperator) op;
    IMetadataProvider mp = context.getMetadataProvider();
    JobSpecification spec = builder.getJobSpec();
    int[] columns = new int[resultOp.getExpressions().size()];
    int i = 0;
    for (Mutable<ILogicalExpression> exprRef : resultOp.getExpressions()) {
        ILogicalExpression expr = exprRef.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new NotImplementedException("Only writing variable expressions is supported.");
        }
        VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
        LogicalVariable v = varRef.getVariableReference();
        columns[i++] = inputSchemas[0].findVariable(v);
    }
    RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
    IPrinterFactory[] pf = JobGenHelper.mkPrinterFactories(inputSchemas[0], context.getTypeEnvironment(op), context, columns);
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtimeAndConstraints = mp.getResultHandleRuntime(resultOp.getDataSink(), columns, pf, inputDesc, true, spec);
    builder.contributeHyracksOperator(resultOp, runtimeAndConstraints.first);
    ILogicalOperator src = resultOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, resultOp, 0);
}
Also used: LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), NotImplementedException (org.apache.hyracks.algebricks.common.exceptions.NotImplementedException), ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator), IMetadataProvider (org.apache.hyracks.algebricks.core.algebra.metadata.IMetadataProvider), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), DistributeResultOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator), ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression), IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory), VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification)
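
Examples 2 and 4 repeat the same loop that maps variable-reference expressions to column positions in the first input schema. A hedged sketch of that pattern extracted into a standalone helper (hypothetical; no such method appears in the AsterixDB sources shown here, and imports are omitted to match this page's convention). It uses only calls visible in the examples above:

// Hypothetical helper: resolve each variable-reference expression to its column
// index in the given schema; non-variable expressions are rejected, as above.
private static int[] variableColumns(List<Mutable<ILogicalExpression>> exprs, IOperatorSchema schema) {
    int[] columns = new int[exprs.size()];
    int i = 0;
    for (Mutable<ILogicalExpression> exprRef : exprs) {
        ILogicalExpression expr = exprRef.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new NotImplementedException("Only writing variable expressions is supported.");
        }
        LogicalVariable v = ((VariableReferenceExpression) expr).getVariableReference();
        columns[i++] = schema.findVariable(v);
    }
    return columns;
}

With this helper, both contributeRuntimeOperator implementations could reduce their mapping step to a single call such as variableColumns(write.getExpressions(), inputSchemas[0]).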

Example 5 with IPrinterFactory

Use of org.apache.hyracks.algebricks.data.IPrinterFactory in the apache/asterixdb project.

From the class RecordWithMetaTest, the method runTest:

@SuppressWarnings({ "unchecked", "rawtypes" })
// @Test commented out due to ASTERIXDB-1881
public void runTest() throws Exception {
    File file = new File("target/beer.adm");
    File expected = new File(getClass().getResource("/openbeerdb/beer.txt").toURI().getPath());
    try {
        FileUtils.deleteQuietly(file);
        PrintStream printStream = new PrintStream(Files.newOutputStream(Paths.get(file.toURI())));
        // create key type
        IAType[] keyTypes = { BuiltinType.ASTRING };
        String keyName = "id";
        List<String> keyNameAsList = new ArrayList<>(1);
        keyNameAsList.add(keyName);
        // create record type
        String[] recordFieldNames = {};
        IAType[] recordFieldTypes = {};
        recordType = new ARecordType("value", recordFieldNames, recordFieldTypes, true);
        // create the meta type
        String[] metaFieldNames = { keyName, "flags", "expiration", "cas", "rev", "vbid", "dtype" };
        IAType[] metaFieldTypes = { BuiltinType.ASTRING, BuiltinType.AINT32, BuiltinType.AINT64, BuiltinType.AINT64, BuiltinType.AINT32, BuiltinType.AINT32, BuiltinType.AINT32 };
        ARecordType metaType = new ARecordType("meta", metaFieldNames, metaFieldTypes, true);
        int valueIndex = 4;
        char delimiter = ',';
        int numOfTupleFields = 3;
        int[] pkIndexes = { 0 };
        int[] pkIndicators = { 1 };
        List<Path> paths = new ArrayList<>();
        paths.add(Paths.get(getClass().getResource("/openbeerdb/beer.csv").toURI()));
        FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
        // create input stream
        LocalFSInputStream inputStream = new LocalFSInputStream(watcher);
        // create reader record reader
        Map<String, String> config = new HashMap<>();
        config.put(ExternalDataConstants.KEY_HEADER, "true");
        config.put(ExternalDataConstants.KEY_QUOTE, ExternalDataConstants.DEFAULT_QUOTE);
        LineRecordReader lineReader = new LineRecordReader();
        lineReader.configure(inputStream, config);
        // create csv with json record reader
        CSVToRecordWithMetadataAndPKConverter recordConverter = new CSVToRecordWithMetadataAndPKConverter(valueIndex, delimiter, metaType, recordType, pkIndicators, pkIndexes, keyTypes);
        // create the value parser <ADM in this case>
        ADMDataParser valueParser = new ADMDataParser(recordType, false);
        // create parser.
        RecordWithMetadataParser parser = new RecordWithMetadataParser(metaType, valueParser, recordConverter);
        // create serializer deserializer and printer factories
        ISerializerDeserializer[] serdes = new ISerializerDeserializer[keyTypes.length + 2];
        IPrinterFactory[] printerFactories = new IPrinterFactory[keyTypes.length + 2];
        for (int i = 0; i < keyTypes.length; i++) {
            serdes[i + 2] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(keyTypes[i]);
            printerFactories[i + 2] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(keyTypes[i]);
        }
        serdes[0] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(recordType);
        serdes[1] = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaType);
        printerFactories[0] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(recordType);
        printerFactories[1] = ADMPrinterFactoryProvider.INSTANCE.getPrinterFactory(metaType);
        // create output descriptor
        IPrinter[] printers = new IPrinter[printerFactories.length];
        for (int i = 0; i < printerFactories.length; i++) {
            printers[i] = printerFactories[i].createPrinter();
        }
        ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
        while (lineReader.hasNext()) {
            IRawRecord<char[]> record = lineReader.next();
            tb.reset();
            parser.parse(record, tb.getDataOutput());
            tb.addFieldEndOffset();
            parser.parseMeta(tb.getDataOutput());
            tb.addFieldEndOffset();
            parser.appendLastParsedPrimaryKeyToTuple(tb);
            //print tuple
            printTuple(tb, printers, printStream);
        }
        lineReader.close();
        printStream.close();
        Assert.assertTrue(FileUtils.contentEquals(file, expected));
    } catch (Throwable th) {
        System.err.println("TEST FAILED");
        th.printStackTrace();
        throw th;
    } finally {
        FileUtils.deleteQuietly(file);
    }
    System.err.println("TEST PASSED.");
}
Also used: HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ADMDataParser (org.apache.asterix.external.parser.ADMDataParser), FileSystemWatcher (org.apache.asterix.external.util.FileSystemWatcher), RecordWithMetadataParser (org.apache.asterix.external.parser.RecordWithMetadataParser), LocalFSInputStream (org.apache.asterix.external.input.stream.LocalFSInputStream), Path (java.nio.file.Path), PrintStream (java.io.PrintStream), CSVToRecordWithMetadataAndPKConverter (org.apache.asterix.external.input.record.converter.CSVToRecordWithMetadataAndPKConverter), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory), LineRecordReader (org.apache.asterix.external.input.record.reader.stream.LineRecordReader), File (java.io.File), ARecordType (org.apache.asterix.om.types.ARecordType), IPrinter (org.apache.hyracks.algebricks.data.IPrinter), IAType (org.apache.asterix.om.types.IAType)
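
A note on the tuple layout built above, inferred from the parsing loop rather than stated in the sources: field 0 holds the parsed record value, field 1 the metadata record, and field 2 the appended primary key. That ordering is why serdes[0] and printerFactories[0] are bound to recordType, index 1 to metaType, and indexes i + 2 to the key types.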

Aggregations

IPrinterFactory (org.apache.hyracks.algebricks.data.IPrinterFactory): 5 uses
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable): 3 uses
File (java.io.File): 2 uses
PrintStream (java.io.PrintStream): 2 uses
Path (java.nio.file.Path): 2 uses
ArrayList (java.util.ArrayList): 2 uses
HashMap (java.util.HashMap): 2 uses
LocalFSInputStream (org.apache.asterix.external.input.stream.LocalFSInputStream): 2 uses
FileSystemWatcher (org.apache.asterix.external.util.FileSystemWatcher): 2 uses
ARecordType (org.apache.asterix.om.types.ARecordType): 2 uses
IAType (org.apache.asterix.om.types.IAType): 2 uses
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 2 uses
NotImplementedException (org.apache.hyracks.algebricks.common.exceptions.NotImplementedException): 2 uses
ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression): 2 uses
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 2 uses
VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression): 2 uses
IPrinter (org.apache.hyracks.algebricks.data.IPrinter): 2 uses
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 2 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 2 uses
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 2 uses