Search in sources:

Example 1 with AbstractFileObject

use of org.apache.commons.vfs2.provider.AbstractFileObject in project pentaho-kettle by pentaho.

The method processRow of the class XsdValidator.

/**
 * Reads one input row, validates its XML against an XSD and emits the row with the validation result.
 *
 * <p>On the first row the step configuration is checked: the XML field must exist in the input row,
 * a result field name must be set, and depending on the XSD source either the XSD file must exist or
 * the XSD field must be present in the input row. Any violation is logged and raised as a
 * {@link KettleStepException}.
 *
 * <p>For every row, the XML (either the field value itself or the file it names when
 * {@code meta.getXMLSourceFile()} is true) is validated. {@link SAXException} and {@link IOException}
 * raised during validation do not abort the step; their message becomes the validation message and
 * the row is reported as invalid.
 *
 * @param smi step metadata, cast to {@code XsdValidatorMeta}
 * @param sdi step data, cast to {@code XsdValidatorData}
 * @return {@code false} once {@code getRow()} returns {@code null} (output is then marked done),
 *         {@code true} otherwise
 * @throws KettleException if the first-row configuration checks fail, or if row processing fails and
 *         the step is not doing error handling
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    meta = (XsdValidatorMeta) smi;
    data = (XsdValidatorData) sdi;
    Object[] row = getRow();
    if (row == null) {
        // no more input to be expected...
        setOutputDone();
        return false;
    }
    if (first) {
        first = false;
        data.outputRowMeta = getInputRowMeta().clone();
        meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);
        // The XML stream field is mandatory
        if (meta.getXMLStream() == null) {
            logError(BaseMessages.getString(PKG, "XsdValidator.Log.Error.XmlStreamFieldMissing"));
            throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.XmlStreamFieldMissing"));
        }
        // Resolve the index of the XML field in the input row
        data.xmlindex = getInputRowMeta().indexOfValue(meta.getXMLStream());
        if (data.xmlindex < 0) {
            // The field is unreachable !
            logError(BaseMessages.getString(PKG, "XsdValidator.Log.ErrorFindingField") + "[" + meta.getXMLStream() + "]");
            throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.CouldnotFindField", meta.getXMLStream()));
        }
        // The result field name is mandatory
        if (meta.getResultfieldname() == null) {
            logError(BaseMessages.getString(PKG, "XsdValidator.Log.ErrorResultFieldMissing"));
            throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.ErrorResultFieldMissing"));
        }
        // XSD given as a file name: the name must be set and the file must exist
        if (meta.getXSDSource().equals(meta.SPECIFY_FILENAME)) {
            if (meta.getXSDFilename() == null) {
                logError(BaseMessages.getString(PKG, "XsdValidator.Log.ErrorXSDFileMissing"));
                throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.ErrorXSDFileMissing"));
            }
            FileObject xsdfile = null;
            try {
                xsdfile = KettleVFS.getFileObject(environmentSubstitute(meta.getXSDFilename()), getTransMeta());
                if (!xsdfile.exists()) {
                    logError(BaseMessages.getString(PKG, "XsdValidator.Log.Error.XSDFileNotExists"));
                    throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.XSDFileNotExists"));
                }
            } catch (KettleStepException e) {
                // Already logged with its specific message; do not let the generic handler below replace it.
                throw e;
            } catch (Exception e) {
                logError(BaseMessages.getString(PKG, "XsdValidator.Log.Error.GettingXSDFile"));
                // Keep the original failure as the cause so it is not lost.
                throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.GettingXSDFile"), e);
            } finally {
                try {
                    if (xsdfile != null) {
                        xsdfile.close();
                    }
                } catch (IOException ignored) {
                    // Best-effort close; a failure here must not mask the validation outcome.
                }
            }
        }
        // XSD given through an input field: the field must be configured and present in the row
        if (meta.getXSDSource().equals(meta.SPECIFY_FIELDNAME)) {
            if (meta.getXSDDefinedField() == null) {
                logError(BaseMessages.getString(PKG, "XsdValidator.Log.Error.XSDFieldMissing"));
                throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.XSDFieldMissing"));
            }
            data.xsdindex = getInputRowMeta().indexOfValue(meta.getXSDDefinedField());
            if (data.xsdindex < 0) {
                // The field is unreachable !
                logError(BaseMessages.getString(PKG, "XsdValidator.Log.ErrorFindingXSDField", meta.getXSDDefinedField()));
                throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.ErrorFindingXSDField", meta.getXSDDefinedField()));
            }
        }
    }
    try {
        // Get the XML field value
        String xmlFieldValue = getInputRowMeta().getString(row, data.xmlindex);
        boolean isvalid = false;
        // Resolve the XSD location, either from the step settings or from the current row
        String xsdfilename = null;
        if (meta.getXSDSource().equals(meta.SPECIFY_FILENAME)) {
            xsdfilename = environmentSubstitute(meta.getXSDFilename());
        } else if (meta.getXSDSource().equals(meta.SPECIFY_FIELDNAME)) {
            xsdfilename = getInputRowMeta().getString(row, data.xsdindex);
        }
        FileObject xsdfile = null;
        FileObject xmlfileValidator = null;
        String validationmsg = null;
        try {
            SchemaFactory factoryXSDValidator = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
            // NOTE(review): for the NO_NEED source xsdfilename is still null here, yet the XSD file object
            // is resolved and type-checked before the NO_NEED branch below - confirm how
            // KettleVFS.getFileObject handles a null name.
            xsdfile = KettleVFS.getFileObject(xsdfilename, getTransMeta());
            // By default the field value itself is the XML document
            Source sourceXML = new StreamSource(new StringReader(xmlFieldValue));
            if (meta.getXMLSourceFile()) {
                // The field value names an XML file: validate the file content instead
                xmlfileValidator = KettleVFS.getFileObject(xmlFieldValue);
                if (xmlfileValidator == null || !xmlfileValidator.exists()) {
                    logError(BaseMessages.getString(PKG, "XsdValidator.Log.Error.XMLfileMissing", xmlFieldValue));
                    throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.XMLfileMissing", xmlFieldValue));
                }
                sourceXML = new StreamSource(xmlfileValidator.getContent().getInputStream());
            }
            // create the schema
            Schema schema = null;
            if (xsdfile instanceof AbstractFileObject) {
                if (xsdfile.getName().getURI().contains("ram:///")) {
                    // URIs containing "ram:///" are read through their content stream rather than as a java.io.File
                    schema = factoryXSDValidator.newSchema(new StreamSource(xsdfile.getContent().getInputStream()));
                } else {
                    schema = factoryXSDValidator.newSchema(new File(KettleVFS.getFilename(xsdfile)));
                }
            } else {
                // a url should be made a FileObject.
                throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.Exception.CannotCreateSchema", xsdfile.getClass().getName()));
            }
            if (meta.getXSDSource().equals(meta.NO_NEED)) {
                // Some documents specify the schema they expect to be validated against,
                // typically using xsi:noNamespaceSchemaLocation and/or xsi:schemaLocation attributes
                schema = factoryXSDValidator.newSchema();
            }
            // Create XSDValidator
            Validator xsdValidator = schema.newValidator();
            // https://www.owasp.org/index.php/XML_Security_Cheat_Sheet#XML_Entity_Expansion
            if (!meta.isAllowExternalEntities()) {
                xsdValidator.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
                xsdValidator.setFeature("http://xml.org/sax/features/external-general-entities", false);
                xsdValidator.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                // Any attempt to resolve an entity fails the validation with an IOException
                xsdValidator.setProperty("http://apache.org/xml/properties/internal/entity-resolver", (XMLEntityResolver) xmlResourceIdentifier -> {
                    String message = BaseMessages.getString(PKG, "XsdValidator.Exception.DisallowedDocType");
                    throw new IOException(message);
                });
            }
            // Validate XML / XSD
            xsdValidator.validate(sourceXML);
            isvalid = true;
        } catch (SAXException | IOException ex) {
            // A validation failure is a normal outcome: keep the message, the row is reported as invalid
            validationmsg = ex.getMessage();
        } finally {
            try {
                if (xsdfile != null) {
                    xsdfile.close();
                }
            } catch (IOException ignored) {
                // Best-effort close; the validation result is already known.
            }
            try {
                // The XML file object was previously never closed.
                if (xmlfileValidator != null) {
                    xmlfileValidator.close();
                }
            } catch (IOException ignored) {
                // Best-effort close; the validation result is already known.
            }
        }
        Object[] outputRowData;
        if (meta.getOutputStringField()) {
            // Output type=String
            String resultText = isvalid ? meta.getIfXmlValid() : meta.getIfXmlInvalid();
            outputRowData = RowDataUtil.addValueData(row, getInputRowMeta().size(), environmentSubstitute(resultText));
        } else {
            outputRowData = RowDataUtil.addValueData(row, getInputRowMeta().size(), isvalid);
        }
        if (meta.useAddValidationMessage()) {
            // The validation message goes in the column right after the result value
            outputRowData = RowDataUtil.addValueData(outputRowData, getInputRowMeta().size() + 1, validationmsg);
        }
        if (log.isRowLevel()) {
            logRowlevel(BaseMessages.getString(PKG, "XsdValidator.Log.ReadRow") + " " + getInputRowMeta().getString(row));
        }
        // copy row to output rowset(s);
        putRow(data.outputRowMeta, outputRowData);
    } catch (KettleException e) {
        if (getStepMeta().isDoingErrorHandling()) {
            // Simply add this row to the error row
            putError(getInputRowMeta(), row, 1, e.toString(), null, "XSD001");
        } else {
            // Look up the message by its key first, then append the detail (the detail is not part of the key)
            logError(BaseMessages.getString(PKG, "XsdValidator.ErrorProcesing") + " : " + e.getMessage());
            throw new KettleStepException(BaseMessages.getString(PKG, "XsdValidator.ErrorProcesing"), e);
        }
    }
    return true;
}
Also used: SchemaFactory(javax.xml.validation.SchemaFactory) Trans(org.pentaho.di.trans.Trans) StepDataInterface(org.pentaho.di.trans.step.StepDataInterface) StreamSource(javax.xml.transform.stream.StreamSource) KettleException(org.pentaho.di.core.exception.KettleException) KettleVFS(org.pentaho.di.core.vfs.KettleVFS) Source(javax.xml.transform.Source) Schema(javax.xml.validation.Schema) XMLEntityResolver(org.apache.xerces.xni.parser.XMLEntityResolver) TransMeta(org.pentaho.di.trans.TransMeta) XMLConstants(javax.xml.XMLConstants) BaseMessages(org.pentaho.di.i18n.BaseMessages) StepInterface(org.pentaho.di.trans.step.StepInterface) StepMeta(org.pentaho.di.trans.step.StepMeta) StepMetaInterface(org.pentaho.di.trans.step.StepMetaInterface) IOException(java.io.IOException) Validator(javax.xml.validation.Validator) BaseStep(org.pentaho.di.trans.step.BaseStep) FileObject(org.apache.commons.vfs2.FileObject) RowDataUtil(org.pentaho.di.core.row.RowDataUtil) File(java.io.File) StringReader(java.io.StringReader) SAXException(org.xml.sax.SAXException) AbstractFileObject(org.apache.commons.vfs2.provider.AbstractFileObject) KettleStepException(org.pentaho.di.core.exception.KettleStepException)

Example 2 with AbstractFileObject

use of org.apache.commons.vfs2.provider.AbstractFileObject in project pentaho-kettle by pentaho.

The method createFile of the class ConnectionFileSystem.

/**
 * Builds the {@code ConnectionFileObject} for the given file name.
 *
 * <p>The connection name is read from the file name (cast to {@code ConnectionFileName}), its
 * details are looked up through the connection manager, and {@code getUrl} yields the URL to resolve.
 * When that URL is {@code null}, the returned object carries neither a resolved file object nor a
 * domain. Otherwise the URL is resolved through {@code KettleVFS} with the connection name set in the
 * {@code CONNECTION} variable, and the domain is taken from the connection details.
 *
 * @param abstractFileName the file name to create a file object for; must be a {@code ConnectionFileName}
 * @return a new {@code ConnectionFileObject} for the name
 * @throws Exception if resolving the connection details, the URL or the file object fails
 */
@Override
protected FileObject createFile(AbstractFileName abstractFileName) throws Exception {
    final String connectionName = ((ConnectionFileName) abstractFileName).getConnection();
    final VFSConnectionDetails details =
        (VFSConnectionDetails) connectionManager.get().getConnectionDetails(connectionName);
    final String resolvedUrl = getUrl(abstractFileName, details);

    if (resolvedUrl == null) {
        // Nothing to resolve: hand over typed nulls for both the resolved file object and the domain.
        final AbstractFileObject noResolvedFile = null;
        final String noDomain = null;
        return new ConnectionFileObject(abstractFileName, this, noResolvedFile, noDomain);
    }

    final String connectionDomain = details.getDomain();
    // The connection name is passed to the VFS lookup through the CONNECTION variable.
    final Variables lookupVariables = new Variables();
    lookupVariables.setVariable(CONNECTION, connectionName);
    final AbstractFileObject resolvedFile =
        (AbstractFileObject) KettleVFS.getFileObject(resolvedUrl, lookupVariables);
    return new ConnectionFileObject(abstractFileName, this, resolvedFile, connectionDomain);
}
Also used : Variables(org.pentaho.di.core.variables.Variables) VFSConnectionDetails(org.pentaho.di.connections.vfs.VFSConnectionDetails) AbstractFileObject(org.apache.commons.vfs2.provider.AbstractFileObject)

Aggregations

AbstractFileObject (org.apache.commons.vfs2.provider.AbstractFileObject)2 File (java.io.File)1 IOException (java.io.IOException)1 StringReader (java.io.StringReader)1 XMLConstants (javax.xml.XMLConstants)1 Source (javax.xml.transform.Source)1 StreamSource (javax.xml.transform.stream.StreamSource)1 Schema (javax.xml.validation.Schema)1 SchemaFactory (javax.xml.validation.SchemaFactory)1 Validator (javax.xml.validation.Validator)1 FileObject (org.apache.commons.vfs2.FileObject)1 XMLEntityResolver (org.apache.xerces.xni.parser.XMLEntityResolver)1 VFSConnectionDetails (org.pentaho.di.connections.vfs.VFSConnectionDetails)1 KettleException (org.pentaho.di.core.exception.KettleException)1 KettleStepException (org.pentaho.di.core.exception.KettleStepException)1 RowDataUtil (org.pentaho.di.core.row.RowDataUtil)1 Variables (org.pentaho.di.core.variables.Variables)1 KettleVFS (org.pentaho.di.core.vfs.KettleVFS)1 BaseMessages (org.pentaho.di.i18n.BaseMessages)1 Trans (org.pentaho.di.trans.Trans)1