use of org.knime.core.data.util.NonClosableInputStream in project knime-core by knime.
the class RegressionTreeModel method load.
/**
 * Loads and returns a new regression tree model; the argument stream is NOT closed afterwards.
 *
 * <p>The stream is expected to contain, in this order: an int version, the tree type, the tree
 * meta data, the regression tree itself and a terminating 0 byte.
 *
 * @param in the stream to read from; it is wrapped so that closing the internal reader leaves it open
 * @param exec not used by this method
 * @param treeBuildingInterner passed on to {@code TreeModelRegression.load}
 * @return a new model assembled from the meta data, tree and tree type read from the stream
 * @throws IOException if the stored version is greater than 20140201, if the tree cannot be read,
 *             or if the model is not terminated by a 0 byte
 * @throws CanceledExecutionException declared in the signature; this method does not throw it directly
 */
public static RegressionTreeModel load(final InputStream in, final ExecutionMonitor exec, final TreeBuildingInterner treeBuildingInterner) throws IOException, CanceledExecutionException {
    // wrapping the argument (zip input) stream in a buffered stream
    // reduces read operation from, e.g. 42s to 2s
    TreeModelDataInputStream input = new TreeModelDataInputStream(new BufferedInputStream(new NonClosableInputStream(in)));
    try {
        int version = input.readInt();
        if (version > 20140201) {
            throw new IOException("Tree Ensemble version " + version + " not supported");
        }
        TreeType type = TreeType.load(input);
        TreeMetaData metaData = TreeMetaData.load(input);
        TreeModelRegression model;
        try {
            model = TreeModelRegression.load(input, metaData, treeBuildingInterner);
            if (input.readByte() != 0) {
                throw new IOException("Model not terminated by 0 byte");
            }
        } catch (IOException e) {
            // add context but keep the original exception as cause
            throw new IOException("Can't read tree model. " + e.getMessage(), e);
        }
        return new RegressionTreeModel(metaData, model, type);
    } finally {
        // release the wrapper on every path; this does not close the method argument stream!!
        input.close();
    }
}
use of org.knime.core.data.util.NonClosableInputStream in project knime-core by knime.
the class PMMLPortObject method loadFrom.
/**
 * Initializes the pmml port object based on the xml input stream.
 *
 * <p>The stream is parsed and then closed by this method, also when parsing fails. If the parsed
 * document is not a {@code PMMLDocument}, a conversion is attempted for documents recognized by
 * {@code PMMLUtils.isOldKNIMEPMML} or {@code PMMLUtils.is4_1PMML}.
 *
 * @param spec the referring spec of this object
 * @param is the pmml input stream; closed exactly once by this method
 * @throws IOException if reading from or closing the stream fails
 * @throws XmlException if something goes wrong during reading
 * @throws RuntimeException if the document is not a {@code PMMLDocument} and cannot be converted
 */
public void loadFrom(final PMMLPortObjectSpec spec, final InputStream is) throws IOException, XmlException {
    // disallow close in the factory -- we had indeterministic behavior
    // where close was called more than once (which should be OK) but as
    // the argument input stream is a NonClosableZipInput, which delegates
    // close to closeEntry(), we have to make sure that close is only
    // called once.
    // TODO: The document is read twice here. Could we "probe" into the file to check the version?
    XmlObject xmlDoc = null;
    boolean parsed = false;
    try {
        xmlDoc = XmlObject.Factory.parse(new NonClosableInputStream(is));
        parsed = true;
    } finally {
        if (parsed) {
            is.close();
        } else {
            // parsing failed: still close the stream (once), but let the parse failure propagate
            try {
                is.close();
            } catch (IOException ignored) {
                // intentionally ignored so that the original parse exception is not masked
            }
        }
    }
    if (xmlDoc instanceof PMMLDocument) {
        m_pmmlDoc = (PMMLDocument) xmlDoc;
    } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
        /* Try to recover when reading a PMML 3.x/4.0 document that
         * was produced by KNIME by just replacing the PMML version and
         * namespace. */
        try {
            String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
            /* Parse the modified document and assign it to a
             * PMMLDocument.*/
            m_pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
        } catch (Exception e) {
            throw new RuntimeException("Parsing of PMML v 3.x/4.0 document failed.", e);
        }
        LOGGER.info("KNIME produced PMML 3.x/4.0 converted to PMML 4.1.");
    } else {
        throw new RuntimeException("Parsing of PMML v 3.x/4.0 document failed.");
    }
    m_spec = spec;
}
use of org.knime.core.data.util.NonClosableInputStream in project knime-core by knime.
the class PMMLPortObjectSpec method loadFrom.
/**
 * Loads a port object spec from consecutive zip entries: first the data table spec, then the
 * mining schema (learning and target columns) and, if a further entry exists, the preprocessing
 * settings (active columns).
 *
 * @param in stream reading the relevant files
 * @return a completely loaded port object spec with {@link DataTableSpec},
 * and the sets of learning, ignored and target columns.
 * @throws IOException if something goes wrong, including a missing data table spec or mining
 *             schema entry
 * @throws InvalidSettingsException if something goes wrong, including a stored column name that
 *             is not contained in the loaded {@link DataTableSpec}
 */
public static PMMLPortObjectSpec loadFrom(final PortObjectSpecZipInputStream in) throws IOException, InvalidSettingsException {
    NonClosableInputStream noCloseIn = new NonClosableInputStream(in);
    // the data table spec
    if (in.getNextEntry() == null) {
        throw new IOException("Missing zip entry for the data table spec");
    }
    // TODO: sanitycheck if name is the same
    NodeSettingsRO settings = NodeSettings.loadFromXML(noCloseIn);
    DataTableSpec dataTableSpec = DataTableSpec.load(settings);
    // the mining schema
    if (in.getNextEntry() == null) {
        throw new IOException("Missing zip entry for the mining schema");
    }
    // TODO: sanity check if names are consistent
    NodeSettingsRO miningSchemaSettings = NodeSettings.loadFromXML(noCloseIn);
    List<String> learningCols = loadColumnNames(miningSchemaSettings, LEARNING_KEY, dataTableSpec);
    List<String> targetCols = loadColumnNames(miningSchemaSettings, TARGET_KEY, dataTableSpec);
    // the preprocessing settings if existent
    ZipEntry preprocEntry = in.getNextEntry();
    List<String> activeCols = null;
    if (preprocEntry != null) {
        NodeSettingsRO preprocSettings = NodeSettings.loadFromXML(noCloseIn);
        activeCols = loadColumnNames(preprocSettings, PREPROC_COL_KEY, dataTableSpec);
    }
    return new PMMLPortObjectSpec(dataTableSpec, activeCols, learningCols, targetCols);
}

/**
 * Reads the string array stored under {@code key} and verifies that every name refers to a
 * column of the given table spec.
 *
 * @param settings the settings to read the column names from
 * @param key the key of the string array
 * @param dataTableSpec the spec the column names are checked against
 * @return the column names in stored order
 * @throws InvalidSettingsException if the key cannot be read or a name is not in the spec
 */
private static List<String> loadColumnNames(final NodeSettingsRO settings, final String key, final DataTableSpec dataTableSpec) throws InvalidSettingsException {
    List<String> colNames = new LinkedList<String>();
    for (String colName : settings.getStringArray(key)) {
        if (dataTableSpec.getColumnSpec(colName) == null) {
            throw new InvalidSettingsException("Column " + colName + " is not in DataTableSpec");
        }
        colNames.add(colName);
    }
    return colNames;
}
use of org.knime.core.data.util.NonClosableInputStream in project knime-core by knime.
the class PMMLPreprocPortObject method load.
/**
 * {@inheritDoc}
 *
 * <p>Each zip entry is named after a class. That class is instantiated via its no-argument
 * constructor, populated by SAX-parsing the entry content with the operation's load handler and
 * appended to the list of operations.
 *
 * @throws IOException if an entry cannot be read, or wrapping any other failure while resolving,
 *             instantiating or parsing an operation
 */
@Override
protected void load(final PortObjectZipInputStream in, final PortObjectSpec spec, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    // the factory does not depend on the entry, so create it once
    // NOTE(review): the parser runs with the factory's default settings (DTDs / external entities
    // are not restricted here) -- confirm that the loaded entries are always trusted
    SAXParserFactory fac = SAXParserFactory.newInstance();
    ZipEntry entry;
    while ((entry = in.getNextEntry()) != null) {
        String clazzName = entry.getName();
        try {
            Class<?> clazz = Class.forName(clazzName);
            if (!PMMLPreprocOperation.class.isAssignableFrom(clazz)) {
                // throw exception
                throw new IllegalArgumentException("Class " + clazz.getName() + " must extend PMMLPreprocOperation! " + "Loading failed!");
            }
            // Class.newInstance() is deprecated; go through the no-arg constructor instead
            PMMLPreprocOperation op = (PMMLPreprocOperation) clazz.getDeclaredConstructor().newInstance();
            SAXParser parser = fac.newSAXParser();
            // shield the zip stream so that the parser cannot close it
            parser.parse(new NonClosableInputStream(in), op.getHandlerForLoad());
            m_operations.add(op);
        } catch (Exception e) {
            // name the failing entry and keep the original exception as cause
            throw new IOException("Unable to load preprocessing operation \"" + clazzName + "\": " + e.getMessage(), e);
        }
        in.closeEntry();
    }
    m_spec = (PMMLPreprocPortObjectSpec) spec;
}
use of org.knime.core.data.util.NonClosableInputStream in project knime-core by knime.
the class RegressionTreeModel method load.
/**
 * Loads and returns a new regression tree model; the argument stream is NOT closed afterwards.
 *
 * <p>The stream is expected to contain, in this order: an int version, the tree type, the tree
 * meta data, the regression tree itself and a terminating 0 byte.
 *
 * @param in the stream to read from; it is wrapped so that closing the internal reader leaves it open
 * @param exec not used by this method
 * @return a new model assembled from the meta data, tree and tree type read from the stream
 * @throws IOException if the stored version is greater than 20140201, if the tree cannot be read,
 *             or if the model is not terminated by a 0 byte
 * @throws CanceledExecutionException declared in the signature; this method does not throw it directly
 */
public static RegressionTreeModel load(final InputStream in, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    // wrapping the argument (zip input) stream in a buffered stream
    // reduces read operation from, e.g. 42s to 2s
    TreeModelDataInputStream input = new TreeModelDataInputStream(new BufferedInputStream(new NonClosableInputStream(in)));
    try {
        int version = input.readInt();
        if (version > 20140201) {
            throw new IOException("Tree Ensemble version " + version + " not supported");
        }
        TreeType type = TreeType.load(input);
        TreeMetaData metaData = TreeMetaData.load(input);
        TreeModelRegression model;
        try {
            model = TreeModelRegression.load(input, metaData);
            if (input.readByte() != 0) {
                throw new IOException("Model not terminated by 0 byte");
            }
        } catch (IOException e) {
            // add context but keep the original exception as cause
            throw new IOException("Can't read tree model. " + e.getMessage(), e);
        }
        return new RegressionTreeModel(metaData, model, type);
    } finally {
        // release the wrapper on every path; this does not close the method argument stream!!
        input.close();
    }
}
Aggregations