use of org.knime.core.util.Pair in project knime-core by knime.
the class RegressionTreePMMLPredictorNodeModel method importModel.
/**
 * Reads a regression tree model out of a PMML port object.
 * <p>
 * A fresh {@link RegressionTreeModelPMMLTranslator} is initialized from the port object; if the translator
 * reports a warning, it is forwarded as this node's warning message.
 *
 * @param pmmlPO the PMML port object to translate
 * @return a pair holding the translated model (first) and the spec built from the translator's learn spec (second)
 */
private Pair<RegressionTreeModel, RegressionTreeModelPortObjectSpec> importModel(final PMMLPortObject pmmlPO) {
    final RegressionTreeModelPMMLTranslator translator = new RegressionTreeModelPMMLTranslator();
    pmmlPO.initializeModelTranslator(translator);

    // surface translator warnings to the user
    if (translator.hasWarning()) {
        setWarningMessage(translator.getWarning());
    }

    final RegressionTreeModel model =
        new RegressionTreeModel(translator.getTreeMetaData(), translator.getTree(), TreeType.Ordinary);
    final RegressionTreeModelPortObjectSpec modelSpec =
        new RegressionTreeModelPortObjectSpec(translator.getLearnSpec());
    return new Pair<>(model, modelSpec);
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class DataCellToJavaConverterRegistry method parseAnnotations.
/*
 * Walks over the value classes of all available data types, collects their methods annotated with
 * @DataValueAccessMethod and passes each (method, annotation) pair to parseAnnotation.
 * A value class shared by several data types is handled only once.
 */
private void parseAnnotations() {
    final Set<Class<? extends DataValue>> seenValueClasses = new HashSet<>();

    for (final DataType type : DataTypeRegistry.getInstance().availableDataTypes()) {
        for (final Class<? extends DataValue> valueClass : type.getValueClasses()) {
            // add() returns false if the class was recorded by an earlier iteration
            if (!seenValueClasses.add(valueClass)) {
                continue;
            }

            final Collection<Pair<Method, DataValueAccessMethod>> annotatedMethods =
                ClassUtil.getMethodsWithAnnotation(valueClass, DataValueAccessMethod.class);

            // one parseAnnotation call per annotated accessor method
            for (final Pair<Method, DataValueAccessMethod> annotated : annotatedMethods) {
                parseAnnotation(valueClass, annotated.getFirst(), annotated.getSecond());
            }
        }
    }
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class AppendedRowsRowInput method create.
/**
 * Builds a single row input that iterates over all given inputs one after another. The resulting table spec is
 * derived from the specs of all inputs via {@link AppendedRowsTable#generateDataTableSpec}; see
 * {@link AppendedRowsTable} for how missing or conflicting columns are treated. Duplicate row keys are handled
 * according to {@code duplPolicy}.
 *
 * @param ins the inputs to concatenate (must not be null and must not contain null elements)
 * @param duplPolicy policy for duplicate row keys, must not be null
 * @param suffix suffix appended to duplicate keys; must be non-empty if the policy is
 *            {@link DuplicatePolicy#AppendSuffix}
 * @param exec (optional) execution monitor used for progress reporting and cancellation checks, may be null
 * @param totalRowCount expected total number of rows over all inputs. Only used for progress reporting -- a
 *            negative value disables progress.
 * @return a new row input whose iteration scans all argument inputs
 */
public static AppendedRowsRowInput create(final RowInput[] ins, final DuplicatePolicy duplPolicy, final String suffix, final ExecutionMonitor exec, final long totalRowCount) {
    final int inputCount = ins.length;

    // collect the individual specs and merge them into the output spec
    final DataTableSpec[] inputSpecs = new DataTableSpec[inputCount];
    for (int idx = 0; idx < inputCount; idx++) {
        inputSpecs[idx] = ins[idx].getDataTableSpec();
    }
    final DataTableSpec mergedSpec = AppendedRowsTable.generateDataTableSpec(inputSpecs);

    // argument validation
    CheckUtils.checkArgumentNotNull(duplPolicy, "Arg must not be null");
    if (DuplicatePolicy.AppendSuffix.equals(duplPolicy)) {
        final boolean hasSuffix = StringUtils.isNotEmpty(suffix);
        CheckUtils.checkArgument(hasSuffix, "Suffix must not be an empty string.");
    }

    // wrap every input as an (iterator, spec) pair supplier
    final PairSupplier[] pairSuppliers = new PairSupplier[inputCount];
    for (int idx = 0; idx < inputCount; idx++) {
        final RowInput input = ins[idx];
        pairSuppliers[idx] = new PairSupplier(
            new Pair<RowIterator, DataTableSpec>(new RowInputIterator(input), input.getDataTableSpec()));
    }

    final AppendedRowsIterator appendingIterator =
        new AppendedRowsIterator(pairSuppliers, duplPolicy, suffix, mergedSpec, exec, totalRowCount);
    return new AppendedRowsRowInput(mergedSpec, appendingIterator);
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class LKGradientBoostedTreesLearner method learn.
/**
 * {@inheritDoc}
 * <p>
 * Learns {@code nrModels} levels of regression trees, one tree per class and level. For every level the
 * per-row class probabilities are computed from the current function values ({@code previousFunctions}), one
 * {@link TreeLearnerCallable} per class is enqueued on the global KNIME thread pool, and the resulting trees and
 * coefficient maps are collected into the returned model.
 *
 * @param exec execution monitor used for messages, progress and sub-progress creation
 * @return the multi-class gradient boosted trees model built from all learned trees and coefficient maps
 * @throws CanceledExecutionException declared by this method; not thrown directly here -- presumably raised by
 *             one of the invoked helpers when execution is canceled (TODO confirm)
 * @throws ExecutionException if retrieving the result of a tree learner task fails
 * @throws InterruptedException if the thread is interrupted while waiting for a semaphore permit or for the
 *             result of a tree learner task
 */
@Override
public MultiClassGradientBoostedTreesModel learn(final ExecutionMonitor exec) throws CanceledExecutionException, InterruptedException, ExecutionException {
final TreeData data = getData();
final TreeTargetNominalColumnData target = (TreeTargetNominalColumnData) data.getTargetColumn();
final NominalValueRepresentation[] classNomVals = target.getMetaData().getValues();
final int numClasses = classNomVals.length;
final String[] classLabels = new String[numClasses];
final int nrModels = getConfig().getNrModels();
final int nrRows = target.getNrRows();
// models[level][class] and coefficientMaps.get(level).get(class) are filled in parallel below
final TreeModelRegression[][] models = new TreeModelRegression[nrModels][numClasses];
final ArrayList<ArrayList<Map<TreeNodeSignature, Double>>> coefficientMaps = new ArrayList<ArrayList<Map<TreeNodeSignature, Double>>>(nrModels);
// variables for parallelization
final ThreadPool tp = KNIMEConstants.GLOBAL_THREAD_POOL;
// handed to every TreeLearnerCallable and polled via checkThrowable(...); presumably carries a failure from a
// worker back to this thread -- confirm against TreeLearnerCallable/checkThrowable
final AtomicReference<Throwable> learnThrowableRef = new AtomicReference<Throwable>();
// number of semaphore permits per level: 1.5 times the number of available processors (integer arithmetic)
final int procCount = 3 * Runtime.getRuntime().availableProcessors() / 2;
exec.setMessage("Transforming problem");
// transform the original k class classification problem into k regression problems
final TreeData[] actual = new TreeData[numClasses];
for (int i = 0; i < numClasses; i++) {
final double[] newTarget = calculateNewTarget(target, i);
actual[i] = createNumericDataFromArray(newTarget);
classLabels[i] = classNomVals[i].getNominalValue();
}
final RandomData rd = getConfig().createRandomData();
// previousFunctions[class][row]: current function value per class and row; starts at 0.0 (Java array default)
// and is passed to the tree learner of the respective class
final double[][] previousFunctions = new double[numClasses][nrRows];
TreeNodeSignatureFactory signatureFactory = null;
final int maxLevels = getConfig().getMaxLevels();
// with a finite maximum tree depth the signature factory is created with a capacity of 2^(maxLevels - 1)
if (maxLevels < TreeEnsembleLearnerConfiguration.MAX_LEVEL_INFINITE) {
int capacity = IntMath.pow(2, maxLevels - 1);
signatureFactory = new TreeNodeSignatureFactory(capacity);
} else {
signatureFactory = new TreeNodeSignatureFactory();
}
exec.setMessage("Learn trees");
for (int i = 0; i < nrModels; i++) {
// a fresh semaphore per level limits how many tree learner tasks are enqueued at once
final Semaphore semaphore = new Semaphore(procCount);
final ArrayList<Map<TreeNodeSignature, Double>> classCoefficientMaps = new ArrayList<Map<TreeNodeSignature, Double>>(numClasses);
// prepare calculation of pseudoResiduals
// probs[j][r] = exp(F_j(r)) / sum_k exp(F_k(r)), i.e. a softmax over the classes for every row
final double[][] probs = new double[numClasses][nrRows];
for (int r = 0; r < nrRows; r++) {
double sumExpF = 0;
for (int j = 0; j < numClasses; j++) {
sumExpF += Math.exp(previousFunctions[j][r]);
}
for (int j = 0; j < numClasses; j++) {
probs[j][r] = Math.exp(previousFunctions[j][r]) / sumExpF;
}
}
final Future<?>[] treeCoefficientMapPairs = new Future<?>[numClasses];
// enqueue one learner task per class; each task gets its own RandomData seeded from the shared generator
for (int j = 0; j < numClasses; j++) {
checkThrowable(learnThrowableRef);
final RandomData rdSingle = TreeEnsembleLearnerConfiguration.createRandomData(rd.nextLong(Long.MIN_VALUE, Long.MAX_VALUE));
final ExecutionMonitor subExec = exec.createSubProgress(0.0);
// blocks until a permit is free; NOTE(review): no release follows in this loop, so the permit is presumably
// released by TreeLearnerCallable, which receives the semaphore -- confirm
semaphore.acquire();
treeCoefficientMapPairs[j] = tp.enqueue(new TreeLearnerCallable(rdSingle, probs[j], actual[j], subExec, numClasses, previousFunctions[j], semaphore, learnThrowableRef, signatureFactory));
}
// collect the results in class order
for (int j = 0; j < numClasses; j++) {
checkThrowable(learnThrowableRef);
// a permit is held while the result is fetched and returned right after
semaphore.acquire();
// unchecked cast: the futures are stored as Future<?>; the callable is expected to yield this pair type
final Pair<TreeModelRegression, Map<TreeNodeSignature, Double>> pair = (Pair<TreeModelRegression, Map<TreeNodeSignature, Double>>) treeCoefficientMapPairs[j].get();
models[i][j] = pair.getFirst();
classCoefficientMaps.add(pair.getSecond());
semaphore.release();
}
checkThrowable(learnThrowableRef);
coefficientMaps.add(classCoefficientMaps);
// NOTE(review): i is zero-based here, so the first finished level is reported as "0/nrModels" and the
// fraction does not reach 1.0 within this loop -- confirm whether i + 1 was intended
exec.setProgress((double) i / nrModels, "Finished level " + i + "/" + nrModels);
}
return MultiClassGradientBoostedTreesModel.createMultiClassGradientBoostedTreesModel(getConfig(), data.getMetaData(), models, data.getTreeType(), 0, numClasses, coefficientMaps, classLabels);
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class TreeNominalColumnDataTest method createPCATestData.
/**
 * Builds a small nominal test data set: one string attribute column ("test-col") with the values A-E and a
 * string target column ("target-col") with the classes T1-T3. For every attribute value the number of rows per
 * class is given by a fixed distribution table (60 rows per attribute value, 300 rows in total).
 *
 * @param config learner configuration passed on when creating the attribute column data
 * @return a pair of the attribute column data (attribute index set to 0) and the nominal target column data
 */
private static Pair<TreeNominalColumnData, TreeTargetNominalColumnData> createPCATestData(final TreeEnsembleLearnerConfiguration config) {
    final DataColumnSpec attributeSpec = new DataColumnSpecCreator("test-col", StringCell.TYPE).createSpec();
    final String[] attVals = {"A", "B", "C", "D", "E"};
    final String[] classes = {"T1", "T2", "T3"};
    final TreeNominalColumnDataCreator attributeCreator = new TreeNominalColumnDataCreator(attributeSpec);

    // target column spec whose domain lists the distinct class values
    final DataColumnSpecCreator targetSpecCreator = new DataColumnSpecCreator("target-col", StringCell.TYPE);
    final StringCell[] classCells = Arrays.stream(classes)
        .distinct()
        .map(StringCell::new)
        .toArray(StringCell[]::new);
    targetSpecCreator.setDomain(new DataColumnDomainCreator(classCells).createDomain());
    final DataColumnSpec targetSpec = targetSpecCreator.createSpec();
    final TreeTargetColumnDataCreator targetCreator = new TreeTargetNominalColumnDataCreator(targetSpec);

    // classDistributions[a][c] = number of rows with attribute value a and class c
    final int[][] classDistributions = {
        {40, 10, 10},
        {10, 40, 10},
        {20, 30, 10},
        {20, 15, 25},
        {10, 5, 45}
    };

    long nextRowIndex = 0;
    for (int att = 0; att < attVals.length; att++) {
        final int[] rowsPerClass = classDistributions[att];
        for (int cls = 0; cls < classes.length; cls++) {
            for (int remaining = rowsPerClass[cls]; remaining > 0; remaining--) {
                final RowKey key = RowKey.createRowKey(nextRowIndex++);
                attributeCreator.add(key, new StringCell(attVals[att]));
                targetCreator.add(key, new StringCell(classes[cls]));
            }
        }
    }

    final TreeNominalColumnData attributeData = attributeCreator.createColumnData(0, config);
    attributeData.getMetaData().setAttributeIndex(0);
    final TreeTargetNominalColumnData targetData = (TreeTargetNominalColumnData) targetCreator.createColumnData();
    return Pair.create(attributeData, targetData);
}
Aggregations