use of org.knime.core.data.DataType in project knime-core by knime.
the class SorterNodeModelTest method testExecuteBufferedDataTableArrayExecutionContext.
/**
 * Test method for {@link org.knime.base.node.preproc.sorter.SorterNodeModel#execute(org.knime.core.node.BufferedDataTable[], org.knime.core.node.ExecutionContext)}.
 * <p>
 * Three scenarios are exercised: a table with a single row, which must come
 * back unchanged, and two unit matrix tables (dimension 50 and 100) whose
 * rows are expected to come back in descending row key order.
 *
 * @throws Exception if loading the settings or executing the node fails
 * @throws CanceledExecutionException if the execution is canceled
 */
@Test
public final void testExecuteBufferedDataTableArrayExecutionContext() throws CanceledExecutionException, Exception {
    // try to sort a table with 1 entry
    String[] columnNames = { "col1", "col2", "col3", "col4" };
    DataType[] columnTypes = { DoubleCell.TYPE, StringCell.TYPE, IntCell.TYPE, DoubleCell.TYPE };
    DataCell[] myRow = { new DoubleCell(2.4325), new StringCell("Test"), new IntCell(7), new DoubleCell(32432.324) };
    DataRow[] rows = { new DefaultRow(Integer.toString(1), myRow) };
    DataTable[] inputTable = { new DefaultTable(rows, columnNames, columnTypes) };
    // set settings
    String[] includeCols = { "col1", "col2", "col3", "col4" };
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = { true, true, true, true };
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable =
        m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output: the single row must be the only row of the result
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    Assert.assertEquals(rows[0], rowIt.next());
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();

    // *********************************************//
    // try to sort a large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // start with a little one
    assertUnitMatrixSortedInReverse(50);

    // *********************************************//
    // try to sort a very large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // dimension 300 => 15,8 secs.
    // dimension 500 => 49,7 secs.
    assertUnitMatrixSortedInReverse(100);
}

/**
 * Sorts a unit matrix table of the given dimension on all of its columns
 * ("col0" .. "col(dimension-1)", sort order flag <code>true</code> for each)
 * and asserts that the row keys of the result count down from
 * <code>dimension - 1</code>. The node model is reset afterwards.
 *
 * @param dimension the dimension passed to <code>generateUnitMatrixTable</code>
 * @throws Exception if loading the settings or executing the node fails
 */
private void assertUnitMatrixSortedInReverse(final int dimension) throws Exception {
    // set settings
    String[] includeCols = new String[dimension];
    boolean[] sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
        sortorder[i] = true;
    }
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    DataTable[] inputTable = { generateUnitMatrixTable(dimension) };
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable =
        m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output (should have sorted all rows in reverse order)
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    int expectedKey = dimension - 1;
    while (rowIt.hasNext()) {
        RowKey rk = rowIt.next().getKey();
        int actualKey = Integer.parseInt(rk.getString());
        Assert.assertEquals(expectedKey, actualKey);
        expectedKey--;
    }
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class AbstractTrainingRowBuilder method getVectorLength.
/**
 * Determines the length of the vector stored in the given cell.
 * <p>
 * Only cells whose type is compatible with {@link BitVectorValue} or
 * {@link ByteVectorValue} are accepted. Double vectors and double lists are
 * deliberately not handled here until they become compatible with PMML.
 *
 * @param vectorCell the cell holding the vector
 * @return the length reported by the vector value
 * @throws IllegalStateException if the cell type is neither bit vector nor
 *             byte vector compatible
 */
private static long getVectorLength(final DataCell vectorCell) {
    final DataType type = vectorCell.getType();
    if (type.isCompatible(BitVectorValue.class)) {
        return ((BitVectorValue) vectorCell).length();
    }
    if (type.isCompatible(ByteVectorValue.class)) {
        return ((ByteVectorValue) vectorCell).length();
    }
    throw new IllegalStateException("The provided cell is of unknown vector type \"" + vectorCell.getType() + "\".");
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class AbstractTrainingRowBuilder method build.
/**
 * Converts a data row into a sparse training row.
 * <p>
 * The feature cells of the row are visited in the order given by
 * <code>m_featureCellIndices</code>. For every non-zero feature its index and
 * value are written into the scratch arrays <code>m_nonZeroIndices</code> and
 * <code>m_nonZeroValues</code>; slot 0 always holds the intercept feature
 * (index 0, value 1). Nominal cells are one-hot encoded against their domain
 * values, where the first domain value contributes no feature at all. Bit and
 * byte vector cells contribute one feature per set bit / non-zero count, and
 * numerical cells contribute a single feature.
 *
 * @param row the row to convert
 * @param id the id handed on to <code>createTrainingRow</code>
 * @return the training row created from the collected indices and values
 * @throws IllegalStateException if a nominal cell is not contained in the
 *             domain values, or a cell is of an unsupported type
 */
@Override
public T build(final DataRow row, final int id) {
    // the intercept feature is always present and occupies the first slot
    m_nonZeroIndices[0] = 0;
    m_nonZeroValues[0] = 1.0F;
    // number of slots filled so far in the scratch arrays
    int used = 1;
    // feature index at which the features of the current cell start
    int featureOffset = 1;
    for (int i = 0; i < m_featureCellIndices.size(); i++) {
        final Integer cellIdx = m_featureCellIndices.get(i);
        final DataCell cell = row.getCell(cellIdx);
        final DataType cellType = cell.getType();
        if (cellType.isCompatible(NominalValue.class)) {
            // nominal cell: one-hot encoding relative to the domain values
            final List<DataCell> domainValues = m_nominalDomainValues.get(cellIdx);
            final int position = domainValues.indexOf(cell);
            if (position == -1) {
                throw new IllegalStateException("DataCell \"" + cell.toString() + "\" is not in the DataColumnDomain. Please apply a " + "Domain Calculator on the columns with nominal values.");
            }
            if (position > 0) {
                // the first domain value (position 0) is encoded by the absence of a feature
                m_nonZeroIndices[used] = featureOffset + position - 1;
                m_nonZeroValues[used] = 1.0F;
                used++;
            }
            featureOffset += domainValues.size() - 1;
        } else if (m_vectorLengths.containsKey(cellIdx)) {
            // vector cell: one feature per vector position
            // (double vectors and double lists are not handled until they become compatible with PMML)
            if (cellType.isCompatible(BitVectorValue.class)) {
                final BitVectorValue bits = (BitVectorValue) cell;
                long setBit = bits.nextSetBit(0L);
                while (setBit >= 0) {
                    m_nonZeroIndices[used] = (int) (featureOffset + setBit);
                    m_nonZeroValues[used] = 1.0F;
                    used++;
                    setBit = bits.nextSetBit(setBit + 1);
                }
            } else if (cellType.isCompatible(ByteVectorValue.class)) {
                final ByteVectorValue counts = (ByteVectorValue) cell;
                long countIdx = counts.nextCountIndex(0L);
                while (countIdx >= 0) {
                    m_nonZeroIndices[used] = (int) (featureOffset + countIdx);
                    m_nonZeroValues[used] = counts.get(countIdx);
                    used++;
                    countIdx = counts.nextCountIndex(countIdx + 1);
                }
            } else {
                // should never be thrown because we check the compatibility in the constructor
                throw new IllegalStateException("DataCell \"" + cell.toString() + "\" is of an unknown vector/collections type.");
            }
            featureOffset += m_vectorLengths.get(cellIdx);
        } else if (cellType.isCompatible(DoubleValue.class)) {
            // numerical cell: a single feature that is only stored if it is non-zero
            final double number = ((DoubleValue) cell).getDoubleValue();
            if (!MathUtils.equals(number, 0.0)) {
                m_nonZeroIndices[used] = featureOffset;
                m_nonZeroValues[used] = (float) number;
                used++;
            }
            featureOffset++;
        } else {
            // a different DataCell of incompatible type.
            throw new IllegalStateException("The DataCell \"" + cell.toString() + "\" is of incompatible type \"" + cellType.toPrettyString() + "\".");
        }
    }
    // hand out trimmed copies so the scratch arrays can be reused for the next row
    final int[] indices = Arrays.copyOf(m_nonZeroIndices, used);
    final float[] values = Arrays.copyOf(m_nonZeroValues, used);
    return createTrainingRow(row, indices, values, id);
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class SampleDataNodeModel method configure.
/**
 * {@inheritDoc}
 */
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) {
    // one slot per entry of m_minValues plus a trailing slot for the cluster membership column
    final DataColumnSpec[] allColumns = new DataColumnSpec[m_minValues.length + 1];
    int nextColumn = 0;
    for (int universe = 0; universe < m_clusterCount.length; universe++) {
        final int dimensions = m_uniSize[universe];
        // every column of a universe carries the name of that universe as a property
        final Hashtable<String, String> universeProps = new Hashtable<String, String>();
        universeProps.put("universe_name", "Universe_" + universe);
        for (int dim = 0; dim < dimensions; dim++) {
            final DataColumnSpecCreator dimCreator =
                new DataColumnSpecCreator("Universe_" + universe + "_" + dim, DoubleCell.TYPE);
            dimCreator.setProperties(new DataColumnProperties(universeProps));
            allColumns[nextColumn] = dimCreator.createSpec();
            nextColumn++;
        }
    }
    allColumns[nextColumn] = new DataColumnSpecCreator("Cluster Membership", StringCell.TYPE).createSpec();
    // the second output spec consists of all columns except the trailing cluster membership column
    final DataColumnSpec[] centerColumns = new DataColumnSpec[allColumns.length - 1];
    System.arraycopy(allColumns, 0, centerColumns, 0, centerColumns.length);
    final DataTableSpec dataSpec = new DataTableSpec(allColumns);
    final DataTableSpec centerSpec = new DataTableSpec(centerColumns);
    return new DataTableSpec[] { dataSpec, centerSpec };
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class BinningUtil method binNominal.
/**
 * Decides whether a column should be binned nominally, i.e. with one bin per
 * value, instead of into intervals.
 * <p>
 * Columns that are not compatible with {@link DoubleValue} are always binned
 * nominally. Columns compatible with {@link LongValue} are binned nominally
 * if the span between the lower and the upper bound of the column domain does
 * not exceed the number of bins. If such a column has no lower or upper bound
 * in its domain, the span is unknown and <code>false</code> is returned.
 *
 * @param colSpec the {@link DataColumnSpec} of the column to bin
 * @param noOfBins the number of bins
 * @return <code>true</code> if the bins should be nominal
 */
public static boolean binNominal(final DataColumnSpec colSpec, final int noOfBins) {
    final DataType dataType = colSpec.getType();
    if (!dataType.isCompatible(DoubleValue.class)) {
        // it's not numerical
        return true;
    }
    if (!dataType.isCompatible(LongValue.class)) {
        // it's numerical but not an integer
        return false;
    }
    // it's an integer...
    final DataColumnDomain domain = colSpec.getDomain();
    if (domain.getLowerBound() == null || domain.getUpperBound() == null) {
        // without bounds we cannot tell whether every value would get an own bin
        return false;
    }
    final long lowerBound = ((LongValue) domain.getLowerBound()).getLongValue();
    final long upperBound = ((LongValue) domain.getUpperBound()).getLongValue();
    try {
        // true if the span is small enough for every value to get an own bin
        return Math.subtractExact(upperBound, lowerBound) <= noOfBins;
    } catch (ArithmeticException overflow) {
        // the true difference does not fit into a long: it is either far larger than any
        // int number of bins (upper > lower) or far smaller (upper < lower)
        return upperBound < lowerBound;
    }
}
Aggregations