Search in sources :

Example 71 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class SorterNodeModelTest method testExecuteBufferedDataTableArrayExecutionContext.

/**
 * Test method for {@link org.knime.base.node.preproc.sorter.SorterNodeModel#execute(org.knime.core.node.BufferedDataTable[], org.knime.core.node.ExecutionContext)}.
 * <p>
 * First sorts a table holding a single row and expects that row back unchanged. Afterwards two
 * unit-matrix tables (dimension 50 and 100) are sorted with every column included and every
 * sort-order flag set to <code>true</code>; the row keys of the result are expected to count down
 * from <code>dimension - 1</code>.
 *
 * @throws CanceledExecutionException if executing the node is canceled
 * @throws Exception if any other step of the test fails
 */
@Test
public final void testExecuteBufferedDataTableArrayExecutionContext() throws CanceledExecutionException, Exception {
    // try to sort a table with 1 entry
    String[] columnNames = { "col1", "col2", "col3", "col4" };
    DataType[] columnTypes = { DoubleCell.TYPE, StringCell.TYPE, IntCell.TYPE, DoubleCell.TYPE };
    DataCell[] myRow = { new DoubleCell(2.4325), new StringCell("Test"), new IntCell(7), new DoubleCell(32432.324) };
    DataRow[] rows = { new DefaultRow(Integer.toString(1), myRow) };
    DataTable[] inputTable = { new DefaultTable(rows, columnNames, columnTypes) };
    // set settings
    String[] includeCols = { "col1", "col2", "col3", "col4" };
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = { true, true, true, true };
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output: the only row must come back, and nothing else
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    Assert.assertEquals(rows[0], rowIt.next());
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
    // *********************************************//
    // try to sort a large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // start with a little one
    assertUnitMatrixSortedInReverseOrder(50);
    // *********************************************//
    // try to sort a very large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // dimension 300 => 15,8 secs.
    // dimension 500 => 49,7 secs.
    assertUnitMatrixSortedInReverseOrder(100);
}

/**
 * Configures the sorter for <code>dimension</code> columns named "col0" .. "col(dimension-1)",
 * all with sort-order flag <code>true</code>, executes it on a generated unit-matrix table and
 * asserts that the row keys of the result count down from <code>dimension - 1</code>. The node
 * model is reset afterwards.
 *
 * @param dimension the dimension of the unit matrix to generate and sort
 * @throws CanceledExecutionException if executing the node is canceled
 * @throws Exception if any other step fails
 */
private void assertUnitMatrixSortedInReverseOrder(final int dimension) throws CanceledExecutionException, Exception {
    // set settings
    String[] includeCols = new String[dimension];
    boolean[] sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
        sortorder[i] = true;
    }
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    DataTable[] inputTable = { generateUnitMatrixTable(dimension) };
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output (should have sorted all rows in reverse order)
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    int expectedKey = dimension - 1;
    while (rowIt.hasNext()) {
        RowKey rk = rowIt.next().getKey();
        Assert.assertEquals(expectedKey, Integer.parseInt(rk.getString()));
        expectedKey--;
    }
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
}
Also used : DataTable(org.knime.core.data.DataTable) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultTable(org.knime.core.data.def.DefaultTable) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) StringCell(org.knime.core.data.def.StringCell) RowIterator(org.knime.core.data.RowIterator) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) Test(org.junit.Test)

Example 72 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class AbstractTrainingRowBuilder method getVectorLength.

/**
 * Returns the length of the vector stored in the given cell.
 * <p>
 * Only cells whose type is compatible with {@link BitVectorValue} or {@link ByteVectorValue} are
 * handled; the result is whatever the respective <code>length()</code> method reports.
 *
 * @param vectorCell the cell holding the vector
 * @return the length of the vector
 * @throws IllegalStateException if the cell type is compatible with neither supported vector value
 */
private static long getVectorLength(final DataCell vectorCell) {
    final DataType type = vectorCell.getType();
    if (type.isCompatible(BitVectorValue.class)) {
        return ((BitVectorValue) vectorCell).length();
    }
    if (type.isCompatible(ByteVectorValue.class)) {
        return ((ByteVectorValue) vectorCell).length();
    }
    // Double vectors (DoubleVectorValue#getLength()) and double lists (ListDataValue#size())
    // are deliberately not handled here; add them once they become compatible with PMML.
    throw new IllegalStateException("The provided cell is of unknown vector type \"" + type + "\".");
}
Also used : DataType(org.knime.core.data.DataType) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 73 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class AbstractTrainingRowBuilder method build.

/**
 * Builds a sparse training row from the given data row.
 * <p>
 * The non-zero feature indices and values are collected in the shared buffers
 * <code>m_nonZeroIndices</code> / <code>m_nonZeroValues</code> and copied into right-sized arrays
 * before being handed to <code>createTrainingRow</code>. Slot 0 always represents the intercept
 * (index 0, value 1). Each feature cell is handled according to its type:
 * <ul>
 * <li>nominal cells: looked up in the stored domain values; a cell equal to the first domain value
 * adds no entry, any other value adds a single 1 entry,</li>
 * <li>cells registered in <code>m_vectorLengths</code>: one entry per set bit (bit vectors) or per
 * index returned by <code>nextCountIndex</code> (byte vectors),</li>
 * <li>cells compatible with {@link DoubleValue}: one entry unless the value equals 0.0.</li>
 * </ul>
 *
 * @param row the row to convert
 * @param id the id passed through to <code>createTrainingRow</code>
 * @return the training row created from the collected non-zero indices and values
 * @throws IllegalStateException if a nominal cell is not contained in its domain values, or a cell
 *             is of an unsupported type
 */
@Override
public T build(final DataRow row, final int id) {
    // the intercept feature is always present
    m_nonZeroIndices[0] = 0;
    m_nonZeroValues[0] = 1.0F;
    // number of buffer slots filled so far
    int used = 1;
    // feature index at which the current cell's features start
    int offset = 1;
    for (int f = 0; f < m_featureCellIndices.size(); f++) {
        final Integer column = m_featureCellIndices.get(f);
        final DataCell cell = row.getCell(column);
        final DataType type = cell.getType();

        if (type.isCompatible(NominalValue.class)) {
            // nominal cell
            final List<DataCell> domainValues = m_nominalDomainValues.get(column);
            final int position = domainValues.indexOf(cell);
            if (position == -1) {
                throw new IllegalStateException("DataCell \"" + cell.toString() + "\" is not in the DataColumnDomain. Please apply a Domain Calculator on the columns with nominal values.");
            }
            // a cell equal to the first domain value contributes no entry
            if (position > 0) {
                m_nonZeroIndices[used] = offset + position - 1;
                m_nonZeroValues[used] = 1.0F;
                used++;
            }
            offset += domainValues.size() - 1;
            continue;
        }

        if (m_vectorLengths.containsKey(column)) {
            // vector cell
            if (type.isCompatible(BitVectorValue.class)) {
                final BitVectorValue bits = (BitVectorValue) cell;
                long pos = bits.nextSetBit(0L);
                while (pos >= 0) {
                    m_nonZeroIndices[used] = (int) (offset + pos);
                    m_nonZeroValues[used] = 1.0F;
                    used++;
                    pos = bits.nextSetBit(pos + 1);
                }
            } else if (type.isCompatible(ByteVectorValue.class)) {
                final ByteVectorValue bytes = (ByteVectorValue) cell;
                long pos = bytes.nextCountIndex(0L);
                while (pos >= 0) {
                    m_nonZeroIndices[used] = (int) (offset + pos);
                    m_nonZeroValues[used] = bytes.get(pos);
                    used++;
                    pos = bytes.nextCountIndex(pos + 1);
                }
            } else {
                // DoubleVectorValue cells and collections of DoubleValue are intentionally not
                // handled yet; support is to be added once they can be used with PMML.
                // should never be thrown because we check the compatibility in the constructor
                throw new IllegalStateException("DataCell \"" + cell.toString() + "\" is of an unknown vector/collections type.");
            }
            offset += m_vectorLengths.get(column);
            continue;
        }

        if (!type.isCompatible(DoubleValue.class)) {
            // a different DataCell of incompatible type.
            throw new IllegalStateException("The DataCell \"" + cell.toString() + "\" is of incompatible type \"" + type.toPrettyString() + "\".");
        }
        // numerical cell
        final double number = ((DoubleValue) cell).getDoubleValue();
        if (!MathUtils.equals(number, 0.0)) {
            m_nonZeroIndices[used] = offset;
            m_nonZeroValues[used] = (float) number;
            used++;
        }
        offset++;
    }
    // hand out right-sized copies so the shared buffers can be reused
    final int[] indices = Arrays.copyOf(m_nonZeroIndices, used);
    final float[] featureValues = Arrays.copyOf(m_nonZeroValues, used);
    return createTrainingRow(row, indices, featureValues, id);
}
Also used : ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 74 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class SampleDataNodeModel method configure.

/**
 * {@inheritDoc}
 * <p>
 * Produces two output specs: the first holds one double column per universe dimension followed by
 * the string column "Cluster Membership"; the second holds the same columns without the last one.
 */
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) {
    final DataColumnSpec[] allColumns = new DataColumnSpec[m_minValues.length + 1];
    int next = 0;
    for (int universe = 0; universe < m_clusterCount.length; universe++) {
        final int dimensions = m_uniSize[universe];
        // all columns of one universe share the same "universe_name" property
        final Hashtable<String, String> universeProps = new Hashtable<String, String>();
        universeProps.put("universe_name", "Universe_" + universe);
        for (int dim = 0; dim < dimensions; dim++) {
            final DataColumnSpecCreator dimCreator = new DataColumnSpecCreator("Universe_" + universe + "_" + dim, DoubleCell.TYPE);
            dimCreator.setProperties(new DataColumnProperties(universeProps));
            allColumns[next] = dimCreator.createSpec();
            next++;
        }
    }
    // the membership column follows the universe columns
    // (presumably this is the last array slot - verify against how m_minValues is sized)
    allColumns[next] = new DataColumnSpecCreator("Cluster Membership", StringCell.TYPE).createSpec();
    // the second output spec is the first one minus its last column
    final DataColumnSpec[] withoutLast = new DataColumnSpec[allColumns.length - 1];
    System.arraycopy(allColumns, 0, withoutLast, 0, withoutLast.length);
    final DataTableSpec fullSpec = new DataTableSpec(allColumns);
    final DataTableSpec reducedSpec = new DataTableSpec(withoutLast);
    return new DataTableSpec[] { fullSpec, reducedSpec };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) Hashtable(java.util.Hashtable) DataType(org.knime.core.data.DataType) DataColumnProperties(org.knime.core.data.DataColumnProperties)

Example 75 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class BinningUtil method binNominal.

/**
 * Decides whether the bins of a column should be nominal.
 * <p>
 * Columns that are not compatible with {@link DoubleValue} are always binned nominally. Columns
 * compatible with {@link LongValue} are binned nominally if the span between the domain's lower
 * and upper bound does not exceed the number of bins. If the domain carries no usable
 * {@link LongValue} bounds, the span is unknown and the column is not treated as nominal.
 *
 * @param colSpec the {@link DataColumnSpec} of the column to bin
 * @param noOfBins the number of bins
 * @return <code>true</code> if the bins should be nominal
 */
public static boolean binNominal(final DataColumnSpec colSpec, final int noOfBins) {
    final DataType dataType = colSpec.getType();
    if (!dataType.isCompatible(DoubleValue.class)) {
        // it's not numerical
        return true;
    }
    if (dataType.isCompatible(LongValue.class)) {
        // it's an integer...
        final DataColumnDomain domain = colSpec.getDomain();
        // the bounds may be absent (null) or not be LongValues; instanceof guards against both
        // instead of failing with a NullPointerException / ClassCastException
        final Object lower = domain.getLowerBound();
        final Object upper = domain.getUpperBound();
        if (lower instanceof LongValue && upper instanceof LongValue) {
            final long lowerBound = ((LongValue) lower).getLongValue();
            final long upperBound = ((LongValue) upper).getLongValue();
            final long range = upperBound - lowerBound;
            // for a valid domain (upper >= lower) a negative difference can only stem from long
            // overflow, i.e. the real range is far larger than any number of bins
            final boolean overflowed = upperBound >= lowerBound && range < 0;
            if (!overflowed && range <= noOfBins) {
                // ... with few enough values that each one can get an own bin
                return true;
            }
        }
    }
    return false;
}
Also used : DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) LongValue(org.knime.core.data.LongValue) DataType(org.knime.core.data.DataType)

Aggregations

DataType (org.knime.core.data.DataType)330 DataColumnSpec (org.knime.core.data.DataColumnSpec)142 DataTableSpec (org.knime.core.data.DataTableSpec)101 DataCell (org.knime.core.data.DataCell)96 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)95 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)71 DoubleValue (org.knime.core.data.DoubleValue)67 DataRow (org.knime.core.data.DataRow)61 ArrayList (java.util.ArrayList)55 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)34 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)32 DefaultRow (org.knime.core.data.def.DefaultRow)24 HashSet (java.util.HashSet)23 HashMap (java.util.HashMap)20 StringCell (org.knime.core.data.def.StringCell)20 NominalValue (org.knime.core.data.NominalValue)18 DoubleCell (org.knime.core.data.def.DoubleCell)18 IntCell (org.knime.core.data.def.IntCell)18 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)18 ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue)18