Search in sources :

Example 1 with SizeSequence

use of javax.swing.SizeSequence in project knime-core by knime.

The method run of the class SampleDataNodeModel.

/**
 * Generates the cluster centers and the sample rows and streams them to the two outputs.
 *
 * <p>The cluster-center rows are pushed to {@code clusterOutput}, which is then closed. After
 * that the rows belonging to the clusters, followed by the noise rows, are pushed to
 * {@code dataOutput}, which is closed at the very end. All random values come from a single
 * {@link Random} seeded with {@code m_randomSeed}, so the order of the draws in this method
 * determines the generated data.
 *
 * @param spec spec of the data output; its column count minus one is used as the number of
 *            value dimensions
 * @param dataOutput receives the generated data rows
 * @param clusterSpec spec of the cluster output (not read by this method)
 * @param clusterOutput receives one row per cluster center
 * @param exec used for progress reporting and cancellation checks
 * @throws Exception propagated from the outputs, the execution context or the helpers called
 */
private void run(final DataTableSpec spec, final RowOutput dataOutput, final DataTableSpec clusterSpec, final RowOutput clusterOutput, final ExecutionContext exec) throws Exception {
    final Random random = new Random(m_randomSeed);
    NodeLogger.getLogger(getClass()).info("Using '" + m_randomSeed + "' as seed for random data generation.");
    // One column is excluded from the dimension count (presumably the label column that
    // createRow appends -- confirm against createRow).
    final int dimCount = spec.getNumColumns() - 1;
    final SizeSequence universeSizes = new SizeSequence(m_uniSize);
    final SizeSequence clustersPerUniverse = new SizeSequence(m_clusterCount);
    final int lastUniverse = m_clusterCount.length - 1;
    // start position of the last entry plus its size == sum over all entries
    final int overallClusterCount =
        clustersPerUniverse.getPosition(lastUniverse) + clustersPerUniverse.getSize(lastUniverse);
    // clamp the configured noise fraction into [0, 1]
    final double noiseFraction = Math.min(Math.max(0.0, m_noiseFrac), 1.0);
    final boolean hasNoise = noiseFraction > 0.0;
    final boolean hasClusters = overallClusterCount > 0;

    // Cluster centers; NaN marks a dimension the cluster does not restrict. There is always
    // at least one (all-NaN) entry so that the row generation below has something to iterate.
    final double[][] centers = new double[Math.max(overallClusterCount, 1)][dimCount];
    if (!hasClusters) {
        Arrays.fill(centers[0], Double.NaN);
    }
    for (int cluster = 0; cluster < overallClusterCount; cluster++) {
        final int universe = clustersPerUniverse.getIndex(cluster);
        final int firstDim = universeSizes.getPosition(universe);
        final int pastLastDim = firstDim + universeSizes.getSize(universe);
        final double[] center = centers[cluster];
        for (int dim = 0; dim < dimCount; dim++) {
            if (dim >= firstDim && dim < pastLastDim) {
                // draw the center from the value range shrunk by m_dev * range on both sides
                final double lower = m_minValues[dim];
                final double upper = m_maxValues[dim];
                final double margin = m_dev * (upper - lower);
                final double innerLower = lower + margin;
                final double innerUpper = upper - margin;
                center[dim] = innerLower + random.nextDouble() * (innerUpper - innerLower);
            } else {
                center[dim] = Double.NaN;
            }
        }
    }

    // Build the center rows and the label cells (one per cluster, plus "Noise" if needed).
    final DataRow[] centerRows = new DataRow[overallClusterCount];
    int labelCount = overallClusterCount;
    if (hasNoise) {
        labelCount++;
    }
    final StringCell[] labels = new StringCell[labelCount];
    for (int cluster = 0; cluster < overallClusterCount; cluster++) {
        final double[] center = centers[cluster];
        final DataCell[] cells = new DataCell[dimCount];
        for (int dim = 0; dim < dimCount; dim++) {
            final double value = center[dim];
            cells[dim] = Double.isNaN(value) ? DataType.getMissingCell() : new DoubleCell(value);
        }
        final StringCell label = new StringCell("Cluster_" + cluster);
        labels[cluster] = label;
        centerRows[cluster] = new DefaultRow(label.toString(), cells);
    }
    if (hasNoise) {
        labels[overallClusterCount] = new StringCell("Noise");
    }
    for (int i = 0; i < centerRows.length; i++) {
        clusterOutput.push(centerRows[i]);
    }
    clusterOutput.close();

    // Data output: split the requested row count evenly over the centers; when noise is
    // enabled, whatever is left over after the even split becomes noise.
    final int requestedNoise = (int) (m_patCount * noiseFraction);
    final int rowsPerCluster = (m_patCount - requestedNoise) / centers.length;
    final int clusteredRowCount = rowsPerCluster * centers.length;
    final int noiseRowCount;
    if (hasNoise) {
        noiseRowCount = m_patCount - clusteredRowCount;
    } else {
        noiseRowCount = 0;
    }
    final double totalCount = m_patCount;

    int rowIndex = 0;
    for (int cluster = 0; cluster < centers.length; cluster++) {
        final double[] center = centers[cluster];
        for (int n = 0; n < rowsPerCluster; n++) {
            final double[] values = fill(random, center);
            final DataCell label;
            if (hasClusters) {
                label = labels[cluster];
            } else {
                // no clusters configured: rows carry a missing label
                label = DataType.getMissingCell();
            }
            dataOutput.push(createRow(RowKey.createRowKey(rowIndex), values, label));
            // lambdas need an effectively final copy of the running index
            final int reportedRow = rowIndex;
            exec.setProgress(reportedRow / totalCount, () -> ("Added row " + reportedRow));
            exec.checkCanceled();
            rowIndex++;
        }
    }
    assert (rowIndex == clusteredRowCount);

    // Noise rows are generated from an all-NaN center and take the last label cell.
    final double[] unrestricted = new double[dimCount];
    Arrays.fill(unrestricted, Double.NaN);
    for (int n = 0; n < noiseRowCount; n++) {
        final int index = rowIndex + n;
        final double[] values = fill(random, unrestricted);
        final DataCell label = labels[labels.length - 1];
        dataOutput.push(createRow(RowKey.createRowKey(index), values, label));
        exec.setProgress(index / totalCount, () -> ("Added row " + index));
        exec.checkCanceled();
    }
    dataOutput.close();
}
Also used : DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) SizeSequence(javax.swing.SizeSequence) Random(java.util.Random) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

Random (java.util.Random): 1, SizeSequence (javax.swing.SizeSequence): 1, DataCell (org.knime.core.data.DataCell): 1, DataRow (org.knime.core.data.DataRow): 1, DefaultRow (org.knime.core.data.def.DefaultRow): 1, DoubleCell (org.knime.core.data.def.DoubleCell): 1, StringCell (org.knime.core.data.def.StringCell): 1