Use of org.knime.core.data.def.StringCell in project knime-core by knime.
The class ARFFTableTest, method testCreateDataTableSpecFromARFFfileIRIS.
/**
 * Tests the creation of a table spec from the IRIS data in an ARFF file.
 *
 * <p>The ARFF text in {@code ARFF_IRISFULL} is written to a temporary file, the spec is
 * created from that file, and the column count, column names, column types and the nominal
 * domain of the class column are verified.
 *
 * @throws IOException if the temporary file cannot be created, written or read.
 * @throws InvalidSettingsException if it is raised while the spec is created from the file.
 */
public void testCreateDataTableSpecFromARFFfileIRIS() throws IOException, InvalidSettingsException {
    File tempFile = File.createTempFile("ARFFReaderUnitTest", "mini");
    tempFile.deleteOnExit();
    // try-with-resources guarantees the writer is closed even if write() throws
    try (Writer out = new BufferedWriter(new FileWriter(tempFile))) {
        out.write(ARFF_IRISFULL);
    }

    final DataTableSpec tSpec;
    try {
        tSpec = ARFFTable.createDataTableSpecFromARFFfile(tempFile.toURI().toURL(), null);
    } catch (CanceledExecutionException cee) {
        // Cancellation is not expected here. Fail loudly instead of letting the test
        // pass without having checked anything.
        throw new AssertionError("Unexpected cancellation while reading the ARFF file", cee);
    }

    // Header of the ARFF data under test:
    //   @RELATION iris
    //   @ATTRIBUTE sepallength REAL
    //   @ATTRIBUTE sepalwidth REAL
    //   @ATTRIBUTE petallength REAL
    //   @ATTRIBUTE petalwidth REAL
    //   @ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
    assertEquals(5, tSpec.getNumColumns());

    // column names (expected value first, so failure messages read correctly)
    assertEquals("sepallength", tSpec.getColumnSpec(0).getName());
    assertEquals("sepalwidth", tSpec.getColumnSpec(1).getName());
    assertEquals("petallength", tSpec.getColumnSpec(2).getName());
    assertEquals("petalwidth", tSpec.getColumnSpec(3).getName());
    assertEquals("class", tSpec.getColumnSpec(4).getName());

    // column types: four numeric attributes followed by the nominal class attribute
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(0).getType());
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(1).getType());
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(2).getType());
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(3).getType());
    assertEquals(StringCell.TYPE, tSpec.getColumnSpec(4).getType());

    // the numeric columns carry no possible-values domain
    assertNull(tSpec.getColumnSpec(0).getDomain().getValues());
    assertNull(tSpec.getColumnSpec(1).getDomain().getValues());
    assertNull(tSpec.getColumnSpec(2).getDomain().getValues());
    assertNull(tSpec.getColumnSpec(3).getDomain().getValues());

    // the class column lists exactly the three nominal values
    Set<DataCell> vals = tSpec.getColumnSpec(4).getDomain().getValues();
    assertEquals(3, vals.size());
    assertTrue(vals.contains(new StringCell("Iris-setosa")));
    assertTrue(vals.contains(new StringCell("Iris-versicolor")));
    assertTrue(vals.contains(new StringCell("Iris-virginica")));
}
Use of org.knime.core.data.def.StringCell in project knime-core by knime.
The class SorterNodeModelTest, method testExecuteBufferedDataTableArrayExecutionContext.
/**
 * Test method for {@link org.knime.base.node.preproc.sorter.SorterNodeModel#execute(org.knime.core.node.BufferedDataTable[], org.knime.core.node.ExecutionContext)}.
 *
 * <p>First sorts a table with a single row and checks that this row is returned unchanged.
 * Then sorts unit matrix tables of dimension 50 and 100 and checks that the row keys are
 * returned in descending numeric order.
 *
 * @throws Exception if loading the settings or executing the node fails (this includes
 *             {@code CanceledExecutionException})
 */
@Test
public final void testExecuteBufferedDataTableArrayExecutionContext() throws Exception {
    // try to sort a table with 1 entry
    String[] columnNames = { "col1", "col2", "col3", "col4" };
    DataType[] columnTypes = { DoubleCell.TYPE, StringCell.TYPE, IntCell.TYPE, DoubleCell.TYPE };
    DataCell[] myRow = { new DoubleCell(2.4325), new StringCell("Test"), new IntCell(7), new DoubleCell(32432.324) };
    DataRow[] rows = { new DefaultRow(Integer.toString(1), myRow) };
    DataTable[] inputTable = { new DefaultTable(rows, columnNames, columnTypes) };

    // set settings
    String[] includeCols = { "col1", "col2", "col3", "col4" };
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = { true, true, true, true };
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable =
        m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);

    // test output: exactly the one input row
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    Assert.assertEquals(rows[0], rowIt.next());
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();

    // try to sort a large array of DataRows (unit matrix); start with a little one
    assertUnitMatrixSortedInDescendingKeyOrder(50);

    // try to sort a very large array of DataRows (unit matrix)
    // timings noted by the original author: dimension 300 => 15,8 secs., 500 => 49,7 secs.
    assertUnitMatrixSortedInDescendingKeyOrder(100);
}

/**
 * Sorts a unit matrix table of the given dimension on all of its columns (every sort-order
 * flag set to {@code true}) and asserts that the row keys of the result, parsed as
 * integers, count down from {@code dimension - 1}. Resets the node model afterwards.
 *
 * @param dimension number of columns included in the sort and size of the unit matrix
 * @throws Exception if loading the settings or executing the node fails
 */
private void assertUnitMatrixSortedInDescendingKeyOrder(final int dimension) throws Exception {
    // set settings: include col0 .. col(dimension-1), all flags true
    String[] includeCols = new String[dimension];
    boolean[] sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
        sortorder[i] = true;
    }
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);

    DataTable[] inputTable = { generateUnitMatrixTable(dimension) };
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable =
        m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);

    // test output (should have sorted all rows in reverse order)
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    int expectedKey = dimension - 1;
    while (rowIt.hasNext()) {
        RowKey rk = rowIt.next().getKey();
        int actualKey = Integer.parseInt(rk.getString());
        Assert.assertEquals(expectedKey, actualKey);
        expectedKey--;
    }
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
}
Use of org.knime.core.data.def.StringCell in project knime-core by knime.
The class LogisticRegressionContent, method createCoeffStatisticsTablePortObject.
/**
 * Creates a BufferedDataTable with the coefficient statistics. For every logit it contains
 * one row per parameter, followed by one row for the intercept (labelled "Constant").
 *
 * <p>Each row holds the logit, the parameter name and the coefficient, followed by the
 * standard error, z-score and p-value. When {@code m_covMat} is {@code null} the last three
 * cells are {@code NOT_INVERTIBLE_MISSING} instead. Row keys are "Row1", "Row2", ...
 *
 * @param exec The execution context used to create the data container
 * @return the table of the closed container
 */
public BufferedDataTable createCoeffStatisticsTablePortObject(final ExecutionContext exec) {
    final DataTableSpec outSpec = LogRegCoordinator.createCoeffStatisticsTableSpec();
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    final List<DataCell> logits = this.getLogits();
    final List<String> parameters = this.getParameters();
    int rowNumber = 0;
    for (final DataCell logit : logits) {
        final Map<String, Double> coefficients = this.getCoefficients(logit);
        final Map<String, Double> stdErrs;
        final Map<String, Double> zScores;
        final Map<String, Double> pValues;
        if (m_covMat != null) {
            stdErrs = this.getStandardErrors(logit);
            zScores = this.getZScores(logit);
            pValues = this.getPValues(logit);
        } else {
            // placeholders only; they are never read while m_covMat is null
            final HashMap<String, Double> placeholder = new HashMap<>();
            stdErrs = placeholder;
            zScores = placeholder;
            pValues = placeholder;
        }

        // one row per parameter
        for (final String parameter : parameters) {
            final List<DataCell> parameterCells = new ArrayList<>();
            parameterCells.add(new StringCell(logit.toString()));
            parameterCells.add(new StringCell(parameter));
            parameterCells.add(new DoubleCell(coefficients.get(parameter)));
            if (m_covMat == null) {
                parameterCells.add(NOT_INVERTIBLE_MISSING);
                parameterCells.add(NOT_INVERTIBLE_MISSING);
                parameterCells.add(NOT_INVERTIBLE_MISSING);
            } else {
                parameterCells.add(new DoubleCell(stdErrs.get(parameter)));
                parameterCells.add(new DoubleCell(zScores.get(parameter)));
                parameterCells.add(new DoubleCell(pValues.get(parameter)));
            }
            rowNumber++;
            container.addRowToTable(new DefaultRow("Row" + rowNumber, parameterCells));
        }

        // trailing row for the intercept of this logit
        final List<DataCell> interceptCells = new ArrayList<>();
        interceptCells.add(new StringCell(logit.toString()));
        interceptCells.add(new StringCell("Constant"));
        interceptCells.add(new DoubleCell(this.getIntercept(logit)));
        if (m_covMat == null) {
            interceptCells.add(NOT_INVERTIBLE_MISSING);
            interceptCells.add(NOT_INVERTIBLE_MISSING);
            interceptCells.add(NOT_INVERTIBLE_MISSING);
        } else {
            interceptCells.add(new DoubleCell(this.getInterceptStdErr(logit)));
            interceptCells.add(new DoubleCell(this.getInterceptZScore(logit)));
            interceptCells.add(new DoubleCell(this.getInterceptPValue(logit)));
        }
        rowNumber++;
        container.addRowToTable(new DefaultRow("Row" + rowNumber, interceptCells));
    }
    container.close();
    return container.getTable();
}
Use of org.knime.core.data.def.StringCell in project knime-core by knime.
The class SampleDataNodeModel, method run.
/**
* Generates random cluster centers and sample rows and pushes them to the two row outputs.
* The cluster centers are written to {@code clusterOutput} first (and that output is closed),
* then the cluster patterns and finally the noise patterns are written to {@code dataOutput}.
*
* @param spec table spec whose column count minus one is used as the number of dimensions
*            (presumably the remaining column is the cluster label - TODO confirm)
* @param dataOutput receives the generated data rows; closed at the end
* @param clusterSpec not read in this method
* @param clusterOutput receives one row per cluster center; closed once all centers are pushed
* @param exec used for progress reporting and cancellation checks
* @throws Exception declared broadly; presumably raised on cancellation or when an output
*             rejects a row - TODO confirm against RowOutput / ExecutionContext
*/
private void run(final DataTableSpec spec, final RowOutput dataOutput, final DataTableSpec clusterSpec, final RowOutput clusterOutput, final ExecutionContext exec) throws Exception {
// all random draws below come from this single seeded generator
Random rand = new Random(m_randomSeed);
NodeLogger.getLogger(getClass()).info("Using '" + m_randomSeed + "' as seed for random data generation.");
int dimensions = spec.getNumColumns() - 1;
SizeSequence uniSizes = new SizeSequence(m_uniSize);
SizeSequence clusters = new SizeSequence(m_clusterCount);
// index of the last entry of m_clusterCount
int l = m_clusterCount.length - 1;
// position plus size of the last entry gives the total number of clusters
final int overallClusterCount = clusters.getPosition(l) + clusters.getSize(l);
// clamp the configured noise fraction to [0, 1]
final double noiseFrac = Math.min(Math.max(0.0, m_noiseFrac), 1.0);
/*
* the cluster centers. If a cluster doesn't restrict a dimension, the
* value is NaN
*/
// at least one row is allocated, even when there are no clusters at all
double[][] optimalClusters = new double[Math.max(overallClusterCount, 1)][dimensions];
if (overallClusterCount == 0) {
// no clusters: the single placeholder center restricts no dimension
Arrays.fill(optimalClusters[0], Double.NaN);
}
for (int c = 0; c < overallClusterCount; c++) {
// dimensions [startPos, endPos) are the ones this cluster restricts
int uniToClusterIn = clusters.getIndex(c);
int startPos = uniSizes.getPosition(uniToClusterIn);
int endPos = startPos + uniSizes.getSize(uniToClusterIn);
// assert (universeSize == uniSizes.getSize(uniToClusterIn));
for (int d = 0; d < dimensions; d++) {
if (d < startPos || d >= endPos) {
optimalClusters[c][d] = Double.NaN;
} else {
// shrink [min, max] by m_dev * range on both sides and draw the center
// at random from the shrunken interval
double min = m_minValues[d];
double max = m_maxValues[d];
double range = max - min;
double min2 = min + m_dev * range;
double max2 = max - m_dev * range;
double range2 = max2 - min2;
double center = min2 + rand.nextDouble() * range2;
optimalClusters[c][d] = center;
}
}
}
DataRow[] centerRows = new DataRow[overallClusterCount];
// one label per cluster, plus one extra slot for the "Noise" label if noise is generated
int colNameLength = overallClusterCount + (noiseFrac > 0.0 ? 1 : 0);
StringCell[] colNames = new StringCell[colNameLength];
for (int i = 0; i < overallClusterCount; i++) {
double[] cs = optimalClusters[i];
DataCell[] cells = new DataCell[dimensions];
for (int c = 0; c < dimensions; c++) {
// unrestricted (NaN) coordinates are written as missing cells
if (Double.isNaN(cs[c])) {
cells[c] = DataType.getMissingCell();
} else {
cells[c] = new DoubleCell(cs[c]);
}
}
colNames[i] = new StringCell("Cluster_" + i);
// the row key of a center row is the string form of its label cell
centerRows[i] = new DefaultRow(colNames[i].toString(), cells);
}
if (noiseFrac > 0.0) {
colNames[overallClusterCount] = new StringCell("Noise");
}
/* second output (cluster centers) is written completely before any data row */
for (DataRow r : centerRows) {
clusterOutput.push(r);
}
clusterOutput.close();
/* first output (data) comes here */
// assign attributes to patterns
int noise = (int) (m_patCount * noiseFrac);
// integer division: every cluster gets the same number of patterns
int patternsPerCluster = (m_patCount - noise) / optimalClusters.length;
int patternCount = patternsPerCluster * optimalClusters.length;
// with noise enabled the remainder of the division is added to the noise rows, so the
// total is m_patCount; without noise only patternCount rows are generated
noise = noiseFrac > 0.0 ? m_patCount - patternCount : 0;
int pattern = 0;
// kept as double so the progress fractions below use floating-point division
double totalCount = m_patCount;
for (int c = 0; c < optimalClusters.length; c++) {
// all clusters
double[] centers = optimalClusters[c];
// patterns in cluster
for (int p = 0; p < patternsPerCluster; p++) {
// NOTE(review): fill is defined elsewhere; presumably it draws a point around the
// given centers using rand - confirm
double[] d = fill(rand, centers);
// rows of the placeholder center (no clusters configured) get a missing label
DataCell cl = (overallClusterCount > 0 ? colNames[c] : DataType.getMissingCell());
DataRow r = createRow(RowKey.createRowKey(pattern), d, cl);
dataOutput.push(r);
// lambdas can only capture effectively final locals, hence the copy
final int patternTempFinal = pattern;
exec.setProgress(pattern / totalCount, () -> ("Added row " + patternTempFinal));
exec.checkCanceled();
pattern++;
}
}
assert (pattern == patternCount);
// noise rows use an all-NaN center, i.e. no dimension is restricted
double[] noiseCenter = new double[dimensions];
Arrays.fill(noiseCenter, Double.NaN);
// draw noise patterns
for (int i = 0; i < noise; i++) {
// row numbering continues after the last cluster pattern
int index = i + pattern;
double[] d = fill(rand, noiseCenter);
// noise > 0 implies noiseFrac > 0, so the last label is the "Noise" cell
DataCell cl = colNames[colNames.length - 1];
DataRow r = createRow(RowKey.createRowKey(index), d, cl);
dataOutput.push(r);
exec.setProgress(index / totalCount, () -> ("Added row " + index));
exec.checkCanceled();
}
dataOutput.close();
}
Use of org.knime.core.data.def.StringCell in project knime-core by knime.
The class ParallelCoordinatesPlotter, method calculateLines.
/**
 * Calculates the lines, containing the mapped data points: one {@link LineInfo} per row of
 * the current data array, with one point per parallel axis.
 *
 * <p>A row that has a missing value on any axis is dropped completely when
 * {@code m_skipMissingValues} is set; otherwise the missing value is drawn at
 * {@code y = MISSING}. When {@code m_hide} is set, only hilited rows produce a line.
 *
 * @return the calculated lines; an empty list if there is no data provider, no data array
 *         or no axes
 */
private synchronized List<LineInfo> calculateLines() {
    if (getDataProvider() == null) {
        return new ArrayList<LineInfo>();
    }
    if (getDataProvider().getDataArray(getDataArrayIdx()) == null || m_axes == null) {
        return new ArrayList<LineInfo>();
    }
    final DataArray data = getDataProvider().getDataArray(getDataArrayIdx());
    // LOGGER.debug("calculate points: " + m_axes);
    final List<LineInfo> result = new ArrayList<LineInfo>(data.size());
    nextRow: for (final DataRow dataRow : data) {
        final List<Point> linePoints = new ArrayList<Point>();
        final List<DataCell> cellValues = new ArrayList<DataCell>();
        for (final ParallelAxis axis : m_axes) {
            final int columnIndex = data.getDataTableSpec().findColumnIndex(axis.getName());
            final DataCell cell = dataRow.getCell(columnIndex);
            if (m_skipMissingValues && cell.isMissing()) {
                // a single missing value discards the whole row
                continue nextRow;
            }
            cellValues.add(cell);
            // x is the position of the axis itself on the nominal x coordinate
            final StringCell axisName = new StringCell(axis.getName());
            final int x = (int) getXAxis().getCoordinate().calculateMappedValue(axisName,
                getDrawingPaneDimension().width);
            final int y;
            if (cell.isMissing()) {
                y = MISSING;
            } else {
                y = getDrawingPaneDimension().height - ParallelCoordinateDrawingPane.BOTTOM_SPACE
                    - (int) axis.getMappedValue(cell);
            }
            linePoints.add(new Point(x, y));
        }
        final boolean hilited = delegateIsHiLit(dataRow.getKey());
        if (m_hide && !hilited) {
            // in hide mode only hilited rows are shown
            continue nextRow;
        }
        final boolean selected = m_selected.contains(dataRow.getKey());
        final LineInfo lineInfo = new LineInfo(linePoints, cellValues, selected, hilited,
            data.getDataTableSpec().getRowColor(dataRow), data.getDataTableSpec().getRowSizeFactor(dataRow),
            dataRow.getKey());
        lineInfo.setShape(data.getDataTableSpec().getRowShape(dataRow));
        result.add(lineInfo);
    }
    return result;
}
Aggregations