use of org.apache.commons.math.stat.regression.SimpleRegression in project rascal by usethesource.
the class SimpleRegressions method make.
/**
 * Builds a {@link SimpleRegression} from a list of (x, y) observations.
 * <p>
 * Every element of {@code dataValues} is cast to an {@code ITuple} whose
 * fields 0 and 1 are cast to {@code INumber}; both are converted to
 * {@code double} through {@code toReal(values.getPrecision())} before being
 * added to the regression.
 *
 * @param dataValues the observations; must contain more than 2 elements
 * @return a regression populated with one data point per element
 * @throws RuntimeException the illegal-argument exception produced by
 *         {@code RuntimeExceptionFactory.illegalArgument} when the list has
 *         2 elements or fewer (NOTE(review): exact exception type is declared
 *         outside this view)
 */
SimpleRegression make(IList dataValues) {
    // Guard clause: the list must hold strictly more than two observations.
    if (!(dataValues.length() > 2)) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, "SimpleRegression data should have more than 2 elements");
    }

    SimpleRegression regression = new SimpleRegression();
    for (IValue element : dataValues) {
        ITuple point = (ITuple) element;
        INumber xNumber = (INumber) point.get(0);
        INumber yNumber = (INumber) point.get(1);

        double xCoordinate = xNumber.toReal(values.getPrecision()).doubleValue();
        double yCoordinate = yNumber.toReal(values.getPrecision()).doubleValue();
        regression.addData(xCoordinate, yCoordinate);
    }
    return regression;
}
use of org.apache.commons.math.stat.regression.SimpleRegression in project jochre by urieli.
the class Segmenter method splitRows.
/**
* Split rows if they're particularly high, and contain considerable white
* space in the middle. Shapes causing the join will be removed if too high,
* or attached to the closest row otherwise.
* <p>
* Steps, as implemented below:
* <ol>
* <li>Select candidate rows: rows with a non-zero width whose height is at
* least the image's average shape height.</li>
* <li>For each candidate, count black pixels per horizontal line, measured
* relative to a line starting at the row's top-left corner and following the
* image's mean horizontal slope.</li>
* <li>Scan the counts for "into row" / "out of row" transitions, using twice
* the mean count as the entry threshold and half the mean count as the exit
* threshold.</li>
* <li>When more than two transitions are found, pick the line with the lowest
* pixel count between an exit and the following entry as a separator, and
* distribute the row's shapes among the resulting new rows. A shape spanning
* several new rows is kept only if at least 80% of its height lies in one of
* them.</li>
* <li>Once all candidates are analysed, replace each split row in the source
* image by its new rows.</li>
* </ol>
*
* @param sourceImage the image whose rows are analysed and, where a split
* was found, replaced through {@code replaceRow}
*/
void splitRows(SourceImage sourceImage) {
LOG.debug("########## splitRows #########");
// Calculate the min row height to be considered for splitting
double minHeightForSplit = sourceImage.getAverageShapeHeight();
LOG.debug("minHeightForSplit: " + minHeightForSplit);
double slopeMean = sourceImage.getMeanHorizontalSlope();
List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>();
for (RowOfShapes row : sourceImage.getRows()) {
// rows with zero width are never candidates
if (row.getRight() == row.getLeft())
continue;
int height = row.getBottom() - row.getTop();
if (height >= minHeightForSplit) {
LOG.debug("Adding candidate " + row.toString());
candidateRows.add(row);
}
}
// For each row to be considered for splitting, see if there are lines
// of white space inside it.
// The replacements are collected here and only applied to the image
// after every candidate has been analysed.
Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>();
for (RowOfShapes row : candidateRows) {
// Two-point regression: a line through the row's top-left corner with
// the image's mean slope. It is used below to predict the line's y
// coordinate at any x.
SimpleRegression regression = new SimpleRegression();
// y = intercept + slope * x
LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")");
regression.addData(row.getLeft(), row.getTop());
double rightHandY = row.getTop() + ((row.getRight() - row.getLeft()) * slopeMean);
LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")");
regression.addData(row.getRight(), rightHandY);
// vertical drift of the sloped line over the row's width, rounded up
int yDelta = (int) Math.ceil(Math.abs(rightHandY - row.getTop()));
// number of counted lines: the row's height (inclusive) plus yDelta
// lines of padding above and below
int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta;
LOG.debug("yDelta: " + yDelta);
LOG.debug("yInterval: " + yInterval);
// let's get pixel counts shape by shape, and leave out the rest (in
// case rows overlap vertically)
int[] pixelCounts = new int[yInterval];
for (Shape shape : row.getShapes()) {
LOG.trace("Shape " + shape);
// y coordinate of the sloped line at the shape's left edge
int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft);
// the shape offset + the offset between the regression line and
// the row top
// + the delta we left at the start in case the line slopes
// upwards to the right
// (the two row.getTop() terms cancel: this equals
// shape.getTop() - yDeltaAtLeft + yDelta)
int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - " + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex);
for (int x = 0; x < shape.getWidth(); x++) {
for (int y = 0; y < shape.getHeight(); y++) {
if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
// NOTE(review): topIndex + y is not range-checked against
// yInterval; presumably the yDelta padding keeps it in
// bounds - confirm.
pixelCounts[topIndex + y]++;
}
}
}
}
// Mean and standard deviation of the per-line pixel counts; the
// standard deviation is only logged, the mean drives the thresholds.
Mean pixelCountMean = new Mean();
StandardDeviation pixelCountStdDev = new StandardDeviation();
for (int i = 0; i < yInterval; i++) {
LOG.debug("Pixel count " + i + ": " + pixelCounts[i]);
pixelCountMean.increment(pixelCounts[i]);
pixelCountStdDev.increment(pixelCounts[i]);
}
LOG.debug("pixel count mean: " + pixelCountMean.getResult() + ", std dev: " + pixelCountStdDev.getResult());
// If there's a split required, we're going to go considerably above
// and below the mean several times
double lowThreshold = pixelCountMean.getResult() / 2.0;
double highThreshold = pixelCountMean.getResult() * 2.0;
boolean inRow = false;
// Line indexes at which the state flips. The scan starts outside a row,
// so even positions hold "into row" indexes and odd positions hold
// "out of row" indexes.
List<Integer> switches = new ArrayList<Integer>();
for (int i = 0; i < yInterval; i++) {
if (!inRow && pixelCounts[i] > highThreshold) {
LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]);
inRow = true;
switches.add(i);
} else if (inRow && pixelCounts[i] < lowThreshold) {
LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]);
inRow = false;
switches.add(i);
}
}
if (switches.size() > 2) {
// we have more than one row
List<Integer> rowSeparations = new ArrayList<Integer>();
// find the row separators
// Each iteration looks at one "out of row" index and the "into row"
// index that follows it.
// NOTE(review): with the bound switches.size() - 2, an odd number of
// switches (a sequence ending on an "into row" transition) leaves the
// last out/in pair unvisited; with exactly 3 switches no separator is
// produced at all. Confirm whether switches.size() - 1 was intended.
for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) {
int outOfRow = switches.get(switchIndex);
int intoRow = switches.get(switchIndex + 1);
// only counts strictly below the rounded-up high threshold qualify
int minPixelCount = (int) Math.ceil(highThreshold);
int minIndex = -1;
// find the row with the lowest pixel count
for (int i = outOfRow; i <= intoRow; i++) {
if (pixelCounts[i] < minPixelCount) {
minPixelCount = pixelCounts[i];
minIndex = i;
}
}
rowSeparations.add(minIndex);
}
// separate the shapes among the rows
// one more new row than there are separators
List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1);
for (int i = 0; i <= rowSeparations.size(); i++) {
newRows.add(new RowOfShapes(sourceImage, jochreSession));
}
// add a separator at the beginning and end
// so that new row i lies between rowSeparations[i] and
// rowSeparations[i + 1]
rowSeparations.add(0, 0);
rowSeparations.add(yInterval + 1);
for (Shape shape : row.getShapes()) {
// same line index computation as used when counting pixels above
int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
int firstSepAfterShapeBottom = rowSeparations.size();
int lastSepBeforeShapeTop = -1;
// last separator at or above the shape's top line
for (int i = rowSeparations.size() - 1; i >= 0; i--) {
int rowSeparation = rowSeparations.get(i);
if (rowSeparation <= topIndex) {
lastSepBeforeShapeTop = i;
break;
}
}
// first separator at or below the shape's bottom (topIndex + height)
for (int i = 0; i < rowSeparations.size(); i++) {
int rowSeparation = rowSeparations.get(i);
if (rowSeparation >= topIndex + shape.getHeight()) {
firstSepAfterShapeBottom = i;
break;
}
}
// consecutive separators: the shape lies entirely between them
if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) {
// shape clearly belongs to one row
RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop);
newRow.addShape(shape);
} else {
// is the shape much closer to one row than another?
// if yes, add it to this row
// vertical overlap, in lines, between the shape and each new row
int[] yPixelsPerRow = new int[newRows.size()];
for (int i = 0; i < newRows.size(); i++) {
int separatorTop = rowSeparations.get(i);
int separatorBottom = rowSeparations.get(i + 1);
int top = topIndex < separatorTop ? separatorTop : topIndex;
int bottom = topIndex + shape.getHeight() < separatorBottom ? topIndex + shape.getHeight() : separatorBottom;
yPixelsPerRow[i] = bottom - top;
}
// new row with the largest (strictly positive) overlap
int pixelsInMaxRow = 0;
int maxPixelRowIndex = -1;
for (int i = 0; i < newRows.size(); i++) {
if (yPixelsPerRow[i] > pixelsInMaxRow) {
pixelsInMaxRow = yPixelsPerRow[i];
maxPixelRowIndex = i;
}
}
// keep the shape only if at least 80% of its height is in that row
double minPercentage = 0.8;
if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) {
RowOfShapes newRow = newRows.get(maxPixelRowIndex);
newRow.addShape(shape);
} else {
// otherwise, the shape needs to be got rid of
// as it's causing massive confusion
// do this by simply not adding it anywhere
}
}
// is the shape in one row exactly?
}
// next shape
splitRows.put(row, newRows);
}
// do we have more than one row?
}
// Apply the collected replacements. Hashtable defines no iteration
// order, so the rows are replaced in no particular order.
for (RowOfShapes row : splitRows.keySet()) {
List<RowOfShapes> newRows = splitRows.get(row);
sourceImage.replaceRow(row, newRows);
}
}
use of org.apache.commons.math.stat.regression.SimpleRegression in project jochre by urieli.
the class SourceImage method getInclination.
/**
 * Returns the slope of the current image's horizontal inclination. Assumes an
 * initial stab has already been made at grouping shapes into rows, and that
 * rows are grouped from top to bottom.
 * <p>
 * The slope is estimated from a single reference row: the first row (in
 * {@code getRows()} order) whose width exceeds 75% of the mean row width.
 * Every black pixel of the shapes in that row whose height lies within the
 * row's average shape height plus or minus its margin is fed into a simple
 * linear regression of y on x.
 *
 * @return the regression slope, or {@code Double.NaN} when the inclination
 *         cannot be determined: either no row is wide enough to serve as
 *         reference (for example when there are no rows, or all rows have
 *         zero width), or the regression did not receive enough data to
 *         estimate a slope
 */
public double getInclination() {
    LOG.debug("#### getInclination ####");
    // It may well be that rows have been grouped together
    // wrongly if the image has several columns.
    // The only rows that are fairly reliable are very long horizontal bars
    // and the first long row, in which all letters are aligned to the same
    // baseline, regardless of their size.

    // let's get the mean width first
    Mean widthMean = new Mean();
    for (RowOfShapes row : this.getRows()) {
        widthMean.increment(row.getRight() - row.getLeft());
    }
    double meanWidth = widthMean.getResult();
    LOG.debug("meanWidth: " + meanWidth);
    double minWidth = meanWidth * 0.75;

    // find the first row with a pretty wide width
    RowOfShapes theRow = null;
    for (RowOfShapes row : this.getRows()) {
        int width = row.getRight() - row.getLeft();
        if (width > minWidth) {
            theRow = row;
            break;
        }
    }

    // Without a reference row there is nothing to regress on. Previously
    // this case dereferenced null; report "undetermined" the same way the
    // regression itself does when it lacks data.
    if (theRow == null) {
        LOG.debug("No row wider than " + minWidth + ", inclination cannot be determined");
        return Double.NaN;
    }

    // calculate a regression for average shapes on this row
    double minHeight = theRow.getAverageShapeHeight() - theRow.getAverageShapeHeightMargin();
    double maxHeight = theRow.getAverageShapeHeight() + theRow.getAverageShapeHeightMargin();
    SimpleRegression regression = new SimpleRegression();
    for (Shape shape : theRow.getShapes()) {
        // shapes of unusual height are left out of the estimate
        if (shape.getHeight() >= minHeight && shape.getHeight() <= maxHeight) {
            for (int x = 0; x < shape.getWidth(); x++) {
                for (int y = 0; y < shape.getHeight(); y++) {
                    if (shape.isPixelBlack(x, y, this.getBlackThreshold())) {
                        // shape-relative pixel -> image coordinates
                        regression.addData(shape.getLeft() + x, shape.getTop() + y);
                    }
                }
            }
        }
    }
    return regression.getSlope();
}
use of org.apache.commons.math.stat.regression.SimpleRegression in project jochre by urieli.
the class SlopeDifferenceFeature method checkInternal.
/**
 * Compares the slopes of the top and bottom vertical contours on either side
 * of a split position.
 * <p>
 * Four regressions (top/bottom x left/right) are seeded with the contour
 * values at the split position and extended by up to the distance returned by
 * {@code contourDistanceFeature}, staying inside the shape's width. Each slope
 * is converted to an angle with {@code Math.atan}; the absolute top/bottom
 * angle difference on each side is divided by pi, and the product of the two
 * normalised differences is the feature value.
 *
 * @param split the split being evaluated
 * @param env the runtime environment, passed on to {@code contourDistanceFeature}
 * @return the result generated from the product of the left and right
 *         differences, or {@code null} when {@code contourDistanceFeature}
 *         yields no result
 */
@Override
public FeatureResult<Double> checkInternal(Split split, RuntimeEnvironment env) {
    FeatureResult<Integer> contourDistanceResult = contourDistanceFeature.check(split, env);
    if (contourDistanceResult == null) {
        // no contour distance available: no feature value either
        return null;
    }

    int maxOffset = contourDistanceResult.getOutcome();
    int[][] contour = split.getShape().getVerticalContour();
    int splitX = split.getPosition();
    Shape splitShape = split.getShape();
    // index 0 is used for the top contour, index 1 for the bottom contour
    int topAtSplit = contour[splitX][0];
    int bottomAtSplit = contour[splitX][1];

    SimpleRegression topRight = new SimpleRegression();
    SimpleRegression bottomRight = new SimpleRegression();
    SimpleRegression topLeft = new SimpleRegression();
    SimpleRegression bottomLeft = new SimpleRegression();

    // every regression starts from the contour at the split position itself
    topRight.addData(splitX, topAtSplit);
    topLeft.addData(splitX, topAtSplit);
    bottomRight.addData(splitX, bottomAtSplit);
    bottomLeft.addData(splitX, bottomAtSplit);

    // walk outwards from the split, one column at a time on each side
    for (int offset = 1; offset <= maxOffset; offset++) {
        int rightX = splitX + offset;
        if (rightX < splitShape.getWidth()) {
            topRight.addData(rightX, contour[rightX][0]);
            bottomRight.addData(rightX, contour[rightX][1]);
        }
        int leftX = splitX - offset;
        if (leftX >= 0) {
            topLeft.addData(leftX, contour[leftX][0]);
            bottomLeft.addData(leftX, contour[leftX][1]);
        }
    }

    // slopes converted straight to angles
    double topRightAngle = Math.atan(topRight.getSlope());
    double bottomRightAngle = Math.atan(bottomRight.getSlope());
    double topLeftAngle = Math.atan(topLeft.getSlope());
    double bottomLeftAngle = Math.atan(bottomLeft.getSlope());

    // right & left-hand differences, normalised from 0 to 1
    double rightDiff = Math.abs(topRightAngle - bottomRightAngle) / Math.PI;
    double leftDiff = Math.abs(topLeftAngle - bottomLeftAngle) / Math.PI;
    double product = rightDiff * leftDiff;

    if (LOG.isTraceEnabled()) {
        LOG.trace("topRightAngle: " + topRightAngle);
        LOG.trace("bottomRightAngle: " + bottomRightAngle);
        LOG.trace("topLeftAngle: " + topLeftAngle);
        LOG.trace("bottomLeftAngle: " + bottomLeftAngle);
        LOG.trace("rightDiff: " + rightDiff);
        LOG.trace("leftDiff: " + leftDiff);
        LOG.trace("product: " + product);
    }
    return this.generateResult(product);
}
use of org.apache.commons.math.stat.regression.SimpleRegression in project mzmine2 by mzmine.
the class RansacAlignerTask method smooth.
/**
 * Adds interpolated points between the real points to smooth the regression
 * model.
 * <p>
 * The list is sorted in place with the comparator provided by {@code RTs}
 * (presumably ascending by {@code RT} - confirm against the {@code RTs}
 * class). For each pair of neighbouring real points whose {@code RT} values
 * are more than 2 apart, points are generated at {@code RT} steps of 1 on the
 * straight line joining the pair and appended to the end of the list.
 * <p>
 * Only pairs of real points are considered. The number of real points is
 * fixed before the loop, so the appended synthetic points are never used as
 * interpolation end points themselves.
 *
 * @param list the real points; sorted and extended in place
 * @return the same list instance, with the interpolated points appended
 *         after the sorted real points
 */
private List<RTs> smooth(List<RTs> list) {
    Collections.sort(list, new RTs());

    // Snapshot the count: the list grows while we iterate, and re-reading
    // list.size() would make the loop walk into the points appended below.
    final int realPointCount = list.size();
    for (int i = 0; i < realPointCount - 1; i++) {
        RTs point1 = list.get(i);
        RTs point2 = list.get(i + 1);
        if (point1.RT < point2.RT - 2) {
            // two-point regression = the straight line through both points
            SimpleRegression regression = new SimpleRegression();
            regression.addData(point1.RT, point1.RT2);
            regression.addData(point2.RT, point2.RT2);
            double rt = point1.RT + 1;
            while (rt < point2.RT) {
                RTs newPoint = new RTs(rt, regression.predict(rt));
                // appending at the end leaves the indexes of the real points intact
                list.add(newPoint);
                rt++;
            }
        }
    }
    return list;
}
Aggregations