Use of net.sf.mzmine.modules.masslistmethods.chromatogrambuilder.Chromatogram in project mzmine2 by mzmine.
The run method of the class GridMassTask.
/**
 * Runs the GridMass detection on {@code dataFile} and adds the resulting
 * feature list to the project.
 *
 * <p>
 * Stages, in order (the third argument of {@code setProcedure} is the stage
 * number):
 * <ol>
 * <li>select the scans, verify they are ordered by retention time and derive
 * the scan tolerances from the minimum/maximum time span parameters;</li>
 * <li>smooth the data points and collect, per scan, the points whose
 * intensity is at least {@code minimumHeight} (scans inside the
 * {@code ignoreTimes} ranges are skipped);</li>
 * <li>lay out a grid of probes, move each probe to its local maximum and
 * drop probes whose centre is below {@code minimumHeight};</li>
 * <li>group probes sharing the same centre into spots, assign datums to the
 * spots, join neighbouring spots and clear spot groups spanning more than
 * {@code maximumTimeSpan};</li>
 * <li>build one {@code Chromatogram} row per remaining spot.</li>
 * </ol>
 *
 * <p>
 * On failure the task status is set to {@code TaskStatus.ERROR} together with
 * an error message; on cancellation the method simply returns.
 *
 * @see Runnable#run()
 */
public void run() {
    Format mzFormat = MZmineCore.getConfiguration().getMZFormat();
    Format timeFormat = MZmineCore.getConfiguration().getRTFormat();
    setStatus(TaskStatus.PROCESSING);
    logger.info("Started GRIDMASS v1.0 [Apr-09-2014] on " + dataFile);
    scans = scanSelection.getMatchingScans(dataFile);
    scanNumbers = scanSelection.getMatchingScanNumbers(dataFile);
    totalScans = scans.length;

    // Check if we have any scans
    if (totalScans == 0) {
        setStatus(TaskStatus.ERROR);
        setErrorMessage("No scans match the selected criteria");
        return;
    }

    // Check if the scans are properly ordered by RT
    double prevRT = Double.NEGATIVE_INFINITY;
    for (Scan s : scans) {
        if (s.getRetentionTime() < prevRT) {
            setStatus(TaskStatus.ERROR);
            final String msg = "Retention time of scan #" + s.getScanNumber() + " is smaller then the retention time of the previous scan." + " Please make sure you only use scans with increasing retention times." + " You can restrict the scan numbers in the parameters, or you can use the Crop filter module";
            setErrorMessage(msg);
            return;
        }
        prevRT = s.getRetentionTime();
    }

    // Create new feature list
    newPeakList = new SimplePeakList(dataFile + " " + suffix, dataFile);

    int i;
    int j;

    // Record the retention time of every scan and the overall RT range
    Scan scan = scans[0];
    double minRT = scan.getRetentionTime();
    double maxRT = scan.getRetentionTime();
    retentiontime = new double[totalScans];
    for (i = 0; i < totalScans; i++) {
        scan = scans[i];
        double irt = scan.getRetentionTime();
        if (irt < minRT) {
            minRT = irt;
        }
        if (irt > maxRT) {
            maxRT = irt;
        }
        retentiontime[i] = irt;
    }
    // Average retention time covered by one scan
    rtPerScan = (maxRT - minRT) / totalScans;
    // "tolerable" units in scans
    tolScans = Math.max(2, (int) ((minimumTimeSpan / rtPerScan)));
    maxTolScans = Math.max(2, (int) ((maximumTimeSpan / rtPerScan)));

    // Algorithm to find masses:
    // (1) copy masses:intensity > threshold
    // (2) sort intensities descend
    // (3) Find "spot" for each intensity
    // (3.1) if they have not spot ID assigned
    // (3.1.1) Extend mass in mass and time while > 70% pixels > threshold
    // (3.1.2) If extension > mintime ==> mark all pixels with the spot ID
    // (3.1.3) if extension < mintime ==> mark all pixels with spot ID = -1
    // (4) Group spots within a time-tolerance and mass-tolerance
    logger.info("Getting data points on " + dataFile);
    roi = new Datum[totalScans][];
    minMasa = Double.MAX_VALUE;
    maxMasa = 0;
    // scanOk[i] == false means scan i is excluded from detection
    boolean[] scanOk = new boolean[totalScans];
    Arrays.fill(scanOk, true);
    logger.info("Smoothing data points on " + dataFile + " (Time min=" + smoothTimeSpan + "; Time m/z=" + smoothTimeMZ + ")");
    IndexedDataPoint[][] data = smoothDataPoints(dataFile, smoothTimeSpan, smoothTimeMZ, 0, smoothMZ, 0, minimumHeight);

    logger.info("Determining intensities (mass sum) per scan on " + dataFile);
    for (i = 0; i < totalScans; i++) {
        if (isCanceled()) {
            return;
        }
        IndexedDataPoint[] mzv = data[i];
        double prev = (mzv.length > 0 ? mzv[0].datapoint.getMZ() : 0);
        // massSum is only reported in the debug output below
        double massSum = 0;
        for (j = 0; j < mzv.length; j++) {
            double mz = mzv[j].datapoint.getMZ();
            if (mzv[j].datapoint.getIntensity() >= minimumHeight) {
                massSum += mz - prev;
            }
            prev = mz;
            // Track the global m/z range over all smoothed data points
            if (mz < minMasa) {
                minMasa = mz;
            }
            if (mz > maxMasa) {
                maxMasa = mz;
            }
        }
        double dm = 100.0 / (maxMasa - minMasa);
        if (i % 30 == 0 && debug > 0) {
            System.out.println("");
            System.out.print("t=" + Math.round(retentiontime[i] * 100) / 100.0 + ": (in %) ");
        }
        // Every scan is still enabled at this point (scanOk is only modified
        // by the ignore-times handling below), so no per-scan marker is
        // needed in the debug output.
        if (debug > 0) {
            System.out.print(((int) (massSum * dm)) + " ");
        }
        setProcedure(i, totalScans, 1);
    }
    if (debug > 0) {
        System.out.println("");
    }

    // Disable the scans that fall inside the user supplied "from-to" ranges
    String[] it = ignoreTimes.trim().split(", ?");
    for (j = 0; j < it.length; j++) {
        String[] itj = it[j].split("-");
        if (itj.length == 2) {
            double from;
            double to;
            try {
                from = Double.parseDouble(itj[0].trim());
                to = Double.parseDouble(itj[1].trim());
            } catch (NumberFormatException e) {
                setStatus(TaskStatus.ERROR);
                setErrorMessage("Invalid ignore-times range \"" + it[j] + "\": expected two numbers separated by '-'");
                return;
            }
            // binarySearch returns -(insertionPoint) - 1 when the value is
            // absent; convert it to the insertion point, i.e. the first scan
            // whose retention time is >= from.
            int first = Arrays.binarySearch(retentiontime, from);
            if (first < 0) {
                first = -first - 1;
            }
            // With equal retention times binarySearch may hit any of them
            while (first > 0 && retentiontime[first - 1] >= from) {
                first--;
            }
            for (i = first; i < totalScans && retentiontime[i] <= to; i++) {
                scanOk[i] = false;
            }
        }
    }

    // Collect, for every enabled scan, the data points above the threshold
    long passed = 0;
    long nopassed = 0;
    for (i = 0; i < totalScans; i++) {
        if (i % 100 == 0 && isCanceled()) {
            return;
        }
        if (scanOk[i]) {
            scan = scans[i];
            IndexedDataPoint[] mzv = data[i];
            DataPoint[] mzvOriginal = scan.getDataPoints();
            ArrayList<Datum> dal = new ArrayList<Datum>();
            for (j = 0; j < mzv.length; j++) {
                if (mzv[j].datapoint.getIntensity() >= minimumHeight) {
                    dal.add(new Datum(mzv[j].datapoint, i, mzvOriginal[mzv[j].index]));
                    passed++;
                } else {
                    nopassed++;
                }
            }
            roi[i] = dal.toArray(new Datum[0]);
        }
        setProcedure(i, totalScans, 2);
    }
    logger.info(passed + " intensities >= " + minimumHeight + " of " + (passed + nopassed) + " (" + Math.round(passed * 10000.0 / (double) (passed + nopassed)) / 100.0 + "%) on " + dataFile);

    // New "probing" algorithm
    // (1) Generate probes all over chromatograms
    // (2) Move each probe to their closest maximum until it cannot find a
    // new maximum
    // (3) assign spot id to each "center" using all points within region

    // (1) Generate probes all over
    double byMZ = Math.max(mzTol * 2, 1e-6);
    int byScan = Math.max(1, tolScans / 4);
    logger.info("Creating Grid of probes on " + dataFile + " every " + mzFormat.format(byMZ) + " m/z and " + byScan + " scans");
    double m;
    int ndata = (int) Math.round((((double) totalScans / (double) byScan) + 1) * ((maxMasa - minMasa + byMZ) / byMZ));
    Probe[] probes = new Probe[ndata];
    int idata = 0;
    for (i = 0; i < totalScans; i += byScan) {
        if (i % 100 == 0 && isCanceled()) {
            return;
        }
        // Rows on odd scan indexes are shifted by half a step in m/z
        for (m = minMasa - (i % 2) * byMZ / 2; m <= maxMasa; m += byMZ) {
            probes[idata++] = new Probe(m, i);
        }
        setProcedure(i, totalScans, 3);
    }

    // (2) Move each probe to their closest center
    double mzR = byMZ / 2;
    int scanR = Math.max(byScan - 1, 2);
    logger.info("Finding local maxima for each probe on " + dataFile + " radius: scans=" + scanR + ", m/z=" + mzR);
    int okProbes = 0;
    for (i = 0; i < idata; i++) {
        if (i % 100 == 0 && isCanceled()) {
            return;
        }
        moveProbeToCenter(probes[i], scanR, mzR);
        if (probes[i].intensityCenter < minimumHeight) {
            probes[i] = null;
        } else {
            okProbes++;
        }
        setProcedure(i, idata, 4);
    }
    // Always compact the array: Arrays.sort below cannot handle the null
    // entries left by discarded probes or by unused trailing slots, which
    // previously caused a NullPointerException when no probe survived.
    Probe[] keptProbes = new Probe[okProbes];
    int kept = 0;
    for (i = 0; i < idata; i++) {
        if (probes[i] != null) {
            keptProbes[kept++] = probes[i];
        }
    }
    probes = keptProbes;

    // (3) Assign spot id to each "center"
    logger.info("Sorting probes " + dataFile);
    Arrays.sort(probes);
    logger.info("Assigning spot id to local maxima on " + dataFile);
    SpotByProbes sbp = new SpotByProbes();
    ArrayList<SpotByProbes> spots = new ArrayList<SpotByProbes>();
    double mzA = -1;
    int scanA = -1;
    for (i = 0; i < probes.length; i++) {
        if (probes[i] != null && probes[i].intensityCenter >= minimumHeight) {
            // A change of centre (m/z or scan) starts a new spot
            if (probes[i].mzCenter != mzA || probes[i].scanCenter != scanA) {
                if (i % 10 == 0 && isCanceled()) {
                    return;
                }
                if (sbp.size() > 0) {
                    spots.add(sbp);
                    sbp.assignSpotId();
                }
                sbp = new SpotByProbes();
                mzA = probes[i].mzCenter;
                scanA = probes[i].scanCenter;
            }
            sbp.addProbe(probes[i]);
        }
        setProcedure(i, probes.length, 5);
    }
    // Flush the last spot being accumulated
    if (sbp.size() > 0) {
        spots.add(sbp);
        sbp.assignSpotId();
    }
    logger.info("Spots:" + spots.size());

    // Assign specific datums to spots to avoid using datums to several
    // spots
    logger.info("Assigning intensities to local maxima on " + dataFile);
    i = 0;
    for (SpotByProbes sx : spots) {
        if (sx.size() > 0) {
            if (i % 100 == 0 && isCanceled()) {
                return;
            }
            assignSpotIdToDatumsFromScans(sx, scanR, mzR);
        }
        setProcedure(i++, spots.size(), 6);
    }

    // (4) Join Tolerable Centers
    logger.info("Joining tolerable maxima on " + dataFile);
    int criticScans = Math.max(1, tolScans / 2);
    int joins = 0;
    // NOTE(review): the last spot is never used as s1 here (it can still be
    // reached as s2) - confirm this is intended.
    for (i = 0; i < spots.size() - 1; i++) {
        SpotByProbes s1 = spots.get(i);
        if (s1.center != null && s1.size() > 0) {
            if (i % 100 == 0 && isCanceled()) {
                return;
            }
            // Walk back to the first preceding spot still within mzTol of s1
            for (j = i; j > 0 && j < spots.size() && spots.get(j - 1).center != null && spots.get(j - 1).center.mzCenter + mzTol > s1.center.mzCenter; j--) {
                // the loop header does all the work
            }
            for (; j < spots.size(); j++) {
                SpotByProbes s2 = spots.get(j);
                if (i != j && s2.center != null) {
                    if (s2.center.mzCenter - s1.center.mzCenter > mzTol) {
                        break;
                    }
                    // Smallest distance, in scans, between the borders of
                    // both spots
                    int l = Math.min(Math.abs(s1.minScan - s2.minScan), Math.abs(s1.minScan - s2.maxScan));
                    int r = Math.min(Math.abs(s1.maxScan - s2.minScan), Math.abs(s1.maxScan - s2.maxScan));
                    int d = Math.min(l, r);
                    boolean overlap = !(s2.maxScan < s1.minScan || s2.minScan > s1.maxScan);
                    if ((d <= criticScans || overlap) && (intensityRatio(s1.center.intensityCenter, s2.center.intensityCenter) > intensitySimilarity)) {
                        if (debug > 2) {
                            System.out.println("Joining s1 id " + s1.spotId + "=" + mzFormat.format(s1.center.mzCenter) + " mz [" + mzFormat.format(s1.minMZ) + " ~ " + mzFormat.format(s1.maxMZ) + "] time=" + timeFormat.format(retentiontime[s1.center.scanCenter]) + " int=" + s1.center.intensityCenter + " with s2 id " + s2.spotId + "=" + mzFormat.format(s2.center.mzCenter) + " mz [" + mzFormat.format(s2.minMZ) + " ~ " + mzFormat.format(s2.maxMZ) + "] time=" + timeFormat.format(retentiontime[s2.center.scanCenter]) + " int=" + s2.center.intensityCenter);
                        }
                        assignSpotIdToDatumsFromSpotId(s1, s2, scanR, mzR);
                        s1.addProbesFromSpot(s2, true);
                        // restart (the loop increment continues at i + 1)
                        j = i;
                        joins++;
                    }
                }
            }
        }
        setProcedure(i, spots.size(), 7);
    }
    logger.info("Joins:" + joins);

    // (5) Remove "Large" spanned masses
    logger.info("Removing long and comparable 'masses' on " + dataFile);
    for (i = 0; i < spots.size() - 1; i++) {
        SpotByProbes s1 = spots.get(i);
        if (s1.center != null && s1.size() > 0) {
            if (i % 100 == 0 && isCanceled()) {
                return;
            }
            // Accumulated number of scans of s1 plus its comparable
            // neighbours (named differently from the totalScans field on
            // purpose, to avoid shadowing it)
            int spanScans = s1.maxScan - s1.minScan + 1;
            int lScan = s1.minScan;
            int rScan = s1.maxScan;
            ArrayList<Integer> toRemove = new ArrayList<Integer>();
            toRemove.add(i);
            // Walk back to the first preceding spot still within mzTol of s1
            for (j = i; j > 0 && j < spots.size() && spots.get(j - 1).center != null && spots.get(j - 1).center.mzCenter + mzTol > s1.center.mzCenter; j--) {
                // the loop header does all the work
            }
            for (; j < spots.size(); j++) {
                SpotByProbes s2 = spots.get(j);
                if (i != j && s2.center != null) {
                    if (s2.center.mzCenter - s1.center.mzCenter > mzTol) {
                        break;
                    }
                    if (intensityRatio(s1.center.intensityCenter, s2.center.intensityCenter) > intensitySimilarity) {
                        int dl = Math.min(Math.abs(lScan - s2.minScan), Math.abs(lScan - s2.maxScan));
                        int dr = Math.min(Math.abs(rScan - s2.minScan), Math.abs(rScan - s2.maxScan));
                        int md = Math.min(dl, dr);
                        if (md <= maxTolScans || !(s2.maxScan < lScan || s2.minScan > rScan)) {
                            // tolerable distance, or the ranges intersect
                            spanScans += s2.maxScan - s2.minScan + 1;
                            toRemove.add(j);
                            lScan = Math.min(lScan, s2.minScan);
                            rScan = Math.max(rScan, s2.maxScan);
                        }
                    }
                }
            }
            if (spanScans * rtPerScan > maximumTimeSpan) {
                if (debug > 2) {
                    System.out.println("Removing " + toRemove.size() + " masses around " + mzFormat.format(s1.center.mzCenter) + " m/z (" + s1.spotId + "), time " + timeFormat.format(retentiontime[s1.center.scanCenter]) + ", intensity " + s1.center.intensityCenter + ", Total Scans=" + spanScans + " (" + Math.round(spanScans * rtPerScan * 1000.0) / 1000.0 + " min).");
                }
                for (int spotIndex : toRemove) {
                    spots.get(spotIndex).clear();
                }
            }
        }
        setProcedure(i, spots.size(), 8);
    }

    // Build peaks from assigned datums
    logger.info("Building peak rows on " + dataFile + " (tolereance scans=" + tolScans + ")");
    i = 0;
    for (SpotByProbes sx : spots) {
        if (sx.size() > 0 && sx.maxScan - sx.minScan + 1 >= tolScans) {
            if (i % 100 == 0 && isCanceled()) {
                return;
            }
            sx.buildMaxDatumFromScans(roi, minimumHeight);
            if (sx.getMaxDatumScans() >= tolScans && (sx.getContigousMaxDatumScans() >= tolScans || sx.getContigousToMaxDatumScansRatio() > 0.5)) {
                Chromatogram peak = new Chromatogram(dataFile, scanNumbers);
                if (addMaxDatumFromScans(sx, peak) > 0) {
                    peak.finishChromatogram();
                    if (peak.getArea() > 1e-6) {
                        newPeakID++;
                        SimplePeakListRow newRow = new SimplePeakListRow(newPeakID);
                        newRow.addPeak(dataFile, peak);
                        newRow.setComment(sx.toString(retentiontime));
                        newPeakList.addRow(newRow);
                        if (debug > 0) {
                            System.out.println("Peak added id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio() + " area = " + peak.getArea());
                        }
                        if (debug > 1) {
                            // Peak info:
                            System.out.println(sx.toString());
                            sx.printDebugInfo();
                        }
                    } else {
                        if (debug > 0) {
                            System.out.println("Ignored by area ~ 0 id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio() + " area = " + peak.getArea());
                        }
                    }
                }
            } else {
                if (debug > 0) {
                    System.out.println("Ignored by continous criteria: id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio());
                }
            }
        } else {
            if (sx.size() > 0) {
                if (debug > 0) {
                    System.out.println("Ignored by time range criteria: id=" + sx.spotId + " " + mzFormat.format(sx.center.mzCenter) + " mz, time=" + timeFormat.format(retentiontime[sx.center.scanCenter]) + ", intensity=" + sx.center.intensityCenter + ", probes=" + sx.size() + ", data scans=" + sx.getMaxDatumScans() + ", cont scans=" + sx.getContigousMaxDatumScans() + ", cont ratio=" + sx.getContigousToMaxDatumScansRatio());
                }
            }
        }
        setProcedure(i++, spots.size(), 9);
    }
    logger.info("Peaks on " + dataFile + " = " + newPeakList.getNumberOfRows());

    // Add new peaklist to the project
    project.addPeakList(newPeakList);
    // Add quality parameters to peaks
    QualityParameters.calculateQualityParameters(newPeakList);
    setStatus(TaskStatus.FINISHED);
    logger.info("Finished GRIDMASS on " + dataFile);
}
Aggregations