use of org.apache.commons.math3.stat.descriptive.rank.Max in project SeqMonk by s-andrews.
the class IntronRegressionPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
int minDensity = optionsPanel.minDensity();
int minLength = optionsPanel.minLength();
double maxPValue = optionsPanel.maxPValue();
int binSize = optionsPanel.measurementBinSize();
QuantitationStrandType readFilter = optionsPanel.readFilter();
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
Vector<Probe> probesForThisChromosome = new Vector<Probe>();
progressUpdated("Making probes", c, chrs.length);
Feature[] features = getValidFeatures(chrs[c]);
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
// There are no introns here
if (!(features[f].location() instanceof SplitLocation))
continue;
Location[] subLocations = ((SplitLocation) features[f].location()).subLocations();
// TODO: Reverse the subLocations if its a reverse feature
for (int intron = 1; intron < subLocations.length; intron++) {
int start = subLocations[intron - 1].end();
int end = subLocations[intron].start();
if ((end - start) + 1 < minLength) {
// This intron is too short.
continue;
}
// TODO: We could throw away any probes which didn't have enough reads in any feature
Probe p = new Probe(chrs[c], start, end, features[f].location().strand(), features[f].name() + "_" + intron);
probesForThisChromosome.add(p);
}
}
// Now we can deduplicate the probes for this chromosome and add them to the main collection
Probe[] dupProbes = probesForThisChromosome.toArray(new Probe[0]);
Arrays.sort(dupProbes);
for (int p = 0; p < dupProbes.length; p++) {
if (p > 0 && dupProbes[p].packedPosition() == dupProbes[p - 1].packedPosition())
continue;
probes.add(dupProbes[p]);
}
}
Probe[] allProbes = probes.toArray(new Probe[0]);
collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
// Now we go back through the probes and quantitate them
for (int p = 0; p < allProbes.length; p++) {
if (cancel) {
progressCancelled();
return;
}
if (p % 1000 == 0) {
progressUpdated("Quantitated " + p + " out of " + allProbes.length + " probes", p, allProbes.length);
}
for (int d = 0; d < data.length; d++) {
long[] reads = data[d].getReadsForProbe(allProbes[p]);
int[] countsPerSite = new int[allProbes[p].length()];
int usableCounts = 0;
for (int r = 0; r < reads.length; r++) {
if (readFilter.useRead(allProbes[p], reads[r])) {
++usableCounts;
for (int pos = Math.max(0, SequenceRead.start(reads[r]) - allProbes[p].start()); pos <= Math.min(countsPerSite.length - 1, SequenceRead.end(reads[r]) - allProbes[p].start()); pos++) {
++countsPerSite[pos];
}
}
}
if (usableCounts / (allProbes[p].length() / 1000d) >= minDensity) {
// We're going to do a linear regression rather than a correlation
// We're analysing in bins so we'll work out the bin counts and
// add them dynamically to the regression.
SimpleRegression regression = new SimpleRegression();
int binCount = 0;
for (int i = 0; i < countsPerSite.length; i++) {
if (i > 0 && i % binSize == 0) {
regression.addData(i, binCount);
binCount = 0;
}
binCount += countsPerSite[i];
}
float slope = (float) (regression.getSlope() * 1000000);
double pValue = regression.getSignificance();
if (allProbes[p].strand() == Location.REVERSE) {
slope = 0 - slope;
}
if (pValue <= maxPValue) {
data[d].setValueForProbe(allProbes[p], slope);
} else {
data[d].setValueForProbe(allProbes[p], Float.NaN);
}
} else {
data[d].setValueForProbe(allProbes[p], Float.NaN);
}
}
}
StringBuffer quantitationDescription = new StringBuffer();
quantitationDescription.append("Intron regression pipeline quantitation ");
quantitationDescription.append(". Directionality was ");
quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
quantitationDescription.append(". Min intron length was ");
quantitationDescription.append(minLength);
quantitationDescription.append(". Min read density was ");
quantitationDescription.append(minDensity);
quantitationDescription.append(". Max slope p-value was ");
quantitationDescription.append(maxPValue);
collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
quantitatonComplete();
}
use of org.apache.commons.math3.stat.descriptive.rank.Max in project tetrad by cmu-phil.
the class Lofs2 method optimizeRow.
private void optimizeRow(final int rowIndex, final TetradMatrix data, final double range, final List<List<Integer>> rows, final List<List<Double>> parameters) {
System.out.println("A");
final int numParams = rows.get(rowIndex).size();
final double[] dLeftMin = new double[numParams];
final double[] dRightMin = new double[numParams];
double[] values = new double[numParams];
double delta = 0.1;
if (false) {
// isEdgeCorrected()) {
double min = -2;
double max = 2;
int[] dims = new int[values.length];
int numBins = 5;
for (int i = 0; i < values.length; i++) dims[i] = numBins;
CombinationGenerator gen = new CombinationGenerator(dims);
int[] comb;
List<Double> maxParams = new ArrayList<>();
for (int i = 0; i < values.length; i++) maxParams.add(0.0);
double maxV = Double.NEGATIVE_INFINITY;
while ((comb = gen.next()) != null) {
if (Thread.currentThread().isInterrupted()) {
break;
}
List<Double> params = new ArrayList<>();
for (int i = 0; i < values.length; i++) {
params.add(min + (max - min) * (comb[i] / (double) numBins));
}
parameters.set(rowIndex, params);
double v = scoreRow(rowIndex, data, rows, parameters);
if (v > maxV) {
maxV = v;
maxParams = params;
}
}
System.out.println("maxparams = " + maxParams);
parameters.set(rowIndex, maxParams);
for (int i = 0; i < values.length; i++) {
dLeftMin[i] = -range;
dRightMin[i] = range;
values[i] = maxParams.get(i);
}
} else if (false) {
for (int i = 0; i < numParams; i++) {
if (Thread.currentThread().isInterrupted()) {
break;
}
parameters.get(rowIndex).set(i, -range);
double vLeft = scoreRow(rowIndex, data, rows, parameters);
double dLeft = -range;
// Search from the left for the first valley; mark that as dleft.
for (double d = -range + delta; d < range; d += delta) {
if (Thread.currentThread().isInterrupted()) {
break;
}
parameters.get(rowIndex).set(i, d);
double v = scoreRow(rowIndex, data, rows, parameters);
if (Double.isNaN(v))
continue;
if (v > vLeft)
break;
vLeft = v;
dLeft = d;
}
parameters.get(rowIndex).set(i, range);
double vRight = scoreRow(rowIndex, data, rows, parameters);
double dRight = range;
// to avoid high scores at the boundaries.
for (double d = range - delta; d > -range; d -= delta) {
if (Thread.currentThread().isInterrupted()) {
break;
}
parameters.get(rowIndex).set(i, d);
double v = scoreRow(rowIndex, data, rows, parameters);
if (Double.isNaN(v))
continue;
if (v > vRight)
break;
vRight = v;
dRight = d;
}
// If dleft dright ended up reversed, re-reverse them.
if (dLeft > dRight) {
double temp = dRight;
dLeft = dRight;
dRight = temp;
}
System.out.println("dLeft = " + dLeft + " dRight = " + dRight);
dLeftMin[i] = dLeft;
dRightMin[i] = dRight;
values[i] = (dLeft + dRight) / 2.0;
}
} else {
// Default case: search for the maximum score over the entire range.
for (int i = 0; i < numParams; i++) {
if (Thread.currentThread().isInterrupted()) {
break;
}
dLeftMin[i] = -range;
dRightMin[i] = range;
values[i] = 0;
}
}
MultivariateFunction function = new MultivariateFunction() {
public double value(double[] values) {
System.out.println(Arrays.toString(values));
for (int i = 0; i < values.length; i++) {
if (Thread.currentThread().isInterrupted()) {
break;
}
parameters.get(rowIndex).set(i, values[i]);
}
double v = scoreRow(rowIndex, data, rows, parameters);
if (Double.isNaN(v)) {
// was 10000
return Double.POSITIVE_INFINITY;
}
return -v;
}
};
try {
MultivariateOptimizer search = new PowellOptimizer(1e-7, 1e-7);
PointValuePair pair = search.optimize(new InitialGuess(values), new ObjectiveFunction(function), GoalType.MINIMIZE, new MaxEval(100000));
values = pair.getPoint();
} catch (Exception e) {
e.printStackTrace();
for (int i = 0; i < values.length; i++) {
parameters.get(rowIndex).set(i, Double.NaN);
}
}
}
use of org.apache.commons.math3.stat.descriptive.rank.Max in project jvarkit by lindenb.
the class CopyNumber01 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.refFile == null) {
LOG.error("Undefined REF file");
return -1;
}
if (this.archiveFile == null) {
LOG.error("Undefined output file.");
return -1;
}
if (!StringUtil.isBlank(prefix)) {
if (!(prefix.endsWith(".") || prefix.endsWith("_"))) {
prefix = prefix + ".";
}
}
SamReader samReader = null;
ArchiveFactory archive = null;
try {
final String input = oneAndOnlyOneFile(args);
samReader = super.openSamReader(input);
if (!samReader.hasIndex()) {
LOG.error("file is not indexed " + input);
return -1;
}
final String sampleName = samReader.getFileHeader().getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse("SAMPLE");
final SAMFileHeader header = samReader.getFileHeader();
this.samDictionary = header.getSequenceDictionary();
if (this.samDictionary == null || this.samDictionary.isEmpty()) {
throw new JvarkitException.DictionaryMissing(input);
}
/* loading REF Reference */
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
final SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
if (dict == null) {
throw new JvarkitException.DictionaryMissing(refFile.getPath());
}
this.sam2faiContigNameConverter = ContigNameConverter.fromDictionaries(this.samDictionary, dict);
this.sam2faiContigNameConverter.setOnNotFound(OnNotFound.SKIP);
archive = ArchiveFactory.open(this.archiveFile);
PrintWriter mkw = archive.openWriter(this.prefix + "cnv01.mk");
mkw.println("## " + this.getProgramCommandLine());
mkw.println(".PHONY=all all2");
mkw.println("TARGETS=");
mkw.println("SCREEN_WIDTH?=2600");
mkw.println("SCREEN_HEIGHT?=1000");
mkw.println("GNUPLOT?=gnuplot");
mkw.println("all : all2");
final List<String> all_chrom_files = new ArrayList<>();
double maxDepth = 0.0;
for (final SAMSequenceRecord ssr : this.samDictionary.getSequences()) {
if (!StringUtil.isBlank(this.limitToChrom) && !this.limitToChrom.equals(ssr.getSequenceName())) {
LOG.info("Skipping " + ssr.getSequenceName() + "....");
continue;
}
if (this.sam2faiContigNameConverter.apply(ssr.getSequenceName()) == null) {
LOG.info("Ignoring " + ssr.getSequenceName() + " because it's not in REF");
continue;
}
if (ignoreChromosomeName(ssr.getSequenceName())) {
LOG.info("Ignoring " + ssr.getSequenceName());
continue;
}
LOG.info("processing chromosome " + ssr.getSequenceName());
ContigProcessor proc = new ContigProcessor(samReader, ssr, sampleName);
proc.run();
final String tsvFilename = this.prefix + proc.getContig() + "." + proc.sampleName + ".bed";
final String pngFilename = "$(addsuffix .png,$(basename " + tsvFilename + "))";
final double depth = proc.items.stream().mapToDouble(I -> I.norm_depth).max().orElse(4);
maxDepth = Math.max(maxDepth, depth);
all_chrom_files.add(tsvFilename);
mkw.println("TARGETS+=" + pngFilename);
mkw.println(pngFilename + ":" + tsvFilename);
mkw.println("\techo 'set terminal png truecolor size ${SCREEN_WIDTH},${SCREEN_HEIGHT};" + "set key autotitle columnhead;" + "set datafile separator \"\t\";" + "set title \"" + proc.sampleName + " " + ssr.getSequenceName() + "\";" + "set ylabel \"Normalized Depth - Number of copies\";" + "set xlabel \"Position on " + ssr.getSequenceName() + "\";" + // + "set style circle radius 0.02;"
"set nokey;" + "set yrange [0:" + Math.min(4, Math.ceil(depth)) + "];" + "set xrange [1:" + ssr.getSequenceLength() + "];" + "set xtic rotate by 90 right;" + "set output \"$@\";" + // + "plot \"$<\" using 1:2:1 w points linecolor variable "
"plot \"$<\" using 2:7 w lines " + "' | ${GNUPLOT}");
PrintWriter pw = archive.openWriter(tsvFilename);
proc.saveCoverage(pw);
pw.flush();
pw.close();
proc = null;
}
samReader.close();
final String pngFilename = this.prefix + "wholeGenome.png";
mkw.println("TARGETS+=" + pngFilename);
mkw.println(pngFilename + ":" + String.join(" ", all_chrom_files));
mkw.println("\tgrep -v '^#' $^ | cut -f 1,4,7 | sed " + samDictionary.getSequences().stream().map(SSR -> " -e 's/^" + SSR.getSequenceName() + "\t/" + SSR.getSequenceIndex() + "\t/' ").collect(Collectors.joining()) + "| sort -t '\t' -k2,2n > $(addsuffix .tmp.tsv,$@)");
mkw.println("\techo 'set terminal png truecolor size ${SCREEN_WIDTH},${SCREEN_HEIGHT};" + "set datafile separator \"\t\";" + "set title \"" + sampleName + " (Whole Genome)\";" + "set ylabel \"Normalized Depth - Number of copies\";" + "set xlabel \"Genomic Position\";" + "set yrange [0:" + Math.min(4, Math.ceil(maxDepth)) + "];" + "set xrange [1:" + this.samDictionary.getReferenceLength() + ".0];" + "set xtic rotate by 90 right;" + "set nokey;" + "set output \"$@\";" + "plot \"$(addsuffix .tmp.tsv,$@)\" using 2:3:1 w points linecolor variable " + "' | ${GNUPLOT}");
mkw.println("\trm -f $(addsuffix .tmp.tsv,$@)");
mkw.println("all2: ${TARGETS}");
mkw.flush();
mkw.close();
mkw = null;
archive.close();
archive = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(samReader);
CloserUtil.close(archive);
CloserUtil.close(this.indexedFastaSequenceFile);
}
}
Aggregations