use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class Consensus method getConsensusSequence.
public final Sequence getConsensusSequence() {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < consensus.length; i++) {
buffer.append(dataType.getChar(getState(i)));
}
Sequence sequence = new Sequence(new Taxon(name), buffer.toString());
sequence.setDataType(dataType);
return sequence;
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class Defects method main.
public static void main(String[] args) throws java.io.IOException {
String alignmentFileName = null;
if (args != null && args.length > 0) {
alignmentFileName = args[0];
}
Alignment alignment = readNexusFile(alignmentFileName);
Defects defects = new Defects(alignment);
int siteCount = alignment.getSiteCount() / 3;
int sequenceCount = alignment.getSequenceCount();
int totalSites = siteCount * sequenceCount;
int totalReads = defects.getTotalCodonsMinusGaps();
int defectiveSites = defects.getDefectiveSiteCount();
int defectiveSequences = defects.getDefectiveSequenceCount();
int totalDefects = defects.getDefectCount();
int totalStops = defects.getStopCodonCount();
double siteP = (double) defectiveSites / (double) siteCount;
double sequenceP = (double) defectiveSequences / (double) sequenceCount;
double totalP = (double) totalDefects / (double) totalReads;
double totalSP = (double) totalStops / (double) totalReads;
System.out.println("Matrix size=" + totalSites);
System.out.println("Non-gap codons=" + totalReads);
System.out.println(defectiveSequences + "/" + sequenceCount + "(" + sequenceP + ") defective sequences.");
System.out.println(defectiveSites + "/" + siteCount + "(" + siteP + ") defective sites.");
System.out.println(totalDefects + "/" + totalReads + "(" + totalP + ") defects.");
System.out.println(totalStops + "/" + totalReads + "(" + totalSP + ") stop codons.");
double mean = (double) totalDefects / (double) sequenceCount;
System.out.println(mean + " defects per sequence");
double meanS = (double) totalStops / (double) sequenceCount;
System.out.println(meanS + " stops per sequence");
int maxDefects = defects.getMaxDefectiveSites();
int maxStops = defects.getMaxStopSites();
System.out.println("defective sequences:");
// }
for (int d = 1; d <= maxDefects; d++) {
Set<Integer> seqs = defects.getSequences(d);
for (Integer seq : seqs) {
String name = alignment.getTaxonId(seq);
System.out.println(d + " " + name);
}
}
System.out.println("Defects\tSequences\texpected");
double probTerm;
// probability of 0
probTerm = Math.exp(-mean);
for (int i = 0; i < 10; i++) {
System.out.println(i + "\t" + defects.getSequenceCount(i) + "\t" + probTerm * sequenceCount);
// compute probability of n from prob. of n-1
probTerm *= mean / (i + 1);
}
System.out.println("Stops\tSequences\texpected");
// probability of 0
probTerm = Math.exp(-meanS);
for (int i = 0; i < 10; i++) {
System.out.println(i + "\t" + defects.getStopSequenceCount(i) + "\t" + probTerm * sequenceCount);
// compute probability of n from prob. of n-1
probTerm *= meanS / (i + 1);
}
System.out.println("stop-codon sequences:");
for (int d = 1; d <= maxStops; d++) {
Set<Integer> seqs = defects.getSequencesByStopCount(d);
for (Integer index : seqs) {
String name = alignment.getTaxonId(index);
System.out.println(d + " " + name);
}
}
Consensus con = new Consensus("mode", alignment, true);
Sequence consensus = con.getConsensusSequence();
SimpleAlignment a = new SimpleAlignment();
a.addSequence(new Sequence(consensus));
ConvertAlignment ca = new ConvertAlignment(Codons.UNIVERSAL, a);
double[] pstop = new double[ca.getSiteCount()];
int[] counts = new int[10];
for (double kappa = 2.0; kappa <= 2.0; kappa += 1.0) {
for (int i = 0; i < ca.getSiteCount(); i++) {
int state = ca.getState(0, i);
if (Codons.UNIVERSAL.isStopCodon(state)) {
throw new RuntimeException("Consensus has a stop codon in it at position " + i + "!");
}
int[] triplet = Codons.UNIVERSAL.getTripletStates(state);
int[][] tvmutants = generateTransversionMutants(triplet);
int[][] timutants = generateTransitionMutants(triplet);
int tvStops = 0;
for (int j = 0; j < 6; j++) {
if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(tvmutants[j][0], tvmutants[j][1], tvmutants[j][2]))) {
tvStops += 1;
}
}
int tiStops = 0;
for (int j = 0; j < 3; j++) {
if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(timutants[j][0], timutants[j][1], timutants[j][2]))) {
tiStops += 1;
}
}
pstop[i] = (tvStops + kappa * tiStops) / (6.0 + kappa * 3.0);
counts[tvStops + tiStops] += 1;
}
System.out.println("kappa = " + kappa + " pstop=" + dr.stats.DiscreteStatistics.mean(pstop));
}
System.out.println("stop-mutations\tcodons");
for (int i = 0; i < 10; i++) {
System.out.println(i + "\t" + counts[i]);
}
/*int count = 0;
int reps = 100000;
for (int i = 0; i < reps; i++) {
SimpleAlignment simpleAlignment = new SimpleAlignment();
simpleAlignment.addSequence(new Sequence(consensus));
//pick a random position
int pos = random.nextInt(simpleAlignment.getSiteCount());
int state = simpleAlignment.getState(0,pos);
int newState = random.nextInt(4);
while (newState == state) {
newState = random.nextInt(4);
}
simpleAlignment.setState(0,pos,newState);
defects = new Defects(simpleAlignment);
count += defects.getDefectCount();
}
double p = (double)count/(double)reps;
*/
double p = dr.stats.DiscreteStatistics.mean(pstop);
double rate = totalSP * (1.0 - p) / p;
System.out.println("Total inferred point-mutation rate = " + rate);
System.out.println("Total inferred point-mutations = " + (totalStops * (1 - p) / p));
System.out.println("Proportion of point-mutations that produce premature stop codons = " + p);
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class SimpleAlignment method getUncertainSitePattern.
@Override
public double[][] getUncertainSitePattern(int siteIndex) {
if (areUncertain()) {
double[][] pattern = new double[getSequenceCount()][];
for (int i = 0; i < getSequenceCount(); ++i) {
Sequence seq = getSequence(i);
if (siteIndex > seq.getLength()) {
pattern[i] = new double[dataType.getStateCount()];
Arrays.fill(pattern[i], 1.0);
} else {
if (seq instanceof UncertainSequence) {
pattern[i] = ((UncertainSequence) seq).getUncertainPattern(siteIndex);
} else {
pattern[i] = new double[dataType.getStateCount()];
int[] states = dataType.getStates(seq.getState(siteIndex));
for (int state : states) {
pattern[i][state] = 1.0;
}
}
}
}
return pattern;
} else {
throw new UnsupportedOperationException("getUncertainSitePattern not implemented yet");
}
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class NexusImporter method readSequenceData.
/**
* Reads sequences in a 'DATA' or 'CHARACTERS' block.
*/
private void readSequenceData(Sequences sequences, TaxonList taxonList) throws ImportException, IOException {
int n, i;
String firstSequence = null;
if (isInterleaved) {
boolean firstLoop = true;
int readCount = 0;
while (readCount < siteCount) {
n = -1;
for (i = 0; i < taxonCount; i++) {
String token = readToken().trim();
Sequence sequence;
if (firstLoop) {
sequence = new Sequence();
sequence.setDataType(dataType);
sequences.addSequence(sequence);
Taxon taxon;
if (taxonList != null) {
int index = taxonList.getTaxonIndex(token.trim());
if (index == -1) {
// ...perhaps it is a numerical taxon reference?
throw new UnknownTaxonException(token);
} else {
taxon = taxonList.getTaxon(index);
}
} else {
taxon = new Taxon(token.trim());
}
sequence.setTaxon(taxon);
} else {
sequence = sequences.getSequence(i);
Taxon taxon = sequence.getTaxon();
if (!taxon.getId().equals(token)) {
throw new UnknownTaxonException("Unknown taxon label: expecting '" + taxon.getId() + "', found '" + token + "'");
}
}
StringBuffer buffer = new StringBuffer();
readSequenceLine(buffer, dataType, ";", gapCharacters, missingCharacters, matchCharacters, firstSequence);
String seqString = buffer.toString();
sequence.appendSequenceString(seqString);
if (i == 0) {
firstSequence = seqString;
}
if (getLastDelimiter() == ';') {
if (i < taxonCount - 1) {
throw new TooFewTaxaException();
}
if (readCount + n < siteCount) {
throw new ShortSequenceException(sequence.getTaxon().getId());
}
}
if (n == -1) {
n = seqString.length();
}
if (n != seqString.length()) {
throw new ShortSequenceException(sequence.getTaxon().getId());
}
}
firstLoop = false;
readCount += n;
}
if (getLastDelimiter() != ';') {
throw new BadFormatException("Expecting ';' after sequences data");
}
} else {
for (i = 0; i < taxonCount; i++) {
String token = readToken().trim();
Sequence sequence = new Sequence();
sequence.setDataType(dataType);
sequences.addSequence(sequence);
Taxon taxon;
if (taxonList != null) {
int index = taxonList.getTaxonIndex(token);
if (index == -1) {
// ...perhaps it is a numerical taxon reference?
throw new UnknownTaxonException(token);
} else {
taxon = taxonList.getTaxon(index);
}
} else {
taxon = new Taxon(token);
}
sequence.setTaxon(taxon);
StringBuffer buffer = new StringBuffer();
readSequence(buffer, dataType, ";", siteCount, gapCharacters, missingCharacters, matchCharacters, firstSequence);
String seqString = buffer.toString();
if (seqString.length() != siteCount) {
throw new ShortSequenceException(sequence.getTaxon().getId());
}
sequence.appendSequenceString(seqString);
if (i == 0) {
firstSequence = seqString;
}
if (getLastDelimiter() == ';' && i < taxonCount - 1) {
throw new TooFewTaxaException();
}
}
if (getLastDelimiter() != ';') {
throw new BadFormatException("Expecting ';' after sequences data, has '" + (char) getLastDelimiter() + "' in line " + getLineNumber());
}
}
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class FastaImporter method importAlignment.
/**
* importAlignment.
*/
public Alignment importAlignment() throws IOException, ImportException {
SimpleAlignment alignment = null;
try {
// find fasta line start
while (read() != FASTA_FIRST_CHAR) {
}
do {
final String name = readLine().trim();
StringBuffer seq = new StringBuffer();
readSequence(seq, dataType, "" + FASTA_FIRST_CHAR, Integer.MAX_VALUE, "-", "?", "", "");
if (alignment == null) {
alignment = new SimpleAlignment();
}
alignment.addSequence(new Sequence(new Taxon(name.toString()), seq.toString()));
} while (getLastDelimiter() == FASTA_FIRST_CHAR);
} catch (EOFException e) {
// catch end of file the ugly way.
}
return alignment;
}
Aggregations