use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class FastaImporter method importAlignment.
/**
* importAlignment.
*/
public Alignment importAlignment() throws IOException, ImportException {
SimpleAlignment alignment = null;
try {
// find fasta line start
while (read() != FASTA_FIRST_CHAR) {
}
do {
final String name = readLine().trim();
StringBuffer seq = new StringBuffer();
readSequence(seq, dataType, "" + FASTA_FIRST_CHAR, Integer.MAX_VALUE, "-", "?", "", "");
if (alignment == null) {
alignment = new SimpleAlignment();
}
alignment.addSequence(new Sequence(new Taxon(name.toString()), seq.toString()));
} while (getLastDelimiter() == FASTA_FIRST_CHAR);
} catch (EOFException e) {
// catch end of file the ugly way.
}
return alignment;
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class Defects method main.
public static void main(String[] args) throws java.io.IOException {
String alignmentFileName = null;
if (args != null && args.length > 0) {
alignmentFileName = args[0];
}
Alignment alignment = readNexusFile(alignmentFileName);
Defects defects = new Defects(alignment);
int siteCount = alignment.getSiteCount() / 3;
int sequenceCount = alignment.getSequenceCount();
int totalSites = siteCount * sequenceCount;
int totalReads = defects.getTotalCodonsMinusGaps();
int defectiveSites = defects.getDefectiveSiteCount();
int defectiveSequences = defects.getDefectiveSequenceCount();
int totalDefects = defects.getDefectCount();
int totalStops = defects.getStopCodonCount();
double siteP = (double) defectiveSites / (double) siteCount;
double sequenceP = (double) defectiveSequences / (double) sequenceCount;
double totalP = (double) totalDefects / (double) totalReads;
double totalSP = (double) totalStops / (double) totalReads;
System.out.println("Matrix size=" + totalSites);
System.out.println("Non-gap codons=" + totalReads);
System.out.println(defectiveSequences + "/" + sequenceCount + "(" + sequenceP + ") defective sequences.");
System.out.println(defectiveSites + "/" + siteCount + "(" + siteP + ") defective sites.");
System.out.println(totalDefects + "/" + totalReads + "(" + totalP + ") defects.");
System.out.println(totalStops + "/" + totalReads + "(" + totalSP + ") stop codons.");
double mean = (double) totalDefects / (double) sequenceCount;
System.out.println(mean + " defects per sequence");
double meanS = (double) totalStops / (double) sequenceCount;
System.out.println(meanS + " stops per sequence");
int maxDefects = defects.getMaxDefectiveSites();
int maxStops = defects.getMaxStopSites();
System.out.println("defective sequences:");
//}
for (int d = 1; d <= maxDefects; d++) {
Set<Integer> seqs = defects.getSequences(d);
for (Integer seq : seqs) {
String name = alignment.getTaxonId(seq);
System.out.println(d + " " + name);
}
}
System.out.println("Defects\tSequences\texpected");
double probTerm;
// probability of 0
probTerm = Math.exp(-mean);
for (int i = 0; i < 10; i++) {
System.out.println(i + "\t" + defects.getSequenceCount(i) + "\t" + probTerm * sequenceCount);
// compute probability of n from prob. of n-1
probTerm *= mean / (i + 1);
}
System.out.println("Stops\tSequences\texpected");
// probability of 0
probTerm = Math.exp(-meanS);
for (int i = 0; i < 10; i++) {
System.out.println(i + "\t" + defects.getStopSequenceCount(i) + "\t" + probTerm * sequenceCount);
// compute probability of n from prob. of n-1
probTerm *= meanS / (i + 1);
}
System.out.println("stop-codon sequences:");
for (int d = 1; d <= maxStops; d++) {
Set<Integer> seqs = defects.getSequencesByStopCount(d);
for (Integer index : seqs) {
String name = alignment.getTaxonId(index);
System.out.println(d + " " + name);
}
}
Consensus con = new Consensus("mode", alignment, true);
Sequence consensus = con.getConsensusSequence();
SimpleAlignment a = new SimpleAlignment();
a.addSequence(new Sequence(consensus));
ConvertAlignment ca = new ConvertAlignment(Codons.UNIVERSAL, a);
double[] pstop = new double[ca.getSiteCount()];
int[] counts = new int[10];
for (double kappa = 2.0; kappa <= 2.0; kappa += 1.0) {
for (int i = 0; i < ca.getSiteCount(); i++) {
int state = ca.getState(0, i);
if (Codons.UNIVERSAL.isStopCodon(state)) {
throw new RuntimeException("Consensus has a stop codon in it at position " + i + "!");
}
int[] triplet = Codons.UNIVERSAL.getTripletStates(state);
int[][] tvmutants = generateTransversionMutants(triplet);
int[][] timutants = generateTransitionMutants(triplet);
int tvStops = 0;
for (int j = 0; j < 6; j++) {
if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(tvmutants[j][0], tvmutants[j][1], tvmutants[j][2]))) {
tvStops += 1;
}
}
int tiStops = 0;
for (int j = 0; j < 3; j++) {
if (Codons.UNIVERSAL.isStopCodon(Codons.UNIVERSAL.getState(timutants[j][0], timutants[j][1], timutants[j][2]))) {
tiStops += 1;
}
}
pstop[i] = (tvStops + kappa * tiStops) / (6.0 + kappa * 3.0);
counts[tvStops + tiStops] += 1;
}
System.out.println("kappa = " + kappa + " pstop=" + dr.stats.DiscreteStatistics.mean(pstop));
}
System.out.println("stop-mutations\tcodons");
for (int i = 0; i < 10; i++) {
System.out.println(i + "\t" + counts[i]);
}
/*int count = 0;
int reps = 100000;
for (int i = 0; i < reps; i++) {
SimpleAlignment simpleAlignment = new SimpleAlignment();
simpleAlignment.addSequence(new Sequence(consensus));
//pick a random position
int pos = random.nextInt(simpleAlignment.getSiteCount());
int state = simpleAlignment.getState(0,pos);
int newState = random.nextInt(4);
while (newState == state) {
newState = random.nextInt(4);
}
simpleAlignment.setState(0,pos,newState);
defects = new Defects(simpleAlignment);
count += defects.getDefectCount();
}
double p = (double)count/(double)reps;
*/
double p = dr.stats.DiscreteStatistics.mean(pstop);
double rate = totalSP * (1.0 - p) / p;
System.out.println("Total inferred point-mutation rate = " + rate);
System.out.println("Total inferred point-mutations = " + (totalStops * (1 - p) / p));
System.out.println("Proportion of point-mutations that produce premature stop codons = " + p);
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class Consensus method getConsensusSequence.
public final Sequence getConsensusSequence() {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < consensus.length; i++) {
buffer.append(dataType.getChar(getState(i)));
}
Sequence sequence = new Sequence(new Taxon(name), buffer.toString());
sequence.setDataType(dataType);
return sequence;
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class SelectionMapping method main.
public static void main(String[] args) throws java.io.IOException {
if (args.length != 1)
throw new RuntimeException("Expect a file name containing parameters!");
FileInputStream fi = new FileInputStream(args[0]);
Properties props = new Properties();
props.load(fi);
fi.close();
int genomeLength = Integer.parseInt(props.getProperty("genomeLength", "1000"));
int populationSize = Integer.parseInt(props.getProperty("populationSize", "250"));
int replicates = Integer.parseInt(props.getProperty("replicates", "1000"));
double mu = Double.parseDouble(props.getProperty("mu", "1e-4"));
double alpha = Double.parseDouble(props.getProperty("alpha", "0.06"));
double pInv = Double.parseDouble(props.getProperty("pinv", "0.5"));
double beta = Double.parseDouble(props.getProperty("beta", "1"));
int stateSize = Integer.parseInt(props.getProperty("stateSize", "4"));
int n = Integer.parseInt(props.getProperty("sampleSize", "40"));
boolean randomFittest = Boolean.valueOf(props.getProperty("randomizeFittest", "false"));
boolean outputAlignments = Boolean.valueOf(props.getProperty("outputAlignments", "false"));
String alignmentFileName = props.getProperty("alignment.filename", "alignment.fasta");
String fileName = props.getProperty("output.filename", "out.txt");
int burninFactor = Integer.parseInt(props.getProperty("burninFactor", "10"));
PrintWriter writer = new PrintWriter(new FileWriter(fileName));
writer.println("// WMD v1.0");
writer.println("// genomeLength: " + genomeLength);
writer.println("// populationSize: " + populationSize);
writer.println("// alpha: " + alpha);
writer.println("// beta: " + beta);
writer.println("// replicates: " + replicates);
writer.println("// output.filename: " + fileName);
writer.println("// mu: " + mu);
writer.println("// outputAlignments: " + outputAlignments);
writer.println("// randomFittest: " + randomFittest);
writer.println("// alignment.filename: " + alignmentFileName);
int[] totalMutationalDensity = new int[200];
int[] totalLineages = new int[200];
int[] nMutants = new int[genomeLength * 3];
double[] mrcaFitness = new double[1];
final int generations = populationSize;
ArrayList[] unfoldedSites = new ArrayList[populationSize + 1];
for (int i = 0; i < populationSize + 1; i++) {
unfoldedSites[i] = new ArrayList();
}
long start = System.currentTimeMillis();
int dnaSingles = 0;
int aaSingles = 0;
int dnaSegs = 0;
int aaSegs = 0;
for (int reps = 0; reps < replicates; reps++) {
// FitnessFunction f = null;
// if (alpha == 0.0) {
// f = new NeutralModel();
// }
FitnessFunction f = new CodonFitnessFunction(genomeLength, alpha, beta, pInv);
Population p = new Population(populationSize, genomeLength * 3, new SimpleMutator(mu, stateSize), f, randomFittest);
Genome master = new SimpleGenome(genomeLength * 3, f, randomFittest);
p = Population.forwardSimulation(p, burninFactor * populationSize);
int age = -1;
while (age == -1) {
p = Population.forwardSimulation(p, generations);
age = p.getAgeOfMRCA(mrcaFitness);
}
Population children = Population.forwardSimulation(p, 1);
writer.println(reps + "\t" + age + "\t" + p.getMeanParentFitness() + "\t" + mrcaFitness[0] + "\t" + p.getProportionAsFit(mrcaFitness[0]));
if (reps % 10 == 0) {
System.out.print(reps + "\t" + age + "\t" + p.getMeanParentFitness() + "\t" + mrcaFitness[0] + "\t" + p.getProportionAsFit(mrcaFitness[0]));
long millis = System.currentTimeMillis() - start;
double seconds = millis / 1000.0;
if (reps != 0) {
double seconds2go = (replicates - reps) * seconds / reps;
System.out.println(" -- " + (Math.round(seconds2go / 36) / 100.0) + " hours");
} else {
System.out.println();
}
}
//p.unfoldedSiteFrequencies(unfoldedSites);
// ************************************************************************************
// get the mutational density per lineage as a function of time
// ************************************************************************************
List<Integer>[] mutations = new ArrayList[200];
for (int i = 0; i < mutations.length; i++) {
mutations[i] = new ArrayList<Integer>(200);
}
int sampleSize = Math.min(n, populationSize);
p.getMutationDensity(sampleSize, mutations);
for (int i = 0; i < mutations.length; i++) {
for (int j = 0; j < mutations[i].size(); j++) {
totalMutationalDensity[i] += mutations[i].get(j);
}
totalLineages[i] += mutations[i].size();
}
// ************************************************************************************
// output alignments if requested
// ************************************************************************************
SimpleAlignment alignment = new SimpleAlignment();
SimpleAlignment aaAlignment = new SimpleAlignment();
alignment.setDataType(Nucleotides.INSTANCE);
aaAlignment.setDataType(AminoAcids.INSTANCE);
if (outputAlignments) {
PrintWriter pw = new PrintWriter(new FileWriter(alignmentFileName));
//pw.println(sampleSize + "\t" + p.getGenomeLength());
for (int i = 0; i < sampleSize; i++) {
pw.print(">sequence_");
if (i < 10) {
pw.print("0");
}
pw.println("" + i);
String dnaSequence = p.getGenome(i).getDNASequenceString();
String aaSequence = p.getGenome(i).getAminoAcidSequenceString();
pw.println(dnaSequence);
alignment.addSequence(new Sequence(dnaSequence));
aaAlignment.addSequence(new Sequence(aaSequence));
}
pw.println();
pw.close();
}
// ************************************************************************************
for (int i = 0; i < sampleSize; i++) {
nMutants[master.hammingDistance(p.getGenome(i))] += 1;
}
dnaSingles += countSingletons(alignment);
aaSingles += countSingletons(aaAlignment);
dnaSegs += countSegregating(alignment);
aaSegs += countSegregating(aaAlignment);
}
for (int i = 0; i < unfoldedSites.length; i++) {
double meanFitness = 0.0;
double proportionNeutral = 0.0;
double proportionPositive = 0.0;
double proportionNegative = 0.0;
for (int j = 0; j < unfoldedSites[i].size(); j++) {
double relativeFitness = (Double) unfoldedSites[i].get(j);
meanFitness += relativeFitness;
if (relativeFitness == 1.0) {
proportionNeutral += 1.0;
} else if (relativeFitness > 1.0) {
proportionPositive += 1.0;
} else {
proportionNegative += 1.0;
}
}
meanFitness /= (double) unfoldedSites[i].size();
proportionNeutral /= (double) unfoldedSites[i].size();
proportionPositive /= (double) unfoldedSites[i].size();
proportionNegative /= (double) unfoldedSites[i].size();
writer.println(i + "\t" + unfoldedSites[i].size() + "\t" + meanFitness + "\t" + proportionNeutral + "\t" + proportionPositive + "\t" + proportionNegative);
}
writer.println("--------------------");
writer.println("SINGLETON COUNTS");
writer.println("--------------------");
writer.println("dna singletons = " + dnaSingles);
writer.println("aa singletons = " + aaSingles);
int dnaNons = dnaSegs - dnaSingles;
int aaNons = aaSegs - aaSingles;
System.out.println("dna singletons = " + dnaSingles);
System.out.println("aa singletons = " + aaSingles);
System.out.println("dna segregating = " + dnaSegs);
System.out.println("aa segregating = " + aaSegs);
System.out.println("dna non-singles = " + dnaNons);
System.out.println("aa non-singles = " + aaNons);
System.out.println("ratio = " + ((double) dnaSingles / (double) aaSingles));
System.out.println("ratio(non) = " + ((double) dnaNons / (double) aaNons));
writer.println("--------------------");
writer.println("MUTATIONAL DENSITIES");
writer.println("--------------------");
for (int i = 0; i < 200; i++) {
writer.println(totalMutationalDensity[i] + "\t" + totalLineages[i]);
}
// ************************************************************************************
// output hamming distances
// ************************************************************************************
writer.println("--------------------");
writer.println("Hamming distance distribution");
writer.println("--------------------");
for (int i = 0; i < nMutants.length; i++) {
writer.println(i + "\t" + nMutants[i]);
}
writer.close();
}
use of dr.evolution.sequence.Sequence in project beast-mcmc by beast-dev.
the class NexusImporter method readSequenceData.
/**
* Reads sequences in a 'DATA' or 'CHARACTERS' block.
*/
private void readSequenceData(Sequences sequences, TaxonList taxonList) throws ImportException, IOException {
int n, i;
String firstSequence = null;
if (isInterleaved) {
boolean firstLoop = true;
int readCount = 0;
while (readCount < siteCount) {
n = -1;
for (i = 0; i < taxonCount; i++) {
String token = readToken().trim();
Sequence sequence;
if (firstLoop) {
sequence = new Sequence();
sequence.setDataType(dataType);
sequences.addSequence(sequence);
Taxon taxon;
if (taxonList != null) {
int index = taxonList.getTaxonIndex(token.trim());
if (index == -1) {
// ...perhaps it is a numerical taxon reference?
throw new UnknownTaxonException(token);
} else {
taxon = taxonList.getTaxon(index);
}
} else {
taxon = new Taxon(token.trim());
}
sequence.setTaxon(taxon);
} else {
sequence = sequences.getSequence(i);
Taxon taxon = sequence.getTaxon();
if (!taxon.getId().equals(token)) {
throw new UnknownTaxonException("Unknown taxon label: expecting '" + taxon.getId() + "', found '" + token + "'");
}
}
StringBuffer buffer = new StringBuffer();
readSequenceLine(buffer, dataType, ";", gapCharacters, missingCharacters, matchCharacters, firstSequence);
String seqString = buffer.toString();
sequence.appendSequenceString(seqString);
if (i == 0) {
firstSequence = seqString;
}
if (getLastDelimiter() == ';') {
if (i < taxonCount - 1) {
throw new TooFewTaxaException();
}
if (readCount + n < siteCount) {
throw new ShortSequenceException(sequence.getTaxon().getId());
}
}
if (n == -1) {
n = seqString.length();
}
if (n != seqString.length()) {
throw new ShortSequenceException(sequence.getTaxon().getId());
}
}
firstLoop = false;
readCount += n;
}
if (getLastDelimiter() != ';') {
throw new BadFormatException("Expecting ';' after sequences data");
}
} else {
for (i = 0; i < taxonCount; i++) {
String token = readToken().trim();
Sequence sequence = new Sequence();
sequence.setDataType(dataType);
sequences.addSequence(sequence);
Taxon taxon;
if (taxonList != null) {
int index = taxonList.getTaxonIndex(token);
if (index == -1) {
// ...perhaps it is a numerical taxon reference?
throw new UnknownTaxonException(token);
} else {
taxon = taxonList.getTaxon(index);
}
} else {
taxon = new Taxon(token);
}
sequence.setTaxon(taxon);
StringBuffer buffer = new StringBuffer();
readSequence(buffer, dataType, ";", siteCount, gapCharacters, missingCharacters, matchCharacters, firstSequence);
String seqString = buffer.toString();
if (seqString.length() != siteCount) {
throw new ShortSequenceException(sequence.getTaxon().getId());
}
sequence.appendSequenceString(seqString);
if (i == 0) {
firstSequence = seqString;
}
if (getLastDelimiter() == ';' && i < taxonCount - 1) {
throw new TooFewTaxaException();
}
}
if (getLastDelimiter() != ';') {
throw new BadFormatException("Expecting ';' after sequences data, has '" + (char) getLastDelimiter() + "' in line " + getLineNumber());
}
}
}
Aggregations