use of beast.evolution.datatype.DataType in project beast2 by CompEvol.
the class SequenceSimulator method intArray2Sequence.
/**
 * Builds a {@link Sequence} from the integer-coded states of a sequence.
 *
 * @param seq  integer representation of the sequence
 * @param node node whose number selects the entry in the alignment's
 *             sequence list that supplies the taxon
 * @return a new Sequence carrying that taxon and the string-encoded states
 */
Sequence intArray2Sequence(int[] seq, Node node) {
    // Let the alignment's data type turn the integer states into text.
    final String encodedStates = m_data.get().getDataType().state2string(seq);

    // The node number is used as an index into the alignment's sequences;
    // the taxon is taken from the sequence found at that position.
    final Sequence sequenceAtNode = m_data.get().sequenceInput.get().get(node.getNr());
    return new Sequence(sequenceAtNode.taxonInput.get(), encodedStates);
}
use of beast.evolution.datatype.DataType in project beast2 by CompEvol.
the class SimulatedAlignment method intArray2Sequence.
/**
 * Builds a {@link Sequence} from the integer-coded states of a sequence.
 *
 * @param seq  integer representation of the sequence
 * @param node node whose number selects the taxon name in the alignment's
 *             list of taxa names
 * @return a new Sequence carrying that taxon name and the string-encoded states
 */
Sequence intArray2Sequence(int[] seq, Node node) {
    // Let the alignment's data type turn the integer states into text.
    final String encodedStates = m_data.get().getDataType().state2string(seq);

    // The node number is used as an index into the alignment's taxa names.
    final String taxonName = m_data.get().getTaxaNames().get(node.getNr());
    return new Sequence(taxonName, encodedStates);
}
use of beast.evolution.datatype.DataType in project beast2 by CompEvol.
the class FilteredAlignment method calcPatterns.
/**
* Rebuilds the pattern tables for the filtered sites: fills sitePatterns,
* patternWeight, patternIndex and maxStateCount from counts and filter,
* logs a short summary, and finally sets counts to null.
*
* @throws RuntimeException if siteWeights is not null
*/
@Override
protected void calcPatterns() {
int nrOfTaxa = counts.size();
int nrOfSites = filter.length;
DataType baseType = alignmentInput.get().m_dataType;
// convert data to transposed int array: data[site][taxon]
int[][] data = new int[nrOfSites][nrOfTaxa];
for (int i = 0; i < nrOfTaxa; i++) {
List<Integer> sites = counts.get(i);
for (int j = 0; j < nrOfSites; j++) {
// filter[j] is the index of the j-th retained site in the unfiltered counts
data[j][i] = sites.get(filter[j]);
if (convertDataType) {
// re-encode the state: base type state -> code string -> first state of m_dataType
try {
String code = baseType.getCode(data[j][i]);
data[j][i] = m_dataType.string2state(code).get(0);
} catch (Exception e) {
// NOTE(review): the failure is only printed; data[j][i] then keeps the
// unconverted base-type value assigned above.
e.printStackTrace();
}
}
}
}
// add constant sites, if specified
if (constantSiteWeightsInput.get() != null) {
int dim = constantSiteWeightsInput.get().getDimension();
// add constant patterns: one extra row per state i in [0, dim), with every taxon set to i
int[][] data2 = new int[nrOfSites + dim][];
System.arraycopy(data, 0, data2, 0, nrOfSites);
for (int i = 0; i < dim; i++) {
data2[nrOfSites + i] = new int[nrOfTaxa];
for (int j = 0; j < nrOfTaxa; j++) {
data2[nrOfSites + i][j] = i;
}
}
data = data2;
// the extra rows are counted as sites for now; this is undone further down
nrOfSites += dim;
}
// sort data so that identical rows become adjacent
SiteComparator comparator = new SiteComparator();
Arrays.sort(data, comparator);
// count patterns in sorted data
int[] weights = new int[nrOfSites];
int nrOfPatterns = 1;
if (nrOfSites > 0) {
weights[0] = 1;
for (int i = 1; i < nrOfSites; i++) {
if (comparator.compare(data[i - 1], data[i]) != 0) {
// new pattern: move its row forward so that the distinct patterns end up
// in data[0 .. nrOfPatterns-1], still in comparator order
nrOfPatterns++;
data[nrOfPatterns - 1] = data[i];
}
// every site adds one to the weight of the current pattern
weights[nrOfPatterns - 1]++;
}
} else {
nrOfPatterns = 0;
}
// adjust weight of invariant sites, if stripInvariantSitesInput is specified
if (stripInvariantSitesInput.get()) {
// don't add patterns that are invariant, e.g. all gaps
// NOTE(review): this message goes to Log.info without a newline, while the
// per-pattern details and the summary below go to Log.warning.
Log.info.print("Stripping invariant sites");
int removedSites = 0;
for (int i = 0; i < nrOfPatterns; i++) {
// a pattern is constant when every taxon has the same state as taxon 0
boolean isContant = true;
for (int j = 1; j < nrOfTaxa; j++) {
if (data[i][j] != data[i][0]) {
isContant = false;
break;
}
}
// if this is a constant site (no ambiguity check is made here)
if (isContant) {
Log.warning.print(" <" + data[i][0] + "> ");
removedSites += weights[i];
// the pattern itself is kept, only its weight is zeroed
weights[i] = 0;
}
}
Log.warning.println(" removed " + removedSites + " sites ");
}
// adjust weight of constant sites, if specified
if (constantSiteWeightsInput.get() != null) {
Integer[] constantWeights = constantSiteWeightsInput.get().getValues();
for (int i = 0; i < nrOfPatterns; i++) {
boolean isContant = true;
for (int j = 1; j < nrOfTaxa; j++) {
if (data[i][j] != data[i][0]) {
isContant = false;
break;
}
}
// if this is a constant site whose state is a valid index into constantWeights
if (isContant && data[i][0] >= 0 && data[i][0] < constantWeights.length) {
// take weights in data in account as well
// by adding constant patterns, we added a weight of 1, which now gets corrected
// but if filtered by stripping constant sites, that weight is already set to zero
weights[i] = (stripInvariantSitesInput.get() ? 0 : weights[i] - 1) + constantWeights[data[i][0]];
}
}
// need to decrease siteCount for mapping sites to patterns in patternIndex
// NOTE(review): this undoes "nrOfSites += dim" above only if
// getValues().length equals getDimension() -- confirm.
nrOfSites -= constantWeights.length;
}
// reserve memory for patterns
patternWeight = new int[nrOfPatterns];
sitePatterns = new int[nrOfPatterns][nrOfTaxa];
for (int i = 0; i < nrOfPatterns; i++) {
patternWeight[i] = weights[i];
// the row is shared with data, not copied
sitePatterns[i] = data[i];
}
// find patterns for the sites
patternIndex = new int[nrOfSites];
for (int i = 0; i < nrOfSites; i++) {
// rebuild the column of site i exactly as it was built for data above
int[] sites = new int[nrOfTaxa];
for (int j = 0; j < nrOfTaxa; j++) {
sites[j] = counts.get(j).get(filter[i]);
if (convertDataType) {
try {
sites[j] = m_dataType.string2state(baseType.getCode(sites[j])).get(0);
} catch (Exception e) {
// NOTE(review): same best-effort handling as in the first conversion loop
e.printStackTrace();
}
}
}
// sitePatterns is in comparator order (sorted above, then compacted in order),
// which is what the binary search requires
patternIndex[i] = Arrays.binarySearch(sitePatterns, sites, comparator);
}
if (siteWeights != null) {
// TODO: fill in weights with siteweights.
throw new RuntimeException("Cannot handle site weights in FilteredAlignment. Remove \"weights\" from data input.");
}
// determine maximum state count
// Usually, the state count is equal for all sites,
// though for SnAP analysis, this is typically not the case.
maxStateCount = 0;
for (int stateCount1 : stateCounts) {
maxStateCount = Math.max(maxStateCount, stateCount1);
}
if (convertDataType) {
// after conversion, the state count of the target data type is taken into account too
maxStateCount = Math.max(maxStateCount, m_dataType.getStateCount());
}
// report some statistics
// (disabled per-taxon debugging output, kept for reference)
// for (int i = 0; i < m_sTaxaNames.size(); i++) {
// System.err.println(m_sTaxaNames.get(i) + ": " + m_counts.get(i).size() + " " + m_nStateCounts.get(i));
// }
Log.info.println("Filter " + filterInput.get());
Log.info.println(getTaxonCount() + " taxa");
if (constantSiteWeightsInput.get() != null) {
// report the total of the constant site weights next to the site count
Integer[] constantWeights = constantSiteWeightsInput.get().getValues();
int sum = 0;
for (int i : constantWeights) {
sum += i;
}
Log.info.println(getSiteCount() + " sites + " + sum + " constant sites");
} else {
Log.info.println(getSiteCount() + " sites");
}
Log.info.println(getPatternCount() + " patterns");
// counts are not valid any more -- better set to null in case
// someone gets bitten by this.
this.counts = null;
}