use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class GenomeParser method parseChromosome.
/**
* Parses the chromosome.
*
* @param br the br
* @return the chromosome
* @throws SeqMonkException the seq monk exception
* @throws IOException Signals that an I/O exception has occurred.
*/
private Chromosome parseChromosome(BufferedReader br, SingleGenome genome) throws SeqMonkException, IOException {
String line;
while ((line = br.readLine()) != null) {
if (line.startsWith("AC")) {
String[] sections = line.split(":");
if (sections.length != 6) {
// It's not a chromosome file. We probably just want to
// skip it and move onto the next entry
progressWarningReceived(new SeqMonkException("AC line didn't have 6 sections '" + line + "'"));
skipToEntryEnd(br);
continue;
}
if (line.indexOf("supercontig") >= 0) {
// It's not a chromosome file. We probably just want to
// skip it and move onto the next entry
skipToEntryEnd(br);
continue;
}
// This will return the existing chromosome of this
// name if it exists already, but will create a new
// one if it doesn't.
Chromosome c = genome.addChromosome(sections[2]);
c.setLength(Integer.parseInt(sections[4]));
// Since the positions of all features are given relative
// to the current sequence we need to add the current
// start position to all locations as an offset.
currentOffset = Integer.parseInt(sections[3]) - 1;
return c;
}
if (line.startsWith("//")) {
throw new SeqMonkException("Couldn't find AC line");
}
}
return null;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class GenomeParser method parseChrListFile.
private void parseChrListFile(SingleGenome genome, File baseLocation) throws Exception {
File chrListFile = new File(baseLocation.getAbsolutePath() + "/chr_list");
if (chrListFile.exists()) {
BufferedReader br = new BufferedReader(new FileReader(chrListFile));
String line;
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
Chromosome c = genome.addChromosome(sections[0]);
c.setLength(Integer.parseInt(sections[1]));
}
br.close();
// If we've loaded the chromosome list we also need to check at this point
// whether there are any aliases defined since these might be used by the
// dat or gff files. We will end up re-adding these aliases a bit later
// which is unfortunate but won't slow things down much so it's not too bad.
File aliasesFile = new File(baseLocation.getAbsoluteFile() + "/aliases.txt");
if (aliasesFile.exists()) {
try {
readAliases(aliasesFile, genome);
} catch (IOException e) {
throw new IllegalStateException(e);
}
}
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class ActiveProbeListParser method processNormalDataStore.
private DataSet processNormalDataStore(ProbeList activeList) {
int extendBy = prefs.extendReads();
boolean reverse = prefs.reverseReads();
boolean modifyStrand = false;
int forcedStrand = 0;
if (!prefs.strandOptionBox.getSelectedItem().equals("From probes")) {
modifyStrand = true;
if (prefs.strandOptionBox.getSelectedItem().equals("Forward")) {
forcedStrand = Location.FORWARD;
} else if (prefs.strandOptionBox.getSelectedItem().equals("Reverse")) {
forcedStrand = Location.REVERSE;
} else if (prefs.strandOptionBox.getSelectedItem().equals("Unknown")) {
forcedStrand = Location.UNKNOWN;
} else {
throw new IllegalArgumentException("Unknown forced strand option " + prefs.strandOptionBox.getSelectedItem());
}
}
DataSet newData = new DataSet(activeList.name(), "Reimported from " + activeList.name(), prefs.removeDuplicates());
// Now process the data
Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
progressUpdated("Processing " + activeList.name() + " chr " + chrs[c].name(), c, chrs.length);
Probe[] probes = activeList.getProbesForChromosome(chrs[c]);
for (int r = 0; r < probes.length; r++) {
if (cancel) {
progressCancelled();
return null;
}
long read;
int start = probes[r].start();
int end = probes[r].end();
int strand = probes[r].strand();
if (reverse) {
if (strand == Location.FORWARD) {
strand = Location.REVERSE;
} else if (strand == Location.REVERSE) {
strand = Location.FORWARD;
}
}
if (extendBy != 0) {
// We now allow negative extensions to shorten reads
if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
end += extendBy;
if (end < start)
end = start;
} else if (strand == Location.REVERSE) {
start -= extendBy;
if (start > end)
start = end;
}
}
// We don't allow reads before the start of the chromosome
if (start < 1) {
int overrun = (0 - start) + 1;
progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
continue;
}
// We also don't allow readings which are beyond the end of the chromosome
if (end > chrs[c].length()) {
int overrun = end - chrs[c].length();
progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
continue;
}
// Force the strand to what they specified if they want this.
if (modifyStrand) {
strand = forcedStrand;
}
// We can now make the new reading
try {
read = SequenceRead.packPosition(start, end, strand);
newData.addData(chrs[c], read);
} catch (SeqMonkException e) {
progressWarningReceived(e);
continue;
}
}
}
return newData;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class HiCOtherEndExtractor method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
// We need to get the baits and other options from the preferences
Probe[] baits = prefs.getBaits();
boolean mergeIntoOne = prefs.mergeIntoOne();
boolean excludeSelf = prefs.excludeSelf();
DataSet[] newData;
if (mergeIntoOne) {
newData = new DataSet[visibleHiCDataSets.length];
} else {
newData = new DataSet[visibleHiCDataSets.length * baits.length];
}
try {
for (int d = 0; d < visibleHiCDataSets.length; d++) {
if (baits.length == 1) {
System.err.println("Only one bait");
newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[0].chromosome().name() + ":" + baits[0].start() + "-" + baits[0].end(), DataSet.DUPLICATES_REMOVE_NO);
} else if (mergeIntoOne) {
System.err.println("Multiple baits, but merging");
newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for " + baits.length + " regions", DataSet.DUPLICATES_REMOVE_NO);
}
for (int b = 0; b < baits.length; b++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Getting other ends from " + visibleHiCDataSets[d].name() + " and " + baits[b].toString(), (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
if (!(baits.length == 1 || mergeIntoOne)) {
newData[(d * baits.length) + b] = new DataSet(baits[b].toString() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[b].chromosome().name() + ":" + baits[b].start() + "-" + baits[b].end(), DataSet.DUPLICATES_REMOVE_NO);
}
HiCHitCollection hiCCollection = visibleHiCDataSets[d].getHiCReadsForProbe(baits[b]);
String[] chromosomes = hiCCollection.getChromosomeNamesWithHits();
for (int c = 0; c < chromosomes.length; c++) {
Chromosome chromosome = collection.genome().getChromosome(chromosomes[c]).chromosome();
long[] reads = hiCCollection.getHitPositionsForChromosome(chromosomes[c]);
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
if (excludeSelf && baits[b].chromosome().name().equals(chromosomes[c]) && SequenceRead.overlaps(reads[r], baits[b].packedPosition())) {
continue;
}
if (mergeIntoOne) {
newData[d].addData(chromosome, reads[r]);
} else {
newData[(d * baits.length) + b].addData(chromosome, reads[r]);
}
}
}
if (!mergeIntoOne) {
// Cache the data in the new dataset
progressUpdated("Caching data", (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
newData[(d * baits.length) + b].finalise();
}
}
if (mergeIntoOne) {
// Cache the data in the new dataset
progressUpdated("Caching data", d, visibleHiCDataSets.length);
newData[d].finalise();
}
}
processingFinished(newData);
} catch (Exception ex) {
progressExceptionReceived(ex);
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class PairedDataSet method getExportableReadsForChromosome.
public synchronized HiCHitCollection getExportableReadsForChromosome(Chromosome c) {
if (!isFinalised)
finalise();
HiCHitCollection exportableReads = new HiCHitCollection(c.name());
HiCHitCollection redundantReads = getHiCReadsForChromosome(c);
String[] chrs = redundantReads.getChromosomeNamesWithHits();
for (int i = 0; i < chrs.length; i++) {
// The way we have to do this lookup is ugly, but that's how it is
// We only include data for chromosomes the same or later than our own
Chromosome thisChr = SeqMonkApplication.getInstance().dataCollection().genome().getChromosome(chrs[i]).chromosome();
if (c.compareTo(thisChr) < 0) {
System.err.println("Ignoring " + thisChr + " when exporting " + c);
continue;
}
long[] source = redundantReads.getSourcePositionsForChromosome(chrs[i]);
long[] hits = redundantReads.getHitPositionsForChromosome(chrs[i]);
if (c != thisChr) {
// Put everything from this chromosome into the results
for (int j = 0; j < source.length; j++) {
exportableReads.addHit(chrs[i], source[j], hits[j]);
}
} else {
// Put only the entries where the hit is later than the source in
for (int j = 0; j < source.length; j++) {
if ((source[j] == hits[j] && j % 2 == 0) || SequenceRead.compare(source[j], hits[j]) >= 0) {
exportableReads.addHit(chrs[i], source[j], hits[j]);
}
}
}
}
return exportableReads;
}
Aggregations