Search in sources :

Example 41 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gridss by PapenfussLab.

the class Manual method assembly_scoring_should_be_symmetrical.

// @Test
@Category(Hg19Tests.class)
public void assembly_scoring_should_be_symmetrical() throws FileNotFoundException {
    File sam = new File("W:/debug/test.bam");
    File ref = Hg19Tests.findHg19Reference();
    ProcessingContext pc = new ProcessingContext(getFSContext(), ref, new SynchronousReferenceLookupAdapter(new IndexedFastaSequenceFile(ref)), null, getConfig());
    SAMEvidenceSource ses = new SAMEvidenceSource(pc, sam, null, 0);
    List<SAMRecord> reads = getRecords(new File("W:/debug/test.bam.gridss.working/test.bam.sv.bam"));
    List<SplitReadEvidence> list = ses.iterator().stream().filter(e -> e instanceof SplitReadEvidence).map(e -> (SplitReadEvidence) e).filter(e -> e.getSAMRecord().getReadName().equals("variant.chr12-art1000553")).collect(Collectors.toList());
    for (SplitReadEvidence e : list) {
        e.getBreakpointQual();
    }
}
Also used : Iterator(java.util.Iterator) Header(htsjdk.samtools.metrics.Header) IOException(java.io.IOException) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) GridssConfiguration(au.edu.wehi.idsv.configuration.GridssConfiguration) List(java.util.List) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Assert(org.junit.Assert) ConfigurationException(org.apache.commons.configuration.ConfigurationException) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category)

Example 42 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gridss by PapenfussLab.

the class ExtractSVReadsTest method regression_should_extract_split_read_alignments_as_group.

// @Test
@Category(Hg38Tests.class)
public void regression_should_extract_split_read_alignments_as_group() throws IOException {
    File ref = Hg38Tests.findHg38Reference();
    ReferenceLookup lookup = new SynchronousReferenceLookupAdapter(new IndexedFastaSequenceFile(ref));
    Files.copy(new File("src/test/resources/sa.split/test1.sam"), input);
    ExtractSVReads extract = new ExtractSVReads();
    // new ProcessingContext(getFSContext(), ref, lookup, null, getConfig());
    extract.setReference(lookup);
    extract.MIN_CLIP_LENGTH = 4;
    extract.INSERT_SIZE_METRICS = new File("src/test/resources/sa.split/test.insert_size_metrics");
    extract.READ_PAIR_CONCORDANCE_METHOD = ReadPairConcordanceMethod.PERCENTAGE;
    extract.OUTPUT = output;
    extract.INPUT = input;
    try (SamReader reader = SamReaderFactory.make().open(input)) {
        extract.setup(reader.getFileHeader(), input);
    }
    List<SAMRecord> records = getRecords(input);
    List<Boolean> result = records.stream().map(r -> extract.shouldExtract(ImmutableList.of(r), lookup)[0]).collect(Collectors.toList());
    /*
		for (int i = 0; i < records.size(); i++) {
			SAMRecord r = records.get(i);
			System.out.print(r.getSupplementaryAlignmentFlag() ? "S" : " ");
			System.out.print(r.getFirstOfPairFlag() ? "1" : "2");
			System.out.print(" Extracted=" + (result.get(i) ? "y" : "n"));
			System.out.print(" HasConcPair=" + (ExtractSVReads.hasReadPairingConsistentWithReference(extract.getReadPairConcordanceCalculator(), ImmutableList.of(r)) ? "y" : "n"));
			boolean[] ra = ExtractSVReads.hasReadAlignmentConsistentWithReference(ImmutableList.of(r));
			System.out.print(" HasConcRead=" + (ra[0] ? "y" : "n") + (ra[1] ? "y" : "n"));
			System.out.println(" " + new ChimericAlignment(r).toString());
		}
		*/
    Assert.assertEquals(records.stream().map(r -> r.getSecondOfPairFlag()).collect(Collectors.toList()), result);
    lookup.close();
}
Also used : FixedSizeReadPairConcordanceCalculator(au.edu.wehi.idsv.FixedSizeReadPairConcordanceCalculator) Iterables(com.google.common.collect.Iterables) ReadPairConcordanceMethod(au.edu.wehi.idsv.ReadPairConcordanceMethod) Iterators(com.google.common.collect.Iterators) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) StructuralVariantReadMetrics(gridss.analysis.StructuralVariantReadMetrics) ChimericAlignment(au.edu.wehi.idsv.sam.ChimericAlignment) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) IntermediateFilesTest(au.edu.wehi.idsv.IntermediateFilesTest) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category) SamReader(htsjdk.samtools.SamReader) MetricsFile(htsjdk.samtools.metrics.MetricsFile) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) Hg38Tests(au.edu.wehi.idsv.Hg38Tests) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Assert.assertFalse(org.junit.Assert.assertFalse) Assert(org.junit.Assert) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Assert.assertEquals(org.junit.Assert.assertEquals) SamReader(htsjdk.samtools.SamReader) ReferenceLookup(au.edu.wehi.idsv.picard.ReferenceLookup) SAMRecord(htsjdk.samtools.SAMRecord) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SynchronousReferenceLookupAdapter(au.edu.wehi.idsv.picard.SynchronousReferenceLookupAdapter) Category(org.junit.experimental.categories.Category)

Example 43 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gridss by PapenfussLab.

the class SynchronousReferenceLookupAdapterTest method multi_threaded_read.

@Test
@Category(Hg19Tests.class)
public void multi_threaded_read() throws Exception {
    File ref = Hg19Tests.findHg19Reference();
    IndexedFastaSequenceFile indexed = new IndexedFastaSequenceFile(ref);
    SynchronousReferenceLookupAdapter a = new SynchronousReferenceLookupAdapter(indexed);
    e = null;
    Thread[] th = new Thread[16];
    for (int t = 0; t < th.length; t++) {
        th[t] = new Thread(() -> {
            try {
                Random rng = new Random();
                for (int i = 0; i < 4096; i++) {
                    int pos = rng.nextInt(100000000);
                    assertEquals(a.getBase(indexed.getSequenceDictionary().getSequence("chr4").getSequenceIndex(), pos), indexed.getSubsequenceAt("chr4", pos, pos).getBases()[0]);
                }
            } catch (Exception ex) {
                e = ex;
            }
        });
        th[t].start();
    }
    for (int t = 0; t < th.length; t++) {
        th[t].join();
    }
    a.close();
    if (e != null)
        throw e;
}
Also used : Random(java.util.Random) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 44 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class LocalRealignReads method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidxFile == null) {
        LOG.error("REFerence file missing;");
        return -1;
    }
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    SamReader samReader = null;
    SAMFileWriter w = null;
    SAMRecordIterator iter = null;
    GenomicSequence genomicSequence = null;
    LongestCommonSequence matrix = new LongestCommonSequence();
    try {
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
        samReader = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = samReader.getFileHeader();
        final SAMFileHeader header2 = header1.clone();
        header2.setSortOrder(SortOrder.unsorted);
        w = this.writingBamArgs.setReferenceFile(faidxFile).openSAMFileWriter(outputFile, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = samReader.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            if (rec.getReadUnmappedFlag() || rec.isSecondaryOrSupplementary() || rec.getReadFailsVendorQualityCheckFlag() || rec.getDuplicateReadFlag()) {
                w.addAlignment(rec);
                continue;
            }
            final Cigar cigar = rec.getCigar();
            final int nCigarElement = cigar.numCigarElements();
            if (nCigarElement < 2) {
                w.addAlignment(rec);
                continue;
            }
            final CigarElement ce5 = cigar.getCigarElement(0);
            final CigarElement ce3 = cigar.getCigarElement(nCigarElement - 1);
            if (!((ce3.getOperator().equals(CigarOperator.S) && ce3.getLength() >= MIN_ALIGN_LEN) || (ce5.getOperator().equals(CigarOperator.S) && ce5.getLength() >= MIN_ALIGN_LEN))) {
                w.addAlignment(rec);
                continue;
            }
            if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            final CharSequence readseq = rec.getReadString();
            /**
             * short invert
             */
            if (ce5.getOperator() == CigarOperator.S && ce5.getLength() >= MIN_ALIGN_LEN) {
                CharSequence clipseq = new SubSequence(readseq, 0, ce5.getLength());
                CharSequence revcomp = new RevCompCharSequence(clipseq);
                LongestCommonSequence.Hit hit = matrix.align(genomicSequence, Math.max(0, rec.getUnclippedStart() - rec.getReadLength()), Math.min(rec.getUnclippedEnd() + rec.getReadLength(), genomicSequence.length()), revcomp, 0, revcomp.length());
                if (hit.size() >= MIN_ALIGN_LEN) {
                    System.err.println("REVCOMP5' " + hit.getMatchingSequence() + " " + clipseq + " for " + readseq + " " + rec.getReadName() + " " + rec.getCigarString());
                }
            /*
					
					hit = matrix.align(
							readseq, 0, readseq.length(),
							revcomp,
							0,
							revcomp.length()
							);
					if(hit.size()>=MIN_ALIGN_LEN)
						{
						System.err.println("REVCOMP5' vs ITSELF: "+hit.getMatchingSequence()+" "+clipseq+" for "+readseq+" "+rec.getReadName()+" "+rec.getCigarString());
						}
					*/
            }
            if (ce3.getOperator() == CigarOperator.S && ce3.getLength() >= MIN_ALIGN_LEN) {
                CharSequence clipseq = new SubSequence(readseq, readseq.length() - ce3.getLength(), readseq.length());
                CharSequence revcomp = new RevCompCharSequence(clipseq);
                LongestCommonSequence.Hit hit = matrix.align(genomicSequence, Math.max(0, rec.getUnclippedStart() - rec.getReadLength()), Math.min(rec.getUnclippedEnd() + rec.getReadLength(), genomicSequence.length()), revcomp, 0, revcomp.length());
                if (hit.size() >= MIN_ALIGN_LEN) {
                    System.err.println("REVCOMP3' " + hit.getMatchingSequence() + " " + clipseq + " for " + readseq + " " + rec.getReadName());
                }
            /*
					hit = matrix.align(
							readseq, 0, readseq.length(),
							revcomp,
							0,
							revcomp.length()
							);
					if(hit.size()>=MIN_ALIGN_LEN)
						{
						System.err.println("REVCOMP3' vs ITSELF: "+hit.getMatchingSequence()+" "+clipseq+" for "+readseq+" "+rec.getReadName()+" "+rec.getCigarString());
						}*/
            }
            /* other */
            List<LongestCommonSequence.Hit> hits = new ArrayList<>();
            align(hits, matrix, genomicSequence, Math.max(0, rec.getUnclippedStart() - rec.getReadLength()), Math.min(rec.getUnclippedEnd() + rec.getReadLength(), genomicSequence.length()), readseq, 0, readseq.length(), -1);
            if (hits.size() < 2000)
                continue;
            for (LongestCommonSequence.Hit hit : hits) {
                int readstart = 0;
                boolean in_M = false;
                for (int i = 0; i < nCigarElement; ++i) {
                    final CigarElement ce = cigar.getCigarElement(i);
                    if (ce.getOperator().consumesReadBases()) {
                        int readend = readstart + ce.getLength();
                        if (ce.getOperator() == CigarOperator.M || ce.getOperator() == CigarOperator.X || ce.getOperator() == CigarOperator.EQ) {
                            if (!(hit.getEndY() <= readstart || readend <= hit.getStartY())) {
                                in_M = true;
                                break;
                            }
                        }
                        readstart = readend;
                    }
                }
                if (in_M)
                    continue;
                int align_size = hit.size();
                System.err.println(rec.getReadName() + " " + rec.getCigarString() + " " + rec.getAlignmentStart() + "-" + rec.getAlignmentEnd());
                System.err.println("REF: " + hit.getStartX() + "-" + hit.getEndX());
                System.err.println("READ " + hit.getStartY() + "-" + hit.getEndY());
                System.err.print("REF  :");
                for (int i = 0; i < align_size; ++i) {
                    System.err.print(genomicSequence.charAt(hit.getStartX() + i));
                }
                System.err.println();
                System.err.print("READ :");
                for (int i = 0; i < align_size; ++i) {
                    System.err.print(readseq.charAt(hit.getStartY() + i));
                }
                System.err.println();
            }
            System.err.println();
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception e) {
        return wrapException(e);
    } finally {
        genomicSequence = null;
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(w);
        CloserUtil.close(indexedFastaSequenceFile);
        matrix = null;
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) RevCompCharSequence(com.github.lindenb.jvarkit.util.bio.RevCompCharSequence) ArrayList(java.util.ArrayList) LongestCommonSequence(com.github.lindenb.jvarkit.util.align.LongestCommonSequence) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SubSequence(com.github.lindenb.jvarkit.lang.SubSequence) SAMRecord(htsjdk.samtools.SAMRecord) RevCompCharSequence(com.github.lindenb.jvarkit.util.bio.RevCompCharSequence) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 45 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class MapUniProtFeatures method doWork.

@Override
public int doWork(List<String> args) {
    PrintWriter pw = null;
    try {
        JAXBContext jc = JAXBContext.newInstance("org.uniprot");
        Unmarshaller uniprotUnmarshaller = jc.createUnmarshaller();
        Marshaller uniprotMarshaller = jc.createMarshaller();
        pw = super.openFileOrStdoutAsPrintWriter(OUT);
        LOG.info("read " + REF);
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(REF);
        if (this.indexedFastaSequenceFile.getSequenceDictionary() == null) {
            LOG.error("fasta sequence is not indexed");
            return -1;
        }
        LOG.info("readubf " + kgUri);
        String line;
        Pattern tab = Pattern.compile("[\t]");
        BufferedReader r = IOUtils.openURIForBufferedReading(kgUri);
        while ((line = r.readLine()) != null) {
            String[] tokens = tab.split(line);
            KnownGene kg = new KnownGene();
            kg.setName(tokens[0]);
            kg.setChrom(tokens[1]);
            kg.setStrand(tokens[2].charAt(0));
            kg.setTxStart(Integer.parseInt(tokens[3]));
            kg.setTxEnd(Integer.parseInt(tokens[4]));
            kg.setCdsStart(Integer.parseInt(tokens[5]));
            kg.setCdsEnd(Integer.parseInt(tokens[6]));
            kg.setExonBounds(Integer.parseInt(tokens[7]), tokens[8], tokens[9]);
            List<KnownGene> L = prot2genes.get(tokens[10]);
            if (L == null) {
                L = new ArrayList<KnownGene>();
                prot2genes.put(tokens[10], L);
            }
            if (indexedFastaSequenceFile.getSequenceDictionary().getSequence(kg.getContig()) == null) {
                LOG.info("ignoring " + line);
                continue;
            }
            L.add(kg);
        }
        r.close();
        if (OUT != null) {
            LOG.info("opening " + OUT);
            pw = new PrintWriter(OUT);
        }
        LOG.info("knownGenes: " + this.prot2genes.size());
        XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
        xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        xmlInputFactory.setXMLResolver(new XMLResolver() {

            @Override
            public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
                LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
                return null;
            }
        });
        // SortingCollection<UBed> mappedFeatures=SortingCollection.newInstance(UBed.class, new UBedCodec(),new UBedCmp(),super.MAX_RECORDS_IN_RAM);
        // mappedFeatures.setDestructiveIteration(true);
        LOG.info("Scanning " + UNIPROT);
        Reader fr = IOUtils.openURIForBufferedReading(UNIPROT);
        XMLEventReader rx = xmlInputFactory.createXMLEventReader(fr);
        final QName uEntry = new QName(UNIPROT_NS, "entry");
        GenomicSequence genomic = null;
        while (rx.hasNext()) {
            XMLEvent evt = rx.peek();
            if (!(evt.isStartElement() && evt.asStartElement().getName().equals(uEntry))) {
                rx.next();
                continue;
            }
            JAXBElement<Entry> jaxbElement = uniprotUnmarshaller.unmarshal(rx, Entry.class);
            Entry entry = jaxbElement.getValue();
            if (entry.getFeature().isEmpty())
                continue;
            List<KnownGene> genes = null;
            for (String acn : entry.getAccession()) {
                genes = prot2genes.get(acn);
                if (genes != null)
                    break;
            }
            if (genes == null)
                continue;
            for (KnownGene kg : genes) {
                if (genomic == null || !genomic.getChrom().equals(kg.getChromosome())) {
                    genomic = new GenomicSequence(this.indexedFastaSequenceFile, kg.getChromosome());
                }
                KnownGene.Peptide pep = kg.getCodingRNA(genomic).getPeptide();
                int sameSequenceLength = 0;
                while (sameSequenceLength < entry.getSequence().getValue().length() && sameSequenceLength < pep.length()) {
                    if (Character.toUpperCase(entry.getSequence().getValue().charAt(sameSequenceLength)) != Character.toUpperCase(entry.getSequence().getValue().charAt(sameSequenceLength))) {
                        break;
                    }
                    sameSequenceLength++;
                }
                if (sameSequenceLength != pep.length()) {
                    System.err.println("Not Same sequence " + kg.getName() + " strand " + kg.getStrand() + " ok-up-to-" + sameSequenceLength);
                    System.err.println("P:" + pep.toString() + " " + pep.length());
                    System.err.println("Q:" + entry.getSequence().getValue() + " " + entry.getSequence().getLength());
                    if (pep.toString().contains("?")) {
                        System.err.println("RNA:" + pep.getCodingRNA().toString());
                    }
                }
                if (sameSequenceLength == 0)
                    continue;
                for (FeatureType feat : entry.getFeature()) {
                    if (feat.getType() == null || feat.getType().isEmpty())
                        continue;
                    LocationType locType = feat.getLocation();
                    if (locType == null)
                        continue;
                    int pepStart, pepEnd;
                    if (locType.getPosition() != null && locType.getPosition().getPosition() != null) {
                        pepEnd = locType.getPosition().getPosition().intValue();
                        pepStart = pepEnd - 1;
                    } else if (locType.getBegin() != null && locType.getEnd() != null && locType.getBegin().getPosition() != null && locType.getEnd().getPosition() != null) {
                        pepStart = locType.getBegin().getPosition().intValue() - 1;
                        pepEnd = locType.getEnd().getPosition().intValue();
                    } else {
                        continue;
                    }
                    if (pepEnd >= sameSequenceLength)
                        continue;
                    List<Integer> genomicPos = new ArrayList<Integer>();
                    while (pepStart < pepEnd) {
                        if (pepStart >= pep.length()) {
                            System.err.println();
                            System.err.println("P:" + pep.toString() + " " + pep.length() + " " + kg.getStrand());
                            System.err.println("Q:" + entry.getSequence().getValue() + " " + entry.getSequence().getLength());
                            uniprotMarshaller.marshal(new JAXBElement<FeatureType>(new QName(UNIPROT_NS, "feature"), FeatureType.class, feat), System.err);
                        }
                        int[] codon = pep.convertToGenomicCoordinates(pepStart);
                        pepStart++;
                        for (int gP : codon) {
                            if (gP == -1) {
                                uniprotMarshaller.marshal(new JAXBElement<FeatureType>(new QName(UNIPROT_NS, "feature"), FeatureType.class, feat), System.err);
                                LOG.error("error in genomoc for (" + pepStart + "/" + pepEnd + "):" + entry.getName());
                                System.exit(-1);
                            }
                            genomicPos.add(gP);
                        }
                    }
                    Collections.sort(genomicPos);
                    UBed ubed = new UBed();
                    ubed.tid = this.indexedFastaSequenceFile.getSequenceDictionary().getSequenceIndex(genomic.getChrom());
                    int k0 = 0;
                    while (k0 < genomicPos.size()) {
                        Range range = new Range(genomicPos.get(k0), genomicPos.get(k0) + 1);
                        k0++;
                        while (k0 < genomicPos.size() && range.end == genomicPos.get(k0)) {
                            range = new Range(range.start, range.end + 1);
                            ++k0;
                        }
                        ubed.positions.add(range);
                    }
                    ubed.strand = (byte) (kg.isPositiveStrand() ? '+' : '-');
                    ubed.name = feat.getType();
                    ubed.print(pw);
                }
            }
        }
        rx.close();
        fr.close();
        LOG.info("End scan uniprot");
    /*
			mappedFeatures.doneAdding();
			
			
			CloseableIterator<UBed> iter=mappedFeatures.iterator();
			while(iter.hasNext())
				{
				UBed ubed=iter.next();
				ubed.print();
				}
			mappedFeatures.cleanup();
			*/
    } catch (Exception err) {
        err.printStackTrace();
        if (OUT != null)
            OUT.deleteOnExit();
        return -1;
    } finally {
        pw.flush();
        pw.close();
        CloserUtil.close(this.indexedFastaSequenceFile);
    }
    return 0;
}
Also used : FeatureType(org.uniprot.FeatureType) ArrayList(java.util.ArrayList) XMLEventReader(javax.xml.stream.XMLEventReader) Reader(java.io.Reader) BufferedReader(java.io.BufferedReader) JAXBContext(javax.xml.bind.JAXBContext) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Entry(org.uniprot.Entry) XMLEventReader(javax.xml.stream.XMLEventReader) XMLResolver(javax.xml.stream.XMLResolver) Unmarshaller(javax.xml.bind.Unmarshaller) PrintWriter(java.io.PrintWriter) Pattern(java.util.regex.Pattern) Marshaller(javax.xml.bind.Marshaller) QName(javax.xml.namespace.QName) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) XMLStreamException(javax.xml.stream.XMLStreamException) XMLStreamException(javax.xml.stream.XMLStreamException) BufferedReader(java.io.BufferedReader) XMLEvent(javax.xml.stream.events.XMLEvent) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) LocationType(org.uniprot.LocationType) XMLInputFactory(javax.xml.stream.XMLInputFactory)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)57 File (java.io.File)34 SamReader (htsjdk.samtools.SamReader)22 SAMRecord (htsjdk.samtools.SAMRecord)20 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)16 ArrayList (java.util.ArrayList)16 IOException (java.io.IOException)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)13 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)11 CigarElement (htsjdk.samtools.CigarElement)11 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)11 List (java.util.List)11 FileNotFoundException (java.io.FileNotFoundException)10 BufferedReader (java.io.BufferedReader)9 Collectors (java.util.stream.Collectors)9 Cigar (htsjdk.samtools.Cigar)8 CigarOperator (htsjdk.samtools.CigarOperator)7