Search in sources :

Example 1 with ChemicalDatabase

Use of de.unijena.bioinf.chemdb.ChemicalDatabase in the project sirius by boecker-lab.

The method main of the class FingerprintVisualization.

/**
 * Writes {@code feature_examples.tsv}, a tab-separated table with one row per fingerprint index.
 *
 * <p>Inputs are read from the working directory:
 * <ul>
 *   <li>{@code compounds.csv} - tab-separated; column 1 is used as InChIKey, column 2 as InChI and
 *       column 3 as SMILES.</li>
 *   <li>{@code summary.csv} - tab-separated; column 0 is a fingerprint index, the remaining columns
 *       are example structures.</li>
 * </ul>
 *
 * <p>For every compound the ECFP bits are turned into example substructure SMILES, and for every
 * set {@link SubstructureProperty} bit the number of atoms of the first SMARTS match is recorded.
 * Afterwards each line of {@code summary.csv} is echoed together with the recorded atom size, and
 * finally one row per ECFP bit with a known example is appended.
 *
 * <p>CDK, clone and database errors are printed to stderr and end the run early; whatever has been
 * written up to that point stays in the output file.
 *
 * @param args ignored
 * @throws IOException if an input file cannot be read or the output file cannot be created
 */
public static void main(String[] args) throws IOException {
    final Charset utf8 = Charset.forName("UTF-8");
    // try-with-resources: the output stream is flushed and closed even when reading an input file
    // fails or an unchecked exception escapes from one of the loops below.
    try (final PrintStream stream = new PrintStream("feature_examples.tsv")) {
        final CdkFingerprintVersion version = CdkFingerprintVersion.getComplete();
        final List<String> lines = Files.readAllLines(new File("summary.csv").toPath(), utf8);
        // atomSizes[index] < 0 means "no example size recorded yet" for that fingerprint index.
        final int[] atomSizes = new int[version.size()];
        Arrays.fill(atomSizes, -1);
        try (final ChemicalDatabase db = new ChemicalDatabase()) {
            final SMARTSQueryTool query = new SMARTSQueryTool("C-C", SilentChemObjectBuilder.getInstance());
            query.setQueryCacheSize(atomSizes.length);
            final ECFPFingerprinter fingerprinter = new ECFPFingerprinter();
            final int ECFP_OFFSET = version.getOffsetFor(CdkFingerprintVersion.USED_FINGERPRINTS.ECFP);
            final String[] substructures = new String[fingerprinter.getSize()];
            final String[] smarts = new String[fingerprinter.getSize()];
            final List<String> compounds = Files.readAllLines(new File("compounds.csv").toPath(), utf8);
            // One parser serves every SMILES string; no need to build a new one per line.
            final SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());

            for (String aline : compounds) {
                final String[] linetabs = aline.split("\t");
                final String inchikey = linetabs[1];
                final String inchi = linetabs[2];
                final String smiles = linetabs[3];
                final IAtomContainer molecule = smilesParser.parseSmiles(smiles);
                final int[][] matrix = AdjacencyMatrix.getMatrix(molecule);
                final IBitFingerprint fp = fingerprinter.getBitFingerprint(molecule);
                final CircularFingerprinter.FP[] details = fingerprinter.getFingerprintDetails();

                // Part 1: derive an example substructure for every ECFP bit set in this molecule.
                for (int i = 0; i < fp.size(); ++i) {
                    if (!fp.get(i)) {
                        continue;
                    }
                    boolean aroma = false;
                    final CircularFingerprinter.FP x = details[i];
                    final TIntArrayList inds = new TIntArrayList();
                    final BitSet ids = new BitSet(molecule.getAtomCount());
                    for (int atomi : x.atoms) {
                        if (ids.get(atomi)) {
                            continue;
                        }
                        final IAtom atom = molecule.getAtom(atomi);
                        aroma = aroma || atom.isAromatic();
                        inds.add(atomi);
                        ids.set(atomi);
                        addAromaticRings(molecule, matrix, atomi, atom, ids, inds);
                    }
                    // Keep the first example, but let any later non-aromatic example replace it.
                    if (substructures[i] == null || !aroma) {
                        substructures[i] = SmilesGenerator.absolute().aromatic().create(AtomContainerManipulator.extractSubstructure(molecule, inds.toArray()));
                        atomSizes[i + ECFP_OFFSET] = x.atoms.length;
                        // The second string is generated after the atom set has been extended by addNeighbours.
                        addNeighbours(molecule, matrix, ids, inds);
                        smarts[i] = SmilesGenerator.absolute().aromatic().create(AtomContainerManipulator.extractSubstructure(molecule, inds.toArray()));
                    }
                }

                // Part 2: record match sizes for SMARTS-defined properties that have none yet.
                Fingerprint fingerprint = db.lookupFingerprintByInChI(InChIs.newInChI(inchikey, inchi));
                if (fingerprint == null) {
                    // Not in the database: compute it from the SMILES.
                    // NOTE(review): this uses getDefault() while atomSizes is sized by getComplete();
                    // confirm that FPIter.getIndex() refers to the same index space for both.
                    fingerprint = new FixedFingerprinter(CdkFingerprintVersion.getDefault()).computeFingerprintFromSMILES(smiles);
                }
                if (fingerprint != null) {
                    for (FPIter f : fingerprint) {
                        if (!f.isSet() || atomSizes[f.getIndex()] >= 0) {
                            continue;
                        }
                        if (f.getMolecularProperty() instanceof SubstructureProperty) {
                            query.setSmarts(((SubstructureProperty) f.getMolecularProperty()).getSmarts());
                            if (query.matches(molecule)) {
                                final List<List<Integer>> size = query.getUniqueMatchingAtoms();
                                if (!size.isEmpty()) {
                                    atomSizes[f.getIndex()] = Math.max(atomSizes[f.getIndex()], size.get(0).size());
                                }
                            }
                        }
                    }
                }
            }

            // Part 3: echo summary.csv, keeping only example structures that match the property's SMARTS.
            final SMARTSQueryTool queryTool = new SMARTSQueryTool("C#C", SilentChemObjectBuilder.getInstance());
            queryTool.setQueryCacheSize(6000);
            for (String line : lines) {
                final String[] tabs = line.split("\t");
                final int index = Integer.parseInt(tabs[0]);
                final MolecularProperty prop = version.getMolecularProperty(index);
                stream.print(tabs[0]);
                stream.print("\t");
                stream.print(atomSizes[index]);
                if (tabs.length == 2) {
                    stream.print("\t" + tabs[1]);
                } else {
                    // Only every second column starting at column 2 is treated as a structure.
                    for (int i = 2; i < tabs.length; i += 2) {
                        final String substruct = tabs[i];
                        try {
                            final IAtomContainer substructure = smilesParser.parseSmiles(substruct);
                            if (prop instanceof SubstructureProperty) {
                                queryTool.setSmarts(((SubstructureProperty) prop).getSmarts());
                                if (queryTool.matches(substructure)) {
                                    final List<List<Integer>> match = queryTool.getUniqueMatchingAtoms();
                                    if (match.size() > 0 && match.get(0).size() > 0) {
                                        stream.print("\t" + substruct);
                                    }
                                }
                            }
                        } catch (Exception e) {
                            // Best effort: one unparsable example must not abort the whole export.
                            e.printStackTrace();
                        }
                    }
                }
                stream.print("\n");
            }

            // now add ECFP fingerprints
            for (int i = 0; i < substructures.length; ++i) {
                if (substructures[i] == null) {
                    System.err.println("Warning: No smiles for ECFP with index " + i + " and hash " + version.getMolecularProperty(ECFP_OFFSET + i).getDescription());
                } else {
                    stream.print(ECFP_OFFSET + i);
                    stream.print("\t");
                    stream.print(atomSizes[i + ECFP_OFFSET]);
                    stream.print("\t");
                    stream.print(substructures[i]);
                    stream.print("\t");
                    stream.println(smarts[i]);
                }
            }
        } catch (CDKException | CloneNotSupportedException | ChemicalDatabaseException e) {
            e.printStackTrace();
        }
    }
}
Also used : ECFPFingerprinter(de.unijena.bioinf.fingerid.fingerprints.ECFPFingerprinter) SmilesParser(org.openscience.cdk.smiles.SmilesParser) IAtomContainer(org.openscience.cdk.interfaces.IAtomContainer) SMARTSQueryTool(org.openscience.cdk.smiles.smarts.SMARTSQueryTool) InChIGeneratorFactory(org.openscience.cdk.inchi.InChIGeneratorFactory) ChemicalDatabase(de.unijena.bioinf.chemdb.ChemicalDatabase) CircularFingerprinter(org.openscience.cdk.fingerprint.CircularFingerprinter) TIntArrayList(gnu.trove.list.array.TIntArrayList) List(java.util.List) IAtom(org.openscience.cdk.interfaces.IAtom) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) CDKException(org.openscience.cdk.exception.CDKException) BitSet(java.util.BitSet) IBitFingerprint(org.openscience.cdk.fingerprint.IBitFingerprint) TIntArrayList(gnu.trove.list.array.TIntArrayList) CDKException(org.openscience.cdk.exception.CDKException) ChemicalDatabaseException(de.unijena.bioinf.chemdb.ChemicalDatabaseException) ECFPFingerprinter(de.unijena.bioinf.fingerid.fingerprints.ECFPFingerprinter) CircularFingerprinter(org.openscience.cdk.fingerprint.CircularFingerprinter) FixedFingerprinter(de.unijena.bioinf.fingerid.fingerprints.FixedFingerprinter) Fingerprinter(de.unijena.bioinf.fingerid.Fingerprinter) FixedFingerprinter(de.unijena.bioinf.fingerid.fingerprints.FixedFingerprinter) ChemicalDatabaseException(de.unijena.bioinf.chemdb.ChemicalDatabaseException)

Aggregations

ChemicalDatabase (de.unijena.bioinf.chemdb.ChemicalDatabase)1 ChemicalDatabaseException (de.unijena.bioinf.chemdb.ChemicalDatabaseException)1 Fingerprinter (de.unijena.bioinf.fingerid.Fingerprinter)1 ECFPFingerprinter (de.unijena.bioinf.fingerid.fingerprints.ECFPFingerprinter)1 FixedFingerprinter (de.unijena.bioinf.fingerid.fingerprints.FixedFingerprinter)1 TIntArrayList (gnu.trove.list.array.TIntArrayList)1 BitSet (java.util.BitSet)1 List (java.util.List)1 CDKException (org.openscience.cdk.exception.CDKException)1 CircularFingerprinter (org.openscience.cdk.fingerprint.CircularFingerprinter)1 IBitFingerprint (org.openscience.cdk.fingerprint.IBitFingerprint)1 InChIGeneratorFactory (org.openscience.cdk.inchi.InChIGeneratorFactory)1 IAtom (org.openscience.cdk.interfaces.IAtom)1 IAtomContainer (org.openscience.cdk.interfaces.IAtomContainer)1 SmilesParser (org.openscience.cdk.smiles.SmilesParser)1 SMARTSQueryTool (org.openscience.cdk.smiles.smarts.SMARTSQueryTool)1