Use of org.openscience.cdk.fingerprint.BitSetFingerprint in project cdk by cdk.
From the class TanimotoTest, method testCompareBitSetandBitFingerprintTanimoto.
/**
 * Checks that the Tanimoto coefficient of 1,2,3-triazole and imidazole is the
 * same (within a tolerance of 0.01) regardless of the fingerprint
 * representation handed to {@code Tanimoto.calculate}: raw {@code BitSet}s,
 * {@code BitSetFingerprint}s, or {@code IntArrayFingerprint}s.
 */
@Test
public void testCompareBitSetandBitFingerprintTanimoto() throws Exception {
    IAtomContainer triazole = TestMoleculeFactory.make123Triazole();
    IAtomContainer imidazole = TestMoleculeFactory.makeImidazole();
    Fingerprinter fingerprinter = new Fingerprinter(1024, 8);

    // Reference value: coefficient computed directly on the raw bit sets.
    BitSet triazoleBits = fingerprinter.getBitFingerprint(triazole).asBitSet();
    BitSet imidazoleBits = fingerprinter.getBitFingerprint(imidazole).asBitSet();
    float expected = Tanimoto.calculate(triazoleBits, imidazoleBits);

    // Same bits wrapped as BitSetFingerprint must give the same coefficient.
    BitSetFingerprint triazoleFp = new BitSetFingerprint(triazoleBits);
    BitSetFingerprint imidazoleFp = new BitSetFingerprint(imidazoleBits);
    double viaBitSetFingerprint = Tanimoto.calculate(triazoleFp, imidazoleFp);
    Assert.assertEquals(expected, viaBitSetFingerprint, 0.01);

    // ... and so must the IntArrayFingerprint copies of those fingerprints.
    IntArrayFingerprint triazoleIntFp = new IntArrayFingerprint(triazoleFp);
    IntArrayFingerprint imidazoleIntFp = new IntArrayFingerprint(imidazoleFp);
    double viaIntArrayFingerprint = Tanimoto.calculate(triazoleIntFp, imidazoleIntFp);
    Assert.assertEquals(expected, viaIntArrayFingerprint, 0.01);
}
Use of org.openscience.cdk.fingerprint.BitSetFingerprint in project cdk by cdk.
From the class Tanimoto, method calculate.
/**
 * Computes the Tanimoto coefficient of two <code>IBitFingerprint</code>s.
 *
 * <p>The result is the number of bits set in both fingerprints divided by the
 * number of bits set in at least one of them, i.e.
 * {@code common / (cardinality1 + cardinality2 - common)}. Neither argument
 * is modified: the intersection is built on a copy of {@code fingerprint1}.
 * If both cardinalities are zero the division is {@code 0.0 / 0.0} and the
 * result is {@code NaN}.
 *
 * @param fingerprint1 fingerprint for the first molecule
 * @param fingerprint2 fingerprint for the second molecule
 * @return the Tanimoto coefficient
 * @throws IllegalArgumentException if the two fingerprints differ in size
 */
public static double calculate(IBitFingerprint fingerprint1, IBitFingerprint fingerprint2) {
    if (fingerprint1.size() != fingerprint2.size()) {
        throw new IllegalArgumentException("Fingerprints must have the same size");
    }
    final int bitsInFirst = fingerprint1.cardinality();
    final int bitsInSecond = fingerprint2.cardinality();

    // Copy the first fingerprint before intersecting so the caller's object
    // stays untouched. An IntArrayFingerprint input may be a very large
    // fingerprint, so its copy stays an IntArrayFingerprint as well; anything
    // else is copied into a BitSetFingerprint.
    final IBitFingerprint intersection;
    if (fingerprint1 instanceof IntArrayFingerprint) {
        intersection = new IntArrayFingerprint(fingerprint1);
    } else {
        intersection = new BitSetFingerprint(fingerprint1);
    }
    intersection.and(fingerprint2);

    final double bitsInCommon = intersection.cardinality();
    final double bitsInEither = bitsInFirst + bitsInSecond - bitsInCommon;
    return bitsInCommon / bitsInEither;
}
Use of org.openscience.cdk.fingerprint.BitSetFingerprint in project cdk by cdk.
From the class TemplateExtractor, method makeFingerprintsFromSdf.
/**
 * Reads molecules from an SD file stream and produces one fingerprint per
 * molecule using a {@code HybridizationFingerprinter} with pseudo-atom
 * hashing enabled.
 *
 * <p>Each molecule is first converted to a query structure: if
 * {@code anyAtom} is set and {@code anyAtomAnyBond} is not, via
 * {@code QueryAtomContainerCreator.createAnyAtomContainer(m, false)};
 * otherwise via {@code AtomContainerManipulator.anonymise(m)}. If
 * fingerprinting a query fails, an all-ones fingerprint is stored instead so
 * that any structure matches the template and the decision is left to the
 * later substructure match.
 *
 * <p>If the SDF reader cannot be created, the failure is logged and an empty
 * list is returned. The reader is always closed before this method returns,
 * including when an exception escapes the read loop.
 *
 * @param anyAtom        see above; selects the query conversion together
 *                       with {@code anyAtomAnyBond}
 * @param anyAtomAnyBond see above
 * @param timings        histogram updated in place: the key is the elapsed
 *                       fingerprinting time in milliseconds divided by 10
 *                       and floored (as a decimal string), the value is the
 *                       number of molecules that fell into that bin
 * @param fin            source of the SD file content
 * @param limit          maximum number of molecules to read, or {@code -1}
 *                       for no limit
 * @return the generated fingerprints, in the order the molecules were read
 * @throws Exception if reading a molecule or building its query structure
 *                   fails
 */
public List<IBitFingerprint> makeFingerprintsFromSdf(boolean anyAtom, boolean anyAtomAnyBond, Map<String, Integer> timings, BufferedReader fin, int limit) throws Exception {
    HybridizationFingerprinter fingerPrinter = new HybridizationFingerprinter(HybridizationFingerprinter.DEFAULT_SIZE, HybridizationFingerprinter.DEFAULT_SEARCH_DEPTH);
    fingerPrinter.setHashPseudoAtoms(true);
    List<IBitFingerprint> data = new ArrayList<>();
    ILoggingTool logger = LoggingToolFactory.createLoggingTool(getClass());

    IteratingSDFReader imdl = null;
    try {
        logger.info("Read data file in ...");
        imdl = new IteratingSDFReader(fin, builder);
        logger.info("ready");
    } catch (Exception exc) {
        logger.warn("Could not read Molecules from file due to:", exc);
    }
    if (imdl == null) {
        // Without a reader there is nothing to iterate over; bail out
        // instead of failing with a NullPointerException below.
        return data;
    }

    int moleculeCounter = 0;
    int fingerprintCounter = 0;
    logger.info("Generated Fingerprints: " + fingerprintCounter + " ");
    try {
        while (imdl.hasNext() && (moleculeCounter < limit || limit == -1)) {
            IAtomContainer m = imdl.next();
            moleculeCounter++;
            IAtomContainer query;
            if (anyAtom && !anyAtomAnyBond) {
                query = QueryAtomContainerCreator.createAnyAtomContainer(m, false);
            } else {
                query = AtomContainerManipulator.anonymise(m);
            }
            try {
                long time = -System.currentTimeMillis();
                // query fp for anyAtom is probably not useful
                data.add(fingerPrinter.getBitFingerprint(query));
                fingerprintCounter = fingerprintCounter + 1;
                time += System.currentTimeMillis();
                // store the time, binned in steps of 10 ms
                String bin = Integer.toString((int) Math.floor(time / 10.0));
                if (timings.containsKey(bin)) {
                    timings.put(bin, (timings.get(bin)) + 1);
                } else {
                    timings.put(bin, 1);
                }
            } catch (Exception exc1) {
                logger.info("QueryFingerprintError: from molecule:" + moleculeCounter + " due to:" + exc1.getMessage());
                // OK, just adds a fingerprint with all ones, so that any
                // structure will match this template, and leave it up
                // to substructure match to figure things out
                IBitFingerprint allOnesFingerprint = new BitSetFingerprint(fingerPrinter.getSize());
                for (int i = 0; i < fingerPrinter.getSize(); i++) {
                    allOnesFingerprint.set(i, true);
                }
                data.add(allOnesFingerprint);
                fingerprintCounter = fingerprintCounter + 1;
            }
            // Progress indicator: alternate a spinner character and print a
            // running total every 100 fingerprints.
            if (fingerprintCounter % 2 == 0) {
                logger.info("\b" + "/");
            } else {
                logger.info("\b" + "\\");
            }
            if (fingerprintCounter % 100 == 0) {
                logger.info("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" + "Generated Fingerprints: " + fingerprintCounter + " \n");
            }
        }
    } finally {
        // Close the reader even when an exception escapes the loop above.
        try {
            imdl.close();
        } catch (Exception exc2) {
            logger.warn("Unexpected Exception:", exc2);
        }
    }
    logger.info("...ready with:" + moleculeCounter + " molecules\nWrite data...of data vector:" + data.size() + " fingerprintCounter:" + fingerprintCounter);
    return data;
}
Aggregations