use of org.openscience.cdk.fingerprint.ICountFingerprint in project ambit-mirror by ideaconsult.
the class Context method parseCommandFingerprints.
/**
 * Handles the fingerprints command: reads structure records from the input file, computes
 * fingerprints with every configured {@link IFingerprinter}, attaches them to the molecule as
 * properties and hands the molecule to the output writer.
 *
 * Failures while reading, fingerprinting or writing a single record are logged and the record
 * is passed on; an exception thrown by the batch run itself is caught and logged as a warning.
 *
 * @param subcommand not read anywhere in this method
 * @param now not read anywhere in this method
 * @throws FileNotFoundException if {@code options.output} is null
 * @throws Exception if one of the parameter-parsing or setup calls fails
 */
public void parseCommandFingerprints(String subcommand, long now) throws Exception {
// Hard-coded to true, so the single-file branch of the writer selection below is never taken.
boolean multifile = true;
int page = parsePageParam();
int pagesize = parsePageSizeParam();
final boolean fp_count = parseWriteCountParam();
final boolean fp_raw = parseWriteRawParam();
String smiles_header = parseInputTag_Param("smiles", IteratingDelimitedFileReader.defaultSMILESHeader);
String inchi_header = parseInputTag_Param("inchi", "InChI");
String inchikey_header = parseInputTag_Param("inchikey", "InChIKey");
Object tmpTag = parseSdfTitleParam();
final String[] tags_to_keep = parsetags_to_keep();
// Lower-cased once here because the title lookup further down compares lower-cased keys.
final String sdf_title = tmpTag == null ? null : tmpTag.toString().toLowerCase();
// Paging window over the RECORDS_READ counter: records with a count below startRecord or at/after
// maxRecord are skipped. With pagesize <= 0 the window starts at 1 and maxRecord is non-positive,
// which turns off the upper bound (every use below is guarded by maxRecord > 0).
// NOTE(review): presumably RECORDS_READ already includes the current record when skip() runs - confirm.
final int startRecord = pagesize > 0 ? (page * pagesize + 1) : 1;
final int maxRecord = pagesize > 0 ? ((page + 1) * pagesize + 1) : pagesize;
final File file = getInputFile();
FileInputState input = new FileInputState(file);
input.setOptionalSMILESHeader(smiles_header);
input.setOptionalInChIHeader(inchi_header);
input.setOptionalInChIKeyHeader(inchikey_header);
// Fail before any processing starts when no output target was given.
if (options.output == null)
throw new FileNotFoundException("Output file not specified. Please use -o {file}");
final File outfile = new File(options.output);
logger_cli.log(Level.INFO, "MSG_INFO_READINGWRITING", new Object[] { file.getAbsoluteFile(), outfile.getAbsolutePath() });
final List<IFingerprinter> fps = parseFingerprinterParams();
FileOutputState out = null;
final IChemObjectWriter awriter;
if (multifile) {
awriter = new MultiFingerprintsWriter(outfile, fps, tags_to_keep);
} else {
// Unreachable while multifile is hard-coded to true.
out = new FileOutputState(outfile);
awriter = out.getWriter();
if (awriter instanceof FilesWithHeaderWriter)
((FilesWithHeaderWriter) awriter).setAddSMILEScolumn(false);
}
final IChemObjectWriter writer = awriter;
final boolean writesdf = writer instanceof SDFWriter;
// Lookup table passed to StructureStandardizer.renameTags before each write. Several keys
// (the opentox constant, a plain string, a Property instance) point at the same target Property.
// NOTE(review): presumably the enabled flag decides whether the tag survives in the output -
// confirm against StructureStandardizer.renameTags.
final Map<Object, Property> tags = new HashMap<>();
Property newtag = Property.getSMILESInstance();
newtag.setName("SMILES");
newtag.setEnabled(false);
tags.put(Property.opentox_SMILES, newtag);
tags.put(Property.getSMILESInstance(), newtag);
newtag = Property.getInChIInstance();
newtag.setEnabled(false);
tags.put(Property.opentox_InChI, newtag);
tags.put("InChI", newtag);
tags.put(Property.getInChIInstance(), newtag);
newtag = Property.getInChIKeyInstance();
newtag.setEnabled(true);
newtag.setName("InChIKey");
tags.put(Property.opentox_InChIKey, newtag);
tags.put(Property.getInChIKeyInstance(), newtag);
newtag = Property.getInstance(CDKConstants.TITLE, CDKConstants.TITLE);
newtag.setEnabled(false);
tags.put(CDKConstants.TITLE, newtag);
tags.put(newtag, newtag);
newtag = Property.getInstance("CHEMBL", "CHEMBL");
newtag.setEnabled(false);
tags.put("CHEMBL", newtag);
tags.put(newtag, newtag);
// Batch driver with the paging, progress-reporting, error-logging and close hooks overridden.
final BatchDBProcessor<IStructureRecord> batch = new BatchDBProcessor<IStructureRecord>() {
@Override
public void onItemRead(IStructureRecord input, IBatchStatistics stats) {
super.onItemRead(input, stats);
// Stop the whole run once the read counter reaches the end of the requested page.
if ((maxRecord > 0) && stats.getRecords(RECORDS_STATS.RECORDS_READ) >= (maxRecord))
cancel();
}
@Override
public boolean skip(IStructureRecord input, IBatchStatistics stats) {
// Skip everything outside [startRecord, maxRecord); the upper bound applies only when maxRecord > 0.
return (stats.getRecords(RECORDS_STATS.RECORDS_READ) < startRecord) || ((maxRecord > 0) && (stats.getRecords(RECORDS_STATS.RECORDS_READ) >= maxRecord));
}
@Override
public void onItemSkipped(IStructureRecord input, IBatchStatistics stats) {
super.onItemSkipped(input, stats);
// While skipping, publish statistics at twice the silent interval.
if (stats.isTimeToPrint(getSilentInterval() * 2))
propertyChangeSupport.firePropertyChange(PROPERTY_BATCHSTATS, null, stats);
}
@Override
public void onItemProcessing(IStructureRecord input, Object output, IBatchStatistics stats) {
// Overridden with an empty body; super is not called.
}
@Override
public void onError(IStructureRecord input, Object output, IBatchStatistics stats, Exception x) {
super.onError(input, output, stats, x);
logger_cli.log(Level.SEVERE, "MSG_ERR", new Object[] { x.getMessage() });
}
@Override
public long getSilentInterval() {
// NOTE(review): presumably milliseconds - confirm against BatchDBProcessor.
return 30000L;
}
@Override
public void close() throws Exception {
// Close the output writer first; any exception from it is swallowed so super.close() still runs.
try {
writer.close();
} catch (Exception x) {
} finally {
}
super.close();
}
};
batch.setProcessorChain(new ProcessorsChain<IStructureRecord, IBatchStatistics, IProcessor>());
// Single processing step: record -> molecule -> property filtering -> fingerprints -> write.
batch.getProcessorChain().add(new DefaultAmbitProcessor<IStructureRecord, IStructureRecord>() {
protected MoleculeReader molReader = new MoleculeReader(true, false);
@Override
public IStructureRecord process(IStructureRecord record) throws Exception {
IAtomContainer mol;
IAtomContainer processed = null;
try {
mol = molReader.process(record);
if (mol != null) {
// Copy record properties onto the molecule. When tags_to_keep is set, only properties whose
// name (with the opentox namespace prefix stripped) is found in it are copied.
// NOTE(review): Arrays.binarySearch requires a sorted array - confirm parsetags_to_keep() sorts.
for (Property p : record.getRecordProperties()) {
Object v = record.getRecordProperty(p);
String pname = p.getName().replace("http://www.opentox.org/api/1.1#", "");
// already parsed
if (tags_to_keep != null && Arrays.binarySearch(tags_to_keep, pname) < 0)
continue;
else
mol.setProperty(p, v);
}
if (tags_to_keep != null) {
// Second pass over the molecule's own properties: keys whose string form is not in tags_to_keep
// are collected first and removed after the iteration has finished.
// NOTE(review): removal is by the key's toString(), while the loop above stores Property objects
// as keys - confirm removeProperty matches those entries.
List<String> toRemove = null;
Iterator pi = mol.getProperties().keySet().iterator();
while (pi.hasNext()) {
Object p = pi.next();
if (Arrays.binarySearch(tags_to_keep, p.toString()) < 0) {
if (toRemove == null)
toRemove = new ArrayList<String>();
toRemove.add(p.toString());
}
}
if (toRemove != null)
for (String propertyToRemove : toRemove) mol.removeProperty(propertyToRemove);
}
} else {
// Nothing could be read for this record: log it and pass the record through unchanged.
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
return record;
}
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
} catch (Exception x) {
// Any failure while reading or configuring the molecule ends processing of this record.
logger_cli.log(Level.SEVERE, "MSG_ERR_MOLREAD", new Object[] { getIds(record), x.toString() });
return record;
} finally {
}
// processed is the same object as mol (no copy is made), and mol is non-null at this point,
// so the processed != null checks below always succeed.
processed = mol;
for (IFingerprinter fp : fps) {
// Each fingerprint flavour is attempted independently; a failure leaves its variable null.
ICountFingerprint cfp = null;
try {
cfp = fp.getCountFingerprint(processed);
} catch (Exception x) {
// Logged at FINER on logger (not logger_cli).
logger.log(Level.FINER, x.getMessage());
}
IBitFingerprint bfp = null;
try {
bfp = fp.getBitFingerprint(processed);
} catch (Exception x) {
// Silently ignored; bfp stays null.
}
Map<String, Integer> fpraw = null;
try {
// The raw fingerprint is computed only when requested.
if (fp_raw)
fpraw = fp.getRawFingerprint(processed);
} catch (Exception x) {
// Silently ignored; fpraw stays null.
}
try {
// Results are stored as molecule properties keyed by the fingerprinter class name plus a suffix.
if (cfp != null) {
// The ".count" key is written only when fp_count is set; the plain class-name key is always written.
if (fp_count)
processed.setProperty(fp.getClass().getName() + ".count", cfp);
processed.setProperty(fp.getClass().getName(), cfp);
}
if (bfp != null)
processed.setProperty(fp.getClass().getName() + ".hashed", bfp);
if (fpraw != null)
processed.setProperty(fp.getClass().getName() + ".raw", fpraw);
} catch (Exception x) {
// StringWriter w = new StringWriter();
// x.printStackTrace(new PrintWriter(w));
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
// Record the failure on the molecule itself under an "ERROR."-prefixed key.
if (processed != null)
processed.setProperty("ERROR." + fp.getClass().getName(), x.getMessage());
} finally {
// processed and mol are the same object, so this adds the molecule's properties to itself.
if (processed != null)
processed.addProperties(mol.getProperties());
}
}
if (processed != null)
try {
// For SDF output with a title tag configured, copy the value of the first property whose
// lower-cased key equals sdf_title into the CDK title property.
if (writesdf && sdf_title != null) {
for (Entry<Object, Object> p : processed.getProperties().entrySet()) if (sdf_title.equals(p.getKey().toString().toLowerCase())) {
processed.setProperty(CDKConstants.TITLE, p.getValue());
break;
}
}
StructureStandardizer.renameTags(processed, tags, true);
writer.write(processed);
} catch (Exception x) {
// StringWriter w = new StringWriter();
// x.printStackTrace(new PrintWriter(w));
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { x.getMessage(), getIds(record) });
}
else {
// Not reachable as written: processed was assigned the non-null mol above.
logger_cli.log(Level.SEVERE, "MSG_FINGEPRINTGEN", new Object[] { "Empty molecule", getIds(record) });
}
return record;
}
});
// Forward batch statistics events to the CLI log.
batch.addPropertyChangeListener(new PropertyChangeListener() {
@Override
public void propertyChange(PropertyChangeEvent evt) {
if (AbstractBatchProcessor.PROPERTY_BATCHSTATS.equals(evt.getPropertyName()))
logger_cli.log(Level.INFO, evt.getNewValue().toString());
}
});
/*
* standardprocessor.setCallback(new
* DefaultAmbitProcessor<IAtomContainer, IAtomContainer>() {
*
* @Override public IAtomContainer process(IAtomContainer target) throws
* Exception { try { //writer.write(target); } catch (Exception x) {
* logger.log(Level.SEVERE, x.getMessage()); } return target; } });
*/
IBatchStatistics stats = null;
try {
stats = batch.process(input);
} catch (Exception x) {
// A failure of the batch run is logged, not rethrown.
logger_cli.log(Level.WARNING, x.getMessage(), x);
} finally {
// Empty try block: it contains no statements.
try {
} catch (Exception x) {
logger_cli.warning(x.getMessage());
}
// Closing the batch also closes the writer (see the close() override above).
try {
if (batch != null)
batch.close();
} catch (Exception x) {
logger_cli.warning(x.getMessage());
}
// Final statistics are logged only when batch.process returned normally.
if (stats != null)
logger_cli.log(Level.INFO, stats.toString());
}
}
use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.
the class Tanimoto method method1.
/**
 * Computes the Tanimoto coefficient of two count fingerprints using method 1.
 *
 * The two fingerprints may have different numbers of populated bins; bins
 * are paired by equal hash value. With xy the sum of count products over
 * bins with matching hashes, and x and y the sums of squared counts of each
 * fingerprint, the result is xy / (x + y - xy).
 * Uses Tanimoto method from {@cdk.cite Steffen09}.
 *
 * @param fp1 count fingerprint 1
 * @param fp2 count fingerprint 2
 * @return the Tanimoto coefficient of the two fingerprints
 * @throws IllegalArgumentException if x + y - xy is zero
 */
public static double method1(ICountFingerprint fp1, ICountFingerprint fp2) {
    final int bins1 = fp1.numOfPopulatedbins();
    final int bins2 = fp2.numOfPopulatedbins();
    long dot = 0;
    long squares1 = 0;
    long squares2 = 0;

    for (int a = 0; a < bins1; a++) {
        final int hashA = fp1.getHash(a);
        // Widened to long once so every product below is computed in 64-bit arithmetic.
        final long countA = fp1.getCount(a);
        squares1 += countA * countA;
        for (int b = 0; b < bins2; b++) {
            if (fp2.getHash(b) == hashA) {
                dot += countA * fp2.getCount(b);
            }
        }
    }

    for (int b = 0; b < bins2; b++) {
        final long countB = fp2.getCount(b);
        squares2 += countB * countB;
    }

    final long denominator = squares1 + squares2 - dot;
    if (denominator == 0) {
        throw new IllegalArgumentException(EMPTY_FINGERPRINTS_PROVIDED);
    }
    return (double) dot / denominator;
}
use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.
the class SignatureFingerprintTanimotoTest method testCountMethod1and2.
/**
 * Checks Tanimoto.method1 and Tanimoto.method2 first on two hand-built
 * fingerprints (feature "A" with counts 3 and 4) and then on two signature
 * fingerprints generated from indole, where both methods are expected to
 * return 1.0.
 */
@Test
public void testCountMethod1and2() throws CDKException {
    HashMap<String, Integer> threeOfA = new HashMap<String, Integer>();
    threeOfA.put("A", 3);
    HashMap<String, Integer> fourOfA = new HashMap<String, Integer>();
    fourOfA.put("A", 4);

    ICountFingerprint handBuilt3 = new IntArrayCountFingerprint(threeOfA);
    ICountFingerprint handBuilt4 = new IntArrayCountFingerprint(fourOfA);
    Assert.assertEquals(0.923, Tanimoto.method1(handBuilt3, handBuilt4), 0.001);
    Assert.assertEquals(0.75, Tanimoto.method2(handBuilt3, handBuilt4), 0.001);

    IAtomContainer indoleA = TestMoleculeFactory.makeIndole();
    IAtomContainer indoleB = TestMoleculeFactory.makeIndole();
    SignatureFingerprinter signatures = new SignatureFingerprinter();
    ICountFingerprint generatedA = signatures.getCountFingerprint(indoleA);
    ICountFingerprint generatedB = signatures.getCountFingerprint(indoleB);
    Assert.assertEquals(1.0, Tanimoto.method1(generatedA, generatedB), 0.001);
    Assert.assertEquals(1.0, Tanimoto.method2(generatedA, generatedB), 0.001);
}
use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.
the class SignatureFingerprintTanimotoTest method testComparingBitFingerprintAndCountBehavingAsBit.
/**
 * Verifies that count fingerprints switched to bit behaviour give the same
 * Tanimoto value from method1 and method2, and that this value matches the
 * one computed from the corresponding bit fingerprints.
 */
@Test
public void testComparingBitFingerprintAndCountBehavingAsBit() throws Exception {
    final IAtomContainer triazole = TestMoleculeFactory.make123Triazole();
    final IAtomContainer imidazole = TestMoleculeFactory.makeImidazole();
    final SignatureFingerprinter signatures = new SignatureFingerprinter(1);

    final ICountFingerprint triazoleCounts = signatures.getCountFingerprint(triazole);
    final ICountFingerprint imidazoleCounts = signatures.getCountFingerprint(imidazole);
    triazoleCounts.setBehaveAsBitFingerprint(true);
    imidazoleCounts.setBehaveAsBitFingerprint(true);

    // Arguments are evaluated left to right, so the bit fingerprints are built in the same order as before.
    final double fromBits = Tanimoto.calculate(
            signatures.getBitFingerprint(triazole),
            signatures.getBitFingerprint(imidazole));
    final double fromCountsMethod1 = Tanimoto.method1(triazoleCounts, imidazoleCounts);
    final double fromCountsMethod2 = Tanimoto.method2(triazoleCounts, imidazoleCounts);

    Assert.assertEquals(fromCountsMethod1, fromCountsMethod2, 0.001);
    Assert.assertEquals(fromBits, fromCountsMethod1, 0.001);
}
use of org.openscience.cdk.fingerprint.ICountFingerprint in project cdk by cdk.
the class TanimotoTest method method1.
/**
 * Checks Tanimoto.method1 on two hand-built count fingerprints holding the
 * single feature "A" with counts 3 and 4; the expected value is 0.923
 * within a tolerance of 0.001.
 */
@Test
public void method1() throws CDKException {
    HashMap<String, Integer> threeOfA = new HashMap<String, Integer>();
    threeOfA.put("A", 3);
    HashMap<String, Integer> fourOfA = new HashMap<String, Integer>();
    fourOfA.put("A", 4);

    ICountFingerprint first = new IntArrayCountFingerprint(threeOfA);
    ICountFingerprint second = new IntArrayCountFingerprint(fourOfA);

    Assert.assertEquals(0.923, Tanimoto.method1(first, second), 0.001);
}
Aggregations