Use of edu.sdsc.mmtf.spark.filters.ContainsSequenceRegex in the project mmtf-spark by sbl-sdsc.
Shown below is the method main of the class FilterBySequenceRegex.
/**
 * Counts the PDB entries whose sequence matches a zinc finger motif regular
 * expression and prints that count followed by the elapsed wall-clock time.
 *
 * @param args command-line arguments (not used)
 * @throws FileNotFoundException declared for the MmtfReader calls; presumably
 *         raised when the MMTF data location cannot be resolved -- confirm
 *         against MmtfReader
 */
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();

    long start = System.nanoTime();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(FilterBySequenceRegex.class.getSimpleName());

    // try-with-resources guarantees the Spark context is closed even when
    // reading, filtering, or counting throws.
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        // read PDB in MMTF format
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(path, sc);

        // find structures that contain a Zinc finger motif
        pdb = pdb.filter(new ContainsSequenceRegex("C.{2,4}C.{12}H.{3,5}H"));

        System.out.println("Number of PDB entries containing a Zinc finger motif: " + pdb.count());

        // the elapsed time is taken before the context is closed, so shutdown
        // is not included in the reported duration
        long end = System.nanoTime();
        System.out.println("Time: " + (end - start) / 1E9 + " sec.");
    }
}
Aggregations