Use of com.pratilipi.common.util.PratilipiFilter in project pratilipi by Pratilipi.
Class PratilipiIdfApi, method get.
/**
 * Builds a keyword-occurrence table over the Pratilipi ids returned by the
 * data accessor and stores it as a CSV blob.
 *
 * <p>For every id, the keyword string from
 * {@code PratilipiDataUtil.getPratilipiKeywords} is split on whitespace and each
 * token's occurrence count is incremented. The table is then written, ordered by
 * count (highest first, ties broken by keyword in natural string order), as
 * {@code keyword,count,} rows to a blob named
 * {@code pratilipi/<yyyy-MM-dd-HH:mm>-idf.csv}.
 *
 * <p>NOTE(review): the id list is fetched once with a null cursor and null
 * limits; this presumably returns every id in one page - confirm against
 * {@code DataAccessor.getPratilipiIdList}.
 *
 * @param request the incoming request; not read by this method
 * @return an empty {@code GenericResponse}
 * @throws UnexpectedServerException declared by the API contract
 */
@Get
public GenericResponse get(GenericRequest request) throws UnexpectedServerException {
    // Captured up front so the blob name reflects when generation started.
    Date idfGenerationDate = new Date();

    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    PratilipiFilter pratilipiFilter = new PratilipiFilter();
    String cursor = null;
    DataListCursorTuple<Long> pratilipiIdListCursorTuple = dataAccessor.getPratilipiIdList(pratilipiFilter, cursor, null, null);
    List<Long> pratilipiIdList = pratilipiIdListCursorTuple.getDataList();

    // Populate Keyword-Frequency map.
    final HashMap<String, Integer> keywordFrequencyMap = new HashMap<>();
    for (Long pratilipiId : pratilipiIdList) {
        // Guard the string itself: String.split never returns null, so the
        // null check has to happen before splitting.
        String keywordString = PratilipiDataUtil.getPratilipiKeywords(pratilipiId);
        if (keywordString == null)
            continue;

        for (String keyword : keywordString.split("\\s+")) {
            // split() yields an empty token for empty or whitespace-led input;
            // it is not a keyword and must not be counted.
            if (keyword.isEmpty())
                continue;
            Integer frequency = keywordFrequencyMap.get(keyword);
            keywordFrequencyMap.put(keyword, frequency == null ? 1 : frequency + 1);
        }
    }

    // Sort Keyword-Frequency map in descending order of frequency.
    // Ties fall back to the keyword itself so the comparator returns 0 only
    // for identical keys, as TreeMap requires, and the output is deterministic.
    Comparator<String> comparator = new Comparator<String>() {
        @Override
        public int compare(String a, String b) {
            int byFrequency = keywordFrequencyMap.get(b).compareTo(keywordFrequencyMap.get(a));
            return byFrequency != 0 ? byFrequency : a.compareTo(b);
        }
    };
    TreeMap<String, Integer> sortedKeywordFrequencyMap = new TreeMap<>(comparator);
    sortedKeywordFrequencyMap.putAll(keywordFrequencyMap);

    // Transform sorted map to csv string (row format: "keyword,count,\n").
    StringBuilder csv = new StringBuilder();
    for (Map.Entry<String, Integer> entry : sortedKeywordFrequencyMap.entrySet()) {
        csv.append(entry.getKey()).append(",");
        csv.append(entry.getValue().toString()).append(",");
        csv.append("\n");
    }

    // Persist csv string in BlobStore
    BlobAccessor blobAccessor = DataAccessorFactory.getBlobAccessor();
    BlobEntry blobEntry = blobAccessor.newBlob("pratilipi/" + new SimpleDateFormat("yyyy-MM-dd-HH:mm").format(idfGenerationDate) + "-idf.csv", null, "text/plain");
    blobEntry.setData(csv.toString().getBytes(Charset.forName("UTF-8")));
    blobAccessor.createOrUpdateBlob(blobEntry);

    logger.log(Level.INFO, "Generated IDF with " + keywordFrequencyMap.size() + " keywords.");
    return new GenericResponse();
}
Aggregations