use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.
the class PratilipiDocUtil method _createPageletList.
/**
 * Recursively converts the children of {@code node} into a flat list of pagelets.
 *
 * Each pagelet is an {@code Object[3]} of the form { type, content, alignment }:
 * <ul>
 * <li>type - a {@code PratilipiContentDoc.PageletType} (HEAD, HTML or IMAGE are produced here);</li>
 * <li>content - a String for HEAD/HTML pagelets, a JsonObject (name, height, width) for IMAGE;</li>
 * <li>alignment - {@code null}, or the alignment of the nearest enclosing body/div/p
 *     that defines one (applied to TEXT/HTML pagelets only).</li>
 * </ul>
 *
 * Images are resolved to a blob (copying from legacy locations, downloading, or
 * decoding a base64 data URI as needed). An image that cannot be resolved is skipped.
 *
 * @param pratilipi the content owner; its id is used to build image blob names
 * @param node the node whose children are converted
 * @return the pagelets, in document order
 * @throws UnexpectedServerException propagated from the blob/HTTP/image helpers
 */
private static List<Object[]> _createPageletList(Pratilipi pratilipi, Node node) throws UnexpectedServerException {
    List<Object[]> pageletList = new LinkedList<>();
    // The pagelet that consecutive inline nodes / headings are merged into; reset by block-level nodes and images.
    Object[] currPagelet = null;
    for (Node childNode : node.childNodes()) {
        String nodeName = childNode.nodeName();
        if (nodeName.equals("body") || nodeName.equals("div") || nodeName.equals("p")) {
            currPagelet = null;
            List<Object[]> pList = _createPageletList(pratilipi, childNode);
            if (pList.isEmpty()) {
                // An empty block still takes up a line.
                pageletList.add(new Object[] { PratilipiContentDoc.PageletType.HTML, "<br/>", null });
            } else {
                AlignmentType alignment = _getAlignment(childNode);
                if (alignment != null) {
                    // Inner alignment wins: only fill in pagelets that have none yet.
                    for (Object[] pagelet : pList) {
                        if (pagelet[2] == null && (pagelet[0] == PratilipiContentDoc.PageletType.TEXT || pagelet[0] == PratilipiContentDoc.PageletType.HTML))
                            pagelet[2] = alignment;
                    }
                }
                pageletList.addAll(pList);
            }
        } else if (nodeName.equals("h1") || nodeName.equals("h2")) {
            String text = _extractText(childNode);
            if (text == null)
                continue;
            if (currPagelet != null && currPagelet[0] == PratilipiContentDoc.PageletType.HEAD) {
                // Adjacent headings are merged into a single HEAD pagelet.
                currPagelet[1] = currPagelet[1] + " - " + text;
            } else {
                currPagelet = new Object[] { PratilipiContentDoc.PageletType.HEAD, text, null };
                pageletList.add(currPagelet);
            }
        } else if (nodeName.equals("img")) {
            currPagelet = null;
            BlobAccessor blobAccessor = DataAccessorFactory.getBlobAccessor();
            BlobEntry blobEntry = null;
            String imageUrl = childNode.attr("src");
            String imageName = null;
            if (imageUrl.indexOf("name=") != -1) {
                // Image referenced by name through a query parameter.
                imageName = imageUrl.substring(imageUrl.indexOf("name=") + 5);
                if (imageName.indexOf('&') != -1)
                    imageName = imageName.substring(0, imageName.indexOf('&'));
                imageName = imageName.replace("%20", " ");
                String fileName = _createImageFullName(pratilipi.getId(), imageName);
                blobEntry = blobAccessor.getBlob(fileName);
                if (blobEntry == null) {
                    // Copying from old resource location
                    blobEntry = blobAccessor.getBlob("pratilipi-resource/" + pratilipi.getId() + "/" + imageName);
                    if (blobEntry != null) {
                        blobEntry.setName(fileName);
                        blobAccessor.createOrUpdateBlob(blobEntry);
                    }
                }
                if (blobEntry == null && imageUrl.indexOf("pratilipiId=") != -1) {
                    // Copying from old resource location of another Pratilipi
                    String pratilipiIdStr = imageUrl.substring(imageUrl.indexOf("pratilipiId=") + 12);
                    if (pratilipiIdStr.indexOf('&') != -1)
                        pratilipiIdStr = pratilipiIdStr.substring(0, pratilipiIdStr.indexOf('&'));
                    blobEntry = blobAccessor.getBlob("pratilipi-resource/" + pratilipiIdStr + "/" + imageName);
                    if (blobEntry != null) {
                        blobEntry.setName(fileName);
                        blobAccessor.createOrUpdateBlob(blobEntry);
                    }
                }
                if (blobEntry == null)
                    continue;
            } else if (imageUrl.startsWith("http")) {
                // External image: derive a blob-safe name from the URL and download it once.
                imageName = imageUrl.replaceAll("[:/.?=&+]+", "_");
                String fileName = _createImageFullName(pratilipi.getId(), imageName);
                blobEntry = blobAccessor.getBlob(fileName);
                if (blobEntry == null) {
                    blobEntry = HttpUtil.doGet(imageUrl);
                    // Skip anything that did not come back as an image (including a missing response or MIME type).
                    if (blobEntry == null || blobEntry.getMimeType() == null || !blobEntry.getMimeType().startsWith("image/"))
                        continue;
                    blobEntry.setName(fileName);
                    blobAccessor.createOrUpdateBlob(blobEntry);
                }
            } else if (imageUrl.startsWith("data:") && imageUrl.indexOf("base64") != -1) {
                // Inline base64 image, expected as "data:<mime>;base64,<payload>".
                int mimeEnd = imageUrl.indexOf(';');
                int payloadMarker = imageUrl.indexOf("base64,");
                if (mimeEnd == -1 || payloadMarker == -1)
                    continue; // malformed data URI; nothing usable to decode
                imageName = UUID.randomUUID().toString();
                String mimeType = imageUrl.substring(5, mimeEnd);
                String base64String = imageUrl.substring(payloadMarker + 7);
                blobEntry = blobAccessor.newBlob(_createImageFullName(pratilipi.getId(), imageName), Base64.decodeBase64(base64String), mimeType);
                blobAccessor.createOrUpdateBlob(blobEntry);
            } else if (imageUrl.startsWith("file:///") || imageUrl.startsWith("C:")) {
                // Local file references cannot be resolved on the server.
                continue;
            }
            // Any other src form (e.g. a relative URL) leaves the image unresolved; skip it
            // like the other unresolvable cases instead of measuring a null blob.
            if (blobEntry == null)
                continue;
            JsonObject imgData = new JsonObject();
            imgData.addProperty("name", imageName);
            imgData.addProperty("height", ImageUtil.getHeight(blobEntry));
            imgData.addProperty("width", ImageUtil.getWidth(blobEntry));
            pageletList.add(new Object[] { PratilipiContentDoc.PageletType.IMAGE, imgData, null });
        } else if (nodeName.equals("br")) {
            // A line break only matters inside an HTML pagelet that is being built.
            if (currPagelet != null && currPagelet[0] == PratilipiContentDoc.PageletType.HTML)
                currPagelet[1] = currPagelet[1] + "<br/>";
        } else {
            String text = _extractText(childNode);
            if (text == null)
                continue;
            // Bold tags and minor headings are all rendered as bold text.
            if (nodeName.equals("b") || nodeName.equals("strong") || nodeName.equals("h3") || nodeName.equals("h4") || nodeName.equals("h5") || nodeName.equals("h6"))
                text = "<b>" + text + "</b>";
            if (currPagelet == null || currPagelet[0] != PratilipiContentDoc.PageletType.HTML) {
                currPagelet = new Object[] { PratilipiContentDoc.PageletType.HTML, text, null };
                pageletList.add(currPagelet);
            } else {
                // Consecutive inline nodes are joined into the same HTML pagelet.
                currPagelet[1] = currPagelet[1] + " " + text;
            }
        }
    }
    return pageletList;
}
use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.
the class AuthorBackupApi method get.
/**
 * Dumps every Author entity as one JSON document per line into the backup blob
 * store and, when {@code request.generateCsv} is true, also writes a CSV summary
 * to "datastore/author.csv".
 *
 * @param request carries the optional {@code generateCsv} flag
 * @return an empty {@link GenericResponse}
 * @throws UnexpectedServerException propagated from the data/blob accessors
 */
@Get
public GenericResponse get(GetRequest request) throws UnexpectedServerException {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    BlobAccessor backupBlobAccessor = DataAccessorFactory.getBlobAccessorBackup();

    // One timestamp is used for both the folder name and the file name.
    Date now = new Date();
    TimeZone ist = TimeZone.getTimeZone("Asia/Kolkata");
    DateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
    dayFormat.setTimeZone(ist);
    DateFormat csvTimestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm");
    csvTimestampFormat.setTimeZone(ist);
    DateFormat fileTimestampFormat = new SimpleDateFormat("yyyy-MM-dd-HH:mm-z");
    fileTimestampFormat.setTimeZone(ist);

    StringBuilder jsonLines = new StringBuilder();
    StringBuilder csvLines = new StringBuilder(CSV_HEADER + LINE_SEPARATOR);
    int total = 0;

    AuthorFilter filter = new AuthorFilter();
    String pageCursor = null;
    Gson gson = new GsonBuilder().registerTypeAdapter(Date.class, new GsonIstDateAdapter()).create();

    // Page through the authors 1000 at a time; a short page marks the end.
    boolean morePages = true;
    while (morePages) {
        DataListCursorTuple<Author> page = dataAccessor.getAuthorList(filter, pageCursor, 1000);
        List<Author> authors = page.getDataList();
        for (Author author : authors) {
            jsonLines.append(gson.toJson(author)).append(LINE_SEPARATOR);
            if (Boolean.TRUE.equals(request.generateCsv)) {
                // Ids are prefixed with a quote character in the CSV output.
                csvLines.append("'" + author.getId())
                        .append(CSV_SEPARATOR)
                        .append(author.getUserId() == null ? "" : "'" + author.getUserId())
                        .append(CSV_SEPARATOR)
                        .append(author.getFirstName() == null ? "" : author.getFirstName())
                        .append(CSV_SEPARATOR)
                        .append(author.getLastName() == null ? "" : author.getLastName())
                        .append(CSV_SEPARATOR)
                        .append(author.getPenName() == null ? "" : author.getPenName())
                        .append(CSV_SEPARATOR)
                        .append(author.getFirstNameEn() == null ? "" : author.getFirstNameEn())
                        .append(CSV_SEPARATOR)
                        .append(author.getLastNameEn() == null ? "" : author.getLastNameEn())
                        .append(CSV_SEPARATOR)
                        .append(author.getPenNameEn() == null ? "" : author.getPenNameEn())
                        .append(CSV_SEPARATOR)
                        .append(author.getLanguage())
                        .append(CSV_SEPARATOR)
                        .append(author.getSummary() != null && author.getSummary().trim().length() != 0)
                        .append(CSV_SEPARATOR)
                        .append(author.getContentPublished())
                        .append(CSV_SEPARATOR)
                        .append(csvTimestampFormat.format(author.getRegistrationDate()))
                        .append(LINE_SEPARATOR);
            }
        }
        total += authors.size();
        morePages = authors.size() >= 1000;
        if (morePages) {
            pageCursor = page.getCursor();
        }
    }

    // JSON dump: datastore.author/<day>/author-<timestamp>
    String backupName = "datastore.author/" + dayFormat.format(now) + "/" + "author-" + fileTimestampFormat.format(now);
    byte[] backupBytes = jsonLines.toString().getBytes(Charset.forName("UTF-8"));
    BlobEntry backupEntry = backupBlobAccessor.newBlob(backupName, backupBytes, "text/plain");
    backupBlobAccessor.createOrUpdateBlob(backupEntry);

    if (Boolean.TRUE.equals(request.generateCsv)) {
        byte[] csvBytes = csvLines.toString().getBytes(Charset.forName("UTF-8"));
        BlobEntry csvEntry = backupBlobAccessor.newBlob("datastore/author.csv", csvBytes, "text/csv");
        backupBlobAccessor.createOrUpdateBlob(csvEntry);
    }

    logger.log(Level.INFO, "Backed up " + total + " Author Entities.");
    return new GenericResponse();
}
use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.
the class InitDataUtil method saveInitBanner.
/**
 * Stores {@code blobEntry} as an init banner for {@code language}.
 *
 * The banner is saved under "init/banners/&lt;language code&gt;/&lt;name&gt;", where the
 * name is the current time in milliseconds.
 *
 * @param language the language the banner belongs to
 * @param blobEntry the banner blob; its name is overwritten before saving
 * @return the generated banner name (without the path prefix)
 * @throws InsufficientAccessException if the caller may not update init data for {@code language}
 * @throws UnexpectedServerException propagated from the blob accessor
 */
public static String saveInitBanner(Language language, BlobEntry blobEntry) throws InsufficientAccessException, UnexpectedServerException {
    if (!hasAccessToUpdateInit(language)) {
        throw new InsufficientAccessException();
    }

    String bannerName = String.valueOf(new Date().getTime());
    BlobAccessor accessor = DataAccessorFactory.getBlobAccessor();

    String blobPath = "init/banners/" + language.getCode() + "/" + bannerName;
    blobEntry.setName(blobPath);
    accessor.createOrUpdateBlob(blobEntry);

    return bannerName;
}
use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.
the class DataStoreBackupUtil method csvAuthor.
/**
 * Exports all AuthorEntity rows as CSV to "datastore/author.csv" in the backup
 * blob store, printing a progress line to stdout every 1000 rows and once at the end.
 *
 * @throws UnexpectedServerException propagated from the blob accessor
 */
public static void csvAuthor() throws UnexpectedServerException {
    String header = "AuthorId,UserId," + "FirstName,LastName,PenName,FirstNameEN,LastNameEN,PenNameEN," + "Language,HasSummary,ContentsPublished,FollowCount,RegistrationDate";
    String fieldSeparator = ",";
    String rowSeparator = "\n";

    BlobAccessor backupBlobAccessor = DataAccessorFactory.getBlobAccessorBackup();

    DateFormat registrationFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm");
    registrationFormat.setTimeZone(TimeZone.getTimeZone("Asia/Kolkata"));

    StringBuilder out = new StringBuilder(header + rowSeparator);
    int exported = 0;

    // Read straight from the datastore (cache disabled), 1000 entities per chunk.
    QueryResultIterator<AuthorEntity> authors = ObjectifyService.ofy().cache(false).load().type(AuthorEntity.class).chunk(1000).iterator();
    while (authors.hasNext()) {
        Author author = authors.next();
        // Ids are prefixed with a quote character in the CSV output.
        out.append("'" + author.getId())
                .append(fieldSeparator)
                .append(author.getUserId() == null ? "" : "'" + author.getUserId())
                .append(fieldSeparator)
                .append(author.getFirstName() == null ? "" : author.getFirstName())
                .append(fieldSeparator)
                .append(author.getLastName() == null ? "" : author.getLastName())
                .append(fieldSeparator)
                .append(author.getPenName() == null ? "" : author.getPenName())
                .append(fieldSeparator)
                .append(author.getFirstNameEn() == null ? "" : author.getFirstNameEn())
                .append(fieldSeparator)
                .append(author.getLastNameEn() == null ? "" : author.getLastNameEn())
                .append(fieldSeparator)
                .append(author.getPenNameEn() == null ? "" : author.getPenNameEn())
                .append(fieldSeparator)
                .append(author.getLanguage())
                .append(fieldSeparator)
                .append(author.getSummary() != null && author.getSummary().trim().length() != 0)
                .append(fieldSeparator)
                .append(author.getContentPublished())
                .append(fieldSeparator)
                .append(author.getFollowCount())
                .append(fieldSeparator)
                .append(registrationFormat.format(author.getRegistrationDate()))
                .append(rowSeparator);
        exported++;
        if (exported % 1000 == 0) {
            System.out.println(exported + " ...");
        }
    }
    System.out.println(exported + " ...");

    byte[] csvBytes = out.toString().getBytes(Charset.forName("UTF-8"));
    BlobEntry csvEntry = backupBlobAccessor.newBlob("datastore/author.csv", csvBytes, "text/csv");
    backupBlobAccessor.createOrUpdateBlob(csvEntry);
}
use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.
the class PratilipiIdfApi method get.
/**
 * Counts how often each keyword occurs across the Pratilipis returned by the
 * data accessor and stores the result as CSV ("keyword,count," per line, most
 * frequent first) in the blob store under "pratilipi/&lt;timestamp&gt;-idf.csv".
 *
 * @param request unused
 * @return an empty {@link GenericResponse}
 * @throws UnexpectedServerException propagated from the data/blob accessors
 */
@Get
public GenericResponse get(GenericRequest request) throws UnexpectedServerException {
    Date idfGenerationDate = new Date();
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    PratilipiFilter pratilipiFilter = new PratilipiFilter();
    String cursor = null;
    DataListCursorTuple<Long> pratilipiIdListCursorTupe = dataAccessor.getPratilipiIdList(pratilipiFilter, cursor, null, null);
    List<Long> pratilipiIdList = pratilipiIdListCursorTupe.getDataList();

    // Populate Keyword-Frequency map.
    final HashMap<String, Integer> keywordFrequencyMap = new HashMap<>();
    for (Long pratilipiId : pratilipiIdList) {
        // The null check belongs on the keyword string: String.split never returns null.
        String keywordString = PratilipiDataUtil.getPratilipiKeywords(pratilipiId);
        if (keywordString == null)
            continue;
        for (String keyword : keywordString.split("\\s+")) {
            // split yields an empty token for empty input or leading whitespace; it is not a keyword.
            if (keyword.isEmpty())
                continue;
            Integer frequency = keywordFrequencyMap.get(keyword);
            keywordFrequencyMap.put(keyword, frequency == null ? 1 : frequency + 1);
        }
    }

    // Sort Keyword-Frequency map in descending order of frequency.
    // Ties are broken by the keyword itself so that the comparator is a consistent
    // total order (returns 0 only for the same key), as TreeMap requires.
    Comparator<String> comparator = new Comparator<String>() {
        @Override
        public int compare(String a, String b) {
            int byFrequency = keywordFrequencyMap.get(b).compareTo(keywordFrequencyMap.get(a));
            return byFrequency != 0 ? byFrequency : a.compareTo(b);
        }
    };
    TreeMap<String, Integer> sortedKeywordFrequencyMap = new TreeMap<>(comparator);
    sortedKeywordFrequencyMap.putAll(keywordFrequencyMap);

    // Transform sorted map to csv string
    StringBuilder csv = new StringBuilder();
    for (Map.Entry<String, Integer> entry : sortedKeywordFrequencyMap.entrySet()) {
        csv.append(entry.getKey()).append(",");
        csv.append(entry.getValue().toString()).append(",");
        csv.append("\n");
    }

    // Persist csv string in BlobStore
    // NOTE(review): the timestamp uses the JVM default time zone - confirm that is intended.
    BlobAccessor blobAccessor = DataAccessorFactory.getBlobAccessor();
    String blobName = "pratilipi/" + new SimpleDateFormat("yyyy-MM-dd-HH:mm").format(idfGenerationDate) + "-idf.csv";
    BlobEntry blobEntry = blobAccessor.newBlob(blobName, csv.toString().getBytes(Charset.forName("UTF-8")), "text/plain");
    blobAccessor.createOrUpdateBlob(blobEntry);

    logger.log(Level.INFO, "Generated IDF with " + keywordFrequencyMap.size() + " keywords.");
    return new GenericResponse();
}
Aggregations