Search in sources :

Example 11 with BlobAccessor

use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.

the class PratilipiDocUtil method _createPageletList.

/**
 * Recursively flattens the children of {@code node} into a list of pagelets.
 *
 * <p>Every pagelet is an {@code Object[3]}: index 0 is the
 * {@code PratilipiContentDoc.PageletType}, index 1 is the payload (a {@code String} for
 * HEAD and HTML pagelets, a {@code JsonObject} carrying name/height/width for IMAGE
 * pagelets) and index 2 is the {@code AlignmentType}, or {@code null} when none was set.
 *
 * <p>Images referenced by {@code <img>} elements are copied into the blob store under the
 * name produced by {@code _createImageFullName}. Images that cannot be resolved to a blob
 * (unknown source form, malformed data URI, non-image download) are skipped.
 *
 * @param pratilipi the content owner; its id is used to build image blob names
 * @param node the node whose direct children are converted
 * @return the pagelets in document order; empty when nothing convertible was found
 * @throws UnexpectedServerException declared for the blob/HTTP helpers this method calls
 */
private static List<Object[]> _createPageletList(Pratilipi pratilipi, Node node) throws UnexpectedServerException {
    List<Object[]> pageletList = new LinkedList<>();
    // Pagelet that following inline content is merged into; block elements and images reset it.
    Object[] currPagelet = null;
    for (Node childNode : node.childNodes()) {
        String nodeName = childNode.nodeName();
        if (nodeName.equals("body") || nodeName.equals("div") || nodeName.equals("p")) {
            currPagelet = null;
            List<Object[]> pList = _createPageletList(pratilipi, childNode);
            if (pList.isEmpty()) {
                // An empty container is represented as a line break.
                pageletList.add(new Object[] { PratilipiContentDoc.PageletType.HTML, "<br/>", null });
            } else {
                // The container's alignment only fills in pagelets that have none of their own.
                AlignmentType alignment = _getAlignment(childNode);
                if (alignment != null) {
                    for (Object[] pagelet : pList) {
                        if (pagelet[2] == null && (pagelet[0] == PratilipiContentDoc.PageletType.TEXT || pagelet[0] == PratilipiContentDoc.PageletType.HTML)) {
                            pagelet[2] = alignment;
                        }
                    }
                }
                pageletList.addAll(pList);
            }
        } else if (nodeName.equals("h1") || nodeName.equals("h2")) {
            String text = _extractText(childNode);
            if (text == null)
                continue;
            if (currPagelet != null && currPagelet[0] == PratilipiContentDoc.PageletType.HEAD) {
                // Consecutive headings are merged into a single HEAD pagelet.
                currPagelet[1] = currPagelet[1] + " - " + text;
            } else {
                currPagelet = new Object[] { PratilipiContentDoc.PageletType.HEAD, text, null };
                pageletList.add(currPagelet);
            }
        } else if (nodeName.equals("img")) {
            currPagelet = null;
            BlobAccessor blobAccessor = DataAccessorFactory.getBlobAccessor();
            BlobEntry blobEntry = null;
            String imageUrl = childNode.attr("src");
            String imageName = null;
            if (imageUrl.indexOf("name=") != -1) {
                // Image served by name: take the value of the "name" query parameter.
                imageName = imageUrl.substring(imageUrl.indexOf("name=") + 5);
                if (imageName.indexOf('&') != -1)
                    imageName = imageName.substring(0, imageName.indexOf('&'));
                imageName = imageName.replace("%20", " ");
                String fileName = _createImageFullName(pratilipi.getId(), imageName);
                blobEntry = blobAccessor.getBlob(fileName);
                if (blobEntry == null) {
                    // Copying from old resource location
                    blobEntry = blobAccessor.getBlob("pratilipi-resource/" + pratilipi.getId() + "/" + imageName);
                    if (blobEntry != null) {
                        blobEntry.setName(fileName);
                        blobAccessor.createOrUpdateBlob(blobEntry);
                    }
                }
                if (blobEntry == null && imageUrl.indexOf("pratilipiId=") != -1) {
                    // Copying from old resource location of another Pratilipi
                    String pratilipiIdStr = imageUrl.substring(imageUrl.indexOf("pratilipiId=") + 12);
                    if (pratilipiIdStr.indexOf('&') != -1)
                        pratilipiIdStr = pratilipiIdStr.substring(0, pratilipiIdStr.indexOf('&'));
                    blobEntry = blobAccessor.getBlob("pratilipi-resource/" + pratilipiIdStr + "/" + imageName);
                    if (blobEntry != null) {
                        blobEntry.setName(fileName);
                        blobAccessor.createOrUpdateBlob(blobEntry);
                    }
                }
            } else if (imageUrl.startsWith("http")) {
                // External image: the blob name is derived from the URL so repeated imports reuse the copy.
                imageName = imageUrl.replaceAll("[:/.?=&+]+", "_");
                String fileName = _createImageFullName(pratilipi.getId(), imageName);
                blobEntry = blobAccessor.getBlob(fileName);
                if (blobEntry == null) {
                    blobEntry = HttpUtil.doGet(imageUrl);
                    // Skip downloads that yielded nothing or are not images.
                    if (blobEntry == null || blobEntry.getMimeType() == null || !blobEntry.getMimeType().startsWith("image/"))
                        continue;
                    blobEntry.setName(fileName);
                    blobAccessor.createOrUpdateBlob(blobEntry);
                }
            } else if (imageUrl.startsWith("data:") && imageUrl.indexOf("base64") != -1) {
                // Inline image of the form data:<mime>;base64,<payload>.
                int mimeEnd = imageUrl.indexOf(';');
                int payloadMarker = imageUrl.indexOf("base64,");
                if (mimeEnd == -1 || payloadMarker == -1)
                    continue; // malformed data URI: cannot split mime type from payload
                imageName = UUID.randomUUID().toString();
                String mimeType = imageUrl.substring(5, mimeEnd);
                String base64String = imageUrl.substring(payloadMarker + 7);
                blobEntry = blobAccessor.newBlob(_createImageFullName(pratilipi.getId(), imageName), Base64.decodeBase64(base64String), mimeType);
                blobAccessor.createOrUpdateBlob(blobEntry);
            } else if (imageUrl.startsWith("file:///") || imageUrl.startsWith("C:")) {
                // Local file references cannot be resolved on the server.
                continue;
            }
            // No blob could be resolved (missing resource, or a src form none of the
            // branches above recognises, e.g. a relative URL): skip the image.
            if (blobEntry == null)
                continue;
            JsonObject imgData = new JsonObject();
            imgData.addProperty("name", imageName);
            imgData.addProperty("height", ImageUtil.getHeight(blobEntry));
            imgData.addProperty("width", ImageUtil.getWidth(blobEntry));
            pageletList.add(new Object[] { PratilipiContentDoc.PageletType.IMAGE, imgData, null });
        } else if (nodeName.equals("br")) {
            // A line break only extends an HTML pagelet that is already open.
            if (currPagelet != null && currPagelet[0] == PratilipiContentDoc.PageletType.HTML)
                currPagelet[1] = currPagelet[1] + "<br/>";
        } else {
            String text = _extractText(childNode);
            if (text == null)
                continue;
            // Bold elements and minor headings (h3-h6) are rendered as bold HTML text.
            if (nodeName.equals("b") || nodeName.equals("strong") || nodeName.equals("h3") || nodeName.equals("h4") || nodeName.equals("h5") || nodeName.equals("h6"))
                text = "<b>" + text + "</b>";
            if (currPagelet == null || currPagelet[0] != PratilipiContentDoc.PageletType.HTML) {
                currPagelet = new Object[] { PratilipiContentDoc.PageletType.HTML, text, null };
                pageletList.add(currPagelet);
            } else {
                currPagelet[1] = currPagelet[1] + " " + text;
            }
        }
    }
    return pageletList;
}
Also used : AlignmentType(com.pratilipi.data.type.PratilipiContentDoc.AlignmentType) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) BlobEntry(com.pratilipi.data.type.BlobEntry) BlobAccessor(com.pratilipi.data.BlobAccessor) JsonObject(com.google.gson.JsonObject) JsonObject(com.google.gson.JsonObject) LinkedList(java.util.LinkedList)

Example 12 with BlobAccessor

use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.

the class AuthorBackupApi method get.

/**
 * Backs up all Author entities to the backup blob store.
 *
 * <p>Authors are read in pages of 1000 and written as one JSON document per line to
 * {@code datastore.author/<date>/author-<date-time>}. When {@code request.generateCsv}
 * is true, a CSV summary is also written to {@code datastore/author.csv}. All dates are
 * formatted in the Asia/Kolkata time zone.
 *
 * @param request the API request; {@code generateCsv} switches CSV generation on
 * @return an empty {@code GenericResponse}
 * @throws UnexpectedServerException declared for the data/blob accessors this method calls
 */
@Get
public GenericResponse get(GetRequest request) throws UnexpectedServerException {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    BlobAccessor backupBlobs = DataAccessorFactory.getBlobAccessorBackup();
    Date backupDate = new Date();

    // One formatter per output: directory name, CSV column and file name suffix.
    TimeZone ist = TimeZone.getTimeZone("Asia/Kolkata");
    DateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
    DateFormat csvDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm");
    DateFormat stampFormat = new SimpleDateFormat("yyyy-MM-dd-HH:mm-z");
    dayFormat.setTimeZone(ist);
    csvDateFormat.setTimeZone(ist);
    stampFormat.setTimeZone(ist);

    StringBuilder jsonLines = new StringBuilder();
    StringBuilder csv = new StringBuilder(CSV_HEADER + LINE_SEPARATOR);
    Gson gson = new GsonBuilder().registerTypeAdapter(Date.class, new GsonIstDateAdapter()).create();
    AuthorFilter authorFilter = new AuthorFilter();

    int total = 0;
    String cursor = null;
    boolean morePages = true;
    while (morePages) {
        DataListCursorTuple<Author> page = dataAccessor.getAuthorList(authorFilter, cursor, 1000);
        List<Author> authors = page.getDataList();
        for (Author author : authors) {
            jsonLines.append(gson.toJson(author)).append(LINE_SEPARATOR);
            if (request.generateCsv != null && request.generateCsv) {
                // Ids are prefixed with a single quote in the CSV output.
                csv.append("'" + author.getId()).append(CSV_SEPARATOR);
                csv.append(author.getUserId() == null ? "" : "'" + author.getUserId()).append(CSV_SEPARATOR);
                csv.append(author.getFirstName() == null ? "" : author.getFirstName()).append(CSV_SEPARATOR);
                csv.append(author.getLastName() == null ? "" : author.getLastName()).append(CSV_SEPARATOR);
                csv.append(author.getPenName() == null ? "" : author.getPenName()).append(CSV_SEPARATOR);
                csv.append(author.getFirstNameEn() == null ? "" : author.getFirstNameEn()).append(CSV_SEPARATOR);
                csv.append(author.getLastNameEn() == null ? "" : author.getLastNameEn()).append(CSV_SEPARATOR);
                csv.append(author.getPenNameEn() == null ? "" : author.getPenNameEn()).append(CSV_SEPARATOR);
                csv.append(author.getLanguage()).append(CSV_SEPARATOR);
                csv.append(author.getSummary() != null && author.getSummary().trim().length() != 0).append(CSV_SEPARATOR);
                csv.append(author.getContentPublished()).append(CSV_SEPARATOR);
                csv.append(csvDateFormat.format(author.getRegistrationDate())).append(LINE_SEPARATOR);
            }
        }
        total += authors.size();
        // A page shorter than the requested size is the last one.
        morePages = authors.size() >= 1000;
        if (morePages)
            cursor = page.getCursor();
    }

    Charset utf8 = Charset.forName("UTF-8");
    String fileName = "datastore.author/" + dayFormat.format(backupDate) + "/author-" + stampFormat.format(backupDate);
    BlobEntry authorBackupEntry = backupBlobs.newBlob(fileName, jsonLines.toString().getBytes(utf8), "text/plain");
    backupBlobs.createOrUpdateBlob(authorBackupEntry);

    if (request.generateCsv != null && request.generateCsv) {
        BlobEntry authorCsvEntry = backupBlobs.newBlob("datastore/author.csv", csv.toString().getBytes(utf8), "text/csv");
        backupBlobs.createOrUpdateBlob(authorCsvEntry);
    }

    logger.log(Level.INFO, "Backed up " + total + " Author Entities.");
    return new GenericResponse();
}
Also used : GsonBuilder(com.google.gson.GsonBuilder) GenericResponse(com.pratilipi.api.shared.GenericResponse) DataAccessor(com.pratilipi.data.DataAccessor) BlobEntry(com.pratilipi.data.type.BlobEntry) AuthorFilter(com.pratilipi.common.util.AuthorFilter) Gson(com.google.gson.Gson) Date(java.util.Date) GsonIstDateAdapter(com.pratilipi.common.util.GsonIstDateAdapter) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) BlobAccessor(com.pratilipi.data.BlobAccessor) Author(com.pratilipi.data.type.Author) SimpleDateFormat(java.text.SimpleDateFormat) Get(com.pratilipi.api.annotation.Get)

Example 13 with BlobAccessor

use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.

the class InitDataUtil method saveInitBanner.

/**
 * Stores a banner image for the given language and returns the generated banner name.
 *
 * <p>The name is the current time in milliseconds; the blob is saved as
 * {@code init/banners/<language code>/<name>}.
 *
 * @param language the language the banner belongs to
 * @param blobEntry the banner blob; its name is overwritten before saving
 * @return the generated banner name (without the directory prefix)
 * @throws InsufficientAccessException when the caller may not update init data for {@code language}
 * @throws UnexpectedServerException declared for the blob accessor this method calls
 */
public static String saveInitBanner(Language language, BlobEntry blobEntry) throws InsufficientAccessException, UnexpectedServerException {
    if (hasAccessToUpdateInit(language)) {
        String bannerName = String.valueOf(new Date().getTime());
        BlobAccessor blobStore = DataAccessorFactory.getBlobAccessor();
        String blobPath = "init/banners/" + language.getCode() + "/" + bannerName;
        blobEntry.setName(blobPath);
        blobStore.createOrUpdateBlob(blobEntry);
        return bannerName;
    }
    throw new InsufficientAccessException();
}
Also used : BlobAccessor(com.pratilipi.data.BlobAccessor) InsufficientAccessException(com.pratilipi.common.exception.InsufficientAccessException) Date(java.util.Date)

Example 14 with BlobAccessor

use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.

the class DataStoreBackupUtil method csvAuthor.

/**
 * Exports every {@code AuthorEntity} as a CSV file to the backup blob store.
 *
 * <p>Entities are loaded uncached in chunks of 1000, written one row per author, and the
 * result is saved as {@code datastore/author.csv}. Progress is printed to standard output
 * after every 1000 rows and once more at the end. Registration dates are formatted in the
 * Asia/Kolkata time zone.
 *
 * @throws UnexpectedServerException declared for the blob accessor this method calls
 */
public static void csvAuthor() throws UnexpectedServerException {
    final String header = "AuthorId,UserId," + "FirstName,LastName,PenName,FirstNameEN,LastNameEN,PenNameEN," + "Language,HasSummary,ContentsPublished,FollowCount,RegistrationDate";
    final String fieldSep = ",";
    final String rowSep = "\n";

    BlobAccessor backupBlobs = DataAccessorFactory.getBlobAccessorBackup();

    DateFormat registrationFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm");
    registrationFormat.setTimeZone(TimeZone.getTimeZone("Asia/Kolkata"));

    StringBuilder rows = new StringBuilder(header + rowSep);
    int exported = 0;
    for (QueryResultIterator<AuthorEntity> authors = ObjectifyService.ofy().cache(false).load().type(AuthorEntity.class).chunk(1000).iterator(); authors.hasNext(); ) {
        Author author = authors.next();
        // Ids are prefixed with a single quote in the CSV output.
        rows.append("'" + author.getId()).append(fieldSep);
        rows.append(author.getUserId() == null ? "" : "'" + author.getUserId()).append(fieldSep);
        rows.append(author.getFirstName() == null ? "" : author.getFirstName()).append(fieldSep);
        rows.append(author.getLastName() == null ? "" : author.getLastName()).append(fieldSep);
        rows.append(author.getPenName() == null ? "" : author.getPenName()).append(fieldSep);
        rows.append(author.getFirstNameEn() == null ? "" : author.getFirstNameEn()).append(fieldSep);
        rows.append(author.getLastNameEn() == null ? "" : author.getLastNameEn()).append(fieldSep);
        rows.append(author.getPenNameEn() == null ? "" : author.getPenNameEn()).append(fieldSep);
        rows.append(author.getLanguage()).append(fieldSep);
        rows.append(author.getSummary() != null && author.getSummary().trim().length() != 0).append(fieldSep);
        rows.append(author.getContentPublished()).append(fieldSep);
        rows.append(author.getFollowCount()).append(fieldSep);
        rows.append(registrationFormat.format(author.getRegistrationDate())).append(rowSep);

        exported++;
        if (exported % 1000 == 0) {
            System.out.println(exported + " ...");
        }
    }
    System.out.println(exported + " ...");

    byte[] csvBytes = rows.toString().getBytes(Charset.forName("UTF-8"));
    BlobEntry authorCsvEntry = backupBlobs.newBlob("datastore/author.csv", csvBytes, "text/csv");
    backupBlobs.createOrUpdateBlob(authorCsvEntry);
}
Also used : SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) BlobEntry(com.pratilipi.data.type.BlobEntry) BlobAccessor(com.pratilipi.data.BlobAccessor) Author(com.pratilipi.data.type.Author) SimpleDateFormat(java.text.SimpleDateFormat) AuthorEntity(com.pratilipi.data.type.gae.AuthorEntity)

Example 15 with BlobAccessor

use of com.pratilipi.data.BlobAccessor in project pratilipi by Pratilipi.

the class PratilipiIdfApi method get.

/**
 * Counts how many Pratilipis each keyword token occurs in and stores the table as CSV.
 *
 * <p>The keyword string of every Pratilipi returned by the data accessor is split on
 * whitespace and every non-empty token is counted. The result is written to the blob
 * {@code pratilipi/<yyyy-MM-dd-HH:mm>-idf.csv}, one {@code keyword,count,} row per line,
 * ordered by descending count and, for equal counts, by keyword.
 *
 * @param request the API request (not read by this method)
 * @return an empty {@code GenericResponse}
 * @throws UnexpectedServerException declared for the data/blob accessors this method calls
 */
@Get
public GenericResponse get(GenericRequest request) throws UnexpectedServerException {
    Date idfGenerationDate = new Date();
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    PratilipiFilter pratilipiFilter = new PratilipiFilter();
    String cursor = null;
    DataListCursorTuple<Long> pratilipiIdListCursorTupe = dataAccessor.getPratilipiIdList(pratilipiFilter, cursor, null, null);
    List<Long> pratilipiIdList = pratilipiIdListCursorTupe.getDataList();
    // Populate Keyword-Frequency map.
    final HashMap<String, Integer> keywordFrequencyMap = new HashMap<>();
    for (Long pratilipiId : pratilipiIdList) {
        // Check the keyword string itself: String.split never returns null, so a null
        // check on its result cannot protect against a Pratilipi without keywords.
        String keywordText = PratilipiDataUtil.getPratilipiKeywords(pratilipiId);
        if (keywordText == null)
            continue;
        for (String keyword : keywordText.split("\\s+")) {
            // A blank or leading-whitespace string yields an empty first token; it is not a keyword.
            if (keyword.isEmpty())
                continue;
            Integer frequency = keywordFrequencyMap.get(keyword);
            keywordFrequencyMap.put(keyword, frequency == null ? 1 : frequency + 1);
        }
    }
    // Sort Keyword-Frequency map in descending order of frequency
    Comparator<String> comparator = new Comparator<String>() {

        @Override
        public int compare(String a, String b) {
            int byFrequency = keywordFrequencyMap.get(b).compareTo(keywordFrequencyMap.get(a));
            // Tie-break on the keyword so the ordering is consistent (0 only for the same
            // key), which TreeMap needs to keep distinct keywords with equal counts apart.
            return byFrequency != 0 ? byFrequency : a.compareTo(b);
        }
    };
    TreeMap<String, Integer> sortedKeywordFrequencyMap = new TreeMap<>(comparator);
    sortedKeywordFrequencyMap.putAll(keywordFrequencyMap);
    // Transform sorted map to csv string
    StringBuilder csv = new StringBuilder();
    for (Map.Entry<String, Integer> entry : sortedKeywordFrequencyMap.entrySet()) {
        // The trailing separator after the count is part of the existing file format.
        csv.append(entry.getKey()).append(",");
        csv.append(entry.getValue().toString()).append(",");
        csv.append("\n");
    }
    // Persist csv string in BlobStore
    BlobAccessor blobAccessor = DataAccessorFactory.getBlobAccessor();
    BlobEntry blobEntry = blobAccessor.newBlob("pratilipi/" + new SimpleDateFormat("yyyy-MM-dd-HH:mm").format(idfGenerationDate) + "-idf.csv", null, "text/plain");
    blobEntry.setData(csv.toString().getBytes(Charset.forName("UTF-8")));
    blobAccessor.createOrUpdateBlob(blobEntry);
    logger.log(Level.INFO, "Generated IDF with " + keywordFrequencyMap.size() + " keywords.");
    return new GenericResponse();
}
Also used : HashMap(java.util.HashMap) GenericResponse(com.pratilipi.api.shared.GenericResponse) DataAccessor(com.pratilipi.data.DataAccessor) BlobEntry(com.pratilipi.data.type.BlobEntry) TreeMap(java.util.TreeMap) Date(java.util.Date) Comparator(java.util.Comparator) PratilipiFilter(com.pratilipi.common.util.PratilipiFilter) BlobAccessor(com.pratilipi.data.BlobAccessor) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Map(java.util.Map) SimpleDateFormat(java.text.SimpleDateFormat) Get(com.pratilipi.api.annotation.Get)

Aggregations

BlobAccessor (com.pratilipi.data.BlobAccessor)19 DataAccessor (com.pratilipi.data.DataAccessor)13 BlobEntry (com.pratilipi.data.type.BlobEntry)12 InsufficientAccessException (com.pratilipi.common.exception.InsufficientAccessException)7 Date (java.util.Date)7 Pratilipi (com.pratilipi.data.type.Pratilipi)6 SimpleDateFormat (java.text.SimpleDateFormat)6 UserPratilipi (com.pratilipi.data.type.UserPratilipi)5 DateFormat (java.text.DateFormat)5 AuditLog (com.pratilipi.data.type.AuditLog)4 Gson (com.google.gson.Gson)3 JsonObject (com.google.gson.JsonObject)3 Get (com.pratilipi.api.annotation.Get)3 GenericResponse (com.pratilipi.api.shared.GenericResponse)3 GsonBuilder (com.google.gson.GsonBuilder)2 InvalidArgumentException (com.pratilipi.common.exception.InvalidArgumentException)2 UnexpectedServerException (com.pratilipi.common.exception.UnexpectedServerException)2 DocAccessor (com.pratilipi.data.DocAccessor)2 Author (com.pratilipi.data.type.Author)2 Page (com.pratilipi.data.type.Page)2