Search in sources :

Example 16 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

the class ZakonyProLidiHarvestJobConfig method zakonyProLidiFulltextReader.

/**
 * Creates the step-scoped paging reader that supplies record identifiers to the
 * ZakonyProLidi fulltext job.
 *
 * <p>The reader selects {@code import_conf_id} and {@code record_id} from
 * {@code harvested_record} for the given import configuration, restricted to rows whose
 * {@code deleted} column is null, ordered by {@code record_id} descending and fetched one
 * row per page.
 *
 * @param configId value of the {@code Constants.JOB_PARAM_CONF_ID} job parameter, bound to
 *                 the {@code :conf_id} query parameter
 * @return a fully initialised {@link JdbcPagingItemReader}
 * @throws Exception if the query provider cannot be built or the reader fails validation
 */
@Bean(name = Constants.JOB_ID_FULLTEXT_ZAKONYPROLIDI + ":reader")
@StepScope
public ItemReader<HarvestedRecordUniqueId> zakonyProLidiFulltextReader(@Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long configId) throws Exception {
    // Paging query: non-deleted records of one import configuration, newest record_id first.
    SqlPagingQueryProviderFactoryBean queryProviderFactory = new SqlPagingQueryProviderFactoryBean();
    queryProviderFactory.setDataSource(dataSource);
    queryProviderFactory.setSelectClause("SELECT import_conf_id, record_id");
    queryProviderFactory.setFromClause("FROM harvested_record");
    queryProviderFactory.setWhereClause("WHERE import_conf_id = :conf_id and deleted is null");
    queryProviderFactory.setSortKeys(ImmutableMap.of("record_id", Order.DESCENDING));

    // Named parameters referenced by the WHERE clause.
    Map<String, Object> queryParams = new HashMap<>();
    queryParams.put("conf_id", configId);

    JdbcPagingItemReader<HarvestedRecordUniqueId> pagingReader = new JdbcPagingItemReader<>();
    pagingReader.setDataSource(dataSource);
    pagingReader.setQueryProvider(queryProviderFactory.getObject());
    pagingReader.setParameterValues(queryParams);
    pagingReader.setRowMapper(new HarvestedRecordIdRowMapper());
    pagingReader.setPageSize(1);
    pagingReader.afterPropertiesSet();
    return pagingReader;
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) HashMap(java.util.HashMap) HarvestedRecordIdRowMapper(cz.mzk.recordmanager.server.export.HarvestedRecordIdRowMapper) JdbcPagingItemReader(org.springframework.batch.item.database.JdbcPagingItemReader) StepScope(org.springframework.batch.core.configuration.annotation.StepScope) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) Bean(org.springframework.context.annotation.Bean)

Example 17 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

the class ExportRecordsJobConfig method exportRecordsForClassifierReader.

/**
 * Creates the step-scoped paging reader used by the "export records for classifier" job.
 *
 * <p>The reader selects {@code import_conf_id} and {@code record_id} from
 * {@code harvested_record} for the given import configuration, keeping only rows whose
 * {@code dedup_record_id} also belongs to some harvested record that has a row in
 * {@code fulltext_kramerius}. Rows are sorted by {@code record_id} and read 20 per page.
 *
 * @param configId value of the {@code Constants.JOB_PARAM_CONF_ID} job parameter, bound to
 *                 the {@code :conf_id} query parameter
 * @return a fully initialised {@link JdbcPagingItemReader}
 * @throws Exception if the query provider cannot be built or the reader fails validation
 */
@Bean(name = "exportRecordsForClassifierJob:exportRecordsForClassifierReader")
@StepScope
public ItemReader<HarvestedRecordUniqueId> exportRecordsForClassifierReader(@Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long configId) throws Exception {
    SqlPagingQueryProviderFactoryBean queryProviderFactory = new SqlPagingQueryProviderFactoryBean();
    queryProviderFactory.setDataSource(dataSource);
    queryProviderFactory.setSelectClause("SELECT import_conf_id, record_id");
    queryProviderFactory.setFromClause("FROM harvested_record");
    // Restrict to records sharing a dedup group with a record that has Kramerius fulltext.
    queryProviderFactory.setWhereClause("WHERE import_conf_id = :conf_id AND  dedup_record_id IN ( "
            + "  SELECT dedup_record_id "
            + "  FROM harvested_record "
            + "  WHERE EXISTS( "
            + "      SELECT 1 "
            + "      FROM fulltext_kramerius "
            + "      WHERE harvested_record.id = fulltext_kramerius.harvested_record_id "
            + "  ))");
    queryProviderFactory.setSortKey("record_id");

    // Named parameters referenced by the WHERE clause.
    Map<String, Object> queryParams = new HashMap<>();
    queryParams.put("conf_id", configId);

    JdbcPagingItemReader<HarvestedRecordUniqueId> pagingReader = new JdbcPagingItemReader<>();
    pagingReader.setDataSource(dataSource);
    pagingReader.setQueryProvider(queryProviderFactory.getObject());
    pagingReader.setParameterValues(queryParams);
    pagingReader.setRowMapper(new HarvestedRecordIdRowMapper());
    pagingReader.setPageSize(20);
    pagingReader.afterPropertiesSet();
    return pagingReader;
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) HashMap(java.util.HashMap) JdbcPagingItemReader(org.springframework.batch.item.database.JdbcPagingItemReader) StepScope(org.springframework.batch.core.configuration.annotation.StepScope) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) Bean(org.springframework.context.annotation.Bean)

Example 18 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

the class HarvestedRecordLimitedRowMapper method mapRow.

/**
 * Builds a partially populated {@link HarvestedRecord} from the current result-set row.
 *
 * <p>Only the unique id (import configuration loaded through
 * {@code oaiHarvestConfigurationDao} plus {@code RECORD_ID}), the publication year and the
 * format are filled in.
 *
 * @param rs     result set positioned on the row to map
 * @param rowNum index of the current row (unused)
 * @return the record carrying the columns read from this row
 * @throws SQLException if any column cannot be read
 */
@Override
public HarvestedRecord mapRow(ResultSet rs, int rowNum) throws SQLException {
    final long importConfId = rs.getLong("IMPORT_CONF_ID");
    final HarvestedRecordUniqueId uniqueId = new HarvestedRecordUniqueId(
            oaiHarvestConfigurationDao.load(importConfId), rs.getString("RECORD_ID"));

    final HarvestedRecord record = new HarvestedRecord(uniqueId);
    // NOTE(review): getLong yields 0 for SQL NULL, so a missing year is stored as 0 — confirm intended.
    record.setPublicationYear(rs.getLong("PUBLICATION_YEAR"));
    record.setFormat(rs.getString("FORMAT"));
    return record;
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 19 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

the class ManuscriptoriumFulltextWriter method write.

/**
 * Attaches a fulltext and a fresh TEI element to every record of the chunk that has no
 * fulltext yet, then flushes and clears the current Hibernate session.
 *
 * <p>For each item the record is loaded through {@code harvestedRecordDao}; records that
 * already have a non-empty fulltext list are skipped. Otherwise {@code getNextFulltext} is
 * invoked (presumably it prepares {@code fulltextReader}, {@code teiReader} and
 * {@code client} for this record — verify against its implementation). A non-empty fulltext
 * is stored as a single {@link FulltextKramerius} page, the first existing TEI element is
 * removed from the raw record, the TEI element of the source document is appended to the
 * root, and the record is persisted.
 *
 * @param items unique ids of the records to process
 * @throws Exception if loading, XML parsing/transformation or persisting fails
 */
@Override
public void write(List<? extends HarvestedRecordUniqueId> items) throws Exception {
    for (HarvestedRecordUniqueId uniqueId : items) {
        HarvestedRecord hr = harvestedRecordDao.get(uniqueId);
        if (!hr.getFulltextKramerius().isEmpty())
            continue;
        getNextFulltext(uniqueId.getRecordId());
        try {
            String fulltext = fulltextReader.next();
            if (fulltext.isEmpty()) {
                logger.warn("Fulltext from " + FULLTEXT_URL + uniqueId.getRecordId() + " is empty.");
            } else {
                FulltextKramerius fk = new FulltextKramerius();
                // Encode explicitly; the no-arg getBytes() depends on the platform default charset.
                fk.setFulltext(fulltext.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                fk.setUuidPage(uniqueId.getRecordId());
                fk.setPage("1");
                fk.setOrder(1L);
                hr.setFulltextKramerius(Collections.singletonList(fk));
                hr.setUpdated(new Date());
                InputStream is = new ByteArrayInputStream(hr.getRawRecord());
                Document doc = documentBuilder.parse(removeFormating(is));
                // remove old TEI element from DC
                NodeList tei = doc.getElementsByTagName(TEI);
                if (tei != null && tei.getLength() > 0) {
                    Node oldTei = tei.item(0);
                    oldTei.getParentNode().removeChild(oldTei);
                }
                // get new TEI element from source document
                Document teiDoc = documentBuilder.parse(removeFormating(teiReader));
                Node newNode = teiDoc.getElementsByTagName(TEI).item(0).cloneNode(true);
                doc.adoptNode(newNode);
                // add TEI element to DC
                Node root = doc.getFirstChild();
                root.appendChild(newNode);
                DOMSource source = new DOMSource(doc.getDocumentElement());
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                StreamResult result = new StreamResult(bos);
                transformer.transform(source, result);
                hr.setRawRecord(bos.toByteArray());
                harvestedRecordDao.persist(hr);
            }
        } finally {
            // Release the client even when parsing, transforming or persisting throws.
            client.close();
        }
    }
    sessionFactory.getCurrentSession().flush();
    sessionFactory.getCurrentSession().clear();
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) StreamResult(javax.xml.transform.stream.StreamResult) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) FulltextKramerius(cz.mzk.recordmanager.server.model.FulltextKramerius) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.w3c.dom.Document) Date(java.util.Date) ByteArrayInputStream(java.io.ByteArrayInputStream) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 20 with HarvestedRecordUniqueId

use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.

the class KrameriusHarvesterNoSorting method downloadRecord.

/**
 * Downloads the raw content of one record and wraps it in an unparsed
 * {@link HarvestedRecord}.
 *
 * <p>The record id is the given uuid combined with {@code harvestedFrom}; the content is
 * fetched from the URL produced by {@code createUrl(uuid)} and stored as the raw record
 * without any parsing.
 *
 * @param uuid identifier of the record to download; also used as the record id
 * @return the record with its raw bytes set, or {@code null} if an {@link IOException}
 *         occurred while downloading
 */
public HarvestedRecord downloadRecord(String uuid) {
    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(harvestedFrom, uuid);
    HarvestedRecord unparsedHr = new HarvestedRecord(id);
    String url = createUrl(uuid);
    logger.info("Harvesting record from: " + url);
    try (InputStream is = httpClient.executeGet(url)) {
        unparsedHr.setRawRecord(IOUtils.toByteArray(is));
    } catch (IOException ioe) {
        // Pass the exception itself so the stack trace and cause are not lost.
        logger.error("Harvesting record from: " + url + " caused IOException!", ioe);
        return null;
    }
    return unparsedHr;
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) InputStream(java.io.InputStream) IOException(java.io.IOException) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Aggregations

HarvestedRecordUniqueId (cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId)21 HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)12 HashMap (java.util.HashMap)8 StepScope (org.springframework.batch.core.configuration.annotation.StepScope)7 JdbcPagingItemReader (org.springframework.batch.item.database.JdbcPagingItemReader)7 SqlPagingQueryProviderFactoryBean (org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean)7 Bean (org.springframework.context.annotation.Bean)7 Date (java.util.Date)6 HarvestedRecordIdRowMapper (cz.mzk.recordmanager.server.export.HarvestedRecordIdRowMapper)5 InputStream (java.io.InputStream)5 Record (org.marc4j.marc.Record)4 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)3 ByteArrayInputStream (java.io.ByteArrayInputStream)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)2 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)2 MarcRecordInterceptor (cz.mzk.recordmanager.server.marc.intercepting.MarcRecordInterceptor)2 FulltextKramerius (cz.mzk.recordmanager.server.model.FulltextKramerius)2 ImportConfiguration (cz.mzk.recordmanager.server.model.ImportConfiguration)2 IOException (java.io.IOException)2