use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.
the class ZakonyProLidiHarvestJobConfig method zakonyProLidiFulltextReader.
/**
 * Step-scoped paging reader that supplies the ids of harvested records to the
 * ZakonyProLidi fulltext job.
 *
 * <p>The query selects {@code import_conf_id, record_id} from
 * {@code harvested_record} for the given import configuration where
 * {@code deleted} is null, ordered by {@code record_id} descending and
 * fetched one row per page.
 *
 * @param configId value of the {@code Constants.JOB_PARAM_CONF_ID} job parameter,
 *                 bound to the {@code :conf_id} query parameter
 * @return an initialized reader; rows are mapped by {@code HarvestedRecordIdRowMapper}
 * @throws Exception if the query provider or the reader cannot be initialized
 */
@Bean(name = Constants.JOB_ID_FULLTEXT_ZAKONYPROLIDI + ":reader")
@StepScope
public ItemReader<HarvestedRecordUniqueId> zakonyProLidiFulltextReader(@Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long configId) throws Exception {
    // Describe the paged query first ...
    SqlPagingQueryProviderFactoryBean queryProviderFactory = new SqlPagingQueryProviderFactoryBean();
    queryProviderFactory.setDataSource(dataSource);
    queryProviderFactory.setSelectClause("SELECT import_conf_id, record_id");
    queryProviderFactory.setFromClause("FROM harvested_record");
    queryProviderFactory.setWhereClause("WHERE import_conf_id = :conf_id and deleted is null");
    queryProviderFactory.setSortKeys(ImmutableMap.of("record_id", Order.DESCENDING));

    // ... then the named parameters it refers to ...
    Map<String, Object> queryParams = new HashMap<>();
    queryParams.put("conf_id", configId);

    // ... and finally wire everything into the reader.
    JdbcPagingItemReader<HarvestedRecordUniqueId> pagingReader = new JdbcPagingItemReader<>();
    pagingReader.setDataSource(dataSource);
    pagingReader.setQueryProvider(queryProviderFactory.getObject());
    pagingReader.setParameterValues(queryParams);
    pagingReader.setRowMapper(new HarvestedRecordIdRowMapper());
    pagingReader.setPageSize(1);
    pagingReader.afterPropertiesSet();
    return pagingReader;
}
use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.
the class ExportRecordsJobConfig method exportRecordsForClassifierReader.
/**
 * Step-scoped paging reader that supplies the ids of harvested records to the
 * "export records for classifier" job.
 *
 * <p>The query selects {@code import_conf_id, record_id} from
 * {@code harvested_record} for the given import configuration, restricted to
 * rows whose {@code dedup_record_id} is shared with some harvested record that
 * has at least one row in {@code fulltext_kramerius}. Results are sorted by
 * {@code record_id} and read 20 rows per page.
 *
 * @param configId value of the {@code Constants.JOB_PARAM_CONF_ID} job parameter,
 *                 bound to the {@code :conf_id} query parameter
 * @return an initialized reader; rows are mapped by {@code HarvestedRecordIdRowMapper}
 * @throws Exception if the query provider or the reader cannot be initialized
 */
@Bean(name = "exportRecordsForClassifierJob:exportRecordsForClassifierReader")
@StepScope
public ItemReader<HarvestedRecordUniqueId> exportRecordsForClassifierReader(@Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long configId) throws Exception {
    // The literal pieces below concatenate to exactly the same SQL text as before;
    // they are only laid out one per line for readability.
    String whereClause = "WHERE import_conf_id = :conf_id AND dedup_record_id IN ( "
            + " SELECT dedup_record_id "
            + " FROM harvested_record "
            + " WHERE EXISTS( "
            + " SELECT 1 "
            + " FROM fulltext_kramerius "
            + " WHERE harvested_record.id = fulltext_kramerius.harvested_record_id "
            + " ))";

    SqlPagingQueryProviderFactoryBean queryProviderFactory = new SqlPagingQueryProviderFactoryBean();
    queryProviderFactory.setDataSource(dataSource);
    queryProviderFactory.setSelectClause("SELECT import_conf_id, record_id");
    queryProviderFactory.setFromClause("FROM harvested_record");
    queryProviderFactory.setWhereClause(whereClause);
    queryProviderFactory.setSortKey("record_id");

    Map<String, Object> queryParams = new HashMap<>();
    queryParams.put("conf_id", configId);

    JdbcPagingItemReader<HarvestedRecordUniqueId> pagingReader = new JdbcPagingItemReader<>();
    pagingReader.setDataSource(dataSource);
    pagingReader.setQueryProvider(queryProviderFactory.getObject());
    pagingReader.setParameterValues(queryParams);
    pagingReader.setRowMapper(new HarvestedRecordIdRowMapper());
    pagingReader.setPageSize(20);
    pagingReader.afterPropertiesSet();
    return pagingReader;
}
use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.
the class HarvestedRecordLimitedRowMapper method mapRow.
/**
 * Maps the current row to a partially populated {@link HarvestedRecord}.
 *
 * <p>Only the unique id (import configuration loaded via
 * {@code oaiHarvestConfigurationDao} plus the record id), the publication year
 * and the format are set; every other property keeps its constructor default.
 *
 * @param rs     result set positioned on the row to map; must expose the columns
 *               {@code IMPORT_CONF_ID}, {@code RECORD_ID}, {@code PUBLICATION_YEAR}
 *               and {@code FORMAT}
 * @param rowNum index of the current row (not used)
 * @return the mapped record
 * @throws SQLException if a column cannot be read
 */
@Override
public HarvestedRecord mapRow(ResultSet rs, int rowNum) throws SQLException {
    long importConfId = rs.getLong("IMPORT_CONF_ID");
    HarvestedRecord mapped = new HarvestedRecord(
            new HarvestedRecordUniqueId(
                    oaiHarvestConfigurationDao.load(importConfId),
                    rs.getString("RECORD_ID")));

    // NOTE(review): ResultSet.getLong yields 0 for SQL NULL, so a NULL
    // publication year ends up as 0 here - confirm that this is intended.
    long publicationYear = rs.getLong("PUBLICATION_YEAR");
    mapped.setPublicationYear(publicationYear);

    String format = rs.getString("FORMAT");
    mapped.setFormat(format);
    return mapped;
}
use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.
the class ManuscriptoriumFulltextWriter method write.
/**
 * Fetches the fulltext for every given record that does not have one yet,
 * stores it as a single {@link FulltextKramerius} page and replaces the TEI
 * element inside the record's raw XML with the one from the freshly downloaded
 * source document.
 *
 * <p>Records that already have a fulltext are skipped. When the downloaded
 * fulltext is empty only a warning is logged and the record is left untouched.
 * After all items are processed the current Hibernate session is flushed and
 * cleared.
 *
 * @param items unique ids of the harvested records to process
 * @throws IllegalStateException if the downloaded source document contains no TEI element
 * @throws Exception if downloading, XML parsing/serialization or persisting fails
 */
@Override
public void write(List<? extends HarvestedRecordUniqueId> items) throws Exception {
    for (HarvestedRecordUniqueId uniqueId : items) {
        HarvestedRecord hr = harvestedRecordDao.get(uniqueId);
        if (!hr.getFulltextKramerius().isEmpty()) {
            // Fulltext already present - nothing to download.
            continue;
        }

        // Presumably (re)initializes client, fulltextReader and teiReader for this
        // record - TODO confirm against getNextFulltext.
        getNextFulltext(uniqueId.getRecordId());
        try {
            String fulltext = fulltextReader.next();
            if (fulltext.isEmpty()) {
                logger.warn("Fulltext from " + FULLTEXT_URL + uniqueId.getRecordId() + " is empty.");
            } else {
                FulltextKramerius fk = new FulltextKramerius();
                // Encode explicitly as UTF-8 so the stored bytes do not depend on the
                // JVM's platform default charset.
                fk.setFulltext(fulltext.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                fk.setUuidPage(uniqueId.getRecordId());
                fk.setPage("1");
                fk.setOrder(1L);
                hr.setFulltextKramerius(Collections.singletonList(fk));
                hr.setUpdated(new Date());

                InputStream is = new ByteArrayInputStream(hr.getRawRecord());
                Document doc = documentBuilder.parse(removeFormating(is));

                // remove old TEI element from DC
                NodeList oldTei = doc.getElementsByTagName(TEI);
                if (oldTei != null && oldTei.getLength() > 0) {
                    Node stale = oldTei.item(0);
                    stale.getParentNode().removeChild(stale);
                }

                // get new TEI element from source document
                Document teiDoc = documentBuilder.parse(removeFormating(teiReader));
                Node sourceTei = teiDoc.getElementsByTagName(TEI).item(0);
                if (sourceTei == null) {
                    // Fail with context instead of a bare NullPointerException.
                    throw new IllegalStateException("Source document for record "
                            + uniqueId.getRecordId() + " contains no " + TEI + " element.");
                }
                Node newTei = sourceTei.cloneNode(true);
                doc.adoptNode(newTei);

                // add TEI element to DC; getDocumentElement() is used because
                // getFirstChild() may return a comment, processing instruction or
                // doctype node that precedes the root element.
                doc.getDocumentElement().appendChild(newTei);

                DOMSource source = new DOMSource(doc.getDocumentElement());
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                StreamResult result = new StreamResult(bos);
                transformer.transform(source, result);
                hr.setRawRecord(bos.toByteArray());
                harvestedRecordDao.persist(hr);
            }
        } finally {
            // Release the client even when processing of this record fails.
            client.close();
        }
    }
    sessionFactory.getCurrentSession().flush();
    sessionFactory.getCurrentSession().clear();
}
use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary.
the class KrameriusHarvesterNoSorting method downloadRecord.
/**
 * Downloads the raw record with the given uuid and wraps it in a
 * {@link HarvestedRecord}.
 *
 * <p>The returned record is not parsed: only its unique id (built from
 * {@code harvestedFrom} and the uuid) and its raw record bytes are set here.
 *
 * @param uuid identifier of the record; also used as the record id
 * @return the unparsed record, or {@code null} if the download failed with an
 *         {@link IOException}
 */
public HarvestedRecord downloadRecord(String uuid) {
    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(harvestedFrom, uuid);
    HarvestedRecord unparsedHr = new HarvestedRecord(id);
    String url = createUrl(uuid);
    logger.info("Harvesting record from: " + url);
    try (InputStream is = httpClient.executeGet(url)) {
        // The stream is consumed exactly once, so no mark/reset is needed.
        unparsedHr.setRawRecord(IOUtils.toByteArray(is));
    } catch (IOException ioe) {
        // Pass the exception itself so that the stack trace and cause are logged.
        logger.error("Harvesting record from: " + url + " caused IOException!", ioe);
        return null;
    }
    // Return the unparsed record (most of its properties are not set yet).
    return unparsedHr;
}
Aggregations