Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
The class KrameriusFulltextJobConfig, method missingReader:
@Bean(name = Constants.JOB_ID_MISSING_FULLTEXT_KRAMERIUS + ":reader")
@StepScope
public ItemReader<HarvestedRecord> missingReader(
        @Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long configId,
        @Value("#{jobParameters[" + Constants.JOB_PARAM_FULLTEXT_FIRST + "]}") String firstId,
        @Value("#{jobParameters[" + Constants.JOB_PARAM_FULLTEXT_LAST + "]}") String lastId,
        @Value("#{stepExecutionContext[" + Constants.JOB_PARAM_FROM_DATE + "]"
                + "?:jobParameters[" + Constants.JOB_PARAM_FROM_DATE + "]}") Date from,
        @Value("#{stepExecutionContext[" + Constants.JOB_PARAM_UNTIL_DATE + "]"
                + "?:jobParameters[" + Constants.JOB_PARAM_UNTIL_DATE + "]}") Date to) throws Exception {
    JdbcPagingItemReader<HarvestedRecord> reader = new JdbcPagingItemReader<>();
    SqlPagingQueryProviderFactoryBean pqpf = new SqlPagingQueryProviderFactoryBean();
    pqpf.setDataSource(dataSource);
    pqpf.setSelectClause("SELECT *");
    pqpf.setFromClause("FROM harvested_record hr");
    // select records of the given import configuration that have no fulltext yet
    String whereClause = "WHERE hr.import_conf_id = :configId AND NOT EXISTS ("
            + "SELECT 1 FROM fulltext_kramerius fk WHERE hr.id = fk.harvested_record_id)";
    Map<String, Object> parameterValues = new HashMap<>();
    parameterValues.put("configId", configId);
    // optional restrictions: update-date interval and record-id interval
    if (from != null) {
        whereClause += " AND hr.updated >= :from";
        parameterValues.put("from", new Timestamp(from.getTime()));
    }
    if (to != null) {
        whereClause += " AND hr.updated <= :to";
        parameterValues.put("to", new Timestamp(to.getTime()));
    }
    if (firstId != null) {
        whereClause += " AND hr.record_id >= :firstId";
        parameterValues.put("firstId", firstId);
    }
    if (lastId != null) {
        whereClause += " AND hr.record_id <= :lastId";
        parameterValues.put("lastId", lastId);
    }
    pqpf.setWhereClause(whereClause);
    pqpf.setSortKey("record_id");
    reader.setParameterValues(parameterValues);
    reader.setRowMapper(harvestedRecordRowMapper);
    reader.setPageSize(PAGE_SIZE);
    reader.setQueryProvider(pqpf.getObject());
    reader.setDataSource(dataSource);
    reader.afterPropertiesSet();
    return reader;
}
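Because the reader is step-scoped, the configuration id, the record-id interval, and the date interval all arrive as job parameters. A minimal sketch of launching such a job follows; the literal parameter keys stand in for the Constants.JOB_PARAM_* values and are assumptions, as is the job bean name:

import java.util.Date;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;

public class MissingFulltextLauncher {

    private final JobLauncher jobLauncher;
    private final Job missingFulltextJob; // assumed name of the job this reader belongs to

    public MissingFulltextLauncher(JobLauncher jobLauncher, Job missingFulltextJob) {
        this.jobLauncher = jobLauncher;
        this.missingFulltextJob = missingFulltextJob;
    }

    public void run(Long configId, Date from, Date until) throws Exception {
        // the key strings are assumptions standing in for Constants.JOB_PARAM_* values
        JobParameters params = new JobParametersBuilder()
                .addLong("configurationId", configId)
                .addDate("from", from)
                .addDate("until", until)
                .toJobParameters();
        jobLauncher.run(missingFulltextJob, params);
    }
}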
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
The class RegenerateDedupKeysWriter, method write:
@Override
public void write(List<? extends Long> ids) {
    for (Long id : ids) {
        HarvestedRecord rec = harvestedRecordDao.get(id);
        // skip records that already have dedup keys, records whose source
        // doesn't generate them, and records without raw content
        if (rec.getDedupKeysHash() != null || !rec.getHarvestedFrom().isGenerateDedupKeys()
                || rec.getRawRecord() == null || rec.getRawRecord().length == 0) {
            continue;
        }
        try {
            rec = dedupKeysParser.parse(rec);
            harvestedRecordDao.persist(rec);
            ++totalCount;
            logProgress();
        } catch (InvalidMarcException ime) {
            logger.warn("Invalid MARC in record: " + rec.getId());
        } catch (Exception e) {
            logger.warn("Skipping record due to error: " + e.toString());
        }
    }
}
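The writer counts successfully re-parsed records in totalCount and reports progress through logProgress(). The project's implementation of that helper is not shown here; a minimal sketch of what it might look like, assuming the class's existing logger and an illustrative LOG_PERIOD interval:

private static final int LOG_PERIOD = 1000; // assumed reporting interval, not the project's value

private long totalCount = 0;

// log every LOG_PERIOD records so a long-running regeneration job shows liveness
private void logProgress() {
    if (totalCount % LOG_PERIOD == 0) {
        logger.info("Processed " + totalCount + " records");
    }
}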
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
The class KrameriusFulltextProcessor, method process:
@Override
public HarvestedRecord process(HarvestedRecord item) throws Exception {
    logger.debug("Processing harvested record: " + item + " uniqueId: " + item.getUniqueId());
    String policy;
    String model;
    // read the complete HarvestedRecord using the DAO
    HarvestedRecord rec = recordDao.findByIdAndHarvestConfiguration(item.getUniqueId().getRecordId(), confId);
    InputStream is = new ByteArrayInputStream(rec.getRawRecord());
    // get the Kramerius policy and model from the Dublin Core record
    try {
        DublinCoreRecord dcRecord = parser.parseRecord(is);
        MetadataDublinCoreRecord mdrc = new MetadataDublinCoreRecord(dcRecord);
        policy = mdrc.getPolicyKramerius();
        model = mdrc.getModelKramerius();
    } catch (InvalidDcException e) {
        logger.warn("InvalidDcException for record with id: " + item.getUniqueId());
        logger.warn(e.getMessage());
        // nothing more to do: log the problem and return the record as read from the DAO
        return rec;
    }
    // modify the record only if it is public or private fulltexts may be downloaded
    if ("public".equals(policy) || downloadPrivateFulltexts) { // null-safe comparison
        logger.debug("Processor: privacy condition fulfilled, reading pages");
        String rootUuid = rec.getUniqueId().getRecordId();
        List<FulltextKramerius> pages;
        if ("periodical".equals(model)) {
            logger.info("Using (periodical) fulltexter \"for root\" for uuid " + rootUuid + ".");
            pages = fulltexter.getFulltextForRoot(rootUuid);
        } else {
            logger.info("Using (monograph/default) fulltexter \"for parent\" for uuid " + rootUuid + ".");
            pages = fulltexter.getFulltextObjects(rootUuid);
        }
        // an empty page list means there is nothing to store; return the original record
        if (pages.isEmpty()) {
            return rec;
        }
        // delete old FulltextKramerius rows from the database before adding new ones
        fmDao.deleteFulltext(rec.getId());
        rec.setFulltextKramerius(pages);
    } else {
        logger.debug("Processor: privacy condition NOT fulfilled, skipping record");
    }
    return rec;
}
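For context, a processor like this typically sits between the paging reader shown earlier and a writer in a chunk-oriented Spring Batch step. A minimal wiring sketch; the step name, commit interval, and writer bean are illustrative assumptions, not taken from the project:

import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import cz.mzk.recordmanager.server.model.HarvestedRecord;

@Configuration
public class KrameriusFulltextStepSketch {

    private final StepBuilderFactory steps;

    public KrameriusFulltextStepSketch(StepBuilderFactory steps) {
        this.steps = steps;
    }

    // hypothetical wiring; reader, processor, and writer beans come from the Spring context
    @Bean
    public Step krameriusFulltextStep(ItemReader<HarvestedRecord> missingReader,
            KrameriusFulltextProcessor processor,
            ItemWriter<HarvestedRecord> harvestedRecordWriter) {
        return steps.get("krameriusFulltextStep")
                .<HarvestedRecord, HarvestedRecord>chunk(20) // commit interval is an assumption
                .reader(missingReader)
                .processor(processor)
                .writer(harvestedRecordWriter)
                .build();
    }
}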
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
The class ExportRecordsForClassifierProcessor, method process:
@Override
public String process(HarvestedRecord.HarvestedRecordUniqueId recordId) throws Exception {
    HarvestedRecord record = harvestedRecordDao.get(recordId);
    if (record != null && record.getRawRecord() != null && record.getRawRecord().length != 0) {
        InputStream is = new ByteArrayInputStream(record.getRawRecord());
        MarcRecord marcRecord = marcXmlParser.parseRecord(is);
        // make sure the record carries its OAI identifier
        if (marcRecord.getDataFields(OAI_FIELD).isEmpty()) {
            marcRecord.addDataField(OAI_FIELD, ' ', ' ', "a", record.getUniqueId().getRecordId());
        }
        // export only records that have a UDC classification (field 080)
        return marcRecord.getDataFields("080").isEmpty() ? null : marcRecord.export(iOFormat);
    }
    return null;
}
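Returning null from an ItemProcessor tells Spring Batch to drop the item, so only records carrying a UDC classification (MARC field 080) reach the writer. A minimal sketch of invoking the processor directly, for instance from a test; the wrapper class is purely illustrative, and imports for the project's own classes are omitted on the assumption the sketch lives alongside them:

import cz.mzk.recordmanager.server.model.HarvestedRecord;

public class ClassifierExportExample {

    // processor obtained from the Spring context; wiring omitted for brevity
    private final ExportRecordsForClassifierProcessor processor;

    public ClassifierExportExample(ExportRecordsForClassifierProcessor processor) {
        this.processor = processor;
    }

    // prints the export of one record, or nothing when the record has no 080 field
    public void exportOne(HarvestedRecord.HarvestedRecordUniqueId id) throws Exception {
        String exported = processor.process(id);
        if (exported != null) {
            System.out.println(exported);
        }
    }
}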
Use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
The class FilterCaslinRecordsWriter, method write:
@Override
public void write(List<? extends HarvestedRecordUniqueId> items) throws Exception {
    for (HarvestedRecordUniqueId uniqueId : items) {
        try {
            HarvestedRecord hr = hrDao.get(uniqueId);
            if (hr == null || hr.getRawRecord().length == 0) {
                continue;
            }
            MarcRecord marc = marcXmlParser.parseRecord(new ByteArrayInputStream(hr.getRawRecord()));
            Record record = marcXmlParser.parseUnderlyingRecord(new ByteArrayInputStream(hr.getRawRecord()));
            boolean updated = false;
            // rebuild the record: copy leader and control fields, then data fields in tag order
            Record newRecord = new RecordImpl();
            MarcFactory marcFactory = new MarcFactoryImpl();
            newRecord.setLeader(record.getLeader());
            for (ControlField cf : record.getControlFields()) {
                newRecord.addVariableField(cf);
            }
            Map<String, List<DataField>> dfMap = marc.getAllFields();
            for (String tag : new TreeSet<>(dfMap.keySet())) {
                for (DataField df : dfMap.get(tag)) {
                    // add $q0 to field 996 when its sigla ($e) is in the database
                    if (df.getTag().equals("996")) {
                        Subfield sigla = df.getSubfield('e');
                        if (sigla != null && caslinFilter.filter(sigla.getData())
                                && (df.getSubfield('q') == null || !df.getSubfield('q').getData().equals("0"))) {
                            df.addSubfield(marcFactory.newSubfield('q', "0"));
                            updated = true;
                        }
                    }
                    newRecord.addVariableField(df);
                }
            }
            hr.setRawRecord(new MarcRecordImpl(newRecord).export(IOFormat.XML_MARC).getBytes(StandardCharsets.UTF_8));
            // mark records that no longer match the filter as deleted
            if (hr.getDeleted() == null && !mrFactory.getMetadataRecord(hr).matchFilter()) {
                hr.setDeleted(new Date());
                updated = true;
            }
            if (updated) {
                hr.setUpdated(new Date());
                hrDao.persist(hr);
            }
        } catch (Exception ex) {
            logger.error(String.format("Exception thrown when filtering harvested_record with id=%s", uniqueId), ex);
        }
    }
}
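The caslinFilter above decides whether a 996 $e sigla (library identifier) is known. Its real implementation is database-backed; a minimal in-memory stand-in could look like the following, where the trimming and upper-casing normalization is an assumption rather than the project's behavior:

import java.util.Set;

// simplified, in-memory stand-in for the project's DB-backed sigla filter
public class InMemoryCaslinFilter {

    private final Set<String> knownSiglas;

    public InMemoryCaslinFilter(Set<String> knownSiglas) {
        this.knownSiglas = knownSiglas;
    }

    // true when the sigla is present in the database (here: the set)
    public boolean filter(String sigla) {
        return sigla != null && knownSiglas.contains(sigla.trim().toUpperCase());
    }
}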