Use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary: class MiscellaneousJobsConfig, method generateItemIdReader.
/**
 * Step-scoped reader for the "generate item id" job.
 * <p>
 * Pages through non-deleted rows of {@code harvested_record}, ordered by id;
 * when the {@code conf_id} job parameter is supplied, only records belonging to
 * that import configuration are read.
 *
 * @param confId optional import configuration id taken from the job parameters;
 *               {@code null} means "all configurations"
 * @return a paging reader emitting {@link HarvestedRecordUniqueId} items
 * @throws Exception if the query provider or reader cannot be initialized
 */
@Bean(name = Constants.JOB_ID_GENERATE_ITEM_ID + ":generateItemIdReader")
@StepScope
public synchronized ItemReader<HarvestedRecordUniqueId> generateItemIdReader(@Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long confId) throws Exception {
	JdbcPagingItemReader<HarvestedRecordUniqueId> reader = new JdbcPagingItemReader<>();
	SqlPagingQueryProviderFactoryBean queryProviderFactory = new SqlPagingQueryProviderFactoryBean();
	queryProviderFactory.setDataSource(dataSource);
	queryProviderFactory.setSelectClause("SELECT id, import_conf_id, record_id");
	queryProviderFactory.setFromClause("FROM harvested_record");
	StringBuilder whereClause = new StringBuilder("WHERE deleted is null");
	if (confId != null) {
		// restrict the page query to one import configuration
		whereClause.append(" AND import_conf_id=:conf_id");
		Map<String, Object> namedParameters = new HashMap<>();
		namedParameters.put("conf_id", confId);
		reader.setParameterValues(namedParameters);
	}
	queryProviderFactory.setWhereClause(whereClause.toString());
	queryProviderFactory.setSortKey("id");
	reader.setRowMapper(new HarvestedRecordIdRowMapper());
	reader.setPageSize(20);
	reader.setQueryProvider(queryProviderFactory.getObject());
	reader.setDataSource(dataSource);
	reader.afterPropertiesSet();
	return reader;
}
Use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary: class GenerateItemIdWriter, method write.
/**
 * Regenerates item ids for the given harvested records.
 * <p>
 * For each unique id the record is loaded, and — when it is a non-empty
 * marc21-xml record whose source configuration defines an item id field —
 * its raw content is re-written through {@link DefaultMarcInterceptor}.
 * Failures on individual records are logged and do not abort the batch.
 *
 * @param items unique ids of the harvested records to process
 * @throws Exception never propagated per-record; declared by the writer contract
 */
@Override
public void write(List<? extends HarvestedRecordUniqueId> items) throws Exception {
	for (HarvestedRecordUniqueId uniqueId : items) {
		try {
			progress.incrementAndLogProgress();
			HarvestedRecord hr = hrDao.get(uniqueId);
			// Skip missing or empty records, non-MARC formats and configurations
			// without an item id field. The constant-first equals() is null-safe:
			// the original hr.getFormat().equals(...) would NPE on a null format.
			if (hr == null || hr.getRawRecord() == null || hr.getRawRecord().length == 0
					|| !"marc21-xml".equals(hr.getFormat())
					|| hr.getHarvestedFrom().getItemId() == null) {
				continue;
			}
			Record record = marcXmlParser.parseUnderlyingRecord(new ByteArrayInputStream(hr.getRawRecord()));
			hr.setRawRecord(new DefaultMarcInterceptor(record, hr.getHarvestedFrom(), uniqueId.getRecordId()).intercept());
		} catch (Exception ex) {
			// best-effort per record: log with the failing id and continue
			logger.error(String.format("Exception thrown in harvested_record with id=%s", uniqueId), ex);
		}
	}
}
Use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary: class MarcXmlDedupKeyParserTest, method parseCorrectRecord.
/**
 * Parses a known-good MARC XML record and verifies every dedup key field
 * (ISBN, title, physical format, publication year, author key/string, scale,
 * UUID, ISSN series and series order) against the expected constants.
 */
@Test
public void parseCorrectRecord() throws Exception {
	// try-with-resources: the original leaked the classpath InputStream
	byte[] rawRecord;
	try (InputStream is = this.getClass().getResourceAsStream("/records/marcxml/MZK01-001439241.xml")) {
		rawRecord = ByteStreams.toByteArray(is);
	}
	ImportConfiguration ic = importConfDao.get(300L);
	HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(ic, "1");
	HarvestedRecord record = new HarvestedRecord(id);
	record.setHarvestedFrom(ic);
	record.setFormat("marc21-xml");
	record.setRawRecord(rawRecord);
	record.setId(1L);
	parser.parse(record);
	Assert.assertTrue(record.getIsbns().size() > 0);
	Assert.assertEquals(record.getIsbns().get(0).getIsbn(), EXPECTED_ISBN);
	Assert.assertEquals(record.getTitles().size(), 1);
	Assert.assertEquals(record.getTitles().get(0).getTitleStr(), EXPECTED_TITLE);
	Assert.assertEquals(record.getPhysicalFormats().size(), 1);
	Assert.assertEquals(record.getPhysicalFormats().get(0).getName(), HarvestedRecordFormatEnum.BOOKS.name());
	// Long.valueOf replaces the deprecated new Long(2014) boxing constructor
	Assert.assertEquals(record.getPublicationYear(), Long.valueOf(2014));
	Assert.assertEquals(record.getAuthorAuthKey(), EXPECTED_AUTHORAUTHKEY);
	Assert.assertEquals(record.getAuthorString(), EXPECTED_AUTHORSTRING);
	Assert.assertEquals(record.getScale(), EXPECTED_SCALE);
	Assert.assertEquals(record.getUuid(), EXPECTED_UUID);
	Assert.assertEquals(record.getIssnSeries(), EXPECTED_ISSNSERIES);
	Assert.assertEquals(record.getIssnSeriesOrder(), EXPECTED_ISSNSERIESORDER);
}
Use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary: class ZakonyProLidiTest, method metadataHarvestTest.
/**
 * Runs the zakonyprolidi.cz harvest job against a mocked HTTP client serving a
 * canned year-document list for 1945, then verifies that both records from the
 * fixture were stored under import configuration 300.
 */
@Test
public void metadataHarvestTest() throws Exception {
	// stub the single API call the harvester is expected to make
	reset(httpClient);
	InputStream yearDocList = this.getClass().getResourceAsStream("/import/zakony/Records.xml");
	expect(httpClient.executeGet("http://www.zakonyprolidi.cz/api/v1/data.xml/YearDocList?apikey=test&Collection=cs&Year=1945")).andReturn(yearDocList);
	replay(httpClient);
	// launch the harvest job for the single year 1945
	Job harvestJob = jobRegistry.getJob(Constants.JOB_ID_HARVEST_ZAKONYPROLIDI);
	Map<String, JobParameter> paramMap = new HashMap<>();
	paramMap.put(Constants.JOB_PARAM_CONF_ID, new JobParameter(300L));
	paramMap.put(Constants.JOB_PARAM_FROM_DATE, new JobParameter(1945L));
	paramMap.put(Constants.JOB_PARAM_UNTIL_DATE, new JobParameter(1945L));
	jobLauncher.run(harvestJob, new JobParameters(paramMap));
	// both documents from the fixture must now exist in the database
	Assert.assertNotNull(hrDao.get(new HarvestedRecordUniqueId(300L, "1945-1")));
	Assert.assertNotNull(hrDao.get(new HarvestedRecordUniqueId(300L, "1945-2")));
}
Use of cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId in project RecordManager2 by moravianlibrary: class ImportRecordsWriter, method writeInner.
/**
 * Persists the given batches of MARC records as harvested records.
 * <p>
 * Each record is serialized to MARC XML, its unique id extracted (optionally
 * post-processed by {@code regexpExtractor}), optionally run through a
 * configured interceptor, then stored — creating a new {@link HarvestedRecord}
 * when none exists for the id/configuration pair. Records rejected by the
 * filtering step are marked deleted.
 *
 * @param items batches of MARC records to import
 * @throws Exception the first per-record failure, after logging it; aborts the batch
 */
protected void writeInner(List<? extends List<Record>> items) throws Exception {
	for (List<Record> records : items) {
		for (Record currentRecord : records) {
			try {
				// serialize the in-memory record to MARC XML bytes
				ByteArrayOutputStream outStream = new ByteArrayOutputStream();
				MarcWriter marcWriter = new MarcXmlWriter(outStream, true);
				marcWriter.setConverter(ISOCharConvertor.INSTANCE);
				marcWriter.write(currentRecord);
				marcWriter.close();
				// need recordId before interception
				byte[] recordContent = outStream.toByteArray();
				MetadataRecord metadata = parseMetadata(recordContent);
				String recordId = metadata.getUniqueId();
				if (regexpExtractor != null) {
					recordId = regexpExtractor.extract(recordId);
				}
				if (harvestConfiguration.isInterceptionEnabled()) {
					MarcRecordInterceptor interceptor = marcInterceptorFactory.getInterceptor(harvestConfiguration, recordId, recordContent);
					if (interceptor != null) {
						byte[] recordContentNew = interceptor.intercept();
						if (!Arrays.equals(recordContent, recordContentNew)) {
							// if record content was changed, parse metadata again
							metadata = parseMetadata(recordContentNew);
							// set intercepted content
							recordContent = recordContentNew;
						}
					}
				}
				HarvestedRecord hr = harvestedRecordDao.findByIdAndHarvestConfiguration(recordId, configurationId);
				if (hr == null) {
					// first time we see this id for this configuration
					HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(harvestConfiguration, recordId);
					hr = new HarvestedRecord(id);
					// TODO detect format
					hr.setFormat("marc21-xml");
					hr.setHarvestedFrom(harvestConfiguration);
				}
				hr.setUpdated(new Date());
				hr.setDeleted(null);
				hr.setRawRecord(recordContent);
				// persist before dedup-key parsing so the record has an identity;
				// persisted again below once parsing/filtering may have changed it
				harvestedRecordDao.persist(hr);
				dedupKeysParser.parse(hr, metadata);
				if (harvestConfiguration.isFilteringEnabled() && !hr.getShouldBeProcessed()) {
					logger.debug("Filtered record: " + hr.getUniqueId());
					hr.setDeleted(new Date());
				}
				harvestedRecordDao.persist(hr);
				progress.incrementAndLogProgress();
			} catch (Exception e) {
				// log the cause before rethrowing — the original warn dropped
				// the exception, leaving no context in the log
				logger.warn("Error occurred in processing record", e);
				throw e;
			}
		}
	}
}
Aggregations