use of com.robertsanek.data.etl.Etl in project core by z1lc.
the class MasterEtl method runIndividualEtl.
/**
 * Runs a single ETL class under the {@code individualEtlRetry} Failsafe policy and summarizes the outcome.
 *
 * <p>Each attempt injects a new instance of {@code etlClazz}, calls {@code getObjects()}, rejects results
 * that reach {@code ROW_LIMIT} rows, and then persists the rows in a session and transaction of their own
 * (see {@link #saveRowsInFreshTransaction}). Exceptions that escape the retry policy are caught and logged
 * here rather than propagated; the failure is reported through a notification and through the returned
 * {@code EtlRun}. A successful run that produced zero rows also triggers a notification.
 *
 * @param etlClazz the concrete ETL class to instantiate and run
 * @param sf       session factory used to open the session the rows are saved through
 * @return a record of this run (timings, row count, try number, success flag)
 */
@SuppressWarnings("rawtypes")
private EtlRun runIndividualEtl(Class<? extends Etl> etlClazz, SessionFactory sf) {
  Stopwatch thisEtlStopwatch = Stopwatch.createStarted();
  ZonedDateTime thisEtlStarted = ZonedDateTime.now();
  AtomicInteger max = new AtomicInteger(0);
  AtomicBoolean successful = new AtomicBoolean(false);
  AtomicLong secondsEtl = new AtomicLong(0);
  AtomicLong tryNumber = new AtomicLong(0);
  // Placeholder so the failure notification always has a stack trace to print.
  Exception exceptionDuringEtl = new RuntimeException("dummy exception -- didn't catch any other exceptions");
  try {
    Failsafe.with(individualEtlRetry).run(() -> {
      log.info("Running ETL with class %s (try #%s).", etlClazz.getName(), tryNumber.incrementAndGet());
      Etl instance = InjectUtils.inject(etlClazz);
      List objects = instance.getObjects();
      max.set(Math.min(ROW_LIMIT, objects.size()));
      if (max.get() == ROW_LIMIT) {
        throw new RuntimeException(String.format(
            "ETL %s has %d+ rows. You should probably do some kind of pre-aggregation.",
            etlClazz.getName(), ROW_LIMIT));
      }
      // Everything measured so far counts as extract/transform; restart the stopwatch for the load phase.
      secondsEtl.set(thisEtlStopwatch.elapsed().getSeconds());
      thisEtlStopwatch.reset().start();
      saveRowsInFreshTransaction(sf, objects, max.get());
      successful.set(true);
    });
  } catch (Exception e) {
    log.error(e);
    exceptionDuringEtl = e;
  }

  long secondsTransaction = thisEtlStopwatch.elapsed().getSeconds();
  long totalRuntime = secondsEtl.get() + secondsTransaction;
  String template = String.format("ETL with class %s completed %s in %s %s on try #%s, generating %s rows.",
      etlClazz.getName(),
      successful.get() ? "successfully" : "unsuccessfully",
      totalRuntime,
      totalRuntime == 1 ? "second" : "seconds",
      tryNumber.get(),
      max);
  String timeOfDay = LocalDateTime.now().format(DateTimeFormatter.ofPattern("HH:mm:ss", Locale.ENGLISH));
  if (successful.get()) {
    log.info(template);
    if (max.get() == 0) {
      notificationSender.sendNotificationDefault(
          String.format("%s generated 0 rows at %s!", etlClazz.getSimpleName(), timeOfDay),
          "Check output.");
    }
  } else {
    notificationSender.sendNotificationDefault(
        String.format("%s failed at %s!", etlClazz.getSimpleName(), timeOfDay),
        template + "\n\n" + ExceptionUtils.getStackTrace(exceptionDuringEtl));
    log.error(template);
  }

  // NOTE(review): using_parallel is always recorded as true here, independent of how the caller
  // scheduled this ETL -- confirm whether that is intended.
  return EtlRun.EtlRunBuilder.anEtlRun()
      .withId(ETL_RUN_ID_GENERATOR.getAndIncrement())
      .withClass_name(etlClazz.getName())
      .withStart_time(thisEtlStarted)
      .withEnd_time(ZonedDateTime.now())
      .withRows_generated((long) max.get())
      .withThread_name(Thread.currentThread().getName())
      .withUsing_parallel(true)
      .withWas_successful(successful.get())
      .withIs_slow(etlClazz.getAnnotation(SlowEtl.class) != null)
      .withUses_local_files(etlClazz.getAnnotation(UsesLocalFiles.class) != null)
      .withSeconds_in_extract_and_transform(secondsEtl.get())
      .withSeconds_in_load(secondsTransaction)
      .withTry_number(tryNumber.get())
      .build();
}

/**
 * Saves the first {@code rowCount} elements of {@code rows} inside a brand-new session and transaction.
 *
 * <p>A new session per call keeps retries independent: Hibernate requires a session to be discarded
 * once it has thrown, and rows flushed by a failed attempt must not linger in a shared transaction.
 * On any failure the transaction is rolled back (if still active) and the original exception rethrown.
 *
 * @param sf       factory used to open the session
 * @param rows     objects to persist
 * @param rowCount number of leading elements of {@code rows} to save
 */
@SuppressWarnings("rawtypes")
private void saveRowsInFreshTransaction(SessionFactory sf, List rows, int rowCount) {
  try (Session session = sf.openSession()) {
    Transaction insertTransaction = session.getTransaction();
    // The timeout only applies to a transaction started by a later begin(), so set it first.
    insertTransaction.setTimeout((int) TRANSACTION_TIMEOUT.getSeconds());
    insertTransaction.begin();
    try {
      for (int i = 0; i < rowCount; i++) {
        try {
          session.save(rows.get(i));
        } catch (NonUniqueObjectException e) {
          // Duplicate identifiers are logged and skipped; they do not fail the ETL.
          log.error(e.toString());
        }
        // Flush once per full batch (not right after the very first row) and release memory.
        if ((i + 1) % getBatchingSize() == 0) {
          session.flush();
          session.clear();
        }
      }
      insertTransaction.commit();
    } catch (RuntimeException e) {
      try {
        if (insertTransaction.isActive()) {
          insertTransaction.rollback();
        }
      } catch (RuntimeException rollbackFailure) {
        e.addSuppressed(rollbackFailure);
      }
      throw e;
    }
  }
}
use of com.robertsanek.data.etl.Etl in project core by z1lc.
the class MasterEtl method getConcreteEtls.
/**
 * Finds the ETL classes that should be executed in this run.
 *
 * <p>Scans {@code REFLECTIONS_PREFIX} for subtypes of {@code Etl} and drops, in order: abstract classes,
 * classes annotated with {@code @DoNotRun}, and -- only when {@code fastRun} is set -- classes annotated
 * with {@code @SlowEtl}. Each annotation-based exclusion is logged with its reason.
 *
 * @param fastRun when {@code true}, ETLs annotated with {@code @SlowEtl} are excluded
 * @return the remaining classes, sorted by fully-qualified class name
 */
@VisibleForTesting
@SuppressWarnings("rawtypes")
List<Class<? extends Etl>> getConcreteEtls(boolean fastRun) {
  // Debugging tip: temporarily return a single-element list (e.g. LeetCodeQuestionEtl.class) to run one ETL.
  Set<Class<? extends Etl>> candidates = new Reflections(REFLECTIONS_PREFIX).getSubTypesOf(Etl.class);
  String negation = fastRun ? " not" : "";
  log.info("Flag fastRun is set to %s. Will%s include slow ETLs in run.", fastRun, negation);
  return candidates.stream()
      .filter(candidate -> !Modifier.isAbstract(candidate.getModifiers()))
      .filter(candidate -> {
        DoNotRun doNotRun = candidate.getAnnotation(DoNotRun.class);
        if (doNotRun == null) {
          return true;
        }
        log.info("Will not run class %s because %s.", candidate.getName(), doNotRun.explanation());
        return false;
      })
      .filter(candidate -> {
        boolean skippedAsSlow = fastRun && candidate.isAnnotationPresent(SlowEtl.class);
        if (skippedAsSlow) {
          log.info("Will not run class %s because it is annotated @%s and we are running with fastRun flag set to %s.",
              candidate.getName(), SlowEtl.class.getSimpleName(), fastRun);
        }
        return !skippedAsSlow;
      })
      .sorted(Comparator.comparing(Class::getName))
      .collect(Collectors.toList());
}
use of com.robertsanek.data.etl.Etl in project core by z1lc.
the class MasterEtl method runEtls.
@SuppressWarnings({ "rawtypes", "try" })
public boolean runEtls(boolean fastRun, boolean parallel) {
Stopwatch total = Stopwatch.createStarted();
List<Class<? extends Etl>> concreteEtls = getConcreteEtls(fastRun);
log.info("Will run %s ETLs.", concreteEtls.size());
log.info("Creating connection to Cloud SQL and re-generating table schemas... (this may take up to 3 minutes)");
try (SessionFactory ignored = Unchecked.get(() -> getSessionFactory(Hbm2ddlType.CREATE, ConnectionType.RSANEK));
SessionFactory noneSf = Unchecked.get(() -> getSessionFactory(Hbm2ddlType.NONE, ConnectionType.RSANEK))) {
log.info("Schema re-generation complete, taking %s seconds. Beginning ETL with parallel = %s.", total.elapsed().getSeconds(), parallel);
Stream<Class<? extends Etl>> stream = parallel ? concreteEtls.parallelStream() : concreteEtls.stream();
List<EtlRun> etlRuns = stream.map(etlClazz -> runIndividualEtl(etlClazz, noneSf)).collect(Collectors.toList());
try (Session session = noneSf.openSession()) {
Transaction transaction = session.beginTransaction();
etlRuns.forEach(session::save);
session.flush();
session.clear();
transaction.commit();
} catch (Exception e) {
log.error(e);
}
log.info("Completed %s ETLs in %s seconds.", concreteEtls.size(), total.elapsed().getSeconds());
return etlRuns.stream().allMatch(etlRun -> etlRun.getWas_successful() && etlRun.getRows_generated() > 0);
}
}
Aggregations