From c3546638e597ac1da00fbeae9e329403ab40a80d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 16 Mar 2022 11:53:16 +0100 Subject: [PATCH 1/2] change ocrd-anybaseocr-layout-analysis model OCR-D/ocrd_anybaseocr#89 --- ocrd/ocrd/resource_list.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ocrd/ocrd/resource_list.yml b/ocrd/ocrd/resource_list.yml index fcdfa63398..ade23e251c 100644 --- a/ocrd/ocrd/resource_list.yml +++ b/ocrd/ocrd/resource_list.yml @@ -133,10 +133,12 @@ ocrd-anybaseocr-block-segmentation: description: block segmentation model for anybaseocr size: 256139800 ocrd-anybaseocr-layout-analysis: - - url: https://ocr-d-repo.scc.kit.edu/models/dfki/layoutAnalysis/structure_analysis.h5 - name: structure_analysis.h5 + - url: https://ocr-d.kba.cloud/structure_analysis.tar.gz + name: structure_analysis description: structure analysis model for anybaseocr - size: 31477056 + type: tarball + path_in_archive: 'structure_analysis' + size: 29002514 - url: https://ocr-d-repo.scc.kit.edu/models/dfki/layoutAnalysis/mapping_densenet.pickle name: mapping_densenet.pickle description: mapping model for anybaseocr From 072294b570bf5bc351be91c9cbda433876434916 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 20 Mar 2022 16:48:24 +0100 Subject: [PATCH 2/2] resmgr: updated ocrd-anybaseocr-tiseg model --- ocrd/ocrd/resource_list.yml | 8 +++++--- ocrd/ocrd/resource_manager.py | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ocrd/ocrd/resource_list.yml b/ocrd/ocrd/resource_list.yml index ade23e251c..4840fa3296 100644 --- a/ocrd/ocrd/resource_list.yml +++ b/ocrd/ocrd/resource_list.yml @@ -144,10 +144,12 @@ ocrd-anybaseocr-layout-analysis: description: mapping model for anybaseocr size: 374 ocrd-anybaseocr-tiseg: - - url: https://ocr-d-repo.scc.kit.edu/models/dfki/tiseg/seg_model.hdf5 - name: seg_model.hdf5 + - url: https://ocr-d.kba.cloud/seg_model.tar.gz + name: seg_model description: text image segmentation model for anybaseocr - size: 66080688 + type: tarball + path_in_archive: 'seg_model' + size: 61388872 ocrd-kraken-segment: - url: https://github.com/mittagessen/kraken/raw/master/kraken/blla.mlmodel description: Pretrained baseline segmentation model diff --git a/ocrd/ocrd/resource_manager.py b/ocrd/ocrd/resource_manager.py index 06832e0fd2..4720cb54ef 100644 --- a/ocrd/ocrd/resource_manager.py +++ b/ocrd/ocrd/resource_manager.py @@ -249,17 +249,17 @@ def download( else: self._copy_impl(url, fpath, progress_cb) elif resource_type == 'tarball': - with pushd_popd(tempdir=True): + with pushd_popd(tempdir=True) as tempdir: if is_url: self._download_impl(url, 'download.tar.xx', progress_cb, size) else: self._copy_impl(url, 'download.tar.xx', progress_cb) Path('out').mkdir() with pushd_popd('out'): - log.info("Extracting tarball") + log.info("Extracting tarball to %s/out" % tempdir) with open_tarfile('../download.tar.xx', 'r:*') as tar: tar.extractall() - log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath)) + log.info("Copying '%s' from extracted tarball %s/out to %s" % (path_in_archive, tempdir, fpath)) copytree(path_in_archive, str(fpath)) # TODO # elif resource_type == 'github-dir':