Skip to content

Commit

Permalink
crop: create AlternativeImage when successful (to prevent inconsistent annotation in case AlternativeImage was already present)
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Jul 9, 2019
1 parent 01c0208 commit 2896d24
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions ocrd_tesserocr/crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ocrd_models.ocrd_page import (
MetadataItemType,
LabelsType, LabelType,
CoordsType,
CoordsType, AlternativeImageType,
to_xml
)
from ocrd_models.ocrd_page_generateds import BorderType
Expand All @@ -19,11 +19,12 @@
from .config import TESSDATA_PREFIX, OCRD_TOOL
from .common import (
bbox_from_points, points_from_bbox,
bbox_from_xywh
bbox_from_xywh, save_image_file
)

# Executable name of this processor (presumably also its key in OCRD_TOOL -- TODO confirm).
TOOL = 'ocrd-tesserocr-crop'
# Module-level logger used by the crop processor for debug/error messages.
LOG = getLogger('processor.TesserocrCrop')
# File group under which process() stores the cropped page images; it also
# replaces the input file group name when deriving the new file ID.
FILEGRP_IMG = 'OCR-D-IMG-CROP'

class TesserocrCrop(Processor):

Expand Down Expand Up @@ -150,7 +151,19 @@ def process(self):
LOG.debug("Padded page border: %i:%i,%i:%i", min_x, max_x, min_y, max_y)
border = BorderType(Coords=CoordsType(
points_from_bbox(min_x, min_y, max_x, max_y)))
# update PAGE (annotate border):
page.set_Border(border)
# update METS (add the image file):
page_image = page_image.crop(
box=(min_x, min_y, max_x, max_y))
file_id = input_file.ID.replace(self.input_file_grp, FILEGRP_IMG)
file_path = save_image_file(self.workspace, page_image,
file_id,
page_id=page_id,
file_grp=FILEGRP_IMG)
# update PAGE (reference the image file):
page.add_AlternativeImage(AlternativeImageType(
filename=file_path, comments="cropped"))
else:
LOG.error("Cannot find valid extent for page '%s'", page_id)

Expand Down

0 comments on commit 2896d24

Please sign in to comment.