Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(predictions): TABLE, CELL & KEY_VALUE_SET blocks are not properly processed #660

Merged
merged 17 commits into from
Aug 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class IdentifyResultTransformers {
points.append(point)
}
return Polygon(points: points)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can revert changes in this file.

}

static func processPolygon(_ textractPolygonPoints: [AWSTextractPoint]?) -> Polygon? {
Expand All @@ -64,7 +63,6 @@ class IdentifyResultTransformers {
points.append(point)
}
return Polygon(points: points)

}

// swiftlint:disable cyclomatic_complexity
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
//
// Copyright 2018-2020 Amazon.com,
// Inc. or its affiliates. All Rights Reserved.
//
// SPDX-License-Identifier: Apache-2.0
//

import Foundation
import Amplify
import AWSTextract

extension IdentifyTextResultTransformers {

/// Converts a list of Textract blocks into bounded key/value pairs.
///
/// Each block is handed to `processKeyValue(_:blockMap:)`; blocks for which that
/// call returns `nil` are omitted from the result.
///
/// - Parameters:
///   - keyValueBlocks: The candidate blocks to convert.
///   - blockMap: Blocks keyed by id, used to resolve the ids found in relationships.
/// - Returns: The successfully converted key/value pairs, in input order.
static func processKeyValues(keyValueBlocks: [AWSTextractBlock],
                             blockMap: [String: AWSTextractBlock]) -> [BoundedKeyValue] {
    return keyValueBlocks.compactMap { candidate in
        processKeyValue(candidate, blockMap: blockMap)
    }
}

/// Builds a `BoundedKeyValue` from a single KEY block of a key/value set.
///
/// Returns `nil` when the block is not a `.keyValueSet` block, is not tagged with
/// the "KEY" entity type, has no relationships, or when its bounding box or
/// polygon cannot be converted.
///
/// - Parameters:
///   - keyBlock: The block to convert.
///   - blockMap: Blocks keyed by id, used to resolve the ids found in relationships.
/// - Returns: The key text, value text, selection flag and geometry, or `nil`.
static func processKeyValue(_ keyBlock: AWSTextractBlock,
                            blockMap: [String: AWSTextractBlock]) -> BoundedKeyValue? {
    let isKeyEntity = keyBlock.entityTypes?.contains("KEY") ?? false
    guard keyBlock.blockType == .keyValueSet,
        isKeyEntity,
        let relations = keyBlock.relationships else {
        return nil
    }

    var key = ""
    var value = ""
    var isSelected = false

    // A later relationship of the same type overwrites what an earlier one produced.
    for relation in relations {
        guard let linkedIds = relation.ids else {
            continue
        }

        if relation.types == .child {
            key = processChildOfKeyValueSet(ids: linkedIds, blockMap: blockMap)
        } else if relation.types == .value {
            (value, isSelected) = processValueOfKeyValueSet(ids: linkedIds, blockMap: blockMap)
        }
    }

    // Geometry is resolved after the relationships, bounding box first.
    guard let boundingBox = processBoundingBox(keyBlock.geometry?.boundingBox),
        let polygon = processPolygon(keyBlock.geometry?.polygon) else {
        return nil
    }

    return BoundedKeyValue(key: key,
                           value: value,
                           isSelected: isSelected,
                           boundingBox: boundingBox,
                           polygon: polygon)
}

/// Assembles the key text from the blocks referenced by a KEY block's child relationship.
///
/// Only ids that resolve to a `.word` block carrying text contribute; everything
/// else is skipped. The words are separated by single spaces and the result is
/// trimmed of surrounding whitespace and newlines.
///
/// - Parameters:
///   - ids: Ids of the referenced blocks.
///   - blockMap: Blocks keyed by id.
/// - Returns: The space-separated key text, or an empty string when no word matched.
static func processChildOfKeyValueSet(ids: [String],
                                      blockMap: [String: AWSTextractBlock]) -> String {
    let words = ids.compactMap { id -> String? in
        guard let block = blockMap[id], block.blockType == .word else {
            return nil
        }
        return block.text
    }
    return words
        .joined(separator: " ")
        .trimmingCharacters(in: .whitespacesAndNewlines)
}

/// Resolves the value side of a key/value set into its text and selection state.
///
/// For every id in `ids`, the referenced VALUE block's relationships are walked
/// and each linked block is inspected:
/// - `.word` blocks contribute their text, separated by single spaces;
/// - the first `.selectionElement` block determines the selection flag, and any
///   further selection elements are logged as an error and ignored.
///
/// Ids that cannot be resolved, and relationships that carry no ids, are skipped
/// without affecting the remaining ones.
///
/// - Parameters:
///   - ids: Ids of the VALUE blocks referenced by the KEY block.
///   - blockMap: Blocks keyed by id.
/// - Returns: A tuple of the trimmed value text and whether the first selection
///   element found was selected (`false` when there is none).
static func processValueOfKeyValueSet(ids: [String],
                                      blockMap: [String: AWSTextractBlock]) -> (String, Bool) {
    var valueText = ""
    var isSelected = false
    var selectionItemFound = false

    for valueId in ids {
        guard let valueBlock = blockMap[valueId],
            let valueBlockRelations = valueBlock.relationships else {
            continue
        }

        for valueBlockRelation in valueBlockRelations {
            // Skip only this relationship; a `break` here would discard every
            // remaining relationship of the value block.
            guard let wordBlockIds = valueBlockRelation.ids else {
                continue
            }

            for wordBlockId in wordBlockIds {
                guard let wordBlock = blockMap[wordBlockId] else {
                    continue
                }

                switch wordBlock.blockType {
                case .word:
                    if let text = wordBlock.text {
                        valueText += text + " "
                    }
                case .selectionElement:
                    if !selectionItemFound {
                        selectionItemFound = true
                        //TODO: https://github.com/aws-amplify/amplify-ios/issues/695
                        // Support multiple selection items found in a KeyValueSet
                        isSelected = wordBlock.selectionStatus == .selected
                    } else {
                        Amplify.log.error("Multiple selection items found in KeyValueSet")
                    }
                default:
                    break
                }
            }
        }
    }
    return (valueText.trimmingCharacters(in: .whitespacesAndNewlines), isSelected)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//
// Copyright 2018-2020 Amazon.com,
// Inc. or its affiliates. All Rights Reserved.
//
// SPDX-License-Identifier: Apache-2.0
//

import Foundation
import Amplify
import AWSTextract

extension IdentifyTextResultTransformers {

/// Converts a list of Textract blocks into tables.
///
/// Each block is handed to `processTable(_:blockMap:)`; blocks for which that
/// call returns `nil` are omitted from the result.
///
/// - Parameters:
///   - tableBlocks: The candidate blocks to convert.
///   - blockMap: Blocks keyed by id, used to resolve the ids found in relationships.
/// - Returns: The successfully converted tables, in input order.
static func processTables(tableBlocks: [AWSTextractBlock],
                          blockMap: [String: AWSTextractBlock]) -> [Table] {
    return tableBlocks.compactMap { candidate in
        processTable(candidate, blockMap: blockMap)
    }
}

/// Builds a `Table` from a Textract TABLE block.
///
/// Every id referenced by the table's relationships is resolved through
/// `blockMap`. Each referenced block that has row and column indices and can be
/// converted by `constructTableCell(_:_:)` is appended to the table's cells.
/// The table's `rows` and `columns` are the number of distinct row and column
/// indices among the appended cells.
///
/// - Parameters:
///   - tableBlock: The block to convert.
///   - blockMap: Blocks keyed by id.
/// - Returns: The table, or `nil` when `tableBlock` is not a `.table` block or
///   has no relationships.
static func processTable(_ tableBlock: AWSTextractBlock,
                         blockMap: [String: AWSTextractBlock]) -> Table? {

    guard let relationships = tableBlock.relationships,
        case .table = tableBlock.blockType else {
        return nil
    }

    var table = Table()
    var rows = Set<Int>()
    var cols = Set<Int>()

    for tableRelation in relationships {
        guard let cellIds = tableRelation.ids else {
            continue
        }

        for cellId in cellIds {
            guard let cellBlock = blockMap[cellId],
                let rowIndex = cellBlock.rowIndex,
                let colIndex = cellBlock.columnIndex,
                let cell = constructTableCell(cellBlock, blockMap) else {
                continue
            }

            // Every cell is kept. The sets only track which row/column indices
            // have been seen so the table dimensions can be derived; they must
            // not be used to reject cells, or only one cell per row survives.
            table.cells.append(cell)

            // textract starts indexing at 1, so subtract it by 1.
            rows.insert(Int(truncating: rowIndex) - 1)
            cols.insert(Int(truncating: colIndex) - 1)
        }
    }

    table.rows = rows.count
    table.columns = cols.count
    return table
}

/// Builds a `Table.Cell` from a Textract CELL block.
///
/// The cell's relationships are walked and each linked block is inspected:
/// - `.word` blocks contribute their text, separated by single spaces;
/// - the first `.selectionElement` block determines the selection flag, and any
///   further selection elements are logged as an error and ignored.
///
/// - Parameters:
///   - block: The block to convert.
///   - blockMap: Blocks keyed by id.
/// - Returns: The cell, or `nil` when `block` is not a `.cell` block, lacks
///   relationships, spans or geometry, references a word block without text,
///   or has a bounding box or polygon that cannot be converted.
static func constructTableCell(_ block: AWSTextractBlock, _ blockMap: [String: AWSTextractBlock]) -> Table.Cell? {
    guard block.blockType == .cell,
        let relationships = block.relationships,
        let rowSpan = block.rowSpan,
        let columnSpan = block.columnSpan,
        let geometry = block.geometry,
        let textractBoundingBox = geometry.boundingBox,
        let textractPolygon = geometry.polygon
        else {
            return nil
    }

    var words = ""
    var isSelected = false
    var selectionItemFound = false

    for cellRelation in relationships {
        guard let wordOrSelectionIds = cellRelation.ids else {
            continue
        }

        for wordOrSelectionId in wordOrSelectionIds {
            guard let wordOrSelectionBlock = blockMap[wordOrSelectionId] else {
                continue
            }

            switch wordOrSelectionBlock.blockType {
            case .word:
                guard let text = wordOrSelectionBlock.text else {
                    return nil
                }
                words += text + " "
            case .selectionElement:
                if !selectionItemFound {
                    selectionItemFound = true
                    //TODO: https://github.com/aws-amplify/amplify-ios/issues/695
                    // Support multiple selection items found in a table cell
                    // The status is read from the selection element itself,
                    // not from the enclosing cell block.
                    isSelected = wordOrSelectionBlock.selectionStatus == .selected
                } else {
                    Amplify.log.error("Multiple selection items found in table cell")
                }
            default:
                break
            }
        }
    }

    guard let boundingBox = processBoundingBox(textractBoundingBox),
        let polygon = processPolygon(textractPolygon) else {
        return nil
    }

    // Drop the separator appended after the last word, matching how key and
    // value text is returned by the key/value helpers.
    return Table.Cell(text: words.trimmingCharacters(in: .whitespacesAndNewlines),
                      boundingBox: boundingBox,
                      polygon: polygon,
                      isSelected: isSelected,
                      rowSpan: Int(truncating: rowSpan),
                      columnSpan: Int(truncating: columnSpan))
}
}
Loading