-
Notifications
You must be signed in to change notification settings - Fork 200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(predictions): TABLE, CELL & KEY_VALUE_SET blocks are not properly processed #660
Merged
Merged
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
858096f
in the middle of predictioin bug fix
ruiguoamz bbeb3e4
fix processText
ruiguoamz 589849e
fix processText
ruiguoamz 06baaa8
stash
ruiguoamz 86e0bcc
added missing table, cell processing and keyValueSet processing
ruiguoamz 3bbdc19
Updated integration tests
ruiguoamz deaefdc
removed configuration file
ruiguoamz c2a3859
1st: fix PR comments
ruiguoamz 7e15ee6
2nd: fix PR comments
ruiguoamz 5241c8e
removed one comment and changed the name of three functions
ruiguoamz bd151d0
some minor comments fix
ruiguoamz 6923394
Added IdentifyTextResultTransformers+Tables.swift and IdentifyTextRes…
ruiguoamz 2a35d82
removed Podfile.lock
ruiguoamz bb4922e
Added podfile.lock
ruiguoamz 23e9c5e
Merge branch 'main' into prediction/transformer
ruiguoamz c23dc78
disable empty_enum_arguments
ruiguoamz 9ba5977
fix one empty_enum_type
ruiguoamz File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
128 changes: 128 additions & 0 deletions
128
...tions/AWSPredictionsPlugin/Support/Utils/IdentifyTextResultTransformers+KeyValueSet.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
// | ||
// Copyright 2018-2020 Amazon.com, | ||
// Inc. or its affiliates. All Rights Reserved. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
import Foundation | ||
import Amplify | ||
import AWSTextract | ||
|
||
extension IdentifyTextResultTransformers { | ||
|
||
/// Converts a list of Textract blocks into `BoundedKeyValue` results.
///
/// Each block is handed to `processKeyValue(_:blockMap:)`; blocks for which
/// that call returns `nil` are left out of the result.
///
/// - Parameters:
///   - keyValueBlocks: Candidate blocks to convert.
///   - blockMap: Lookup table from block id to block, used to resolve relationships.
/// - Returns: The successfully converted key/value pairs, in input order.
static func processKeyValues(keyValueBlocks: [AWSTextractBlock],
                             blockMap: [String: AWSTextractBlock]) -> [BoundedKeyValue] {
    return keyValueBlocks.compactMap { candidate in
        processKeyValue(candidate, blockMap: blockMap)
    }
}
|
||
/// Builds a `BoundedKeyValue` from a single KEY block of a key/value set.
///
/// Returns `nil` when the block is not of type `.keyValueSet`, does not list
/// `"KEY"` among its entity types, has no relationships, or when either
/// `processBoundingBox` or `processPolygon` returns `nil` for its geometry.
///
/// - Parameters:
///   - keyBlock: The block to convert.
///   - blockMap: Lookup table from block id to block, used to resolve relationships.
/// - Returns: The key text, value text, selection flag and geometry, or `nil`.
static func processKeyValue(_ keyBlock: AWSTextractBlock,
                            blockMap: [String: AWSTextractBlock]) -> BoundedKeyValue? {
    let isKeyEntity = keyBlock.entityTypes?.contains("KEY") == true
    guard keyBlock.blockType == .keyValueSet,
        isKeyEntity,
        let relationships = keyBlock.relationships else {
            return nil
    }

    var key = ""
    var value = ""
    var selected = false

    for relationship in relationships {
        // A relationship without ids has nothing to resolve.
        guard let linkedIds = relationship.ids else {
            continue
        }

        switch relationship.types {
        case .child:
            // CHILD ids are resolved into the text of the key itself.
            key = processChildOfKeyValueSet(ids: linkedIds, blockMap: blockMap)
        case .value:
            // VALUE ids are resolved into the value text and selection flag.
            (value, selected) = processValueOfKeyValueSet(ids: linkedIds, blockMap: blockMap)
        default:
            break
        }
    }

    // Geometry is converted last, after the relationships have been walked.
    let geometry = keyBlock.geometry
    guard let boundingBox = processBoundingBox(geometry?.boundingBox),
        let polygon = processPolygon(geometry?.polygon) else {
            return nil
    }

    return BoundedKeyValue(key: key,
                           value: value,
                           isSelected: selected,
                           boundingBox: boundingBox,
                           polygon: polygon)
}
|
||
/// Concatenates the text of the WORD blocks referenced by `ids`.
///
/// Ids that are missing from `blockMap`, that refer to a block without text,
/// or that refer to a block whose type is not `.word` are ignored.
///
/// - Parameters:
///   - ids: Block ids taken from a key block's relationship.
///   - blockMap: Lookup table from block id to block.
/// - Returns: The words joined by single spaces, with surrounding
///   whitespace and newlines removed.
static func processChildOfKeyValueSet(ids: [String],
                                      blockMap: [String: AWSTextractBlock]) -> String {
    let words: [String] = ids.compactMap { id in
        guard let block = blockMap[id],
            let text = block.text,
            block.blockType == .word else {
                return nil
        }
        return text
    }
    return words
        .joined(separator: " ")
        .trimmingCharacters(in: .whitespacesAndNewlines)
}
|
||
/// Resolves the VALUE side of a key/value set into text and a selection flag.
///
/// For every id in `ids`, the referenced value block's relationships are
/// walked and each related block is inspected:
/// - `.word` blocks contribute their text to the value string;
/// - the first `.selectionElement` block determines the selection flag;
///   any further selection elements are logged as an error and ignored.
///
/// Ids that cannot be resolved in `blockMap`, value blocks without
/// relationships, and relationships without ids are skipped.
///
/// - Parameters:
///   - ids: Block ids taken from a key block's VALUE relationship.
///   - blockMap: Lookup table from block id to block.
/// - Returns: A tuple of the value text (words joined by single spaces and
///   trimmed of surrounding whitespace/newlines) and whether the first
///   selection element found was `.selected`.
static func processValueOfKeyValueSet(ids: [String],
                                      blockMap: [String: AWSTextractBlock]) -> (String, Bool) {
    var valueText = ""
    var isSelected = false
    var selectionItemFound = false

    for valueId in ids {
        guard let valueBlock = blockMap[valueId],
            let valueBlockRelations = valueBlock.relationships else {
                continue
        }

        for valueBlockRelation in valueBlockRelations {
            // Skip only this relationship when it carries no ids. The previous
            // `break` discarded every remaining relationship of the value block.
            guard let wordBlockIds = valueBlockRelation.ids else {
                continue
            }

            for wordBlockId in wordBlockIds {
                guard let wordBlock = blockMap[wordBlockId] else {
                    continue
                }

                switch wordBlock.blockType {
                case .word:
                    if let text = wordBlock.text {
                        valueText += text + " "
                    }
                case .selectionElement:
                    if !selectionItemFound {
                        selectionItemFound = true
                        //TODO: https://github.com/aws-amplify/amplify-ios/issues/695
                        // Support multiple selection items found in a KeyValueSet
                        isSelected = wordBlock.selectionStatus == .selected
                    } else {
                        Amplify.log.error("Multiple selection items found in KeyValueSet")
                    }
                default:
                    break
                }
            }
        }
    }
    return (valueText.trimmingCharacters(in: .whitespacesAndNewlines), isSelected)
}
} |
125 changes: 125 additions & 0 deletions
125
...redictions/AWSPredictionsPlugin/Support/Utils/IdentifyTextResultTransformers+Tables.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
// | ||
// Copyright 2018-2020 Amazon.com, | ||
// Inc. or its affiliates. All Rights Reserved. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
import Foundation | ||
import Amplify | ||
import AWSTextract | ||
|
||
extension IdentifyTextResultTransformers { | ||
|
||
/// Converts a list of Textract blocks into `Table` results.
///
/// Each block is handed to `processTable(_:blockMap:)`; blocks for which
/// that call returns `nil` are left out of the result.
///
/// - Parameters:
///   - tableBlocks: Candidate blocks to convert.
///   - blockMap: Lookup table from block id to block, used to resolve relationships.
/// - Returns: The successfully converted tables, in input order.
static func processTables(tableBlocks: [AWSTextractBlock],
                          blockMap: [String: AWSTextractBlock]) -> [Table] {
    return tableBlocks.compactMap { candidate in
        processTable(candidate, blockMap: blockMap)
    }
}
|
||
/// Builds a `Table` from a single TABLE block.
///
/// Every id in the table block's relationships is resolved through
/// `blockMap`; blocks that carry both a row and a column index are passed to
/// `constructTableCell(_:_:)` and, when that succeeds, appended to the table.
/// A (row, column) position is only ever added once.
///
/// `rows` and `columns` on the returned table are the number of distinct
/// row and column indices among the cells that were added.
///
/// - Parameters:
///   - tableBlock: The block to convert.
///   - blockMap: Lookup table from block id to block.
/// - Returns: The table, or `nil` when the block has no relationships or is
///   not of type `.table`.
static func processTable(_ tableBlock: AWSTextractBlock,
                         blockMap: [String: AWSTextractBlock]) -> Table? {
    guard let relationships = tableBlock.relationships,
        case .table = tableBlock.blockType else {
            return nil
    }

    var table = Table()
    var rows = Set<Int>()
    var cols = Set<Int>()
    // Columns already filled, per row. De-duplication has to be done on the
    // (row, column) pair: checking the row set and the column set
    // independently rejects every cell that shares a row or a column with a
    // previously accepted cell.
    var filledColumnsByRow = [Int: Set<Int>]()

    for tableRelation in relationships {
        guard let cellIds = tableRelation.ids else {
            continue
        }

        for cellId in cellIds {
            guard let cellBlock = blockMap[cellId],
                let rowIndex = cellBlock.rowIndex,
                let colIndex = cellBlock.columnIndex else {
                    continue
            }

            // textract starts indexing at 1, so subtract it by 1.
            let row = Int(truncating: rowIndex) - 1
            let col = Int(truncating: colIndex) - 1

            // Skip positions that were already added, then try to build the cell.
            guard filledColumnsByRow[row]?.contains(col) != true,
                let cell = constructTableCell(cellBlock, blockMap) else {
                    continue
            }

            table.cells.append(cell)
            filledColumnsByRow[row, default: []].insert(col)
            rows.insert(row)
            cols.insert(col)
        }
    }

    table.rows = rows.count
    table.columns = cols.count
    return table
}
|
||
/// Builds a `Table.Cell` from a single CELL block.
///
/// The cell's relationships are walked and each related block is inspected:
/// - `.word` blocks contribute their text to the cell text;
/// - the first `.selectionElement` block determines `isSelected`; any
///   further selection elements are logged as an error and ignored.
///
/// - Parameters:
///   - block: The block to convert.
///   - blockMap: Lookup table from block id to block.
/// - Returns: The cell, or `nil` when the block is not of type `.cell`, is
///   missing relationships, row/column span or geometry, when a related
///   `.word` block has no text, or when `processBoundingBox` /
///   `processPolygon` returns `nil`.
static func constructTableCell(_ block: AWSTextractBlock, _ blockMap: [String: AWSTextractBlock]) -> Table.Cell? {
    guard block.blockType == .cell,
        let relationships = block.relationships,
        let rowSpan = block.rowSpan,
        let columnSpan = block.columnSpan,
        let geometry = block.geometry,
        let textractBoundingBox = geometry.boundingBox,
        let textractPolygon = geometry.polygon else {
            return nil
    }

    var words = ""
    var isSelected = false
    var selectionItemFound = false

    for cellRelation in relationships {
        guard let wordOrSelectionIds = cellRelation.ids else {
            continue
        }

        for wordOrSelectionId in wordOrSelectionIds {
            guard let wordOrSelectionBlock = blockMap[wordOrSelectionId] else {
                continue
            }

            switch wordOrSelectionBlock.blockType {
            case .word:
                // A word without text invalidates the whole cell (existing behaviour).
                guard let text = wordOrSelectionBlock.text else {
                    return nil
                }
                words += text + " "
            case .selectionElement:
                if !selectionItemFound {
                    selectionItemFound = true
                    //TODO: https://github.com/aws-amplify/amplify-ios/issues/695
                    // Support multiple selection items found in a table cell
                    // The status is read from the selection element itself,
                    // not from the enclosing CELL block.
                    isSelected = wordOrSelectionBlock.selectionStatus == .selected
                } else {
                    Amplify.log.error("Multiple selection items found in table cell")
                }
            default:
                break
            }
        }
    }

    guard let boundingBox = processBoundingBox(textractBoundingBox),
        let polygon = processPolygon(textractPolygon) else {
            return nil
    }

    // Drop the trailing separator left by the accumulation above, matching
    // how the key/value helpers return their text.
    return Table.Cell(text: words.trimmingCharacters(in: .whitespacesAndNewlines),
                      boundingBox: boundingBox,
                      polygon: polygon,
                      isSelected: isSelected,
                      rowSpan: Int(truncating: rowSpan),
                      columnSpan: Int(truncating: columnSpan))
}
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can revert changes in this file.