From 0fe95f5e88133c181be46af6ff5cae2882657672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Klocek?= Date: Mon, 13 Jan 2025 15:08:42 +0100 Subject: [PATCH 1/8] filter for entity types intro --- src/unitxt/operators.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py index 15b4499af..a741d2923 100644 --- a/src/unitxt/operators.py +++ b/src/unitxt/operators.py @@ -2257,3 +2257,13 @@ def process_value(self, value: Any) -> Any: page = self.wikipedia.page(title) return {"title": page.title, "body": getattr(page, self.mode)} + +class FilterEntityTypes(InstanceOperator): + + entities_types_to_keep: List[str] + + def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None + ) -> Dict[str, Any]: + + return {key:dict_get(instance,key) for key in self.entity_types_to_keep} + From ead722a4c4e3cc88b0c154e306090955f2c96ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Klocek?= Date: Tue, 14 Jan 2025 13:35:49 +0100 Subject: [PATCH 2/8] code update --- src/unitxt/operators.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py index a741d2923..122c68351 100644 --- a/src/unitxt/operators.py +++ b/src/unitxt/operators.py @@ -2265,5 +2265,11 @@ class FilterEntityTypes(InstanceOperator): def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None ) -> Dict[str, Any]: - return {key:dict_get(instance,key) for key in self.entity_types_to_keep} - + data_to_keep_indices = [i for i, label in enumerate(instance['labels']) if label in self.entities_types_to_keep] + + return { + key:( + [value[i] for i in data_to_keep_indices] if isinstance(value, List) else value + ) + for key,value in instance.items() + } From 5f7980fe38e9055d5348c20728e92f3d6bbfe5e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Klocek?= Date: Tue, 14 Jan 2025 14:21:34 +0100 Subject: [PATCH 3/8] optimalisation --- src/unitxt/operators.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py index 122c68351..6d4694332 100644 --- a/src/unitxt/operators.py +++ b/src/unitxt/operators.py @@ -2261,15 +2261,18 @@ def process_value(self, value: Any) -> Any: class FilterEntityTypes(InstanceOperator): entities_types_to_keep: List[str] + fields_to_filter: List[str] def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None ) -> Dict[str, Any]: - data_to_keep_indices = [i for i, label in enumerate(instance['labels']) if label in self.entities_types_to_keep] + data_to_keep_indices = [i for i, label in enumerate(instance['labels']) if label in set(self.entities_types_to_keep)] return { key:( - [value[i] for i in data_to_keep_indices] if isinstance(value, List) else value + [value[i] for i in data_to_keep_indices] if key in self.fields_to_filter else value ) for key,value in instance.items() } + + From 9b40fbb058bb68a942ed434034ac2ee90db09376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Klocek?= Date: Thu, 16 Jan 2025 13:55:09 +0100 Subject: [PATCH 4/8] improv --- src/unitxt/operators.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py index 6d4694332..0d73347c8 100644 --- a/src/unitxt/operators.py +++ b/src/unitxt/operators.py @@ -2266,13 +2266,15 @@ class FilterEntityTypes(InstanceOperator): def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None ) -> Dict[str, Any]: + if set(self.entities_types_to_keep) == set(instance['labels']): + return instance + data_to_keep_indices = [i for i, label in enumerate(instance['labels']) if label in set(self.entities_types_to_keep)] - return { - key:( - [value[i] for i in data_to_keep_indices] if key in self.fields_to_filter else value - ) + return dict( + ( + key, value[data_to_keep_indices] + if key in self.fields_to_filter + else value) for key,value in instance.items() - } - - + ) From 41eee2de687905d2385299d5b12c601a8f0d7875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Klocek?= Date: Mon, 20 Jan 2025 17:54:44 +0100 Subject: [PATCH 5/8] remove filter and add its functionality to intersect --- src/unitxt/operators.py | 58 ++++++++----------- tests/library/test_operators.py | 99 ++++++++++++++++++++------------- 2 files changed, 83 insertions(+), 74 deletions(-) diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py index 0d73347c8..f84175c00 100644 --- a/src/unitxt/operators.py +++ b/src/unitxt/operators.py @@ -1448,33 +1448,40 @@ def prepare(self): self.min_frequency_percent = 0 -class Intersect(FieldOperator): +class Intersect(InstanceOperator): """Intersects the value of a field, which must be a list, with a given list. Args: - allowed_values (list) - list to intersect. + allowed_field_values (list) - list to intersect. + fields_to_intersect (list) - list of fields to be filtered """ - allowed_values: List[Any] + allowed_field_values: List[str] + fields_to_intersect: List[str] def verify(self): super().verify() - if self.process_every_value: - raise ValueError( - "'process_every_value=True' is not supported in Intersect operator" - ) - if not isinstance(self.allowed_values, list): + if not isinstance(self.allowed_field_values, list): raise ValueError( - f"The allowed_values is not a list but '{self.allowed_values}'" + f"The allowed_field_values is not a type list but '{type(self.allowed_field_values)}'" ) - def process_value(self, value: Any) -> Any: - super().process_value(value) - if not isinstance(value, list): - raise ValueError(f"The value in field is not a list but '{value}'") - return [e for e in value if e in self.allowed_values] - + def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None + ) -> Dict[str, Any]: + + if set(self.allowed_field_values) == set(instance['labels']): + return instance + + data_to_keep_indices = [i for i, label in enumerate(instance['labels']) if label in set(self.allowed_field_values)] + + return dict( + ( + key, value[data_to_keep_indices] + if key in self.fields_to_intersect + else value) + for key,value in instance.items() + ) class RemoveValues(FieldOperator): """Removes elements in a field, which must be a list, using a given list of unallowed. @@ -2257,24 +2264,3 @@ def process_value(self, value: Any) -> Any: page = self.wikipedia.page(title) return {"title": page.title, "body": getattr(page, self.mode)} - -class FilterEntityTypes(InstanceOperator): - - entities_types_to_keep: List[str] - fields_to_filter: List[str] - - def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None - ) -> Dict[str, Any]: - - if set(self.entities_types_to_keep) == set(instance['labels']): - return instance - - data_to_keep_indices = [i for i, label in enumerate(instance['labels']) if label in set(self.entities_types_to_keep)] - - return dict( - ( - key, value[data_to_keep_indices] - if key in self.fields_to_filter - else value) - for key,value in instance.items() - ) diff --git a/tests/library/test_operators.py b/tests/library/test_operators.py index 43a120629..47cdca81e 100644 --- a/tests/library/test_operators.py +++ b/tests/library/test_operators.py @@ -580,47 +580,68 @@ def test_execute_expression(self): ) def test_intersect(self): - inputs = [ - {"label": ["a", "b"]}, - {"label": ["a", "c", "d"]}, - {"label": ["a", "b", "f"]}, - ] - - targets = [ - {"label": ["b"]}, - {"label": []}, - {"label": ["b", "f"]}, - ] - - check_operator( - operator=Intersect(field="label", allowed_values=["b", "f"]), - inputs=inputs, - targets=targets, - tester=self, - ) - with self.assertRaises(ValueError) as cm: - check_operator( - operator=Intersect(field="label", allowed_values=3), - inputs=inputs, - targets=targets, - tester=self, - ) - self.assertEqual(str(cm.exception), "The allowed_values is not a list but '3'") - - with self.assertRaises(ValueError) as cm: - check_operator( - operator=Intersect( - field="label", allowed_values=["3"], process_every_value=True - ), + + def __test_intersect(inputs, targets, fields_to_intersect, allowed_field_values): + return check_operator( + operator=Intersect(fields_to_intersect, allowed_field_values), inputs=inputs, targets=targets, - tester=self, - ) - self.assertEqual( - str(cm.exception), - "'process_every_value=True' is not supported in Intersect operator", + ) + + ## basic test + __test_intersect( + inputs=[{"label": [1,2]}], + targets=[{"label": [1]}], + fields_to_intersect=["label"], + allowed_field_values=[1] + ) + + # multiple fields of the same name + __test_intersect( + inputs = [ + {"label": ["a", "b"]}, + {"label": ["a", "c", "d"]}, + {"name": ["a", "b", "f"]}, + ], + targets = [ + {"label": ["b"]}, + {"label": []}, + {"name": ["b", "f"]}, + ], + fields_to_intersect=["label",'name'], + allowed_field_values=["b", "f"] + ) + + __test_intersect( + inputs = [ + {"label": ["a", "b"]}, + {"label": ["a", "c", "d"]}, + {"label": ["a", "b", "f"]}, + ], + targets = [ + {"label": ["b"]}, + {"label": []}, + {"label": ["b", "f"]}, + ], + fields_to_intersect=["label"], + allowed_field_values=["b", "f"] ) + + + with self.assertRaises(ValueError) as cm: + __test_intersect( + inputs = [ + {"label": ["a", "b"]}, + ], + targets = [ + {"label": ["b"]}, + ], + fields_to_intersect=["label"], + allowed_field_values=3 + ) + self.assertEqual(str(cm.exception), "The allowed_field_values is not a list but ''") + inputs = [ {"label": "b"}, ] @@ -630,7 +651,7 @@ def test_intersect(self): "The value in field is not a list but 'b'", ] check_operator_exception( - operator=Intersect(field="label", allowed_values=["c"]), + operator=Intersect(field=["label"], allowed_field_values=["c"]), inputs=inputs, exception_texts=exception_texts, tester=self, @@ -3206,3 +3227,5 @@ def test_select_fields(self): } ] TestOperators().compare_streams(joined_stream, expected_joined_stream) + + def test_ From d57cd1b1e2c1ccfa33dc0cc017cd148e1ece631c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Klocek?= Date: Mon, 20 Jan 2025 18:07:55 +0100 Subject: [PATCH 6/8] typo --- tests/library/test_operators.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/library/test_operators.py b/tests/library/test_operators.py index 85e54a30a..c548f1ec2 100644 --- a/tests/library/test_operators.py +++ b/tests/library/test_operators.py @@ -3146,5 +3146,4 @@ def test_select_fields(self): } ] TestOperators().compare_streams(joined_stream, expected_joined_stream) - - def test_ + \ No newline at end of file From 886fd6d6897c33aee41011b0f0fdb1c59959c175 Mon Sep 17 00:00:00 2001 From: Yoav Katz Date: Sat, 25 Jan 2025 15:12:07 +0200 Subject: [PATCH 7/8] Created a new type of intersect operator Signed-off-by: Yoav Katz --- src/unitxt/operators.py | 140 +++++++++++++++++--- tests/library/test_operators.py | 227 +++++++++++++++++++++++--------- 2 files changed, 287 insertions(+), 80 deletions(-) diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py index 73f4dcfb4..3946cc34c 100644 --- a/src/unitxt/operators.py +++ b/src/unitxt/operators.py @@ -84,7 +84,7 @@ from .random_utils import new_random_generator from .settings_utils import get_settings from .stream import DynamicStream, Stream -from .text_utils import nested_tuple_to_string +from .text_utils import nested_tuple_to_string, to_pretty_string from .type_utils import isoftype from .utils import ( LRUCache, @@ -1448,41 +1448,141 @@ def prepare(self): self.min_frequency_percent = 0 -class Intersect(InstanceOperator): +class Intersect(FieldOperator): """Intersects the value of a field, which must be a list, with a given list. Args: - allowed_field_values (list) - list to intersect. - fields_to_intersect (list) - list of fields to be filtered + allowed_values (list) - list to intersect. """ - allowed_field_values: List[str] - fields_to_intersect: List[str] + allowed_values: List[Any] + + def verify(self): + super().verify() + if self.process_every_value: + raise ValueError( + "'process_every_value=True' is not supported in Intersect operator" + ) + + if not isinstance(self.allowed_values, list): + raise ValueError( + f"The allowed_values is not a list but '{self.allowed_values}'" + ) + + def process_value(self, value: Any) -> Any: + super().process_value(value) + if not isinstance(value, list): + raise ValueError(f"The value in field is not a list but '{value}'") + return [e for e in value if e in self.allowed_values] + + +class IntersectCorrespondingFields(InstanceOperator): + """Intersects the value of a field, which must be a list, with a given list. + + For example: + + Assume the instances contain a field of 'labels' and a field with their corresponding 'positions' in the text. + + IntersectCorrespondingFields(field="label", + allowed_values=["b", "f"], + corresponding_fields_to_intersect=["position"]) + + would keep only "b" and "f" values in 'labels' field and + their respective values in the 'position' field. + (All other fields aer not effected) + + Given this input: + + [ + {"label": ["a", "b"],"position": [0,1],"other" : "not"}, + {"label": ["a", "c", "d"], "position": [0,1,2], "other" : "relevant"}, + {"label": ["a", "b", "f"], "position": [0,1,2], "other" : "field"} + ] + + So the output would be: + [ + {"label": ["b"], "position":[1],"other" : "not"}, + {"label": [], "position": [], "other" : "relevant"}, + {"label": ["b", "f"],"position": [1,2], "other" : "field"}, + ] + + Args: + field - the field to intersected (must contain list values) + allowed_values (list) - list of values to keep + corresponding_fields_to_intersect (list) - additional list fields from which values + are removed based the corresponding index of values removed from the 'field' + """ + + field: str + allowed_values: List[str] + corresponding_fields_to_intersect: List[str] def verify(self): super().verify() - if not isinstance(self.allowed_field_values, list): + if not isinstance(self.allowed_values, list): raise ValueError( f"The allowed_field_values is not a type list but '{type(self.allowed_field_values)}'" ) - def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None + def process( + self, instance: Dict[str, Any], stream_name: Optional[str] = None ) -> Dict[str, Any]: - - if set(self.allowed_field_values) == set(instance['labels']): - return instance - - data_to_keep_indices = [i for i, label in enumerate(instance['labels']) if label in set(self.allowed_field_values)] - - return dict( - ( - key, value[data_to_keep_indices] - if key in self.fields_to_intersect - else value) - for key,value in instance.items() + if self.field not in instance: + raise ValueError( + f"Field '{self.field}' is not in provided instance.\n" + + to_pretty_string(instance) ) + for corresponding_field in self.corresponding_fields_to_intersect: + if corresponding_field not in instance: + raise ValueError( + f"Field '{corresponding_field}' is not in provided instance.\n" + + to_pretty_string(instance) + ) + + if not isinstance(instance[self.field], list): + raise ValueError( + f"Value of field '{self.field}' is not a list, so IntersectCorrespondingFields can not intersect with allowed values. Field value:\n" + + to_pretty_string(instance, keys=[self.field]) + ) + + num_values_in_field = len(instance[self.field]) + + if set(self.allowed_values) == set(instance[self.field]): + return instance + + indices_to_keep = [ + i + for i, value in enumerate(instance[self.field]) + if value in set(self.allowed_values) + ] + + result_instance = {} + for field_name, field_value in instance.items(): + if ( + field_name in self.corresponding_fields_to_intersect + or field_name == self.field + ): + if not isinstance(field_value, list): + raise ValueError( + f"Value of field '{field_name}' is not a list, IntersectCorrespondingFields can not intersect with allowed values." + ) + if len(field_value) != num_values_in_field: + raise ValueError( + f"Number of elements in field '{field_name}' is not the same as the number of elements in field '{self.field}' so the IntersectCorrespondingFields can not remove corresponding values.\n" + + to_pretty_string(instance, keys=[self.field, field_name]) + ) + result_instance[field_name] = [ + value + for index, value in enumerate(field_value) + if index in indices_to_keep + ] + else: + result_instance[field_name] = field_value + return result_instance + + class RemoveValues(FieldOperator): """Removes elements in a field, which must be a list, using a given list of unallowed. diff --git a/tests/library/test_operators.py b/tests/library/test_operators.py index c548f1ec2..200f2c22a 100644 --- a/tests/library/test_operators.py +++ b/tests/library/test_operators.py @@ -29,6 +29,7 @@ FromIterables, IndexOf, Intersect, + IntersectCorrespondingFields, IterableSource, JoinStr, LengthBalancer, @@ -579,68 +580,47 @@ def test_execute_expression(self): ) def test_intersect(self): - - def __test_intersect(inputs, targets, fields_to_intersect, allowed_field_values): - return check_operator( - operator=Intersect(fields_to_intersect, allowed_field_values), + inputs = [ + {"label": ["a", "b"]}, + {"label": ["a", "c", "d"]}, + {"label": ["a", "b", "f"]}, + ] + + targets = [ + {"label": ["b"]}, + {"label": []}, + {"label": ["b", "f"]}, + ] + + check_operator( + operator=Intersect(field="label", allowed_values=["b", "f"]), + inputs=inputs, + targets=targets, + tester=self, + ) + with self.assertRaises(ValueError) as cm: + check_operator( + operator=Intersect(field="label", allowed_values=3), inputs=inputs, targets=targets, - ) - - ## basic test - __test_intersect( - inputs=[{"label": [1,2]}], - targets=[{"label": [1]}], - fields_to_intersect=["label"], - allowed_field_values=[1] - ) - - # multiple fields of the same name - __test_intersect( - inputs = [ - {"label": ["a", "b"]}, - {"label": ["a", "c", "d"]}, - {"name": ["a", "b", "f"]}, - ], - targets = [ - {"label": ["b"]}, - {"label": []}, - {"name": ["b", "f"]}, - ], - fields_to_intersect=["label",'name'], - allowed_field_values=["b", "f"] - ) - - __test_intersect( - inputs = [ - {"label": ["a", "b"]}, - {"label": ["a", "c", "d"]}, - {"label": ["a", "b", "f"]}, - ], - targets = [ - {"label": ["b"]}, - {"label": []}, - {"label": ["b", "f"]}, - ], - fields_to_intersect=["label"], - allowed_field_values=["b", "f"] - ) - - + tester=self, + ) + self.assertEqual(str(cm.exception), "The allowed_values is not a list but '3'") + with self.assertRaises(ValueError) as cm: - __test_intersect( - inputs = [ - {"label": ["a", "b"]}, - ], - targets = [ - {"label": ["b"]}, - ], - fields_to_intersect=["label"], - allowed_field_values=3 - ) - self.assertEqual(str(cm.exception), "The allowed_field_values is not a list but ''") + check_operator( + operator=Intersect( + field="label", allowed_values=["3"], process_every_value=True + ), + inputs=inputs, + targets=targets, + tester=self, + ) + self.assertEqual( + str(cm.exception), + "'process_every_value=True' is not supported in Intersect operator", + ) - inputs = [ {"label": "b"}, ] @@ -650,7 +630,135 @@ def __test_intersect(inputs, targets, fields_to_intersect, allowed_field_values) "The value in field is not a list but 'b'", ] check_operator_exception( - operator=Intersect(field=["label"], allowed_field_values=["c"]), + operator=Intersect(field="label", allowed_values=["c"]), + inputs=inputs, + exception_texts=exception_texts, + tester=self, + ) + + def test_intersect_corresponding_fields(self): + inputs = [ + {"label": ["a", "b"], "position": [0, 1], "other": "not"}, + {"label": ["a", "c", "d"], "position": [0, 1, 2], "other": "relevant"}, + {"label": ["a", "b", "f"], "position": [0, 1, 2], "other": "field"}, + ] + + targets = [ + {"label": ["b"], "position": [1], "other": "not"}, + {"label": [], "position": [], "other": "relevant"}, + {"label": ["b", "f"], "position": [1, 2], "other": "field"}, + ] + + check_operator( + operator=IntersectCorrespondingFields( + field="label", + allowed_values=["b", "f"], + corresponding_fields_to_intersect=["position"], + ), + inputs=inputs, + targets=targets, + tester=self, + ) + + exception_texts = [ + "Error processing instance '0' from stream 'test' in IntersectCorrespondingFields due to the exception above.", + """Field 'acme_field' is not in provided instance. +label (list): + [0] (str): + a + [1] (str): + b +position (list): + [0] (int): + 0 + [1] (int): + 1 +other (str): + not +""", + ] + check_operator_exception( + operator=IntersectCorrespondingFields( + field="acme_field", + allowed_values=["b", "f"], + corresponding_fields_to_intersect=["other"], + ), + inputs=inputs, + exception_texts=exception_texts, + tester=self, + ) + + exception_texts = [ + "Error processing instance '0' from stream 'test' in IntersectCorrespondingFields due to the exception above.", + """Field 'acme_field' is not in provided instance. +label (list): + [0] (str): + a + [1] (str): + b +position (list): + [0] (int): + 0 + [1] (int): + 1 +other (str): + not +""", + ] + check_operator_exception( + operator=IntersectCorrespondingFields( + field="label", + allowed_values=["b", "f"], + corresponding_fields_to_intersect=["acme_field"], + ), + inputs=inputs, + exception_texts=exception_texts, + tester=self, + ) + + exception_texts = [ + "Error processing instance '0' from stream 'test' in IntersectCorrespondingFields due to the exception above.", + "Value of field 'other' is not a list, so IntersectCorrespondingFields can not intersect with allowed values. Field value:\nother (str):\n not\n", + ] + check_operator_exception( + operator=IntersectCorrespondingFields( + field="other", + allowed_values=["b", "f"], + corresponding_fields_to_intersect=["other"], + ), + inputs=inputs, + exception_texts=exception_texts, + tester=self, + ) + + inputs = [ + {"label": ["a", "b"], "position": [0, 1, 2], "other": "not"}, + {"label": ["a", "c", "d"], "position": [0, 1, 2], "other": "relevant"}, + {"label": ["a", "b", "f"], "position": [0, 1, 2], "other": "field"}, + ] + exception_texts = [ + "Error processing instance '0' from stream 'test' in IntersectCorrespondingFields due to the exception above.", + """Number of elements in field 'position' is not the same as the number of elements in field 'label' so the IntersectCorrespondingFields can not remove corresponding values. +label (list): + [0] (str): + a + [1] (str): + b +position (list): + [0] (int): + 0 + [1] (int): + 1 + [2] (int): + 2 +""", + ] + check_operator_exception( + operator=IntersectCorrespondingFields( + field="label", + allowed_values=["b", "f"], + corresponding_fields_to_intersect=["position"], + ), inputs=inputs, exception_texts=exception_texts, tester=self, @@ -3146,4 +3254,3 @@ def test_select_fields(self): } ] TestOperators().compare_streams(joined_stream, expected_joined_stream) - \ No newline at end of file From 713e7e57dc23c68f47444f12ea8683b4cd042b06 Mon Sep 17 00:00:00 2001 From: Yoav Katz Date: Sat, 25 Jan 2025 15:15:49 +0200 Subject: [PATCH 8/8] Updated documentation Signed-off-by: Yoav Katz --- src/unitxt/operators.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py index 16b0169ee..2b230bbf3 100644 --- a/src/unitxt/operators.py +++ b/src/unitxt/operators.py @@ -1477,11 +1477,11 @@ def process_value(self, value: Any) -> Any: class IntersectCorrespondingFields(InstanceOperator): - """Intersects the value of a field, which must be a list, with a given list. + """Intersects the value of a field, which must be a list, with a given list , and removes corresponding elements from other list fields. For example: - Assume the instances contain a field of 'labels' and a field with their corresponding 'positions' in the text. + Assume the instances contain a field of 'labels' and a field with the labels' corresponding 'positions' in the text. IntersectCorrespondingFields(field="label", allowed_values=["b", "f"], @@ -1489,7 +1489,7 @@ class IntersectCorrespondingFields(InstanceOperator): would keep only "b" and "f" values in 'labels' field and their respective values in the 'position' field. - (All other fields aer not effected) + (All other fields are not effected) Given this input: @@ -1510,7 +1510,7 @@ class IntersectCorrespondingFields(InstanceOperator): field - the field to intersected (must contain list values) allowed_values (list) - list of values to keep corresponding_fields_to_intersect (list) - additional list fields from which values - are removed based the corresponding index of values removed from the 'field' + are removed based the corresponding indices of values removed from the 'field' """ field: str