Skip to content

Commit

Permalink
feat: only_wikipedia
Browse files Browse the repository at this point in the history
  • Loading branch information
lukasellinger committed Oct 18, 2024
1 parent 0ee4a38 commit 1a2ffbe
Show file tree
Hide file tree
Showing 2 changed files with 38,509 additions and 17 deletions.
38,458 changes: 38,457 additions & 1 deletion notebooks/evaluation_pipeline.ipynb

Large diffs are not rendered by default.

68 changes: 52 additions & 16 deletions pipeline_module/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,34 @@ def verify(self, word: str, claim: str, search_word: Optional[str] = None,
entry['search_word'] = search_word
return self.verify_batch([entry], only_intro=only_intro)[0]

@staticmethod
def filter_batch_for_wikipedia(batch: List[Dict],
                               evids_batch: List[List[Dict]],
                               outputs: List[Dict]
                               ) -> Tuple[List[Dict], List[List[Dict]], List[Dict]]:
    """
    Keep only Wikipedia evidence and drop entries that have none left.

    An evidence dict counts as Wikipedia evidence when its ``'title'`` is a string
    ending in ``'(wikipedia)'``. Entries without any such evidence are reported in
    ``outputs`` with ``'predicted': -1`` and ``'in_wiki': 'No'``.

    The evidence lists in ``evids_batch`` are not modified; the filtered evidence is
    returned as new lists. ``outputs`` is extended in place and also returned.

    :param batch: Entries to verify, aligned index-wise with ``evids_batch``.
    :param evids_batch: For each entry, the list of fetched evidence dicts.
    :param outputs: Accumulator of result dicts; rejected entries are appended to it.
    :return: Tuple of (entries that still have evidence, their Wikipedia-only evidence
             lists, the ``outputs`` list passed in).
    """
    # NOTE(review): presumably the evidence fetcher tags titles with their source
    # ('... (wikipedia)' / '... (wiktionary)') - confirm against the fetcher.
    wikipedia_suffix = '(wikipedia)'

    filtered_batch: List[Dict] = []
    filtered_evids: List[List[Dict]] = []
    for entry, evid in zip(batch, evids_batch):
        # Build a new list instead of rewriting ``evid`` in place, so the caller's
        # evidence is left untouched. The isinstance guard skips evidence whose
        # title is missing or None instead of raising on ``.endswith``.
        wiki_evid = [doc for doc in evid
                     if isinstance(doc.get('title'), str)
                     and doc['title'].endswith(wikipedia_suffix)]
        if wiki_evid:
            filtered_batch.append(entry)
            filtered_evids.append(wiki_evid)
            continue

        # No Wikipedia evidence: record the entry as not verifiable.
        outputs.append({
            'id': entry.get('id'),
            'word': entry.get('word'),
            'claim': entry.get('claim'),
            'connected_claim': entry.get('connected_claim'),
            'label': entry.get('label'),
            'predicted': -1,
            'in_wiki': 'No',
        })

    return filtered_batch, filtered_evids, outputs

def verify_test_batch(self, batch: List[Dict], only_intro: bool = True,
max_evidence_count: int = 3, top_k: int = 3) -> List[Dict]:
max_evidence_count: int = 3, top_k: int = 3,
only_wikipedia: bool = False) -> List[Dict]:
"""
Verify a test batch of claims by fetching, selecting, and verifying evidence.
Expand All @@ -125,6 +151,7 @@ def verify_test_batch(self, batch: List[Dict], only_intro: bool = True,
be considered.
:param max_evidence_count: Maximum number of evidences to consider for each claim.
:param top_k: Number of top sentences to select for each claim.
:param only_wikipedia: If True, only evidence from Wikipedia is used; otherwise evidence from Wiktionary is used as well.
:return: List of verified claims with evidence.
"""
filtered_batch = []
Expand All @@ -150,6 +177,11 @@ def verify_test_batch(self, batch: List[Dict], only_intro: bool = True,

_, evids = self.evid_fetcher(evid_fetcher_input, word_lang=self.lang, only_intro=only_intro)

if only_wikipedia:
filtered_batch, evids, outputs = self.filter_batch_for_wikipedia(filtered_batch,
evids,
outputs)

if not filtered_batch:
return outputs

Expand Down Expand Up @@ -195,7 +227,8 @@ def verify_test_dataset(self,
output_file_name: str = '',
only_intro: bool = True,
max_evidence_count: int = 3,
top_k: int = 3) -> Tuple[List[Dict], str, int]:
top_k: int = 3,
only_wikipedia: bool = False) -> Tuple[List[Dict], str, int]:
"""
Verify a test dataset in batches.
Expand All @@ -206,6 +239,7 @@ def verify_test_dataset(self,
be considered.
:param max_evidence_count: Maximum number of evidences to consider for each claim.
:param top_k: Number of top sentences to select for each claim.
:param only_wikipedia: If True, only evidence from Wikipedia is used; otherwise evidence from Wiktionary is used as well.
:return: Tuple containing verification results, classification report, and count of claims
not found in wiki.
"""
Expand All @@ -216,7 +250,9 @@ def verify_test_dataset(self,
for i in tqdm(range(0, len(dataset), batch_size)):
batch = dataset[i:i + batch_size]
output = self.verify_test_batch(batch, only_intro=only_intro,
max_evidence_count=max_evidence_count, top_k=top_k)
max_evidence_count=max_evidence_count,
top_k=top_k,
only_wikipedia=only_wikipedia)

for entry in output:
if entry['predicted'] != -1:
Expand Down Expand Up @@ -397,16 +433,16 @@ def verify_test_dataset(self, dataset, batch_size: int = 4, output_file_name: st
ModelStatementVerifier(
model_name='MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7'),
'de')
# result = pipeline.verify_test_batch([{'word': 'ERTU',
# 'document_search_word': 'glacier',
# 'in_wiki': 'Yes',
# 'connected_claim': 'A glacier is an ice mass resulting '
# 'from snow with a clearly defined '
# 'catchment area, which moves '
# 'independently due to the slope, '
# 'structure of the ice, temperature, '
# 'and the shear stress resulting from '
# 'the mass of the ice and the other '
# 'factors.'},
# ])
pipeline.verify_batch([{'word': 'Apfel', 'text': 'Huhn'}])
result = pipeline.verify_test_batch([{'word': 'ERTU',
'document_search_word': 'glacier',
'in_wiki': 'Yes',
'connected_claim': 'A glacier is an ice mass resulting '
'from snow with a clearly defined '
'catchment area, which moves '
'independently due to the slope, '
'structure of the ice, temperature, '
'and the shear stress resulting from '
'the mass of the ice and the other '
'factors.'},
], only_wikipedia=True)
#pipeline.verify_batch([{'word': 'Apfel', 'text': 'Huhn'}])

0 comments on commit 1a2ffbe

Please sign in to comment.