Skip to content

Commit

Permalink
Return skip_special_tokens in _tokenizer.py
Browse files Browse the repository at this point in the history
  • Loading branch information
krammnic authored Oct 11, 2024
1 parent 4315ee7 commit 33c6d54
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions torchtune/models/phi3/_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,13 @@ def encode(
trim_leading_whitespace=trim_leading_whitespace,
)

def decode(self, ids: List[int]) -> str:
def decode(self, ids: List[int], skip_special_tokens: bool = True) -> str:
"""Decode token IDs to strings.
Args:
ids (List[int]): The input token IDs to be decoded.
skip_special_tokens (bool): If True, special tokens are filtered out of the decoded string;
if False, they are kept. Default is True.
Returns:
str: The decoded text.
Expand All @@ -114,7 +116,7 @@ def decode(self, ids: List[int]) -> str:
for token_id in ids:
# Filter out special tokens and the placeholder tokens added
# by the Phi3 team
if token_id >= 32_000 and token_id <= 32_064:
if skip_special_tokens and (token_id >= 32_000 and token_id <= 32_064):
continue
else:
ids_for_decode.append(token_id)
Expand Down

0 comments on commit 33c6d54

Please sign in to comment.