Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add model: xlnet #988

Merged
merged 7 commits into from
Apr 7, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add model:xlnet
ResDream committed Apr 3, 2024
commit b3d49214ab15719a5ac921a8d85b200918e60ca7
42 changes: 22 additions & 20 deletions mindnlp/transformers/models/xlnet/modeling_xlnet.py
Original file line number Diff line number Diff line change
@@ -27,9 +27,9 @@
from mindspore import nn, Parameter, Tensor
from mindspore.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...ms_utils import apply_chunking_to_forward
from ...activations import ACT2FN
from ...modeling_utils import PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits, PreTrainedModel, SequenceSummary
from mindnlp. transformers. ms_utils import apply_chunking_to_forward
from mindnlp. transformers. activations import ACT2FN
from mindnlp. transformers. modeling_utils import PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits, PreTrainedModel, SequenceSummary
from mindnlp.utils import (ModelOutput, logging)
from .configuration_xlnet import XLNetConfig

@@ -947,7 +947,7 @@ def construct(
input_ids = input_ids.transpose(1, 0).contiguous()
qlen, bsz = input_ids.shape[0], input_ids.shape[1]
elif inputs_embeds is not None:
inputs_embeds = inputs_embeds.transpose(1, 0).contiguous()
inputs_embeds = inputs_embeds.transpose(1, 0, 2).contiguous()
qlen, bsz = inputs_embeds.shape[0], inputs_embeds.shape[1]
else:
raise ValueError("You have to specify either input_ids or inputs_embeds")
@@ -1041,7 +1041,6 @@ def construct(

# Positional encoding
pos_emb = self.relative_positional_encoding(qlen, klen, bsz=bsz)
pos_emb = pos_emb
pos_emb = self.dropout(pos_emb)

# Prepare head mask if needed
@@ -1055,9 +1054,8 @@ def construct(
head_mask = head_mask.expand(self.n_layer, -1, -1, -1, -1)
elif head_mask.dim() == 2:
head_mask = head_mask.unsqueeze(1).unsqueeze(1).unsqueeze(1)
head_mask = head_mask.to(
dtype=next(self.parameters()).dtype
) # switch to float if need + fp16 compatibility
head_mask_dtype = next(iter(self.parameters_dict().items()))[1].dtype
head_mask = head_mask.to(dtype=head_mask_dtype)
else:
head_mask = [None] * self.n_layer

@@ -1320,7 +1318,7 @@ def _reorder_cache(mems: List[mindspore.Tensor], beam_idx: mindspore.Tensor) ->
[`~PreTrainedModel.beam_sample`] is called. This is required to match `mems` with the correct beam_idx at every
generation step.
"""
return [layer_past.index_select(1, beam_idx.to(layer_past.device)) for layer_past in mems]
return [layer_past.index_select(1, beam_idx) for layer_past in mems]


class XLNetForSequenceClassification(XLNetPreTrainedModel):
@@ -1387,7 +1385,7 @@ def construct(
if self.config.problem_type is None:
if self.num_labels == 1:
self.config.problem_type = "regression"
elif self.num_labels > 1 and (labels.dtype == mindspore.int64 or labels.dtype == mindspore.int32):
elif self.num_labels > 1 and (labels.dtype in (mindspore.int64, mindspore.int32)):
self.config.problem_type = "single_label_classification"
else:
self.config.problem_type = "multi_label_classification"
@@ -1533,12 +1531,12 @@ def construct(

num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]

flat_input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
flat_input_mask = input_mask.view(-1, input_mask.size(-1)) if input_mask is not None else None
flat_input_ids = input_ids.view(-1, input_ids.shape[-1]) if input_ids is not None else None
flat_token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1]) if token_type_ids is not None else None
flat_attention_mask = attention_mask.view(-1, attention_mask.shape[-1]) if attention_mask is not None else None
flat_input_mask = input_mask.view(-1, input_mask.shape[-1]) if input_mask is not None else None
flat_inputs_embeds = (
inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1))
inputs_embeds.view(-1, inputs_embeds.shape[-2], inputs_embeds.shape[-1])
if inputs_embeds is not None
else None
)
@@ -1569,6 +1567,7 @@ def construct(
loss = None
if labels is not None:
loss_fct = CrossEntropyLoss()
labels = labels.astype(mindspore.int32)
loss = loss_fct(reshaped_logits, labels.view(-1))

if not return_dict:
@@ -1646,22 +1645,25 @@ def construct(
sequence_output = outputs[0]

logits = self.qa_outputs(sequence_output)
start_logits, end_logits = logits.split(1, dim=-1)
start_logits, end_logits = logits.split(1, axis=-1)
start_logits = start_logits.squeeze(-1).contiguous()
end_logits = end_logits.squeeze(-1).contiguous()

total_loss = None
if start_positions is not None and end_positions is not None:
# If we are on multi-GPU, split add a dimension
if len(start_positions.size()) > 1:
if len(start_positions.shape) > 1:
start_positions = start_positions.squeeze(-1)
if len(end_positions.size()) > 1:
if len(end_positions.shape) > 1:
end_positions = end_positions.squeeze(-1)
# sometimes the start/end positions are outside our model inputs, we ignore these terms
ignored_index = start_logits.size(1)
ignored_index = start_logits.shape[1]
start_positions = start_positions.clamp(0, ignored_index)
end_positions = end_positions.clamp(0, ignored_index)

start_positions = start_positions.astype(mindspore.int32)
end_positions = end_positions.astype(mindspore.int32)

loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
start_loss = loss_fct(start_logits, start_positions)
end_loss = loss_fct(end_logits, end_positions)
@@ -1870,4 +1872,4 @@ def construct(
"XLNetForTokenClassification",
"XLNetPreTrainedModel",
"XLNetForQuestionAnsweringSimple",
]
]
2 changes: 1 addition & 1 deletion mindnlp/transformers/models/xlnet/tokenization_xlnet.py
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@

import sentencepiece as spm

from ...tokenization_utils import AddedToken, PreTrainedTokenizer
from mindnlp.transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
from mindnlp.utils import logging

logger = logging.get_logger(__name__)
4 changes: 2 additions & 2 deletions mindnlp/transformers/models/xlnet/tokenization_xlnet_fast.py
Original file line number Diff line number Diff line change
@@ -19,8 +19,8 @@
from shutil import copyfile
from typing import List, Optional, Tuple

from ...tokenization_utils import AddedToken
from ...tokenization_utils_fast import PreTrainedTokenizerFast
from mindnlp.transformers.tokenization_utils import AddedToken
from mindnlp.transformers.tokenization_utils_fast import PreTrainedTokenizerFast
from mindnlp.utils import is_sentencepiece_available, logging


3 changes: 1 addition & 2 deletions tests/ut/transformers/models/xlnet/test_modeling_xlnet.py
Original file line number Diff line number Diff line change
@@ -12,8 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
"""test xlnet modeling"""
import unittest

import numpy as np
52 changes: 51 additions & 1 deletion tests/ut/transformers/models/xlnet/test_tokenization_xlnet.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team. All rights reserved.
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,6 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""test xlnet tokenization"""

import unittest

@@ -202,7 +204,55 @@ def test_sequence_builders(self):

@slow
def test_tokenizer_integration(self):
expected_encoding = {'input_ids': [[17, 21442, 270, 17, 10, 14645, 318, 34, 17, 4546, 3145, 787, 13, 7752, 22018, 23, 21, 17, 4546, 3145, 787, 13, 3352, 14431, 13, 5500, 11, 1176, 580, 13, 16819, 4797, 23, 17, 10, 17135, 658, 19, 457, 7932, 13, 184, 19, 3154, 17135, 6468, 19, 1404, 12269, 19, 4229, 5356, 16264, 46, 19, 17, 20545, 10395, 9, 9, 9, 11, 28, 6421, 9531, 20729, 17, 10, 353, 17022, 11, 21, 6421, 9531, 16949, 17, 10, 11509, 753, 11, 33, 95, 2421, 7385, 956, 14431, 2626, 25, 842, 7385, 4836, 21, 1429, 2272, 9855, 3120, 161, 24738, 19, 13203, 658, 218, 787, 21, 430, 18482, 847, 2637, 9, 4, 3], [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 322, 22178, 27, 1064, 22, 956, 13, 11101, 1429, 5854, 24313, 18953, 40, 422, 24366, 68, 1758, 37, 10483, 14257, 31, 207, 263, 21, 203, 3773, 25, 71, 9735, 9, 4, 3], [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 32, 2049, 3442, 17, 13894, 3380, 23, 95, 18, 17634, 2288, 9, 4, 3]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2], [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2], [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]} # fmt: skip
expected_encoding = {'input_ids': [[17, 21442, 270, 17, 10, 14645, 318, 34, 17, 4546, 3145, 787, 13, 7752,
22018, 23, 21, 17, 4546, 3145, 787, 13, 3352, 14431, 13, 5500, 11, 1176,
580, 13, 16819, 4797, 23, 17, 10, 17135, 658, 19, 457, 7932, 13, 184, 19,
3154, 17135, 6468, 19, 1404, 12269, 19, 4229, 5356, 16264, 46, 19, 17,
20545, 10395, 9, 9, 9, 11, 28, 6421, 9531, 20729, 17, 10, 353, 17022, 11,
21, 6421, 9531, 16949, 17, 10, 11509, 753, 11, 33, 95, 2421, 7385, 956,
14431, 2626, 25, 842, 7385, 4836, 21, 1429, 2272, 9855, 3120, 161, 24738,
19, 13203, 658, 218, 787, 21, 430, 18482, 847, 2637, 9, 4, 3],
[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 322, 22178, 27, 1064, 22, 956, 13, 11101, 1429, 5854, 24313,
18953, 40, 422, 24366, 68, 1758, 37, 10483, 14257, 31, 207, 263, 21, 203,
3773, 25, 71, 9735, 9, 4, 3],
[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 32, 2049,
3442, 17, 13894, 3380, 23, 95, 18, 17634, 2288, 9, 4, 3]],
'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]],
'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]} # fmt: skip

self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,