From 363580444b61da8e262815598d430438c53ed90f Mon Sep 17 00:00:00 2001 From: Desiree Vogt-Lee Date: Fri, 19 Jul 2019 15:42:41 +1000 Subject: [PATCH 1/3] updated to pytorch_transformers --- README.md | 4 ++-- convert_tf_checkpoint_to_pytorch.py | 2 +- inference.py | 2 +- pybert/io/dataset.py | 2 +- pybert/model/nn/bert_fine.py | 5 +++-- train_bert_multi_label.py | 4 ++-- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 6b671b2..5d6e85a 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This repo contains a PyTorch implementation of a pretrained BERT model for multi-label text classification. -**note**: for the new `pytorch-pretrained-bert` package . use comd `from pytorch_pretrained_bert.modeling import BertPreTrainedModel` +**note**: for the new `pytorch_transformers` package . use comd `from pytorch_transformers.modeling_bert import BertPreTrainedModel` ## Structure of the code At the root of the project, you will see: @@ -42,7 +42,7 @@ At the root of the project, you will see: - PyTorch 1.0 - matplotlib - pandas -- pytorch_pretrained_bert (load bert model) +- pytorch_transformers (load bert model) ## How to use the code diff --git a/convert_tf_checkpoint_to_pytorch.py b/convert_tf_checkpoint_to_pytorch.py index 483eba9..1476425 100644 --- a/convert_tf_checkpoint_to_pytorch.py +++ b/convert_tf_checkpoint_to_pytorch.py @@ -1,7 +1,7 @@ #encoding:utf-8 import os from pybert.config.basic_config import configs as config -from pytorch_pretrained_bert.convert_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch +from pytorch_transformers.convert_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch if __name__ == "__main__": os.system('cp {config} {save_path}'.format(config = config['pretrained']['bert']['bert_config_file'], diff --git a/inference.py b/inference.py index c39d5f1..3a99ce6 100644 --- a/inference.py +++ b/inference.py @@ -10,7 +10,7 @@ from pybert.model.nn.bert_fine import BertFine from pybert.test.predicter import Predicter from pybert.preprocessing.preprocessor import EnglishPreProcessor -from pytorch_pretrained_bert.tokenization import BertTokenizer +from pytorch_transformers.tokenization_bert import BertTokenizer warnings.filterwarnings("ignore") # 主函数 diff --git a/pybert/io/dataset.py b/pybert/io/dataset.py index 1051ffc..8ef5b53 100644 --- a/pybert/io/dataset.py +++ b/pybert/io/dataset.py @@ -4,7 +4,7 @@ import numpy as np from pathlib import Path from torch.utils.data import Dataset -from pytorch_pretrained_bert.tokenization import BertTokenizer +from pytorch_transformers.tokenization_bert import BertTokenizer class InputExample(object): def __init__(self, guid, text_a, text_b=None, label=None): diff --git a/pybert/model/nn/bert_fine.py b/pybert/model/nn/bert_fine.py index 853a0a3..d0d1e2c 100644 --- a/pybert/model/nn/bert_fine.py +++ b/pybert/model/nn/bert_fine.py @@ -1,8 +1,9 @@ #encoding:utf-8 import torch.nn as nn -from pytorch_pretrained_bert.modeling import PreTrainedBertModel, BertModel +from pytorch_transformers import BertModel +from pytorch_transformers.modeling_bert import BertPreTrainedModel -class BertFine(PreTrainedBertModel): +class BertFine(BertPreTrainedModel): def __init__(self,bertConfig,num_classes): super(BertFine ,self).__init__(bertConfig) self.bert = BertModel(bertConfig) # bert模型 diff --git a/train_bert_multi_label.py b/train_bert_multi_label.py index 27f9389..aac9b87 100644 --- a/train_bert_multi_label.py +++ b/train_bert_multi_label.py @@ -15,8 +15,8 @@ from 
pybert.callback.modelcheckpoint import ModelCheckpoint from pybert.callback.trainingmonitor import TrainingMonitor from pybert.train.metrics import F1Score,AccuracyThresh,MultiLabelReport -from pytorch_pretrained_bert.tokenization import BertTokenizer -from pytorch_pretrained_bert.optimization import BertAdam +from pytorch_transformers.tokenization_bert import BertTokenizer +from pytorch_transformers.optimization import AdamW warnings.filterwarnings("ignore") # 主函数 From f4e1830428c88ed76343107659f5c95679555740 Mon Sep 17 00:00:00 2001 From: Desiree Vogt-Lee Date: Fri, 9 Aug 2019 11:10:46 +1000 Subject: [PATCH 2/3] updated adam and scheduler --- .gitignore | 3 +++ pybert/train/trainer.py | 2 ++ train_bert_multi_label.py | 15 +++++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 894a44c..09d8ccd 100644 --- a/.gitignore +++ b/.gitignore @@ -94,6 +94,9 @@ venv.bak/ .spyderproject .spyproject +# vscode +.vscode + # Rope project settings .ropeproject diff --git a/pybert/train/trainer.py b/pybert/train/trainer.py index 95ba24d..be927ff 100644 --- a/pybert/train/trainer.py +++ b/pybert/train/trainer.py @@ -16,6 +16,7 @@ def __init__(self,train_configs): self.logger = train_configs['logger'] self.verbose = train_configs['verbose'] self.criterion = train_configs['criterion'] + self.scheduler = train_configs['scheduler'] self.optimizer = train_configs['optimizer'] self.lr_scheduler = train_configs['lr_scheduler'] self.early_stopping = train_configs['early_stopping'] @@ -122,6 +123,7 @@ def _train_epoch(self,data): # 学习率更新方式 if (step + 1) % self.gradient_accumulation_steps == 0: self.lr_scheduler.batch_step(training_step = self.global_step) + self.scheduler.step() self.optimizer.step() self.optimizer.zero_grad() self.global_step += 1 diff --git a/train_bert_multi_label.py b/train_bert_multi_label.py index aac9b87..5c18573 100644 --- a/train_bert_multi_label.py +++ b/train_bert_multi_label.py @@ -16,7 +16,7 @@ from pybert.callback.trainingmonitor import TrainingMonitor from pybert.train.metrics import F1Score,AccuracyThresh,MultiLabelReport from pytorch_transformers.tokenization_bert import BertTokenizer -from pytorch_transformers.optimization import AdamW +from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule warnings.filterwarnings("ignore") # 主函数 @@ -89,10 +89,12 @@ def main(): len(train_dataset.examples) / config['train']['batch_size'] / config['train']['gradient_accumulation_steps'] * config['train']['epochs']) # t_total: total number of training steps for the learning rate schedule # warmup: portion of t_total for the warmup - optimizer = BertAdam(optimizer_grouped_parameters, - lr = config['train']['learning_rate'], - warmup = config['train']['warmup_proportion'], - t_total = num_train_steps) + optimizer = AdamW(optimizer_grouped_parameters, + lr = config['train']['learning_rate']) + + scheduler = WarmupLinearSchedule(optimizer, + warmup_steps=config['train']['warmup_steps'], + t_total=num_train_steps) # **************************** callbacks *********************** logger.info("initializing callbacks") @@ -110,7 +112,7 @@ def main(): lr_scheduler = BertLR(optimizer = optimizer, learning_rate = config['train']['learning_rate'], t_total = num_train_steps, - warmup = config['train']['warmup_proportion']) + warmup = config['train']['warmup_steps']) # **************************** training model *********************** logger.info('training model....') @@ -119,6 +121,7 @@ def main(): 'model': model, 'logger': logger, 
'optimizer': optimizer, + 'scheduler': scheduler, 'resume': config['train']['resume'], 'epochs': config['train']['epochs'], 'n_gpu': config['train']['n_gpu'], From 46d82e08f41096b8d1299265beb58279a7af3a63 Mon Sep 17 00:00:00 2001 From: Desiree Vogt-Lee Date: Fri, 9 Aug 2019 11:54:03 +1000 Subject: [PATCH 3/3] bert_fine update --- pybert/model/nn/bert_fine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pybert/model/nn/bert_fine.py b/pybert/model/nn/bert_fine.py index d0d1e2c..e137b1f 100644 --- a/pybert/model/nn/bert_fine.py +++ b/pybert/model/nn/bert_fine.py @@ -9,7 +9,7 @@ def __init__(self,bertConfig,num_classes): self.bert = BertModel(bertConfig) # bert模型 self.dropout = nn.Dropout(bertConfig.hidden_dropout_prob) self.classifier = nn.Linear(in_features=bertConfig.hidden_size, out_features=num_classes) - self.apply(self.init_bert_weights) + self.apply(self.init_weights) # 默认情况下,bert encoder模型所有的参数都是参与训练的,32的batch_size大概8.7G显存 # 可以通过以下设置为将其设为不训练,只将classifier这一层进行反响传播,32的batch_size大概显存1.1G self.unfreeze_bert_encoder() @@ -25,8 +25,8 @@ def unfreeze_bert_encoder(self): def forward(self, input_ids, token_type_ids, attention_mask, label_ids=None, output_all_encoded_layers=False): _, pooled_output = self.bert(input_ids, token_type_ids, - attention_mask, - output_all_encoded_layers=output_all_encoded_layers) + attention_mask) + #output_all_encoded_layers=output_all_encoded_layers) pooled_output = self.dropout(pooled_output) logits = self.classifier(pooled_output) return logits
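
For reference, the BertAdam-to-AdamW change in PATCH 2/3 follows the usual pytorch_transformers pattern: warmup and linear decay are no longer arguments of the optimizer but live in a separate WarmupLinearSchedule whose step() is called once per optimizer update. Below is a minimal sketch of that pattern; the parameter grouping, learning rate, warmup_steps value, data loader, loss, and `model` / `num_train_steps` names are illustrative placeholders rather than the project's actual configuration.

    # Minimal sketch (not the project's exact training loop); `model`, `train_dataloader`
    # and `num_train_steps` are assumed to exist, and the hyperparameters are placeholders.
    import torch.nn as nn
    from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule

    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=100,         # e.g. config['train']['warmup_steps']
                                     t_total=num_train_steps)  # total optimizer updates over all epochs

    criterion = nn.BCEWithLogitsLoss()  # multi-label loss
    for input_ids, token_type_ids, attention_mask, labels in train_dataloader:
        logits = model(input_ids, token_type_ids, attention_mask)
        loss = criterion(logits, labels.float())
        loss.backward()
        optimizer.step()
        scheduler.step()   # advance the warmup/linear-decay schedule once per update
        optimizer.zero_grad()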
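
Similarly, PATCH 3/3 reflects that BertModel in pytorch_transformers no longer accepts output_all_encoded_layers; its forward returns a tuple whose first two entries are the last-layer hidden states and the pooled [CLS] output (per-layer hidden states are requested through the config's output_hidden_states flag instead). A minimal sketch of a classification head written against that interface, mirroring BertFine — the class name, num_classes argument, and keyword-argument call are illustrative, not part of the patch:

    # Sketch of a pytorch_transformers-style multi-label head; not the repository's
    # exact BertFine class.
    import torch.nn as nn
    from pytorch_transformers import BertModel
    from pytorch_transformers.modeling_bert import BertPreTrainedModel

    class BertMultiLabelHead(BertPreTrainedModel):
        def __init__(self, config, num_classes):
            super(BertMultiLabelHead, self).__init__(config)
            self.bert = BertModel(config)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
            self.classifier = nn.Linear(config.hidden_size, num_classes)
            self.apply(self.init_weights)  # init_bert_weights was renamed to init_weights

        def forward(self, input_ids, token_type_ids=None, attention_mask=None):
            outputs = self.bert(input_ids,
                                token_type_ids=token_type_ids,
                                attention_mask=attention_mask)
            _, pooled_output = outputs[:2]   # (last hidden state, pooled [CLS] output)
            return self.classifier(self.dropout(pooled_output))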