准备
前几天,有人质疑说sigmoid不能做回归,我翻了一下资料
某自编码机源码,采用sigmoid+BCEloss进行矩阵重建【这个矩阵数据被变换到了0-1】,论文里面确实是采用sigmoid做回归
本CSDN,采用distilbert用sigmoid做回归,完成STSB任务,(回归到1-5)
其实可以推导一下:sigmoid+BCE,大致等于直接回归+MSE
++++++++++++++++++以下实验正文++++++++++++++++++
- 标准STSB回归+MSELoss
- Sigmoid+BCELoss
- tanh+TanhLoss
!pip install transformers datasets
# GLUE STS-B: sentence-pair similarity regression.
task = "stsb"
# Distilled BERT checkpoint -- small enough to fine-tune quickly on one GPU.
model_checkpoint = "distilbert-base-uncased"
batch_size = 16
from datasets import load_dataset, load_metric
# NOTE(review): load_metric is deprecated in newer `datasets` releases (moved
# to the `evaluate` package) -- fine for the library version used here.
dataset = load_dataset("glue", task)
metric = load_metric('glue', task)
回归+MSE
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
# Tokenize both sentences as one pair; padding is deferred to the Trainer's collator.
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
encoded_dataset=dataset
# Identity map on 'label' -- kept only for symmetry with the sigmoid/tanh
# sections below, where the label is rescaled at this same step.
encoded_dataset=encoded_dataset.map(lambda example: {'label': example['label']})
encoded_dataset = encoded_dataset.map(preprocess_function, batched=True)
# Peek at one encoded example to sanity-check the features.
encoded_dataset["train"][:1]
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
# Fix: `nn` is used below but torch was never imported in this section
# (the first torch import in the article appears only in the sigmoid+BCE
# section), so this code raised NameError as written.
from torch import nn

# num_labels=1 puts the model in regression mode (MSE loss inside HF).
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=1)
# Re-initialize the classification head to match the custom models in the
# later sections: zero biases, Kaiming-normal weights.
for k, v in model.named_parameters():
    if k == "classifier.bias" or k == "pre_classifier.bias":
        print(k)
        nn.init.constant_(v, 0)
    if k == "classifier.weight" or k == "pre_classifier.weight":
        print(k)
        nn.init.kaiming_normal_(v)
# Shared fine-tuning hyper-parameters; the best checkpoint is selected by
# Pearson correlation on the validation split.
training_kwargs = dict(
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="pearson",
)
args = TrainingArguments("test-glue", **training_kwargs)
def compute_metrics(eval_pred):
    """Score raw regression outputs with the GLUE STS-B metric (Pearson/Spearman)."""
    preds, refs = eval_pred
    # The model emits shape (batch, 1); flatten to a 1-D score vector.
    scores = preds[:, 0]
    return metric.compute(predictions=scores, references=refs)
# Fine-tune and report validation metrics for the plain-regression baseline.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate()
sigmoid+BCE
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
# Tokenize both sentences as one pair; padding is deferred to the Trainer's collator.
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
encoded_dataset=dataset
# Fix: GLUE STS-B gold scores span [0, 5], not [1, 5], so the original
# mapping (label-1)/4 produced negative targets for labels below 1 --
# invalid for BCEWithLogitsLoss, whose targets must lie in [0, 1].
# label/5 maps the full range onto [0, 1]. Pearson/Spearman are invariant
# under affine rescaling, so the reported metric is unaffected.
encoded_dataset=encoded_dataset.map(lambda example: {'label': example['label']/5.0})
encoded_dataset = encoded_dataset.map(preprocess_function, batched=True)
# Peek at one encoded example to sanity-check the features.
encoded_dataset["train"][:1]
from transformers import AutoModel, TrainingArguments, Trainer
import torch
from torch import nn
import transformers
import numpy as np
class MyModel(nn.Module):
    """DistilBERT encoder + 2-layer MLP head trained with sigmoid/BCE.

    The head outputs a single logit per pair; BCEWithLogitsLoss applies the
    sigmoid internally, so labels must already be scaled into [0, 1].
    """

    def __init__(self, hidden_size=768):
        super(MyModel, self).__init__()
        self.loss_fct = nn.BCEWithLogitsLoss()
        self.sub = AutoModel.from_pretrained(model_checkpoint, return_dict=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(hidden_size, 1),
        )

    def forward(self, input_ids, attention_mask, labels=None):
        # Use the first token's ([CLS]) hidden state as the pair embedding.
        outputs = self.sub(input_ids, attention_mask)
        o = outputs.last_hidden_state[:, 0]
        logits = self.fc(o)
        # Fix: `labels` is now optional so the model also works at inference
        # time (HF convention: loss is None when no labels are supplied).
        # Trainer still passes labels during train/eval, so behavior there
        # is unchanged.
        loss = None
        if labels is not None:
            loss = self.loss_fct(logits.view(-1, 1), labels.float().view(-1, 1))
        return transformers.modeling_outputs.SequenceClassifierOutput(loss=loss, logits=logits)
model = MyModel()

# Re-initialize the regression head: zero biases, Kaiming-normal weights
# (fc.0 is the hidden Linear, fc.3 the output Linear of the Sequential).
head_biases = {"fc.0.bias", "fc.3.bias"}
head_weights = {"fc.0.weight", "fc.3.weight"}
for name, param in model.named_parameters():
    if name in head_biases:
        print(name)
        nn.init.constant_(param, 0)
    if name in head_weights:
        print(name)
        nn.init.kaiming_normal_(param)
# Same hyper-parameters as the MSE baseline, so the runs are comparable;
# best checkpoint again picked by Pearson correlation.
training_kwargs = dict(
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="pearson",
)
args = TrainingArguments("test-glue", **training_kwargs)
def sigmoid(x):
    """Numerically stable logistic function.

    exp(-logaddexp(0, -x)) equals 1/(1+exp(-x)) but never overflows: the
    naive form raised a RuntimeWarning (exp overflow) for large negative x.
    """
    return np.exp(-np.logaddexp(0, -x))
def compute_metrics(eval_pred):
    """Squash logits through sigmoid before scoring, matching the [0, 1] target scale."""
    preds, refs = eval_pred
    # Flatten (batch, 1) logits, then map into (0, 1) like the BCE targets.
    scores = sigmoid(preds[:, 0])
    return metric.compute(predictions=scores, references=refs)
# Fine-tune and report validation metrics for the sigmoid+BCE variant.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate()
tanh+自定义匹配的Loss
对期望的梯度 $\frac{x-y}{1-x^2}$ 关于 $x$ 积分,即可得到与 tanh 匹配的损失函数:

$$\int \frac{x-y}{1-x^2}\,dx = -y\left(\frac{\ln\left|x+1\right|}{2}-\frac{\ln\left|x-1\right|}{2}\right)-\frac{1}{2}\ln\left|1-x^2\right|+C$$

$$= -\left(\frac{1+y}{2}\ln\frac{1+x}{2}+\frac{1-y}{2}\ln\frac{1-x}{2}\right)+C'$$

其中 $y$ 为 label,$x=\tanh(\mathrm{out})$。
import torch
from torch import nn
import numpy as np
from torch import Tensor
class TanhLoss(nn.Module):
    """Loss matched to a tanh output, for targets y in [-1, 1].

    Derived by integrating the desired gradient (x - y) / (1 - x^2) with
    x = tanh(logit): both branches evaluate (up to the eps guards)
    -((1+y)/2 * log((1+x)/2) + (1-y)/2 * log((1-x)/2)),
    i.e. binary cross-entropy on the affinely rescaled pair.
    `flag=True` uses the simplified form, `flag=False` the raw antiderivative.
    """

    def __init__(self, flag=True):
        super().__init__()
        self.flag = flag

    def forward(self, xi, y):
        x = torch.tanh(xi)
        eps = 1e-40  # keeps log() finite when x saturates at +-1
        if self.flag:
            # Simplified cross-entropy form on (1+.)/2-rescaled values.
            pos_t, neg_t = (y + 1) / 2, (1 - y) / 2
            pos_x, neg_x = (1 + x) / 2 + eps, (1 - x) / 2 + eps
            per_elem = -(pos_t * torch.log(pos_x) + neg_t * torch.log(neg_x))
        else:
            # Raw antiderivative; np.log(2) is the constant C' that makes
            # the two forms agree.
            per_elem = (
                -0.5 * y * (torch.log(1 + x + eps) - torch.log(1 - x + eps))
                - 0.5 * torch.log(1 - torch.pow(x, 2) + eps)
                + np.log(2)
            )
        return per_elem.mean()
# Sanity checks for TanhLoss on hand-made tensors.
th=TanhLoss()
# Random outputs vs random targets in [-1, 1]: finite, moderate loss.
print(th(torch.FloatTensor(70,1).uniform_(-1,1),torch.FloatTensor(70,1).uniform_(-1,1)))
# uniform_(a, a) fills with the constant a.
# Saturated WRONG prediction (x -> -1, y = 1): loss ~ -log(eps) ~ 92, not inf.
print(th(torch.FloatTensor(70,1).uniform_(-1000000,-1000000),torch.FloatTensor(70,1).uniform_(1,1)))
# Saturated CORRECT prediction (x -> 1, y = 1): loss near 0.
print(th(torch.FloatTensor(70,1).uniform_(1000000,1000000),torch.FloatTensor(70,1).uniform_(1,1)))
# x = y = 0: loss equals log(2) (the integration constant C').
print(th(torch.FloatTensor(70,1).uniform_(0,0),torch.FloatTensor(70,1).uniform_(0,0)))
# flag=False exercises the unsimplified antiderivative; values should match above.
th=TanhLoss(False)
print(th(torch.FloatTensor(70,1).uniform_(-1,1),torch.FloatTensor(70,1).uniform_(-1,1)))
print(th(torch.FloatTensor(70,1).uniform_(-1000000,-1000000),torch.FloatTensor(70,1).uniform_(1,1)))
print(th(torch.FloatTensor(70,1).uniform_(1000000,1000000),torch.FloatTensor(70,1).uniform_(1,1)))
print(th(torch.FloatTensor(70,1).uniform_(0,0),torch.FloatTensor(70,1).uniform_(0,0)))
# Magnitude of log at the eps floor used inside TanhLoss (~ -92).
np.log(1e-40)
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
# Tokenize both sentences as one pair; padding is deferred to the Trainer's collator.
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
encoded_dataset=dataset
# Fix: GLUE STS-B gold scores span [0, 5], not [1, 5], so the original
# mapping (label-3)/2 sent label 0 to -1.5 -- outside tanh's [-1, 1] range,
# giving TanhLoss negative cross-entropy weights. label/2.5 - 1 maps the
# full [0, 5] range onto [-1, 1]. Pearson/Spearman are invariant under
# affine rescaling, so the reported metric is unaffected.
encoded_dataset=encoded_dataset.map(lambda example: {'label': example['label']/2.5-1.0})
encoded_dataset = encoded_dataset.map(preprocess_function, batched=True)
# Peek at one encoded example to sanity-check the features.
encoded_dataset["train"][:1]
from transformers import AutoModel, TrainingArguments, Trainer
import torch
from torch import nn
import transformers
import numpy as np
from torch import Tensor
from typing import Callable, Optional
class MyModel(nn.Module):
    """DistilBERT encoder + 2-layer MLP head trained with the custom TanhLoss.

    The head outputs a single raw logit; TanhLoss applies tanh internally,
    so labels must already be scaled into [-1, 1].
    """

    def __init__(self, hidden_size=768):
        super(MyModel, self).__init__()
        self.loss_fct = TanhLoss()
        self.su = AutoModel.from_pretrained(model_checkpoint, return_dict=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(hidden_size, 1),
        )

    def forward(self, input_ids, attention_mask, labels=None):
        # Use the first token's ([CLS]) hidden state as the pair embedding.
        outputs = self.su(input_ids, attention_mask)
        o = outputs.last_hidden_state[:, 0]
        logits = self.fc(o)
        # Fix: `labels` is now optional so the model also works at inference
        # time (HF convention: loss is None when no labels are supplied).
        # Trainer still passes labels during train/eval, so behavior there
        # is unchanged.
        loss = None
        if labels is not None:
            loss = self.loss_fct(logits.view(-1, 1), labels.float().view(-1, 1))
        return transformers.modeling_outputs.SequenceClassifierOutput(loss=loss, logits=logits)
model = MyModel()

# Re-initialize the regression head: zero biases, Kaiming-normal weights
# (fc.0 is the hidden Linear, fc.3 the output Linear of the Sequential).
head_biases = {"fc.0.bias", "fc.3.bias"}
head_weights = {"fc.0.weight", "fc.3.weight"}
for name, param in model.named_parameters():
    if name in head_biases:
        print(name)
        nn.init.constant_(param, 0)
    if name in head_weights:
        print(name)
        nn.init.kaiming_normal_(param)
# Same hyper-parameters as the other two variants, so the runs are
# comparable; best checkpoint again picked by Pearson correlation.
training_kwargs = dict(
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="pearson",
)
args = TrainingArguments("test-glue", **training_kwargs)
def sigmoid(x):
    """Numerically stable logistic function.

    exp(-logaddexp(0, -x)) equals 1/(1+exp(-x)) but never overflows: the
    naive form raised a RuntimeWarning (exp overflow) for large negative x.
    """
    return np.exp(-np.logaddexp(0, -x))
def tanh(x):
    """Hyperbolic tangent.

    np.tanh replaces the hand-rolled 2*sigmoid(2*x)-1: identical values,
    but stable for large |x| (no exp overflow warning) and one call instead
    of a sibling-function dependency.
    """
    return np.tanh(x)
def compute_metrics(eval_pred):
    """Map logits through tanh before scoring, matching the [-1, 1] target scale."""
    preds, refs = eval_pred
    # Flatten (batch, 1) logits, then squash into (-1, 1) like the targets.
    scores = tanh(preds[:, 0])
    return metric.compute(predictions=scores, references=refs)
# Fine-tune and report validation metrics for the tanh+TanhLoss variant.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate()
Original: https://blog.csdn.net/qq1226317595/article/details/120124998
Author: 袁一白
Title: sigmoid和tanh做有限范围内的回归问题
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/635190/
转载文章受原作者版权保护。转载请注明原作者出处!