Using sigmoid and tanh for regression over a bounded range

Preparation

A few days ago someone questioned whether sigmoid can be used for regression at all, so I dug through some material:

The source code of a certain autoencoder uses sigmoid + BCE loss to reconstruct a matrix (whose values have been rescaled into [0, 1]); the accompanying paper does indeed use sigmoid for regression.
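As a minimal sketch of that setup (my own illustration, not the paper's code), reconstructing a matrix whose values have been scaled into [0, 1] looks roughly like this:

import torch
from torch import nn

# hypothetical decoder outputs (raw logits) and a target matrix scaled into [0, 1]
logits = torch.randn(8, 256)
target = torch.rand(8, 256)

# BCEWithLogitsLoss applies sigmoid internally, so the reconstruction is bounded to (0, 1)
recon_loss = nn.BCEWithLogitsLoss()(logits, target)
reconstruction = torch.sigmoid(logits)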

This CSDN post uses DistilBERT with sigmoid to do regression on the STS-B task (regressing scores in the 1-5 range).

In fact this can be derived: sigmoid + BCE is roughly equivalent to direct regression + MSE.
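A quick sketch of why (my own shorthand for the derivation): the gradient of BCE with respect to the logit has the same error-driven form as the gradient of MSE with respect to the prediction,

$$\frac{\partial}{\partial z}\,\mathrm{BCE}\big(\sigma(z),\,y\big)=\sigma(z)-y,\qquad \frac{\partial}{\partial \hat y}\,\tfrac{1}{2}\big(\hat y-y\big)^2=\hat y-y,$$

so both losses push the prediction toward the target with a step proportional to the error; the only difference is that the sigmoid output is squashed into (0, 1).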

++++++++++++++++++ The experiments ++++++++++++++++++

  1. Standard STS-B regression + MSELoss
  2. Sigmoid + BCELoss
  3. tanh + custom TanhLoss

!pip install transformers datasets

task = "stsb"
model_checkpoint = "distilbert-base-uncased"
batch_size = 16
from datasets import load_dataset, load_metric
dataset = load_dataset("glue", task)
metric = load_metric('glue', task)

Regression + MSE


from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
encoded_dataset = dataset
# labels are kept as-is: raw STS-B similarity scores
encoded_dataset = encoded_dataset.map(lambda example: {'label': example['label']})
encoded_dataset = encoded_dataset.map(preprocess_function, batched=True)
encoded_dataset["train"][:1]
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=1)

from torch import nn

# re-initialize the classification head: zero biases, Kaiming-normal weights
for k, v in model.named_parameters():
  if k == "classifier.bias" or k == "pre_classifier.bias":
    print(k)
    nn.init.constant_(v, 0)
  if k == "classifier.weight" or k == "pre_classifier.weight":
    print(k)
    nn.init.kaiming_normal_(v)
args = TrainingArguments(
    "test-glue",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="pearson",
)
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    # logits have shape (N, 1); take the single regression output per example
    predictions = predictions[:, 0]
    return metric.compute(predictions=predictions, references=labels)

trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

trainer.evaluate()

sigmoid+BCE


from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
encoded_dataset=dataset
# rescale labels from the 1-5 range into [0, 1] so they are valid BCE targets
encoded_dataset = encoded_dataset.map(lambda example: {'label': (example['label'] - 1.0) / 4.0})
encoded_dataset = encoded_dataset.map(preprocess_function, batched=True)
encoded_dataset["train"][:1]

from transformers import AutoModel, TrainingArguments, Trainer

import torch
from torch import nn
import transformers
import numpy as np
class MyModel(nn.Module):
    def __init__(self, hidden_size=768):
        super(MyModel, self).__init__()

        self.loss_fct = nn.BCEWithLogitsLoss()
        self.sub  = AutoModel.from_pretrained(model_checkpoint,return_dict=True)

        self.fc = nn.Sequential(
            nn.Linear(hidden_size,hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(hidden_size,1),
        )

    def forward(self, input_ids, attention_mask, labels):
        outputs = self.sub(input_ids, attention_mask)
        # use the [CLS] token representation as the sentence-pair feature
        o = outputs.last_hidden_state[:, 0]

        logits = self.fc(o)

        # BCEWithLogitsLoss applies sigmoid internally, so raw logits are passed in
        loss = self.loss_fct(logits.view(-1, 1), labels.float().view(-1, 1))
        return transformers.modeling_outputs.SequenceClassifierOutput(loss=loss, logits=logits)

model=MyModel()

for k,v in model.named_parameters():
  if k=="fc.0.bias" or k=="fc.3.bias":
    print(k)
    nn.init.constant_(v, 0)
  if k=="fc.0.weight" or k=="fc.3.weight":
    print(k)
    nn.init.kaiming_normal_(v)
args = TrainingArguments(
    "test-glue",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="pearson",
)
def sigmoid(x):
    return 1/(1 + np.exp(-x))
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions[:, 0]
    # map logits through sigmoid so predictions lie in the same [0, 1] range as the rescaled labels
    return metric.compute(predictions=sigmoid(predictions), references=labels)
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

trainer.evaluate()

tanh + a matching custom loss

The idea is to pick a loss whose derivative with respect to $x=\tanh(z)$ is $\frac{x-y}{1-x^2}$, so that after multiplying by the tanh derivative $1-x^2$ the gradient on the logit $z$ is simply $x-y$. Integrating gives the loss:

$$\int \frac{x-y}{1-x^2}\,dx$$

$$= -y\left(\frac{\ln\left|x+1\right|}{2}-\frac{\ln\left|x-1\right|}{2}\right)-\frac{1}{2}\ln\left|1-x^2\right|+C$$

$$= -\left(\frac{1+y}{2}\ln\frac{1+x}{2}+\frac{1-y}{2}\ln\frac{1-x}{2}\right)+C'$$

where $y$ is the label and $x=\tanh(\mathrm{out})$.

import torch
from torch import nn
import numpy as np
from torch import Tensor
class TanhLoss(nn.Module):
    def __init__(self, flag=True):
        super().__init__()
        self.flag = flag

    def forward(self, xi, y):
        # xi: raw logits, y: targets in [-1, 1]
        x = torch.tanh(xi)
        if not self.flag:
            # first antiderivative form; + log(2) aligns its constant with the flag=True form
            one = -0.5*y*(torch.log(1+x+1e-40)-torch.log(1-x+1e-40))-0.5*torch.log(1-torch.pow(x, 2)+1e-40)+np.log(2)
        else:
            # equivalent BCE-like form on (1±x)/2 with soft targets (1±y)/2; 1e-40 keeps the logs finite
            ya, ym, xa, xm = (y+1)/2, (1-y)/2, (1+x)/2+1e-40, (1-x)/2+1e-40
            one = -1*(ya*torch.log(xa)+ym*torch.log(xm))
        return torch.mean(one)
# sanity checks for both branches: random, maximally wrong, exactly right, and mid-range predictions
th = TanhLoss()
print(th(torch.FloatTensor(70, 1).uniform_(-1, 1), torch.FloatTensor(70, 1).uniform_(-1, 1)))
print(th(torch.FloatTensor(70, 1).uniform_(-1000000, -1000000), torch.FloatTensor(70, 1).uniform_(1, 1)))
print(th(torch.FloatTensor(70, 1).uniform_(1000000, 1000000), torch.FloatTensor(70, 1).uniform_(1, 1)))
print(th(torch.FloatTensor(70, 1).uniform_(0, 0), torch.FloatTensor(70, 1).uniform_(0, 0)))
th = TanhLoss(False)
print(th(torch.FloatTensor(70, 1).uniform_(-1, 1), torch.FloatTensor(70, 1).uniform_(-1, 1)))
print(th(torch.FloatTensor(70, 1).uniform_(-1000000, -1000000), torch.FloatTensor(70, 1).uniform_(1, 1)))
print(th(torch.FloatTensor(70, 1).uniform_(1000000, 1000000), torch.FloatTensor(70, 1).uniform_(1, 1)))
print(th(torch.FloatTensor(70, 1).uniform_(0, 0), torch.FloatTensor(70, 1).uniform_(0, 0)))
np.log(1e-40)  # magnitude of the floor imposed by the 1e-40 guard inside the logs
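As a quick sanity check (my own addition, not in the original post), autograd confirms that the gradient of this loss with respect to a raw logit is (tanh(z) − y) / N after the batch mean, mirroring the σ(z) − y gradient of sigmoid + BCE:

# verify d(loss)/dz = (tanh(z) - y) / N, using the TanhLoss defined above
z = torch.randn(5, 1, requires_grad=True)
y = torch.empty(5, 1).uniform_(-1, 1)
TanhLoss()(z, y).backward()
print(torch.allclose(z.grad, (torch.tanh(z.detach()) - y) / z.numel(), atol=1e-5))  # expect: True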

from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)
encoded_dataset=dataset
# rescale labels from the 1-5 range into [-1, 1] to match the tanh output range
encoded_dataset = encoded_dataset.map(lambda example: {'label': (example['label'] - 3.0) / 2.0})
encoded_dataset = encoded_dataset.map(preprocess_function, batched=True)
encoded_dataset["train"][:1]
from transformers import AutoModel, TrainingArguments, Trainer

import torch
from torch import nn
import transformers
import numpy as np
from torch import Tensor
from typing import Callable, Optional

class MyModel(nn.Module):
    def __init__(self, hidden_size=768):
        super(MyModel, self).__init__()
        self.loss_fct = TanhLoss()
        self.su  = AutoModel.from_pretrained(model_checkpoint,return_dict=True)

        self.fc = nn.Sequential(
            nn.Linear(hidden_size,hidden_size),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(hidden_size,1),
        )

    def forward(self,input_ids, attention_mask,labels):

        outputs = self.su(input_ids, attention_mask)
        o=outputs.last_hidden_state[:,0]

        logits = self.fc(o)

        # raw logits are passed in; TanhLoss applies tanh internally
        loss = self.loss_fct(logits.view(-1, 1), labels.float().view(-1, 1))
        return transformers.modeling_outputs.SequenceClassifierOutput(loss=loss,logits=logits)

model=MyModel()

for k,v in model.named_parameters():
  if k=="fc.0.bias" or k=="fc.3.bias":
    print(k)
    nn.init.constant_(v, 0)
  if k=="fc.0.weight" or k=="fc.3.weight":
    print(k)
    nn.init.kaiming_normal_(v)
args = TrainingArguments(
    "test-glue",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="pearson",
)

def sigmoid(x):
    return 1/(1 + np.exp(-x))
def tanh(x):
    # tanh expressed through sigmoid: tanh(x) = 2*sigmoid(2x) - 1
    return 2*sigmoid(2*x) - 1
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions[:, 0]
    # squash logits into [-1, 1] to match the rescaled labels
    return metric.compute(predictions=tanh(predictions), references=labels)
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)
trainer.train()
trainer.evaluate()

Original: https://blog.csdn.net/qq1226317595/article/details/120124998
Author: 袁一白
Title: sigmoid和tanh做有限范围内的回归问题
