（1）准备数据

（2）构建模型

（3）训练模型

（4）进行预测

# 一、数据读取

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import scale
# Report the installed TensorFlow version (the label is Chinese for "TensorFlow version is:").
print("Tensorflow版本是:",tf.__version__)


Tensorflow版本是: 2.9.1

# Load the housing table; the first CSV row supplies the column names.
df = pd.read_csv("boston.csv", header=0)

# Per-column summary statistics.
summary = df.describe()
print(summary)

# Peek at the first three rows (rendered when run as a notebook cell).
df.head(3)

# Peek at the last three rows.
df.tail(3)


# 二、数据准备


# Work on the NumPy array behind the DataFrame from here on.
ds = df.to_numpy()
print(ds.shape)


(506, 13)

# Dump the raw array so the values can be inspected before preprocessing.
print(ds)


# 三、划分特征数据和标签数据


# Columns 0..11 are the input features; column 12 is the label.
x_data, y_data = ds[:, :12], ds[:, 12]

for caption, array in (('x_data shape=', x_data), ('y_data shape=', y_data)):
    print(caption, array.shape)


x_data shape= (506, 12)
y_data shape= (506,)

# 四、特征数据归一化


# Min-max scale each of the 12 feature columns into [0, 1], in place.
# NOTE: x_data was produced by slicing ds, so (for a NumPy array) this also
# rewrites the feature columns of ds.
for i in range(12):
    col_min = x_data[:, i].min()
    col_span = x_data[:, i].max() - col_min
    if col_span == 0:
        # A constant column has no spread; map it to 0 rather than divide by zero.
        x_data[:, i] = 0.0
    else:
        x_data[:, i] = (x_data[:, i] - col_min) / col_span
x_data


# 五、数据集划分

[En]

The purpose of building and training a machine learning model is to make good predictions on new data. How can we make sure that training is effective and that the model copes with data it has never seen before? Is it really a good idea to use all of the labeled data for training?

• 训练集 – 用于训练模型的子集
• 测试集 – 用于测试模型的子集

[En]

In general, good performance on the test set is a useful indicator of good performance on new data, provided that the test set satisfies the following conditions:

（1）规模足够大，可产生具有统计意义的结果
（2）能代表整个数据集，测试集的特征应该与训练集的特征相同

[En]

In each iteration we train on the training data and evaluate on the test data; then, guided by the test-set evaluation results, we select and adjust model hyperparameters such as the learning rate and the features. Is there a problem with this approach?

[En]

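The problem is that repeating this process many times may cause the model to implicitly fit the peculiarities of that particular test set. This is why a separate validation set is held out for tuning, and the test set is used only for the final evaluation.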

# Sequential split: the first 300 rows train, the next 100 validate, the rest test.
train_num, valid_num = 300, 100
test_num = len(x_data) - train_num - valid_num

x_train, y_train = x_data[:train_num], y_data[:train_num]

x_valid, y_valid = (
    x_data[train_num:train_num + valid_num],
    y_data[train_num:train_num + valid_num],
)

x_test, y_test = (
    x_data[train_num + valid_num:train_num + valid_num + test_num],
    y_data[train_num + valid_num:train_num + valid_num + test_num],
)


# 六、转换数据类型

# Convert the three feature subsets to float32 tensors, the dtype of the model weights.
x_train, x_valid, x_test = (
    tf.cast(subset, dtype=tf.float32) for subset in (x_train, x_valid, x_test)
)


# 七、构建模型

def model(x, w, b):
    """Linear regression forward pass.

    Args:
        x: Feature matrix, one sample per row.
        w: Weight matrix multiplied on the right of ``x``.
        b: Bias added to every row of the product.

    Returns:
        The tensor ``tf.matmul(x, w) + b``.
    """
    return tf.matmul(x, w) + b


# 八、创建待优化变量

# Trainable parameters: weights drawn from a standard normal, bias starting at zero.
W = tf.Variable(
    tf.random.normal(shape=[12, 1], mean=0.0, stddev=1.0, dtype=tf.float32)
)
B = tf.Variable(tf.zeros(1), dtype=tf.float32)

for variable in (W, B):
    print(variable)


# 九、模型训练

## 9.1 设置超参数

training_epochs = 50   # number of passes over the training set
learning_rate = 0.001  # step size handed to the optimizer
batch_size = 10        # samples per mini-batch; read by the training loop


## 9.2 定义损失函数

# Mean squared error is used as the loss function.

def loss(x, y, w, b):
    """Return the mean squared error of the linear model on ``(x, y)``.

    Args:
        x: Feature matrix passed to ``model``.
        y: Labels; either a 1-D vector of length N or an array already
            shaped like the predictions.
        w: Weight matrix passed to ``model``.
        b: Bias passed to ``model``.

    Returns:
        Scalar tensor holding the mean of the squared prediction errors.
    """
    pred = model(x, w, b)
    target = tf.cast(y, pred.dtype)
    if target.shape.rank == 1:
        # A 1-D label vector (N,) would broadcast against the (N, 1)
        # predictions into an (N, N) matrix, comparing every prediction with
        # every label. Make it a column so errors are taken per sample.
        target = tf.expand_dims(target, axis=-1)
    err = pred - target                # per-sample difference
    squared_err = tf.square(err)       # squared error
    return tf.reduce_mean(squared_err) # mean squared error



## 9.3 定义梯度函数

# Compute the gradient of the loss for samples [x, y] at the parameter point [w, b].

def grad(x, y, w, b):
    """Return the gradients of ``loss(x, y, w, b)`` with respect to ``w`` and ``b``.

    Args:
        x: Feature matrix of the batch.
        y: Labels of the batch.
        w: Weight variable to differentiate against.
        b: Bias variable to differentiate against.

    Returns:
        A list ``[d_loss/d_w, d_loss/d_b]`` as produced by ``tape.gradient``.
    """
    # The loss must be evaluated inside the tape so the operations are recorded.
    with tf.GradientTape() as tape:
        loss_ = loss(x, y, w, b)
    return tape.gradient(loss_, [w, b])


## 9.4 选择优化器

# Plain stochastic gradient descent with the step size configured above.
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)


## 9.5 迭代训练

loss_list_train = []  # training-set loss recorded after every epoch
loss_list_valid = []  # validation-set loss recorded after every epoch
total_step = train_num // batch_size  # number of full mini-batches per epoch

for epoch in range(training_epochs):
    for step in range(total_step):
        # Slice out the current mini-batch.
        xs = x_train[step*batch_size:(step+1)*batch_size, :]
        ys = y_train[step*batch_size:(step+1)*batch_size]

        # Differentiate the mini-batch loss w.r.t. the parameters and take
        # one optimizer step; without this W and B never change.
        with tf.GradientTape() as tape:
            batch_loss = loss(xs, ys, W, B)
        grads = tape.gradient(batch_loss, [W, B])
        optimizer.apply_gradients(zip(grads, [W, B]))

    loss_train = loss(x_train, y_train, W, B).numpy()  # training loss for this epoch
    loss_valid = loss(x_valid, y_valid, W, B).numpy()  # validation loss for this epoch
    loss_list_train.append(loss_train)
    loss_list_valid.append(loss_valid)
    print("epoch={:3d},train_loss={:.4f},valid_loss={:.4f}".format(epoch+1, loss_train, loss_valid))


epoch= 1,train_loss=587.1567,valid_loss=414.5585
epoch= 2,train_loss=461.1470,valid_loss=304.6138
epoch= 3,train_loss=366.7652,valid_loss=227.8302
epoch= 4,train_loss=296.0490,valid_loss=175.0670
epoch= 5,train_loss=243.0432,valid_loss=139.5983
epoch= 6,train_loss=203.2935,valid_loss=116.4885
epoch= 7,train_loss=173.4678,valid_loss=102.1276
epoch= 8,train_loss=151.0732,valid_loss=93.8853
epoch= 9,train_loss=134.2448,valid_loss=89.8543
epoch= 10,train_loss=121.5869,valid_loss=88.6597
epoch= 11,train_loss=112.0550,valid_loss=89.3172
epoch= 12,train_loss=104.8675,valid_loss=91.1287
epoch= 13,train_loss=99.4389,valid_loss=93.6048
epoch= 14,train_loss=95.3310,valid_loss=96.4073
epoch= 15,train_loss=92.2155,valid_loss=99.3077
epoch= 16,train_loss=89.8464,valid_loss=102.1555
epoch= 17,train_loss=88.0393,valid_loss=104.8561
epoch= 18,train_loss=86.6560,valid_loss=107.3541
epoch= 19,train_loss=85.5927,valid_loss=109.6212
epoch= 20,train_loss=84.7715,valid_loss=111.6475
epoch= 21,train_loss=84.1340,valid_loss=113.4355
epoch= 22,train_loss=83.6362,valid_loss=114.9954
epoch= 23,train_loss=83.2449,valid_loss=116.3418
epoch= 24,train_loss=82.9353,valid_loss=117.4922
epoch= 25,train_loss=82.6886,valid_loss=118.4646
epoch= 26,train_loss=82.4904,valid_loss=119.2775
epoch= 27,train_loss=82.3301,valid_loss=119.9482
epoch= 28,train_loss=82.1996,valid_loss=120.4934
epoch= 29,train_loss=82.0925,valid_loss=120.9283
epoch= 30,train_loss=82.0043,valid_loss=121.2668
epoch= 31,train_loss=81.9312,valid_loss=121.5214
epoch= 32,train_loss=81.8704,valid_loss=121.7035
epoch= 33,train_loss=81.8199,valid_loss=121.8229
epoch= 34,train_loss=81.7781,valid_loss=121.8884
epoch= 35,train_loss=81.7434,valid_loss=121.9079
epoch= 36,train_loss=81.7151,valid_loss=121.8881
epoch= 37,train_loss=81.6921,valid_loss=121.8350
epoch= 38,train_loss=81.6740,valid_loss=121.7537
epoch= 39,train_loss=81.6602,valid_loss=121.6488
epoch= 40,train_loss=81.6503,valid_loss=121.5241
epoch= 41,train_loss=81.6439,valid_loss=121.3830
epoch= 42,train_loss=81.6407,valid_loss=121.2285
epoch= 43,train_loss=81.6406,valid_loss=121.0630
epoch= 44,train_loss=81.6432,valid_loss=120.8886
epoch= 45,train_loss=81.6484,valid_loss=120.7074
epoch= 46,train_loss=81.6561,valid_loss=120.5208
epoch= 47,train_loss=81.6661,valid_loss=120.3301
epoch= 48,train_loss=81.6783,valid_loss=120.1367
epoch= 49,train_loss=81.6926,valid_loss=119.9414
epoch= 50,train_loss=81.7088,valid_loss=119.7451

# 十、可视化损失值


# Draw the per-epoch training and validation loss curves on one figure.
plt.plot(loss_list_train, color='blue', label="Train Loss")
plt.plot(loss_list_valid, color='red', label="Valid Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(loc=1)


# 十一、查看测试集损失值

# Evaluate the trained parameters once on the held-out test set.
test_loss = loss(x_test, y_test, W, B).numpy()
print("Test_loss:{:.4f}".format(test_loss))


Test_loss:114.1834

# 十二、模型应用

# Pick one random test sample and compare its label with the model's prediction.
test_house_id = np.random.randint(test_num)
y = y_test[test_house_id]
y_pred = model(x_test, W, B)[test_house_id]
# The selected prediction row has a single element; collapse it to a scalar.
y_predit = tf.reshape(y_pred, ()).numpy()
print("House id",test_house_id, "Actual value",y,"Predicted value ",y_predit)


House id 70 Actual value 19.9 Predicted value 25.09165

# 完整代码：

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import scale

def model(x, w, b):
    """Linear regression forward pass.

    Args:
        x: Feature matrix, one sample per row.
        w: Weight matrix multiplied on the right of ``x``.
        b: Bias added to every row of the product.

    Returns:
        The tensor ``tf.matmul(x, w) + b``.
    """
    return tf.matmul(x, w) + b

def loss(x, y, w, b):
    """Return the mean squared error of the linear model on ``(x, y)``.

    Args:
        x: Feature matrix passed to ``model``.
        y: Labels; either a 1-D vector of length N or an array already
            shaped like the predictions.
        w: Weight matrix passed to ``model``.
        b: Bias passed to ``model``.

    Returns:
        Scalar tensor holding the mean of the squared prediction errors.
    """
    pred = model(x, w, b)
    target = tf.cast(y, pred.dtype)
    if target.shape.rank == 1:
        # A 1-D label vector (N,) would broadcast against the (N, 1)
        # predictions into an (N, N) matrix, comparing every prediction with
        # every label. Make it a column so errors are taken per sample.
        target = tf.expand_dims(target, axis=-1)
    err = pred - target
    squared_err = tf.square(err)
    return tf.reduce_mean(squared_err)

def grad(x, y, w, b):
    """Return the gradients of ``loss(x, y, w, b)`` with respect to ``w`` and ``b``.

    Args:
        x: Feature matrix of the batch.
        y: Labels of the batch.
        w: Weight variable to differentiate against.
        b: Bias variable to differentiate against.

    Returns:
        A list ``[d_loss/d_w, d_loss/d_b]`` as produced by ``tape.gradient``.
    """
    # The loss must be evaluated inside the tape so the operations are recorded.
    with tf.GradientTape() as tape:
        loss_ = loss(x, y, w, b)
    return tape.gradient(loss_, [w, b])

# Load the dataset; this listing otherwise uses df without ever creating it.
df = pd.read_csv("boston.csv", header=0)
ds = df.values

# Columns 0..11 are the input features; column 12 is the label.
x_data = ds[:, :12]

y_data = ds[:, 12]

# Min-max scale each feature column into [0, 1], in place.
for i in range(12):
    col_min = x_data[:, i].min()
    col_span = x_data[:, i].max() - col_min
    if col_span == 0:
        # A constant column has no spread; map it to 0 rather than divide by zero.
        x_data[:, i] = 0.0
    else:
        x_data[:, i] = (x_data[:, i] - col_min) / col_span

# Sequential split: the first 300 rows train, the next 100 validate, the rest test.
train_num, valid_num = 300, 100
test_num = len(x_data) - train_num - valid_num

# Feature subsets are converted to float32 tensors; labels stay as sliced arrays.
x_train = tf.cast(x_data[:train_num], dtype=tf.float32)
y_train = y_data[:train_num]

x_valid = tf.cast(x_data[train_num:train_num + valid_num], dtype=tf.float32)
y_valid = y_data[train_num:train_num + valid_num]

x_test = tf.cast(
    x_data[train_num + valid_num:train_num + valid_num + test_num],
    dtype=tf.float32,
)
y_test = y_data[train_num + valid_num:train_num + valid_num + test_num]

# Trainable parameters: weights drawn from a standard normal, bias starting at zero.
W = tf.Variable(
    tf.random.normal(shape=[12, 1], mean=0.0, stddev=1.0, dtype=tf.float32)
)
B = tf.Variable(tf.zeros(1), dtype=tf.float32)

# Epoch count, optimizer step size, and mini-batch size.
training_epochs, learning_rate, batch_size = 50, 0.001, 10

# Plain stochastic gradient descent.
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

loss_list_train = []  # training-set loss recorded after every epoch
loss_list_valid = []  # validation-set loss recorded after every epoch
total_step = train_num // batch_size  # number of full mini-batches per epoch

for epoch in range(training_epochs):
    for step in range(total_step):
        # Slice out the current mini-batch.
        xs = x_train[step*batch_size:(step+1)*batch_size, :]
        ys = y_train[step*batch_size:(step+1)*batch_size]

        # Differentiate the mini-batch loss w.r.t. the parameters and take
        # one optimizer step; without this W and B never change.
        with tf.GradientTape() as tape:
            batch_loss = loss(xs, ys, W, B)
        grads = tape.gradient(batch_loss, [W, B])
        optimizer.apply_gradients(zip(grads, [W, B]))

    loss_train = loss(x_train, y_train, W, B).numpy()
    loss_valid = loss(x_valid, y_valid, W, B).numpy()
    loss_list_train.append(loss_train)
    loss_list_valid.append(loss_valid)
    print("epoch={:3d},train_loss={:.4f},valid_loss={:.4f}".format(epoch+1, loss_train, loss_valid))


Original: https://blog.csdn.net/baidu/article/details/125272411
Author: 果州做题家
Title: 波士顿房价预测（TensorFlow2.9实践）

(0)

