from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
# Load the Boston house-price dataset.
# NOTE(review): load_boston was removed in scikit-learn 1.2; this code needs
# scikit-learn < 1.2 (or port to fetch_openml(name="boston", version=1)).
boston = load_boston()
x = boston['data']                # feature matrix, 13 numeric features per sample
y = boston['target']              # regression target (median house value)
names = boston['feature_names']   # the 13 feature names, parallel to x's columns
# Split into training (80%) and test (20%) sets; fixed random_state for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,random_state=22)
print('x_train前3行数据为:', x_train[0: 3], '\n','y_train前3个数据为:', y_train[0: 3])
# Output:
# x_train前3行数据为: [[2.24236e+00 0.00000e+00 1.95800e+01 0.00000e+00 6.05000e-01 5.85400e+00
#   9.18000e+01 2.42200e+00 5.00000e+00 4.03000e+02 1.47000e+01 3.95110e+02
#   1.16400e+01]
#  [2.61690e-01 0.00000e+00 9.90000e+00 0.00000e+00 5.44000e-01 6.02300e+00
#   9.04000e+01 2.83400e+00 4.00000e+00 3.04000e+02 1.84000e+01 3.96300e+02
#   1.17200e+01]
#  [6.89900e-02 0.00000e+00 2.56500e+01 0.00000e+00 5.81000e-01 5.87000e+00
#   6.97000e+01 2.25770e+00 2.00000e+00 1.88000e+02 1.91000e+01 3.89150e+02
#   1.43700e+01]]
# y_train前3个数据为: [22.7 19.4 22. ]
# Build an ordinary-least-squares model with sklearn's LinearRegression class.
from sklearn.linear_model import LinearRegression
lr_model = LinearRegression()
# Fit the model on the training split.
lr_model.fit(x_train, y_train)
print('LinearRegression模型中各特征系数为:\n', lr_model.coef_)
print('LinearRegression模型中截距为:', lr_model.intercept_)
# Output:
# LinearRegression模型中各特征系数为:
#  [-1.01199845e-01  4.67962110e-02 -2.06902678e-02  3.58072311e+00
#  -1.71288922e+01  3.92207267e+00 -5.67997339e-03 -1.54862273e+00
#   2.97156958e-01 -1.00709587e-02 -7.78761318e-01  9.87125185e-03
#  -5.25319199e-01]
# LinearRegression模型中截距为: 32.428252866991016
# Evaluate on the held-out test split: first 5 predictions, then the R^2 score.
lr_test_pred = lr_model.predict(x_test)
print('预测测试集前5个结果为:\n', lr_test_pred[: 5])
print('测试集得分为:', lr_model.score(x_test, y_test))
# Output:
# 预测测试集前5个结果为:
#  [27.99617259 31.37458822 21.16274236 32.97684211 19.85350998]
# 测试集得分为: 0.7657465943591124
# Ridge regression: L2-regularised linear model (default alpha=1.0).
from sklearn.linear_model import Ridge
ridge_model = Ridge().fit(x_train, y_train)
print('Ridge模型中各特征系数为:\n', ridge_model.coef_)
print('Ridge模型中截距为:', ridge_model.intercept_)
# Output:
# Ridge模型中各特征系数为:
#  [-0.09480494  0.04771602 -0.05491252  3.31822206 -9.58446843  3.96702534
#  -0.01214016 -1.44131977  0.27969468 -0.01070112 -0.69650831  0.01024209
#  -0.53636964]
# Ridge模型中截距为: 27.28471754760631
# Report the ridge model's first 5 test predictions and its R^2 test score.
ridge_test_pred = ridge_model.predict(x_test)
print('预测测试集前5个结果为:\n', ridge_test_pred[: 5])
print('测试集得分为:', ridge_model.score(x_test, y_test))
# Output:
# 预测测试集前5个结果为:
#  [28.34867714 31.24127881 21.57471674 32.47910016 20.12179682]
# 测试集得分为: 0.7630850497410888
# Lasso regression: L1-regularised linear model. The strong penalty (alpha=5)
# drives most coefficients to exactly zero, giving a sparse model.
from sklearn.linear_model import Lasso
lasso_model = Lasso(alpha=5).fit(x_train, y_train)
print('Lasso模型中各特征系数为:\n', lasso_model.coef_)
print('Lasso模型中截距为:', lasso_model.intercept_)
# Output:
# Lasso模型中各特征系数为:
#  [-0.          0.02579275 -0.          0.          0.          0.
#   0.01073689 -0.          0.         -0.00508254 -0.          0.00579759
#  -0.74030877]
# Lasso模型中截距为: 30.908571385880304
# Report the lasso model's first 5 test predictions and its R^2 test score.
lasso_test_pred = lasso_model.predict(x_test)
print('预测测试集前5个结果为:\n', lasso_test_pred[: 5])
print('测试集得分为:', lasso_model.score(x_test, y_test))
# Output:
# 预测测试集前5个结果为:
#  [28.62381554 27.34844922 21.61895603 25.09454176 23.13495752]
# 测试集得分为: 0.5539853847862071
# Plot each model's predictions against the true test targets, one subplot per model.
# Bug fix: `plt` was used below but matplotlib was never imported in this file.
import matplotlib.pyplot as plt

y_pred1 = lr_model.predict(x_test)
y_pred2 = ridge_model.predict(x_test)
y_pred3 = lasso_model.predict(x_test)

# NOTE(review): the titles/legends are Chinese; matplotlib's default font cannot
# render CJK glyphs — configure plt.rcParams['font.sans-serif'] with a CJK-capable
# font if the labels render as empty boxes.
fig = plt.figure(figsize=(10, 8))
sample_idx = range(y_test.shape[0])
# (subplot position, model label used for both title and legend, predictions)
panels = [
    (311, "线性回归", y_pred1),
    (312, "岭回归", y_pred2),
    (313, "Lasso回归", y_pred3),
]
for position, model_label, y_pred in panels:
    ax = fig.add_subplot(position)
    ax.set_title(model_label)
    ax.plot(sample_idx, y_test, label="真实值",)
    ax.plot(sample_idx, y_pred, label=model_label,)
    ax.legend()
plt.show()
# Original: https://blog.csdn.net/lesdiables/article/details/124593814
# Author: lesdiables
# Title: 5 线性回归
# 原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/697500/
# 转载文章受原作者版权保护。转载请注明原作者出处!