Python 实现多元线性回归预测
Python 实现 预测 回归 线性 多元
2023-09-14 08:59:07 时间
一、二元输入特征线性回归
测试数据为:ex1data2.txt(每行一条记录,字段以逗号分隔;前两列为输入特征,第三列为预测目标)
2104,3,399900 1600,3,329900 2400,3,369000 1416,2,232000 3000,4,539900 1985,4,299900 1534,3,314900 1427,3,198999 1380,3,212000 1494,3,242500 1940,4,239999 2000,3,347000 1890,3,329999 4478,5,699900 1268,3,259900 2300,4,449900 1320,2,299900 1236,3,199900 2609,4,499998 3031,4,599000 1767,3,252900 1888,2,255000 1604,3,242900 1962,4,259900 3890,3,573900 1100,3,249900 1458,3,464500 2526,3,469000 2200,3,475000 2637,3,299900 1839,2,349900 1000,1,169900 2040,4,314900 3137,3,579900 1811,4,285900 1437,3,249900 1239,3,229900 2132,4,345000 4215,4,549000 2162,4,287000 1664,2,368500 2238,3,329900 2567,4,314000 1200,3,299000 852,2,179900 1852,4,299900 1203,3,239500
Python代码如下:
# -*- coding: UTF-8 -*-
"""Linear regression with two input features, trained by batch gradient descent.

Reads comma-separated records from ``ex1data2.txt`` (two feature columns
followed by the target column), standardises the features, fits the
parameters and prints the prediction for one sample.
"""
import random

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # nothing below plots, so matplotlib is optional
    plt = None

iterations = 10000  # number of gradient-descent iterations
alpha = 0.01  # learning rate

# Model state read by predict(); filled in by main().
mu = None
sigma = None
theta = None


def load_exdata(filename, cast=int):
    """Load a comma-separated numeric text file.

    Args:
        filename: Path of the file; one record per line, fields separated
            by commas.
        cast: Callable applied to every field. Defaults to ``int``; pass
            ``float`` (or another converter) when the data is not integral.

    Returns:
        A list holding one list of converted values per non-blank line.

    Raises:
        ValueError: If a field cannot be converted by ``cast``.
    """
    data = []
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            data.append([cast(item) for item in line.split(',')])
    return data


def featureNormalize(X):
    """Standardise every column of ``X`` to zero mean and unit variance.

    Args:
        X: 2-D array-like, one sample per row and one feature per column.

    Returns:
        Tuple ``(X_norm, mu, sigma)``: the scaled data, and the per-column
        mean and standard deviation, both shaped ``(1, n_features)``.
    """
    X = np.asarray(X, dtype=np.float64)
    mu = np.mean(X, axis=0, keepdims=True)
    sigma = np.std(X, axis=0, keepdims=True)
    # A constant column has zero deviation; dividing by 1 maps it to 0
    # instead of producing NaN/inf.
    sigma[sigma == 0] = 1.0
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma


def computeCost(X, y, theta):
    """Return the squared-error cost ``(X.theta - y)^T (X.theta - y) / (2m)``.

    Args:
        X: Design matrix.
        y: Target values; ``y.shape[0]`` is used as the sample count ``m``.
        theta: Parameter vector.

    Returns:
        The cost as computed by the matrix product, i.e. a ``(1, 1)`` array
        when ``y`` and ``theta`` are column vectors.
    """
    m = y.shape[0]
    residual = X.dot(theta) - y
    return residual.T.dot(residual) / (2 * m)


def gradientDescent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent on the squared-error cost.

    Args:
        X: Design matrix.
        y: Target column vector.
        theta: Initial parameters (not modified in place).
        alpha: Learning rate.
        num_iters: Number of update steps.

    Returns:
        Tuple ``(J_history, theta)``: the cost after every step, shaped
        ``(num_iters, 1)``, and the final parameters.
    """
    m = y.shape[0]
    J_history = np.zeros((num_iters, 1))  # cost after each update
    for i in range(num_iters):
        # Gradient of the cost with respect to theta is X^T (X.theta - y) / m.
        theta = theta - (alpha / m) * X.T.dot(X.dot(theta) - y)
        J_history[i] = computeCost(X, y, theta)
    return J_history, theta


def predict(data):
    """Print and return the prediction for one or more raw samples.

    The samples are scaled with the module-level ``mu`` / ``sigma`` and
    multiplied by the module-level ``theta`` that ``main()`` stores.

    Args:
        data: One sample (sequence of feature values) or a 2-D array-like
            with one sample per row.

    Returns:
        The predictions, one row per sample.

    Raises:
        RuntimeError: If the model has not been trained yet.
    """
    if theta is None or mu is None or sigma is None:
        raise RuntimeError('model is not trained; run main() first')
    testx = np.atleast_2d(np.array(data, dtype=np.float64))
    testx = (testx - mu) / sigma
    # Append the bias column, matching the layout used for training.
    testx = np.hstack([testx, np.ones((testx.shape[0], 1))])
    price = testx.dot(theta)
    for value in price.ravel():
        # Format a scalar: formatting the (1, 1) array directly relies on a
        # deprecated array-to-scalar conversion.
        print('price is %d ' % value)
    return price


def main():
    """Train on ``ex1data2.txt`` and predict the sample ``[1650, 3]``."""
    global mu, sigma, theta
    # The data set is integral; change the cast/dtype for other inputs.
    data = np.array(load_exdata('ex1data2.txt'), np.int64)
    x = data[:, (0, 1)].reshape((-1, 2))
    y = data[:, 2].reshape((-1, 1))
    x, mu, sigma = featureNormalize(x)
    # The bias term is the last column of the design matrix.
    X = np.hstack([x, np.ones((x.shape[0], 1))])
    theta = np.zeros((3, 1))
    J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
    print('Theta found by gradient descent', theta)
    predict([1650, 3])


if __name__ == '__main__':
    main()
二、多元线性回归,以三个特征输入为例
输入数据:testdata.txt。其中第一列是数据的序号,不作为特征读入;第 2~4 列为三个输入特征,最后一列为预测目标。
1,230.1,37.8,69.2,22.1 2,44.5,39.3,45.1,10.4 3,17.2,45.9,69.3,9.3 4,151.5,41.3,58.5,18.5 5,180.8,10.8,58.4,12.9 6,8.7,48.9,75,7.2 7,57.5,32.8,23.5,11.8 8,120.2,19.6,11.6,13.2 9,8.6,2.1,1,4.8 10,199.8,2.6,21.2,10.6 11,66.1,5.8,24.2,8.6 12,214.7,24,4,17.4 13,23.8,35.1,65.9,9.2 14,97.5,7.6,7.2,9.7 15,204.1,32.9,46,19 16,195.4,47.7,52.9,22.4 17,67.8,36.6,114,12.5 18,281.4,39.6,55.8,24.4 19,69.2,20.5,18.3,11.3 20,147.3,23.9,19.1,14.6 21,218.4,27.7,53.4,18 22,237.4,5.1,23.5,12.5 23,13.2,15.9,49.6,5.6 24,228.3,16.9,26.2,15.5 25,62.3,12.6,18.3,9.7 26,262.9,3.5,19.5,12 27,142.9,29.3,12.6,15 28,240.1,16.7,22.9,15.9 29,248.8,27.1,22.9,18.9 30,70.6,16,40.8,10.5 31,292.9,28.3,43.2,21.4 32,112.9,17.4,38.6,11.9 33,97.2,1.5,30,9.6 34,265.6,20,0.3,17.4 35,95.7,1.4,7.4,9.5 36,290.7,4.1,8.5,12.8 37,266.9,43.8,5,25.4 38,74.7,49.4,45.7,14.7 39,43.1,26.7,35.1,10.1 40,228,37.7,32,21.5 41,202.5,22.3,31.6,16.6 42,177,33.4,38.7,17.1 43,293.6,27.7,1.8,20.7 44,206.9,8.4,26.4,12.9 45,25.1,25.7,43.3,8.5 46,175.1,22.5,31.5,14.9 47,89.7,9.9,35.7,10.6 48,239.9,41.5,18.5,23.2 49,227.2,15.8,49.9,14.8 50,66.9,11.7,36.8,9.7 51,199.8,3.1,34.6,11.4 52,100.4,9.6,3.6,10.7 53,216.4,41.7,39.6,22.6 54,182.6,46.2,58.7,21.2 55,262.7,28.8,15.9,20.2 56,198.9,49.4,60,23.7 57,7.3,28.1,41.4,5.5 58,136.2,19.2,16.6,13.2 59,210.8,49.6,37.7,23.8 60,210.7,29.5,9.3,18.4 61,53.5,2,21.4,8.1 62,261.3,42.7,54.7,24.2 63,239.3,15.5,27.3,15.7 64,102.7,29.6,8.4,14 65,131.1,42.8,28.9,18 66,69,9.3,0.9,9.3 67,31.5,24.6,2.2,9.5 68,139.3,14.5,10.2,13.4 69,237.4,27.5,11,18.9 70,216.8,43.9,27.2,22.3 71,199.1,30.6,38.7,18.3 72,109.8,14.3,31.7,12.4 73,26.8,33,19.3,8.8 74,129.4,5.7,31.3,11 75,213.4,24.6,13.1,17 76,16.9,43.7,89.4,8.7 77,27.5,1.6,20.7,6.9 78,120.5,28.5,14.2,14.2 79,5.4,29.9,9.4,5.3 80,116,7.7,23.1,11 81,76.4,26.7,22.3,11.8 82,239.8,4.1,36.9,12.3 83,75.3,20.3,32.5,11.3 84,68.4,44.5,35.6,13.6 85,213.5,43,33.8,21.7 86,193.2,18.4,65.7,15.2 87,76.3,27.5,16,12 88,110.7,40.6,63.2,16 89,88.3,25.5,73.4,12.9 90,109.8,47.8,51.4,16.7 
91,134.3,4.9,9.3,11.2 92,28.6,1.5,33,7.3 93,217.7,33.5,59,19.4 94,250.9,36.5,72.3,22.2 95,107.4,14,10.9,11.5 96,163.3,31.6,52.9,16.9 97,197.6,3.5,5.9,11.7 98,184.9,21,22,15.5 99,289.7,42.3,51.2,25.4 100,135.2,41.7,45.9,17.2 101,222.4,4.3,49.8,11.7 102,296.4,36.3,100.9,23.8 103,280.2,10.1,21.4,14.8 104,187.9,17.2,17.9,14.7 105,238.2,34.3,5.3,20.7 106,137.9,46.4,59,19.2 107,25,11,29.7,7.2 108,90.4,0.3,23.2,8.7 109,13.1,0.4,25.6,5.3 110,255.4,26.9,5.5,19.8 111,225.8,8.2,56.5,13.4 112,241.7,38,23.2,21.8 113,175.7,15.4,2.4,14.1 114,209.6,20.6,10.7,15.9 115,78.2,46.8,34.5,14.6 116,75.1,35,52.7,12.6 117,139.2,14.3,25.6,12.2 118,76.4,0.8,14.8,9.4 119,125.7,36.9,79.2,15.9 120,19.4,16,22.3,6.6 121,141.3,26.8,46.2,15.5 122,18.8,21.7,50.4,7 123,224,2.4,15.6,11.6 124,123.1,34.6,12.4,15.2 125,229.5,32.3,74.2,19.7 126,87.2,11.8,25.9,10.6 127,7.8,38.9,50.6,6.6 128,80.2,0,9.2,8.8 129,220.3,49,3.2,24.7 130,59.6,12,43.1,9.7 131,0.7,39.6,8.7,1.6 132,265.2,2.9,43,12.7 133,8.4,27.2,2.1,5.7 134,219.8,33.5,45.1,19.6 135,36.9,38.6,65.6,10.8 136,48.3,47,8.5,11.6 137,25.6,39,9.3,9.5 138,273.7,28.9,59.7,20.8 139,43,25.9,20.5,9.6 140,184.9,43.9,1.7,20.7 141,73.4,17,12.9,10.9 142,193.7,35.4,75.6,19.2 143,220.5,33.2,37.9,20.1 144,104.6,5.7,34.4,10.4 145,96.2,14.8,38.9,11.4 146,140.3,1.9,9,10.3 147,240.1,7.3,8.7,13.2 148,243.2,49,44.3,25.4 149,38,40.3,11.9,10.9 150,44.7,25.8,20.6,10.1 151,280.7,13.9,37,16.1 152,121,8.4,48.7,11.6 153,197.6,23.3,14.2,16.6 154,171.3,39.7,37.7,19 155,187.8,21.1,9.5,15.6 156,4.1,11.6,5.7,3.2 157,93.9,43.5,50.5,15.3 158,149.8,1.3,24.3,10.1 159,11.7,36.9,45.2,7.3 160,131.7,18.4,34.6,12.9 161,172.5,18.1,30.7,14.4 162,85.7,35.8,49.3,13.3 163,188.4,18.1,25.6,14.9 164,163.5,36.8,7.4,18 165,117.2,14.7,5.4,11.9 166,234.5,3.4,84.8,11.9 167,17.9,37.6,21.6,8 168,206.8,5.2,19.4,12.2 169,215.4,23.6,57.6,17.1 170,284.3,10.6,6.4,15 171,50,11.6,18.4,8.4 172,164.5,20.9,47.4,14.5 173,19.6,20.1,17,7.6 174,168.4,7.1,12.8,11.7 175,222.4,3.4,13.1,11.5 176,276.9,48.9,41.8,27 
177,248.4,30.2,20.3,20.2 178,170.2,7.8,35.2,11.7 179,276.7,2.3,23.7,11.8 180,165.6,10,17.6,12.6 181,156.6,2.6,8.3,10.5 182,218.5,5.4,27.4,12.2 183,56.2,5.7,29.7,8.7 184,287.6,43,71.8,26.2 185,253.8,21.3,30,17.6 186,205,45.1,19.6,22.6 187,139.5,2.1,26.6,10.3 188,191.1,28.7,18.2,17.3 189,286,13.9,3.7,15.9 190,18.7,12.1,23.4,6.7 191,39.5,41.1,5.8,10.8 192,75.5,10.8,6,9.9 193,17.2,4.1,31.6,5.9 194,166.8,42,3.6,19.6 195,149.7,35.6,6,17.3 196,38.2,3.7,13.8,7.6 197,94.2,4.9,8.1,9.7 198,177,9.3,6.4,12.8 199,283.6,42,66.2,25.5 200,232.1,8.6,8.7,13.4
python 代码:
# -*- coding: UTF-8 -*-
"""Linear regression with three input features, trained by batch gradient descent.

Reads comma-separated records from ``testdata.txt`` (a running index, three
feature columns and the target column), standardises the features, fits the
parameters and prints the prediction for one sample.
"""
import random

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # nothing below plots, so matplotlib is optional
    plt = None

iterations = 10000  # number of gradient-descent iterations
alpha = 0.01  # learning rate

# Model state read by predict(); filled in by main().
mu = None
sigma = None
theta = None


def load_exdata(filename, cast=float):
    """Load a comma-separated numeric text file.

    Args:
        filename: Path of the file; one record per line, fields separated
            by commas.
        cast: Callable applied to every field. Defaults to ``float``.

    Returns:
        A list holding one list of converted values per non-blank line.

    Raises:
        ValueError: If a field cannot be converted by ``cast``.
    """
    data = []
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            data.append([cast(item) for item in line.split(',')])
    return data


def featureNormalize(X):
    """Standardise every column of ``X`` to zero mean and unit variance.

    Args:
        X: 2-D array-like, one sample per row and one feature per column.

    Returns:
        Tuple ``(X_norm, mu, sigma)``: the scaled data, and the per-column
        mean and standard deviation, both shaped ``(1, n_features)``.
    """
    X = np.asarray(X, dtype=np.float64)
    mu = np.mean(X, axis=0, keepdims=True)
    sigma = np.std(X, axis=0, keepdims=True)
    # A constant column has zero deviation; dividing by 1 maps it to 0
    # instead of producing NaN/inf.
    sigma[sigma == 0] = 1.0
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma


def computeCost(X, y, theta):
    """Return the squared-error cost ``(X.theta - y)^T (X.theta - y) / (2m)``.

    Args:
        X: Design matrix.
        y: Target values; ``y.shape[0]`` is used as the sample count ``m``.
        theta: Parameter vector.

    Returns:
        The cost as computed by the matrix product, i.e. a ``(1, 1)`` array
        when ``y`` and ``theta`` are column vectors.
    """
    m = y.shape[0]
    residual = X.dot(theta) - y
    return residual.T.dot(residual) / (2 * m)


def gradientDescent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent on the squared-error cost.

    Args:
        X: Design matrix.
        y: Target column vector.
        theta: Initial parameters (not modified in place).
        alpha: Learning rate.
        num_iters: Number of update steps.

    Returns:
        Tuple ``(J_history, theta)``: the cost after every step, shaped
        ``(num_iters, 1)``, and the final parameters.
    """
    m = y.shape[0]
    J_history = np.zeros((num_iters, 1))  # cost after each update
    for i in range(num_iters):
        # Gradient of the cost with respect to theta is X^T (X.theta - y) / m.
        theta = theta - (alpha / m) * X.T.dot(X.dot(theta) - y)
        J_history[i] = computeCost(X, y, theta)
    return J_history, theta


def predict(data):
    """Print and return the prediction for one or more raw samples.

    The samples are scaled with the module-level ``mu`` / ``sigma`` and
    multiplied by the module-level ``theta`` that ``main()`` stores.

    Args:
        data: One sample (sequence of feature values) or a 2-D array-like
            with one sample per row.

    Returns:
        The predictions, one row per sample.

    Raises:
        RuntimeError: If the model has not been trained yet.
    """
    if theta is None or mu is None or sigma is None:
        raise RuntimeError('model is not trained; run main() first')
    testx = np.atleast_2d(np.array(data, dtype=np.float64))
    testx = (testx - mu) / sigma
    # Append the bias column, matching the layout used for training.
    testx = np.hstack([testx, np.ones((testx.shape[0], 1))])
    price = testx.dot(theta)
    for value in price.ravel():
        # Format a scalar: formatting the (1, 1) array directly relies on a
        # deprecated array-to-scalar conversion.
        print('predit value is %f ' % value)
    return price


def main():
    """Train on ``testdata.txt`` and predict the sample ``[151.5, 41.3, 58.5]``."""
    global mu, sigma, theta
    # The data set holds floating-point values.
    data = np.array(load_exdata('testdata.txt'), np.float64)
    # Column 0 is only a running index; columns 1-3 are the input features.
    x = data[:, (1, 2, 3)].reshape((-1, 3))
    # Column 4 (the last one) is the value to predict.
    y = data[:, 4].reshape((-1, 1))
    x, mu, sigma = featureNormalize(x)
    # The bias term is the last column of the design matrix.
    X = np.hstack([x, np.ones((x.shape[0], 1))])
    # One weight per feature plus one for the bias column.
    theta = np.zeros((X.shape[1], 1))
    J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
    print('Theta found by gradient descent', theta)
    predict([151.5, 41.3, 58.5])  # one sample with three features


if __name__ == '__main__':
    main()
相关文章
- Python下的XML-RPC客户端和服务端实现(基于xmlrpclib SimpleXMLRPCServer 模块)
- python实现收邮件判断模块poplib,email
- Python 实现多元线性回归预测
- CPD 算法实现点云配准(python版本)
- Atitit web httphandler的实现 java python node.js c# net php 目录 1.1. Java 过滤器 servelet1 1.2. Python的
- 一起来用python实现一下十大经典排序算法呀
- 实战|Python六行代码实现多个Excel合并
- 编程笔试(解析及代码实现):猴子吃桃。猴子第一天吃了若干个桃子,当即吃了一半,还不解馋,又多吃了一个…的C++、Java、Python、C#等语言代码实现
- Python编程:利用python编程实现对基于时间序列的数据(dataframe格式)按照指定时间范围进行单方向关联,不存在的日期补充为默认的NaN
- Python之matplotlib:基于matplotlib库利用python语言实现一张画布显示多张图的多种方法
- Python:利用python编程实现三维图像绘制展示(六面体旋转、三维球柱状体、下雪场景等)
- Python:利用python语言绘制多个子图经典案例、代码实现之详细攻略
- 基于LSTM、BP神经网络实现电力系统负荷预测(Python代码实现)
- 基于蒙特卡洛法的规模化电动汽车充电负荷预测(Python&Matlab实现)
- 信息时代——微信防撤回(Python实现)
- Python实现点选验证码识别, 模拟登陆小破站并自动发弹幕
- Python实现PSO粒子群优化卷积神经网络CNN分类模型项目实战
- 【华为机试真题详解 Python实现】整理扑克牌【2023 Q1 | 100分】
- python-arima模型statsmodels库实现-有数据集
- python之实现ssl socket客户端(除夕快乐)
- Python编程:contextlib模块实现上下文管理
- 【异常】前端ERR! stack Error: Can't find Python executable "python", you can set the PYTHON env variable.
- 【Python实战】 ---- python 实现 CSDN 的定时自动签到