指数平滑预测法python实现
说明:基于Python实现指数平滑预测,内容包括平滑系数的确定、平滑值的计算、误差分析、结果输出与可视化。
指数平滑法
指数平滑法是生产预测中常用的一种方法,也用于中短期经济发展趋势预测;在所有预测方法中,指数平滑是用得最多的一种。简单的全期平均法是对时间数列的过去数据一个不漏地全部加以同等利用;移动平均法则不考虑较远期的数据,并在加权移动平均法中给予近期资料更大的权重;而指数平滑法则兼具全期平均法和移动平均法之所长,不舍弃过去的数据,但是仅给予逐渐减弱的影响程度,即随着数据的远离,赋予逐渐收敛为零的权数。
也就是说,指数平滑法是在移动平均法基础上发展起来的一种时间序列分析预测法,它通过计算指数平滑值,配合一定的时间序列预测模型对现象的未来进行预测。其原理是任一期的指数平滑值都是本期实际观察值与前一期指数平滑值的加权平均,即 $S_t = \alpha y_t + (1-\alpha) S_{t-1}$,其中 $\alpha$ 为平滑系数,$y_t$ 为本期实际观察值,$S_{t-1}$ 为前一期指数平滑值。
代码实现
一次指数平滑
def exponential_smoothing_1(alpha, data):
    '''
    Single (first-order) exponential smoothing.

    The first smoothed value is seeded with the first observation; every
    later value is ``alpha * data[i] + (1 - alpha) * previous_smoothed``.

    :param alpha: smoothing coefficient
    :param data: observed series (sequence of numbers)
    :return: list of smoothed values with the same length as ``data``;
             an empty list when ``data`` is empty
    '''
    smoothed = []
    for value in data:
        if smoothed:
            # Blend the current observation with the previous smoothed value.
            value = alpha * value + (1 - alpha) * smoothed[-1]
        # The very first observation is stored unchanged as the seed.
        smoothed.append(value)
    return smoothed
二次指数平滑
def exponential_smoothing_2(alpha, data):
    '''
    Double (second-order) exponential smoothing.

    The series is smoothed once, the result is smoothed again, and for each
    period the two smoothed values are combined into the coefficients
    ``a = 2*S1 - S2`` and ``b = alpha / (1 - alpha) * (S1 - S2)``.

    :param alpha: smoothing coefficient (``alpha == 1`` divides by zero)
    :param data: observed series: list
    :return: three lists of equal length: coefficient a, coefficient b,
             and their sum F = a + b
    '''
    first_pass = exponential_smoothing_1(alpha, data)
    second_pass = exponential_smoothing_1(alpha, first_pass)

    intercepts, slopes, forecasts = [], [], []
    for s1, s2 in zip(first_pass, second_pass):
        intercept = 2 * s1 - s2
        slope = (alpha / (1 - alpha)) * (s1 - s2)
        intercepts.append(intercept)
        slopes.append(slope)
        forecasts.append(intercept + slope)
    return intercepts, slopes, forecasts
三次指数平滑
def exponential_smoothing_3(alpha, data):
    '''
    Triple (third-order) exponential smoothing.

    The series is smoothed three times in succession; for each period the
    three smoothed values S1, S2, S3 are combined into the coefficients
    a, b and c, and F is their sum.

    :param alpha: smoothing coefficient (``alpha == 1`` divides by zero)
    :param data: observed series: list
    :return: four lists of equal length: coefficients a, b, c and the
             forecast value F = a + b + c
    '''
    once = exponential_smoothing_1(alpha, data)
    twice = exponential_smoothing_1(alpha, once)
    thrice = exponential_smoothing_1(alpha, twice)

    a_values, b_values, c_values, forecasts = [], [], [], []
    for s1, s2, s3 in zip(once, twice, thrice):
        a = 3 * s1 - 3 * s2 + s3

        b_scale = alpha / (2 * ((1 - alpha) ** 2))
        b_blend = (6 - 5 * alpha) * s1 - 2 * ((5 - 4 * alpha) * s2) + (4 - 3 * alpha) * s3
        b = b_scale * b_blend

        c_scale = (alpha ** 2) / (2 * ((1 - alpha) ** 2))
        c = c_scale * (s1 - 2 * s2 + s3)

        a_values.append(a)
        b_values.append(b)
        c_values.append(c)
        forecasts.append(a + b + c)
    return a_values, b_values, c_values, forecasts
误差分析
def model_error_analysis(F, data):
    '''
    Error analysis of a forecast series against the observed series.

    ``F[i-1]`` is compared with ``data[i]``, so the first observation is not
    scored and ``len(data) - 1`` periods are evaluated.

    :param F: forecast series: list
    :param data: observed series: list
    :return: tuple ``(AE, MAE, RE, MRE)``: per-period absolute errors (list),
             mean absolute error (number), per-period relative errors (list
             of strings formatted like ``'12.34%'``) and mean relative error
             (string in the same format)
    :raises ZeroDivisionError: if a scored observation ``data[i]`` is zero,
             or if ``data`` has exactly one element (no period to average)
    '''
    periods = len(data) - 1
    abs_errors = []
    rel_errors = []
    abs_total = 0
    rel_total = 0
    for i in range(1, len(data)):
        abs_err = abs(F[i - 1] - data[i])
        # Divide by the magnitude of the observation: with a signed
        # denominator a negative observation would produce a negative
        # "relative error" and distort the mean.
        rel_err = abs_err / abs(data[i])
        abs_total += abs_err
        rel_total += rel_err
        abs_errors.append(abs_err)
        rel_errors.append('{:.2f}%'.format(rel_err * 100))
    MAE = abs_total / periods
    MRE = '{:.2f}%'.format(rel_total * 100 / periods)
    return abs_errors, MAE, rel_errors, MRE
寻找最优平滑系数(基于平均绝对误差)
def alpha_analysis(data, itype=2):
    '''
    Find the smoothing coefficient with the smallest mean absolute error.

    Candidate coefficients are ``0.01 * i`` for ``i`` in 1..99. Each one is
    used to build a forecast series, which is scored with
    ``model_error_analysis``. Ties are resolved in favour of the later
    (larger) candidate because the comparison is ``<=``.

    :param data: observed series: list
    :param itype: smoothing order: 2 selects double smoothing, 3 selects
                  triple smoothing, any other value selects single smoothing
    :return: the coefficient with the smallest mean absolute error, and that
             smallest mean absolute error
    '''
    # Pick the function that turns a coefficient into a forecast series.
    if itype == 2:
        def forecast(alpha):
            return exponential_smoothing_2(alpha, data)[2]
    elif itype == 3:
        def forecast(alpha):
            return exponential_smoothing_3(alpha, data)[3]
    else:
        def forecast(alpha):
            return exponential_smoothing_1(alpha, data)

    best_alpha = 0
    min_MAE = float('Inf')  # start from infinity so any real MAE replaces it
    # To search only 0.1-0.9, iterate over (0.1 * i for i in range(1, 10)).
    for candidate in (0.01 * step for step in range(1, 100)):
        mae = model_error_analysis(forecast(candidate), data)[1]
        if mae <= min_MAE:
            best_alpha, min_MAE = candidate, mae
    return best_alpha, min_MAE
输出二次、三次平滑计算表
def write_xls(alpha, data, t):
    '''
    Write the double- and triple-smoothing calculation tables to a workbook.

    One sheet is produced per smoothing order. Each sheet has a header row,
    one row per period, and two trailing rows holding the mean absolute
    error and the mean relative error. The forecast and error columns are
    left empty on the first data row; on later rows they show the values at
    index ``i - 1``. The workbook is saved under the relative path
    '指数平滑预测.xls'.

    :param alpha: smoothing coefficient
    :param data: observed series: list
    :param t: time labels, one per observation: list
    '''
    def put_row(sheet, row, idx, aligned, lagged=()):
        # Time label, 1-based period number, then the per-period columns.
        sheet.write(row, 0, t[idx])
        sheet.write(row, 1, idx + 1)
        col = 2
        for series in aligned:
            sheet.write(row, col, series[idx])
            col += 1
        # Forecast/error columns are taken from the previous index.
        for series in lagged:
            sheet.write(row, col, series[idx - 1])
            col += 1

    def fill_sheet(sheet, headers, aligned, lagged, mean_abs, mean_rel):
        for col, heading in enumerate(headers):
            sheet.write(0, col, heading)
        # First period: no forecast or error columns are written.
        put_row(sheet, 1, 0, aligned)
        row = 2
        for idx in range(1, len(data)):
            put_row(sheet, row, idx, aligned, lagged)
            row += 1
        # Summary rows: label merged across all but the last column.
        last_col = len(headers) - 1
        sheet.write_merge(row, row, 0, last_col - 1, '平均绝对误差')
        sheet.write_merge(row + 1, row + 1, 0, last_col - 1, '平均相对误差')
        sheet.write(row, last_col, mean_abs)
        sheet.write(row + 1, last_col, mean_rel)

    workbook = xlwt.Workbook()
    sheet_double = workbook.add_sheet('二次指数平滑')
    sheet_triple = workbook.add_sheet('三次指数平滑')

    s_single = exponential_smoothing_1(alpha, data)
    s_double = exponential_smoothing_1(alpha, s_single)
    s_triple = exponential_smoothing_1(alpha, s_double)

    # Double smoothing sheet.
    a_double, b_double, F_double = exponential_smoothing_2(alpha, data)
    AE_double, MAE_double, RE_double, MRE_double = model_error_analysis(F_double, data)
    fill_sheet(
        sheet_double,
        ['时间', 't', '实际值', '一次指数平滑值', '二次指数平滑值', 'a', 'b', 'F', '绝对误差', '相对误差'],
        (data, s_single, s_double, a_double, b_double),
        (F_double, AE_double, RE_double),
        MAE_double,
        MRE_double,
    )

    # Triple smoothing sheet.
    a_triple, b_triple, c_triple, F_triple = exponential_smoothing_3(alpha, data)
    AE_triple, MAE_triple, RE_triple, MRE_triple = model_error_analysis(F_triple, data)
    fill_sheet(
        sheet_triple,
        ['时间', 't', '实际值', '一次指数平滑值', '二次指数平滑值', '三次指数平滑值', 'a', 'b', 'c', 'F', '绝对误差', '相对误差'],
        (data, s_single, s_double, s_triple, a_triple, b_triple, c_triple),
        (F_triple, AE_triple, RE_triple),
        MAE_triple,
        MRE_triple,
    )

    workbook.save('指数平滑预测.xls')
实际值与预测值的对比散点图、折线图
def scatter_diagram(F, data, t):
    '''
    Draw a scatter chart comparing actual and forecast values.

    The last forecast and the first observation/time label are dropped so
    that ``F[i]`` is plotted against ``data[i + 1]`` at ``t[i + 1]``. The
    figure is saved as '散点图.png' and then shown.

    :param F: forecast series: list
    :param data: observed series: list
    :param t: time labels: list
    '''
    predicted = F[:-1]
    observed = data[1:]
    periods = t[1:]

    plt.title("散点图", fontsize=20)  # chart title
    axis_labels = (
        (plt.xlabel, "年份"),  # x-axis title
        (plt.ylabel, "货邮吞吐量(千吨)"),  # y-axis title
    )
    for set_label, text in axis_labels:
        set_label(text, fontsize=12)

    plt.scatter(periods, observed, label='实际值', s=10)
    plt.scatter(periods, predicted, marker='x', label='预测值', s=10)
    plt.legend()
    plt.savefig('散点图.png', bbox_inches='tight', dpi=300)
    plt.show()
def line_chart(F, data, t):
    '''
    Draw a line chart comparing actual and forecast values.

    The last forecast and the first observation/time label are dropped so
    that ``F[i]`` is plotted against ``data[i + 1]`` at ``t[i + 1]``. The
    figure is saved as '折线图.png' and then shown.

    :param F: forecast series: list
    :param data: observed series: list
    :param t: time labels: list
    '''
    predicted = F[:-1]
    observed = data[1:]
    periods = t[1:]

    plt.title("对比曲线", fontsize=20)
    axis_labels = (
        (plt.xlabel, "年份"),
        (plt.ylabel, "货邮吞吐量(千吨)"),
    )
    for set_label, text in axis_labels:
        set_label(text, fontsize=12)

    plt.plot(periods, observed, label='实际值')
    plt.plot(periods, predicted, label='预测值')
    plt.legend()
    plt.savefig('折线图.png', bbox_inches='tight', dpi=300)
    plt.show()
github仓库地址
https://github.com/ishelo/Logistics-Demand-Forecasting-By-Python.git