PyModis/Modis_Fit.py at master · abakane1/PyModis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# coding:utf-8
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.svm import SVR
import Common_func,Modis_IO,stations.Station_ETL
plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False #用来正常显示负号

try:
    from osgeo import ogr
except:
    import ogr


def display_as_scatter(x,y,title):
    plt.xlabel("遥感温度值")
    plt.ylabel("气象温度值")
    plt.title(title)
    plt.scatter(x, y)
    plt.show()


def display_as_ROC(y_pred, y_test, title):
    # 做ROC曲线
    plt.figure()
    plt.plot(range(len(y_pred)), y_pred, 'b', label="predict")
    plt.plot(range(len(y_pred)), y_test, 'r', label="test", alpha=0.5)
    # plt.scatter(y_pred, y_test)
    plt.legend(loc="upper right")  # 显示图中的标签
    plt.xlabel("range")
    plt.ylabel('values')
    plt.title(str(title))
    plt.show()


def fit(fit_range='station', fit_method='numpy'):
    # data = pd.read_csv('/Users/zzl/PycharmProjects/PyModis/staion-grid-withlanlon-data.csv') # mac path
    data = pd.read_csv('D:\\abakane1\\PyModis\\staion-grid-withlanlon-data.csv')
    data = data[(data['grid_value'] < 500) & (data['station_value'] < 500) & (data['grid_value'] > 0) & (
            data['station_value'] > 0)]
    if fit_range == 'station':
        station_list = data['stationID'].unique()
        for station in station_list:
            station_value = data[data['stationID'] == station]
            x = station_value['grid_value'].values
            y = station_value['station_value'].values
            if fit_method == 'sklearn':
                a, b, RMSE, R2 = multi_linear_fit(x, y)
            else:
                a, b, RMSE, R2 = linear_fit(x, y)
    if fit_range == 'year':
        for year in range(2003, 2015):
            station_value = data[data['year'] == year]
            x = station_value['grid_value'].values
            y = station_value['station_value'].values
            if fit_method == 'sklearn':
                a, b, RMSE, R2 = multi_linear_fit(x, y)
            else:
                a, b, RMSE, R2 = linear_fit(x, y)
    else:
        y = data['grid_value'].values
        x = data['station_value'].values
        if fit_method == 'sklearn':
            a, b, RMSE, R2 = multi_linear_fit(x, y)
        else:
            a, b, RMSE, R2 = linear_fit(x, y)

    # display_as_scatter(data)
    return a, b, RMSE, R2


# 线性回归-numpy
def linear_fit(x, y):
    f1 = np.polyfit(x, y, 1)
    p1 = np.poly1d(f1)
    # fit values, and mean
    yhat = p1(x)  # or [p(z) for z in x]
    RMSE = np.sqrt(np.mean((yhat - y) ** 2))
    ybar = np.sum(y) / len(y)  # or sum(y)/len(y)
    ssreg = np.sum((yhat - ybar) ** 2)  # or sum([ (yihat - ybar)**2 for yihat in yhat])
    sstot = np.sum((y - ybar) ** 2)  # or sum([ (yi - ybar)**2 for yi in y])
    # print('a:', f1[0], 'b:', f1[1], 'r2:', ssreg / sstot)
    print(f1[0], f1[1], RMSE, ssreg / sstot)
    #display_as_scatter(x, y)
    #return f1[0], f1[1], RMSE, ssreg / sstot


# 线性回归- sklearn train:test = 8:2
def multi_linear_fit(x, y):
    # data = pd.read_csv('/Users/zzl/PycharmProjects/PyModis/staion-grid-withlanlon.csv')
    # data = data[(data['grid_value'] > 0) & (data['station_value'] > 0) & (data['stationid'] == 53898)]
    X = np.array(x).reshape(-1, 1)
    # y = pd.factorize(data.loc[:,'grid_value'].values)[0].reshape(-1, 1)
    # print(year)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)
    # print('X_train.shape={}\n y_train.shape ={}\n X_test.shape={}\n,  y_test.shape={}'.format(X_train.shape,y_train.shape,X_test.shape,y_test.shape))
    linreg = LinearRegression()
    model = linreg.fit(X, y)
    # 训练后模型截距
    b = linreg.intercept_
    # 训练后模型权重（特征个数无变化）
    a = linreg.coef_[0]
    # 预测
    y_pred = linreg.predict(X_test)
    # print(y_pred)
    # calculate RMSE by hand
    RMSE = metrics.mean_absolute_error(y_test, y_pred)
    # print(metrics.mean_squared_error(y_test, y_pred))
    R2 = metrics.r2_score(y_test, y_pred)
    #display_as_scatter(x, y)
    #print(a, b, RMSE, R2)
    return a, b, RMSE, R2

# 支持向量回归
def svm_linear_fit(X,y):
    X = np.array(X).reshape(-1, 1)
    ###############################################################################
    # Fit regression model
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    y_rbf = svr_rbf.fit(X, y).predict(X)
    #y_lin = svr_lin.fit(X, y).predict(X)
    #y_poly = svr_poly.fit(X, y).predict(X)

    ###############################################################################
    # look at the results
    lw = 2
    plt.scatter(X, y, color='darkorange', label='data')
    #plt.hold('on')
    plt.plot(X, y_rbf, color='navy', lw=lw, label='RBF model')
    #plt.plot(X, y_lin, color='c', lw=lw, label='Linear model')
    #plt.plot(X, y_poly, color='cornflowerblue', lw=lw, label='Polynomial model')
    plt.xlabel('data')
    plt.ylabel('target')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()


#  20190321 修改
#  1：根据每一个grid value的值model生成一个新的station value
#  2：最后生成一幅新的tif为完整的气温数据
def set_grid_value_by_model(filename, model='linear'):
     im_data, im_geotrans, im_proj = Modis_IO.read_img(filename,1)
     #
     #
     #
     return im_data, im_geotrans, im_proj
#    Modis_IO.write_img(filename, im_proj, im_geotrans, im_data)


def funcTest():
    root_path = Common_func.UsePlatform()
    data_file=root_path + 'grid_station_night.txt'
    data = pd.read_csv(data_file,",")
    data = data[(data['gridval'] > 0) & (data['stationval'] > 0)]
    X = data['gridval'].values
    y = data['stationval'].values
    stationID_list = data['stationID'].unique()
    title = '黄淮海地区2003-2018年夜间遥感-气象温度散点图'
    for stationID in stationID_list:
        lon,lat = stations.Station_ETL.get_lonlat_by_stationID(stationID)
        station_data = data[data['stationID'] == stationID]
        station_name = stations.Station_ETL.get_station_name_by_stationID(stationID)
        X = station_data['gridval'].values
        y = station_data['stationval'].values
        a, b, RMSE, R2 = multi_linear_fit(X,y)
        print (stationID,station_name,lon,lat,R2)
    # #svm_linear_fit(X,y)
    #multi_linear_fit(X,y)
    #display_as_scatter(X,y,title)

funcTest()
# a, b, RMSE, R2 = fit(fit_range='station', fit_method='numpy')
#svm_linear_fit()