-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathgetStreamflowModelGrid.m
More file actions
359 lines (312 loc) · 11.8 KB
/
Copy pathgetStreamflowModelGrid.m
File metadata and controls
359 lines (312 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The purpose of this script is to average 7-day, 14-day, and 28-day
% streamflow over each model grid, and record the number of datapoints used
% in each grid over time. Two data structures are created for each of the
% 7-day, 14-day, and 28-day values: one that contains all gauges within a
% grid and one that contains the mean of all gauges within each grid. All
% tables and arrays are stored for each model grid.
% Aug 2022
% Courtney Di Vittorio
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%load data that indicates model grid for each station
%NOTE(review): the workspace variables used further down (siteGeo,
%gridSites, gridID, LAT, LON, gridIDmap) are presumably supplied by these two
%MAT-files; which file provides which variable is not visible here - confirm
load('gridGaugeAlign.mat')
load('modelGridInfo.mat')
%%
%for each model grid
%build 2D matrix of gauge data, where time is in y direction and each gauge
%is in a different column
%to find which file(s) I need to pull into MATLAB, first search through
%gridSites data structure and pull names of regions that contain that
%gridID. then load flow data
%then for each site ID (use gridGaugeCount to get # of sites and
%gridGaugeSites to get site ID) get vector of data
%reduce vector to 2000 - 2022 and store in table, keeping track of which
%column I am on.
%region names (the fields of siteGeo); used below to index gridSites and
%siteGeo and to build each regional file name '<region>FlowData.mat'
snames = fieldnames(siteGeo);
%navigate to where regional flow data is stored in MATLAB data structures. (This
%data was produced using readStreamflowDataTxt.mat)
%% Start with 7-day flows
%Build flows7dayPercGrid: a structure with one field per model grid that
%contains gauges (field name = grid ID). Each field is a table holding a
%'dates' column plus one 'sid<siteID>' column per gauge located in the grid.
%Workspace inputs: gridID, snames, gridSites, siteGeo.
%File inputs: variable flows7dayPerc from each '<region>FlowData.mat'.
%create empty data structure
flows7dayPercGrid = struct();
tic
for j = 1:length(gridID) %for each model grid
    tmpgrid = gridID(j); %model grid
    %table that accumulates the gauges of every region touching this grid
    tmptab = table();
    %find which regions I need
    for k = 1:length(snames) %for each region
        %positions of this region's sites that fall in the current grid
        tmpf = find(gridSites.(snames{k}) == tmpgrid);
        if isempty(tmpf)
            continue %region has no gauge in this grid, nothing to load
        end
        %load only the 7-day variable from the regional file
        tmpfile = [snames{k},'FlowData.mat'];
        load(tmpfile,'flows7dayPerc')
        %regional table: dates plus one column per matching site
        tmpreg = table();
        tmpreg.dates = flows7dayPerc.dates;
        %site IDs of the region, reduced to those that fall within grid
        tmpsiteIDs = siteGeo.(snames{k}).siteID;
        tmpsiteIDmatch = tmpsiteIDs(tmpf);
        for m = 1:numel(tmpsiteIDmatch) %for each siteid that falls in grid
            %field/column name used for this site in flows7dayPerc
            tmpmatch = ['sid',num2str(tmpsiteIDmatch(m))];
            tmpreg.(tmpmatch) = flows7dayPerc.(tmpmatch);
        end
        clear flows7dayPerc
        if width(tmptab) == 0
            %first region found for this grid
            tmptab = tmpreg;
        else
            %grid has gauges in more than one region: keep all of them.
            %Columns already present are skipped so a site is not counted
            %twice, and the join on dates fills NaN where one region has no
            %record for a date.
            tmpdup = setdiff(intersect(tmpreg.Properties.VariableNames, ...
                tmptab.Properties.VariableNames),{'dates'});
            if ~isempty(tmpdup)
                tmpreg = removevars(tmpreg,tmpdup);
            end
            tmptab = outerjoin(tmptab,tmpreg,'Keys','dates','MergeKeys',true);
        end
        %save table under grid name
        flows7dayPercGrid.(tmpgrid) = tmptab;
    end
    %report elapsed time after the first 100 grids, then restart the timer
    if j == 100
        toc
        tic
    end
end
toc
%% Get average 7-day flow for each grid and record number of values available for each date
%Build flows7dayPercGridMean: for every grid present in flows7dayPercGrid,
%a table with columns 'dates', 'meanFlow' (row-wise mean over the gauge
%columns, ignoring NaN) and 'numVals' (number of non-NaN gauges per row).
%Also defines gridsWithData, which later sections use.
%create empty data structure
flows7dayPercGridMean = struct();
%get names of grids that have streamflow data
gridsWithData = fieldnames(flows7dayPercGrid);
for j = 1:length(gridID) %for each model grid
    tmpgrid = gridID(j); %model grid
    %need to make sure this grid has data
    if ~any(tmpgrid == gridsWithData)
        continue
    end
    %grab table for grid
    tmptab = flows7dayPercGrid.(tmpgrid);
    %create new table and add dates
    tmptabnew = table();
    tmptabnew.('dates') = tmptab.dates;
    %remove dates so that only gauge columns remain, then convert to array
    tmptab = removevars(tmptab,'dates');
    tmpdata = table2array(tmptab);
    tmpnanind = isnan(tmpdata); %returns 1 if true
    %number of non nan values in each row
    tmpnumvals = size(tmpdata,2)-sum(tmpnanind,2);
    %row-wise mean that skips NaN; base-MATLAB form, no toolbox required
    tmpMean = mean(tmpdata,2,'omitnan');
    %store date, mean, and number of values in table and save to structure
    tmptabnew.('meanFlow') = tmpMean;
    tmptabnew.('numVals') = tmpnumvals;
    %store table in structure
    flows7dayPercGridMean.(tmpgrid) = tmptabnew;
end
%% Look at results
%Place the 7-day grid means for one date onto the model grid and report the
%percentage of grids without data.
%Workspace inputs: LAT, gridIDmap, gridID, gridsWithData,
%flows7dayPercGridMean.
%Outputs: flowMap (mean flow on dateLook), numValsMap (gauges reporting on
%dateLook), avgNumValsMap (mean of numVals over all dates), totMissingData,
%totPercMissing.
%use gridID map to place values at a point
dateLook = datetime(2011,4,19);
%create empty maps with the same shape as LAT, which is what the loops
%below iterate over and what the maps are plotted against
flowMap = NaN(size(LAT));
numValsMap = NaN(size(LAT));
avgNumValsMap = NaN(size(LAT));
%for each grid in map, find mean flow at this date and number of
%observations. Also calculate average number of observations
missingData = 0;
for k = 1:size(LAT,2)
    for j = 1:size(LAT,1)
        %get grid value
        tmpgrid = gridIDmap(j,k);
        %skip cells with a missing ID or without any gauge data
        if ismissing(tmpgrid) || ~any(tmpgrid == gridsWithData)
            continue
        end
        %get table
        tmpMeans = flows7dayPercGridMean.(tmpgrid);
        %average number of values over all dates
        avgNumValsMap(j,k) = mean(tmpMeans.numVals);
        %row that matches date
        tmpind = find(tmpMeans.dates == dateLook,1);
        if isempty(tmpind)
            %no record for this date: leave NaN in the maps and count the
            %grid as missing
            missingData = missingData+1;
            continue
        end
        %mean value and number of values on that date
        tmpMean = tmpMeans.meanFlow(tmpind);
        flowMap(j,k) = tmpMean;
        numValsMap(j,k) = tmpMeans.numVals(tmpind);
        %calculate number of grids with nan values
        if isnan(tmpMean)
            missingData = missingData+1;
        end
    end
end
%grids with NaN on this date plus grids that never had gauge data; numel
%matches the length(gridID) used in the loops over gridID
totMissingData = missingData+(numel(gridID) - numel(gridsWithData));
totPercMissing = 100*(totMissingData/numel(gridID));
disp(['Percentage of Grids with Missing Data = ',num2str(totPercMissing)])
%% plot
%Draw three texture maps on the LAT/LON grid, one figure each: mean flow on
%dateLook, number of observations on dateLook, and the average number of
%observations. The two count maps use a fixed color range of [0 10]; the
%flow map keeps the automatic range.
close all
plotData = {flowMap, numValsMap, avgNumValsMap};
plotTitles = {['7-day Percent Flows for ',datestr(dateLook)], ...
    ['Number of Observations in Each Grid on ',datestr(dateLook)], ...
    'Average Number of Observations in Each Grid from 1990 to 2022'};
%empty entry = leave the color limits on automatic
plotColorLims = {[], [0 10], [0 10]};
for p = 1:numel(plotData)
    figure
    geoshow(LAT,LON,plotData{p},'DisplayType','texturemap')
    colorbar
    xlabel('Longitude')
    ylabel('Latitude')
    title(plotTitles{p})
    set(gca,'fontsize',14)
    axis equal tight
    if ~isempty(plotColorLims{p})
        caxis(plotColorLims{p})
    end
end
%% 14 day
%Build flows14dayPercGrid: a structure with one field per model grid that
%contains gauges (field name = grid ID). Each field is a table holding a
%'dates' column plus one 'sid<siteID>' column per gauge located in the grid.
%Workspace inputs: gridID, snames, gridSites, siteGeo.
%File inputs: variable flows14dayPerc from each '<region>FlowData.mat'.
%create empty data structure
flows14dayPercGrid = struct();
tic
for j = 1:length(gridID) %for each model grid
    tmpgrid = gridID(j); %model grid
    %table that accumulates the gauges of every region touching this grid
    tmptab = table();
    %find which regions I need
    for k = 1:length(snames) %for each region
        %positions of this region's sites that fall in the current grid
        tmpf = find(gridSites.(snames{k}) == tmpgrid);
        if isempty(tmpf)
            continue %region has no gauge in this grid, nothing to load
        end
        %load only the 14-day variable from the regional file
        tmpfile = [snames{k},'FlowData.mat'];
        load(tmpfile,'flows14dayPerc')
        %regional table: dates plus one column per matching site
        tmpreg = table();
        tmpreg.dates = flows14dayPerc.dates;
        %site IDs of the region, reduced to those that fall within grid
        tmpsiteIDs = siteGeo.(snames{k}).siteID;
        tmpsiteIDmatch = tmpsiteIDs(tmpf);
        for m = 1:numel(tmpsiteIDmatch) %for each siteid that falls in grid
            %field/column name used for this site in flows14dayPerc
            tmpmatch = ['sid',num2str(tmpsiteIDmatch(m))];
            tmpreg.(tmpmatch) = flows14dayPerc.(tmpmatch);
        end
        clear flows14dayPerc
        if width(tmptab) == 0
            %first region found for this grid
            tmptab = tmpreg;
        else
            %grid has gauges in more than one region: keep all of them.
            %Columns already present are skipped so a site is not counted
            %twice, and the join on dates fills NaN where one region has no
            %record for a date.
            tmpdup = setdiff(intersect(tmpreg.Properties.VariableNames, ...
                tmptab.Properties.VariableNames),{'dates'});
            if ~isempty(tmpdup)
                tmpreg = removevars(tmpreg,tmpdup);
            end
            tmptab = outerjoin(tmptab,tmpreg,'Keys','dates','MergeKeys',true);
        end
        %save table under grid name
        flows14dayPercGrid.(tmpgrid) = tmptab;
    end
    %report elapsed time after the first 100 grids, then restart the timer
    if j == 100
        toc
        tic
    end
end
toc
%% Get average 14-day flow for each grid and record number of values available for each date
%Build flows14dayPercGridMean: for every grid present in
%flows14dayPercGrid, a table with columns 'dates', 'meanFlow' (row-wise mean
%over the gauge columns, ignoring NaN) and 'numVals' (number of non-NaN
%gauges per row).
%create empty data structure
flows14dayPercGridMean = struct();
%grids with 14-day data, taken from the 14-day structure itself so the
%check below always matches the structure that is indexed
gridsWithData14 = fieldnames(flows14dayPercGrid);
for j = 1:length(gridID) %for each model grid
    tmpgrid = gridID(j); %model grid
    %need to make sure this grid has data
    if ~any(tmpgrid == gridsWithData14)
        continue
    end
    %grab table for grid
    tmptab = flows14dayPercGrid.(tmpgrid);
    %create new table and add dates
    tmptabnew = table();
    tmptabnew.('dates') = tmptab.dates;
    %remove dates so that only gauge columns remain, then convert to array
    tmptab = removevars(tmptab,'dates');
    tmpdata = table2array(tmptab);
    tmpnanind = isnan(tmpdata); %returns 1 if true
    %number of non nan values in each row
    tmpnumvals = size(tmpdata,2)-sum(tmpnanind,2);
    %row-wise mean that skips NaN; base-MATLAB form, no toolbox required
    tmpMean = mean(tmpdata,2,'omitnan');
    %store date, mean, and number of values in table and save to structure
    tmptabnew.('meanFlow') = tmpMean;
    tmptabnew.('numVals') = tmpnumvals;
    %store table in structure
    flows14dayPercGridMean.(tmpgrid) = tmptabnew;
end
%% 28 day
%Build flows28dayPercGrid: a structure with one field per model grid that
%contains gauges (field name = grid ID). Each field is a table holding a
%'dates' column plus one 'sid<siteID>' column per gauge located in the grid.
%Workspace inputs: gridID, snames, gridSites, siteGeo.
%File inputs: variable flows28dayPerc from each '<region>FlowData.mat'.
%create empty data structure
flows28dayPercGrid = struct();
tic
for j = 1:length(gridID) %for each model grid
    tmpgrid = gridID(j); %model grid
    %table that accumulates the gauges of every region touching this grid
    tmptab = table();
    %find which regions I need
    for k = 1:length(snames) %for each region
        %positions of this region's sites that fall in the current grid
        tmpf = find(gridSites.(snames{k}) == tmpgrid);
        if isempty(tmpf)
            continue %region has no gauge in this grid, nothing to load
        end
        %load only the 28-day variable from the regional file
        tmpfile = [snames{k},'FlowData.mat'];
        load(tmpfile,'flows28dayPerc')
        %regional table: dates plus one column per matching site
        tmpreg = table();
        tmpreg.dates = flows28dayPerc.dates;
        %site IDs of the region, reduced to those that fall within grid
        tmpsiteIDs = siteGeo.(snames{k}).siteID;
        tmpsiteIDmatch = tmpsiteIDs(tmpf);
        for m = 1:numel(tmpsiteIDmatch) %for each siteid that falls in grid
            %field/column name used for this site in flows28dayPerc
            tmpmatch = ['sid',num2str(tmpsiteIDmatch(m))];
            tmpreg.(tmpmatch) = flows28dayPerc.(tmpmatch);
        end
        clear flows28dayPerc
        if width(tmptab) == 0
            %first region found for this grid
            tmptab = tmpreg;
        else
            %grid has gauges in more than one region: keep all of them.
            %Columns already present are skipped so a site is not counted
            %twice, and the join on dates fills NaN where one region has no
            %record for a date.
            tmpdup = setdiff(intersect(tmpreg.Properties.VariableNames, ...
                tmptab.Properties.VariableNames),{'dates'});
            if ~isempty(tmpdup)
                tmpreg = removevars(tmpreg,tmpdup);
            end
            tmptab = outerjoin(tmptab,tmpreg,'Keys','dates','MergeKeys',true);
        end
        %save table under grid name
        flows28dayPercGrid.(tmpgrid) = tmptab;
    end
    %report elapsed time after the first 100 grids, then restart the timer
    if j == 100
        toc
        tic
    end
end
toc
%% Get average 28-day flow for each grid and record number of values available for each date
%Build flows28dayPercGridMean: for every grid present in
%flows28dayPercGrid, a table with columns 'dates', 'meanFlow' (row-wise mean
%over the gauge columns, ignoring NaN) and 'numVals' (number of non-NaN
%gauges per row).
%create empty data structure
flows28dayPercGridMean = struct();
%grids with 28-day data, taken from the 28-day structure itself so the
%check below always matches the structure that is indexed
gridsWithData28 = fieldnames(flows28dayPercGrid);
for j = 1:length(gridID) %for each model grid
    tmpgrid = gridID(j); %model grid
    %need to make sure this grid has data
    if ~any(tmpgrid == gridsWithData28)
        continue
    end
    %grab table for grid
    tmptab = flows28dayPercGrid.(tmpgrid);
    %create new table and add dates
    tmptabnew = table();
    tmptabnew.('dates') = tmptab.dates;
    %remove dates so that only gauge columns remain, then convert to array
    tmptab = removevars(tmptab,'dates');
    tmpdata = table2array(tmptab);
    tmpnanind = isnan(tmpdata); %returns 1 if true
    %number of non nan values in each row
    tmpnumvals = size(tmpdata,2)-sum(tmpnanind,2);
    %row-wise mean that skips NaN; base-MATLAB form, no toolbox required
    tmpMean = mean(tmpdata,2,'omitnan');
    %store date, mean, and number of values in table and save to structure
    tmptabnew.('meanFlow') = tmpMean;
    tmptabnew.('numVals') = tmpnumvals;
    %store table in structure
    flows28dayPercGridMean.(tmpgrid) = tmptabnew;
end