Influweb-IT · mattiamazzoli · Nov 14, 2024
diff --git a/DataTreatment.py b/DataTreatment.py
@@ -209,10 +209,19 @@ def change_regname(x):
         return "Valle d'Aosta"
     else: return x
 
-YEAR_MIN, YEAR_MAX = 2023, 2024
+## set current season
 
 dates = datetime.date.today()
 
+## define current season
+# A season is labelled '<start year>-<end year>' and begins in November.
+# Any earlier month maps to the season that began the previous year; this also
+# covers May-October, so current_season is always defined (no NameError off-season).
+# NOTE(review): confirm the off-season choice agrees with date_season().
+season_start = dates.year if dates.month>=11 else dates.year-1
+current_season = str(season_start)+'-'+str(season_start+1)
+YEAR_MIN, YEAR_MAX = season_start, season_start+1
+
 last_week = lastweek(dates)
 
 my_yearweek = last_week
@@ -402,7 +411,7 @@ def transf_date(x):
 weekly_complete.insert(5, "season", weekly_complete.submission_date.map(date_season))
 
 #keep only current season
-weekly_complete = weekly_complete[weekly_complete.season =='2023-2024']
+weekly_complete = weekly_complete[weekly_complete.season == current_season]
 
 # remove duplicates within the same week, keeping the last one
 weekly = weekly_complete.drop_duplicates(['participantID','submission_week'], keep='last', inplace=False)
@@ -461,7 +470,7 @@ def transf_date(x):
 data_ILI = data_ILI[ data_ILI.season.isin(seasons) ] #get only weeks in seasons
 
 ILI_weeks=set(data_ILI.submission_week)
-submission_weeks=[x for x in list(week_season.keys()) if x<='2024-18'] #ILI_weeks
+submission_weeks=list(week_season.keys()) #every week key of week_season (ILI_weeks)
 
 data_ILI['symptoms'] = data_ILI['weekly.Q1.0'].apply(lambda x: False if x==True else True)
 
@@ -542,10 +551,12 @@ def transf_date(x):
 pd.Series(wau).to_frame('active users').to_csv(os.path.join(output_dir, 'active_users.csv'), header=True)
 
 #save participants values
-intake['gender'].value_counts().to_csv(os.path.join(output_dir, 'gender.csv'), header=True)
-intake['edu'].value_counts().to_csv(os.path.join(output_dir, 'education.csv'), header=True)
-intake['occupation'].value_counts().to_csv(os.path.join(output_dir, 'occupation.csv'), header=True)
-intake['age_class'].value_counts().to_csv(os.path.join(output_dir, 'age.csv'), header=True)
+participants = data.drop_duplicates('participantID')  # first row kept per participantID
+
+# one CSV of category counts per demographic column (column -> output file name)
+demographic_files = {'gender': 'gender.csv', 'edu': 'education.csv', 'occupation': 'occupation.csv', 'age_class': 'age.csv'}
+for column, filename in demographic_files.items():
+    participants[column].value_counts().to_csv(os.path.join(output_dir, filename), header=True)
 
 
 # ## Mappa
@@ -555,10 +566,10 @@ def transf_date(x):
 regioni = gpd.read_file('Limiti01012024_g-2/Reg01012024_g/Reg01012024_g_WGS84.shp')
 regioni = regioni[['DEN_REG','geometry']].set_index('DEN_REG')
 
-partecipanti_reg = data_ILI.reg.value_counts().squeeze().reset_index().set_index('reg')
+partecipanti_reg = data_ILI.reg.value_counts().reindex(list(regioni.index)).fillna(0).squeeze().reset_index().set_index('reg')  # data_ILI row counts per region, aligned to the shapefile regions; regions absent from data_ILI get 0
 
-part_reg = intake.reg.value_counts().squeeze()/pop_reg * 100000
-part_reg = part_reg.reindex(list(regioni.index))
+part_reg = participants.reg.value_counts().squeeze()/pop_reg * 100000  # participants per region scaled by pop_reg (presumably regional population - verify) to a per-100,000 rate
+part_reg = part_reg.reindex(list(regioni.index)).fillna(0)  # align to the shapefile regions; regions with no value become 0 instead of NaN
 part_reg = part_reg.reset_index().set_index('index')
 
 reg_map = regioni.join(part_reg).reset_index().rename(columns={0:'count'})

diff --git a/Plotting.py b/Plotting.py
@@ -113,7 +113,7 @@ def _demographic_composition_plot():
     ax3[1,1].set_xlabel('')
     ax3[1,1].spines[['right', 'top']].set_visible(False)
 
-    fig3.text(0.0, 0.6, _('Number of participants in the 2023-2024 season'), va='center', rotation='vertical')
+    fig3.text(0.0, 0.6, _('Number of participants in the current season'), va='center', rotation='vertical')
 
     plt.tight_layout()
     st.pyplot(fig3)
@@ -122,7 +122,7 @@ def _geo_plot():
 
     st.title(_('Geographic aspects'))
 
-    st.write(_("The first map shows the cumulative incidence in the 2023-2024 season of probable cases of influenza-like illness (ILI) reported in each region by Influweb participants."),
+    st.write(_("The first map shows the cumulative incidence in the current season of probable cases of influenza-like illness (ILI) reported in each region by Influweb participants."),
              _('The second map shows the regional coverage of participants in each region expressed as the number of participants per 100,000 inhabitants.'))
 
 
@@ -140,14 +140,14 @@ def _geo_plot():
     with tab4:
         fig4, ax4 = plt.subplots(figsize=(6,6))
         gdf.plot(ax=ax4, cmap='Blues', column='ar', legend=True, edgecolor="w", linewidth=.3,
-                     legend_kwds={"label":  _('Attack rate per 100,000 inhabitants in the 2023-2024 season'), "orientation": "vertical","shrink":0.6})
+                     legend_kwds={"label":  _('Attack rate per 100,000 inhabitants in the current season'), "orientation": "vertical","shrink":0.6})
         ax4.axis('off')
         st.pyplot(fig4)
 
     with tab5:
         fig5, ax5 = plt.subplots(figsize=(6,6))
         gdf.plot(ax=ax5, cmap='Reds', column='count', legend=True, edgecolor="w", linewidth=.3,
-                     legend_kwds={"label": _('Participants per 100,000 inhabitants in the 2023-2024 season'), "orientation": "vertical","shrink":0.6})
+                     legend_kwds={"label": _('Participants per 100,000 inhabitants in the current season'), "orientation": "vertical","shrink":0.6})
         ax5.axis('off')
         st.pyplot(fig5)