From 917d73921813fe1539b10107dcea8653805d7277 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 08:20:07 -0600 Subject: [PATCH 01/78] update gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 8b55d7e1..ba5690ed 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,9 @@ work-record-archive export internal data +figures +isf-basal-figures +fonts # Test htmlcov From f295c17461dd7475d9d74fd5378102f72cacb04b Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 08:35:38 -0600 Subject: [PATCH 02/78] initial commit --- .../get-users-settings-and-events.py | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 projects/predict-simulate/get-users-settings-and-events.py diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py new file mode 100644 index 00000000..a044a30c --- /dev/null +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +description: get users settings and events +version: 0.0.1 +created: 2019-01-11 +author: Ed Nykaza +dependencies: + * +license: BSD-2-Clause +""" + + +# %% REQUIRED LIBRARIES +import pandas as pd +import datetime as dt +import numpy as np +import os +import sys +import shutil +import glob +import argparse +import hashlib +import ast +import time + + +# %% USER INPUTS (ADD THIS IN LATER) +#codeDescription = "Get user's settings and events" +#parser = argparse.ArgumentParser(description=codeDescription) + + +# %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S + + +# %% ID & HASHID + + +# %% AGE & YLW + + +# %% UPLOAD DATE + + +# %% TIME (UTC, TIMEZONE, AND EVENTUALLY LOCAL TIME) + + +# %% PUMP AND CGM DEVICE () + + +# %% ISF + + +# %% CIR + + +# %% INSULIN ACTIVITY DURATION + + +# %% MAX BASAL RATE + + +# %% MAX BOLUS AMOUNT + + +# %% CORRECTION TARGET + + +# %% 
BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) + + +# %% LOOP DATA (BINARY T/F) + + +# %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + + +# %% CGM DATA + + +# %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + + +# %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + + +# %% SAVE RESULTS + + +# %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL + + +# %% V2 DATA TO GRAB +# ALERT SETTINGS +# ESTIMATED LOCAL TIME +# GLYCEMIC OUTCOMES +# DO NOT ROUND DATA +# INFUSION SITE CHANGES +# CGM CALIBRATIONS From d21d5dd9bda49df2f648f17a95f47f9066459cd9 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 11:38:16 -0600 Subject: [PATCH 03/78] add ISF and CIR --- .../get-users-settings-and-events.py | 282 +++++++++++++++++- 1 file changed, 274 insertions(+), 8 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index a044a30c..54af995a 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -15,6 +15,7 @@ import pandas as pd import datetime as dt import numpy as np +import tidals as td import os import sys import shutil @@ -23,6 +24,7 @@ import hashlib import ast import time +import pdb # %% USER INPUTS (ADD THIS IN LATER) @@ -30,28 +32,286 @@ #parser = argparse.ArgumentParser(description=codeDescription) +# %% FUNCTIONS + +dataFieldExportList = [ + 'activeSchedule', 'alarmType', 'annotations.code', 'annotations.threshold', + 'annotations.value', 'basalSchedules', 'bgInput', 'bgTarget', 'bgTarget.high', 'bgTarget.low', + 'bgTarget.range', 'bgTarget.start', 'bgTarget.target', 'bgTargets', 'bolus', 'carbInput', + 'carbRatio', 'carbRatios', 'carbRatio.amount', 'carbRatio.start', 'change.agent', + 'change.from', 'change.to', 'clockDriftOffset', 'computerTime', 
'conversionOffset', + 'deliveryType', 'deviceId', 'deviceManufacturers', 'deviceModel', 'deviceSerialNumber', + 'deviceTags', 'deviceTime', 'duration', 'expectedDuration', 'expectedExtended', + 'expectedNormal', 'extended', 'highAlerts.enabled', 'highAlerts.level', + 'highAlerts.snooze', 'id', 'insulinCarbRatio', 'insulinOnBoard', 'insulinSensitivity', + 'insulinSensitivity.amount', 'insulinSensitivity.start', 'insulinSensitivities', + 'lowAlerts.enabled', 'lowAlerts.level', 'lowAlerts.snooze', 'normal', + 'outOfRangeAlerts.enabled', 'outOfRangeAlerts.snooze', + 'payload.calibration_reading', 'payload.Status', 'payload.Trend Arrow', + 'payload.Trend Rate', 'percent', 'primeTarget', 'rate', 'rateOfChangeAlerts.fallRate.enabled', + 'rateOfChangeAlerts.fallRate.rate', 'rateOfChangeAlerts.riseRate.enabled', + 'rateOfChangeAlerts.riseRate.rate', 'reason.resumed', 'reason.suspended', 'recommended.carb', + 'recommended.correction', 'recommended.net', 'scheduleName', 'status', 'subType', + 'time', 'timeProcessing', 'timezone', 'timezoneOffset', 'transmitterId', 'type', 'units', + 'units.bg', 'units.carb', 'uploadId', 'value', 'version' +] + +# CLEAN DATA FUNCTIONS +def removeNegativeDurations(df): + if "duration" in list(df): + nNegativeDurations = sum(df.duration < 0) + if nNegativeDurations > 0: + df = df[~(df.duration < 0)] + + return df, nNegativeDurations + + +def removeInvalidCgmValues(df): + + nBefore = len(df) + # remove values < 38 and > 402 mg/dL + df = df.drop(df[((df.type == "cbg") & + (df.value < 2.109284236597303))].index) + df = df.drop(df[((df.type == "cbg") & + (df.value > 22.314006924003046))].index) + nRemoved = nBefore - len(df) + + return df, nRemoved + + +def tslimCalibrationFix(df): + searchfor = ['tan'] + tandemDataIndex = ((df.deviceId.str.contains('|'.join(searchfor))) & + (df.type == "deviceEvent")) + + if "payload.calibration_reading" in list(df): + payloadCalReadingIndex = df["payload.calibration_reading"].notnull() + + 
nTandemAndPayloadCalReadings = sum(tandemDataIndex & + payloadCalReadingIndex) + + if nTandemAndPayloadCalReadings > 0: + # if reading is > 30 then it is in the wrong units + if df["payload.calibration_reading"].min() > 30: + df.loc[payloadCalReadingIndex, "value"] = \ + df[tandemDataIndex & payloadCalReadingIndex] \ + ["payload.calibration_reading"] / 18.01559 + else: + df.loc[payloadCalReadingIndex, "value"] = \ + df[tandemDataIndex & + payloadCalReadingIndex]["payload.calibration_reading"] + else: + nTandemAndPayloadCalReadings = 0 + return df, nTandemAndPayloadCalReadings + + +# OTHER +def tempRemoveFields(df): + removeFields = ["basalSchedules", + "bgTarget", + "bgTargets", + "carbRatio", + "carbRatios", + "insulinSensitivity", + "insulinSensitivities"] + + tempRemoveFields = list(set(df) & set(removeFields)) + tempDf = df[tempRemoveFields] + df = df.drop(columns=tempRemoveFields) + + return df, tempDf + + +def removeBrackets(df, fieldName): + if fieldName in list(df): + df.loc[df[fieldName].notnull(), fieldName] = \ + df.loc[df[fieldName].notnull(), fieldName].str[0] + + return df + + +def flattenJson(df, dataFieldsForExport): + + # remove fields that we don't want to flatten + df, holdData = tempRemoveFields(df) + + # remove [] from annotations field + df = removeBrackets(df, "annotations") + + # get a list of data types of column headings + columnHeadings = list(df) # ["payload", "suppressed"] + + # loop through each columnHeading + newDataFrame = pd.DataFrame() + + for colHead in columnHeadings: + # if the df field has embedded json + if any(isinstance(item, dict) for item in df[colHead]): + # grab the data that is in brackets + jsonBlob = df[colHead][df[colHead].astype(str).str[0] == "{"] + + # replace those values with nan + df.loc[jsonBlob.index, colHead] = np.nan + + # turn jsonBlob to dataframe + newDataFrame = pd.concat([newDataFrame, pd.DataFrame(jsonBlob.tolist(), + index=jsonBlob.index).add_prefix(colHead + '.')], axis=1) + + newColHeadings = 
list(newDataFrame) + + # put df back into the main dataframe + # and add the fields that were removed back in + columnFilter = list(set(newColHeadings) & set(dataFieldsForExport)) + tempDataFrame = newDataFrame.filter(items=columnFilter) + df = pd.concat([df, tempDataFrame, holdData], axis=1) + + return df + + +def mergeWizardWithBolus(df): + + if "wizard" in data["type"].unique(): + bolusData = data[data.type == "bolus"].copy().dropna(axis=1, how="all") + wizardData = data[data.type == "wizard"].copy().dropna(axis=1, how="all") + + # merge the wizard data with the bolus data + wizardData["calculatorId"] = wizardData["id"] + wizardDataFields = [ + "bgInput", + "bgTarget.high", + "bgTarget.low", + "bgTarget.range", + "bgTarget.target", + "bolus", + "carbInput", + "calculatorId", + "insulinCarbRatio", + "insulinOnBoard", + "insulinSensitivity", + "recommended.carb", + "recommended.correction", + "recommended.net", + "units", + ] + keepTheseWizardFields = \ + set(wizardDataFields).intersection(list(wizardData)) + bolusData = pd.merge(bolusData, + wizardData[list(keepTheseWizardFields)], + how="left", + left_on="id", + right_on="bolus") + + mergedBolusData = bolusData.drop("bolus", axis=1) + else: + mergedBolusData = pd.DataFrame() + + return mergedBolusData + + +def addUploadDate(df): + uploadTimes = pd.DataFrame(df[df.type == "upload"].groupby("uploadId").time.describe()["top"]) + uploadTimes.reset_index(inplace=True) + uploadTimes.rename(columns={"top": "uploadTime"}, inplace=True) + df = pd.merge(df, uploadTimes, how='left', on='uploadId') + df["uploadTime"] = pd.to_datetime(df["uploadTime"]) + + return df + + +def mmolL_to_mgdL(mmolL): + return mmolL * 18.01559 + + # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S +dataPulledDate = "2018-09-28" +phiDate = "PHI-" + dataPulledDate +donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") + +donorList = phiDate + "-uniqueDonorList.csv" +donors = 
td.load.load_csv(os.path.join(donorPath, donorList)) + +# this is where the loop will go: +dIndex = 2379 + +# %% ID, HASHID, AGE, & YLW +userID = donors.userID[dIndex] +hashID = donors.hashID[dIndex] +bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) +dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) -# %% ID & HASHID +# %% LOAD IN DONOR JSON DATA +metadata = pd.DataFrame(index=[dIndex]) +jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") +jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") +if os.path.exists(jsonFileName): + fileSize = os.stat(jsonFileName).st_size + metadata["fileSizeKB"] = fileSize / 1000 + if fileSize > 1000: + data = td.load.load_json(jsonFileName) + # sort the data by time + data.sort_values("time", inplace=True) -# %% AGE & YLW + # flatten the embedded json + data = flattenJson(data, dataFieldExportList) -# %% UPLOAD DATE +# %% CLEAN DATA + # remove negative durations + data, nNegativeDurations = removeNegativeDurations(data) + metadata["nNegativeDurations"] = nNegativeDurations -# %% TIME (UTC, TIMEZONE, AND EVENTUALLY LOCAL TIME) + # get rid of cgm values too low/high (< 38 & > 402 mg/dL) + data, nInvalidCgmValues = removeInvalidCgmValues(data) + metadata["nInvalidCgmValues"] = nInvalidCgmValues + # Tslim calibration bug fix + data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) + metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings -# %% PUMP AND CGM DEVICE () +# %% ADD UPLOAD DATE + # attach upload time to each record, for resolving duplicates + if "upload" in data.type.unique(): + data = addUploadDate(data) -# %% ISF +# %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) + data["utcTime"] = pd.to_datetime(data["time"]) + data["timezone"].fillna(method='ffill', inplace=True) + data["timezone"].fillna(method='bfill', inplace=True) + data["day"] = pd.DatetimeIndex(data["utcTime"]).date + +# %% ID, HASHID, AGE, & YLW + data["userID"] = userID + data["hashID"] = hashID + 
data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) + data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) + + +# %% FORMAT BOLUS DATA + bolus = mergeWizardWithBolus(data) + if len(bolus) > 0: + # get rid of duplicates that have the same ["time", "normal"] + bolus, nBolusDuplicatesRemoved = \ + td.clean.remove_duplicates(bolus, bolus[["time", "normal"]]) + metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved + + +# %% ISF, CIR + if "insulinSensitivities" in list(bolus): + pdb.set_trace() + + # ISF + bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] + bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) + isf = bolus.loc[bolus["isf"].notnull(), ["utcTime", "isf", "isf_mmolL_U"]] + + # CIR + cir = bolus.loc[bolus["insulinCarbRatio"].notnull(), ["utcTime", "insulinCarbRatio"]] -# %% CIR # %% INSULIN ACTIVITY DURATION @@ -88,11 +348,17 @@ # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL - + else: + metadata["flags"] = "no bolus wizard data" + else: + metadata["flags"] = "file contains no data" +else: + metadata["flags"] = "file does not exist" # %% V2 DATA TO GRAB # ALERT SETTINGS # ESTIMATED LOCAL TIME +# PUMP AND CGM DEVICE () # GLYCEMIC OUTCOMES # DO NOT ROUND DATA # INFUSION SITE CHANGES From 16297bf1f09b2695a165c536e28b496157a0578a Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 13:42:40 -0600 Subject: [PATCH 04/78] update to rounded time for tidal data analytics python tools --- tidepool-analysis-tools/tidals/clean/clean.py | 82 +++++++++++-------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/tidepool-analysis-tools/tidals/clean/clean.py b/tidepool-analysis-tools/tidals/clean/clean.py index 9a4f1836..ca61844f 100644 --- a/tidepool-analysis-tools/tidals/clean/clean.py +++ b/tidepool-analysis-tools/tidals/clean/clean.py @@ -17,48 +17,66 @@ def remove_duplicates(df, criteriaDF): def round_time(df, timeIntervalMinutes=5, timeField="time", - 
roundedTimeFieldName="roundedTime", verbose=False): + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False): + ''' + A general purpose round time function that rounds the "time" + field to nearest minutes + INPUTS: + * a dataframe (df) that contains a time field that you want to round + * timeIntervalMinutes (defaults to 5 minutes given that most cgms output every 5 minutes) + * timeField to round (defaults to the UTC time "time" field) + * roundedTimeFieldName is a user specified column name (defaults to roundedTime) + * startWithFirstRecord starts the rounding with the first record if True, and the last record if False (defaults to True) + * verbose specifies whether the extra columns used to make calculations are returned + ''' + import pandas as pd - # A general purpose round time function that rounds the - # "time" field to nearest minutes - # INPUTS: - # * a dataframe (df) that contains a time field - # * timeIntervalMinutes defaults to 5 minutes given that most cgms output every 5 minutes - # * timeField defaults to UTC time "time" - # * verbose specifies whether the "TIB" and "TIB_cumsum" columns are returned - - df.sort_values(by=timeField, ascending=True, inplace=True) + df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True) df.reset_index(drop=True, inplace=True) - # calculate the time-in-between (TIB) consecutive records - t = pd.to_datetime(df.time) - t_shift = pd.to_datetime(df.time.shift(1)) - df["TIB"] = round((t - t_shift).dt.days*(86400/(60 * timeIntervalMinutes)) + - (t - t_shift).dt.seconds/(60 * timeIntervalMinutes)) * timeIntervalMinutes + # make sure the time field is in the right form + t = pd.to_datetime(df[timeField]) + + # calculate the time between consecutive records + t_shift = pd.to_datetime(df[timeField].shift(1)) + df["timeBetweenRecords"] = \ + round((t - t_shift).dt.days*(86400/(60 * timeIntervalMinutes)) + + (t - t_shift).dt.seconds/(60 * timeIntervalMinutes)) * timeIntervalMinutes - 
# separate the data into chunks if TIB is greater than minutes - # so that rounding process can start over - largeGaps = list(df.query("TIB > " + str(timeIntervalMinutes)).index) + # separate the data into chunks if timeBetweenRecords is greater than + # 2 times the minutes so the rounding process starts over + largeGaps = list(df.query("abs(timeBetweenRecords) > " + str(timeIntervalMinutes * 2)).index) largeGaps.insert(0, 0) largeGaps.append(len(df)) - # loop through each chunk to get the cumulative sum and the rounded time for gIndex in range(0, len(largeGaps) - 1): - - df.loc[largeGaps[gIndex], "TIB"] = 0 - - df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), "TIB_cumsum"] = \ - df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), "TIB"].cumsum() - - df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), roundedTimeFieldName] = \ - pd.to_datetime(df.loc[largeGaps[gIndex], timeField]).round(str(timeIntervalMinutes) + "min") + \ - pd.to_timedelta(df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), "TIB_cumsum"], unit="m") - - # sort descendingly by time and drop fieldsfields - df.sort_values(by=timeField, ascending=False, inplace=True) + chunk = t[largeGaps[gIndex]:largeGaps[gIndex+1]] + firstRecordChunk = t[largeGaps[gIndex]] + + # calculate the time difference between each time record and the first record + df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "minutesFromFirstRecord"] = \ + (chunk - firstRecordChunk).dt.days*(86400/(60)) + (chunk - firstRecordChunk).dt.seconds/(60) + + # then round to the nearest X Minutes + # NOTE: the ".000001" ensures that mulitples of 2:30 always rounds up. 
+ df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "roundedMinutesFromFirstRecord"] = \ + round((df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], + "minutesFromFirstRecord"] / timeIntervalMinutes) + 0.000001) * (timeIntervalMinutes) + + roundedFirstRecord = (firstRecordChunk + pd.Timedelta("1microseconds")).round(str(timeIntervalMinutes) + "min") + df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], roundedTimeFieldName] = \ + roundedFirstRecord + \ + pd.to_timedelta(df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], + "roundedMinutesFromFirstRecord"], unit="m") + + # sort by time and drop fieldsfields + df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True) df.reset_index(drop=True, inplace=True) if verbose is False: - df.drop(columns=["TIB", "TIB_cumsum"], inplace=True) + df.drop(columns=["timeBetweenRecords", + "minutesFromFirstRecord", + "roundedMinutesFromFirstRecord"], inplace=True) return df From c8db909b4f9eeb0cd8d7ba07c1010cff906dad17 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 13:42:51 -0600 Subject: [PATCH 05/78] add bolus events --- .../get-users-settings-and-events.py | 166 +++++++++++++++++- 1 file changed, 162 insertions(+), 4 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 54af995a..33492488 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -222,6 +222,123 @@ def mmolL_to_mgdL(mmolL): return mmolL * 18.01559 +def round_time(df, timeIntervalMinutes=5, timeField="time", + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False): + ''' + A general purpose round time function that rounds the "time" + field to nearest minutes + INPUTS: + * a dataframe (df) that contains a time field that you want to round + * timeIntervalMinutes (defaults to 5 minutes given that most cgms output every 5 minutes) + * timeField to round 
(defaults to the UTC time "time" field) + * roundedTimeFieldName is a user specified column name (defaults to roundedTime) + * startWithFirstRecord starts the rounding with the first record if True, and the last record if False (defaults to True) + * verbose specifies whether the extra columns used to make calculations are returned + ''' + + df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True) + df.reset_index(drop=True, inplace=True) + + # make sure the time field is in the right form + t = pd.to_datetime(df[timeField]) + + # calculate the time between consecutive records + t_shift = pd.to_datetime(df[timeField].shift(1)) + df["timeBetweenRecords"] = \ + round((t - t_shift).dt.days*(86400/(60 * timeIntervalMinutes)) + + (t - t_shift).dt.seconds/(60 * timeIntervalMinutes)) * timeIntervalMinutes + + # separate the data into chunks if timeBetweenRecords is greater than + # 2 times the minutes so the rounding process starts over + largeGaps = list(df.query("abs(timeBetweenRecords) > " + str(timeIntervalMinutes * 2)).index) + largeGaps.insert(0, 0) + largeGaps.append(len(df)) + + for gIndex in range(0, len(largeGaps) - 1): + chunk = t[largeGaps[gIndex]:largeGaps[gIndex+1]] + firstRecordChunk = t[largeGaps[gIndex]] + + # calculate the time difference between each time record and the first record + df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "minutesFromFirstRecord"] = \ + (chunk - firstRecordChunk).dt.days*(86400/(60)) + (chunk - firstRecordChunk).dt.seconds/(60) + + # then round to the nearest X Minutes + # NOTE: the ".000001" ensures that mulitples of 2:30 always rounds up. 
+ df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "roundedMinutesFromFirstRecord"] = \ + round((df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], + "minutesFromFirstRecord"] / timeIntervalMinutes) + 0.000001) * (timeIntervalMinutes) + + roundedFirstRecord = (firstRecordChunk + pd.Timedelta("1microseconds")).round(str(timeIntervalMinutes) + "min") + df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], roundedTimeFieldName] = \ + roundedFirstRecord + \ + pd.to_timedelta(df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], + "roundedMinutesFromFirstRecord"], unit="m") + + # sort by time and drop fieldsfields + df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True) + df.reset_index(drop=True, inplace=True) + if verbose is False: + df.drop(columns=["timeBetweenRecords", + "minutesFromFirstRecord", + "roundedMinutesFromFirstRecord"], inplace=True) + + return df + + +def get_descriptive_stats(df, newName, dataSubType): + + newDf = df[dataSubType].describe().add_suffix(newName) + + newDf[("rangeOf" + newName)] = \ + newDf[("max" + newName)] - \ + newDf[("min" + newName)] + + return newDf + + +def get_bolusDaySummary(bolusData): + + if "extended" not in bolusData: + bolusData["extended"] = 0 + + bolusByDay = bolusData.groupby(bolusData["day"]) + + # total bolus insulin for each day + bolusDaySummary = pd.DataFrame(bolusByDay.normal.sum()) + bolusDaySummary = bolusDaySummary.rename(columns={"normal":"totalAmountOfNormalBolusInsulin"}) + + bolusDaySummary["totalAmountOfExtendedBolusInsulin"] = bolusByDay.extended.sum().fillna(0.0) + bolusDaySummary["totalAmountOfBolusInsulin"] = bolusDaySummary["totalAmountOfNormalBolusInsulin"].fillna(0.0) + \ + bolusDaySummary["totalAmountOfExtendedBolusInsulin"].fillna(0.0) + + # bolus range for normal boluses + normalBasalDF = get_descriptive_stats(bolusByDay, "NormalBolusAmountPerBolus", "normal") + bolusDaySummary = pd.concat([bolusDaySummary, normalBasalDF], axis = 1) + + # total number of bolus types per day + bolusTypePerDay = 
bolusData.groupby(["day", + "subType"]).size().unstack() + + bolusDaySummary["numberOfNormalBoluses"] = bolusTypePerDay["normal"].fillna(0) + + if "square" not in list(bolusTypePerDay): + bolusDaySummary["numberOfSquareBoluses"] = 0 + else: + bolusDaySummary["numberOfSquareBoluses"] = bolusTypePerDay["square"].fillna(0) + + if "dual/square" not in list(bolusTypePerDay): + bolusDaySummary["numberOfDualBoluses"] = 0 + else: + bolusDaySummary["numberOfDualBoluses"] = bolusTypePerDay["dual/square"].fillna(0) + + bolusDaySummary["numberOfAllBolusTypes"] = bolusDaySummary["numberOfNormalBoluses"] + \ + bolusDaySummary["numberOfSquareBoluses"] + \ + bolusDaySummary["numberOfDualBoluses"] + + return bolusDaySummary + + # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S dataPulledDate = "2018-09-28" phiDate = "PHI-" + dataPulledDate @@ -284,12 +401,26 @@ def mmolL_to_mgdL(mmolL): data["timezone"].fillna(method='bfill', inplace=True) data["day"] = pd.DatetimeIndex(data["utcTime"]).date + # round to the nearest 5 minutes + # TODO: once roundTime is pushed to tidals repository then this line can be replaced + # with td.clean.round_time + data = round_time(data, timeIntervalMinutes=5, timeField="time", + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False) + + # %% ID, HASHID, AGE, & YLW data["userID"] = userID data["hashID"] = hashID data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) + commonColumnHeadings = ["hashID", + "age", + "ylw", + "utcTime", + "roundedTime"] + # %% FORMAT BOLUS DATA bolus = mergeWizardWithBolus(data) @@ -301,18 +432,45 @@ def mmolL_to_mgdL(mmolL): # %% ISF, CIR + # ISF if "insulinSensitivities" in list(bolus): pdb.set_trace() - # ISF bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) - isf = bolus.loc[bolus["isf"].notnull(), ["utcTime", "isf", 
"isf_mmolL_U"]] + + isfCH = commonColumnHeadings.copy() + isfCH.extend(["isf", "isf_mmolL_U"]) + isf = bolus.loc[bolus["isf"].notnull(), isfCH] # CIR - cir = bolus.loc[bolus["insulinCarbRatio"].notnull(), ["utcTime", "insulinCarbRatio"]] + if "carbRatios" in list(bolus): + pdb.set_trace() + + cirCH = commonColumnHeadings.copy() + cirCH.extend(["insulinCarbRatio"]) + cir = bolus.loc[bolus["insulinCarbRatio"].notnull(), cirCH] +# %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + # get a summary of boluses per day + bolusDaySummary = get_bolusDaySummary(bolus) + + if "extended" not in bolus: + bolus["extended"] = np.nan + bolus["duration"] = np.nan + + bolusCH = commonColumnHeadings.copy() + bolusCH.extend(["normal", "carbInput", "subType", + "insulinOnBoard", "bgInput"]) + bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] + bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan + bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", + "bgInput": "bg_mmolL"}) + bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) + bolusEvents["eventType"] = "correction" + bolusEvents.loc[bolusEvents["carbInput"] == 0, "eventType"] = "meal" + # %% INSULIN ACTIVITY DURATION @@ -332,7 +490,7 @@ def mmolL_to_mgdL(mmolL): # %% LOOP DATA (BINARY T/F) -# %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + # %% CGM DATA From 7418a81d03a46a3ff034df573b2eb335f5c8154f Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 16:04:59 -0600 Subject: [PATCH 06/78] flatten embedded json to cover all column headings --- .../get-users-settings-and-events.py | 64 +++++++++++++++++-- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 33492488..4fa8ebd7 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ 
b/projects/predict-simulate/get-users-settings-and-events.py @@ -122,6 +122,18 @@ def tempRemoveFields(df): return df, tempDf +def tempRemoveFieldsV2(df): + removeFields = ["suppressed", + "recommended", + "payload"] + + tempRemoveFields = list(set(df) & set(removeFields)) + tempDf = df[tempRemoveFields] + df = df.drop(columns=tempRemoveFields) + + return df, tempDf + + def removeBrackets(df, fieldName): if fieldName in list(df): df.loc[df[fieldName].notnull(), fieldName] = \ @@ -133,7 +145,7 @@ def removeBrackets(df, fieldName): def flattenJson(df, dataFieldsForExport): # remove fields that we don't want to flatten - df, holdData = tempRemoveFields(df) + #df, holdData = tempRemoveFields(df) # remove [] from annotations field df = removeBrackets(df, "annotations") @@ -161,18 +173,57 @@ def flattenJson(df, dataFieldsForExport): # put df back into the main dataframe # and add the fields that were removed back in + pdb.set_trace columnFilter = list(set(newColHeadings) & set(dataFieldsForExport)) tempDataFrame = newDataFrame.filter(items=columnFilter) - df = pd.concat([df, tempDataFrame, holdData], axis=1) + df = pd.concat([df, tempDataFrame], axis=1) + #df = pd.concat([df, tempDataFrame, holdData], axis=1) return df +def flattenJsonV2(df, nEmbeddings): + # repeat this N times + for nEmbed in range(0, nEmbeddings): + # remove fields that we don't want to flatten + df, holdData = tempRemoveFieldsV2(df) + + # get a list of data types of column headings + columnHeadings = list(df) # ["payload", "suppressed"] + + # loop through each columnHeading + newDataFrame = pd.DataFrame() + + for colHead in columnHeadings: + if any(isinstance(item, list) for item in df[colHead]): + listBlob = df[colHead][df[colHead].astype(str).str[0] == "["] + df.loc[listBlob.index, colHead] = df.loc[listBlob.index, colHead].str[0] + + # if the df field has embedded json + if any(isinstance(item, dict) for item in df[colHead]): + # grab the data that is in brackets + jsonBlob = 
df[colHead][df[colHead].astype(str).str[0] == "{"] + + # replace those values with nan + df.loc[jsonBlob.index, colHead] = np.nan + + # turn jsonBlob to dataframe + newDataFrame = pd.concat([newDataFrame, pd.DataFrame(jsonBlob.tolist(), + index=jsonBlob.index).add_prefix(colHead + '.')], axis=1) + + df = pd.concat([df, newDataFrame, holdData], axis=1) + + df.sort_index(axis=1, inplace=True) + + return df + + + def mergeWizardWithBolus(df): - if "wizard" in data["type"].unique(): - bolusData = data[data.type == "bolus"].copy().dropna(axis=1, how="all") - wizardData = data[data.type == "wizard"].copy().dropna(axis=1, how="all") + if "wizard" in df["type"].unique(): + bolusData = df[df.type == "bolus"].copy().dropna(axis=1, how="all") + wizardData = df[df.type == "wizard"].copy().dropna(axis=1, how="all") # merge the wizard data with the bolus data wizardData["calculatorId"] = wizardData["id"] @@ -371,7 +422,8 @@ def get_bolusDaySummary(bolusData): data.sort_values("time", inplace=True) # flatten the embedded json - data = flattenJson(data, dataFieldExportList) + #data = flattenJson(data, dataFieldExportList) + data = flattenJsonV2(data, 2) From a2887789c7e871dc72cfbf9b126702f2c5fd3797 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 16:05:26 -0600 Subject: [PATCH 07/78] update bolus events to include isf and cir associated with events --- .../get-users-settings-and-events.py | 41 +++++++------------ 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 4fa8ebd7..1c3fe4a1 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -474,7 +474,7 @@ def get_bolusDaySummary(bolusData): "roundedTime"] -# %% FORMAT BOLUS DATA +# %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) bolus = mergeWizardWithBolus(data) if len(bolus) 
> 0: # get rid of duplicates that have the same ["time", "normal"] @@ -482,29 +482,6 @@ def get_bolusDaySummary(bolusData): td.clean.remove_duplicates(bolus, bolus[["time", "normal"]]) metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved - -# %% ISF, CIR - # ISF - if "insulinSensitivities" in list(bolus): - pdb.set_trace() - - bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] - bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) - - isfCH = commonColumnHeadings.copy() - isfCH.extend(["isf", "isf_mmolL_U"]) - isf = bolus.loc[bolus["isf"].notnull(), isfCH] - - # CIR - if "carbRatios" in list(bolus): - pdb.set_trace() - - cirCH = commonColumnHeadings.copy() - cirCH.extend(["insulinCarbRatio"]) - cir = bolus.loc[bolus["insulinCarbRatio"].notnull(), cirCH] - - -# %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) # get a summary of boluses per day bolusDaySummary = get_bolusDaySummary(bolus) @@ -512,9 +489,19 @@ def get_bolusDaySummary(bolusData): bolus["extended"] = np.nan bolus["duration"] = np.nan + # ISF associated with bolus event + if "insulinSensitivities" in list(bolus): + pdb.set_trace() + if "carbRatios" in list(bolus): + pdb.set_trace() + + bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] + bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) + bolusCH = commonColumnHeadings.copy() bolusCH.extend(["normal", "carbInput", "subType", - "insulinOnBoard", "bgInput"]) + "insulinOnBoard", "bgInput", + "isf", "isf_mmolL_U", "insulinCarbRatio"]) bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", @@ -524,8 +511,8 @@ def get_bolusDaySummary(bolusData): bolusEvents.loc[bolusEvents["carbInput"] == 0, "eventType"] = "meal" -# %% INSULIN ACTIVITY DURATION - +# %% PUMP SETTINGS + pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") # %% MAX BASAL RATE From 
ace918ba6c402d31dc2942ec7fc9667dfcd6b9ef Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 16:30:34 -0600 Subject: [PATCH 08/78] get ISF from pump settings --- .../get-users-settings-and-events.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 1c3fe4a1..1fbad34f 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -469,9 +469,7 @@ def get_bolusDaySummary(bolusData): commonColumnHeadings = ["hashID", "age", - "ylw", - "utcTime", - "roundedTime"] + "ylw"] # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) @@ -499,7 +497,7 @@ def get_bolusDaySummary(bolusData): bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) bolusCH = commonColumnHeadings.copy() - bolusCH.extend(["normal", "carbInput", "subType", + bolusCH.extend(["utcTime", "roundedTime", "normal", "carbInput", "subType", "insulinOnBoard", "bgInput", "isf", "isf_mmolL_U", "insulinCarbRatio"]) bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] @@ -514,6 +512,22 @@ def get_bolusDaySummary(bolusData): # %% PUMP SETTINGS pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") + #ISF + if "insulinSensitivity.amount" in list(pumpSettings): + isfColHead = "insulinSensitivity" + else: + isfColHead = "insulinSensitivities" + + pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) + pumpSettings["time"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") + + isfCH = commonColumnHeadings.copy() + isfCH.extend(["time", "isf", "isf_mmolL_U"]) + isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] + + # %% MAX BASAL RATE From f58ed6474ac1161b6a8efa512084abbc0141f16f 
Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 16:37:52 -0600 Subject: [PATCH 09/78] get CIR from the pump settings --- .../get-users-settings-and-events.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 1fbad34f..ddcde6ca 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -512,7 +512,7 @@ def get_bolusDaySummary(bolusData): # %% PUMP SETTINGS pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") - #ISF + # ISF if "insulinSensitivity.amount" in list(pumpSettings): isfColHead = "insulinSensitivity" else: @@ -520,13 +520,27 @@ def get_bolusDaySummary(bolusData): pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["time"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") isfCH = commonColumnHeadings.copy() - isfCH.extend(["time", "isf", "isf_mmolL_U"]) + isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] + # CIR + if "carbRatio.amount" in list(pumpSettings): + cirColHead = "carbRatio" + else: + cirColHead = "carbRatios" + + pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] + pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") + + cirCH = commonColumnHeadings.copy() + cirCH.extend(["cirTime", "cir"]) + cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] + # %% MAX BASAL RATE From 964c3dc8de6dd7a73f7ead352502370d84dbe6b3 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 17:01:53 -0600 Subject: 
[PATCH 10/78] add the correction target from the pump settings --- .../get-users-settings-and-events.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index ddcde6ca..f40b61d6 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -542,13 +542,26 @@ def get_bolusDaySummary(bolusData): cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] -# %% MAX BASAL RATE + # CORRECTION TARGET + if "bgTarget.start" in list(pumpSettings): + bgTargetColHead = "bgTarget" + else: + bgTargetColHead = "bgTargets" + pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] + pumpSettings["correctionTargetLow"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) -# %% MAX BOLUS AMOUNT + pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] + pumpSettings["correctionTargetHigh"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) + pumpSettings["correctionTargetTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[bgTargetColHead + ".start"], unit="ms") -# %% CORRECTION TARGET + ctCH = commonColumnHeadings.copy() + ctCH.extend(["correctionTargetTime", "correctionTargetLow", "correctionTargetHigh"]) + correctionTarget = pumpSettings.loc[pumpSettings["correctionTargetLow"].notnull(), ctCH] # %% BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) From cd79cbdfc2f52d9e3669a108ad5b830e4bbd2328 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 17:12:30 -0600 Subject: [PATCH 11/78] clean up unused pieces of code --- .../get-users-settings-and-events.py | 103 +----------------- 1 file changed, 3 insertions(+), 100 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py 
b/projects/predict-simulate/get-users-settings-and-events.py index f40b61d6..b1b80492 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -13,17 +13,9 @@ # %% REQUIRED LIBRARIES import pandas as pd -import datetime as dt import numpy as np import tidals as td import os -import sys -import shutil -import glob -import argparse -import hashlib -import ast -import time import pdb @@ -34,28 +26,6 @@ # %% FUNCTIONS -dataFieldExportList = [ - 'activeSchedule', 'alarmType', 'annotations.code', 'annotations.threshold', - 'annotations.value', 'basalSchedules', 'bgInput', 'bgTarget', 'bgTarget.high', 'bgTarget.low', - 'bgTarget.range', 'bgTarget.start', 'bgTarget.target', 'bgTargets', 'bolus', 'carbInput', - 'carbRatio', 'carbRatios', 'carbRatio.amount', 'carbRatio.start', 'change.agent', - 'change.from', 'change.to', 'clockDriftOffset', 'computerTime', 'conversionOffset', - 'deliveryType', 'deviceId', 'deviceManufacturers', 'deviceModel', 'deviceSerialNumber', - 'deviceTags', 'deviceTime', 'duration', 'expectedDuration', 'expectedExtended', - 'expectedNormal', 'extended', 'highAlerts.enabled', 'highAlerts.level', - 'highAlerts.snooze', 'id', 'insulinCarbRatio', 'insulinOnBoard', 'insulinSensitivity', - 'insulinSensitivity.amount', 'insulinSensitivity.start', 'insulinSensitivities', - 'lowAlerts.enabled', 'lowAlerts.level', 'lowAlerts.snooze', 'normal', - 'outOfRangeAlerts.enabled', 'outOfRangeAlerts.snooze', - 'payload.calibration_reading', 'payload.Status', 'payload.Trend Arrow', - 'payload.Trend Rate', 'percent', 'primeTarget', 'rate', 'rateOfChangeAlerts.fallRate.enabled', - 'rateOfChangeAlerts.fallRate.rate', 'rateOfChangeAlerts.riseRate.enabled', - 'rateOfChangeAlerts.riseRate.rate', 'reason.resumed', 'reason.suspended', 'recommended.carb', - 'recommended.correction', 'recommended.net', 'scheduleName', 'status', 'subType', - 'time', 'timeProcessing', 'timezone', 'timezoneOffset', 
'transmitterId', 'type', 'units', - 'units.bg', 'units.carb', 'uploadId', 'value', 'version' -] - # CLEAN DATA FUNCTIONS def removeNegativeDurations(df): if "duration" in list(df): @@ -107,22 +77,6 @@ def tslimCalibrationFix(df): # OTHER def tempRemoveFields(df): - removeFields = ["basalSchedules", - "bgTarget", - "bgTargets", - "carbRatio", - "carbRatios", - "insulinSensitivity", - "insulinSensitivities"] - - tempRemoveFields = list(set(df) & set(removeFields)) - tempDf = df[tempRemoveFields] - df = df.drop(columns=tempRemoveFields) - - return df, tempDf - - -def tempRemoveFieldsV2(df): removeFields = ["suppressed", "recommended", "payload"] @@ -134,59 +88,11 @@ def tempRemoveFieldsV2(df): return df, tempDf -def removeBrackets(df, fieldName): - if fieldName in list(df): - df.loc[df[fieldName].notnull(), fieldName] = \ - df.loc[df[fieldName].notnull(), fieldName].str[0] - - return df - - -def flattenJson(df, dataFieldsForExport): - - # remove fields that we don't want to flatten - #df, holdData = tempRemoveFields(df) - - # remove [] from annotations field - df = removeBrackets(df, "annotations") - - # get a list of data types of column headings - columnHeadings = list(df) # ["payload", "suppressed"] - - # loop through each columnHeading - newDataFrame = pd.DataFrame() - - for colHead in columnHeadings: - # if the df field has embedded json - if any(isinstance(item, dict) for item in df[colHead]): - # grab the data that is in brackets - jsonBlob = df[colHead][df[colHead].astype(str).str[0] == "{"] - - # replace those values with nan - df.loc[jsonBlob.index, colHead] = np.nan - - # turn jsonBlob to dataframe - newDataFrame = pd.concat([newDataFrame, pd.DataFrame(jsonBlob.tolist(), - index=jsonBlob.index).add_prefix(colHead + '.')], axis=1) - - newColHeadings = list(newDataFrame) - - # put df back into the main dataframe - # and add the fields that were removed back in - pdb.set_trace - columnFilter = list(set(newColHeadings) & set(dataFieldsForExport)) - 
tempDataFrame = newDataFrame.filter(items=columnFilter) - df = pd.concat([df, tempDataFrame], axis=1) - #df = pd.concat([df, tempDataFrame, holdData], axis=1) - - return df - - -def flattenJsonV2(df, nEmbeddings): +def flattenJson(df, nEmbeddings): # repeat this N times for nEmbed in range(0, nEmbeddings): # remove fields that we don't want to flatten - df, holdData = tempRemoveFieldsV2(df) + df, holdData = tempRemoveFields(df) # get a list of data types of column headings columnHeadings = list(df) # ["payload", "suppressed"] @@ -218,7 +124,6 @@ def flattenJsonV2(df, nEmbeddings): return df - def mergeWizardWithBolus(df): if "wizard" in df["type"].unique(): @@ -422,9 +327,7 @@ def get_bolusDaySummary(bolusData): data.sort_values("time", inplace=True) # flatten the embedded json - #data = flattenJson(data, dataFieldExportList) - data = flattenJsonV2(data, 2) - + data = flattenJson(data, 2) # %% CLEAN DATA From 73e62289430fe74f3a88ebb10bbfa28ef8f8f117 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 17:21:50 -0600 Subject: [PATCH 12/78] add logic for missing data --- .../get-users-settings-and-events.py | 88 ++++++++++--------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index b1b80492..40ccf7bf 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -413,63 +413,65 @@ def get_bolusDaySummary(bolusData): # %% PUMP SETTINGS - pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") + if "pumpSettings" in data.type.unique(): + pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") - # ISF - if "insulinSensitivity.amount" in list(pumpSettings): - isfColHead = "insulinSensitivity" - else: - isfColHead = "insulinSensitivities" + # ISF + if "insulinSensitivity.amount" in 
list(pumpSettings): + isfColHead = "insulinSensitivity" + else: + isfColHead = "insulinSensitivities" - pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] - pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") + pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) + pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") - isfCH = commonColumnHeadings.copy() - isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) - isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] + isfCH = commonColumnHeadings.copy() + isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) + isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] - # CIR - if "carbRatio.amount" in list(pumpSettings): - cirColHead = "carbRatio" - else: - cirColHead = "carbRatios" + # CIR + if "carbRatio.amount" in list(pumpSettings): + cirColHead = "carbRatio" + else: + cirColHead = "carbRatios" - pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") + pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] + pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") - cirCH = commonColumnHeadings.copy() - cirCH.extend(["cirTime", "cir"]) - cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] + cirCH = commonColumnHeadings.copy() + cirCH.extend(["cirTime", "cir"]) + cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] - # CORRECTION TARGET - if "bgTarget.start" in list(pumpSettings): - bgTargetColHead = "bgTarget" - else: - bgTargetColHead = "bgTargets" + # CORRECTION 
TARGET + if "bgTarget.start" in list(pumpSettings): + bgTargetColHead = "bgTarget" + else: + bgTargetColHead = "bgTargets" - pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] - pumpSettings["correctionTargetLow"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) + pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] + pumpSettings["correctionTargetLow"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) - pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] - pumpSettings["correctionTargetHigh"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) + pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] + pumpSettings["correctionTargetHigh"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) - pumpSettings["correctionTargetTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[bgTargetColHead + ".start"], unit="ms") + pumpSettings["correctionTargetTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[bgTargetColHead + ".start"], unit="ms") - ctCH = commonColumnHeadings.copy() - ctCH.extend(["correctionTargetTime", "correctionTargetLow", "correctionTargetHigh"]) - correctionTarget = pumpSettings.loc[pumpSettings["correctionTargetLow"].notnull(), ctCH] + ctCH = commonColumnHeadings.copy() + ctCH.extend(["correctionTargetTime", "correctionTargetLow", "correctionTargetHigh"]) + correctionTarget = pumpSettings.loc[pumpSettings["correctionTargetLow"].notnull(), ctCH] # %% BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) + # %% LOOP DATA (BINARY T/F) @@ -489,8 +491,12 @@ def get_bolusDaySummary(bolusData): # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL + else: + metadata["flags"] = "no pump settings" + else: + metadata["flags"] = "no bolus wizard data" else: - metadata["flags"] = "no bolus wizard data" + 
metadata["flags"] = "no upload data" else: metadata["flags"] = "file contains no data" else: From 71f90006ebceb57ff734f74dc3afd5e29f90231b Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 20:59:18 -0600 Subject: [PATCH 13/78] get actual basal rates and scheduled basal rates --- .../get-users-settings-and-events.py | 92 ++++++++++++++++--- 1 file changed, 80 insertions(+), 12 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 40ccf7bf..8831f4ab 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -88,17 +88,16 @@ def tempRemoveFields(df): return df, tempDf -def flattenJson(df, nEmbeddings): - # repeat this N times - for nEmbed in range(0, nEmbeddings): - # remove fields that we don't want to flatten - df, holdData = tempRemoveFields(df) +def flattenJson(df): - # get a list of data types of column headings - columnHeadings = list(df) # ["payload", "suppressed"] + # remove fields that we don't want to flatten + df, holdData = tempRemoveFields(df) - # loop through each columnHeading - newDataFrame = pd.DataFrame() + # get a list of data types of column headings + columnHeadings = list(df) + + # loop through each columnHeading + newDataFrame = pd.DataFrame() for colHead in columnHeadings: if any(isinstance(item, list) for item in df[colHead]): @@ -295,6 +294,32 @@ def get_bolusDaySummary(bolusData): return bolusDaySummary +def get_basalDaySummary(basal): + # group data by day + basalByDay = basal.groupby(basal["day"]) + + # total basal insulin per day + basalDaySummary = pd.DataFrame(basalByDay.totalAmountOfBasalInsulin.sum()) + + # total number of basals types per day + basalTypePerDay = basal.groupby(["day", "deliveryType"]).size().unstack() + + basalDaySummary["numberOfScheduledBasals"] = basalTypePerDay["scheduled"].fillna(0) + if "suspend" not in 
list(basalTypePerDay): + basalDaySummary["numberOfSuspendedBasals"] = 0 + else: + basalDaySummary["numberOfSuspendedBasals"] = basalTypePerDay["suspend"].fillna(0) + if "temp" not in list(basalTypePerDay): + basalDaySummary["numberOfTempBasals"] = 0 + else: + basalDaySummary["numberOfTempBasals"] = basalTypePerDay["temp"].fillna(0) + + basalDaySummary["totalNumberOfBasals"] = basalDaySummary["numberOfScheduledBasals"] + \ + basalDaySummary["numberOfTempBasals"] + + return basalDaySummary + + # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S dataPulledDate = "2018-09-28" phiDate = "PHI-" + dataPulledDate @@ -323,14 +348,15 @@ def get_bolusDaySummary(bolusData): metadata["fileSizeKB"] = fileSize / 1000 if fileSize > 1000: data = td.load.load_json(jsonFileName) + # sort the data by time data.sort_values("time", inplace=True) # flatten the embedded json - data = flattenJson(data, 2) + data = flattenJson(data) -# %% CLEAN DATA + # %% CLEAN DATA # remove negative durations data, nNegativeDurations = removeNegativeDurations(data) metadata["nNegativeDurations"] = nNegativeDurations @@ -467,9 +493,48 @@ def get_bolusDaySummary(bolusData): ctCH.extend(["correctionTargetTime", "correctionTargetLow", "correctionTargetHigh"]) correctionTarget = pumpSettings.loc[pumpSettings["correctionTargetLow"].notnull(), ctCH] + # SCHEDULED BASAL RATES + sbrCH = commonColumnHeadings.copy() + sbrCH.extend(["time", "rate"]) + sbr = pd.DataFrame(columns=sbrCH) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["hashID"] = pumpSettings.loc[p, "hashID"] + tempDF["age"] = pumpSettings.loc[p, "age"] + tempDF["ylw"] = pumpSettings.loc[p, "ylw"] + tempDF["time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + sbr = pd.concat([sbr, tempDF[sbrCH]], ignore_index=True) + + + # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) + if "basal" in data.type.unique(): + basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") + basal.sort_values("uploadTime", ascending=False, inplace=True) + + basal, nBasalDuplicatesRemoved = \ + td.clean.remove_duplicates(basal, basal[["deliveryType", "deviceTime", "duration", "rate"]]) + metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved -# %% BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) + # fill NaNs with 0, as it indicates a suspend (temp basal of 0) + basal.rate.fillna(0, inplace=True) + # get rid of basals that have durations of 0 + nBasalDuration0 = sum(basal.duration > 0) + basal = basal[basal.duration > 0] + metadata["basal.nBasalDuration0"] = nBasalDuration0 + + # get rid of basal durations that are unrealistic + nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000)) + metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration) + basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan + + # calculate the total amount of insulin delivered (duration * rate) + basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 + basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] + + # get a summary of basals per day + basalDaySummary = get_basalDaySummary(basal) # %% LOOP DATA (BINARY T/F) @@ -491,6 +556,8 @@ def get_bolusDaySummary(bolusData): # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL + else: + metadata["flags"] = "no basal data" else: metadata["flags"] = "no pump settings" else: @@ 
-503,6 +570,7 @@ def get_bolusDaySummary(bolusData): metadata["flags"] = "file does not exist" # %% V2 DATA TO GRAB +# RE-EVALUATE THE WAY EXTENDED BOLUSES ARE BEING ACCOUNTED (ARE THEY ALSO SHOWING UP IN BASAL DATA?) # ALERT SETTINGS # ESTIMATED LOCAL TIME # PUMP AND CGM DEVICE () From eb7a77dd080cb5d837489533013f9d4e96e73177 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 20:59:31 -0600 Subject: [PATCH 14/78] changes related to time --- .../get-users-settings-and-events.py | 57 +++++++++++-------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 8831f4ab..91e348a1 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -99,24 +99,24 @@ def flattenJson(df): # loop through each columnHeading newDataFrame = pd.DataFrame() - for colHead in columnHeadings: - if any(isinstance(item, list) for item in df[colHead]): - listBlob = df[colHead][df[colHead].astype(str).str[0] == "["] - df.loc[listBlob.index, colHead] = df.loc[listBlob.index, colHead].str[0] + for colHead in columnHeadings: + if any(isinstance(item, list) for item in df[colHead]): + listBlob = df[colHead][df[colHead].astype(str).str[0] == "["] + df.loc[listBlob.index, colHead] = df.loc[listBlob.index, colHead].str[0] - # if the df field has embedded json - if any(isinstance(item, dict) for item in df[colHead]): - # grab the data that is in brackets - jsonBlob = df[colHead][df[colHead].astype(str).str[0] == "{"] + # if the df field has embedded json + if any(isinstance(item, dict) for item in df[colHead]): + # grab the data that is in brackets + jsonBlob = df[colHead][df[colHead].astype(str).str[0] == "{"] - # replace those values with nan - df.loc[jsonBlob.index, colHead] = np.nan + # replace those values with nan + df.loc[jsonBlob.index, colHead] = np.nan - # turn jsonBlob 
to dataframe - newDataFrame = pd.concat([newDataFrame, pd.DataFrame(jsonBlob.tolist(), - index=jsonBlob.index).add_prefix(colHead + '.')], axis=1) + # turn jsonBlob to dataframe + newDataFrame = pd.concat([newDataFrame, pd.DataFrame(jsonBlob.tolist(), + index=jsonBlob.index).add_prefix(colHead + '.')], axis=1) - df = pd.concat([df, newDataFrame, holdData], axis=1) + df = pd.concat([df, newDataFrame, holdData], axis=1) df.sort_index(axis=1, inplace=True) @@ -334,6 +334,7 @@ def get_basalDaySummary(basal): # %% ID, HASHID, AGE, & YLW userID = donors.userID[dIndex] hashID = donors.hashID[dIndex] +# round all birthdays and diagnosis dates to the first day of the month (to protect identities) bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) @@ -370,7 +371,7 @@ def get_basalDaySummary(basal): metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings -# %% ADD UPLOAD DATE + # %% ADD UPLOAD DATE # attach upload time to each record, for resolving duplicates if "upload" in data.type.unique(): data = addUploadDate(data) @@ -388,9 +389,10 @@ def get_basalDaySummary(basal): data = round_time(data, timeIntervalMinutes=5, timeField="time", roundedTimeFieldName="roundedTime", startWithFirstRecord=True, verbose=False) + data.sort_values("uploadTime", ascending=False, inplace=True) -# %% ID, HASHID, AGE, & YLW + # %% ID, HASHID, AGE, & YLW data["userID"] = userID data["hashID"] = hashID data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) @@ -401,12 +403,13 @@ def get_basalDaySummary(basal): "ylw"] -# %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) bolus = mergeWizardWithBolus(data) if len(bolus) > 0: # get rid of duplicates that have the same ["time", "normal"] + bolus.sort_values("uploadTime", ascending=False, inplace=True) bolus, nBolusDuplicatesRemoved = \ - td.clean.remove_duplicates(bolus, 
bolus[["time", "normal"]]) + td.clean.remove_duplicates(bolus, bolus[["deviceTime", "normal"]]) metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved # get a summary of boluses per day @@ -438,23 +441,29 @@ def get_basalDaySummary(basal): bolusEvents.loc[bolusEvents["carbInput"] == 0, "eventType"] = "meal" -# %% PUMP SETTINGS + # %% PUMP SETTINGS if "pumpSettings" in data.type.unique(): pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") + pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) + + pumpSettings, nPumpSettingsDuplicatesRemoved = \ + td.clean.remove_duplicates(pumpSettings, pumpSettings[["deviceTime"]]) + metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved # ISF if "insulinSensitivity.amount" in list(pumpSettings): isfColHead = "insulinSensitivity" else: isfColHead = "insulinSensitivities" + pdb.set_trace() pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["time"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") isfCH = commonColumnHeadings.copy() - isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) + isfCH.extend(["time", "isf", "isf_mmolL_U"]) isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] # CIR @@ -462,13 +471,14 @@ def get_basalDaySummary(basal): cirColHead = "carbRatio" else: cirColHead = "carbRatios" + pdb.set_trace() pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["time"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") cirCH = commonColumnHeadings.copy() - cirCH.extend(["cirTime", "cir"]) + cirCH.extend(["time", "cir"]) cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] @@ -477,6 
+487,7 @@ def get_basalDaySummary(basal): bgTargetColHead = "bgTarget" else: bgTargetColHead = "bgTargets" + pdb.set_trace() pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] pumpSettings["correctionTargetLow"] = \ From aa007045d849bbf7d533436ba37dc29d983f8c6c Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 11 Jan 2019 21:48:00 -0600 Subject: [PATCH 15/78] expand correction target cases and auto mode basal rates --- .../get-users-settings-and-events.py | 79 ++++++++++++++----- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 91e348a1..bba04127 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -329,7 +329,7 @@ def get_basalDaySummary(basal): donors = td.load.load_csv(os.path.join(donorPath, donorList)) # this is where the loop will go: -dIndex = 2379 +dIndex = 2 # %% ID, HASHID, AGE, & YLW userID = donors.userID[dIndex] @@ -459,11 +459,11 @@ def get_basalDaySummary(basal): pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["time"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") isfCH = commonColumnHeadings.copy() - isfCH.extend(["time", "isf", "isf_mmolL_U"]) + isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] # CIR @@ -474,11 +474,11 @@ def get_basalDaySummary(basal): pdb.set_trace() pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["time"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[cirColHead + ".start"], 
unit="ms") cirCH = commonColumnHeadings.copy() - cirCH.extend(["time", "cir"]) + cirCH.extend(["cirTime", "cir"]) cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] @@ -489,32 +489,72 @@ def get_basalDaySummary(basal): bgTargetColHead = "bgTargets" pdb.set_trace() - pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] - pumpSettings["correctionTargetLow"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) + # low + if bgTargetColHead + ".low" in list(pumpSettings): + pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] + pumpSettings["correctionTargetLow"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) + else: + pumpSettings["correctionTargetLow_mmolL"] = np.nan + pumpSettings["correctionTargetLow"] = np.nan + + # high + if bgTargetColHead + ".high" in list(pumpSettings): + pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] + pumpSettings["correctionTargetHigh"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) + + else: + pumpSettings["correctionTargetHigh_mmolL"] = np.nan + pumpSettings["correctionTargetHigh"] = np.nan + + # target + if bgTargetColHead + ".target" in list(pumpSettings): + pumpSettings["correctionTarget_mmolL"] = pumpSettings[bgTargetColHead + ".target"] + pumpSettings["correctionTarget"] = \ + mmolL_to_mgdL(pumpSettings["correctionTarget_mmolL"]) + + else: + pumpSettings["correctionTarget_mmolL"] = np.nan + pumpSettings["correctionTarget"] = np.nan - pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] - pumpSettings["correctionTargetHigh"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) + # range + if bgTargetColHead + ".range" in list(pumpSettings): + pumpSettings["correctionTargetRange_mmolL"] = pumpSettings[bgTargetColHead + ".range"] + pumpSettings["correctionTargetRange"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetRange_mmolL"]) - 
pumpSettings["correctionTargetTime"] = pd.to_datetime(pumpSettings["day"]) + \ + else: + pumpSettings["correctionTargetRange_mmolL"] = np.nan + pumpSettings["correctionTargetRange"] =np.nan + + pumpSettings["ctTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[bgTargetColHead + ".start"], unit="ms") ctCH = commonColumnHeadings.copy() - ctCH.extend(["correctionTargetTime", "correctionTargetLow", "correctionTargetHigh"]) - correctionTarget = pumpSettings.loc[pumpSettings["correctionTargetLow"].notnull(), ctCH] + ctCH.extend(["ctTime", "correctionTargetLow", "correctionTargetHigh", + "correctionTarget", "correctionTargetRange"]) + correctionTarget = pumpSettings.loc[pumpSettings["ctTime"].notnull(), ctCH] # SCHEDULED BASAL RATES sbrCH = commonColumnHeadings.copy() - sbrCH.extend(["time", "rate"]) + sbrCH.extend(["sbrTime", "rate", "type"]) sbr = pd.DataFrame(columns=sbrCH) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): - tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." + actSched]) - tempDF["day"] = pumpSettings.loc[p, "day"] + if 'Auto Mode' not in actSched: + tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["type"] = np.nan + tempDF["sbrTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + else: + tempDF = pd.DataFrame(index=[0]) + tempDF["sbrTime"] = np.nan + tempDF["rate"] = np.nan + tempDF["type"] = "AutoMode" + tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] - tempDF["time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") sbr = pd.concat([sbr, tempDF[sbrCH]], ignore_index=True) @@ -551,9 +591,6 @@ def get_basalDaySummary(basal): # %% LOOP DATA (BINARY T/F) - - - # %% CGM DATA From 88682afe10d885d2a409bb110084a29042cc7da8 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 05:58:25 -0600 Subject: [PATCH 16/78] handle insulinSensitivities and carbRatios schedules --- .../get-users-settings-and-events.py | 521 ++++++++++-------- 1 file changed, 276 insertions(+), 245 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index bba04127..9fb1f148 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -32,6 +32,8 @@ def removeNegativeDurations(df): nNegativeDurations = sum(df.duration < 0) if nNegativeDurations > 0: df = df[~(df.duration < 0)] + else: + nNegativeDurations = np.nan return df, nNegativeDurations @@ -329,295 +331,324 @@ def get_basalDaySummary(basal): donors = td.load.load_csv(os.path.join(donorPath, donorList)) # this is where the loop will go: -dIndex = 2 +for dIndex in range(0, len(donors)): -# %% ID, HASHID, AGE, & YLW -userID = donors.userID[dIndex] -hashID = donors.hashID[dIndex] -# round all birthdays and diagnosis dates to the first day of the month (to protect identities) -bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) -dDate = 
pd.to_datetime(donors.dDay[dIndex][0:7]) - - -# %% LOAD IN DONOR JSON DATA -metadata = pd.DataFrame(index=[dIndex]) -jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") -jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") - -if os.path.exists(jsonFileName): - fileSize = os.stat(jsonFileName).st_size - metadata["fileSizeKB"] = fileSize / 1000 - if fileSize > 1000: - data = td.load.load_json(jsonFileName) - - # sort the data by time - data.sort_values("time", inplace=True) - - # flatten the embedded json - data = flattenJson(data) - - - # %% CLEAN DATA - # remove negative durations - data, nNegativeDurations = removeNegativeDurations(data) - metadata["nNegativeDurations"] = nNegativeDurations - - # get rid of cgm values too low/high (< 38 & > 402 mg/dL) - data, nInvalidCgmValues = removeInvalidCgmValues(data) - metadata["nInvalidCgmValues"] = nInvalidCgmValues - - # Tslim calibration bug fix - data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) - metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings - - - # %% ADD UPLOAD DATE - # attach upload time to each record, for resolving duplicates - if "upload" in data.type.unique(): - data = addUploadDate(data) - - -# %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) - data["utcTime"] = pd.to_datetime(data["time"]) - data["timezone"].fillna(method='ffill', inplace=True) - data["timezone"].fillna(method='bfill', inplace=True) - data["day"] = pd.DatetimeIndex(data["utcTime"]).date - - # round to the nearest 5 minutes - # TODO: once roundTime is pushed to tidals repository then this line can be replaced - # with td.clean.round_time - data = round_time(data, timeIntervalMinutes=5, timeField="time", - roundedTimeFieldName="roundedTime", startWithFirstRecord=True, - verbose=False) - data.sort_values("uploadTime", ascending=False, inplace=True) - - - # %% ID, HASHID, AGE, & YLW - data["userID"] = userID - data["hashID"] = hashID - data["age"] = 
np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) - data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) - - commonColumnHeadings = ["hashID", - "age", - "ylw"] - - - # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) - bolus = mergeWizardWithBolus(data) - if len(bolus) > 0: - # get rid of duplicates that have the same ["time", "normal"] - bolus.sort_values("uploadTime", ascending=False, inplace=True) - bolus, nBolusDuplicatesRemoved = \ - td.clean.remove_duplicates(bolus, bolus[["deviceTime", "normal"]]) - metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved - - # get a summary of boluses per day - bolusDaySummary = get_bolusDaySummary(bolus) - - if "extended" not in bolus: - bolus["extended"] = np.nan - bolus["duration"] = np.nan - - # ISF associated with bolus event - if "insulinSensitivities" in list(bolus): - pdb.set_trace() - if "carbRatios" in list(bolus): - pdb.set_trace() - - bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] - bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) - - bolusCH = commonColumnHeadings.copy() - bolusCH.extend(["utcTime", "roundedTime", "normal", "carbInput", "subType", - "insulinOnBoard", "bgInput", - "isf", "isf_mmolL_U", "insulinCarbRatio"]) - bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] - bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan - bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", - "bgInput": "bg_mmolL"}) - bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) - bolusEvents["eventType"] = "correction" - bolusEvents.loc[bolusEvents["carbInput"] == 0, "eventType"] = "meal" - - - # %% PUMP SETTINGS - if "pumpSettings" in data.type.unique(): - pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") - pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) - - pumpSettings, nPumpSettingsDuplicatesRemoved = \ - td.clean.remove_duplicates(pumpSettings, 
pumpSettings[["deviceTime"]]) - metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved - - # ISF - if "insulinSensitivity.amount" in list(pumpSettings): - isfColHead = "insulinSensitivity" - else: - isfColHead = "insulinSensitivities" - pdb.set_trace() + # %% ID, HASHID, AGE, & YLW + userID = donors.userID[dIndex] + hashID = donors.hashID[dIndex] + # round all birthdays and diagnosis dates to the first day of the month (to protect identities) + bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) + dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) - pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] - pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") - isfCH = commonColumnHeadings.copy() - isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) - isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] + # %% LOAD IN DONOR JSON DATA + metadata = pd.DataFrame(index=[dIndex]) + jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") + jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") - # CIR - if "carbRatio.amount" in list(pumpSettings): - cirColHead = "carbRatio" - else: - cirColHead = "carbRatios" - pdb.set_trace() + if os.path.exists(jsonFileName): + fileSize = os.stat(jsonFileName).st_size + metadata["fileSizeKB"] = fileSize / 1000 + if fileSize > 1000: + data = td.load.load_json(jsonFileName) - pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") + # sort the data by time + data.sort_values("time", inplace=True) - cirCH = commonColumnHeadings.copy() - cirCH.extend(["cirTime", "cir"]) - cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] + # flatten the embedded json + data = flattenJson(data) - # CORRECTION 
TARGET - if "bgTarget.start" in list(pumpSettings): - bgTargetColHead = "bgTarget" - else: - bgTargetColHead = "bgTargets" - pdb.set_trace() + # %% CLEAN DATA + # remove negative durations + data, nNegativeDurations = removeNegativeDurations(data) + metadata["nNegativeDurations"] = nNegativeDurations - # low - if bgTargetColHead + ".low" in list(pumpSettings): - pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] - pumpSettings["correctionTargetLow"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) - else: - pumpSettings["correctionTargetLow_mmolL"] = np.nan - pumpSettings["correctionTargetLow"] = np.nan + # get rid of cgm values too low/high (< 38 & > 402 mg/dL) + data, nInvalidCgmValues = removeInvalidCgmValues(data) + metadata["nInvalidCgmValues"] = nInvalidCgmValues - # high - if bgTargetColHead + ".high" in list(pumpSettings): - pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] - pumpSettings["correctionTargetHigh"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) + # Tslim calibration bug fix + data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) + metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings - else: - pumpSettings["correctionTargetHigh_mmolL"] = np.nan - pumpSettings["correctionTargetHigh"] = np.nan - # target - if bgTargetColHead + ".target" in list(pumpSettings): - pumpSettings["correctionTarget_mmolL"] = pumpSettings[bgTargetColHead + ".target"] - pumpSettings["correctionTarget"] = \ - mmolL_to_mgdL(pumpSettings["correctionTarget_mmolL"]) + # %% ADD UPLOAD DATE + # attach upload time to each record, for resolving duplicates + if "upload" in data.type.unique(): + data = addUploadDate(data) - else: - pumpSettings["correctionTarget_mmolL"] = np.nan - pumpSettings["correctionTarget"] = np.nan - # range - if bgTargetColHead + ".range" in list(pumpSettings): - pumpSettings["correctionTargetRange_mmolL"] = 
pumpSettings[bgTargetColHead + ".range"] - pumpSettings["correctionTargetRange"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetRange_mmolL"]) + # %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) + data["utcTime"] = pd.to_datetime(data["time"]) + data["timezone"].fillna(method='ffill', inplace=True) + data["timezone"].fillna(method='bfill', inplace=True) + data["day"] = pd.DatetimeIndex(data["utcTime"]).date - else: - pumpSettings["correctionTargetRange_mmolL"] = np.nan - pumpSettings["correctionTargetRange"] =np.nan - - pumpSettings["ctTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[bgTargetColHead + ".start"], unit="ms") - - ctCH = commonColumnHeadings.copy() - ctCH.extend(["ctTime", "correctionTargetLow", "correctionTargetHigh", - "correctionTarget", "correctionTargetRange"]) - correctionTarget = pumpSettings.loc[pumpSettings["ctTime"].notnull(), ctCH] - - # SCHEDULED BASAL RATES - sbrCH = commonColumnHeadings.copy() - sbrCH.extend(["sbrTime", "rate", "type"]) - sbr = pd.DataFrame(columns=sbrCH) - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): - if 'Auto Mode' not in actSched: - tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) - tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["type"] = np.nan - tempDF["sbrTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + # round to the nearest 5 minutes + # TODO: once roundTime is pushed to tidals repository then this line can be replaced + # with td.clean.round_time + data = round_time(data, timeIntervalMinutes=5, timeField="time", + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False) + data.sort_values("uploadTime", ascending=False, inplace=True) + + + # %% ID, HASHID, AGE, & YLW + data["userID"] = userID + data["hashID"] = hashID + data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) + data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) + + commonColumnHeadings = ["hashID", + "age", + "ylw"] + + + # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + bolus = mergeWizardWithBolus(data) + if len(bolus) > 0: + # get rid of duplicates that have the same ["time", "normal"] + bolus.sort_values("uploadTime", ascending=False, inplace=True) + bolus, nBolusDuplicatesRemoved = \ + td.clean.remove_duplicates(bolus, bolus[["deviceTime", "normal"]]) + metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved + + # get a summary of boluses per day + bolusDaySummary = get_bolusDaySummary(bolus) + + if "extended" not in bolus: + bolus["extended"] = np.nan + bolus["duration"] = np.nan + + # cir associated with bolus event + if "insulinSensitivities" in list(bolus): + pdb.set_trace() + if "carbRatios" in list(bolus): + pdb.set_trace() + + bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] + bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) + + bolusCH = commonColumnHeadings.copy() + bolusCH.extend(["utcTime", "roundedTime", "normal", "carbInput", "subType", + "insulinOnBoard", "bgInput", + "isf", "isf_mmolL_U", "insulinCarbRatio"]) + bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] + 
bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan + bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", + "bgInput": "bg_mmolL"}) + bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) + bolusEvents["eventType"] = "correction" + bolusEvents.loc[bolusEvents["carbInput"] == 0, "eventType"] = "meal" + + + # %% PUMP SETTINGS + if "pumpSettings" in data.type.unique(): + pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") + pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) + + pumpSettings, nPumpSettingsDuplicatesRemoved = \ + td.clean.remove_duplicates(pumpSettings, pumpSettings[["deviceTime"]]) + metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved + + # ISF + isfCH = commonColumnHeadings.copy() + isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) + + if "insulinSensitivity.amount" in list(pumpSettings): + isfColHead = "insulinSensitivity" + pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) + pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") + + isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] else: - tempDF = pd.DataFrame(index=[0]) - tempDF["sbrTime"] = np.nan - tempDF["rate"] = np.nan - tempDF["type"] = "AutoMode" + isfColHead = "insulinSensitivities" + isf = pd.DataFrame(columns=isfCH) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." 
+ actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["isfTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["hashID"] = pumpSettings.loc[p, "hashID"] + tempDF["age"] = pumpSettings.loc[p, "age"] + tempDF["ylw"] = pumpSettings.loc[p, "ylw"] + tempDF["isf_mmolL_U"] = tempDF["amount"] + tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) + isf = pd.concat([isf, tempDF[isfCH]], ignore_index=True) + + + + # CIR + cirCH = commonColumnHeadings.copy() + cirCH.extend(["cirTime", "cir"]) + + if "carbRatio.amount" in list(pumpSettings): + cirColHead = "carbRatio" + pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] + pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") + + cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] + else: + cirColHead = "carbRatios" + cir = pd.DataFrame(columns=cirCH) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." 
+ actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["cirTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["hashID"] = pumpSettings.loc[p, "hashID"] + tempDF["age"] = pumpSettings.loc[p, "age"] + tempDF["ylw"] = pumpSettings.loc[p, "ylw"] + tempDF["cir"] = tempDF["amount"].astype(float) + cir = pd.concat([cir, tempDF[cirCH]], ignore_index=True) + + + # CORRECTION TARGET + ctCH = commonColumnHeadings.copy() + ctCH.extend(["ctTime", "correctionTargetLow", "correctionTargetHigh", + "correctionTarget", "correctionTargetRange"]) + if "bgTarget.start" in list(pumpSettings): + bgTargetColHead = "bgTarget" + + # low + if bgTargetColHead + ".low" in list(pumpSettings): + pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] + pumpSettings["correctionTargetLow"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) + else: + pumpSettings["correctionTargetLow_mmolL"] = np.nan + pumpSettings["correctionTargetLow"] = np.nan + + # high + if bgTargetColHead + ".high" in list(pumpSettings): + pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] + pumpSettings["correctionTargetHigh"] = \ + mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) + + else: + pumpSettings["correctionTargetHigh_mmolL"] = np.nan + pumpSettings["correctionTargetHigh"] = np.nan + + # target + if bgTargetColHead + ".target" in list(pumpSettings): + pumpSettings["correctionTarget_mmolL"] = pumpSettings[bgTargetColHead + ".target"] + pumpSettings["correctionTarget"] = \ + mmolL_to_mgdL(pumpSettings["correctionTarget_mmolL"]) + + else: + pumpSettings["correctionTarget_mmolL"] = np.nan + pumpSettings["correctionTarget"] = np.nan + + # range + if bgTargetColHead + ".range" in list(pumpSettings): + pumpSettings["correctionTargetRange_mmolL"] = pumpSettings[bgTargetColHead + ".range"] + pumpSettings["correctionTargetRange"] = \ + 
mmolL_to_mgdL(pumpSettings["correctionTargetRange_mmolL"]) + + else: + pumpSettings["correctionTargetRange_mmolL"] = np.nan + pumpSettings["correctionTargetRange"] =np.nan + + pumpSettings["ctTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[bgTargetColHead + ".start"], unit="ms") + + + correctionTarget = pumpSettings.loc[pumpSettings["ctTime"].notnull(), ctCH] - tempDF["hashID"] = pumpSettings.loc[p, "hashID"] - tempDF["age"] = pumpSettings.loc[p, "age"] - tempDF["ylw"] = pumpSettings.loc[p, "ylw"] - sbr = pd.concat([sbr, tempDF[sbrCH]], ignore_index=True) + else: + bgTargetColHead = "bgTargets" + pdb.set_trace() - # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) - if "basal" in data.type.unique(): - basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") - basal.sort_values("uploadTime", ascending=False, inplace=True) - basal, nBasalDuplicatesRemoved = \ - td.clean.remove_duplicates(basal, basal[["deliveryType", "deviceTime", "duration", "rate"]]) - metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved + # SCHEDULED BASAL RATES + sbrCH = commonColumnHeadings.copy() + sbrCH.extend(["sbrTime", "rate", "type"]) + sbr = pd.DataFrame(columns=sbrCH) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + if 'Auto Mode' not in actSched: + tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["type"] = np.nan + tempDF["sbrTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + else: + tempDF = pd.DataFrame(index=[0]) + tempDF["sbrTime"] = np.nan + tempDF["rate"] = np.nan + tempDF["type"] = "AutoMode" - # fill NaNs with 0, as it indicates a suspend (temp basal of 0) - basal.rate.fillna(0, inplace=True) + tempDF["hashID"] = pumpSettings.loc[p, "hashID"] + tempDF["age"] = pumpSettings.loc[p, "age"] + tempDF["ylw"] = pumpSettings.loc[p, "ylw"] + sbr = pd.concat([sbr, tempDF[sbrCH]], ignore_index=True) - # get rid of basals that have durations of 0 - nBasalDuration0 = sum(basal.duration > 0) - basal = basal[basal.duration > 0] - metadata["basal.nBasalDuration0"] = nBasalDuration0 - # get rid of basal durations that are unrealistic - nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000)) - metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration) - basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan + # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) + if "basal" in data.type.unique(): + basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") + basal.sort_values("uploadTime", ascending=False, inplace=True) - # calculate the total amount of insulin delivered (duration * rate) - basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 - basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] + basal, nBasalDuplicatesRemoved = \ + td.clean.remove_duplicates(basal, basal[["deliveryType", "deviceTime", "duration", "rate"]]) + metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved - # get a summary of basals per day - basalDaySummary = get_basalDaySummary(basal) + # fill NaNs with 0, as it indicates a suspend (temp basal of 0) + basal.rate.fillna(0, inplace=True) + # get rid of basals that have durations of 0 + nBasalDuration0 = 
sum(basal.duration > 0) + basal = basal[basal.duration > 0] + metadata["basal.nBasalDuration0"] = nBasalDuration0 -# %% LOOP DATA (BINARY T/F) + # get rid of basal durations that are unrealistic + nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000)) + metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration) + basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan + # calculate the total amount of insulin delivered (duration * rate) + basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 + basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] -# %% CGM DATA + # get a summary of basals per day + basalDaySummary = get_basalDaySummary(basal) -# %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + # %% LOOP DATA (BINARY T/F) -# %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + # %% CGM DATA -# %% SAVE RESULTS + # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW -# %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + + + # %% SAVE RESULTS + + + # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL + else: + metadata["flags"] = "no basal data" else: - metadata["flags"] = "no basal data" + metadata["flags"] = "no pump settings" else: - metadata["flags"] = "no pump settings" + metadata["flags"] = "no bolus wizard data" else: - metadata["flags"] = "no bolus wizard data" + metadata["flags"] = "no upload data" else: - metadata["flags"] = "no upload data" + metadata["flags"] = "file contains no data" else: - metadata["flags"] = "file contains no data" -else: - metadata["flags"] = "file does not exist" + metadata["flags"] = "file does not exist" + + print("done with", dIndex) + # %% V2 DATA TO GRAB +# MAX BASAL RATE, MAX BOLUS AMOUNT, AND INSULIN DURATION SET ON SELECT PUMPS # 
RE-EVALUATE THE WAY EXTENDED BOLUSES ARE BEING ACCOUNTED (ARE THEY ALSO SHOWING UP IN BASAL DATA?) # ALERT SETTINGS # ESTIMATED LOCAL TIME From 9d1349f2e3e4e993ef115e5b7ced54cbb1aa319a Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 06:38:04 -0600 Subject: [PATCH 17/78] refactor correction target --- .../get-users-settings-and-events.py | 103 ++++++------------ 1 file changed, 36 insertions(+), 67 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 9fb1f148..bda5ea62 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -333,6 +333,9 @@ def get_basalDaySummary(basal): # this is where the loop will go: for dIndex in range(0, len(donors)): + # clear output dataframes + isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() + # %% ID, HASHID, AGE, & YLW userID = donors.userID[dIndex] hashID = donors.hashID[dIndex] @@ -453,120 +456,86 @@ def get_basalDaySummary(basal): metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved # ISF - isfCH = commonColumnHeadings.copy() - isfCH.extend(["isfTime", "isf", "isf_mmolL_U"]) + isfColHeadings = commonColumnHeadings.copy() + isfColHeadings.extend(["isf.time", "isf", "isf_mmolL_U"]) if "insulinSensitivity.amount" in list(pumpSettings): isfColHead = "insulinSensitivity" pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["isfTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["isf.time"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") - isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfCH] + isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfColHeadings] else: isfColHead = "insulinSensitivities" - isf = 
pd.DataFrame(columns=isfCH) + isf = pd.DataFrame(columns=isfColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["isfTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["isf.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] tempDF["isf_mmolL_U"] = tempDF["amount"] tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) - isf = pd.concat([isf, tempDF[isfCH]], ignore_index=True) - - + isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True) # CIR - cirCH = commonColumnHeadings.copy() - cirCH.extend(["cirTime", "cir"]) + cirColHeadings = commonColumnHeadings.copy() + cirColHeadings.extend(["cir.time", "cir"]) if "carbRatio.amount" in list(pumpSettings): cirColHead = "carbRatio" pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["cirTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["cir.time"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") - cir = pumpSettings.loc[pumpSettings["cir"].notnull(), cirCH] + cir = pumpSettings.loc[pumpSettings["carbRatio.amount"].notnull(), cirColHeadings] else: cirColHead = "carbRatios" - cir = pd.DataFrame(columns=cirCH) + cir = pd.DataFrame(columns=cirColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["cirTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["cir.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] tempDF["cir"] = tempDF["amount"].astype(float) - cir = pd.concat([cir, tempDF[cirCH]], ignore_index=True) + cir = pd.concat([cir, tempDF[cirColHeadings]], ignore_index=True) # CORRECTION TARGET - ctCH = commonColumnHeadings.copy() - ctCH.extend(["ctTime", "correctionTargetLow", "correctionTargetHigh", - "correctionTarget", "correctionTargetRange"]) + ctColHeadings = commonColumnHeadings.copy() + ctColHeadings.extend(["ct.time", "ct.low", "ct.high", "ct.target", "ct.range"]) if "bgTarget.start" in list(pumpSettings): - bgTargetColHead = "bgTarget" - - # low - if bgTargetColHead + ".low" in list(pumpSettings): - pumpSettings["correctionTargetLow_mmolL"] = pumpSettings[bgTargetColHead + ".low"] - pumpSettings["correctionTargetLow"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetLow_mmolL"]) - else: - pumpSettings["correctionTargetLow_mmolL"] = np.nan - pumpSettings["correctionTargetLow"] = np.nan - - # high - if bgTargetColHead + ".high" in list(pumpSettings): - pumpSettings["correctionTargetHigh_mmolL"] = pumpSettings[bgTargetColHead + ".high"] - pumpSettings["correctionTargetHigh"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetHigh_mmolL"]) + bgTargetColHead = "bgTarget." - else: - pumpSettings["correctionTargetHigh_mmolL"] = np.nan - pumpSettings["correctionTargetHigh"] = np.nan + for targetType in ["low", "high", "target", "range"]: + if bgTargetColHead + targetType in list(pumpSettings): + pumpSettings["ct." 
+ targetType + "_mmolL"] = \ + pumpSettings[bgTargetColHead + targetType] - # target - if bgTargetColHead + ".target" in list(pumpSettings): - pumpSettings["correctionTarget_mmolL"] = pumpSettings[bgTargetColHead + ".target"] - pumpSettings["correctionTarget"] = \ - mmolL_to_mgdL(pumpSettings["correctionTarget_mmolL"]) + pumpSettings["ct." + targetType] = \ + mmolL_to_mgdL(pumpSettings["ct." + targetType + "_mmolL"]) + else: + pumpSettings["ct." + targetType + "_mmolL"] = np.nan + pumpSettings["ct." + targetType] = np.nan - else: - pumpSettings["correctionTarget_mmolL"] = np.nan - pumpSettings["correctionTarget"] = np.nan + pumpSettings["ct.time"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[bgTargetColHead + "start"], unit="ms") - # range - if bgTargetColHead + ".range" in list(pumpSettings): - pumpSettings["correctionTargetRange_mmolL"] = pumpSettings[bgTargetColHead + ".range"] - pumpSettings["correctionTargetRange"] = \ - mmolL_to_mgdL(pumpSettings["correctionTargetRange_mmolL"]) - - else: - pumpSettings["correctionTargetRange_mmolL"] = np.nan - pumpSettings["correctionTargetRange"] =np.nan - - pumpSettings["ctTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[bgTargetColHead + ".start"], unit="ms") - - - correctionTarget = pumpSettings.loc[pumpSettings["ctTime"].notnull(), ctCH] + correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] else: bgTargetColHead = "bgTargets" pdb.set_trace() - - # SCHEDULED BASAL RATES - sbrCH = commonColumnHeadings.copy() - sbrCH.extend(["sbrTime", "rate", "type"]) - sbr = pd.DataFrame(columns=sbrCH) + sbrColHeadings = commonColumnHeadings.copy() + sbrColHeadings.extend(["sbrTime", "rate", "type"]) + sbr = pd.DataFrame(columns=sbrColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): if 'Auto Mode' not in actSched: tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) @@ -582,7 +551,7 @@ def get_basalDaySummary(basal): tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] - sbr = pd.concat([sbr, tempDF[sbrCH]], ignore_index=True) + sbr = pd.concat([sbr, tempDF[sbrColHeadings]], ignore_index=True) # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) From 307bb2efc2dcd11d6f8c9e2691300b50b973e75b Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 07:46:48 -0600 Subject: [PATCH 18/78] handle multiple correctionTargets --- .../get-users-settings-and-events.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index bda5ea62..75d928f8 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -509,13 +509,14 @@ def get_basalDaySummary(basal): # CORRECTION TARGET ctColHeadings = commonColumnHeadings.copy() ctColHeadings.extend(["ct.time", "ct.low", "ct.high", "ct.target", "ct.range"]) + correctionTarget if "bgTarget.start" in list(pumpSettings): - bgTargetColHead = "bgTarget." + ctColHead = "bgTarget." for targetType in ["low", "high", "target", "range"]: - if bgTargetColHead + targetType in list(pumpSettings): + if ctColHead + targetType in list(pumpSettings): pumpSettings["ct." + targetType + "_mmolL"] = \ - pumpSettings[bgTargetColHead + targetType] + pumpSettings[ctColHead + targetType] pumpSettings["ct." + targetType] = \ mmolL_to_mgdL(pumpSettings["ct." + targetType + "_mmolL"]) @@ -524,13 +525,32 @@ def get_basalDaySummary(basal): pumpSettings["ct." 
+ targetType] = np.nan pumpSettings["ct.time"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[bgTargetColHead + "start"], unit="ms") + pd.to_timedelta(pumpSettings[ctColHead + "start"], unit="ms") correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] else: - bgTargetColHead = "bgTargets" - pdb.set_trace() + ctColHead = "bgTargets" + correctionTarget = pd.DataFrame(columns=ctColHeadings) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." + actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["ct.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["hashID"] = pumpSettings.loc[p, "hashID"] + tempDF["age"] = pumpSettings.loc[p, "age"] + tempDF["ylw"] = pumpSettings.loc[p, "ylw"] + for targetType in ["low", "high", "target", "range"]: + if targetType in list(tempDF): + tempDF["ct." + targetType + "_mmolL"] = \ + tempDF[targetType] + + tempDF["ct." + targetType] = \ + mmolL_to_mgdL(tempDF["ct." + targetType + "_mmolL"]) + else: + tempDF["ct." + targetType + "_mmolL"] = np.nan + tempDF["ct." 
+ targetType] = np.nan + + correctionTarget = pd.concat([correctionTarget, tempDF[ctColHeadings]], ignore_index=True) # SCHEDULED BASAL RATES sbrColHeadings = commonColumnHeadings.copy() From 2190b24705c8109997a377e1512686dfbf0bced6 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 08:58:41 -0600 Subject: [PATCH 19/78] add cgm data --- .../get-users-settings-and-events.py | 166 +++++++++++++++++- 1 file changed, 160 insertions(+), 6 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 75d928f8..701d2302 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -322,6 +322,97 @@ def get_basalDaySummary(basal): return basalDaySummary +def filterAndSort(groupedDF, filterByField, sortByField): + filterDF = groupedDF.get_group(filterByField).dropna(axis=1, how="all") + filterDF = filterDF.sort_values(sortByField) + return filterDF + + +def getClosedLoopDays(groupedData, nTempBasalsPerDayIsClosedLoop, metadata): + # filter by basal data and sort by time + if "basal" in groupedData.type.unique(): + basalData = filterAndSort(groupedData, "basal", "time") + + # get closed loop days + nTB = nTempBasalsPerDayIsClosedLoop + + tbDataFrame = basalData.loc[basalData.deliveryType == "temp", ["time"]] + tbDataFrame.index = pd.to_datetime(tbDataFrame["time"]) + tbDataFrame = tbDataFrame.drop(["time"], axis=1) + tbDataFrame["basal.temp.count"] = 1 + nTempBasalsPerDay = tbDataFrame.resample("D").sum() + closedLoopDF = pd.DataFrame(nTempBasalsPerDay, + index=nTempBasalsPerDay.index.date) + closedLoopDF["date"] = nTempBasalsPerDay.index.date + closedLoopDF["basal.closedLoopDays"] = \ + closedLoopDF["basal.temp.count"] >= nTB + nClosedLoopDays = closedLoopDF["basal.closedLoopDays"].sum() + + # get the number of days with 670g + basalData["date"] = pd.to_datetime(basalData.time).dt.date + bdGroup 
= basalData.groupby("date") + topPump = bdGroup.deviceId.describe()["top"] + med670g = pd.DataFrame(topPump.str.contains("1780")).rename(columns={"top":"670g"}) + med670g.reset_index(inplace=True) + n670gDays = med670g["670g"].sum() + + else: + closedLoopDF = pd.DataFrame(columns=["basal.closedLoopDays", "date"]) + med670g = pd.DataFrame(columns=["670g", "date"]) + nClosedLoopDays = 0 + n670gDays = 0 + + metadata["basal.closedLoopDays.count"] = nClosedLoopDays + metadata["med670gDays.count"] = n670gDays + + return closedLoopDF, med670g, metadata + + +def removeDuplicates(df, criteriaDF): + nBefore = len(df) + df = df.loc[~(df[criteriaDF].duplicated())] + df = df.reset_index(drop=True) + nDuplicatesRemoved = nBefore - len(df) + + return df, nDuplicatesRemoved + + +def removeCgmDuplicates(df, timeCriterion): + if timeCriterion in df: + df.sort_values(by=[timeCriterion, "uploadTime"], + ascending=[False, False], + inplace=True) + dfIsNull = df[df[timeCriterion].isnull()] + dfNotNull = df[df[timeCriterion].notnull()] + dfNotNull, nDuplicatesRemoved = removeDuplicates(dfNotNull, [timeCriterion, "value"]) + df = pd.concat([dfIsNull, dfNotNull]) + df.sort_values(by=[timeCriterion, "uploadTime"], + ascending=[False, False], + inplace=True) + else: + nDuplicatesRemoved = 0 + + return df, nDuplicatesRemoved + + +def getStartAndEndTimes(df, dateTimeField): + dfBeginDate = df[dateTimeField].min() + dfEndDate = df[dateTimeField].max() + + return dfBeginDate, dfEndDate + + +def getListOfDexcomCGMDays(df): + # search for dexcom cgms + searchfor = ["Dex", "tan", "IR", "unk"] + # create dexcom boolean field + if "deviceId" in df.columns.values: + totalCgms = len(df.deviceId.notnull()) + df["dexcomCGM"] = df.deviceId.str.contains("|".join(searchfor)) + percentDexcomCGM = df.dexcomCGM.sum() / totalCgms * 100 + return df, percentDexcomCGM + + # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S dataPulledDate = "2018-09-28" phiDate = "PHI-" + dataPulledDate @@ 
-443,7 +534,12 @@ def get_basalDaySummary(basal): "bgInput": "bg_mmolL"}) bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) bolusEvents["eventType"] = "correction" - bolusEvents.loc[bolusEvents["carbInput"] == 0, "eventType"] = "meal" + bolusEvents.loc[bolusEvents["carbInput"] > 0, "eventType"] = "meal" + + # get start and end times + bolusBeginDate, bolusEndDate = getStartAndEndTimes(bolus, "day") + metadata["bolus.beginDate"] = bolusBeginDate + metadata["bolus.endDate"] = bolusEndDate # %% PUMP SETTINGS @@ -509,7 +605,7 @@ def get_basalDaySummary(basal): # CORRECTION TARGET ctColHeadings = commonColumnHeadings.copy() ctColHeadings.extend(["ct.time", "ct.low", "ct.high", "ct.target", "ct.range"]) - correctionTarget + if "bgTarget.start" in list(pumpSettings): ctColHead = "bgTarget." @@ -573,6 +669,14 @@ def get_basalDaySummary(basal): tempDF["ylw"] = pumpSettings.loc[p, "ylw"] sbr = pd.concat([sbr, tempDF[sbrColHeadings]], ignore_index=True) + # max basal rate, max bolus amount, and insulin duration + if "rateMaximum" in list(data): + pdb.set_trace() + if "amountMaximum" in list(data): + pdb.set_trace() + if "bolus.calculator" in list(data): + pdb.set_trace() + # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) if "basal" in data.type.unique(): @@ -604,10 +708,60 @@ def get_basalDaySummary(basal): basalDaySummary = get_basalDaySummary(basal) - # %% LOOP DATA (BINARY T/F) - - - # %% CGM DATA + # %% GET CLOSED LOOP DAYS WITH TEMP BASAL DATA + # group data by type + groupedData = data.groupby(by="type") + + isClosedLoopDay, is670g, metadata = \ + getClosedLoopDays(groupedData, 30, metadata) + + # %% CGM DATA + if "cbg" in data.type.unique(): + + # filter by cgm and sort by uploadTime + cgmData = groupedData.get_group("cbg").dropna(axis=1, how="all") + + # get rid of duplicates that have the same ["deviceTime", "value"] + cgmData, nCgmDuplicatesRemovedDeviceTime = removeCgmDuplicates(cgmData, "deviceTime") + 
metadata["nCgmDuplicatesRemovedDeviceTime"] = nCgmDuplicatesRemovedDeviceTime + + # get rid of duplicates that have the same ["time", "value"] + cgmData, nCgmDuplicatesRemovedUtcTime = removeCgmDuplicates(cgmData, "time") + metadata["cnCgmDuplicatesRemovedUtcTime"] = nCgmDuplicatesRemovedUtcTime + + # get rid of duplicates that have the same "roundedTime" + cgmData, nCgmDuplicatesRemovedRoundedTime = removeDuplicates(cgmData, "roundedTime") + metadata["nCgmDuplicatesRemovedRoundedTime"] = nCgmDuplicatesRemovedRoundedTime + + # get start and end times + cgmBeginDate, cgmEndDate = getStartAndEndTimes(cgmData, "day") + metadata["cgm.beginDate"] = cgmBeginDate + metadata["cgm.endDate"] = cgmEndDate + + # get a list of dexcom cgms + cgmData, percentDexcom = getListOfDexcomCGMDays(cgmData) + metadata["cgm.percentDexcomCGM"] = percentDexcom + + # group by date (day) and get stats + catDF = cgmData.groupby(cgmData["day"]) + cgmRecordsPerDay = \ + pd.DataFrame(catDF.value.count()). \ + rename(columns={"value": "cgm.count"}) + dayDate = catDF.day.describe()["top"] + dexcomCGM = catDF.dexcomCGM.describe()["top"] + nTypesCGM = catDF.dexcomCGM.describe()["unique"] + cgmRecordsPerDay["cgm.dexcomOnly"] = \ + (dexcomCGM & (nTypesCGM == 1)) + cgmRecordsPerDay["date"] = cgmRecordsPerDay.index + + # filter the cgm data + cgmColHeadings = commonColumnHeadings.copy() + cgmColHeadings.extend(["utcTime", "roundedTime", "value"]) + + # get data in mg/dL units + cgm = cgmData[cgmColHeadings] + cgm = cgm.rename(columns={'value': 'mmol_L'}) + cgm["mg_dL"] = mmolL_to_mgdL(cgm["mmol_L"]).astype(int) # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW From 1050b6338d456ed41f3d0d3eee94bd381e156a7c Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 09:17:18 -0600 Subject: [PATCH 20/78] make an actual basal rate delivered df --- .../predict-simulate/get-users-settings-and-events.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git 
a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 701d2302..53718d25 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -704,6 +704,11 @@ def getListOfDexcomCGMDays(df): basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] + # actual basal delivered + abrColHeadings = commonColumnHeadings.copy() + abrColHeadings.extend(["utcTime", "roundedTime", "durationHours", "rate"]) + abr = basal[abrColHeadings] + # get a summary of basals per day basalDaySummary = get_basalDaySummary(basal) @@ -764,7 +769,8 @@ def getListOfDexcomCGMDays(df): cgm["mg_dL"] = mmolL_to_mgdL(cgm["mmol_L"]).astype(int) - # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + pdb.set_trace() # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) From 53cfe1ff485db617749b93bcba50f8ad8ba299cd Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 10:10:36 -0600 Subject: [PATCH 21/78] add in extended boluses to the actual basals delivered --- .../get-users-settings-and-events.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 53718d25..1fcb7ac5 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -511,13 +511,10 @@ def getListOfDexcomCGMDays(df): # get a summary of boluses per day bolusDaySummary = get_bolusDaySummary(bolus) - if "extended" not in bolus: - bolus["extended"] = np.nan - bolus["duration"] = np.nan - - # cir associated with bolus event + # isf and 
cir associated with bolus event if "insulinSensitivities" in list(bolus): pdb.set_trace() + if "carbRatios" in list(bolus): pdb.set_trace() @@ -536,6 +533,20 @@ def getListOfDexcomCGMDays(df): bolusEvents["eventType"] = "correction" bolusEvents.loc[bolusEvents["carbInput"] > 0, "eventType"] = "meal" + if "duration" in list(bolus): + bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 + bolus["rate"] = bolus["extended"] / bolus["durationHours"] + bolusExtendedCH = commonColumnHeadings.copy() + bolusExtendedCH.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) + bolusExtendedEvents = bolus.loc[ + ((bolus["extended"].notnull()) & + (bolus["duration"] > 0)), bolusExtendedCH] + + if "extended" not in bolus: + bolus["extended"] = np.nan + bolus["duration"] = np.nan + + # get start and end times bolusBeginDate, bolusEndDate = getStartAndEndTimes(bolus, "day") metadata["bolus.beginDate"] = bolusBeginDate @@ -706,8 +717,12 @@ def getListOfDexcomCGMDays(df): # actual basal delivered abrColHeadings = commonColumnHeadings.copy() - abrColHeadings.extend(["utcTime", "roundedTime", "durationHours", "rate"]) + abrColHeadings.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) abr = basal[abrColHeadings] + if "duration" in list(bolus): + abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) + abr.sort_values("utcTime", inplace=True) + # get a summary of basals per day basalDaySummary = get_basalDaySummary(basal) @@ -770,7 +785,7 @@ def getListOfDexcomCGMDays(df): # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW - pdb.set_trace() + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) From 3258b1bda4812d88d671dd697bfb2cb928ac024b Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 11:53:43 -0600 Subject: [PATCH 22/78] get day level summary stats by age and years living with (ylw) --- .../get-users-settings-and-events.py | 82 +++++++++++++++---- 1 file 
changed, 67 insertions(+), 15 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 1fcb7ac5..11780289 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -343,22 +343,22 @@ def getClosedLoopDays(groupedData, nTempBasalsPerDayIsClosedLoop, metadata): nTempBasalsPerDay = tbDataFrame.resample("D").sum() closedLoopDF = pd.DataFrame(nTempBasalsPerDay, index=nTempBasalsPerDay.index.date) - closedLoopDF["date"] = nTempBasalsPerDay.index.date + closedLoopDF["day"] = nTempBasalsPerDay.index.date closedLoopDF["basal.closedLoopDays"] = \ closedLoopDF["basal.temp.count"] >= nTB nClosedLoopDays = closedLoopDF["basal.closedLoopDays"].sum() # get the number of days with 670g - basalData["date"] = pd.to_datetime(basalData.time).dt.date - bdGroup = basalData.groupby("date") + basalData["day"] = pd.to_datetime(basalData.time).dt.date + bdGroup = basalData.groupby("day") topPump = bdGroup.deviceId.describe()["top"] med670g = pd.DataFrame(topPump.str.contains("1780")).rename(columns={"top":"670g"}) med670g.reset_index(inplace=True) n670gDays = med670g["670g"].sum() else: - closedLoopDF = pd.DataFrame(columns=["basal.closedLoopDays", "date"]) - med670g = pd.DataFrame(columns=["670g", "date"]) + closedLoopDF = pd.DataFrame(columns=["basal.closedLoopDays", "day"]) + med670g = pd.DataFrame(columns=["670g", "day"]) nClosedLoopDays = 0 n670gDays = 0 @@ -421,6 +421,8 @@ def getListOfDexcomCGMDays(df): donorList = phiDate + "-uniqueDonorList.csv" donors = td.load.load_csv(os.path.join(donorPath, donorList)) +# %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL + # this is where the loop will go: for dIndex in range(0, len(donors)): @@ -694,6 +696,10 @@ def getListOfDexcomCGMDays(df): basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") basal.sort_values("uploadTime", 
ascending=False, inplace=True) + basalBeginDate, basalEndDate = getStartAndEndTimes(basal, "day") + metadata["basal.beginDate"] = basalBeginDate + metadata["basal.endDate"] = basalEndDate + basal, nBasalDuplicatesRemoved = \ td.clean.remove_duplicates(basal, basal[["deliveryType", "deviceTime", "duration", "rate"]]) metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved @@ -723,7 +729,6 @@ def getListOfDexcomCGMDays(df): abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) abr.sort_values("utcTime", inplace=True) - # get a summary of basals per day basalDaySummary = get_basalDaySummary(basal) @@ -786,15 +791,62 @@ def getListOfDexcomCGMDays(df): # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + # COMBINE DAY SUMMARIES + # group by date (day) and get stats + catDF = data.groupby(data["day"]) + dataPerDay = \ + pd.DataFrame(catDF.hashID.describe()["top"]). \ + rename(columns={"top": "hashID"}) + dataPerDay["age"] = catDF.age.mean() + dataPerDay["ylw"] = catDF.ylw.mean() + + + # calculate all of the data start and end range + # this can be used for looking at settings + dayBeginDate = min(cgmBeginDate, bolusBeginDate, basalBeginDate) + dayEndDate = max(cgmEndDate, bolusEndDate, basalEndDate) + metadata["day.beginDate"] = dayBeginDate + metadata["day.endDate"] = dayEndDate + rng = pd.date_range(dayBeginDate, dayEndDate).date + dayData = pd.DataFrame(rng, columns=["day"]) + for dfType in [dataPerDay, basalDaySummary, bolusDaySummary, cgmRecordsPerDay]: + dayData = pd.merge(dayData, dfType.reset_index(), on="day", how="left") + for dfType in [isClosedLoopDay, is670g]: + dayData = pd.merge(dayData, dfType, on="day", how="left") + + + dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 + dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) + # calculate the start and end of contiguous data + # these dates can be used when simulating and predicting, where + # you need both pump and cgm data + 
contiguousBeginDate = max(cgmBeginDate, bolusBeginDate, basalBeginDate) + contiguousEndDate = min(cgmEndDate, bolusEndDate, basalEndDate) + metadata["contiguous.beginDate"] = contiguousBeginDate + metadata["contiguous.endDate"] = contiguousEndDate + + # get a summary by age, and ylw + catDF = dayData.groupby("age") + ageSummary = pd.DataFrame(catDF.validPumpData.sum()) + ageSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ageSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + ageSummary.reset_index(inplace=True) + + catDF = dayData.groupby("ylw") + ylwSummary = pd.DataFrame(catDF.validPumpData.sum()) + ylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ylwSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + ylwSummary.reset_index(inplace=True) + + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + + + # %% SAVE RESULTS - - # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) - - - # %% SAVE RESULTS - - - # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL else: metadata["flags"] = "no basal data" else: @@ -812,8 +864,8 @@ def getListOfDexcomCGMDays(df): # %% V2 DATA TO GRAB +# FIGURE OUT WHY TEMP BASAL COUNTS ARE DIFFERENT BETWEEN THE TWO DIFFERENT METHODS # MAX BASAL RATE, MAX BOLUS AMOUNT, AND INSULIN DURATION SET ON SELECT PUMPS -# RE-EVALUATE THE WAY EXTENDED BOLUSES ARE BEING ACCOUNTED (ARE THEY ALSO SHOWING UP IN BASAL DATA?) 
# ALERT SETTINGS # ESTIMATED LOCAL TIME # PUMP AND CGM DEVICE () From 5cf9cdcb328fa5c271f85b7272edee8543412257 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 13:59:09 -0600 Subject: [PATCH 23/78] require pump and cgm data for this analysis --- .../get-users-settings-and-events.py | 570 +++++++++--------- 1 file changed, 286 insertions(+), 284 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 11780289..96a2ed0d 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -424,7 +424,7 @@ def getListOfDexcomCGMDays(df): # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL # this is where the loop will go: -for dIndex in range(0, len(donors)): +for dIndex in range(140, len(donors)): # clear output dataframes isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() @@ -432,131 +432,139 @@ def getListOfDexcomCGMDays(df): # %% ID, HASHID, AGE, & YLW userID = donors.userID[dIndex] hashID = donors.hashID[dIndex] + metadata = pd.DataFrame(index=[dIndex]) # round all birthdays and diagnosis dates to the first day of the month (to protect identities) - bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) - dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) + if (pd.isnull(donors.bDay[dIndex]) + pd.isnull(donors.dDay[dIndex])) == 0: + bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) + dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) - # %% LOAD IN DONOR JSON DATA - metadata = pd.DataFrame(index=[dIndex]) - jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") - jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") - - if os.path.exists(jsonFileName): - fileSize = os.stat(jsonFileName).st_size - metadata["fileSizeKB"] = fileSize / 1000 - if fileSize > 1000: - data = td.load.load_json(jsonFileName) - - # sort the data by time - 
data.sort_values("time", inplace=True) - - # flatten the embedded json - data = flattenJson(data) - - - # %% CLEAN DATA - # remove negative durations - data, nNegativeDurations = removeNegativeDurations(data) - metadata["nNegativeDurations"] = nNegativeDurations - - # get rid of cgm values too low/high (< 38 & > 402 mg/dL) - data, nInvalidCgmValues = removeInvalidCgmValues(data) - metadata["nInvalidCgmValues"] = nInvalidCgmValues - - # Tslim calibration bug fix - data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) - metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings - - - # %% ADD UPLOAD DATE - # attach upload time to each record, for resolving duplicates - if "upload" in data.type.unique(): - data = addUploadDate(data) - - - # %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) - data["utcTime"] = pd.to_datetime(data["time"]) - data["timezone"].fillna(method='ffill', inplace=True) - data["timezone"].fillna(method='bfill', inplace=True) - data["day"] = pd.DatetimeIndex(data["utcTime"]).date - - # round to the nearest 5 minutes - # TODO: once roundTime is pushed to tidals repository then this line can be replaced - # with td.clean.round_time - data = round_time(data, timeIntervalMinutes=5, timeField="time", - roundedTimeFieldName="roundedTime", startWithFirstRecord=True, - verbose=False) - data.sort_values("uploadTime", ascending=False, inplace=True) - - - # %% ID, HASHID, AGE, & YLW - data["userID"] = userID - data["hashID"] = hashID - data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) - data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) - - commonColumnHeadings = ["hashID", - "age", - "ylw"] - - - # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) - bolus = mergeWizardWithBolus(data) - if len(bolus) > 0: - # get rid of duplicates that have the same ["time", "normal"] - bolus.sort_values("uploadTime", ascending=False, inplace=True) - bolus, 
nBolusDuplicatesRemoved = \ - td.clean.remove_duplicates(bolus, bolus[["deviceTime", "normal"]]) - metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved - - # get a summary of boluses per day - bolusDaySummary = get_bolusDaySummary(bolus) - - # isf and cir associated with bolus event - if "insulinSensitivities" in list(bolus): - pdb.set_trace() - - if "carbRatios" in list(bolus): - pdb.set_trace() - - bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] - bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) - - bolusCH = commonColumnHeadings.copy() - bolusCH.extend(["utcTime", "roundedTime", "normal", "carbInput", "subType", - "insulinOnBoard", "bgInput", - "isf", "isf_mmolL_U", "insulinCarbRatio"]) - bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] - bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan - bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", - "bgInput": "bg_mmolL"}) - bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) - bolusEvents["eventType"] = "correction" - bolusEvents.loc[bolusEvents["carbInput"] > 0, "eventType"] = "meal" - - if "duration" in list(bolus): - bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 - bolus["rate"] = bolus["extended"] / bolus["durationHours"] - bolusExtendedCH = commonColumnHeadings.copy() - bolusExtendedCH.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) - bolusExtendedEvents = bolus.loc[ - ((bolus["extended"].notnull()) & - (bolus["duration"] > 0)), bolusExtendedCH] - - if "extended" not in bolus: - bolus["extended"] = np.nan - bolus["duration"] = np.nan - - # get start and end times - bolusBeginDate, bolusEndDate = getStartAndEndTimes(bolus, "day") - metadata["bolus.beginDate"] = bolusBeginDate - metadata["bolus.endDate"] = bolusEndDate - - - # %% PUMP SETTINGS - if "pumpSettings" in data.type.unique(): + # %% LOAD IN DONOR JSON DATA + + jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") + jsonFileName = 
os.path.join(jsonDataPath, "PHI-" + userID + ".json") + + if os.path.exists(jsonFileName): + fileSize = os.stat(jsonFileName).st_size + metadata["fileSizeKB"] = fileSize / 1000 + if fileSize > 1000: + data = td.load.load_json(jsonFileName) + + # sort the data by time + data.sort_values("time", inplace=True) + + # flatten the embedded json + data = flattenJson(data) + + + # %% CLEAN DATA + # remove negative durations + data, nNegativeDurations = removeNegativeDurations(data) + metadata["nNegativeDurations"] = nNegativeDurations + + # get rid of cgm values too low/high (< 38 & > 402 mg/dL) + data, nInvalidCgmValues = removeInvalidCgmValues(data) + metadata["nInvalidCgmValues"] = nInvalidCgmValues + + # Tslim calibration bug fix + data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) + metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings + + + # %% ADD UPLOAD DATE + # attach upload time to each record, for resolving duplicates + if (("upload" in data.type.unique()) & + ("basal" in data.type.unique()) & + ("bolus" in data.type.unique()) & + ("cbg" in data.type.unique()) & + ("pumpSettings" in data.type.unique())): + data = addUploadDate(data) + + + # %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) + data["utcTime"] = pd.to_datetime(data["time"]) + data["timezone"].fillna(method='ffill', inplace=True) + data["timezone"].fillna(method='bfill', inplace=True) + data["day"] = pd.DatetimeIndex(data["utcTime"]).date + + # round to the nearest 5 minutes + # TODO: once roundTime is pushed to tidals repository then this line can be replaced + # with td.clean.round_time + data = round_time(data, timeIntervalMinutes=5, timeField="time", + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False) + data.sort_values("uploadTime", ascending=False, inplace=True) + + + # %% ID, HASHID, AGE, & YLW + data["userID"] = userID + data["hashID"] = hashID + data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) + 
data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) + + commonColumnHeadings = ["hashID", + "age", + "ylw"] + + + # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + bolus = mergeWizardWithBolus(data) + if len(bolus) > 0: + # get rid of duplicates that have the same ["time", "normal"] + bolus.sort_values("uploadTime", ascending=False, inplace=True) + bolus, nBolusDuplicatesRemoved = \ + td.clean.remove_duplicates(bolus, bolus[["deviceTime", "normal"]]) + metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved + + # get a summary of boluses per day + bolusDaySummary = get_bolusDaySummary(bolus) + + # isf and cir associated with bolus event + if "insulinSensitivities" in list(bolus): + pdb.set_trace() + + if "carbRatios" in list(bolus): + pdb.set_trace() + + bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] + bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) + + bolusCH = commonColumnHeadings.copy() + bolusCH.extend(["utcTime", "roundedTime", "normal", "carbInput", "subType", + "insulinOnBoard", "bgInput", + "isf", "isf_mmolL_U", "insulinCarbRatio"]) + bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] + bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan + bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", + "bgInput": "bg_mmolL"}) + bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) + bolusEvents["eventType"] = "correction" + bolusEvents.loc[bolusEvents["carbInput"] > 0, "eventType"] = "meal" + + if "duration" in list(bolus): + bolus["duration"].replace(0, np.nan, inplace=True) + bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 + bolus["rate"] = bolus["extended"] / bolus["durationHours"] + bolusExtendedCH = commonColumnHeadings.copy() + bolusExtendedCH.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) + bolusExtendedEvents = bolus.loc[ + ((bolus["extended"].notnull()) & + (bolus["duration"] > 0)), bolusExtendedCH] + + if 
"extended" not in bolus: + bolus["extended"] = np.nan + bolus["duration"] = np.nan + + + # get start and end times + bolusBeginDate, bolusEndDate = getStartAndEndTimes(bolus, "day") + metadata["bolus.beginDate"] = bolusBeginDate + metadata["bolus.endDate"] = bolusEndDate + + + # %% PUMP SETTINGS + pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) @@ -692,174 +700,168 @@ def getListOfDexcomCGMDays(df): # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) - if "basal" in data.type.unique(): - basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") - basal.sort_values("uploadTime", ascending=False, inplace=True) - - basalBeginDate, basalEndDate = getStartAndEndTimes(basal, "day") - metadata["basal.beginDate"] = basalBeginDate - metadata["basal.endDate"] = basalEndDate - - basal, nBasalDuplicatesRemoved = \ - td.clean.remove_duplicates(basal, basal[["deliveryType", "deviceTime", "duration", "rate"]]) - metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved - - # fill NaNs with 0, as it indicates a suspend (temp basal of 0) - basal.rate.fillna(0, inplace=True) - - # get rid of basals that have durations of 0 - nBasalDuration0 = sum(basal.duration > 0) - basal = basal[basal.duration > 0] - metadata["basal.nBasalDuration0"] = nBasalDuration0 - - # get rid of basal durations that are unrealistic - nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000)) - metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration) - basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan - - # calculate the total amount of insulin delivered (duration * rate) - basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 - basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] - - # actual basal delivered - abrColHeadings = commonColumnHeadings.copy() - 
abrColHeadings.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) - abr = basal[abrColHeadings] - if "duration" in list(bolus): - abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) - abr.sort_values("utcTime", inplace=True) - - # get a summary of basals per day - basalDaySummary = get_basalDaySummary(basal) - - - # %% GET CLOSED LOOP DAYS WITH TEMP BASAL DATA - # group data by type - groupedData = data.groupby(by="type") - - isClosedLoopDay, is670g, metadata = \ - getClosedLoopDays(groupedData, 30, metadata) - - # %% CGM DATA - if "cbg" in data.type.unique(): - - # filter by cgm and sort by uploadTime - cgmData = groupedData.get_group("cbg").dropna(axis=1, how="all") - - # get rid of duplicates that have the same ["deviceTime", "value"] - cgmData, nCgmDuplicatesRemovedDeviceTime = removeCgmDuplicates(cgmData, "deviceTime") - metadata["nCgmDuplicatesRemovedDeviceTime"] = nCgmDuplicatesRemovedDeviceTime - - # get rid of duplicates that have the same ["time", "value"] - cgmData, nCgmDuplicatesRemovedUtcTime = removeCgmDuplicates(cgmData, "time") - metadata["cnCgmDuplicatesRemovedUtcTime"] = nCgmDuplicatesRemovedUtcTime - - # get rid of duplicates that have the same "roundedTime" - cgmData, nCgmDuplicatesRemovedRoundedTime = removeDuplicates(cgmData, "roundedTime") - metadata["nCgmDuplicatesRemovedRoundedTime"] = nCgmDuplicatesRemovedRoundedTime - - # get start and end times - cgmBeginDate, cgmEndDate = getStartAndEndTimes(cgmData, "day") - metadata["cgm.beginDate"] = cgmBeginDate - metadata["cgm.endDate"] = cgmEndDate - - # get a list of dexcom cgms - cgmData, percentDexcom = getListOfDexcomCGMDays(cgmData) - metadata["cgm.percentDexcomCGM"] = percentDexcom - - # group by date (day) and get stats - catDF = cgmData.groupby(cgmData["day"]) - cgmRecordsPerDay = \ - pd.DataFrame(catDF.value.count()). 
\ - rename(columns={"value": "cgm.count"}) - dayDate = catDF.day.describe()["top"] - dexcomCGM = catDF.dexcomCGM.describe()["top"] - nTypesCGM = catDF.dexcomCGM.describe()["unique"] - cgmRecordsPerDay["cgm.dexcomOnly"] = \ - (dexcomCGM & (nTypesCGM == 1)) - cgmRecordsPerDay["date"] = cgmRecordsPerDay.index - - # filter the cgm data - cgmColHeadings = commonColumnHeadings.copy() - cgmColHeadings.extend(["utcTime", "roundedTime", "value"]) - - # get data in mg/dL units - cgm = cgmData[cgmColHeadings] - cgm = cgm.rename(columns={'value': 'mmol_L'}) - cgm["mg_dL"] = mmolL_to_mgdL(cgm["mmol_L"]).astype(int) - - - # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW - - # COMBINE DAY SUMMARIES - # group by date (day) and get stats - catDF = data.groupby(data["day"]) - dataPerDay = \ - pd.DataFrame(catDF.hashID.describe()["top"]). \ - rename(columns={"top": "hashID"}) - dataPerDay["age"] = catDF.age.mean() - dataPerDay["ylw"] = catDF.ylw.mean() - - - # calculate all of the data start and end range - # this can be used for looking at settings - dayBeginDate = min(cgmBeginDate, bolusBeginDate, basalBeginDate) - dayEndDate = max(cgmEndDate, bolusEndDate, basalEndDate) - metadata["day.beginDate"] = dayBeginDate - metadata["day.endDate"] = dayEndDate - rng = pd.date_range(dayBeginDate, dayEndDate).date - dayData = pd.DataFrame(rng, columns=["day"]) - for dfType in [dataPerDay, basalDaySummary, bolusDaySummary, cgmRecordsPerDay]: - dayData = pd.merge(dayData, dfType.reset_index(), on="day", how="left") - for dfType in [isClosedLoopDay, is670g]: - dayData = pd.merge(dayData, dfType, on="day", how="left") - - - dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 - dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) - # calculate the start and end of contiguous data - # these dates can be used when simulating and predicting, where - # you need both pump and cgm data - contiguousBeginDate = max(cgmBeginDate, bolusBeginDate, basalBeginDate) - 
contiguousEndDate = min(cgmEndDate, bolusEndDate, basalEndDate) - metadata["contiguous.beginDate"] = contiguousBeginDate - metadata["contiguous.endDate"] = contiguousEndDate - - # get a summary by age, and ylw - catDF = dayData.groupby("age") - ageSummary = pd.DataFrame(catDF.validPumpData.sum()) - ageSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) - ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) - ageSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) - ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) - ageSummary.reset_index(inplace=True) - - catDF = dayData.groupby("ylw") - ylwSummary = pd.DataFrame(catDF.validPumpData.sum()) - ylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) - ylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) - ylwSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) - ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) - ylwSummary.reset_index(inplace=True) - - # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) - - - # %% SAVE RESULTS + basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") + basal.sort_values("uploadTime", ascending=False, inplace=True) + + basalBeginDate, basalEndDate = getStartAndEndTimes(basal, "day") + metadata["basal.beginDate"] = basalBeginDate + metadata["basal.endDate"] = basalEndDate + + basal, nBasalDuplicatesRemoved = \ + td.clean.remove_duplicates(basal, basal[["deliveryType", "deviceTime", "duration", "rate"]]) + metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved + + # fill NaNs with 0, as it indicates a suspend (temp basal of 0) + basal.rate.fillna(0, inplace=True) + + # get rid of basals that have durations of 0 + nBasalDuration0 = sum(basal.duration > 0) + basal = basal[basal.duration > 0] + metadata["basal.nBasalDuration0"] = nBasalDuration0 + + # get rid of basal 
durations that are unrealistic + nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000)) + metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration) + basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan + + # calculate the total amount of insulin delivered (duration * rate) + basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 + basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] + + # actual basal delivered + abrColHeadings = commonColumnHeadings.copy() + abrColHeadings.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) + abr = basal[abrColHeadings] + if "duration" in list(bolus): + abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) + abr.sort_values("utcTime", inplace=True) + + # get a summary of basals per day + basalDaySummary = get_basalDaySummary(basal) + + + # %% GET CLOSED LOOP DAYS WITH TEMP BASAL DATA + # group data by type + groupedData = data.groupby(by="type") + + isClosedLoopDay, is670g, metadata = \ + getClosedLoopDays(groupedData, 30, metadata) + + # %% CGM DATA + # filter by cgm and sort by uploadTime + cgmData = groupedData.get_group("cbg").dropna(axis=1, how="all") + + # get rid of duplicates that have the same ["deviceTime", "value"] + cgmData, nCgmDuplicatesRemovedDeviceTime = removeCgmDuplicates(cgmData, "deviceTime") + metadata["nCgmDuplicatesRemovedDeviceTime"] = nCgmDuplicatesRemovedDeviceTime + + # get rid of duplicates that have the same ["time", "value"] + cgmData, nCgmDuplicatesRemovedUtcTime = removeCgmDuplicates(cgmData, "time") + metadata["cnCgmDuplicatesRemovedUtcTime"] = nCgmDuplicatesRemovedUtcTime + + # get rid of duplicates that have the same "roundedTime" + cgmData, nCgmDuplicatesRemovedRoundedTime = removeDuplicates(cgmData, "roundedTime") + metadata["nCgmDuplicatesRemovedRoundedTime"] = nCgmDuplicatesRemovedRoundedTime + + # get start and end times + cgmBeginDate, cgmEndDate = getStartAndEndTimes(cgmData, "day") + 
metadata["cgm.beginDate"] = cgmBeginDate + metadata["cgm.endDate"] = cgmEndDate + + # get a list of dexcom cgms + cgmData, percentDexcom = getListOfDexcomCGMDays(cgmData) + metadata["cgm.percentDexcomCGM"] = percentDexcom + + # group by date (day) and get stats + catDF = cgmData.groupby(cgmData["day"]) + cgmRecordsPerDay = \ + pd.DataFrame(catDF.value.count()). \ + rename(columns={"value": "cgm.count"}) + dayDate = catDF.day.describe()["top"] + dexcomCGM = catDF.dexcomCGM.describe()["top"] + nTypesCGM = catDF.dexcomCGM.describe()["unique"] + cgmRecordsPerDay["cgm.dexcomOnly"] = \ + (dexcomCGM & (nTypesCGM == 1)) + cgmRecordsPerDay["date"] = cgmRecordsPerDay.index + + # filter the cgm data + cgmColHeadings = commonColumnHeadings.copy() + cgmColHeadings.extend(["utcTime", "roundedTime", "value"]) + + # get data in mg/dL units + cgm = cgmData[cgmColHeadings] + cgm = cgm.rename(columns={'value': 'mmol_L'}) + cgm["mg_dL"] = mmolL_to_mgdL(cgm["mmol_L"]).astype(int) + + + # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + + # COMBINE DAY SUMMARIES + # group by date (day) and get stats + catDF = data.groupby(data["day"]) + dataPerDay = \ + pd.DataFrame(catDF.hashID.describe()["top"]). 
\ + rename(columns={"top": "hashID"}) + dataPerDay["age"] = catDF.age.mean() + dataPerDay["ylw"] = catDF.ylw.mean() + + + # calculate all of the data start and end range + # this can be used for looking at settings + dayBeginDate = min(cgmBeginDate, bolusBeginDate, basalBeginDate) + dayEndDate = max(cgmEndDate, bolusEndDate, basalEndDate) + metadata["day.beginDate"] = dayBeginDate + metadata["day.endDate"] = dayEndDate + rng = pd.date_range(dayBeginDate, dayEndDate).date + dayData = pd.DataFrame(rng, columns=["day"]) + for dfType in [dataPerDay, basalDaySummary, bolusDaySummary, cgmRecordsPerDay]: + dayData = pd.merge(dayData, dfType.reset_index(), on="day", how="left") + for dfType in [isClosedLoopDay, is670g]: + dayData = pd.merge(dayData, dfType, on="day", how="left") + + + dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 + dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) + # calculate the start and end of contiguous data + # these dates can be used when simulating and predicting, where + # you need both pump and cgm data + contiguousBeginDate = max(cgmBeginDate, bolusBeginDate, basalBeginDate) + contiguousEndDate = min(cgmEndDate, bolusEndDate, basalEndDate) + metadata["contiguous.beginDate"] = contiguousBeginDate + metadata["contiguous.endDate"] = contiguousEndDate + + # get a summary by age, and ylw + catDF = dayData.groupby("age") + ageSummary = pd.DataFrame(catDF.validPumpData.sum()) + ageSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ageSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + ageSummary.reset_index(inplace=True) + + catDF = dayData.groupby("ylw") + ylwSummary = pd.DataFrame(catDF.validPumpData.sum()) + ylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ylwSummary["nDaysValidCgm"] = 
pd.DataFrame(catDF.validCGMData.sum()) + ylwSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + ylwSummary.reset_index(inplace=True) + + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + + + # %% SAVE RESULTS - else: - metadata["flags"] = "no basal data" else: - metadata["flags"] = "no pump settings" + metadata["flags"] = "no bolus wizard data" else: - metadata["flags"] = "no bolus wizard data" + metadata["flags"] = "missing either pump or cgm data" else: - metadata["flags"] = "no upload data" + metadata["flags"] = "file contains no data" else: - metadata["flags"] = "file contains no data" + metadata["flags"] = "file does not exist" else: - metadata["flags"] = "file does not exist" - + metadata["flags"] = "fmissing bDay/dDay" print("done with", dIndex) From 75b78a2504c01aa0562035a4f07fbdac30db7853 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 14:09:57 -0600 Subject: [PATCH 24/78] remove dependence on tidals --- .../get-users-settings-and-events.py | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 96a2ed0d..1a016850 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -14,7 +14,6 @@ # %% REQUIRED LIBRARIES import pandas as pd import numpy as np -import tidals as td import os import pdb @@ -413,18 +412,28 @@ def getListOfDexcomCGMDays(df): return df, percentDexcomCGM +def load_csv(dataPathAndName): + df = pd.read_csv(dataPathAndName, low_memory=False) + return df + + +def load_json(dataPathAndName): + df = pd.read_json(dataPathAndName, orient="records") + return df + + # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S dataPulledDate = 
"2018-09-28" phiDate = "PHI-" + dataPulledDate donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") donorList = phiDate + "-uniqueDonorList.csv" -donors = td.load.load_csv(os.path.join(donorPath, donorList)) +donors = load_csv(os.path.join(donorPath, donorList)) # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL # this is where the loop will go: -for dIndex in range(140, len(donors)): +for dIndex in range(335, len(donors)): # clear output dataframes isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() @@ -449,7 +458,7 @@ def getListOfDexcomCGMDays(df): fileSize = os.stat(jsonFileName).st_size metadata["fileSizeKB"] = fileSize / 1000 if fileSize > 1000: - data = td.load.load_json(jsonFileName) + data = load_json(jsonFileName) # sort the data by time data.sort_values("time", inplace=True) @@ -514,7 +523,7 @@ def getListOfDexcomCGMDays(df): # get rid of duplicates that have the same ["time", "normal"] bolus.sort_values("uploadTime", ascending=False, inplace=True) bolus, nBolusDuplicatesRemoved = \ - td.clean.remove_duplicates(bolus, bolus[["deviceTime", "normal"]]) + removeDuplicates(bolus, ["deviceTime", "normal"]) metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved # get a summary of boluses per day @@ -569,7 +578,7 @@ def getListOfDexcomCGMDays(df): pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) pumpSettings, nPumpSettingsDuplicatesRemoved = \ - td.clean.remove_duplicates(pumpSettings, pumpSettings[["deviceTime"]]) + removeDuplicates(pumpSettings, "deviceTime") metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved # ISF @@ -587,7 +596,8 @@ def getListOfDexcomCGMDays(df): else: isfColHead = "insulinSensitivities" isf = pd.DataFrame(columns=isfColHeadings) - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"].astype(str)): + print(p, 
actSched) tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["isf.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") @@ -708,7 +718,7 @@ def getListOfDexcomCGMDays(df): metadata["basal.endDate"] = basalEndDate basal, nBasalDuplicatesRemoved = \ - td.clean.remove_duplicates(basal, basal[["deliveryType", "deviceTime", "duration", "rate"]]) + removeDuplicates(basal, ["deliveryType", "deviceTime", "duration", "rate"]) metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved # fill NaNs with 0, as it indicates a suspend (temp basal of 0) From 28a46545e032915945cf7924eceb0cda39a7e089 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 14:18:28 -0600 Subject: [PATCH 25/78] edge case where the active schedule is a float (convert back to string) --- .../get-users-settings-and-events.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 1a016850..34db1228 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -596,8 +596,15 @@ def load_json(dataPathAndName): else: isfColHead = "insulinSensitivities" isf = pd.DataFrame(columns=isfColHeadings) - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"].astype(str)): - print(p, actSched) + + # edge case where active schedule is a float + + + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["isf.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") @@ -623,6 +630,9 @@ def load_json(dataPathAndName): cirColHead = "carbRatios" cir = pd.DataFrame(columns=cirColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["cir.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") @@ -660,6 +670,9 @@ def load_json(dataPathAndName): ctColHead = "bgTargets" correctionTarget = pd.DataFrame(columns=ctColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["ct.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") @@ -684,6 +697,9 @@ def load_json(dataPathAndName): sbrColHeadings.extend(["sbrTime", "rate", "type"]) sbr = pd.DataFrame(columns=sbrColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) if 'Auto Mode' not in actSched: tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] From 2ceaf1501a8b462f58ddd78805127cdf96f2aa82 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sat, 12 Jan 2019 15:04:48 -0600 Subject: [PATCH 26/78] save all preprocessed data --- .../get-users-settings-and-events.py | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 34db1228..d6060056 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -427,13 +427,24 @@ def load_json(dataPathAndName): phiDate = "PHI-" + dataPulledDate donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") +phiOutputPath = os.path.join(donorPath, "PHI-settings-and-events") +outputPath = os.path.join(donorPath, "settings-and-events") + + +# create anonExportDataPath folders +if not os.path.exists(phiOutputPath): + os.makedirs(phiOutputPath) + os.makedirs(outputPath) + donorList = phiDate + "-uniqueDonorList.csv" donors = load_csv(os.path.join(donorPath, donorList)) +allMetadata = donors[['hashID', 'diagnosisType']].copy() + # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL # this is where the loop will go: -for dIndex in range(335, len(donors)): +for dIndex in range(0, len(donors)): # clear output dataframes isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() @@ -442,6 +453,14 @@ def load_json(dataPathAndName): userID = donors.userID[dIndex] hashID = donors.hashID[dIndex] metadata = pd.DataFrame(index=[dIndex]) + metadata["hashID"] = hashID + + # make folder to save data + processedDataPath = os.path.join(phiOutputPath, "PHI-" + userID) + if not os.path.exists(processedDataPath): + os.makedirs(processedDataPath) + + # round all birthdays and diagnosis dates to the first day of the month (to protect identities) if 
(pd.isnull(donors.bDay[dIndex]) + pd.isnull(donors.dDay[dIndex])) == 0: @@ -873,10 +892,20 @@ def load_json(dataPathAndName): ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) ylwSummary.reset_index(inplace=True) - # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) - # %% SAVE RESULTS + + + + # %% SAVE RESULTS + + + # save the processed data + basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) + bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) + cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) + pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) else: metadata["flags"] = "no bolus wizard data" @@ -888,6 +917,11 @@ def load_json(dataPathAndName): metadata["flags"] = "file does not exist" else: metadata["flags"] = "fmissing bDay/dDay" + + # write metaData to allMetadata + allMetadata = pd.merge(allMetadata, metadata, how="left", on="hashID") + allMetadata.to_csv(os.path.join(outputPath, "allMetadata.csv")) + print("done with", dIndex) From ba45b20901bc14291a1512069fa806e821752791 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 08:15:26 -0600 Subject: [PATCH 27/78] calc local time and save settings and events --- .../get-users-settings-and-events.py | 237 +++++++++++++++--- 1 file changed, 200 insertions(+), 37 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index d6060056..88c31088 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -14,6 +14,9 @@ # %% REQUIRED LIBRARIES import pandas as pd import numpy as np +from pytz import timezone +from datetime import timedelta +import datetime 
as dt import os import pdb @@ -422,6 +425,43 @@ def load_json(dataPathAndName): return df +def getTzoForDateTime(utcTime, currentTimezone): + + tz = timezone(currentTimezone) + tzoNum = int(tz.localize(utcTime).strftime("%z")) + tzoNum = int(tz.localize(utcTime).strftime("%z")) + tzoHours = np.floor(tzoNum / 100) + tzoMinutes = round((tzoNum / 100 - tzoHours) * 100, 0) + tzoSign = np.sign(tzoHours) + tzo = int((tzoHours * 60) + (tzoMinutes * tzoSign)) + localTime = utcTime + pd.to_timedelta(tzo, unit="m") + + return localTime + + +def getTimezoneOffset(currentDate, currentTimezone): + + tz = timezone(currentTimezone) + # here we add 1 day to the current date to account for changes to/from DST + tzoNum = int(tz.localize(currentDate + timedelta(days=1)).strftime("%z")) + tzoHours = np.floor(tzoNum / 100) + tzoMinutes = round((tzoNum / 100 - tzoHours) * 100, 0) + tzoSign = np.sign(tzoHours) + tzo = int((tzoHours * 60) + (tzoMinutes * tzoSign)) + + return tzo + + +def isDSTChangeDay(currentDate, currentTimezone): + tzoCurrentDay = getTimezoneOffset(pd.to_datetime(currentDate), + currentTimezone) + tzoPreviousDay = getTimezoneOffset(pd.to_datetime(currentDate) + + timedelta(days=-1), currentTimezone) + + return (tzoCurrentDay != tzoPreviousDay) + + + # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S dataPulledDate = "2018-09-28" phiDate = "PHI-" + dataPulledDate @@ -531,9 +571,9 @@ def load_json(dataPathAndName): data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) - commonColumnHeadings = ["hashID", - "age", - "ylw"] +# commonColumnHeadings = ["hashID", +# "age", +# "ylw"] # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) @@ -558,10 +598,10 @@ def load_json(dataPathAndName): bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) - bolusCH = commonColumnHeadings.copy() - 
bolusCH.extend(["utcTime", "roundedTime", "normal", "carbInput", "subType", +# bolusCH = commonColumnHeadings.copy() + bolusCH = ["utcTime", "timezone", "roundedTime", "normal", "carbInput", "subType", "insulinOnBoard", "bgInput", - "isf", "isf_mmolL_U", "insulinCarbRatio"]) + "isf", "isf_mmolL_U", "insulinCarbRatio"] bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", @@ -574,8 +614,8 @@ def load_json(dataPathAndName): bolus["duration"].replace(0, np.nan, inplace=True) bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 bolus["rate"] = bolus["extended"] / bolus["durationHours"] - bolusExtendedCH = commonColumnHeadings.copy() - bolusExtendedCH.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) +# bolusExtendedCH = commonColumnHeadings.copy() + bolusExtendedCH = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] bolusExtendedEvents = bolus.loc[ ((bolus["extended"].notnull()) & (bolus["duration"] > 0)), bolusExtendedCH] @@ -601,14 +641,14 @@ def load_json(dataPathAndName): metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved # ISF - isfColHeadings = commonColumnHeadings.copy() - isfColHeadings.extend(["isf.time", "isf", "isf_mmolL_U"]) +# isfColHeadings = commonColumnHeadings.copy() + isfColHeadings = ["localTime", "isf", "isf_mmolL_U"] if "insulinSensitivity.amount" in list(pumpSettings): isfColHead = "insulinSensitivity" pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["isf.time"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["localTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfColHeadings] @@ -626,7 +666,7 @@ def 
load_json(dataPathAndName): tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["isf.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] @@ -635,13 +675,13 @@ def load_json(dataPathAndName): isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True) # CIR - cirColHeadings = commonColumnHeadings.copy() - cirColHeadings.extend(["cir.time", "cir"]) +# cirColHeadings = commonColumnHeadings.copy() + cirColHeadings = ["localTime", "cir"] if "carbRatio.amount" in list(pumpSettings): cirColHead = "carbRatio" pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["cir.time"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["localTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") cir = pumpSettings.loc[pumpSettings["carbRatio.amount"].notnull(), cirColHeadings] @@ -654,7 +694,7 @@ def load_json(dataPathAndName): actSched = str(int(actSched)) tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["cir.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] @@ -663,8 +703,8 @@ def load_json(dataPathAndName): # CORRECTION TARGET - ctColHeadings = commonColumnHeadings.copy() - ctColHeadings.extend(["ct.time", "ct.low", "ct.high", "ct.target", "ct.range"]) +# ctColHeadings = commonColumnHeadings.copy() + ctColHeadings = ["localTime", "ct.low", "ct.high", "ct.target", "ct.range"] if "bgTarget.start" in list(pumpSettings): ctColHead = "bgTarget." @@ -680,7 +720,7 @@ def load_json(dataPathAndName): pumpSettings["ct." + targetType + "_mmolL"] = np.nan pumpSettings["ct." + targetType] = np.nan - pumpSettings["ct.time"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["localTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[ctColHead + "start"], unit="ms") correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] @@ -694,7 +734,7 @@ def load_json(dataPathAndName): actSched = str(int(actSched)) tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["ct.time"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] @@ -712,8 +752,8 @@ def load_json(dataPathAndName): correctionTarget = pd.concat([correctionTarget, tempDF[ctColHeadings]], ignore_index=True) # SCHEDULED BASAL RATES - sbrColHeadings = commonColumnHeadings.copy() - sbrColHeadings.extend(["sbrTime", "rate", "type"]) +# sbrColHeadings = commonColumnHeadings.copy() + sbrColHeadings = ["localTime", "rate", "type"] sbr = pd.DataFrame(columns=sbrColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): # edge case where actSchedule is float @@ -723,10 +763,10 @@ def load_json(dataPathAndName): tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["type"] = np.nan - tempDF["sbrTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") else: tempDF = pd.DataFrame(index=[0]) - tempDF["sbrTime"] = np.nan + tempDF["localTime"] = np.nan tempDF["rate"] = np.nan tempDF["type"] = "AutoMode" @@ -774,13 +814,16 @@ def load_json(dataPathAndName): basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] # actual basal delivered - abrColHeadings = commonColumnHeadings.copy() - abrColHeadings.extend(["utcTime", "roundedTime", "durationHours", "rate", "type"]) +# abrColHeadings = commonColumnHeadings.copy() + abrColHeadings = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] abr = basal[abrColHeadings] if "duration" in list(bolus): abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) abr.sort_values("utcTime", inplace=True) + abr["timezone"].fillna(method='ffill', inplace=True) + abr["timezone"].fillna(method='bfill', inplace=True) + # get a summary of basals per day basalDaySummary = get_basalDaySummary(basal) @@ -830,8 +873,8 @@ def load_json(dataPathAndName): cgmRecordsPerDay["date"] = cgmRecordsPerDay.index # filter the cgm data - cgmColHeadings = commonColumnHeadings.copy() - cgmColHeadings.extend(["utcTime", "roundedTime", "value"]) +# cgmColHeadings = commonColumnHeadings.copy() + cgmColHeadings = ["utcTime", "timezone", "roundedTime", "value"] # get data in mg/dL units cgm = cgmData[cgmColHeadings] @@ -849,6 +892,7 @@ def load_json(dataPathAndName): rename(columns={"top": "hashID"}) dataPerDay["age"] = catDF.age.mean() dataPerDay["ylw"] = catDF.ylw.mean() + dataPerDay["timezone"] = catDF.timezone.describe()["top"] # calculate all of the data start and end range @@ -867,6 +911,11 @@ def load_json(dataPathAndName): dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 
dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) + + dayData["isDSTChangeDay"] = dayData[['day', 'timezone']].apply(lambda x: isDSTChangeDay(*x), axis=1) + dayData["date"] = pd.to_datetime(dayData["day"]) + dayData["tzo"] = dayData[['date', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) + # calculate the start and end of contiguous data # these dates can be used when simulating and predicting, where # you need both pump and cgm data @@ -880,32 +929,142 @@ def load_json(dataPathAndName): ageSummary = pd.DataFrame(catDF.validPumpData.sum()) ageSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) - ageSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ageSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) ageSummary.reset_index(inplace=True) + analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 28) & + (ageSummary["nDaysValidCgm"]> 28))] + minAge = analysisCriterion["age"].min() + maxAge = analysisCriterion["age"].max() + nDaysClosedLoop = analysisCriterion["nDaysClosedLoop"].sum() + n670gDays = analysisCriterion["n670gDays"].sum() + metadata["minAge"] = minAge + metadata["maxAge"] = maxAge + metadata["nDaysClosedLoop"] = nDaysClosedLoop + metadata["n670gDays"] = n670gDays + catDF = dayData.groupby("ylw") ylwSummary = pd.DataFrame(catDF.validPumpData.sum()) ylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) ylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) - ylwSummary["nDaysclosedLopp"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ylwSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) ylwSummary.reset_index(inplace=True) - # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, 
PERCENTILES, MAX, MEAN, SD, IQR, COV) + analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 28) & + (ylwSummary["nDaysValidCgm"]> 28))] + minYLW = analysisCriterion["ylw"].min() + maxYLW = analysisCriterion["ylw"].max() + metadata["minYLW"] = minYLW + metadata["maxYLW"] = maxYLW + # %% calculate local time + abr["date"] = pd.to_datetime(abr["utcTime"].dt.date) + abr = pd.merge(abr, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") + abr["localTime"] = abr["utcTime"] + pd.to_timedelta(abr["tzo"], unit="m") + cgm["date"] = pd.to_datetime(cgm["utcTime"].dt.date) + cgm = pd.merge(cgm, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") + cgm["localTime"] = cgm["utcTime"] + pd.to_timedelta(cgm["tzo"], unit="m") + + bolusEvents["date"] = pd.to_datetime(bolusEvents["utcTime"].dt.date) + bolusEvents = pd.merge(bolusEvents, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") + bolusEvents["localTime"] = bolusEvents["utcTime"] + pd.to_timedelta(bolusEvents["tzo"], unit="m") - # %% SAVE RESULTS + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + # all settings + allSettings = pd.merge(isf, cir, how="outer", on="localTime") + allSettings = pd.merge(allSettings, + sbr.rename(columns={"rate": "sbr", "type": "sbr.type"}), + how="outer", on="localTime") + allSettings = pd.merge(allSettings, correctionTarget, how="outer", on="localTime") + allSettings["hashID"] = hashID + allSettings["age"] = np.floor((allSettings["localTime"] - bDate).dt.days/365.25).astype(int) + allSettings["ylw"] = np.floor((allSettings["localTime"] - dDate).dt.days/365.25).astype(int) + allSettings = round_time(allSettings, timeIntervalMinutes=5, + timeField="localTime", + roundedTimeFieldName="localRoundedTime", + startWithFirstRecord=True, verbose=False) + + colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", + "isf", "cir", "sbr", + "ct.low", "ct.high", "ct.target", "ct.range", + 
"sbr.type", "isf_mmolL_U"] + allSettings = allSettings[colOrder] + + + fieldsToDrop = ["utcTime", "timezone", "roundedTime", "date", "tzo", "isDSTChangeDay"] + pumpEvents = pd.merge(abr.drop(columns=fieldsToDrop), + bolusEvents.drop(columns=fieldsToDrop), + how="outer", on="localTime") + pumpEvents["type"].fillna("bolus", inplace=True) + pumpEvents["eventType"].fillna("basal", inplace=True) + pumpEvents["hashID"] = hashID + pumpEvents["age"] = np.floor((pumpEvents["localTime"] - bDate).dt.days/365.25).astype(int) + pumpEvents["ylw"] = np.floor((pumpEvents["localTime"] - dDate).dt.days/365.25).astype(int) + pumpEvents = round_time(pumpEvents, timeIntervalMinutes=5, + timeField="localTime", + roundedTimeFieldName="localRoundedTime", + startWithFirstRecord=True, verbose=False) + + + colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", + "rate", "durationHours", + "unitsInsulin", "carbInput", "type", "eventType", "subType", + "isf", "isf_mmolL_U", "insulinCarbRatio", "insulinOnBoard", + "bg_mgdL", "bg_mmolL"] + + pumpEvents = pumpEvents[colOrder] + + cgmLite = cgm.drop(columns=fieldsToDrop) + cgmLite["hashID"] = hashID + cgmLite["age"] = np.floor((cgmLite["localTime"] - bDate).dt.days/365.25).astype(int) + cgmLite["ylw"] = np.floor((cgmLite["localTime"] - dDate).dt.days/365.25).astype(int) + cgmLite = round_time(cgmLite, timeIntervalMinutes=5, + timeField="localTime", + roundedTimeFieldName="localRoundedTime", + startWithFirstRecord=True, verbose=False) + + colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", + "mg_dL", "mmol_L"] + + cgmLite = cgmLite[colOrder] - # save the processed data - basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) - bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) - cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) - pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) + + # %% SAVE RESULTS 
+ outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" + outputFormat = (f"{minAge:02d}", + f"{maxAge:02d}", + f"{minYLW:02d}", + f"{maxYLW:02d}", + f"{nDaysClosedLoop:03d}", + f"{n670gDays:03d}", + hashID[0:4]) + outputFolderName = outputString % outputFormat + outputFolderName_Path = os.path.join(outputPath,"data", outputFolderName) + if not os.path.exists(outputFolderName_Path): + os.makedirs(outputFolderName_Path) + + # save data for this person + fName = outputFolderName + "-allSettings.csv" + allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-pumpEvents.csv" + pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-cgmLite.csv" + cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) + + + + # %% save the processed data (saving this data will take up a lot of space and time) + #data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) + #basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) + #bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) + #cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) + #pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) else: metadata["flags"] = "no bolus wizard data" @@ -926,6 +1085,10 @@ def load_json(dataPathAndName): # %% V2 DATA TO GRAB +# ADD ROUNDEDLOCAL TIME TO THE END RESULTS +# GET RID OF ROUNDING TIME AT THE BEGINNING +# EXPAND THE CORRECTION TIME VALUES TO BE UNIFORM ACROSS ALL USERS AND DEVICES +# FIX DAYLIGHT SAVINGS TIME TIMES # FIGURE OUT WHY TEMP BASAL COUNTS ARE DIFFERENT BETWEEN THE TWO DIFFERENT METHODS # MAX BASAL RATE, MAX BOLUS AMOUNT, AND INSULIN DURATION SET ON SELECT PUMPS # ALERT SETTINGS From 334f19bc2770bd5c690efa3a87957caec855f336 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 13:28:46 -0600 Subject: [PATCH 28/78] add isf day stats --- 
.../get-users-settings-and-events.py | 150 +++++++++++++++--- 1 file changed, 126 insertions(+), 24 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 88c31088..df162541 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -461,9 +461,90 @@ def isDSTChangeDay(currentDate, currentTimezone): return (tzoCurrentDay != tzoPreviousDay) +def get_setting_durations(df, col, dataPulledDF): + df = pd.concat([df, dataPulledDF], sort=False) + df.sort_values(col + ".localTime", inplace=True) + df.reset_index(inplace=True, drop=True) + df.fillna(method='ffill', inplace=True) + durationHours = (df[col + ".localTime"].shift(-1) - + df[col + ".localTime"]).dt.total_seconds() / 3600 + durationHours.fillna(0, inplace=True) + durationHours[durationHours > 24] = 24 + df[col + ".durationHours"] = durationHours + + return df + + +def get_settingStats(df, col, pumpCol): + df[col] = df[pumpCol] + df[col + ".min"] = df[col].min() + df[col + ".weightedMean"] = np.sum(df[col] * df[col + ".durationHours"]) / df[col + ".durationHours"].sum() + df[col + ".max"] = df[col].max() + + return df + + +def getPumpSettingsStats(df, col, pumpCol): + pumpColHeadings = [col + ".localTime", col, col + ".min", + col + ".weightedMean", col + ".max"] + df[col] = df[pumpCol + ".amount"] + df[col + ".localTime"] = pd.to_datetime(df["day"]) + \ + pd.to_timedelta(df[pumpCol + ".start"], unit="ms") + df[col + ".min"] = df[col] + df[col + ".weightedMean"] = df[col] + df[col + ".max"] = df[col] + + df2 = df.loc[df[pumpCol + ".amount"].notnull(), pumpColHeadings] + + return df, df2 + + +def processBasalSchedule(df, col): + colHeadings = [col + ".localTime", col, col + ".durationHours", col + ".type", + col + ".min", col + ".weightedMean", col + ".max"] + summaryColHeadings = ["day", col + ".min", col + ".weightedMean", col + ".max"] + 
dropCols = ["rate", "start", col + ".localTime", col, col + ".durationHours", col + ".type"] + + dailySchedule = pd.DataFrame(columns=colHeadings) + dailySummary = pd.DataFrame(columns=summaryColHeadings) + + for p, actSched in zip(df.index, df["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + if 'Auto Mode' not in actSched: + tempDF = pd.DataFrame(df.loc[p, "basalSchedules." + actSched]) + tempDF["day"] = df.loc[p, "day"] + tempDF[col + ".type"] = np.nan + tempDF[col + ".localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + endOfDay = pd.DataFrame(pd.to_datetime(df.loc[p, "day"] + pd.Timedelta(1, "D")), columns=[col + ".localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, col, endOfDay) + tempDF = tempDF[:-1] + tempDF = get_settingStats(tempDF, col, "rate") + dailySchedule = pd.concat([dailySchedule, tempDF[colHeadings]], ignore_index=True, sort=False) + tempSummary = tempDF.drop(columns=dropCols) + tempSummary["day"] = df.loc[p, "day"] + tempSummary = tempSummary[0:1] + dailySummary = pd.concat([dailySummary, tempSummary], ignore_index=True, sort=False) + + else: + pdb.set_trace() + tempDF = pd.DataFrame(index=[0]) + tempDF[col + ".type"] = "AutoMode" + dailySchedule = pd.concat([dailySchedule, tempDF], ignore_index=True, sort=False) + tempSummary["day"] = df.loc[p, "day"] + tempSummary = tempSummary[0:1] + dailySummary = pd.concat([dailySummary, tempSummary], ignore_index=True, sort=False) + + return dailySchedule, dailySummary + + + # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S dataPulledDate = "2018-09-28" +dataPulledDF = pd.DataFrame(pd.to_datetime(dataPulledDate), columns=["day"], index=[0]) +dataPulledDF["day"] = dataPulledDF["day"].dt.date phiDate = "PHI-" + dataPulledDate donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") @@ -484,7 +565,7 @@ def 
isDSTChangeDay(currentDate, currentTimezone): # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL # this is where the loop will go: -for dIndex in range(0, len(donors)): +for dIndex in [0]: #range(0, len(donors)): # clear output dataframes isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() @@ -640,18 +721,33 @@ def isDSTChangeDay(currentDate, currentTimezone): removeDuplicates(pumpSettings, "deviceTime") metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved + pumpSettings.sort_values("utcTime", ascending=True, inplace=True) + pumpSettings.reset_index(drop=True, inplace=True) + # ISF # isfColHeadings = commonColumnHeadings.copy() - isfColHeadings = ["localTime", "isf", "isf_mmolL_U"] + isfColHeadings = ["isf.localTime", "isf", "isf_mmolL_U"] if "insulinSensitivity.amount" in list(pumpSettings): isfColHead = "insulinSensitivity" pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["isf.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfColHeadings] + + # add a day summary + isfDaySummary = isf.copy() + isfDaySummary["day"] = isfDaySummary["isf.localTime"].dt.date + isfDaySummary.drop(columns=["isf.localTime"], inplace=True) + isfDaySummary["isf.min"] = isfDaySummary["isf"] + isfDaySummary["isf.weightedMean"] = isfDaySummary["isf"] + isfDaySummary["isf.max"] = isfDaySummary["isf"] + isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) + isfDaySummary.reset_index(inplace=True, drop=True) + isfDaySummary.fillna(method='ffill', inplace=True) + else: isfColHead = "insulinSensitivities" isf = pd.DataFrame(columns=isfColHeadings) @@ -673,6 +769,7 @@ def isDSTChangeDay(currentDate, currentTimezone): 
tempDF["isf_mmolL_U"] = tempDF["amount"] tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True) + pdb.set_trace() # CIR # cirColHeadings = commonColumnHeadings.copy() @@ -1035,27 +1132,31 @@ def isDSTChangeDay(currentDate, currentTimezone): cgmLite = cgmLite[colOrder] + # %% day level stats + + + # %% SAVE RESULTS - outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" - outputFormat = (f"{minAge:02d}", - f"{maxAge:02d}", - f"{minYLW:02d}", - f"{maxYLW:02d}", - f"{nDaysClosedLoop:03d}", - f"{n670gDays:03d}", - hashID[0:4]) - outputFolderName = outputString % outputFormat - outputFolderName_Path = os.path.join(outputPath,"data", outputFolderName) - if not os.path.exists(outputFolderName_Path): - os.makedirs(outputFolderName_Path) - - # save data for this person - fName = outputFolderName + "-allSettings.csv" - allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) - fName = outputFolderName + "-pumpEvents.csv" - pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) - fName = outputFolderName + "-cgmLite.csv" - cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) +# outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" +# outputFormat = (f"{minAge:02d}", +# f"{maxAge:02d}", +# f"{minYLW:02d}", +# f"{maxYLW:02d}", +# f"{nDaysClosedLoop:03d}", +# f"{n670gDays:03d}", +# hashID[0:4]) +# outputFolderName = outputString % outputFormat +# outputFolderName_Path = os.path.join(outputPath,"data", outputFolderName) +# if not os.path.exists(outputFolderName_Path): +# os.makedirs(outputFolderName_Path) +# +# # save data for this person +# fName = outputFolderName + "-allSettings.csv" +# allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) +# fName = outputFolderName + "-pumpEvents.csv" +# pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) +# fName = outputFolderName + "-cgmLite.csv" +# cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) @@ -1075,7 +1176,7 @@ def 
isDSTChangeDay(currentDate, currentTimezone): else: metadata["flags"] = "file does not exist" else: - metadata["flags"] = "fmissing bDay/dDay" + metadata["flags"] = "missing bDay/dDay" # write metaData to allMetadata allMetadata = pd.merge(allMetadata, metadata, how="left", on="hashID") @@ -1087,6 +1188,7 @@ def isDSTChangeDay(currentDate, currentTimezone): # %% V2 DATA TO GRAB # ADD ROUNDEDLOCAL TIME TO THE END RESULTS # GET RID OF ROUNDING TIME AT THE BEGINNING +# DEFINE A DAY BETWEEN 6AM AND 6AM # EXPAND THE CORRECTION TIME VALUES TO BE UNIFORM ACROSS ALL USERS AND DEVICES # FIX DAYLIGHT SAVINGS TIME TIMES # FIGURE OUT WHY TEMP BASAL COUNTS ARE DIFFERENT BETWEEN THE TWO DIFFERENT METHODS From 7076abab48866549dbcbfaffd97f3f1cfa54c934 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 14:32:57 -0600 Subject: [PATCH 29/78] calculate day summaries for settings (isf, cir, ct, and sbr) --- .../get-users-settings-and-events.py | 102 ++++++++++++++---- 1 file changed, 82 insertions(+), 20 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index df162541..a6cb2906 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -752,9 +752,6 @@ def processBasalSchedule(df, col): isfColHead = "insulinSensitivities" isf = pd.DataFrame(columns=isfColHeadings) - # edge case where active schedule is a float - - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): # edge case where actSchedule is float if isinstance(actSched, float): @@ -762,7 +759,7 @@ def processBasalSchedule(df, col): tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["isf.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] @@ -773,16 +770,29 @@ def processBasalSchedule(df, col): # CIR # cirColHeadings = commonColumnHeadings.copy() - cirColHeadings = ["localTime", "cir"] + cirColHeadings = ["cir.localTime", "cir"] if "carbRatio.amount" in list(pumpSettings): cirColHead = "carbRatio" pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["cir.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") cir = pumpSettings.loc[pumpSettings["carbRatio.amount"].notnull(), cirColHeadings] + + # add a day summary + cirDaySummary = cir.copy() + cirDaySummary["day"] = cirDaySummary["cir.localTime"].dt.date + cirDaySummary.drop(columns=["cir.localTime"], inplace=True) + cirDaySummary["cir.min"] = cirDaySummary["cir"] + cirDaySummary["cir.weightedMean"] = cirDaySummary["cir"] + cirDaySummary["cir.max"] = cirDaySummary["cir"] + cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) + cirDaySummary.reset_index(inplace=True, drop=True) + cirDaySummary.fillna(method='ffill', inplace=True) + else: + cirColHead = "carbRatios" cir = pd.DataFrame(columns=cirColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): @@ -791,17 +801,18 @@ def processBasalSchedule(df, col): actSched = str(int(actSched)) tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["cir.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] tempDF["cir"] = tempDF["amount"].astype(float) cir = pd.concat([cir, tempDF[cirColHeadings]], ignore_index=True) + pdb.set_trace() # CORRECTION TARGET # ctColHeadings = commonColumnHeadings.copy() - ctColHeadings = ["localTime", "ct.low", "ct.high", "ct.target", "ct.range"] + ctColHeadings = ["ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"] if "bgTarget.start" in list(pumpSettings): ctColHead = "bgTarget." @@ -817,12 +828,25 @@ def processBasalSchedule(df, col): pumpSettings["ct." + targetType + "_mmolL"] = np.nan pumpSettings["ct." + targetType] = np.nan - pumpSettings["localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pumpSettings["ct.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[ctColHead + "start"], unit="ms") correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] + + # add a day summary + ctDaySummary = correctionTarget.copy() + ctDaySummary["day"] = ctDaySummary["ct.localTime"].dt.date + ctDaySummary.drop(columns=["ct.localTime"], inplace=True) +# ctDaySummary["ct.min"] = ctDaySummary["ct.target"] +# ctDaySummary["ct.weightedMean"] = ctDaySummary["ct"] +# ctDaySummary["ct.max"] = ctDaySummary["ct"] + ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) + ctDaySummary.reset_index(inplace=True, drop=True) + ctDaySummary.fillna(method='ffill', inplace=True) + else: + ctColHead = "bgTargets" correctionTarget = pd.DataFrame(columns=ctColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): @@ -831,7 +855,7 @@ def processBasalSchedule(df, 
col): actSched = str(int(actSched)) tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["ct.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") tempDF["hashID"] = pumpSettings.loc[p, "hashID"] tempDF["age"] = pumpSettings.loc[p, "age"] tempDF["ylw"] = pumpSettings.loc[p, "ylw"] @@ -847,11 +871,14 @@ def processBasalSchedule(df, col): tempDF["ct." + targetType] = np.nan correctionTarget = pd.concat([correctionTarget, tempDF[ctColHeadings]], ignore_index=True) + pdb.set_trace() # SCHEDULED BASAL RATES # sbrColHeadings = commonColumnHeadings.copy() - sbrColHeadings = ["localTime", "rate", "type"] + sbrColHeadings = ["sbr.localTime", "rate", "type"] sbr = pd.DataFrame(columns=sbrColHeadings) + sbrDayColHeadings = ['day', 'sbr.min', 'sbr.weightedMean', 'sbr.max', 'type'] + sbrDaySummary = pd.DataFrame(columns=sbrDayColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): # edge case where actSchedule is float if isinstance(actSched, float): @@ -860,17 +887,39 @@ def processBasalSchedule(df, col): tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["type"] = np.nan - tempDF["localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "sbr", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date + tempDaySummary["sbr.min"] = tempDF["rate"].min() + tempDaySummary["sbr.weightedMean"] = \ + np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() + tempDaySummary["sbr.max"] = tempDF["rate"].max() + tempDaySummary["type"] = np.nan + else: tempDF = pd.DataFrame(index=[0]) - tempDF["localTime"] = np.nan + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) tempDF["rate"] = np.nan tempDF["type"] = "AutoMode" - tempDF["hashID"] = pumpSettings.loc[p, "hashID"] - tempDF["age"] = pumpSettings.loc[p, "age"] - tempDF["ylw"] = pumpSettings.loc[p, "ylw"] + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date + tempDaySummary["sbr.min"] = np.nan + tempDaySummary["sbr.weightedMean"] = np.nan + tempDaySummary["sbr.max"] = np.nan + tempDaySummary["type"] = "AutoMode" + sbr = pd.concat([sbr, tempDF[sbrColHeadings]], ignore_index=True) + sbrDaySummary = pd.concat([sbrDaySummary, tempDaySummary], ignore_index=True) + + sbrDaySummary = pd.concat([sbrDaySummary, dataPulledDF], sort=False) + sbrDaySummary.reset_index(inplace=True, drop=True) + sbrDaySummary.fillna(method='ffill', inplace=True) # max basal rate, max bolus amount, and insulin duration if "rateMaximum" in list(data): @@ -1013,6 +1062,10 @@ def processBasalSchedule(df, col): dayData["date"] = 
pd.to_datetime(dayData["day"]) dayData["tzo"] = dayData[['date', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) + + + + # calculate the start and end of contiguous data # these dates can be used when simulating and predicting, where # you need both pump and cgm data @@ -1057,7 +1110,6 @@ def processBasalSchedule(df, col): metadata["maxYLW"] = maxYLW - # %% calculate local time abr["date"] = pd.to_datetime(abr["utcTime"].dt.date) abr = pd.merge(abr, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") @@ -1074,11 +1126,18 @@ def processBasalSchedule(df, col): # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) # all settings - allSettings = pd.merge(isf, cir, how="outer", on="localTime") + + allSettings = pd.merge(isf.rename(columns={"isf.localTime": "localTime"}), + cir.rename(columns={"cir.localTime": "localTime"}), + how="outer", on="localTime") + allSettings = pd.merge(allSettings, + sbr.rename(columns={"rate": "sbr", + "type": "sbr.type", + "sbr.localTime": "localTime"}), + how="outer", on="localTime") allSettings = pd.merge(allSettings, - sbr.rename(columns={"rate": "sbr", "type": "sbr.type"}), + correctionTarget.rename(columns={"ct.localTime": "localTime"}), how="outer", on="localTime") - allSettings = pd.merge(allSettings, correctionTarget, how="outer", on="localTime") allSettings["hashID"] = hashID allSettings["age"] = np.floor((allSettings["localTime"] - bDate).dt.days/365.25).astype(int) allSettings["ylw"] = np.floor((allSettings["localTime"] - dDate).dt.days/365.25).astype(int) @@ -1136,6 +1195,9 @@ def processBasalSchedule(df, col): + # %% age and ylw stats + + # %% SAVE RESULTS # outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" # outputFormat = (f"{minAge:02d}", From 98d10f6a9cb47ff6fc394d653fa8234ad9f2434b Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 14:41:19 -0600 Subject: [PATCH 30/78] day summaries only include summary stats --- 
.../get-users-settings-and-events.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index a6cb2906..8923c5ab 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -738,12 +738,11 @@ def processBasalSchedule(df, col): isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfColHeadings] # add a day summary - isfDaySummary = isf.copy() - isfDaySummary["day"] = isfDaySummary["isf.localTime"].dt.date - isfDaySummary.drop(columns=["isf.localTime"], inplace=True) - isfDaySummary["isf.min"] = isfDaySummary["isf"] - isfDaySummary["isf.weightedMean"] = isfDaySummary["isf"] - isfDaySummary["isf.max"] = isfDaySummary["isf"] + isfDaySummary = pd.DataFrame() + isfDaySummary["day"] = isf["isf.localTime"].dt.date + isfDaySummary["isf.min"] = isf["isf"] + isfDaySummary["isf.weightedMean"] = isf["isf"] + isfDaySummary["isf.max"] = isf["isf"] isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) isfDaySummary.reset_index(inplace=True, drop=True) isfDaySummary.fillna(method='ffill', inplace=True) @@ -781,12 +780,11 @@ def processBasalSchedule(df, col): cir = pumpSettings.loc[pumpSettings["carbRatio.amount"].notnull(), cirColHeadings] # add a day summary - cirDaySummary = cir.copy() - cirDaySummary["day"] = cirDaySummary["cir.localTime"].dt.date - cirDaySummary.drop(columns=["cir.localTime"], inplace=True) - cirDaySummary["cir.min"] = cirDaySummary["cir"] - cirDaySummary["cir.weightedMean"] = cirDaySummary["cir"] - cirDaySummary["cir.max"] = cirDaySummary["cir"] + cirDaySummary = pd.DataFrame() + cirDaySummary["day"] = cir["cir.localTime"].dt.date + cirDaySummary["cir.min"] = cir["cir"] + cirDaySummary["cir.weightedMean"] = cir["cir"] + cirDaySummary["cir.max"] = cir["cir"] cirDaySummary = pd.concat([cirDaySummary, 
dataPulledDF], sort=False) cirDaySummary.reset_index(inplace=True, drop=True) cirDaySummary.fillna(method='ffill', inplace=True) @@ -838,9 +836,6 @@ def processBasalSchedule(df, col): ctDaySummary = correctionTarget.copy() ctDaySummary["day"] = ctDaySummary["ct.localTime"].dt.date ctDaySummary.drop(columns=["ct.localTime"], inplace=True) -# ctDaySummary["ct.min"] = ctDaySummary["ct.target"] -# ctDaySummary["ct.weightedMean"] = ctDaySummary["ct"] -# ctDaySummary["ct.max"] = ctDaySummary["ct"] ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) ctDaySummary.reset_index(inplace=True, drop=True) ctDaySummary.fillna(method='ffill', inplace=True) @@ -1062,7 +1057,8 @@ def processBasalSchedule(df, col): dayData["date"] = pd.to_datetime(dayData["day"]) dayData["tzo"] = dayData[['date', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) - + # add settings to the dayData + dayData = pd.merge(dayData, isfDaySummary, on="day", how="left") From 0dbe2d91914dee3881e6b124a5924344763b7ea2 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 15:25:18 -0600 Subject: [PATCH 31/78] get settings summaries across for each age and ylw --- .../get-users-settings-and-events.py | 96 ++++++++++++++++--- 1 file changed, 81 insertions(+), 15 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 8923c5ab..103747aa 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -565,7 +565,7 @@ def processBasalSchedule(df, col): # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL # this is where the loop will go: -for dIndex in [0]: #range(0, len(donors)): +for dIndex in range(0, len(donors)): # clear output dataframes isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() @@ -831,7 +831,6 @@ def processBasalSchedule(df, col): correctionTarget = 
pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] - # add a day summary ctDaySummary = correctionTarget.copy() ctDaySummary["day"] = ctDaySummary["ct.localTime"].dt.date @@ -870,9 +869,9 @@ def processBasalSchedule(df, col): # SCHEDULED BASAL RATES # sbrColHeadings = commonColumnHeadings.copy() - sbrColHeadings = ["sbr.localTime", "rate", "type"] + sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"] sbr = pd.DataFrame(columns=sbrColHeadings) - sbrDayColHeadings = ['day', 'sbr.min', 'sbr.weightedMean', 'sbr.max', 'type'] + sbrDayColHeadings = ['day', 'sbr.min', 'sbr.weightedMean', 'sbr.max', 'sbr.type'] sbrDaySummary = pd.DataFrame(columns=sbrDayColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): # edge case where actSchedule is float @@ -881,7 +880,7 @@ def processBasalSchedule(df, col): if 'Auto Mode' not in actSched: tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["type"] = np.nan + tempDF["sbr.type"] = np.nan tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0]) tempDF = get_setting_durations(tempDF, "sbr", endOfDay) @@ -893,21 +892,21 @@ def processBasalSchedule(df, col): tempDaySummary["sbr.weightedMean"] = \ np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() tempDaySummary["sbr.max"] = tempDF["rate"].max() - tempDaySummary["type"] = np.nan + tempDaySummary["sbr.type"] = np.nan else: tempDF = pd.DataFrame(index=[0]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) tempDF["rate"] = np.nan - tempDF["type"] = "AutoMode" + tempDF["sbr.type"] = "AutoMode" tempDaySummary = pd.DataFrame(index=[0]) tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date 
tempDaySummary["sbr.min"] = np.nan tempDaySummary["sbr.weightedMean"] = np.nan tempDaySummary["sbr.max"] = np.nan - tempDaySummary["type"] = "AutoMode" + tempDaySummary["sbr.type"] = "AutoMode" sbr = pd.concat([sbr, tempDF[sbrColHeadings]], ignore_index=True) sbrDaySummary = pd.concat([sbrDaySummary, tempDaySummary], ignore_index=True) @@ -1049,7 +1048,6 @@ def processBasalSchedule(df, col): for dfType in [isClosedLoopDay, is670g]: dayData = pd.merge(dayData, dfType, on="day", how="left") - dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) @@ -1059,8 +1057,27 @@ def processBasalSchedule(df, col): # add settings to the dayData dayData = pd.merge(dayData, isfDaySummary, on="day", how="left") - - + dayData = pd.merge(dayData, cirDaySummary, on="day", how="left") + dayData = pd.merge(dayData, ctDaySummary, on="day", how="left") + dayData = pd.merge(dayData, sbrDaySummary, on="day", how="left") + + # fill data forward + fillList = ['isf.min', + 'isf.weightedMean', + 'isf.max', + 'cir.min', + 'cir.weightedMean', + 'cir.max', + 'ct.low', + 'ct.high', + 'ct.target', + 'ct.range', + 'sbr.min', + 'sbr.weightedMean', + 'sbr.max', + 'sbr.type'] + for fl in fillList: + dayData[fl].fillna(method='ffill', inplace=True) # calculate the start and end of contiguous data # these dates can be used when simulating and predicting, where @@ -1077,6 +1094,33 @@ def processBasalSchedule(df, col): ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) ageSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + + # add in isf stats + ageSummary["isf.nDays"] = catDF["isf.min"].count() + ageSummary["isf.min"] = catDF["isf.min"].min() + ageSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ageSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + 
ageSummary["cir.nDays"] = catDF["cir.min"].count() + ageSummary["cir.min"] = catDF["cir.min"].min() + ageSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ageSummary["cir.max"] = catDF["cir.max"].max() + + # correctionTarget stats + for ch in ['ct.low','ct.high','ct.target', 'ct.range']: + ageSummary[ch + ".nDays"] = catDF[ch].count() + ageSummary[ch + ".min"] = catDF[ch].min() + ageSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() + ageSummary[ch + ".max"] = catDF[ch].max() + + # add sbr stats + ageSummary["sbr.nDays"] = catDF["sbr.min"].count() + ageSummary["sbr.min"] = catDF["sbr.min"].min() + ageSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ageSummary["sbr.max"] = catDF["sbr.max"].max() + ageSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + ageSummary.reset_index(inplace=True) analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 28) & @@ -1096,6 +1140,32 @@ def processBasalSchedule(df, col): ylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) ylwSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + + ylwSummary["isf.nDays"] = catDF["isf.min"].count() + ylwSummary["isf.min"] = catDF["isf.min"].min() + ylwSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ylwSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + ylwSummary["cir.nDays"] = catDF["cir.min"].count() + ylwSummary["cir.min"] = catDF["cir.min"].min() + ylwSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ylwSummary["cir.max"] = catDF["cir.max"].max() + + # correctionTarget stats + for ch in ['ct.low','ct.high','ct.target', 'ct.range']: + ylwSummary[ch + ".nDays"] = catDF[ch].count() + ylwSummary[ch + ".min"] = catDF[ch].min() + ylwSummary[ch + 
".weightedMean"] = catDF[ch].sum() / catDF[ch].count() + ylwSummary[ch + ".max"] = catDF[ch].max() + + # add sbr stats + ylwSummary["sbr.nDays"] = catDF["sbr.min"].count() + ylwSummary["sbr.min"] = catDF["sbr.min"].min() + ylwSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ylwSummary["sbr.max"] = catDF["sbr.max"].max() + ylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + ylwSummary.reset_index(inplace=True) analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 28) & @@ -1187,10 +1257,6 @@ def processBasalSchedule(df, col): cgmLite = cgmLite[colOrder] - # %% day level stats - - - # %% age and ylw stats From fc4a253ff7b05c2ead31f162f11df0c13dada0e9 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 18:34:08 -0600 Subject: [PATCH 32/78] fix edge case 'US/Pacific-New' --- projects/predict-simulate/get-users-settings-and-events.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 103747aa..e692481c 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -441,6 +441,10 @@ def getTzoForDateTime(utcTime, currentTimezone): def getTimezoneOffset(currentDate, currentTimezone): + # edge case for 'US/Pacific-New' + if currentTimezone in 'US/Pacific-New': + currentTimezone = 'US/Pacific' + tz = timezone(currentTimezone) # here we add 1 day to the current date to account for changes to/from DST tzoNum = int(tz.localize(currentDate + timedelta(days=1)).strftime("%z")) From b5ed0c6e727b257b81174eee7ba16ffd01c46046 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 18:35:14 -0600 Subject: [PATCH 33/78] fix scheduled isf and cir --- .../get-users-settings-and-events.py | 48 +++++++++++++++---- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git 
a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index e692481c..50a3d33b 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -754,7 +754,8 @@ def processBasalSchedule(df, col): else: isfColHead = "insulinSensitivities" isf = pd.DataFrame(columns=isfColHeadings) - + isfDayColHeadings = ['day', 'isf.min', 'isf.weightedMean', 'isf.max'] + isfDaySummary = pd.DataFrame(columns=isfDayColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): # edge case where actSchedule is float if isinstance(actSched, float): @@ -763,13 +764,25 @@ def processBasalSchedule(df, col): tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["isf.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - tempDF["hashID"] = pumpSettings.loc[p, "hashID"] - tempDF["age"] = pumpSettings.loc[p, "age"] - tempDF["ylw"] = pumpSettings.loc[p, "ylw"] tempDF["isf_mmolL_U"] = tempDF["amount"] tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["isf.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "isf", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["isf.localTime"].dt.date + tempDaySummary["isf.min"] = tempDF["isf"].min() + tempDaySummary["isf.weightedMean"] = \ + np.sum(tempDF["isf"] * tempDF["isf.durationHours"]) / tempDF["isf.durationHours"].sum() + tempDaySummary["isf.max"] = tempDF["isf"].max() + isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True) - pdb.set_trace() + isfDaySummary = pd.concat([isfDaySummary, tempDaySummary], ignore_index=True) + + isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) + 
isfDaySummary.reset_index(inplace=True, drop=True) + isfDaySummary.fillna(method='ffill', inplace=True) # CIR # cirColHeadings = commonColumnHeadings.copy() @@ -797,19 +810,34 @@ def processBasalSchedule(df, col): cirColHead = "carbRatios" cir = pd.DataFrame(columns=cirColHeadings) + cirDayColHeadings = ['day', 'cir.min', 'cir.weightedMean', 'cir.max'] + cirDaySummary = pd.DataFrame(columns=cirDayColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): # edge case where actSchedule is float if isinstance(actSched, float): actSched = str(int(actSched)) + tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["cir.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - tempDF["hashID"] = pumpSettings.loc[p, "hashID"] - tempDF["age"] = pumpSettings.loc[p, "age"] - tempDF["ylw"] = pumpSettings.loc[p, "ylw"] - tempDF["cir"] = tempDF["amount"].astype(float) + tempDF["cir"] = tempDF["amount"] + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["cir.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "cir", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["cir.localTime"].dt.date + tempDaySummary["cir.min"] = tempDF["cir"].min() + tempDaySummary["cir.weightedMean"] = \ + np.sum(tempDF["cir"] * tempDF["cir.durationHours"]) / tempDF["cir.durationHours"].sum() + tempDaySummary["cir.max"] = tempDF["cir"].max() + cir = pd.concat([cir, tempDF[cirColHeadings]], ignore_index=True) - pdb.set_trace() + cirDaySummary = pd.concat([cirDaySummary, tempDaySummary], ignore_index=True) + + cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) + cirDaySummary.reset_index(inplace=True, drop=True) + cirDaySummary.fillna(method='ffill', inplace=True) # CORRECTION TARGET From cf7759c392e083a1cbb63906afd1aa451581e661 Mon Sep 
17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 19:57:18 -0600 Subject: [PATCH 34/78] fix correction target age summaries to include min, wMean, and max --- .../get-users-settings-and-events.py | 111 +++++++++++------- 1 file changed, 69 insertions(+), 42 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 50a3d33b..c1631a0e 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -729,7 +729,6 @@ def processBasalSchedule(df, col): pumpSettings.reset_index(drop=True, inplace=True) # ISF -# isfColHeadings = commonColumnHeadings.copy() isfColHeadings = ["isf.localTime", "isf", "isf_mmolL_U"] if "insulinSensitivity.amount" in list(pumpSettings): @@ -785,7 +784,6 @@ def processBasalSchedule(df, col): isfDaySummary.fillna(method='ffill', inplace=True) # CIR -# cirColHeadings = commonColumnHeadings.copy() cirColHeadings = ["cir.localTime", "cir"] if "carbRatio.amount" in list(pumpSettings): @@ -841,8 +839,12 @@ def processBasalSchedule(df, col): # CORRECTION TARGET -# ctColHeadings = commonColumnHeadings.copy() ctColHeadings = ["ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"] + ctDayColHeadings = ['day', + "ct.low.min", "ct.low.weightedMean", "ct.low.max", + "ct.high.min", "ct.high.weightedMean", "ct.high.max", + "ct.target.min", "ct.target.weightedMean", "ct.target.max", + "ct.range.min", "ct.range.weightedMean", "ct.range.max"] if "bgTarget.start" in list(pumpSettings): ctColHead = "bgTarget." 
@@ -864,43 +866,63 @@ def processBasalSchedule(df, col): correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] # add a day summary - ctDaySummary = correctionTarget.copy() - ctDaySummary["day"] = ctDaySummary["ct.localTime"].dt.date - ctDaySummary.drop(columns=["ct.localTime"], inplace=True) + ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) + ctDaySummary["day"] = correctionTarget["ct.localTime"].dt.date + # add min, weightedMean, and max + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + for stat in [".min", ".weightedMean", ".max"]: + ctDaySummary[targetType + stat] = correctionTarget[targetType] + + ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) ctDaySummary.reset_index(inplace=True, drop=True) ctDaySummary.fillna(method='ffill', inplace=True) else: - ctColHead = "bgTargets" correctionTarget = pd.DataFrame(columns=ctColHeadings) + + ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): # edge case where actSchedule is float if isinstance(actSched, float): actSched = str(int(actSched)) + tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." + actSched]) + targetTypes = list(set(list(tempDF)) - set(["start"])) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["ct.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - tempDF["hashID"] = pumpSettings.loc[p, "hashID"] - tempDF["age"] = pumpSettings.loc[p, "age"] - tempDF["ylw"] = pumpSettings.loc[p, "ylw"] - for targetType in ["low", "high", "target", "range"]: - if targetType in list(tempDF): - tempDF["ct." + targetType + "_mmolL"] = \ - tempDF[targetType] - - tempDF["ct." + targetType] = \ - mmolL_to_mgdL(tempDF["ct." + targetType + "_mmolL"]) - else: - tempDF["ct." + targetType + "_mmolL"] = np.nan - tempDF["ct." 
+ targetType] = np.nan - - correctionTarget = pd.concat([correctionTarget, tempDF[ctColHeadings]], ignore_index=True) - pdb.set_trace() + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["ct.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "ct", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(columns=ctDayColHeadings, index=[0]) + tempDaySummary["day"] = tempDF["ct.localTime"].dt.date + + for targetType in targetTypes: + tempDF["ct." + targetType] = mmolL_to_mgdL(tempDF[targetType]) + + tempDaySummary["ct." + targetType + ".min"] = tempDF["ct." + targetType].min() + tempDaySummary["ct." + targetType + ".weightedMean"] = \ + np.sum(tempDF["ct." + targetType] * tempDF["ct.durationHours"]) / tempDF["ct.durationHours"].sum() + tempDaySummary["ct." + targetType + ".max"] = tempDF["ct." + targetType].max() + + correctionTarget = \ + pd.concat([correctionTarget, + tempDF.drop(columns=['start', + 'target', + 'day', + 'ct.durationHours'])], + ignore_index=True, sort=False) + ctDaySummary = pd.concat([ctDaySummary, tempDaySummary], + ignore_index=True, sort=False) + + ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) + ctDaySummary.fillna(method='ffill', inplace=True) + ctDaySummary.drop_duplicates(inplace=True) + ctDaySummary.reset_index(inplace=True, drop=True) # SCHEDULED BASAL RATES -# sbrColHeadings = commonColumnHeadings.copy() sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"] sbr = pd.DataFrame(columns=sbrColHeadings) sbrDayColHeadings = ['day', 'sbr.min', 'sbr.weightedMean', 'sbr.max', 'sbr.type'] @@ -1100,10 +1122,10 @@ def processBasalSchedule(df, col): 'cir.min', 'cir.weightedMean', 'cir.max', - 'ct.low', - 'ct.high', - 'ct.target', - 'ct.range', + 'ct.low.min', 'ct.low.weightedMean', 'ct.low.max', + 'ct.high.min', 'ct.high.weightedMean', 'ct.high.max', + 'ct.target.min', 'ct.target.weightedMean', 'ct.target.max', + 'ct.range.min', 'ct.range.weightedMean', 
'ct.range.max', 'sbr.min', 'sbr.weightedMean', 'sbr.max', @@ -1139,13 +1161,6 @@ def processBasalSchedule(df, col): ageSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() ageSummary["cir.max"] = catDF["cir.max"].max() - # correctionTarget stats - for ch in ['ct.low','ct.high','ct.target', 'ct.range']: - ageSummary[ch + ".nDays"] = catDF[ch].count() - ageSummary[ch + ".min"] = catDF[ch].min() - ageSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() - ageSummary[ch + ".max"] = catDF[ch].max() - # add sbr stats ageSummary["sbr.nDays"] = catDF["sbr.min"].count() ageSummary["sbr.min"] = catDF["sbr.min"].min() @@ -1153,6 +1168,15 @@ def processBasalSchedule(df, col): ageSummary["sbr.max"] = catDF["sbr.max"].max() ageSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + # correctionTarget stats + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + for stat in [".min", ".weightedMean", ".max"]: + ch = targetType + stat + ageSummary[ch + ".nDays"] = catDF[ch].count() + ageSummary[ch + ".min"] = catDF[ch].min() + ageSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() + ageSummary[ch + ".max"] = catDF[ch].max() + ageSummary.reset_index(inplace=True) analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 28) & @@ -1184,13 +1208,6 @@ def processBasalSchedule(df, col): ylwSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() ylwSummary["cir.max"] = catDF["cir.max"].max() - # correctionTarget stats - for ch in ['ct.low','ct.high','ct.target', 'ct.range']: - ylwSummary[ch + ".nDays"] = catDF[ch].count() - ylwSummary[ch + ".min"] = catDF[ch].min() - ylwSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() - ylwSummary[ch + ".max"] = catDF[ch].max() - # add sbr stats ylwSummary["sbr.nDays"] = catDF["sbr.min"].count() ylwSummary["sbr.min"] = catDF["sbr.min"].min() @@ -1198,6 +1215,15 @@ def processBasalSchedule(df, col): 
ylwSummary["sbr.max"] = catDF["sbr.max"].max() ylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + # correctionTarget stats + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + for stat in [".min", ".weightedMean", ".max"]: + ch = targetType + stat + ylwSummary[ch + ".nDays"] = catDF[ch].count() + ylwSummary[ch + ".min"] = catDF[ch].min() + ylwSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() + ylwSummary[ch + ".max"] = catDF[ch].max() + ylwSummary.reset_index(inplace=True) analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 28) & @@ -1343,6 +1369,7 @@ def processBasalSchedule(df, col): # %% V2 DATA TO GRAB # ADD ROUNDEDLOCAL TIME TO THE END RESULTS +# CALCULATE MMOL SUMMARIES # GET RID OF ROUNDING TIME AT THE BEGINNING # DEFINE A DAY BETWEEN 6AM AND 6AM # EXPAND THE CORRECTION TIME VALUES TO BE UNIFORM ACROSS ALL USERS AND DEVICES From 72e568bc686c9d9f5df1ee00eef687aefe98c746 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 22:07:55 -0600 Subject: [PATCH 35/78] add insulin/carb events and basic cgm stats --- .../get-users-settings-and-events.py | 93 ++++++++++++++++--- 1 file changed, 82 insertions(+), 11 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index c1631a0e..479e35ec 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -442,7 +442,7 @@ def getTzoForDateTime(utcTime, currentTimezone): def getTimezoneOffset(currentDate, currentTimezone): # edge case for 'US/Pacific-New' - if currentTimezone in 'US/Pacific-New': + if currentTimezone == 'US/Pacific-New': currentTimezone = 'US/Pacific' tz = timezone(currentTimezone) @@ -457,6 +457,8 @@ def getTimezoneOffset(currentDate, currentTimezone): def isDSTChangeDay(currentDate, currentTimezone): + if currentTimezone == 'US/Pacific-New': + currentTimezone = 'US/Pacific' 
tzoCurrentDay = getTimezoneOffset(pd.to_datetime(currentDate), currentTimezone) tzoPreviousDay = getTimezoneOffset(pd.to_datetime(currentDate) + @@ -543,8 +545,6 @@ def processBasalSchedule(df, col): return dailySchedule, dailySummary - - # %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S dataPulledDate = "2018-09-28" dataPulledDF = pd.DataFrame(pd.to_datetime(dataPulledDate), columns=["day"], index=[0]) @@ -564,12 +564,17 @@ def processBasalSchedule(df, col): donorList = phiDate + "-uniqueDonorList.csv" donors = load_csv(os.path.join(donorPath, donorList)) -allMetadata = donors[['hashID', 'diagnosisType']].copy() +allMetadata = pd.DataFrame() +allAgeSummaries = pd.DataFrame() +allYlwSummaries = pd.DataFrame() + # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL # this is where the loop will go: -for dIndex in range(0, len(donors)): +startIndex = 0 +endIndex = len(donors) +for dIndex in range(startIndex, endIndex): # clear output dataframes isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() @@ -1088,7 +1093,6 @@ def processBasalSchedule(df, col): dataPerDay["ylw"] = catDF.ylw.mean() dataPerDay["timezone"] = catDF.timezone.describe()["top"] - # calculate all of the data start and end range # this can be used for looking at settings dayBeginDate = min(cgmBeginDate, bolusBeginDate, basalBeginDate) @@ -1105,6 +1109,9 @@ def processBasalSchedule(df, col): dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) + dayData["timezone"].fillna(method='ffill', inplace=True) + dayData["timezone"].fillna(method='bfill', inplace=True) + dayData["isDSTChangeDay"] = dayData[['day', 'timezone']].apply(lambda x: isDSTChangeDay(*x), axis=1) dayData["date"] = pd.to_datetime(dayData["day"]) dayData["tzo"] = dayData[['date', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) @@ -1315,10 +1322,73 @@ def processBasalSchedule(df, col): cgmLite = 
cgmLite[colOrder] - # %% age and ylw stats + # %% SAVE RESULTS + # age and ylw stats + pumpEvents["rateTimesDurationHours"] = pumpEvents["rate"] * pumpEvents["durationHours"] + pumpEvents.rename(columns={"rate":"basalRate"}, inplace=True) + catDF = pumpEvents.groupby("age") - # %% SAVE RESULTS + # actual basal rates + agePump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") + agePump["basalRate.min"] = catDF["basalRate"].min() + agePump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() + agePump["basalRate.max"] = catDF["basalRate"].max() + + # insulin events + insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") + agePump = pd.concat([agePump, insulinEvents], axis=1) + + # carbs entered in bolus calculator + carbEvents = catDF["carbInput"].describe().add_prefix("carb.") + agePump = pd.concat([agePump, carbEvents], axis=1) + + # very low level cgm stats per age + catDF = cgmLite.groupby("age") + cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") + agePumpCgm = pd.concat([agePump, cgmStats], axis=1) + + agePumpCgm.reset_index(inplace=True) + + ageSummary = pd.merge(ageSummary, agePumpCgm, on="age", how="left") + ageSummary["hashID"] = hashID + allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True) + + allAgeSummaries.to_csv(os.path.join(outputPath, + "allAgeSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) + + # repoeat for years living with + catDF = pumpEvents.groupby("ylw") + # actual basal rates + ylwPump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") + ylwPump["basalRate.min"] = catDF["basalRate"].min() + ylwPump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() + ylwPump["basalRate.max"] = catDF["basalRate"].max() + + # insulin events + insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") + ylwPump = pd.concat([ylwPump, insulinEvents], axis=1) + + # 
carbs entered in bolus calculator + carbEvents = catDF["carbInput"].describe().add_prefix("carb.") + ylwPump = pd.concat([ylwPump, carbEvents], axis=1) + + # very low level cgm stats per age + catDF = cgmLite.groupby("ylw") + cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") + ylwPumpCgm = pd.concat([ylwPump, cgmStats], axis=1) + + ylwPumpCgm.reset_index(inplace=True) + + ylwSummary = pd.merge(ylwSummary, ylwPumpCgm, on="ylw", how="left") + + ylwSummary["hashID"] = hashID + allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True) + + allYlwSummaries.to_csv(os.path.join(outputPath, + "allYlwSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) + + # %% save data for this person # outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" # outputFormat = (f"{minAge:02d}", # f"{maxAge:02d}", @@ -1332,7 +1402,6 @@ def processBasalSchedule(df, col): # if not os.path.exists(outputFolderName_Path): # os.makedirs(outputFolderName_Path) # -# # save data for this person # fName = outputFolderName + "-allSettings.csv" # allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) # fName = outputFolderName + "-pumpEvents.csv" @@ -1361,13 +1430,15 @@ def processBasalSchedule(df, col): metadata["flags"] = "missing bDay/dDay" # write metaData to allMetadata - allMetadata = pd.merge(allMetadata, metadata, how="left", on="hashID") - allMetadata.to_csv(os.path.join(outputPath, "allMetadata.csv")) + allMetadata = pd.concat([allMetadata, metadata], axis=0, sort=True) + allMetadata.to_csv(os.path.join(outputPath, + "allMetadata-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) print("done with", dIndex) # %% V2 DATA TO GRAB +# THERE IS AN ISSUE WITH COUNTING 670G SETTINGS # ADD ROUNDEDLOCAL TIME TO THE END RESULTS # CALCULATE MMOL SUMMARIES # GET RID OF ROUNDING TIME AT THE BEGINNING From 42c86bf1fdc1f0b9b1dddb7ecb966f3b8f51c0d1 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Sun, 13 Jan 2019 23:12:45 -0600 Subject: [PATCH 36/78] add 
argparse to run from commandline --- .../get-users-settings-and-events.py | 122 +++++++++--------- 1 file changed, 64 insertions(+), 58 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 479e35ec..47780ce0 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -18,15 +18,51 @@ from datetime import timedelta import datetime as dt import os +import argparse import pdb # %% USER INPUTS (ADD THIS IN LATER) -#codeDescription = "Get user's settings and events" -#parser = argparse.ArgumentParser(description=codeDescription) - - +codeDescription = "Get user's settings and events" +parser = argparse.ArgumentParser(description=codeDescription) + +parser.add_argument("-d", + "--date-stamp", + dest="dateStamp", + default="2019-01-10", + help="date in '%Y-%m-%d' format of unique donor list" + + "(e.g., PHI-2018-03-02-uniqueDonorList)") + +parser.add_argument("-s", + "--start-index", + dest="startIndex", + default=0, + help="donor index (integer) to start at") + +parser.add_argument("-e", + "--end-index", + dest="endIndex", + default=-1, + help="donor index (integer) to end at," + + "-1 will result in 1 file if startIndex != 0," + + "and will default to number of unique donors" + + "if startIndex = 0, or endIndex = -2") + + +args = parser.parse_args() # %% FUNCTIONS +def defineStartAndEndIndex(args, nDonors): + startIndex = int(args.startIndex) + endIndex = int(args.endIndex) + if endIndex == -1: + if startIndex == 0: + endIndex = nDonors + else: + endIndex = startIndex + 1 + if endIndex == -2: + endIndex = nDonors + return startIndex, endIndex + # CLEAN DATA FUNCTIONS def removeNegativeDurations(df): @@ -412,6 +448,8 @@ def getListOfDexcomCGMDays(df): totalCgms = len(df.deviceId.notnull()) df["dexcomCGM"] = df.deviceId.str.contains("|".join(searchfor)) percentDexcomCGM = df.dexcomCGM.sum() / 
totalCgms * 100 + else: + percentDexcomCGM = np.nan return df, percentDexcomCGM @@ -505,48 +543,14 @@ def getPumpSettingsStats(df, col, pumpCol): return df, df2 -def processBasalSchedule(df, col): - colHeadings = [col + ".localTime", col, col + ".durationHours", col + ".type", - col + ".min", col + ".weightedMean", col + ".max"] - summaryColHeadings = ["day", col + ".min", col + ".weightedMean", col + ".max"] - dropCols = ["rate", "start", col + ".localTime", col, col + ".durationHours", col + ".type"] - - dailySchedule = pd.DataFrame(columns=colHeadings) - dailySummary = pd.DataFrame(columns=summaryColHeadings) - - for p, actSched in zip(df.index, df["activeSchedule"]): - # edge case where actSchedule is float - if isinstance(actSched, float): - actSched = str(int(actSched)) - if 'Auto Mode' not in actSched: - tempDF = pd.DataFrame(df.loc[p, "basalSchedules." + actSched]) - tempDF["day"] = df.loc[p, "day"] - tempDF[col + ".type"] = np.nan - tempDF[col + ".localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - endOfDay = pd.DataFrame(pd.to_datetime(df.loc[p, "day"] + pd.Timedelta(1, "D")), columns=[col + ".localTime"], index=[0]) - tempDF = get_setting_durations(tempDF, col, endOfDay) - tempDF = tempDF[:-1] - tempDF = get_settingStats(tempDF, col, "rate") - dailySchedule = pd.concat([dailySchedule, tempDF[colHeadings]], ignore_index=True, sort=False) - tempSummary = tempDF.drop(columns=dropCols) - tempSummary["day"] = df.loc[p, "day"] - tempSummary = tempSummary[0:1] - dailySummary = pd.concat([dailySummary, tempSummary], ignore_index=True, sort=False) - - else: - pdb.set_trace() - tempDF = pd.DataFrame(index=[0]) - tempDF[col + ".type"] = "AutoMode" - dailySchedule = pd.concat([dailySchedule, tempDF], ignore_index=True, sort=False) - tempSummary["day"] = df.loc[p, "day"] - tempSummary = tempSummary[0:1] - dailySummary = pd.concat([dailySummary, tempSummary], ignore_index=True, sort=False) - return dailySchedule, dailySummary # 
%% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S -dataPulledDate = "2018-09-28" + + + +dataPulledDate = args.dateStamp dataPulledDF = pd.DataFrame(pd.to_datetime(dataPulledDate), columns=["day"], index=[0]) dataPulledDF["day"] = dataPulledDF["day"].dt.date phiDate = "PHI-" + dataPulledDate @@ -571,9 +575,11 @@ def processBasalSchedule(df, col): # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL -# this is where the loop will go: -startIndex = 0 -endIndex = len(donors) +nUniqueDonors = len(donors) + +# define start and end index +startIndex, endIndex = defineStartAndEndIndex(args, nUniqueDonors) + for dIndex in range(startIndex, endIndex): # clear output dataframes @@ -678,12 +684,12 @@ def processBasalSchedule(df, col): # get a summary of boluses per day bolusDaySummary = get_bolusDaySummary(bolus) - # isf and cir associated with bolus event - if "insulinSensitivities" in list(bolus): - pdb.set_trace() - - if "carbRatios" in list(bolus): - pdb.set_trace() +# # isf and cir associated with bolus event +# if "insulinSensitivities" in list(bolus): +# pdb.set_trace() +# +# if "carbRatios" in list(bolus): +# pdb.set_trace() bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) @@ -974,13 +980,13 @@ def processBasalSchedule(df, col): sbrDaySummary.reset_index(inplace=True, drop=True) sbrDaySummary.fillna(method='ffill', inplace=True) - # max basal rate, max bolus amount, and insulin duration - if "rateMaximum" in list(data): - pdb.set_trace() - if "amountMaximum" in list(data): - pdb.set_trace() - if "bolus.calculator" in list(data): - pdb.set_trace() +# # max basal rate, max bolus amount, and insulin duration +# if "rateMaximum" in list(data): +# pdb.set_trace() +# if "amountMaximum" in list(data): +# pdb.set_trace() +# if "bolus.calculator" in list(data): +# pdb.set_trace() # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) From 
640eaf61953861d63a6648819d18cdac7bef43e0 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Mon, 14 Jan 2019 00:04:57 -0600 Subject: [PATCH 37/78] add try catch to help batch process --- .../get-users-settings-and-events.py | 1622 ++++++++--------- 1 file changed, 811 insertions(+), 811 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 47780ce0..1c979d2f 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -29,7 +29,7 @@ parser.add_argument("-d", "--date-stamp", dest="dateStamp", - default="2019-01-10", + default="2018-09-28", help="date in '%Y-%m-%d' format of unique donor list" + "(e.g., PHI-2018-03-02-uniqueDonorList)") @@ -574,866 +574,866 @@ def getPumpSettingsStats(df, col, pumpCol): # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL - nUniqueDonors = len(donors) # define start and end index startIndex, endIndex = defineStartAndEndIndex(args, nUniqueDonors) for dIndex in range(startIndex, endIndex): - - # clear output dataframes - isf, cir, correctionTarget = pd.DataFrame(), pd.DataFrame(), pd.DataFrame() - - # %% ID, HASHID, AGE, & YLW + # % ID, HASHID, AGE, & YLW userID = donors.userID[dIndex] hashID = donors.hashID[dIndex] metadata = pd.DataFrame(index=[dIndex]) metadata["hashID"] = hashID - # make folder to save data - processedDataPath = os.path.join(phiOutputPath, "PHI-" + userID) - if not os.path.exists(processedDataPath): - os.makedirs(processedDataPath) - - - # round all birthdays and diagnosis dates to the first day of the month (to protect identities) - if (pd.isnull(donors.bDay[dIndex]) + pd.isnull(donors.dDay[dIndex])) == 0: - - bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) - dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) + try: + # make folder to save data + processedDataPath = os.path.join(phiOutputPath, "PHI-" + userID) + if not 
os.path.exists(processedDataPath): + os.makedirs(processedDataPath) + # round all birthdays and diagnosis dates to the first day of the month (to protect identities) + if (pd.isnull(donors.bDay[dIndex]) + pd.isnull(donors.dDay[dIndex])) == 0: + + bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) + dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) + + + # %% LOAD IN DONOR JSON DATA + + jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") + jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") + + if os.path.exists(jsonFileName): + fileSize = os.stat(jsonFileName).st_size + metadata["fileSizeKB"] = fileSize / 1000 + if fileSize > 1000: + data = load_json(jsonFileName) + + # sort the data by time + data.sort_values("time", inplace=True) + + # flatten the embedded json + data = flattenJson(data) + + + # %% CLEAN DATA + # remove negative durations + data, nNegativeDurations = removeNegativeDurations(data) + metadata["nNegativeDurations"] = nNegativeDurations + + # get rid of cgm values too low/high (< 38 & > 402 mg/dL) + data, nInvalidCgmValues = removeInvalidCgmValues(data) + metadata["nInvalidCgmValues"] = nInvalidCgmValues + + # Tslim calibration bug fix + data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) + metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings - # %% LOAD IN DONOR JSON DATA - jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") - jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") + # %% ADD UPLOAD DATE + # attach upload time to each record, for resolving duplicates + if (("upload" in data.type.unique()) & + ("basal" in data.type.unique()) & + ("bolus" in data.type.unique()) & + ("cbg" in data.type.unique()) & + ("pumpSettings" in data.type.unique())): + data = addUploadDate(data) - if os.path.exists(jsonFileName): - fileSize = os.stat(jsonFileName).st_size - metadata["fileSizeKB"] = fileSize / 1000 - if fileSize > 1000: - data = load_json(jsonFileName) - - # 
sort the data by time - data.sort_values("time", inplace=True) - - # flatten the embedded json - data = flattenJson(data) + # %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) + data["utcTime"] = pd.to_datetime(data["time"]) + data["timezone"].fillna(method='ffill', inplace=True) + data["timezone"].fillna(method='bfill', inplace=True) + data["day"] = pd.DatetimeIndex(data["utcTime"]).date + + # round to the nearest 5 minutes + # TODO: once roundTime is pushed to tidals repository then this line can be replaced + # with td.clean.round_time + data = round_time(data, timeIntervalMinutes=5, timeField="time", + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False) + data.sort_values("uploadTime", ascending=False, inplace=True) + + + # %% ID, HASHID, AGE, & YLW + data["userID"] = userID + data["hashID"] = hashID + data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) + data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) + + # commonColumnHeadings = ["hashID", + # "age", + # "ylw"] + + + # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + bolus = mergeWizardWithBolus(data) + if len(bolus) > 0: + # get rid of duplicates that have the same ["time", "normal"] + bolus.sort_values("uploadTime", ascending=False, inplace=True) + bolus, nBolusDuplicatesRemoved = \ + removeDuplicates(bolus, ["deviceTime", "normal"]) + metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved + + # get a summary of boluses per day + bolusDaySummary = get_bolusDaySummary(bolus) + + # # isf and cir associated with bolus event + # if "insulinSensitivities" in list(bolus): + # pdb.set_trace() + # + # if "carbRatios" in list(bolus): + # pdb.set_trace() + + bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] + bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) + + # bolusCH = commonColumnHeadings.copy() + bolusCH = ["utcTime", "timezone", "roundedTime", "normal", "carbInput", "subType", + 
"insulinOnBoard", "bgInput", + "isf", "isf_mmolL_U", "insulinCarbRatio"] + bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] + bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan + bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", + "bgInput": "bg_mmolL"}) + bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) + bolusEvents["eventType"] = "correction" + bolusEvents.loc[bolusEvents["carbInput"] > 0, "eventType"] = "meal" + + if "duration" in list(bolus): + bolus["duration"].replace(0, np.nan, inplace=True) + bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 + bolus["rate"] = bolus["extended"] / bolus["durationHours"] + # bolusExtendedCH = commonColumnHeadings.copy() + bolusExtendedCH = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] + bolusExtendedEvents = bolus.loc[ + ((bolus["extended"].notnull()) & + (bolus["duration"] > 0)), bolusExtendedCH] + + if "extended" not in bolus: + bolus["extended"] = np.nan + bolus["duration"] = np.nan + + + # get start and end times + bolusBeginDate, bolusEndDate = getStartAndEndTimes(bolus, "day") + metadata["bolus.beginDate"] = bolusBeginDate + metadata["bolus.endDate"] = bolusEndDate + + + # %% PUMP SETTINGS + + pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") + pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) + + pumpSettings, nPumpSettingsDuplicatesRemoved = \ + removeDuplicates(pumpSettings, "deviceTime") + metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved + + pumpSettings.sort_values("utcTime", ascending=True, inplace=True) + pumpSettings.reset_index(drop=True, inplace=True) + + # ISF + isfColHeadings = ["isf.localTime", "isf", "isf_mmolL_U"] + + if "insulinSensitivity.amount" in list(pumpSettings): + isfColHead = "insulinSensitivity" + pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf"] = 
mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) + pumpSettings["isf.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") + + isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfColHeadings] + + # add a day summary + isfDaySummary = pd.DataFrame() + isfDaySummary["day"] = isf["isf.localTime"].dt.date + isfDaySummary["isf.min"] = isf["isf"] + isfDaySummary["isf.weightedMean"] = isf["isf"] + isfDaySummary["isf.max"] = isf["isf"] + isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) + isfDaySummary.reset_index(inplace=True, drop=True) + isfDaySummary.fillna(method='ffill', inplace=True) - # %% CLEAN DATA - # remove negative durations - data, nNegativeDurations = removeNegativeDurations(data) - metadata["nNegativeDurations"] = nNegativeDurations - - # get rid of cgm values too low/high (< 38 & > 402 mg/dL) - data, nInvalidCgmValues = removeInvalidCgmValues(data) - metadata["nInvalidCgmValues"] = nInvalidCgmValues - - # Tslim calibration bug fix - data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) - metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings + else: + isfColHead = "insulinSensitivities" + isf = pd.DataFrame(columns=isfColHeadings) + isfDayColHeadings = ['day', 'isf.min', 'isf.weightedMean', 'isf.max'] + isfDaySummary = pd.DataFrame(columns=isfDayColHeadings) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + + tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." 
+ actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["isf.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["isf_mmolL_U"] = tempDF["amount"] + tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["isf.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "isf", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["isf.localTime"].dt.date + tempDaySummary["isf.min"] = tempDF["isf"].min() + tempDaySummary["isf.weightedMean"] = \ + np.sum(tempDF["isf"] * tempDF["isf.durationHours"]) / tempDF["isf.durationHours"].sum() + tempDaySummary["isf.max"] = tempDF["isf"].max() + + isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True) + isfDaySummary = pd.concat([isfDaySummary, tempDaySummary], ignore_index=True) + + isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) + isfDaySummary.reset_index(inplace=True, drop=True) + isfDaySummary.fillna(method='ffill', inplace=True) + + # CIR + cirColHeadings = ["cir.localTime", "cir"] + + if "carbRatio.amount" in list(pumpSettings): + cirColHead = "carbRatio" + pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] + pumpSettings["cir.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") + + cir = pumpSettings.loc[pumpSettings["carbRatio.amount"].notnull(), cirColHeadings] + + # add a day summary + cirDaySummary = pd.DataFrame() + cirDaySummary["day"] = cir["cir.localTime"].dt.date + cirDaySummary["cir.min"] = cir["cir"] + cirDaySummary["cir.weightedMean"] = cir["cir"] + cirDaySummary["cir.max"] = cir["cir"] + cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) + cirDaySummary.reset_index(inplace=True, drop=True) + cirDaySummary.fillna(method='ffill', inplace=True) + else: - # %% ADD UPLOAD 
DATE - # attach upload time to each record, for resolving duplicates - if (("upload" in data.type.unique()) & - ("basal" in data.type.unique()) & - ("bolus" in data.type.unique()) & - ("cbg" in data.type.unique()) & - ("pumpSettings" in data.type.unique())): - data = addUploadDate(data) + cirColHead = "carbRatios" + cir = pd.DataFrame(columns=cirColHeadings) + cirDayColHeadings = ['day', 'cir.min', 'cir.weightedMean', 'cir.max'] + cirDaySummary = pd.DataFrame(columns=cirDayColHeadings) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + + tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." + actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["cir.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + tempDF["cir"] = tempDF["amount"] + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["cir.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "cir", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["cir.localTime"].dt.date + tempDaySummary["cir.min"] = tempDF["cir"].min() + tempDaySummary["cir.weightedMean"] = \ + np.sum(tempDF["cir"] * tempDF["cir.durationHours"]) / tempDF["cir.durationHours"].sum() + tempDaySummary["cir.max"] = tempDF["cir"].max() + + cir = pd.concat([cir, tempDF[cirColHeadings]], ignore_index=True) + cirDaySummary = pd.concat([cirDaySummary, tempDaySummary], ignore_index=True) + + cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) + cirDaySummary.reset_index(inplace=True, drop=True) + cirDaySummary.fillna(method='ffill', inplace=True) + + + # CORRECTION TARGET + ctColHeadings = ["ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"] + ctDayColHeadings = ['day', + "ct.low.min", "ct.low.weightedMean", "ct.low.max", 
+ "ct.high.min", "ct.high.weightedMean", "ct.high.max", + "ct.target.min", "ct.target.weightedMean", "ct.target.max", + "ct.range.min", "ct.range.weightedMean", "ct.range.max"] + + if "bgTarget.start" in list(pumpSettings): + ctColHead = "bgTarget." + + for targetType in ["low", "high", "target", "range"]: + if ctColHead + targetType in list(pumpSettings): + pumpSettings["ct." + targetType + "_mmolL"] = \ + pumpSettings[ctColHead + targetType] + + pumpSettings["ct." + targetType] = \ + mmolL_to_mgdL(pumpSettings["ct." + targetType + "_mmolL"]) + else: + pumpSettings["ct." + targetType + "_mmolL"] = np.nan + pumpSettings["ct." + targetType] = np.nan + + pumpSettings["ct.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[ctColHead + "start"], unit="ms") + + correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] + + # add a day summary + ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) + ctDaySummary["day"] = correctionTarget["ct.localTime"].dt.date + # add min, weightedMean, and max + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + for stat in [".min", ".weightedMean", ".max"]: + ctDaySummary[targetType + stat] = correctionTarget[targetType] + + + ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) + ctDaySummary.reset_index(inplace=True, drop=True) + ctDaySummary.fillna(method='ffill', inplace=True) + else: + ctColHead = "bgTargets" + correctionTarget = pd.DataFrame(columns=ctColHeadings) + + ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + + tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." 
+ actSched]) + targetTypes = list(set(list(tempDF)) - set(["start"])) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["ct.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["ct.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "ct", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(columns=ctDayColHeadings, index=[0]) + tempDaySummary["day"] = tempDF["ct.localTime"].dt.date + + for targetType in targetTypes: + tempDF["ct." + targetType] = mmolL_to_mgdL(tempDF[targetType]) + + tempDaySummary["ct." + targetType + ".min"] = tempDF["ct." + targetType].min() + tempDaySummary["ct." + targetType + ".weightedMean"] = \ + np.sum(tempDF["ct." + targetType] * tempDF["ct.durationHours"]) / tempDF["ct.durationHours"].sum() + tempDaySummary["ct." + targetType + ".max"] = tempDF["ct." + targetType].max() + + correctionTarget = \ + pd.concat([correctionTarget, + tempDF.drop(columns=['start', + 'target', + 'day', + 'ct.durationHours'])], + ignore_index=True, sort=False) + ctDaySummary = pd.concat([ctDaySummary, tempDaySummary], + ignore_index=True, sort=False) + + ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) + ctDaySummary.fillna(method='ffill', inplace=True) + ctDaySummary.drop_duplicates(inplace=True) + ctDaySummary.reset_index(inplace=True, drop=True) + + # SCHEDULED BASAL RATES + sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"] + sbr = pd.DataFrame(columns=sbrColHeadings) + sbrDayColHeadings = ['day', 'sbr.min', 'sbr.weightedMean', 'sbr.max', 'sbr.type'] + sbrDaySummary = pd.DataFrame(columns=sbrDayColHeadings) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + if 'Auto Mode' not in actSched: + tempDF = pd.DataFrame(pumpSettings.loc[p, 
"basalSchedules." + actSched]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["sbr.type"] = np.nan + tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "sbr", endOfDay) + tempDF = tempDF[:-1] + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date + tempDaySummary["sbr.min"] = tempDF["rate"].min() + tempDaySummary["sbr.weightedMean"] = \ + np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() + tempDaySummary["sbr.max"] = tempDF["rate"].max() + tempDaySummary["sbr.type"] = np.nan - # %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) - data["utcTime"] = pd.to_datetime(data["time"]) - data["timezone"].fillna(method='ffill', inplace=True) - data["timezone"].fillna(method='bfill', inplace=True) - data["day"] = pd.DatetimeIndex(data["utcTime"]).date - - # round to the nearest 5 minutes - # TODO: once roundTime is pushed to tidals repository then this line can be replaced - # with td.clean.round_time - data = round_time(data, timeIntervalMinutes=5, timeField="time", - roundedTimeFieldName="roundedTime", startWithFirstRecord=True, - verbose=False) - data.sort_values("uploadTime", ascending=False, inplace=True) + else: + tempDF = pd.DataFrame(index=[0]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + tempDF["rate"] = np.nan + tempDF["sbr.type"] = "AutoMode" + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date + tempDaySummary["sbr.min"] = np.nan + tempDaySummary["sbr.weightedMean"] = np.nan + tempDaySummary["sbr.max"] = np.nan + tempDaySummary["sbr.type"] = "AutoMode" + + sbr = pd.concat([sbr, tempDF[sbrColHeadings]], ignore_index=True) + sbrDaySummary = 
pd.concat([sbrDaySummary, tempDaySummary], ignore_index=True) + + sbrDaySummary = pd.concat([sbrDaySummary, dataPulledDF], sort=False) + sbrDaySummary.reset_index(inplace=True, drop=True) + sbrDaySummary.fillna(method='ffill', inplace=True) + + # # max basal rate, max bolus amount, and insulin duration + # if "rateMaximum" in list(data): + # pdb.set_trace() + # if "amountMaximum" in list(data): + # pdb.set_trace() + # if "bolus.calculator" in list(data): + # pdb.set_trace() + + + # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) + basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") + basal.sort_values("uploadTime", ascending=False, inplace=True) + + basalBeginDate, basalEndDate = getStartAndEndTimes(basal, "day") + metadata["basal.beginDate"] = basalBeginDate + metadata["basal.endDate"] = basalEndDate + + basal, nBasalDuplicatesRemoved = \ + removeDuplicates(basal, ["deliveryType", "deviceTime", "duration", "rate"]) + metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved + + # fill NaNs with 0, as it indicates a suspend (temp basal of 0) + basal.rate.fillna(0, inplace=True) + + # get rid of basals that have durations of 0 + nBasalDuration0 = sum(basal.duration > 0) + basal = basal[basal.duration > 0] + metadata["basal.nBasalDuration0"] = nBasalDuration0 + + # get rid of basal durations that are unrealistic + nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000)) + metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration) + basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan + + # calculate the total amount of insulin delivered (duration * rate) + basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 + basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] + + # actual basal delivered + # abrColHeadings = commonColumnHeadings.copy() + abrColHeadings = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] + abr = 
basal[abrColHeadings] + if "duration" in list(bolus): + abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) + abr.sort_values("utcTime", inplace=True) + + abr["timezone"].fillna(method='ffill', inplace=True) + abr["timezone"].fillna(method='bfill', inplace=True) + + # get a summary of basals per day + basalDaySummary = get_basalDaySummary(basal) + + + # %% GET CLOSED LOOP DAYS WITH TEMP BASAL DATA + # group data by type + groupedData = data.groupby(by="type") + + isClosedLoopDay, is670g, metadata = \ + getClosedLoopDays(groupedData, 30, metadata) + + # %% CGM DATA + # filter by cgm and sort by uploadTime + cgmData = groupedData.get_group("cbg").dropna(axis=1, how="all") + + # get rid of duplicates that have the same ["deviceTime", "value"] + cgmData, nCgmDuplicatesRemovedDeviceTime = removeCgmDuplicates(cgmData, "deviceTime") + metadata["nCgmDuplicatesRemovedDeviceTime"] = nCgmDuplicatesRemovedDeviceTime + + # get rid of duplicates that have the same ["time", "value"] + cgmData, nCgmDuplicatesRemovedUtcTime = removeCgmDuplicates(cgmData, "time") + metadata["cnCgmDuplicatesRemovedUtcTime"] = nCgmDuplicatesRemovedUtcTime + + # get rid of duplicates that have the same "roundedTime" + cgmData, nCgmDuplicatesRemovedRoundedTime = removeDuplicates(cgmData, "roundedTime") + metadata["nCgmDuplicatesRemovedRoundedTime"] = nCgmDuplicatesRemovedRoundedTime + + # get start and end times + cgmBeginDate, cgmEndDate = getStartAndEndTimes(cgmData, "day") + metadata["cgm.beginDate"] = cgmBeginDate + metadata["cgm.endDate"] = cgmEndDate + + # get a list of dexcom cgms + cgmData, percentDexcom = getListOfDexcomCGMDays(cgmData) + metadata["cgm.percentDexcomCGM"] = percentDexcom + + # group by date (day) and get stats + catDF = cgmData.groupby(cgmData["day"]) + cgmRecordsPerDay = \ + pd.DataFrame(catDF.value.count()). 
\ + rename(columns={"value": "cgm.count"}) + dayDate = catDF.day.describe()["top"] + dexcomCGM = catDF.dexcomCGM.describe()["top"] + nTypesCGM = catDF.dexcomCGM.describe()["unique"] + cgmRecordsPerDay["cgm.dexcomOnly"] = \ + (dexcomCGM & (nTypesCGM == 1)) + cgmRecordsPerDay["date"] = cgmRecordsPerDay.index + + # filter the cgm data + # cgmColHeadings = commonColumnHeadings.copy() + cgmColHeadings = ["utcTime", "timezone", "roundedTime", "value"] + + # get data in mg/dL units + cgm = cgmData[cgmColHeadings] + cgm = cgm.rename(columns={'value': 'mmol_L'}) + cgm["mg_dL"] = mmolL_to_mgdL(cgm["mmol_L"]).astype(int) + + + # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + + # COMBINE DAY SUMMARIES + # group by date (day) and get stats + catDF = data.groupby(data["day"]) + dataPerDay = \ + pd.DataFrame(catDF.hashID.describe()["top"]). \ + rename(columns={"top": "hashID"}) + dataPerDay["age"] = catDF.age.mean() + dataPerDay["ylw"] = catDF.ylw.mean() + dataPerDay["timezone"] = catDF.timezone.describe()["top"] + + # calculate all of the data start and end range + # this can be used for looking at settings + dayBeginDate = min(cgmBeginDate, bolusBeginDate, basalBeginDate) + dayEndDate = max(cgmEndDate, bolusEndDate, basalEndDate) + metadata["day.beginDate"] = dayBeginDate + metadata["day.endDate"] = dayEndDate + rng = pd.date_range(dayBeginDate, dayEndDate).date + dayData = pd.DataFrame(rng, columns=["day"]) + for dfType in [dataPerDay, basalDaySummary, bolusDaySummary, cgmRecordsPerDay]: + dayData = pd.merge(dayData, dfType.reset_index(), on="day", how="left") + for dfType in [isClosedLoopDay, is670g]: + dayData = pd.merge(dayData, dfType, on="day", how="left") + + dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 + dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) + + dayData["timezone"].fillna(method='ffill', inplace=True) + dayData["timezone"].fillna(method='bfill', inplace=True) + + dayData["isDSTChangeDay"] = 
dayData[['day', 'timezone']].apply(lambda x: isDSTChangeDay(*x), axis=1) + dayData["date"] = pd.to_datetime(dayData["day"]) + dayData["tzo"] = dayData[['date', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) + + # add settings to the dayData + dayData = pd.merge(dayData, isfDaySummary, on="day", how="left") + dayData = pd.merge(dayData, cirDaySummary, on="day", how="left") + dayData = pd.merge(dayData, ctDaySummary, on="day", how="left") + dayData = pd.merge(dayData, sbrDaySummary, on="day", how="left") + + # fill data forward + fillList = ['isf.min', + 'isf.weightedMean', + 'isf.max', + 'cir.min', + 'cir.weightedMean', + 'cir.max', + 'ct.low.min', 'ct.low.weightedMean', 'ct.low.max', + 'ct.high.min', 'ct.high.weightedMean', 'ct.high.max', + 'ct.target.min', 'ct.target.weightedMean', 'ct.target.max', + 'ct.range.min', 'ct.range.weightedMean', 'ct.range.max', + 'sbr.min', + 'sbr.weightedMean', + 'sbr.max', + 'sbr.type'] + for fl in fillList: + dayData[fl].fillna(method='ffill', inplace=True) + + # calculate the start and end of contiguous data + # these dates can be used when simulating and predicting, where + # you need both pump and cgm data + contiguousBeginDate = max(cgmBeginDate, bolusBeginDate, basalBeginDate) + contiguousEndDate = min(cgmEndDate, bolusEndDate, basalEndDate) + metadata["contiguous.beginDate"] = contiguousBeginDate + metadata["contiguous.endDate"] = contiguousEndDate + + # get a summary by age, and ylw + catDF = dayData.groupby("age") + ageSummary = pd.DataFrame(catDF.validPumpData.sum()) + ageSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ageSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + + # add in isf stats + ageSummary["isf.nDays"] = catDF["isf.min"].count() + ageSummary["isf.min"] = catDF["isf.min"].min() + 
ageSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ageSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + ageSummary["cir.nDays"] = catDF["cir.min"].count() + ageSummary["cir.min"] = catDF["cir.min"].min() + ageSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ageSummary["cir.max"] = catDF["cir.max"].max() + + # add sbr stats + ageSummary["sbr.nDays"] = catDF["sbr.min"].count() + ageSummary["sbr.min"] = catDF["sbr.min"].min() + ageSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ageSummary["sbr.max"] = catDF["sbr.max"].max() + ageSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + + # correctionTarget stats + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + for stat in [".min", ".weightedMean", ".max"]: + ch = targetType + stat + ageSummary[ch + ".nDays"] = catDF[ch].count() + ageSummary[ch + ".min"] = catDF[ch].min() + ageSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() + ageSummary[ch + ".max"] = catDF[ch].max() + + ageSummary.reset_index(inplace=True) + + analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 28) & + (ageSummary["nDaysValidCgm"]> 28))] + minAge = analysisCriterion["age"].min() + maxAge = analysisCriterion["age"].max() + nDaysClosedLoop = analysisCriterion["nDaysClosedLoop"].sum() + n670gDays = analysisCriterion["n670gDays"].sum() + metadata["minAge"] = minAge + metadata["maxAge"] = maxAge + metadata["nDaysClosedLoop"] = nDaysClosedLoop + metadata["n670gDays"] = n670gDays + + catDF = dayData.groupby("ylw") + ylwSummary = pd.DataFrame(catDF.validPumpData.sum()) + ylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ylwSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ylwSummary["n670gDays"] = 
pd.DataFrame(catDF["670g"].sum()) + + ylwSummary["isf.nDays"] = catDF["isf.min"].count() + ylwSummary["isf.min"] = catDF["isf.min"].min() + ylwSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ylwSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + ylwSummary["cir.nDays"] = catDF["cir.min"].count() + ylwSummary["cir.min"] = catDF["cir.min"].min() + ylwSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ylwSummary["cir.max"] = catDF["cir.max"].max() + + # add sbr stats + ylwSummary["sbr.nDays"] = catDF["sbr.min"].count() + ylwSummary["sbr.min"] = catDF["sbr.min"].min() + ylwSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ylwSummary["sbr.max"] = catDF["sbr.max"].max() + ylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + + # correctionTarget stats + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + for stat in [".min", ".weightedMean", ".max"]: + ch = targetType + stat + ylwSummary[ch + ".nDays"] = catDF[ch].count() + ylwSummary[ch + ".min"] = catDF[ch].min() + ylwSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() + ylwSummary[ch + ".max"] = catDF[ch].max() + + ylwSummary.reset_index(inplace=True) + + analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 28) & + (ylwSummary["nDaysValidCgm"]> 28))] + minYLW = analysisCriterion["ylw"].min() + maxYLW = analysisCriterion["ylw"].max() + metadata["minYLW"] = minYLW + metadata["maxYLW"] = maxYLW + + + # %% calculate local time + abr["date"] = pd.to_datetime(abr["utcTime"].dt.date) + abr = pd.merge(abr, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") + abr["localTime"] = abr["utcTime"] + pd.to_timedelta(abr["tzo"], unit="m") + + cgm["date"] = pd.to_datetime(cgm["utcTime"].dt.date) + cgm = pd.merge(cgm, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") + cgm["localTime"] = 
cgm["utcTime"] + pd.to_timedelta(cgm["tzo"], unit="m") + + bolusEvents["date"] = pd.to_datetime(bolusEvents["utcTime"].dt.date) + bolusEvents = pd.merge(bolusEvents, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") + bolusEvents["localTime"] = bolusEvents["utcTime"] + pd.to_timedelta(bolusEvents["tzo"], unit="m") + + + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + # all settings + + allSettings = pd.merge(isf.rename(columns={"isf.localTime": "localTime"}), + cir.rename(columns={"cir.localTime": "localTime"}), + how="outer", on="localTime") + allSettings = pd.merge(allSettings, + sbr.rename(columns={"rate": "sbr", + "type": "sbr.type", + "sbr.localTime": "localTime"}), + how="outer", on="localTime") + allSettings = pd.merge(allSettings, + correctionTarget.rename(columns={"ct.localTime": "localTime"}), + how="outer", on="localTime") + allSettings["hashID"] = hashID + allSettings["age"] = np.floor((allSettings["localTime"] - bDate).dt.days/365.25).astype(int) + allSettings["ylw"] = np.floor((allSettings["localTime"] - dDate).dt.days/365.25).astype(int) + allSettings = round_time(allSettings, timeIntervalMinutes=5, + timeField="localTime", + roundedTimeFieldName="localRoundedTime", + startWithFirstRecord=True, verbose=False) + + colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", + "isf", "cir", "sbr", + "ct.low", "ct.high", "ct.target", "ct.range", + "sbr.type", "isf_mmolL_U"] + allSettings = allSettings[colOrder] + + + fieldsToDrop = ["utcTime", "timezone", "roundedTime", "date", "tzo", "isDSTChangeDay"] + pumpEvents = pd.merge(abr.drop(columns=fieldsToDrop), + bolusEvents.drop(columns=fieldsToDrop), + how="outer", on="localTime") + pumpEvents["type"].fillna("bolus", inplace=True) + pumpEvents["eventType"].fillna("basal", inplace=True) + pumpEvents["hashID"] = hashID + pumpEvents["age"] = np.floor((pumpEvents["localTime"] - bDate).dt.days/365.25).astype(int) + 
pumpEvents["ylw"] = np.floor((pumpEvents["localTime"] - dDate).dt.days/365.25).astype(int) + pumpEvents = round_time(pumpEvents, timeIntervalMinutes=5, + timeField="localTime", + roundedTimeFieldName="localRoundedTime", + startWithFirstRecord=True, verbose=False) + + + colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", + "rate", "durationHours", + "unitsInsulin", "carbInput", "type", "eventType", "subType", + "isf", "isf_mmolL_U", "insulinCarbRatio", "insulinOnBoard", + "bg_mgdL", "bg_mmolL"] + + pumpEvents = pumpEvents[colOrder] + + cgmLite = cgm.drop(columns=fieldsToDrop) + cgmLite["hashID"] = hashID + cgmLite["age"] = np.floor((cgmLite["localTime"] - bDate).dt.days/365.25).astype(int) + cgmLite["ylw"] = np.floor((cgmLite["localTime"] - dDate).dt.days/365.25).astype(int) + cgmLite = round_time(cgmLite, timeIntervalMinutes=5, + timeField="localTime", + roundedTimeFieldName="localRoundedTime", + startWithFirstRecord=True, verbose=False) + colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", + "mg_dL", "mmol_L"] - # %% ID, HASHID, AGE, & YLW - data["userID"] = userID - data["hashID"] = hashID - data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) - data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) + cgmLite = cgmLite[colOrder] -# commonColumnHeadings = ["hashID", -# "age", -# "ylw"] + # %% SAVE RESULTS - # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) - bolus = mergeWizardWithBolus(data) - if len(bolus) > 0: - # get rid of duplicates that have the same ["time", "normal"] - bolus.sort_values("uploadTime", ascending=False, inplace=True) - bolus, nBolusDuplicatesRemoved = \ - removeDuplicates(bolus, ["deviceTime", "normal"]) - metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved + # age and ylw stats + pumpEvents["rateTimesDurationHours"] = pumpEvents["rate"] * pumpEvents["durationHours"] + pumpEvents.rename(columns={"rate":"basalRate"}, inplace=True) 
+ catDF = pumpEvents.groupby("age") - # get a summary of boluses per day - bolusDaySummary = get_bolusDaySummary(bolus) + # actual basal rates + agePump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") + agePump["basalRate.min"] = catDF["basalRate"].min() + agePump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() + agePump["basalRate.max"] = catDF["basalRate"].max() -# # isf and cir associated with bolus event -# if "insulinSensitivities" in list(bolus): -# pdb.set_trace() -# -# if "carbRatios" in list(bolus): -# pdb.set_trace() + # insulin events + insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") + agePump = pd.concat([agePump, insulinEvents], axis=1) - bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] - bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) - -# bolusCH = commonColumnHeadings.copy() - bolusCH = ["utcTime", "timezone", "roundedTime", "normal", "carbInput", "subType", - "insulinOnBoard", "bgInput", - "isf", "isf_mmolL_U", "insulinCarbRatio"] - bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] - bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan - bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", - "bgInput": "bg_mmolL"}) - bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) - bolusEvents["eventType"] = "correction" - bolusEvents.loc[bolusEvents["carbInput"] > 0, "eventType"] = "meal" - - if "duration" in list(bolus): - bolus["duration"].replace(0, np.nan, inplace=True) - bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 - bolus["rate"] = bolus["extended"] / bolus["durationHours"] -# bolusExtendedCH = commonColumnHeadings.copy() - bolusExtendedCH = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] - bolusExtendedEvents = bolus.loc[ - ((bolus["extended"].notnull()) & - (bolus["duration"] > 0)), bolusExtendedCH] - - if "extended" not in bolus: - bolus["extended"] = np.nan - 
bolus["duration"] = np.nan - - - # get start and end times - bolusBeginDate, bolusEndDate = getStartAndEndTimes(bolus, "day") - metadata["bolus.beginDate"] = bolusBeginDate - metadata["bolus.endDate"] = bolusEndDate - - - # %% PUMP SETTINGS - - pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") - pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) - - pumpSettings, nPumpSettingsDuplicatesRemoved = \ - removeDuplicates(pumpSettings, "deviceTime") - metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved - - pumpSettings.sort_values("utcTime", ascending=True, inplace=True) - pumpSettings.reset_index(drop=True, inplace=True) - - # ISF - isfColHeadings = ["isf.localTime", "isf", "isf_mmolL_U"] - - if "insulinSensitivity.amount" in list(pumpSettings): - isfColHead = "insulinSensitivity" - pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] - pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) - pumpSettings["isf.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") - - isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfColHeadings] - - # add a day summary - isfDaySummary = pd.DataFrame() - isfDaySummary["day"] = isf["isf.localTime"].dt.date - isfDaySummary["isf.min"] = isf["isf"] - isfDaySummary["isf.weightedMean"] = isf["isf"] - isfDaySummary["isf.max"] = isf["isf"] - isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) - isfDaySummary.reset_index(inplace=True, drop=True) - isfDaySummary.fillna(method='ffill', inplace=True) + # carbs entered in bolus calculator + carbEvents = catDF["carbInput"].describe().add_prefix("carb.") + agePump = pd.concat([agePump, carbEvents], axis=1) - else: - isfColHead = "insulinSensitivities" - isf = pd.DataFrame(columns=isfColHeadings) - isfDayColHeadings = ['day', 'isf.min', 'isf.weightedMean', 'isf.max'] - isfDaySummary = 
pd.DataFrame(columns=isfDayColHeadings) - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): - # edge case where actSchedule is float - if isinstance(actSched, float): - actSched = str(int(actSched)) + # very low level cgm stats per age + catDF = cgmLite.groupby("age") + cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") + agePumpCgm = pd.concat([agePump, cgmStats], axis=1) - tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." + actSched]) - tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["isf.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - tempDF["isf_mmolL_U"] = tempDF["amount"] - tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) - endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["isf.localTime"], index=[0]) - tempDF = get_setting_durations(tempDF, "isf", endOfDay) - tempDF = tempDF[:-1] - - tempDaySummary = pd.DataFrame(index=[0]) - tempDaySummary["day"] = tempDF["isf.localTime"].dt.date - tempDaySummary["isf.min"] = tempDF["isf"].min() - tempDaySummary["isf.weightedMean"] = \ - np.sum(tempDF["isf"] * tempDF["isf.durationHours"]) / tempDF["isf.durationHours"].sum() - tempDaySummary["isf.max"] = tempDF["isf"].max() - - isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True) - isfDaySummary = pd.concat([isfDaySummary, tempDaySummary], ignore_index=True) - - isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) - isfDaySummary.reset_index(inplace=True, drop=True) - isfDaySummary.fillna(method='ffill', inplace=True) - - # CIR - cirColHeadings = ["cir.localTime", "cir"] - - if "carbRatio.amount" in list(pumpSettings): - cirColHead = "carbRatio" - pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"] - pumpSettings["cir.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms") - - cir = 
pumpSettings.loc[pumpSettings["carbRatio.amount"].notnull(), cirColHeadings] - - # add a day summary - cirDaySummary = pd.DataFrame() - cirDaySummary["day"] = cir["cir.localTime"].dt.date - cirDaySummary["cir.min"] = cir["cir"] - cirDaySummary["cir.weightedMean"] = cir["cir"] - cirDaySummary["cir.max"] = cir["cir"] - cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) - cirDaySummary.reset_index(inplace=True, drop=True) - cirDaySummary.fillna(method='ffill', inplace=True) + agePumpCgm.reset_index(inplace=True) - else: + ageSummary = pd.merge(ageSummary, agePumpCgm, on="age", how="left") + ageSummary["hashID"] = hashID + allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True) - cirColHead = "carbRatios" - cir = pd.DataFrame(columns=cirColHeadings) - cirDayColHeadings = ['day', 'cir.min', 'cir.weightedMean', 'cir.max'] - cirDaySummary = pd.DataFrame(columns=cirDayColHeadings) - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): - # edge case where actSchedule is float - if isinstance(actSched, float): - actSched = str(int(actSched)) + allAgeSummaries.to_csv(os.path.join(outputPath, + "allAgeSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) - tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." 
+ actSched]) - tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["cir.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - tempDF["cir"] = tempDF["amount"] - endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["cir.localTime"], index=[0]) - tempDF = get_setting_durations(tempDF, "cir", endOfDay) - tempDF = tempDF[:-1] - - tempDaySummary = pd.DataFrame(index=[0]) - tempDaySummary["day"] = tempDF["cir.localTime"].dt.date - tempDaySummary["cir.min"] = tempDF["cir"].min() - tempDaySummary["cir.weightedMean"] = \ - np.sum(tempDF["cir"] * tempDF["cir.durationHours"]) / tempDF["cir.durationHours"].sum() - tempDaySummary["cir.max"] = tempDF["cir"].max() - - cir = pd.concat([cir, tempDF[cirColHeadings]], ignore_index=True) - cirDaySummary = pd.concat([cirDaySummary, tempDaySummary], ignore_index=True) - - cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) - cirDaySummary.reset_index(inplace=True, drop=True) - cirDaySummary.fillna(method='ffill', inplace=True) - - - # CORRECTION TARGET - ctColHeadings = ["ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"] - ctDayColHeadings = ['day', - "ct.low.min", "ct.low.weightedMean", "ct.low.max", - "ct.high.min", "ct.high.weightedMean", "ct.high.max", - "ct.target.min", "ct.target.weightedMean", "ct.target.max", - "ct.range.min", "ct.range.weightedMean", "ct.range.max"] - - if "bgTarget.start" in list(pumpSettings): - ctColHead = "bgTarget." - - for targetType in ["low", "high", "target", "range"]: - if ctColHead + targetType in list(pumpSettings): - pumpSettings["ct." + targetType + "_mmolL"] = \ - pumpSettings[ctColHead + targetType] - - pumpSettings["ct." + targetType] = \ - mmolL_to_mgdL(pumpSettings["ct." + targetType + "_mmolL"]) - else: - pumpSettings["ct." + targetType + "_mmolL"] = np.nan - pumpSettings["ct." 
+ targetType] = np.nan + # repoeat for years living with + catDF = pumpEvents.groupby("ylw") + # actual basal rates + ylwPump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") + ylwPump["basalRate.min"] = catDF["basalRate"].min() + ylwPump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() + ylwPump["basalRate.max"] = catDF["basalRate"].max() - pumpSettings["ct.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ - pd.to_timedelta(pumpSettings[ctColHead + "start"], unit="ms") + # insulin events + insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") + ylwPump = pd.concat([ylwPump, insulinEvents], axis=1) - correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] + # carbs entered in bolus calculator + carbEvents = catDF["carbInput"].describe().add_prefix("carb.") + ylwPump = pd.concat([ylwPump, carbEvents], axis=1) - # add a day summary - ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) - ctDaySummary["day"] = correctionTarget["ct.localTime"].dt.date - # add min, weightedMean, and max - for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: - for stat in [".min", ".weightedMean", ".max"]: - ctDaySummary[targetType + stat] = correctionTarget[targetType] + # very low level cgm stats per age + catDF = cgmLite.groupby("ylw") + cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") + ylwPumpCgm = pd.concat([ylwPump, cgmStats], axis=1) + ylwPumpCgm.reset_index(inplace=True) - ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) - ctDaySummary.reset_index(inplace=True, drop=True) - ctDaySummary.fillna(method='ffill', inplace=True) + ylwSummary = pd.merge(ylwSummary, ylwPumpCgm, on="ylw", how="left") - else: - ctColHead = "bgTargets" - correctionTarget = pd.DataFrame(columns=ctColHeadings) + ylwSummary["hashID"] = hashID + allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True) - ctDaySummary = 
pd.DataFrame(columns=ctDayColHeadings) - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): - # edge case where actSchedule is float - if isinstance(actSched, float): - actSched = str(int(actSched)) + allYlwSummaries.to_csv(os.path.join(outputPath, + "allYlwSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) - tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." + actSched]) - targetTypes = list(set(list(tempDF)) - set(["start"])) - tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["ct.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["ct.localTime"], index=[0]) - tempDF = get_setting_durations(tempDF, "ct", endOfDay) - tempDF = tempDF[:-1] - - tempDaySummary = pd.DataFrame(columns=ctDayColHeadings, index=[0]) - tempDaySummary["day"] = tempDF["ct.localTime"].dt.date - - for targetType in targetTypes: - tempDF["ct." + targetType] = mmolL_to_mgdL(tempDF[targetType]) - - tempDaySummary["ct." + targetType + ".min"] = tempDF["ct." + targetType].min() - tempDaySummary["ct." + targetType + ".weightedMean"] = \ - np.sum(tempDF["ct." + targetType] * tempDF["ct.durationHours"]) / tempDF["ct.durationHours"].sum() - tempDaySummary["ct." + targetType + ".max"] = tempDF["ct." 
+ targetType].max() - - correctionTarget = \ - pd.concat([correctionTarget, - tempDF.drop(columns=['start', - 'target', - 'day', - 'ct.durationHours'])], - ignore_index=True, sort=False) - ctDaySummary = pd.concat([ctDaySummary, tempDaySummary], - ignore_index=True, sort=False) - - ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) - ctDaySummary.fillna(method='ffill', inplace=True) - ctDaySummary.drop_duplicates(inplace=True) - ctDaySummary.reset_index(inplace=True, drop=True) - - # SCHEDULED BASAL RATES - sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"] - sbr = pd.DataFrame(columns=sbrColHeadings) - sbrDayColHeadings = ['day', 'sbr.min', 'sbr.weightedMean', 'sbr.max', 'sbr.type'] - sbrDaySummary = pd.DataFrame(columns=sbrDayColHeadings) - for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): - # edge case where actSchedule is float - if isinstance(actSched, float): - actSched = str(int(actSched)) - if 'Auto Mode' not in actSched: - tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) - tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["sbr.type"] = np.nan - tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0]) - tempDF = get_setting_durations(tempDF, "sbr", endOfDay) - tempDF = tempDF[:-1] - - tempDaySummary = pd.DataFrame(index=[0]) - tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date - tempDaySummary["sbr.min"] = tempDF["rate"].min() - tempDaySummary["sbr.weightedMean"] = \ - np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() - tempDaySummary["sbr.max"] = tempDF["rate"].max() - tempDaySummary["sbr.type"] = np.nan + # %% save data for this person + # outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" + # outputFormat = (f"{minAge:02d}", + # f"{maxAge:02d}", + # f"{minYLW:02d}", + # f"{maxYLW:02d}", + # f"{nDaysClosedLoop:03d}", + # f"{n670gDays:03d}", + # hashID[0:4]) + # outputFolderName = outputString % outputFormat + # outputFolderName_Path = os.path.join(outputPath,"data", outputFolderName) + # if not os.path.exists(outputFolderName_Path): + # os.makedirs(outputFolderName_Path) + # + # fName = outputFolderName + "-allSettings.csv" + # allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) + # fName = outputFolderName + "-pumpEvents.csv" + # pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) + # fName = outputFolderName + "-cgmLite.csv" + # cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) - else: - tempDF = pd.DataFrame(index=[0]) - tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) - tempDF["rate"] = np.nan - tempDF["sbr.type"] = "AutoMode" - - tempDaySummary = pd.DataFrame(index=[0]) - tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date - tempDaySummary["sbr.min"] = np.nan - tempDaySummary["sbr.weightedMean"] = np.nan 
- tempDaySummary["sbr.max"] = np.nan - tempDaySummary["sbr.type"] = "AutoMode" - - sbr = pd.concat([sbr, tempDF[sbrColHeadings]], ignore_index=True) - sbrDaySummary = pd.concat([sbrDaySummary, tempDaySummary], ignore_index=True) - - sbrDaySummary = pd.concat([sbrDaySummary, dataPulledDF], sort=False) - sbrDaySummary.reset_index(inplace=True, drop=True) - sbrDaySummary.fillna(method='ffill', inplace=True) - -# # max basal rate, max bolus amount, and insulin duration -# if "rateMaximum" in list(data): -# pdb.set_trace() -# if "amountMaximum" in list(data): -# pdb.set_trace() -# if "bolus.calculator" in list(data): -# pdb.set_trace() - - - # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND)) - basal = data[data.type == "basal"].copy().dropna(axis=1, how="all") - basal.sort_values("uploadTime", ascending=False, inplace=True) - - basalBeginDate, basalEndDate = getStartAndEndTimes(basal, "day") - metadata["basal.beginDate"] = basalBeginDate - metadata["basal.endDate"] = basalEndDate - - basal, nBasalDuplicatesRemoved = \ - removeDuplicates(basal, ["deliveryType", "deviceTime", "duration", "rate"]) - metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved - - # fill NaNs with 0, as it indicates a suspend (temp basal of 0) - basal.rate.fillna(0, inplace=True) - - # get rid of basals that have durations of 0 - nBasalDuration0 = sum(basal.duration > 0) - basal = basal[basal.duration > 0] - metadata["basal.nBasalDuration0"] = nBasalDuration0 - - # get rid of basal durations that are unrealistic - nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000)) - metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration) - basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan - - # calculate the total amount of insulin delivered (duration * rate) - basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0 - basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] - - # actual basal 
delivered -# abrColHeadings = commonColumnHeadings.copy() - abrColHeadings = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] - abr = basal[abrColHeadings] - if "duration" in list(bolus): - abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) - abr.sort_values("utcTime", inplace=True) - - abr["timezone"].fillna(method='ffill', inplace=True) - abr["timezone"].fillna(method='bfill', inplace=True) - - # get a summary of basals per day - basalDaySummary = get_basalDaySummary(basal) - - - # %% GET CLOSED LOOP DAYS WITH TEMP BASAL DATA - # group data by type - groupedData = data.groupby(by="type") - - isClosedLoopDay, is670g, metadata = \ - getClosedLoopDays(groupedData, 30, metadata) - - # %% CGM DATA - # filter by cgm and sort by uploadTime - cgmData = groupedData.get_group("cbg").dropna(axis=1, how="all") - - # get rid of duplicates that have the same ["deviceTime", "value"] - cgmData, nCgmDuplicatesRemovedDeviceTime = removeCgmDuplicates(cgmData, "deviceTime") - metadata["nCgmDuplicatesRemovedDeviceTime"] = nCgmDuplicatesRemovedDeviceTime - - # get rid of duplicates that have the same ["time", "value"] - cgmData, nCgmDuplicatesRemovedUtcTime = removeCgmDuplicates(cgmData, "time") - metadata["cnCgmDuplicatesRemovedUtcTime"] = nCgmDuplicatesRemovedUtcTime - - # get rid of duplicates that have the same "roundedTime" - cgmData, nCgmDuplicatesRemovedRoundedTime = removeDuplicates(cgmData, "roundedTime") - metadata["nCgmDuplicatesRemovedRoundedTime"] = nCgmDuplicatesRemovedRoundedTime - - # get start and end times - cgmBeginDate, cgmEndDate = getStartAndEndTimes(cgmData, "day") - metadata["cgm.beginDate"] = cgmBeginDate - metadata["cgm.endDate"] = cgmEndDate - - # get a list of dexcom cgms - cgmData, percentDexcom = getListOfDexcomCGMDays(cgmData) - metadata["cgm.percentDexcomCGM"] = percentDexcom - - # group by date (day) and get stats - catDF = cgmData.groupby(cgmData["day"]) - cgmRecordsPerDay = \ - pd.DataFrame(catDF.value.count()). 
\ - rename(columns={"value": "cgm.count"}) - dayDate = catDF.day.describe()["top"] - dexcomCGM = catDF.dexcomCGM.describe()["top"] - nTypesCGM = catDF.dexcomCGM.describe()["unique"] - cgmRecordsPerDay["cgm.dexcomOnly"] = \ - (dexcomCGM & (nTypesCGM == 1)) - cgmRecordsPerDay["date"] = cgmRecordsPerDay.index - - # filter the cgm data -# cgmColHeadings = commonColumnHeadings.copy() - cgmColHeadings = ["utcTime", "timezone", "roundedTime", "value"] - - # get data in mg/dL units - cgm = cgmData[cgmColHeadings] - cgm = cgm.rename(columns={'value': 'mmol_L'}) - cgm["mg_dL"] = mmolL_to_mgdL(cgm["mmol_L"]).astype(int) - - - # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW - - # COMBINE DAY SUMMARIES - # group by date (day) and get stats - catDF = data.groupby(data["day"]) - dataPerDay = \ - pd.DataFrame(catDF.hashID.describe()["top"]). \ - rename(columns={"top": "hashID"}) - dataPerDay["age"] = catDF.age.mean() - dataPerDay["ylw"] = catDF.ylw.mean() - dataPerDay["timezone"] = catDF.timezone.describe()["top"] - - # calculate all of the data start and end range - # this can be used for looking at settings - dayBeginDate = min(cgmBeginDate, bolusBeginDate, basalBeginDate) - dayEndDate = max(cgmEndDate, bolusEndDate, basalEndDate) - metadata["day.beginDate"] = dayBeginDate - metadata["day.endDate"] = dayEndDate - rng = pd.date_range(dayBeginDate, dayEndDate).date - dayData = pd.DataFrame(rng, columns=["day"]) - for dfType in [dataPerDay, basalDaySummary, bolusDaySummary, cgmRecordsPerDay]: - dayData = pd.merge(dayData, dfType.reset_index(), on="day", how="left") - for dfType in [isClosedLoopDay, is670g]: - dayData = pd.merge(dayData, dfType, on="day", how="left") - - dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 - dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) - - dayData["timezone"].fillna(method='ffill', inplace=True) - dayData["timezone"].fillna(method='bfill', inplace=True) - - dayData["isDSTChangeDay"] = dayData[['day', 
'timezone']].apply(lambda x: isDSTChangeDay(*x), axis=1) - dayData["date"] = pd.to_datetime(dayData["day"]) - dayData["tzo"] = dayData[['date', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) - - # add settings to the dayData - dayData = pd.merge(dayData, isfDaySummary, on="day", how="left") - dayData = pd.merge(dayData, cirDaySummary, on="day", how="left") - dayData = pd.merge(dayData, ctDaySummary, on="day", how="left") - dayData = pd.merge(dayData, sbrDaySummary, on="day", how="left") - - # fill data forward - fillList = ['isf.min', - 'isf.weightedMean', - 'isf.max', - 'cir.min', - 'cir.weightedMean', - 'cir.max', - 'ct.low.min', 'ct.low.weightedMean', 'ct.low.max', - 'ct.high.min', 'ct.high.weightedMean', 'ct.high.max', - 'ct.target.min', 'ct.target.weightedMean', 'ct.target.max', - 'ct.range.min', 'ct.range.weightedMean', 'ct.range.max', - 'sbr.min', - 'sbr.weightedMean', - 'sbr.max', - 'sbr.type'] - for fl in fillList: - dayData[fl].fillna(method='ffill', inplace=True) - - # calculate the start and end of contiguous data - # these dates can be used when simulating and predicting, where - # you need both pump and cgm data - contiguousBeginDate = max(cgmBeginDate, bolusBeginDate, basalBeginDate) - contiguousEndDate = min(cgmEndDate, bolusEndDate, basalEndDate) - metadata["contiguous.beginDate"] = contiguousBeginDate - metadata["contiguous.endDate"] = contiguousEndDate - - # get a summary by age, and ylw - catDF = dayData.groupby("age") - ageSummary = pd.DataFrame(catDF.validPumpData.sum()) - ageSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) - ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) - ageSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) - ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) - - # add in isf stats - ageSummary["isf.nDays"] = catDF["isf.min"].count() - ageSummary["isf.min"] = catDF["isf.min"].min() - ageSummary["isf.weightedMean"] = 
catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() - ageSummary["isf.max"] = catDF["isf.max"].max() - - # add cir stats - ageSummary["cir.nDays"] = catDF["cir.min"].count() - ageSummary["cir.min"] = catDF["cir.min"].min() - ageSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() - ageSummary["cir.max"] = catDF["cir.max"].max() - - # add sbr stats - ageSummary["sbr.nDays"] = catDF["sbr.min"].count() - ageSummary["sbr.min"] = catDF["sbr.min"].min() - ageSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() - ageSummary["sbr.max"] = catDF["sbr.max"].max() - ageSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() - - # correctionTarget stats - for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: - for stat in [".min", ".weightedMean", ".max"]: - ch = targetType + stat - ageSummary[ch + ".nDays"] = catDF[ch].count() - ageSummary[ch + ".min"] = catDF[ch].min() - ageSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() - ageSummary[ch + ".max"] = catDF[ch].max() - - ageSummary.reset_index(inplace=True) - - analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 28) & - (ageSummary["nDaysValidCgm"]> 28))] - minAge = analysisCriterion["age"].min() - maxAge = analysisCriterion["age"].max() - nDaysClosedLoop = analysisCriterion["nDaysClosedLoop"].sum() - n670gDays = analysisCriterion["n670gDays"].sum() - metadata["minAge"] = minAge - metadata["maxAge"] = maxAge - metadata["nDaysClosedLoop"] = nDaysClosedLoop - metadata["n670gDays"] = n670gDays - - catDF = dayData.groupby("ylw") - ylwSummary = pd.DataFrame(catDF.validPumpData.sum()) - ylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) - ylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) - ylwSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) - ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) - - 
ylwSummary["isf.nDays"] = catDF["isf.min"].count() - ylwSummary["isf.min"] = catDF["isf.min"].min() - ylwSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() - ylwSummary["isf.max"] = catDF["isf.max"].max() - - # add cir stats - ylwSummary["cir.nDays"] = catDF["cir.min"].count() - ylwSummary["cir.min"] = catDF["cir.min"].min() - ylwSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() - ylwSummary["cir.max"] = catDF["cir.max"].max() - - # add sbr stats - ylwSummary["sbr.nDays"] = catDF["sbr.min"].count() - ylwSummary["sbr.min"] = catDF["sbr.min"].min() - ylwSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() - ylwSummary["sbr.max"] = catDF["sbr.max"].max() - ylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() - - # correctionTarget stats - for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: - for stat in [".min", ".weightedMean", ".max"]: - ch = targetType + stat - ylwSummary[ch + ".nDays"] = catDF[ch].count() - ylwSummary[ch + ".min"] = catDF[ch].min() - ylwSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() - ylwSummary[ch + ".max"] = catDF[ch].max() - - ylwSummary.reset_index(inplace=True) - - analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 28) & - (ylwSummary["nDaysValidCgm"]> 28))] - minYLW = analysisCriterion["ylw"].min() - maxYLW = analysisCriterion["ylw"].max() - metadata["minYLW"] = minYLW - metadata["maxYLW"] = maxYLW - - - # %% calculate local time - abr["date"] = pd.to_datetime(abr["utcTime"].dt.date) - abr = pd.merge(abr, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") - abr["localTime"] = abr["utcTime"] + pd.to_timedelta(abr["tzo"], unit="m") - - cgm["date"] = pd.to_datetime(cgm["utcTime"].dt.date) - cgm = pd.merge(cgm, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") - cgm["localTime"] = cgm["utcTime"] + pd.to_timedelta(cgm["tzo"], 
unit="m") - - bolusEvents["date"] = pd.to_datetime(bolusEvents["utcTime"].dt.date) - bolusEvents = pd.merge(bolusEvents, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") - bolusEvents["localTime"] = bolusEvents["utcTime"] + pd.to_timedelta(bolusEvents["tzo"], unit="m") - - - # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) - # all settings - - allSettings = pd.merge(isf.rename(columns={"isf.localTime": "localTime"}), - cir.rename(columns={"cir.localTime": "localTime"}), - how="outer", on="localTime") - allSettings = pd.merge(allSettings, - sbr.rename(columns={"rate": "sbr", - "type": "sbr.type", - "sbr.localTime": "localTime"}), - how="outer", on="localTime") - allSettings = pd.merge(allSettings, - correctionTarget.rename(columns={"ct.localTime": "localTime"}), - how="outer", on="localTime") - allSettings["hashID"] = hashID - allSettings["age"] = np.floor((allSettings["localTime"] - bDate).dt.days/365.25).astype(int) - allSettings["ylw"] = np.floor((allSettings["localTime"] - dDate).dt.days/365.25).astype(int) - allSettings = round_time(allSettings, timeIntervalMinutes=5, - timeField="localTime", - roundedTimeFieldName="localRoundedTime", - startWithFirstRecord=True, verbose=False) - colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", - "isf", "cir", "sbr", - "ct.low", "ct.high", "ct.target", "ct.range", - "sbr.type", "isf_mmolL_U"] - allSettings = allSettings[colOrder] + # %% save the processed data (saving this data will take up a lot of space and time) + #data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) + #basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) + #bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) + #cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) + #pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) - 
fieldsToDrop = ["utcTime", "timezone", "roundedTime", "date", "tzo", "isDSTChangeDay"] - pumpEvents = pd.merge(abr.drop(columns=fieldsToDrop), - bolusEvents.drop(columns=fieldsToDrop), - how="outer", on="localTime") - pumpEvents["type"].fillna("bolus", inplace=True) - pumpEvents["eventType"].fillna("basal", inplace=True) - pumpEvents["hashID"] = hashID - pumpEvents["age"] = np.floor((pumpEvents["localTime"] - bDate).dt.days/365.25).astype(int) - pumpEvents["ylw"] = np.floor((pumpEvents["localTime"] - dDate).dt.days/365.25).astype(int) - pumpEvents = round_time(pumpEvents, timeIntervalMinutes=5, - timeField="localTime", - roundedTimeFieldName="localRoundedTime", - startWithFirstRecord=True, verbose=False) - - - colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", - "rate", "durationHours", - "unitsInsulin", "carbInput", "type", "eventType", "subType", - "isf", "isf_mmolL_U", "insulinCarbRatio", "insulinOnBoard", - "bg_mgdL", "bg_mmolL"] - - pumpEvents = pumpEvents[colOrder] - - cgmLite = cgm.drop(columns=fieldsToDrop) - cgmLite["hashID"] = hashID - cgmLite["age"] = np.floor((cgmLite["localTime"] - bDate).dt.days/365.25).astype(int) - cgmLite["ylw"] = np.floor((cgmLite["localTime"] - dDate).dt.days/365.25).astype(int) - cgmLite = round_time(cgmLite, timeIntervalMinutes=5, - timeField="localTime", - roundedTimeFieldName="localRoundedTime", - startWithFirstRecord=True, verbose=False) - - colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", - "mg_dL", "mmol_L"] - - cgmLite = cgmLite[colOrder] - - - # %% SAVE RESULTS - - # age and ylw stats - pumpEvents["rateTimesDurationHours"] = pumpEvents["rate"] * pumpEvents["durationHours"] - pumpEvents.rename(columns={"rate":"basalRate"}, inplace=True) - catDF = pumpEvents.groupby("age") - - # actual basal rates - agePump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") - agePump["basalRate.min"] = catDF["basalRate"].min() - agePump["basalRate.weightedMean"] = 
catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() - agePump["basalRate.max"] = catDF["basalRate"].max() - - # insulin events - insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") - agePump = pd.concat([agePump, insulinEvents], axis=1) - - # carbs entered in bolus calculator - carbEvents = catDF["carbInput"].describe().add_prefix("carb.") - agePump = pd.concat([agePump, carbEvents], axis=1) - - # very low level cgm stats per age - catDF = cgmLite.groupby("age") - cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") - agePumpCgm = pd.concat([agePump, cgmStats], axis=1) - - agePumpCgm.reset_index(inplace=True) - - ageSummary = pd.merge(ageSummary, agePumpCgm, on="age", how="left") - ageSummary["hashID"] = hashID - allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True) - - allAgeSummaries.to_csv(os.path.join(outputPath, - "allAgeSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) - - # repoeat for years living with - catDF = pumpEvents.groupby("ylw") - # actual basal rates - ylwPump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") - ylwPump["basalRate.min"] = catDF["basalRate"].min() - ylwPump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() - ylwPump["basalRate.max"] = catDF["basalRate"].max() - - # insulin events - insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") - ylwPump = pd.concat([ylwPump, insulinEvents], axis=1) - - # carbs entered in bolus calculator - carbEvents = catDF["carbInput"].describe().add_prefix("carb.") - ylwPump = pd.concat([ylwPump, carbEvents], axis=1) - - # very low level cgm stats per age - catDF = cgmLite.groupby("ylw") - cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") - ylwPumpCgm = pd.concat([ylwPump, cgmStats], axis=1) - - ylwPumpCgm.reset_index(inplace=True) - - ylwSummary = pd.merge(ylwSummary, ylwPumpCgm, on="ylw", how="left") - - ylwSummary["hashID"] = hashID - 
allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True) - - allYlwSummaries.to_csv(os.path.join(outputPath, - "allYlwSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) - - # %% save data for this person -# outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" -# outputFormat = (f"{minAge:02d}", -# f"{maxAge:02d}", -# f"{minYLW:02d}", -# f"{maxYLW:02d}", -# f"{nDaysClosedLoop:03d}", -# f"{n670gDays:03d}", -# hashID[0:4]) -# outputFolderName = outputString % outputFormat -# outputFolderName_Path = os.path.join(outputPath,"data", outputFolderName) -# if not os.path.exists(outputFolderName_Path): -# os.makedirs(outputFolderName_Path) -# -# fName = outputFolderName + "-allSettings.csv" -# allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) -# fName = outputFolderName + "-pumpEvents.csv" -# pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) -# fName = outputFolderName + "-cgmLite.csv" -# cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) - - - - # %% save the processed data (saving this data will take up a lot of space and time) - #data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) - #basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) - #bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) - #cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) - #pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) - + else: + metadata["flags"] = "no bolus wizard data" else: - metadata["flags"] = "no bolus wizard data" + metadata["flags"] = "missing either pump or cgm data" else: - metadata["flags"] = "missing either pump or cgm data" + metadata["flags"] = "file contains no data" else: - metadata["flags"] = "file contains no data" + metadata["flags"] = "file does not exist" else: - metadata["flags"] = "file does not exist" - else: - metadata["flags"] = "missing 
bDay/dDay" + metadata["flags"] = "missing bDay/dDay" + + except: + print("something is broke dIndex=", dIndex) + metadata["flags"] = "something is broke" + # write metaData to allMetadata allMetadata = pd.concat([allMetadata, metadata], axis=0, sort=True) From ef2f955d29dc0de91d8dcd3ee2d12d8ea0ef3c58 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 15 Jan 2019 18:52:14 -0600 Subject: [PATCH 38/78] update flatten_json to include a list of fields to NOT flatten --- .../predict-simulate/get-users-settings-and-events.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 1c979d2f..6de58293 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -116,10 +116,8 @@ def tslimCalibrationFix(df): # OTHER -def tempRemoveFields(df): - removeFields = ["suppressed", - "recommended", - "payload"] +def tempRemoveFields(df, removeFields): + tempRemoveFields = list(set(df) & set(removeFields)) tempDf = df[tempRemoveFields] @@ -128,10 +126,9 @@ def tempRemoveFields(df): return df, tempDf -def flattenJson(df): - +def flattenJson(df, doNotFlattenList): # remove fields that we don't want to flatten - df, holdData = tempRemoveFields(df) + df, holdData = tempRemoveFields(df, doNotFlattenList) # get a list of data types of column headings columnHeadings = list(df) From c10f4c6183bbff15b91d86091d040756becfc001 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 17 Jan 2019 04:43:49 -0600 Subject: [PATCH 39/78] syntax of new flatten_json function --- projects/predict-simulate/get-users-settings-and-events.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 6de58293..fe4c00ec 100644 --- 
a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -611,7 +611,10 @@ def getPumpSettingsStats(df, col, pumpCol): data.sort_values("time", inplace=True) # flatten the embedded json - data = flattenJson(data) + doNotFlattenList = ["suppressed", "recommended", "payload"] + data = flattenJson(data, doNotFlattenList) + + pdb.set_trace() # %% CLEAN DATA From 7750289c40d42f10d8cef06d522b2e5351ab6fc2 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 18 Jan 2019 05:44:55 -0600 Subject: [PATCH 40/78] setting summaries per day should only have one entry per day --- .../get-users-settings-and-events.py | 78 +++++++++---------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index fe4c00ec..693032a3 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -540,13 +540,7 @@ def getPumpSettingsStats(df, col, pumpCol): return df, df2 - - - -# %% LOAD IN ONE FILE, BUT EVENTUALLY THIS WILL LOOOP THROUGH ALL USER'S - - - +# %% START OF CODE dataPulledDate = args.dateStamp dataPulledDF = pd.DataFrame(pd.to_datetime(dataPulledDate), columns=["day"], index=[0]) dataPulledDF["day"] = dataPulledDF["day"].dt.date @@ -556,7 +550,6 @@ def getPumpSettingsStats(df, col, pumpCol): phiOutputPath = os.path.join(donorPath, "PHI-settings-and-events") outputPath = os.path.join(donorPath, "settings-and-events") - # create anonExportDataPath folders if not os.path.exists(phiOutputPath): os.makedirs(phiOutputPath) @@ -614,8 +607,6 @@ def getPumpSettingsStats(df, col, pumpCol): doNotFlattenList = ["suppressed", "recommended", "payload"] data = flattenJson(data, doNotFlattenList) - pdb.set_trace() - # %% CLEAN DATA # remove negative durations @@ -662,10 +653,6 @@ def getPumpSettingsStats(df, col, pumpCol): 
data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) - # commonColumnHeadings = ["hashID", - # "age", - # "ylw"] - # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) bolus = mergeWizardWithBolus(data) @@ -689,7 +676,6 @@ def getPumpSettingsStats(df, col, pumpCol): bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) - # bolusCH = commonColumnHeadings.copy() bolusCH = ["utcTime", "timezone", "roundedTime", "normal", "carbInput", "subType", "insulinOnBoard", "bgInput", "isf", "isf_mmolL_U", "insulinCarbRatio"] @@ -752,9 +738,6 @@ def getPumpSettingsStats(df, col, pumpCol): isfDaySummary["isf.min"] = isf["isf"] isfDaySummary["isf.weightedMean"] = isf["isf"] isfDaySummary["isf.max"] = isf["isf"] - isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) - isfDaySummary.reset_index(inplace=True, drop=True) - isfDaySummary.fillna(method='ffill', inplace=True) else: isfColHead = "insulinSensitivities" @@ -785,9 +768,14 @@ def getPumpSettingsStats(df, col, pumpCol): isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True) isfDaySummary = pd.concat([isfDaySummary, tempDaySummary], ignore_index=True) - isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) - isfDaySummary.reset_index(inplace=True, drop=True) - isfDaySummary.fillna(method='ffill', inplace=True) + isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False) + isfDaySummary.reset_index(inplace=True, drop=True) + isfDaySummary.fillna(method='ffill', inplace=True) + # it is possible for someone to someone to change their schedule + # in the middle of the day, take the latest change as the schedule + # for that day. 
+ isfDaySummary.drop_duplicates(subset="day", keep="last", inplace=True) + isfDaySummary.reset_index(inplace=True, drop=True) # CIR cirColHeadings = ["cir.localTime", "cir"] @@ -806,9 +794,6 @@ def getPumpSettingsStats(df, col, pumpCol): cirDaySummary["cir.min"] = cir["cir"] cirDaySummary["cir.weightedMean"] = cir["cir"] cirDaySummary["cir.max"] = cir["cir"] - cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) - cirDaySummary.reset_index(inplace=True, drop=True) - cirDaySummary.fillna(method='ffill', inplace=True) else: @@ -839,9 +824,13 @@ def getPumpSettingsStats(df, col, pumpCol): cir = pd.concat([cir, tempDF[cirColHeadings]], ignore_index=True) cirDaySummary = pd.concat([cirDaySummary, tempDaySummary], ignore_index=True) - cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) - cirDaySummary.reset_index(inplace=True, drop=True) - cirDaySummary.fillna(method='ffill', inplace=True) + cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False) + cirDaySummary.fillna(method='ffill', inplace=True) + # it is possible for someone to someone to change their schedule + # in the middle of the day, take the latest change as the schedule + # for that day. 
+ cirDaySummary.drop_duplicates(subset="day", keep="last", inplace=True) + cirDaySummary.reset_index(inplace=True, drop=True) # CORRECTION TARGET @@ -880,10 +869,6 @@ def getPumpSettingsStats(df, col, pumpCol): ctDaySummary[targetType + stat] = correctionTarget[targetType] - ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) - ctDaySummary.reset_index(inplace=True, drop=True) - ctDaySummary.fillna(method='ffill', inplace=True) - else: ctColHead = "bgTargets" correctionTarget = pd.DataFrame(columns=ctColHeadings) @@ -923,10 +908,13 @@ def getPumpSettingsStats(df, col, pumpCol): ctDaySummary = pd.concat([ctDaySummary, tempDaySummary], ignore_index=True, sort=False) - ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) - ctDaySummary.fillna(method='ffill', inplace=True) - ctDaySummary.drop_duplicates(inplace=True) - ctDaySummary.reset_index(inplace=True, drop=True) + ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) + ctDaySummary.fillna(method='ffill', inplace=True) + # it is possible for someone to someone to change their schedule + # in the middle of the day, take the latest change as the schedule + # for that day. + ctDaySummary.drop_duplicates(subset="day", keep="last", inplace=True) + ctDaySummary.reset_index(inplace=True, drop=True) # SCHEDULED BASAL RATES sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"] @@ -940,7 +928,7 @@ def getPumpSettingsStats(df, col, pumpCol): if 'Auto Mode' not in actSched: tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." 
+ actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["sbr.type"] = np.nan + tempDF["sbr.type"] = "regular" tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0]) tempDF = get_setting_durations(tempDF, "sbr", endOfDay) @@ -952,7 +940,7 @@ def getPumpSettingsStats(df, col, pumpCol): tempDaySummary["sbr.weightedMean"] = \ np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() tempDaySummary["sbr.max"] = tempDF["rate"].max() - tempDaySummary["sbr.type"] = np.nan + tempDaySummary["sbr.type"] = "regular" else: tempDF = pd.DataFrame(index=[0]) @@ -972,9 +960,15 @@ def getPumpSettingsStats(df, col, pumpCol): sbrDaySummary = pd.concat([sbrDaySummary, tempDaySummary], ignore_index=True) sbrDaySummary = pd.concat([sbrDaySummary, dataPulledDF], sort=False) - sbrDaySummary.reset_index(inplace=True, drop=True) sbrDaySummary.fillna(method='ffill', inplace=True) + # it is possible for someone to someone to change their schedule + # in the middle of the day, take the latest change as the schedule + # for that day. 
+ sbrDaySummary.drop_duplicates(subset="day", keep="last", inplace=True) + sbrDaySummary.reset_index(inplace=True, drop=True) + + # %% test this later # # max basal rate, max bolus amount, and insulin duration # if "rateMaximum" in list(data): # pdb.set_trace() @@ -1014,7 +1008,6 @@ def getPumpSettingsStats(df, col, pumpCol): basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] # actual basal delivered - # abrColHeadings = commonColumnHeadings.copy() abrColHeadings = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] abr = basal[abrColHeadings] if "duration" in list(bolus): @@ -1073,7 +1066,6 @@ def getPumpSettingsStats(df, col, pumpCol): cgmRecordsPerDay["date"] = cgmRecordsPerDay.index # filter the cgm data - # cgmColHeadings = commonColumnHeadings.copy() cgmColHeadings = ["utcTime", "timezone", "roundedTime", "value"] # get data in mg/dL units @@ -1156,6 +1148,7 @@ def getPumpSettingsStats(df, col, pumpCol): ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) ageSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + pdb.set_trace() # add in isf stats ageSummary["isf.nDays"] = catDF["isf.min"].count() @@ -1356,7 +1349,7 @@ def getPumpSettingsStats(df, col, pumpCol): allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True) allAgeSummaries.to_csv(os.path.join(outputPath, - "allAgeSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) + "allAgeSummaries-dIndex-" + str(startIndex) + ".csv")) # repoeat for years living with catDF = pumpEvents.groupby("ylw") @@ -1387,7 +1380,8 @@ def getPumpSettingsStats(df, col, pumpCol): allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True) allYlwSummaries.to_csv(os.path.join(outputPath, - "allYlwSummaries-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) + "allYlwSummaries-dIndex-" + str(startIndex) + ".csv")) + # %% save 
data for this person # outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" From a2c02e423a8f12b095a7ff1a88974719b477af6a Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 18 Jan 2019 05:45:50 -0600 Subject: [PATCH 41/78] get rid of break --- projects/predict-simulate/get-users-settings-and-events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 693032a3..ff850b21 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -1148,7 +1148,6 @@ def getPumpSettingsStats(df, col, pumpCol): ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) ageSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) - pdb.set_trace() # add in isf stats ageSummary["isf.nDays"] = catDF["isf.min"].count() From 7ce58a93d147e8a8bb543c47582f7988610cc252 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 18 Jan 2019 06:30:09 -0600 Subject: [PATCH 42/78] add deviceId to correction target data --- .../predict-simulate/get-users-settings-and-events.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index ff850b21..b09ca4e6 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -834,8 +834,8 @@ def getPumpSettingsStats(df, col, pumpCol): # CORRECTION TARGET - ctColHeadings = ["ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"] - ctDayColHeadings = ['day', + ctColHeadings = ['deviceId', "ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"] + ctDayColHeadings = ['day', 'deviceId', "ct.low.min", "ct.low.weightedMean", "ct.low.max", 
"ct.high.min", "ct.high.weightedMean", "ct.high.max", "ct.target.min", "ct.target.weightedMean", "ct.target.max", @@ -863,6 +863,7 @@ def getPumpSettingsStats(df, col, pumpCol): # add a day summary ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) ctDaySummary["day"] = correctionTarget["ct.localTime"].dt.date + ctDaySummary["deviceId"] = correctionTarget["deviceId"] # add min, weightedMean, and max for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: for stat in [".min", ".weightedMean", ".max"]: @@ -882,6 +883,7 @@ def getPumpSettingsStats(df, col, pumpCol): tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." + actSched]) targetTypes = list(set(list(tempDF)) - set(["start"])) tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["deviceId"] = pumpSettings.loc[p, "deviceId"] tempDF["ct.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["ct.localTime"], index=[0]) tempDF = get_setting_durations(tempDF, "ct", endOfDay) @@ -889,6 +891,7 @@ def getPumpSettingsStats(df, col, pumpCol): tempDaySummary = pd.DataFrame(columns=ctDayColHeadings, index=[0]) tempDaySummary["day"] = tempDF["ct.localTime"].dt.date + tempDaySummary["deviceId"] = tempDF["deviceId"] for targetType in targetTypes: tempDF["ct." 
+ targetType] = mmolL_to_mgdL(tempDF[targetType]) @@ -1177,6 +1180,8 @@ def getPumpSettingsStats(df, col, pumpCol): ageSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() ageSummary[ch + ".max"] = catDF[ch].max() + + ageSummary.reset_index(inplace=True) analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 28) & @@ -1271,7 +1276,7 @@ def getPumpSettingsStats(df, col, pumpCol): startWithFirstRecord=True, verbose=False) colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", - "isf", "cir", "sbr", + "isf", "cir", "sbr", "deviceId", "ct.low", "ct.high", "ct.target", "ct.range", "sbr.type", "isf_mmolL_U"] allSettings = allSettings[colOrder] From d576b42ec8fa0eb790868a1c9511d1cb097a6f13 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 18 Jan 2019 08:46:39 -0600 Subject: [PATCH 43/78] fix correction target to match how pumps set correction target medtronic uses the upper limit of the range --- .../get-users-settings-and-events.py | 111 +++++++++++------- 1 file changed, 67 insertions(+), 44 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index b09ca4e6..74a142a2 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -835,11 +835,8 @@ def getPumpSettingsStats(df, col, pumpCol): # CORRECTION TARGET ctColHeadings = ['deviceId', "ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"] - ctDayColHeadings = ['day', 'deviceId', - "ct.low.min", "ct.low.weightedMean", "ct.low.max", - "ct.high.min", "ct.high.weightedMean", "ct.high.max", - "ct.target.min", "ct.target.weightedMean", "ct.target.max", - "ct.range.min", "ct.range.weightedMean", "ct.range.max"] + ctDayColHeadings = ['day', 'deviceId', "ct.low", "ct.high", "ct.target", "ct.range", + "ct.target.min", "ct.target.weightedMean", "ct.target.max"] if "bgTarget.start" in list(pumpSettings): ctColHead = 
"bgTarget." @@ -864,13 +861,26 @@ def getPumpSettingsStats(df, col, pumpCol): ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) ctDaySummary["day"] = correctionTarget["ct.localTime"].dt.date ctDaySummary["deviceId"] = correctionTarget["deviceId"] - # add min, weightedMean, and max + + # medtronic pumps use the target high as the correction target + if sum(correctionTarget.deviceId.str.contains("ed")) > 0: + correctionTarget.loc[correctionTarget.deviceId.str.contains("ed"), "ct.target"] = \ + correctionTarget.loc[correctionTarget.deviceId.str.contains("ed"), 'ct.high'] + + if sum(correctionTarget.deviceId.str.contains("MMT")) > 0: + correctionTarget.loc[correctionTarget.deviceId.str.contains("MMT"), "ct.target"] = \ + correctionTarget.loc[correctionTarget.deviceId.str.contains("MMT"), 'ct.high'] + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: - for stat in [".min", ".weightedMean", ".max"]: - ctDaySummary[targetType + stat] = correctionTarget[targetType] + ctDaySummary[targetType] = correctionTarget[targetType] + + ctDaySummary["ct.target.min"] = correctionTarget["ct.target"] + ctDaySummary["ct.target.weightedMean"] = correctionTarget["ct.target"] + ctDaySummary["ct.target.max"] = correctionTarget["ct.target"] else: + ctColHead = "bgTargets" correctionTarget = pd.DataFrame(columns=ctColHeadings) @@ -884,32 +894,46 @@ def getPumpSettingsStats(df, col, pumpCol): targetTypes = list(set(list(tempDF)) - set(["start"])) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["deviceId"] = pumpSettings.loc[p, "deviceId"] + + for targetType in ["low", "high", "target", "range"]: + if targetType in list(tempDF): + tempDF["ct." + targetType + "_mmolL"] = \ + tempDF[targetType] + + tempDF["ct." + targetType] = \ + mmolL_to_mgdL(tempDF["ct." + targetType + "_mmolL"]) + else: + tempDF["ct." + targetType + "_mmolL"] = np.nan + tempDF["ct." 
+ targetType] = np.nan + tempDF["ct.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["ct.localTime"], index=[0]) tempDF = get_setting_durations(tempDF, "ct", endOfDay) tempDF = tempDF[:-1] - tempDaySummary = pd.DataFrame(columns=ctDayColHeadings, index=[0]) + # medtronic pumps use the target high as the correction target + if sum(tempDF.deviceId.str.contains("ed")) > 0: + tempDF.loc[tempDF.deviceId.str.contains("ed"), "ct.target"] = \ + tempDF.loc[tempDF.deviceId.str.contains("ed"), 'ct.high'] + + if sum(tempDF.deviceId.str.contains("MMT")) > 0: + tempDF.loc[tempDF.deviceId.str.contains("MMT"), "ct.target"] = \ + tempDF.loc[tempDF.deviceId.str.contains("MMT"), 'ct.high'] + + tempDaySummary = pd.DataFrame(index=[0], columns=ctDayColHeadings) tempDaySummary["day"] = tempDF["ct.localTime"].dt.date tempDaySummary["deviceId"] = tempDF["deviceId"] + tempDaySummary["ct.target.min"] = tempDF["ct.target"].min() + tempDaySummary["ct.target.weightedMean"] = \ + np.sum(tempDF["ct.target"] * tempDF["ct.durationHours"]) / tempDF["ct.durationHours"].sum() + tempDaySummary["ct.target.max"] = tempDF["ct.target"].max() - for targetType in targetTypes: - tempDF["ct." + targetType] = mmolL_to_mgdL(tempDF[targetType]) + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + tempDaySummary[targetType] = tempDF[targetType] - tempDaySummary["ct." + targetType + ".min"] = tempDF["ct." + targetType].min() - tempDaySummary["ct." + targetType + ".weightedMean"] = \ - np.sum(tempDF["ct." + targetType] * tempDF["ct.durationHours"]) / tempDF["ct.durationHours"].sum() - tempDaySummary["ct." + targetType + ".max"] = tempDF["ct." 
+ targetType].max() - correctionTarget = \ - pd.concat([correctionTarget, - tempDF.drop(columns=['start', - 'target', - 'day', - 'ct.durationHours'])], - ignore_index=True, sort=False) - ctDaySummary = pd.concat([ctDaySummary, tempDaySummary], - ignore_index=True, sort=False) + correctionTarget = pd.concat([correctionTarget, tempDF[ctColHeadings]], ignore_index=True) + ctDaySummary = pd.concat([ctDaySummary, tempDaySummary[ctDayColHeadings]], ignore_index=True) ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False) ctDaySummary.fillna(method='ffill', inplace=True) @@ -919,6 +943,10 @@ def getPumpSettingsStats(df, col, pumpCol): ctDaySummary.drop_duplicates(subset="day", keep="last", inplace=True) ctDaySummary.reset_index(inplace=True, drop=True) + + print(correctionTarget) + print(ctDaySummary) + # SCHEDULED BASAL RATES sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"] sbr = pd.DataFrame(columns=sbrColHeadings) @@ -1125,10 +1153,13 @@ def getPumpSettingsStats(df, col, pumpCol): 'cir.min', 'cir.weightedMean', 'cir.max', - 'ct.low.min', 'ct.low.weightedMean', 'ct.low.max', - 'ct.high.min', 'ct.high.weightedMean', 'ct.high.max', - 'ct.target.min', 'ct.target.weightedMean', 'ct.target.max', - 'ct.range.min', 'ct.range.weightedMean', 'ct.range.max', + 'ct.low', + 'ct.high', + 'ct.target', + 'ct.range', + 'ct.target.min', + 'ct.target.weightedMean', + 'ct.target.max', 'sbr.min', 'sbr.weightedMean', 'sbr.max', @@ -1172,15 +1203,10 @@ def getPumpSettingsStats(df, col, pumpCol): ageSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() # correctionTarget stats - for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: - for stat in [".min", ".weightedMean", ".max"]: - ch = targetType + stat - ageSummary[ch + ".nDays"] = catDF[ch].count() - ageSummary[ch + ".min"] = catDF[ch].min() - ageSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() - ageSummary[ch + ".max"] = catDF[ch].max() - - + ageSummary["ct.nDays"] = 
catDF["ct.target.min"].count() + ageSummary["ct.target.min"] = catDF["ct.target.min"].min() + ageSummary["ct.target.weightedMean"] = catDF["ct.target.weightedMean"].sum() / catDF["ct.target.weightedMean"].count() + ageSummary["ct.target.max"] = catDF["ct.target.max"].max() ageSummary.reset_index(inplace=True) @@ -1221,13 +1247,10 @@ def getPumpSettingsStats(df, col, pumpCol): ylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() # correctionTarget stats - for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: - for stat in [".min", ".weightedMean", ".max"]: - ch = targetType + stat - ylwSummary[ch + ".nDays"] = catDF[ch].count() - ylwSummary[ch + ".min"] = catDF[ch].min() - ylwSummary[ch + ".weightedMean"] = catDF[ch].sum() / catDF[ch].count() - ylwSummary[ch + ".max"] = catDF[ch].max() + ylwSummary["ct.nDays"] = catDF["ct.target.min"].count() + ylwSummary["ct.target.min"] = catDF["ct.target.min"].min() + ylwSummary["ct.target.weightedMean"] = catDF["ct.target.weightedMean"].sum() / catDF["ct.target.weightedMean"].count() + ylwSummary["ct.target.max"] = catDF["ct.target.max"].max() ylwSummary.reset_index(inplace=True) From 5ed36901549819d997e275eeb646b2955d880968 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 18 Jan 2019 09:07:26 -0600 Subject: [PATCH 44/78] only check deviceId if payload exists --- .../predict-simulate/get-users-settings-and-events.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 74a142a2..4c8cff6f 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -90,11 +90,15 @@ def removeInvalidCgmValues(df): def tslimCalibrationFix(df): - searchfor = ['tan'] - tandemDataIndex = ((df.deviceId.str.contains('|'.join(searchfor))) & - (df.type == "deviceEvent")) + if "payload.calibration_reading" in 
list(df): + + searchfor = ['tan'] + tandemDataIndex = ((df.deviceId.str.contains('|'.join(searchfor))) & + (df.type == "deviceEvent")) + + payloadCalReadingIndex = df["payload.calibration_reading"].notnull() nTandemAndPayloadCalReadings = sum(tandemDataIndex & From 39eee21e18ef4a37ad5ad2162cd5addebdb145b2 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 18 Jan 2019 09:14:32 -0600 Subject: [PATCH 45/78] add age and years with summaries, and save all data --- .../get-users-settings-and-events.py | 123 ++++++++++++++---- 1 file changed, 97 insertions(+), 26 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 4c8cff6f..d27ef501 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -543,6 +543,9 @@ def getPumpSettingsStats(df, col, pumpCol): return df, df2 +# %% DELELET LATER +#args.startIndex = 2 + # %% START OF CODE dataPulledDate = args.dateStamp @@ -565,7 +568,7 @@ def getPumpSettingsStats(df, col, pumpCol): allMetadata = pd.DataFrame() allAgeSummaries = pd.DataFrame() allYlwSummaries = pd.DataFrame() - +allAgeANDylwSummaries = pd.DataFrame() # %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL nUniqueDonors = len(donors) @@ -1265,6 +1268,41 @@ def getPumpSettingsStats(df, col, pumpCol): metadata["minYLW"] = minYLW metadata["maxYLW"] = maxYLW + # age and ylw + catDF = dayData.groupby(["age", "ylw"]) + ageANDylwSummary = pd.DataFrame(catDF.validPumpData.sum()) + ageANDylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ageANDylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ageANDylwSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ageANDylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + + ageANDylwSummary["isf.nDays"] = catDF["isf.min"].count() + ageANDylwSummary["isf.min"] = 
catDF["isf.min"].min() + ageANDylwSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ageANDylwSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + ageANDylwSummary["cir.nDays"] = catDF["cir.min"].count() + ageANDylwSummary["cir.min"] = catDF["cir.min"].min() + ageANDylwSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ageANDylwSummary["cir.max"] = catDF["cir.max"].max() + + # add sbr stats + ageANDylwSummary["sbr.nDays"] = catDF["sbr.min"].count() + ageANDylwSummary["sbr.min"] = catDF["sbr.min"].min() + ageANDylwSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ageANDylwSummary["sbr.max"] = catDF["sbr.max"].max() + ageANDylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + + # correctionTarget stats + ageANDylwSummary["ct.nDays"] = catDF["ct.target.min"].count() + ageANDylwSummary["ct.target.min"] = catDF["ct.target.min"].min() + ageANDylwSummary["ct.target.weightedMean"] = catDF["ct.target.weightedMean"].sum() / catDF["ct.target.weightedMean"].count() + ageANDylwSummary["ct.target.max"] = catDF["ct.target.max"].max() + +# analysisCriterion = ageANDylwSummary[((ageANDylwSummary["nDaysValidPump"]> 28) & +# (ageANDylwSummary["nDaysValidCgm"]> 28))] + # %% calculate local time abr["date"] = pd.to_datetime(abr["utcTime"].dt.date) @@ -1414,35 +1452,68 @@ def getPumpSettingsStats(df, col, pumpCol): "allYlwSummaries-dIndex-" + str(startIndex) + ".csv")) + # repoeat for agne AND years living with + catDF = pumpEvents.groupby(["age", "ylw"]) + # actual basal rates + ageANDylwPump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") + ageANDylwPump["basalRate.min"] = catDF["basalRate"].min() + ageANDylwPump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() + ageANDylwPump["basalRate.max"] = catDF["basalRate"].max() + + # insulin events + insulinEvents = 
catDF["unitsInsulin"].describe().add_prefix("insulin.") + ageANDylwPump = pd.concat([ageANDylwPump, insulinEvents], axis=1) + + # carbs entered in bolus calculator + carbEvents = catDF["carbInput"].describe().add_prefix("carb.") + ageANDylwPump = pd.concat([ageANDylwPump, carbEvents], axis=1) + + # very low level cgm stats per age + catDF = cgmLite.groupby(["age", "ylw"]) + cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") + ageANDylwPumpCgm = pd.concat([ageANDylwPump, cgmStats], axis=1) + + ageANDylwSummary = ageANDylwSummary.join(ageANDylwPumpCgm, how="left") + + ageANDylwPumpCgm.reset_index(inplace=True) + ageANDylwSummary.reset_index(inplace=True) + + ageANDylwSummary["hashID"] = hashID + allAgeANDylwSummaries = pd.concat([allAgeANDylwSummaries, ageANDylwSummary], ignore_index=True) + + allAgeANDylwSummaries.to_csv(os.path.join(outputPath, + "allAgeANDylwSummaries-dIndex-" + str(startIndex) + ".csv")) + + # %% save data for this person - # outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" - # outputFormat = (f"{minAge:02d}", - # f"{maxAge:02d}", - # f"{minYLW:02d}", - # f"{maxYLW:02d}", - # f"{nDaysClosedLoop:03d}", - # f"{n670gDays:03d}", - # hashID[0:4]) - # outputFolderName = outputString % outputFormat - # outputFolderName_Path = os.path.join(outputPath,"data", outputFolderName) - # if not os.path.exists(outputFolderName_Path): - # os.makedirs(outputFolderName_Path) - # - # fName = outputFolderName + "-allSettings.csv" - # allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) - # fName = outputFolderName + "-pumpEvents.csv" - # pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) - # fName = outputFolderName + "-cgmLite.csv" - # cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) + outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" + outputFormat = (f"{minAge:02d}", + f"{maxAge:02d}", + f"{minYLW:02d}", + f"{maxYLW:02d}", + f"{int(nDaysClosedLoop):03d}", + f"{int(n670gDays):03d}", + hashID[0:4]) + outputFolderName 
= outputString % outputFormat + outputFolderName_Path = os.path.join(outputPath, "data", outputFolderName) + if not os.path.exists(outputFolderName_Path): + os.makedirs(outputFolderName_Path) + + fName = outputFolderName + "-allSettings.csv" + allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-pumpEvents.csv" + pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-cgmLite.csv" + cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) # %% save the processed data (saving this data will take up a lot of space and time) - #data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) - #basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) - #bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) - #cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) - #pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) + data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) + basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) + bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) + cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) + pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) else: metadata["flags"] = "no bolus wizard data" @@ -1463,7 +1534,7 @@ def getPumpSettingsStats(df, col, pumpCol): # write metaData to allMetadata allMetadata = pd.concat([allMetadata, metadata], axis=0, sort=True) allMetadata.to_csv(os.path.join(outputPath, - "allMetadata-dIndex-" + str(startIndex) + "-" + str(dIndex) + ".csv")) + "allMetadata-dIndex-" + str(startIndex) + ".csv")) print("done with", dIndex) From 4833b06dbc22931e19a3ebb4e1e4b21cdbd89c6e Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 08:41:21 -0600 
Subject: [PATCH 46/78] get rid of print correction target --- projects/predict-simulate/get-users-settings-and-events.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index d27ef501..a0a3328b 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -951,9 +951,6 @@ def getPumpSettingsStats(df, col, pumpCol): ctDaySummary.reset_index(inplace=True, drop=True) - print(correctionTarget) - print(ctDaySummary) - # SCHEDULED BASAL RATES sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"] sbr = pd.DataFrame(columns=sbrColHeadings) From 47025c6935b74b87a61fe20330f4d351e0b8b926 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 14:39:20 -0600 Subject: [PATCH 47/78] ignore copy of slice warning --- projects/predict-simulate/get-users-settings-and-events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index a0a3328b..5cab1f4d 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -20,7 +20,7 @@ import os import argparse import pdb - +pd.options.mode.chained_assignment = None # default='warn' # %% USER INPUTS (ADD THIS IN LATER) codeDescription = "Get user's settings and events" From ca91f4e1dbee22b1d4fb9f35a04183bc0b103ed0 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 14:40:37 -0600 Subject: [PATCH 48/78] sense units of isf --- .../get-users-settings-and-events.py | 77 +++++++++++++++---- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 5cab1f4d..2ba00129 100644 --- 
a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -218,6 +218,10 @@ def mmolL_to_mgdL(mmolL): return mmolL * 18.01559 +def mgdL_to_mmolL(mgdL): + return mgdL / 18.01559 + + def round_time(df, timeIntervalMinutes=5, timeField="time", roundedTimeFieldName="roundedTime", startWithFirstRecord=True, verbose=False): @@ -543,8 +547,23 @@ def getPumpSettingsStats(df, col, pumpCol): return df, df2 + +def isf_likely_units(df, columnHeading): + isfNotNull = df[df[columnHeading].notnull()][columnHeading] + minVal = np.min(isfNotNull) + maxVal = np.max(isfNotNull) + minDiff = np.abs(minVal - np.round(minVal)) + maxDiff = np.abs(maxVal - np.round(maxVal)) + if ((maxDiff == 0) & (maxDiff == 0) & (maxVal > 22.1)): + likelyUnits = "mg/dL" + else: + likelyUnits = "mmol/L" + return likelyUnits + + + # %% DELELET LATER -#args.startIndex = 2 +args.startIndex = 96 # %% START OF CODE @@ -673,15 +692,21 @@ def getPumpSettingsStats(df, col, pumpCol): # get a summary of boluses per day bolusDaySummary = get_bolusDaySummary(bolus) - # # isf and cir associated with bolus event - # if "insulinSensitivities" in list(bolus): - # pdb.set_trace() - # - # if "carbRatios" in list(bolus): - # pdb.set_trace() + # figure out likely isf units + isfUnits = isf_likely_units(bolus, "insulinSensitivity") + metadata["bolus.isfLikelyUnits"] = isfUnits + + if isfUnits in "mmol/L": + + bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] + bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) + + else: + # I am pretty sure this case does NOT exist + pdb.set_trace() + bolus["isf"] = bolus["insulinSensitivity"] + bolus["isf_mmolL_U"] = mgdL_to_mmolL(bolus["isf"]) - bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] - bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) bolusCH = ["utcTime", "timezone", "roundedTime", "normal", "carbInput", "subType", "insulinOnBoard", "bgInput", @@ -732,8 +757,21 @@ def getPumpSettingsStats(df, col, pumpCol): 
if "insulinSensitivity.amount" in list(pumpSettings): isfColHead = "insulinSensitivity" - pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] - pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) + + # figure out likely isf units + isfUnits = isf_likely_units(pumpSettings, "insulinSensitivity.amount") + metadata["pumpSettings.isfLikelyUnits"] = isfUnits + + if isfUnits in "mmol/L": + + pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) + + else: + + pumpSettings["isf"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf_mmolL_U"] = mgdL_to_mmolL(pumpSettings["isf"]) + pumpSettings["isf.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") @@ -759,8 +797,21 @@ def getPumpSettingsStats(df, col, pumpCol): tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." + actSched]) tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["isf.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - tempDF["isf_mmolL_U"] = tempDF["amount"] - tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) + + # figure out likely isf units + isfUnits = isf_likely_units(tempDF, "amount") + metadata["tempDF.isfLikelyUnits"] = isfUnits + + if isfUnits in "mmol/L": + + tempDF["isf_mmolL_U"] = tempDF["amount"] + tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"]) + + else: + + tempDF["isf"] = tempDF["amount"] + tempDF["isf_mmolL_U"] = mgdL_to_mmolL(tempDF["isf"]) + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["isf.localTime"], index=[0]) tempDF = get_setting_durations(tempDF, "isf", endOfDay) tempDF = tempDF[:-1] From 691b0e0d589ddd84942044be1760db5ae4a8501c Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 14:41:03 -0600 Subject: [PATCH 49/78] make sure n670g days returns 0 instead of False when no data --- 
projects/predict-simulate/get-users-settings-and-events.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 2ba00129..dfbc6f1a 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -398,6 +398,9 @@ def getClosedLoopDays(groupedData, nTempBasalsPerDayIsClosedLoop, metadata): med670g = pd.DataFrame(topPump.str.contains("1780")).rename(columns={"top":"670g"}) med670g.reset_index(inplace=True) n670gDays = med670g["670g"].sum() + if n670gDays == 0: + med670g = pd.DataFrame(columns=["670g", "day"]) + else: closedLoopDF = pd.DataFrame(columns=["basal.closedLoopDays", "day"]) From a7469788bbb5707cfc1752b8e50e5fd4903be9d1 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 14:42:24 -0600 Subject: [PATCH 50/78] allow for valid cgm to account for free style 15 minute data interval and a few other small refactors --- .../get-users-settings-and-events.py | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index dfbc6f1a..81a4ee66 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -726,7 +726,6 @@ def isf_likely_units(df, columnHeading): bolus["duration"].replace(0, np.nan, inplace=True) bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 bolus["rate"] = bolus["extended"] / bolus["durationHours"] - # bolusExtendedCH = commonColumnHeadings.copy() bolusExtendedCH = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] bolusExtendedEvents = bolus.loc[ ((bolus["extended"].notnull()) & @@ -1071,6 +1070,8 @@ def isf_likely_units(df, columnHeading): basal = data[data.type == 
"basal"].copy().dropna(axis=1, how="all") basal.sort_values("uploadTime", ascending=False, inplace=True) + metadata["pump.top"] = basal.deviceId.describe()["top"] + basalBeginDate, basalEndDate = getStartAndEndTimes(basal, "day") metadata["basal.beginDate"] = basalBeginDate metadata["basal.endDate"] = basalEndDate @@ -1142,6 +1143,10 @@ def isf_likely_units(df, columnHeading): cgmData, percentDexcom = getListOfDexcomCGMDays(cgmData) metadata["cgm.percentDexcomCGM"] = percentDexcom + # see if cgm is freestyle + cgmData["isFreeStyle"] = cgmData["deviceId"].str.contains("Free") + metadata["cgm.top"] = cgmData.deviceId.describe()["top"] + # group by date (day) and get stats catDF = cgmData.groupby(cgmData["day"]) cgmRecordsPerDay = \ @@ -1149,9 +1154,12 @@ def isf_likely_units(df, columnHeading): rename(columns={"value": "cgm.count"}) dayDate = catDF.day.describe()["top"] dexcomCGM = catDF.dexcomCGM.describe()["top"] - nTypesCGM = catDF.dexcomCGM.describe()["unique"] + freeStyleCGM = catDF.isFreeStyle.describe()["top"] +# nTypesCGM = catDF.dexcomCGM.describe()["unique"] cgmRecordsPerDay["cgm.dexcomOnly"] = \ - (dexcomCGM & (nTypesCGM == 1)) + (dexcomCGM & (catDF.dexcomCGM.describe()["unique"] == 1)) + cgmRecordsPerDay["cgm.freeStyleOnly"] = \ + (freeStyleCGM & (catDF.isFreeStyle.describe()["unique"] == 1)) cgmRecordsPerDay["date"] = cgmRecordsPerDay.index # filter the cgm data @@ -1189,7 +1197,10 @@ def isf_likely_units(df, columnHeading): dayData = pd.merge(dayData, dfType, on="day", how="left") dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 - dayData["validCGMData"] = dayData["cgm.count"] > (288*.75) + + dayData["validCGMData"] = \ + ((dayData["cgm.count"] > (288*.75)) | + (dayData["cgm.count"] > (96*.75)) & (dayData["cgm.freeStyleOnly"])) dayData["timezone"].fillna(method='ffill', inplace=True) dayData["timezone"].fillna(method='bfill', inplace=True) @@ -1268,8 +1279,8 @@ def isf_likely_units(df, columnHeading): ageSummary.reset_index(inplace=True) 
- analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 28) & - (ageSummary["nDaysValidCgm"]> 28))] + analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 0) & + (ageSummary["nDaysValidCgm"]> 0))] minAge = analysisCriterion["age"].min() maxAge = analysisCriterion["age"].max() nDaysClosedLoop = analysisCriterion["nDaysClosedLoop"].sum() @@ -1312,8 +1323,8 @@ def isf_likely_units(df, columnHeading): ylwSummary.reset_index(inplace=True) - analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 28) & - (ylwSummary["nDaysValidCgm"]> 28))] + analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 0) & + (ylwSummary["nDaysValidCgm"]> 0))] minYLW = analysisCriterion["ylw"].min() maxYLW = analysisCriterion["ylw"].max() metadata["minYLW"] = minYLW @@ -1351,12 +1362,10 @@ def isf_likely_units(df, columnHeading): ageANDylwSummary["ct.target.weightedMean"] = catDF["ct.target.weightedMean"].sum() / catDF["ct.target.weightedMean"].count() ageANDylwSummary["ct.target.max"] = catDF["ct.target.max"].max() -# analysisCriterion = ageANDylwSummary[((ageANDylwSummary["nDaysValidPump"]> 28) & -# (ageANDylwSummary["nDaysValidCgm"]> 28))] - # %% calculate local time abr["date"] = pd.to_datetime(abr["utcTime"].dt.date) + abr = pd.merge(abr, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") abr["localTime"] = abr["utcTime"] + pd.to_timedelta(abr["tzo"], unit="m") @@ -1538,10 +1547,10 @@ def isf_likely_units(df, columnHeading): # %% save data for this person outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" - outputFormat = (f"{minAge:02d}", - f"{maxAge:02d}", - f"{minYLW:02d}", - f"{maxYLW:02d}", + outputFormat = (f"{int(minAge):02d}", + f"{int(maxAge):02d}", + f"{int(minYLW):02d}", + f"{int(maxYLW):02d}", f"{int(nDaysClosedLoop):03d}", f"{int(n670gDays):03d}", hashID[0:4]) From d92521401d22789ef70632d9ca04699c5eeb3b60 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 14:45:31 -0600 Subject: [PATCH 51/78] comment out 
breakpoints (for now while developing) --- projects/predict-simulate/get-users-settings-and-events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 81a4ee66..05fd87b8 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -566,7 +566,7 @@ def isf_likely_units(df, columnHeading): # %% DELELET LATER -args.startIndex = 96 +#args.startIndex = 96 # %% START OF CODE @@ -706,7 +706,7 @@ def isf_likely_units(df, columnHeading): else: # I am pretty sure this case does NOT exist - pdb.set_trace() +# pdb.set_trace() bolus["isf"] = bolus["insulinSensitivity"] bolus["isf_mmolL_U"] = mgdL_to_mmolL(bolus["isf"]) From 62202bd54d1cb81a1be8b1a305f0fd6160cebb2f Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 14:51:59 -0600 Subject: [PATCH 52/78] make sure nDays with closed loop data is 0 and not False --- projects/predict-simulate/get-users-settings-and-events.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 05fd87b8..59f0f0f4 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -391,6 +391,9 @@ def getClosedLoopDays(groupedData, nTempBasalsPerDayIsClosedLoop, metadata): closedLoopDF["basal.temp.count"] >= nTB nClosedLoopDays = closedLoopDF["basal.closedLoopDays"].sum() + if nClosedLoopDays == 0: + closedLoopDF = pd.DataFrame(columns=["basal.closedLoopDays", "day"]) + # get the number of days with 670g basalData["day"] = pd.to_datetime(basalData.time).dt.date bdGroup = basalData.groupby("day") From 748d375e53ca8863f0f67ee2db96f1cb80c50378 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 20:52:56 -0600 
Subject: [PATCH 53/78] deal with edge case where schedule has no information --- projects/predict-simulate/get-users-settings-and-events.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 59f0f0f4..fcba153d 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -1018,6 +1018,10 @@ def isf_likely_units(df, columnHeading): actSched = str(int(actSched)) if 'Auto Mode' not in actSched: tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." + actSched]) + if len(tempDF) == 0: + tempDF.loc[0, "start"] = 0 + tempDF.loc[0, "rate"] = 0 + tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["sbr.type"] = "regular" tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") From 76b83cf7fc989078af49ffc13c944e383efae3a5 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 22 Jan 2019 21:58:03 -0600 Subject: [PATCH 54/78] deal with edge case where there is not enough pump and/or cgm data --- .../get-users-settings-and-events.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index fcba153d..47c1b7ab 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -1037,6 +1037,7 @@ def isf_likely_units(df, columnHeading): tempDaySummary["sbr.max"] = tempDF["rate"].max() tempDaySummary["sbr.type"] = "regular" + else: tempDF = pd.DataFrame(index=[0]) tempDF["day"] = pumpSettings.loc[p, "day"] @@ -1552,16 +1553,20 @@ def isf_likely_units(df, columnHeading): "allAgeANDylwSummaries-dIndex-" + str(startIndex) + ".csv")) - # %% save data for this person - outputString = 
"age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" - outputFormat = (f"{int(minAge):02d}", - f"{int(maxAge):02d}", + # %% save data for this person + if ((pd.notna(minAge)) & (pd.notna(minYLW))): + outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" + outputFormat = (f"{int(minAge):02d}", + f"{int(maxAge):02d}", f"{int(minYLW):02d}", f"{int(maxYLW):02d}", f"{int(nDaysClosedLoop):03d}", - f"{int(n670gDays):03d}", - hashID[0:4]) - outputFolderName = outputString % outputFormat + f"{int(n670gDays):03d}", + hashID[0:4]) + outputFolderName = outputString % outputFormat + else: + outputFolderName = "dIndex-" + str(dIndex) + "-investigate-" + str(hashID[0:4]) + outputFolderName_Path = os.path.join(outputPath, "data", outputFolderName) if not os.path.exists(outputFolderName_Path): os.makedirs(outputFolderName_Path) @@ -1574,7 +1579,6 @@ def isf_likely_units(df, columnHeading): cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) - # %% save the processed data (saving this data will take up a lot of space and time) data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) From f4fdd04d50c09f2b749dd879dfd8523f52092e33 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Wed, 23 Jan 2019 05:36:29 -0600 Subject: [PATCH 55/78] edge case where active schedule is null --- .../get-users-settings-and-events.py | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 47c1b7ab..6040af43 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -1017,14 +1017,16 @@ def isf_likely_units(df, columnHeading): if isinstance(actSched, float): actSched = str(int(actSched)) if 'Auto Mode' not in actSched: - tempDF = pd.DataFrame(pumpSettings.loc[p, 
"basalSchedules." + actSched]) - if len(tempDF) == 0: - tempDF.loc[0, "start"] = 0 - tempDF.loc[0, "rate"] = 0 - - tempDF["day"] = pumpSettings.loc[p, "day"] - tempDF["sbr.type"] = "regular" - tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") + # edge case where a active schedule is nan + try: + tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." + actSched]) + except: + tempDF = pd.DataFrame() + metadata["issueWithBasalSchedule"] = True + if len(tempDF) > 0: + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["sbr.type"] = "regular" + tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0]) tempDF = get_setting_durations(tempDF, "sbr", endOfDay) tempDF = tempDF[:-1] @@ -1033,11 +1035,22 @@ def isf_likely_units(df, columnHeading): tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date tempDaySummary["sbr.min"] = tempDF["rate"].min() tempDaySummary["sbr.weightedMean"] = \ - np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() - tempDaySummary["sbr.max"] = tempDF["rate"].max() - tempDaySummary["sbr.type"] = "regular" - - + np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() + tempDaySummary["sbr.max"] = tempDF["rate"].max() + tempDaySummary["sbr.type"] = "regular" + else: + tempDF = pd.DataFrame(index=[0]) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + tempDF["rate"] = np.nan + tempDF["sbr.type"] = "AutoMode" + + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date + tempDaySummary["sbr.min"] = np.nan + tempDaySummary["sbr.weightedMean"] = np.nan + tempDaySummary["sbr.max"] = np.nan + tempDaySummary["sbr.type"] = "missingNullOrIssue" else: tempDF = 
pd.DataFrame(index=[0]) tempDF["day"] = pumpSettings.loc[p, "day"] From 61c97b8b1ebfda44ed9f1b212c2e4b15183e843a Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 24 Jan 2019 05:18:06 -0600 Subject: [PATCH 56/78] rename schedule basal rate summary data columns --- projects/predict-simulate/get-users-settings-and-events.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 6040af43..4cb374b8 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -1290,7 +1290,8 @@ def isf_likely_units(df, columnHeading): ageSummary["sbr.min"] = catDF["sbr.min"].min() ageSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() ageSummary["sbr.max"] = catDF["sbr.max"].max() - ageSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + ageSummary["sbr.typeTop"] = catDF["sbr.type"].describe()["top"] + ageSummary["sbr.typeCount"] = catDF["sbr.type"].count() # correctionTarget stats ageSummary["ct.nDays"] = catDF["ct.target.min"].count() From 9961c0c02b0cb9f7f73fa3db37b229c1a9c83fae Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 24 Jan 2019 05:33:19 -0600 Subject: [PATCH 57/78] update to the todo list at end of file --- projects/predict-simulate/get-users-settings-and-events.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 4cb374b8..a19dd5a4 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -1625,18 +1625,15 @@ def isf_likely_units(df, columnHeading): # %% V2 DATA TO GRAB -# THERE IS AN ISSUE WITH COUNTING 670G SETTINGS +# INVESTIGATE SETTINGS OUTLIERS # ADD ROUNDEDLOCAL TIME TO THE 
END RESULTS # CALCULATE MMOL SUMMARIES -# GET RID OF ROUNDING TIME AT THE BEGINNING # DEFINE A DAY BETWEEN 6AM AND 6AM -# EXPAND THE CORRECTION TIME VALUES TO BE UNIFORM ACROSS ALL USERS AND DEVICES # FIX DAYLIGHT SAVINGS TIME TIMES # FIGURE OUT WHY TEMP BASAL COUNTS ARE DIFFERENT BETWEEN THE TWO DIFFERENT METHODS # MAX BASAL RATE, MAX BOLUS AMOUNT, AND INSULIN DURATION SET ON SELECT PUMPS # ALERT SETTINGS # ESTIMATED LOCAL TIME -# PUMP AND CGM DEVICE () # GLYCEMIC OUTCOMES # DO NOT ROUND DATA # INFUSION SITE CHANGES From c43efc51715ee3202f9bf0f811a5fd3de0677132 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 24 Jan 2019 09:11:46 -0600 Subject: [PATCH 58/78] add to list of issues to investigate --- projects/predict-simulate/get-users-settings-and-events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index a19dd5a4..021f5f5e 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -1625,7 +1625,7 @@ def isf_likely_units(df, columnHeading): # %% V2 DATA TO GRAB -# INVESTIGATE SETTINGS OUTLIERS +# INVESTIGATE SETTINGS OUTLIERS (Paradigm Veo pumps have unrealistic high ISF) # ADD ROUNDEDLOCAL TIME TO THE END RESULTS # CALCULATE MMOL SUMMARIES # DEFINE A DAY BETWEEN 6AM AND 6AM From 9d69760ef3234273ec8af478096086763e1955dd Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 24 Jan 2019 12:33:08 -0600 Subject: [PATCH 59/78] adding to list of potential issues to examine --- projects/predict-simulate/get-users-settings-and-events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 021f5f5e..b393d953 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ 
b/projects/predict-simulate/get-users-settings-and-events.py @@ -1625,7 +1625,7 @@ def isf_likely_units(df, columnHeading): # %% V2 DATA TO GRAB -# INVESTIGATE SETTINGS OUTLIERS (Paradigm Veo pumps have unrealistic high ISF) +# INVESTIGATE SETTINGS OUTLIERS (Paradigm Veo pumps have unrealistic high ISF, ommipod with likely mg/dL have wrong correction target) # ADD ROUNDEDLOCAL TIME TO THE END RESULTS # CALCULATE MMOL SUMMARIES # DEFINE A DAY BETWEEN 6AM AND 6AM From b015a1e2191197a049e4791d144c65b9c156992b Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 29 Jan 2019 04:55:44 -0600 Subject: [PATCH 60/78] changing a day to reflect local time --- .../get-users-settings-and-events.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index b393d953..e90f12f2 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -668,7 +668,11 @@ def isf_likely_units(df, columnHeading): data["utcTime"] = pd.to_datetime(data["time"]) data["timezone"].fillna(method='ffill', inplace=True) data["timezone"].fillna(method='bfill', inplace=True) - data["day"] = pd.DatetimeIndex(data["utcTime"]).date + + # estimate local time (simple method) + data["tzo"] = data[['utcTime', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) + data["localTime"] = data["utcTime"] + pd.to_timedelta(data["tzo"], unit="m") + data["day"] = pd.DatetimeIndex(data["localTime"]).date # round to the nearest 5 minutes # TODO: once roundTime is pushed to tidals repository then this line can be replaced @@ -676,14 +680,16 @@ def isf_likely_units(df, columnHeading): data = round_time(data, timeIntervalMinutes=5, timeField="time", roundedTimeFieldName="roundedTime", startWithFirstRecord=True, verbose=False) + + data["roundedLocalTime"] = data["roundedTime"] + 
pd.to_timedelta(data["tzo"], unit="m") data.sort_values("uploadTime", ascending=False, inplace=True) # %% ID, HASHID, AGE, & YLW data["userID"] = userID data["hashID"] = hashID - data["age"] = np.floor((data["utcTime"] - bDate).dt.days/365.25).astype(int) - data["ylw"] = np.floor((data["utcTime"] - dDate).dt.days/365.25).astype(int) + data["age"] = np.floor((data["localTime"] - bDate).dt.days/365.25).astype(int) + data["ylw"] = np.floor((data["localTime"] - dDate).dt.days/365.25).astype(int) # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) From 5597a85b4ba30cc68f6b1c291d5b0e9ef17acccd Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 29 Jan 2019 12:15:01 -0600 Subject: [PATCH 61/78] add total daily dose and correct basals that extend past midnight --- .../get-users-settings-and-events.py | 412 ++++++++++-------- 1 file changed, 227 insertions(+), 185 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index e90f12f2..21e616b2 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -339,15 +339,18 @@ def get_bolusDaySummary(bolusData): return bolusDaySummary -def get_basalDaySummary(basal): +def get_basalDaySummary(df): # group data by day - basalByDay = basal.groupby(basal["day"]) + basalByDay = df.groupby(df["day"]) # total basal insulin per day basalDaySummary = pd.DataFrame(basalByDay.totalAmountOfBasalInsulin.sum()) + # total duration per each day (this should add up to 24 hours) + basalDaySummary["totalBasalDuration"] = basalByDay.durationHours.sum() + # total number of basals types per day - basalTypePerDay = basal.groupby(["day", "deliveryType"]).size().unstack() + basalTypePerDay = df.groupby(["day", "deliveryType"]).size().unstack() basalDaySummary["numberOfScheduledBasals"] = basalTypePerDay["scheduled"].fillna(0) if "suspend" not in 
list(basalTypePerDay): @@ -567,9 +570,96 @@ def isf_likely_units(df, columnHeading): return likelyUnits +def correct_basal_extends_past_midnight(df, timeCol, dayCol): + # deal with case when basal extends past midnight due to utcTime and localTime difference + df.sort_values(timeCol, inplace=True) + uniqueDays = pd.DatetimeIndex(df[dayCol].unique()) + midnightsNotInBasalData = uniqueDays[~uniqueDays.isin(df[timeCol])] + for midnight in midnightsNotInBasalData: + # find the last basal prior to midnight + dayBefore = midnight - pd.Timedelta(24, unit="h") + dataDayBefore = df[(df[timeCol] < midnight) & (df[timeCol] > dayBefore)] + + if len(dataDayBefore) > 0: + + basalPriorToMidnight = dataDayBefore[dataDayBefore[timeCol] == dataDayBefore[timeCol].max()] + indexToDrop = basalPriorToMidnight.index.values[0] + oldDuration = basalPriorToMidnight.loc[indexToDrop, "duration"] + newDuration = (midnight - basalPriorToMidnight.loc[indexToDrop, timeCol]).seconds * 1000.0 + newMidnightDuration = oldDuration - newDuration + + newBasalPriorToMidnight = df.copy().drop(index=df.index) + newBasalPriorToMidnight.loc[0,:] = basalPriorToMidnight.loc[indexToDrop,:] + newBasalPriorToMidnight["duration"] = newDuration + + # new basal at midnight + newBasalAtMidnight = df.copy().drop(index=df.index) + newBasalAtMidnight.loc[1,:] = basalPriorToMidnight.loc[indexToDrop,:] + newBasalAtMidnight["duration"] = newMidnightDuration + newBasalAtMidnight[timeCol] = midnight.to_pydatetime() + newBasalAtMidnight[dayCol] = newBasalAtMidnight[timeCol].dt.date + + # add data back to the basal data frame + newRowsToAdd = pd.concat([newBasalPriorToMidnight, newBasalAtMidnight], ignore_index = True) + newRowsToAdd = newRowsToAdd.astype({"rate": "float64", + "duration": "float64"}) + df = df.drop(indexToDrop) + df = pd.concat([df, newRowsToAdd], ignore_index=True) + + return df + + +def get_basalEvent_summary(df, categories): + catDF = df[df["type"] == "basal"].groupby(categories) + summaryDF = 
pd.DataFrame(catDF["rate"].count()).add_suffix(".count") + summaryDF["basalRate.min"] = catDF["rate"].min() + summaryDF["basalRate.weightedMean"] = catDF["totalAmountOfBasalInsulin"].sum() / catDF["durationHours"].sum() + summaryDF["basalRate.max"] = catDF["rate"].max() + + # max basal rate including extended boluses + catDF = df.groupby(categories) + summaryDF["basalRateIncludingExtendedBoluses.count"] = catDF["rate"].count() + summaryDF["basalRateIncludingExtendedBoluses.max"] = catDF["rate"].max() + + return summaryDF + + +def get_bolusEvent_summary(df, categories): + + catDF = df.groupby(categories) + summaryDF = pd.DataFrame(catDF["unitsInsulin"].describe().add_prefix("insulin.")) + + # carbs entered in bolus calculator + carbEvents = catDF["carbInput"].describe().add_prefix("carbsPerMeal.") + summaryDF = pd.concat([summaryDF, carbEvents], axis=1) + + return summaryDF + + +def get_dayData_summary(df, categories): + + catDF = df[df["validPumpData"]].groupby(categories) + summaryDF = pd.DataFrame(catDF["totalAmountOfInsulin"].describe().add_prefix("totalDailyDose.")) + totalDailyCarbs = catDF["totalDailyCarbs"].describe().add_prefix("totalDailyCarbs.") + percentBasal = catDF["percentBasal"].describe().add_prefix("percentBasal.") + percentBolus = catDF["percentBolus"].describe().add_prefix("percentBolus.") + summaryDF = pd.concat([summaryDF, totalDailyCarbs, percentBasal, percentBolus], axis=1) + + return summaryDF + + +def get_pumpSummary(basalEventsDF, bolusEventsDF, dayDataDF, categories): + basalEventSummary = get_basalEvent_summary(basalEventsDF, categories) + bolusEventSummary = get_bolusEvent_summary(bolusEventsDF, categories) + dailySummary = get_dayData_summary(dayDataDF, categories) + pumpSummaryDF = pd.concat([basalEventSummary, bolusEventSummary, dailySummary], axis=1) + + return pumpSummaryDF + # %% DELELET LATER -#args.startIndex = 96 +args.startIndex = 0 +args.endIndex = 4226 # %% START OF CODE @@ -608,7 +698,7 @@ def isf_likely_units(df, 
columnHeading): metadata = pd.DataFrame(index=[dIndex]) metadata["hashID"] = hashID - try: + if 1 == 1: # try: # make folder to save data processedDataPath = os.path.join(phiOutputPath, "PHI-" + userID) if not os.path.exists(processedDataPath): @@ -720,9 +810,12 @@ def isf_likely_units(df, columnHeading): bolus["isf_mmolL_U"] = mgdL_to_mmolL(bolus["isf"]) - bolusCH = ["utcTime", "timezone", "roundedTime", "normal", "carbInput", "subType", - "insulinOnBoard", "bgInput", - "isf", "isf_mmolL_U", "insulinCarbRatio"] + bolusCH = ["hashID", "age", "ylw", "day", + "utcTime", "localTime", "timezone", "tzo", + "roundedTime", "roundedLocalTime", + "normal", "carbInput", "subType", + "insulinOnBoard", "bgInput", + "isf", "isf_mmolL_U", "insulinCarbRatio"] bolusEvents = bolus.loc[bolus["normal"].notnull(), bolusCH] bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", @@ -735,7 +828,12 @@ def isf_likely_units(df, columnHeading): bolus["duration"].replace(0, np.nan, inplace=True) bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 bolus["rate"] = bolus["extended"] / bolus["durationHours"] - bolusExtendedCH = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] +# bolusExtendedCH = ["localTime", "timezone", "roundedTime", "roundedLocalTime", +# "durationHours", "rate", "type"] + bolusExtendedCH = ["hashID", "age", "ylw", "day", + "utcTime", "localTime", "timezone", "tzo", + "roundedTime", "roundedLocalTime", + "durationHours", "rate", "type"] bolusExtendedEvents = bolus.loc[ ((bolus["extended"].notnull()) & (bolus["duration"] > 0)), bolusExtendedCH] @@ -1033,14 +1131,14 @@ def isf_likely_units(df, columnHeading): tempDF["day"] = pumpSettings.loc[p, "day"] tempDF["sbr.type"] = "regular" tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms") - endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, 
"D")), columns=["sbr.localTime"], index=[0]) - tempDF = get_setting_durations(tempDF, "sbr", endOfDay) - tempDF = tempDF[:-1] + endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0]) + tempDF = get_setting_durations(tempDF, "sbr", endOfDay) + tempDF = tempDF[:-1] - tempDaySummary = pd.DataFrame(index=[0]) - tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date - tempDaySummary["sbr.min"] = tempDF["rate"].min() - tempDaySummary["sbr.weightedMean"] = \ + tempDaySummary = pd.DataFrame(index=[0]) + tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date + tempDaySummary["sbr.min"] = tempDF["rate"].min() + tempDaySummary["sbr.weightedMean"] = \ np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum() tempDaySummary["sbr.max"] = tempDF["rate"].max() tempDaySummary["sbr.type"] = "regular" @@ -1107,6 +1205,9 @@ def isf_likely_units(df, columnHeading): removeDuplicates(basal, ["deliveryType", "deviceTime", "duration", "rate"]) metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved + # deal with case when basal extends past midnight due to utcTime and localTime difference + basal = correct_basal_extends_past_midnight(basal, "localTime", "day") + # fill NaNs with 0, as it indicates a suspend (temp basal of 0) basal.rate.fillna(0, inplace=True) @@ -1125,14 +1226,18 @@ def isf_likely_units(df, columnHeading): basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"] # actual basal delivered - abrColHeadings = ["utcTime", "timezone", "roundedTime", "durationHours", "rate", "type"] - abr = basal[abrColHeadings] + basalEventsColHeadings = ["hashID", "age", "ylw", "day", + "utcTime", "localTime", "timezone", "tzo", + "roundedTime", "roundedLocalTime", + "durationHours", "rate", "type"] + basalEvents = basal[basalEventsColHeadings] if "duration" in list(bolus): - abr = pd.concat([abr, bolusExtendedEvents], ignore_index=True) - 
abr.sort_values("utcTime", inplace=True) + basalEvents = pd.concat([basalEvents, bolusExtendedEvents], ignore_index=True) + basalEvents.sort_values("localTime", inplace=True) - abr["timezone"].fillna(method='ffill', inplace=True) - abr["timezone"].fillna(method='bfill', inplace=True) + basalEvents["timezone"].fillna(method='ffill', inplace=True) + basalEvents["timezone"].fillna(method='bfill', inplace=True) + basalEvents["totalAmountOfBasalInsulin"] = basalEvents["rate"] * basalEvents["durationHours"] # get a summary of basals per day basalDaySummary = get_basalDaySummary(basal) @@ -1145,6 +1250,7 @@ def isf_likely_units(df, columnHeading): isClosedLoopDay, is670g, metadata = \ getClosedLoopDays(groupedData, 30, metadata) + # %% CGM DATA # filter by cgm and sort by uploadTime cgmData = groupedData.get_group("cbg").dropna(axis=1, how="all") @@ -1190,7 +1296,10 @@ def isf_likely_units(df, columnHeading): cgmRecordsPerDay["date"] = cgmRecordsPerDay.index # filter the cgm data - cgmColHeadings = ["utcTime", "timezone", "roundedTime", "value"] + cgmColHeadings = ["hashID", "age", "ylw", "day", + "utcTime", "localTime", + "timezone", "tzo", + "roundedTime", "roundedLocalTime", "value"] # get data in mg/dL units cgm = cgmData[cgmColHeadings] @@ -1218,12 +1327,39 @@ def isf_likely_units(df, columnHeading): metadata["day.endDate"] = dayEndDate rng = pd.date_range(dayBeginDate, dayEndDate).date dayData = pd.DataFrame(rng, columns=["day"]) + for dfType in [dataPerDay, basalDaySummary, bolusDaySummary, cgmRecordsPerDay]: dayData = pd.merge(dayData, dfType.reset_index(), on="day", how="left") + for dfType in [isClosedLoopDay, is670g]: dayData = pd.merge(dayData, dfType, on="day", how="left") - dayData["validPumpData"] = dayData["numberOfNormalBoluses"] > 3 + # calculate the total amount of daily insulin + dayData["totalAmountOfInsulin"] = ( + dayData["totalAmountOfBasalInsulin"] + + dayData["totalAmountOfBolusInsulin"] + ) + + # calculate the percent bolus and percent basal + 
dayData["percentBasal"] = ( + dayData["totalAmountOfBasalInsulin"] / + dayData["totalAmountOfInsulin"] + ) + + dayData["percentBolus"] = ( + dayData["totalAmountOfBolusInsulin"] / + dayData["totalAmountOfInsulin"] + ) + + # total daily carbs + totalDailyCarbs = pd.DataFrame(bolusEvents.groupby("day").carbInput.sum()) + totalDailyCarbs.reset_index(inplace=True) + totalDailyCarbs.rename(columns={"carbInput": "totalDailyCarbs"}, inplace=True) + dayData = pd.merge(dayData, totalDailyCarbs, how="left", on="day") + + # valid pump should be having exactly 24 hours of basal rate + dayData["validPumpData"] = dayData["totalBasalDuration"] == 24 + dayData["atLeast3Boluses"] = dayData["numberOfNormalBoluses"] >= 3 dayData["validCGMData"] = \ ((dayData["cgm.count"] > (288*.75)) | @@ -1392,18 +1528,14 @@ def isf_likely_units(df, columnHeading): # %% calculate local time - abr["date"] = pd.to_datetime(abr["utcTime"].dt.date) - - abr = pd.merge(abr, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") - abr["localTime"] = abr["utcTime"] + pd.to_timedelta(abr["tzo"], unit="m") + basalEvents["day"] = basalEvents["localTime"].dt.date + basalEvents = pd.merge(basalEvents, dayData[["day", "isDSTChangeDay"]], how="left", on="day") - cgm["date"] = pd.to_datetime(cgm["utcTime"].dt.date) - cgm = pd.merge(cgm, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") - cgm["localTime"] = cgm["utcTime"] + pd.to_timedelta(cgm["tzo"], unit="m") + cgm["day"] = cgm["localTime"].dt.date + cgm = pd.merge(cgm, dayData[["day", "isDSTChangeDay"]], how="left", on="day") - bolusEvents["date"] = pd.to_datetime(bolusEvents["utcTime"].dt.date) - bolusEvents = pd.merge(bolusEvents, dayData[["date", "tzo", "isDSTChangeDay"]], how="left", on="date") - bolusEvents["localTime"] = bolusEvents["utcTime"] + pd.to_timedelta(bolusEvents["tzo"], unit="m") + bolusEvents["day"] = bolusEvents["localTime"].dt.date + bolusEvents = pd.merge(bolusEvents, dayData[["day", "isDSTChangeDay"]], 
how="left", on="day") # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) @@ -1428,149 +1560,51 @@ def isf_likely_units(df, columnHeading): roundedTimeFieldName="localRoundedTime", startWithFirstRecord=True, verbose=False) - colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", + allSettings["day"] = allSettings["localTime"].dt.date + allSettings = pd.merge(allSettings, dayData[["day", "isDSTChangeDay"]], how="left", on="day") + + + colOrder = ["hashID", "age", "ylw", "day", "isDSTChangeDay", + "localTime", "localRoundedTime", "isf", "cir", "sbr", "deviceId", "ct.low", "ct.high", "ct.target", "ct.range", "sbr.type", "isf_mmolL_U"] allSettings = allSettings[colOrder] - fieldsToDrop = ["utcTime", "timezone", "roundedTime", "date", "tzo", "isDSTChangeDay"] - pumpEvents = pd.merge(abr.drop(columns=fieldsToDrop), - bolusEvents.drop(columns=fieldsToDrop), - how="outer", on="localTime") - pumpEvents["type"].fillna("bolus", inplace=True) - pumpEvents["eventType"].fillna("basal", inplace=True) - pumpEvents["hashID"] = hashID - pumpEvents["age"] = np.floor((pumpEvents["localTime"] - bDate).dt.days/365.25).astype(int) - pumpEvents["ylw"] = np.floor((pumpEvents["localTime"] - dDate).dt.days/365.25).astype(int) - pumpEvents = round_time(pumpEvents, timeIntervalMinutes=5, - timeField="localTime", - roundedTimeFieldName="localRoundedTime", - startWithFirstRecord=True, verbose=False) - - - colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", - "rate", "durationHours", - "unitsInsulin", "carbInput", "type", "eventType", "subType", - "isf", "isf_mmolL_U", "insulinCarbRatio", "insulinOnBoard", - "bg_mgdL", "bg_mmolL"] - - pumpEvents = pumpEvents[colOrder] - - cgmLite = cgm.drop(columns=fieldsToDrop) - cgmLite["hashID"] = hashID - cgmLite["age"] = np.floor((cgmLite["localTime"] - bDate).dt.days/365.25).astype(int) - cgmLite["ylw"] = np.floor((cgmLite["localTime"] - dDate).dt.days/365.25).astype(int) - 
cgmLite = round_time(cgmLite, timeIntervalMinutes=5, - timeField="localTime", - roundedTimeFieldName="localRoundedTime", - startWithFirstRecord=True, verbose=False) - - colOrder = ["hashID", "age", "ylw", "localTime", "localRoundedTime", - "mg_dL", "mmol_L"] - - cgmLite = cgmLite[colOrder] - - - # %% SAVE RESULTS - - # age and ylw stats - pumpEvents["rateTimesDurationHours"] = pumpEvents["rate"] * pumpEvents["durationHours"] - pumpEvents.rename(columns={"rate":"basalRate"}, inplace=True) - catDF = pumpEvents.groupby("age") - - # actual basal rates - agePump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") - agePump["basalRate.min"] = catDF["basalRate"].min() - agePump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() - agePump["basalRate.max"] = catDF["basalRate"].max() - - # insulin events - insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") - agePump = pd.concat([agePump, insulinEvents], axis=1) - - # carbs entered in bolus calculator - carbEvents = catDF["carbInput"].describe().add_prefix("carb.") - agePump = pd.concat([agePump, carbEvents], axis=1) - - # very low level cgm stats per age - catDF = cgmLite.groupby("age") - cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") - agePumpCgm = pd.concat([agePump, cgmStats], axis=1) - - agePumpCgm.reset_index(inplace=True) - - ageSummary = pd.merge(ageSummary, agePumpCgm, on="age", how="left") - ageSummary["hashID"] = hashID - allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True) - - allAgeSummaries.to_csv(os.path.join(outputPath, - "allAgeSummaries-dIndex-" + str(startIndex) + ".csv")) - - # repoeat for years living with - catDF = pumpEvents.groupby("ylw") - # actual basal rates - ylwPump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") - ylwPump["basalRate.min"] = catDF["basalRate"].min() - ylwPump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() 
- ylwPump["basalRate.max"] = catDF["basalRate"].max() - - # insulin events - insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") - ylwPump = pd.concat([ylwPump, insulinEvents], axis=1) - - # carbs entered in bolus calculator - carbEvents = catDF["carbInput"].describe().add_prefix("carb.") - ylwPump = pd.concat([ylwPump, carbEvents], axis=1) - - # very low level cgm stats per age - catDF = cgmLite.groupby("ylw") - cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") - ylwPumpCgm = pd.concat([ylwPump, cgmStats], axis=1) - - ylwPumpCgm.reset_index(inplace=True) - - ylwSummary = pd.merge(ylwSummary, ylwPumpCgm, on="ylw", how="left") - - ylwSummary["hashID"] = hashID - allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True) - - allYlwSummaries.to_csv(os.path.join(outputPath, - "allYlwSummaries-dIndex-" + str(startIndex) + ".csv")) - - - # repoeat for agne AND years living with - catDF = pumpEvents.groupby(["age", "ylw"]) - # actual basal rates - ageANDylwPump = pd.DataFrame(catDF["basalRate"].count()).add_suffix(".count") - ageANDylwPump["basalRate.min"] = catDF["basalRate"].min() - ageANDylwPump["basalRate.weightedMean"] = catDF["rateTimesDurationHours"].sum() / catDF["durationHours"].sum() - ageANDylwPump["basalRate.max"] = catDF["basalRate"].max() - - # insulin events - insulinEvents = catDF["unitsInsulin"].describe().add_prefix("insulin.") - ageANDylwPump = pd.concat([ageANDylwPump, insulinEvents], axis=1) - - # carbs entered in bolus calculator - carbEvents = catDF["carbInput"].describe().add_prefix("carb.") - ageANDylwPump = pd.concat([ageANDylwPump, carbEvents], axis=1) - - # very low level cgm stats per age - catDF = cgmLite.groupby(["age", "ylw"]) - cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") - ageANDylwPumpCgm = pd.concat([ageANDylwPump, cgmStats], axis=1) - - ageANDylwSummary = ageANDylwSummary.join(ageANDylwPumpCgm, how="left") - - ageANDylwPumpCgm.reset_index(inplace=True) - 
ageANDylwSummary.reset_index(inplace=True) + # %% GET AND SAVE RESULTS BY AGE AND YLW + for category in ["age", "ylw", ["age", "ylw"]]: + pumpSummary = get_pumpSummary(basalEvents, bolusEvents, dayData, category) + + # very low level cgm stats per age + catDF = cgm.groupby(category) + cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") + pumpCgmSummary = pd.concat([pumpSummary, cgmStats], axis=1) + + if category == "age": + pumpCgmSummary.reset_index(inplace=True) + ageSummary = pd.merge(ageSummary, pumpCgmSummary, on=category, how="left") + ageSummary["hashID"] = hashID + allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True) + allAgeSummaries.to_csv(os.path.join(outputPath, + "allAgeSummaries-dIndex-" + str(startIndex) + ".csv")) + elif category == "ylw": + pumpCgmSummary.reset_index(inplace=True) + ylwSummary = pd.merge(ylwSummary, pumpCgmSummary, on=category, how="left") + ylwSummary["hashID"] = hashID + allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True) + allYlwSummaries.to_csv(os.path.join(outputPath, + "allYlwSummaries-dIndex-" + str(startIndex) + ".csv")) + else: - ageANDylwSummary["hashID"] = hashID - allAgeANDylwSummaries = pd.concat([allAgeANDylwSummaries, ageANDylwSummary], ignore_index=True) + ageANDylwSummary = ageANDylwSummary.join(pumpCgmSummary, how="left") + pumpCgmSummary.reset_index(inplace=True) + pumpCgmSummary.reset_index(inplace=True) + pumpCgmSummary["hashID"] = hashID + allAgeANDylwSummaries = pd.concat([allAgeANDylwSummaries, pumpCgmSummary], ignore_index=True) - allAgeANDylwSummaries.to_csv(os.path.join(outputPath, - "allAgeANDylwSummaries-dIndex-" + str(startIndex) + ".csv")) + allAgeANDylwSummaries.to_csv(os.path.join(outputPath, + "allAgeANDylwSummaries-dIndex-" + str(startIndex) + ".csv")) # %% save data for this person @@ -1578,9 +1612,9 @@ def isf_likely_units(df, columnHeading): outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s" outputFormat = (f"{int(minAge):02d}", 
f"{int(maxAge):02d}", - f"{int(minYLW):02d}", - f"{int(maxYLW):02d}", - f"{int(nDaysClosedLoop):03d}", + f"{int(minYLW):02d}", + f"{int(maxYLW):02d}", + f"{int(nDaysClosedLoop):03d}", f"{int(n670gDays):03d}", hashID[0:4]) outputFolderName = outputString % outputFormat @@ -1592,19 +1626,27 @@ def isf_likely_units(df, columnHeading): os.makedirs(outputFolderName_Path) fName = outputFolderName + "-allSettings.csv" - allSettings.to_csv(os.path.join(outputFolderName_Path, fName)) - fName = outputFolderName + "-pumpEvents.csv" - pumpEvents.to_csv(os.path.join(outputFolderName_Path, fName)) - fName = outputFolderName + "-cgmLite.csv" - cgmLite.to_csv(os.path.join(outputFolderName_Path, fName)) + allSettingsMinusPumpSerial = allSettings.copy().drop(columns=["deviceId"]) + allSettingsMinusPumpSerial.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-dayData.csv" + dayDataMinusPumpSerial = dayData.copy().drop(columns=["deviceId"]) + dayDataMinusPumpSerial.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-basalEvents.csv" + basalEvents.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-bolusEvents.csv" + bolusEvents.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-cgm.csv" + cgm.to_csv(os.path.join(outputFolderName_Path, fName)) # %% save the processed data (saving this data will take up a lot of space and time) - data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) - basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) - bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) - cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) - pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) +# data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) +# 
basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) +# bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) +# cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) +# pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) +# allSettings.to_csv(os.path.join(processedDataPath, "allSettings-PHI-" + userID + ".csv")) +# dayData.to_csv(os.path.join(processedDataPath, "dayData-PHI-" + userID + ".csv")) else: metadata["flags"] = "no bolus wizard data" @@ -1617,9 +1659,9 @@ def isf_likely_units(df, columnHeading): else: metadata["flags"] = "missing bDay/dDay" - except: - print("something is broke dIndex=", dIndex) - metadata["flags"] = "something is broke" +# except: +# print("something is broke dIndex=", dIndex) +# metadata["flags"] = "something is broke" # write metaData to allMetadata From 99c0de78c890cc9c1c45079488989707c70e8230 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 29 Jan 2019 18:24:26 -0600 Subject: [PATCH 62/78] add cgm and episode stats --- .../get-users-settings-and-events.py | 193 ++++++++++++++++-- 1 file changed, 178 insertions(+), 15 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 21e616b2..fb523d18 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -657,8 +657,130 @@ def get_pumpSummary(basalEventsDF, bolusEventsDF, dayDataDF, categories): return pumpSummaryDF +def get_episodes(df): + df = df.copy().sort_values("localTime").reset_index(drop=True) + allEpisodes = pd.DataFrame() + cgmFrequency = 5.0 + episodeCriteria = pd.DataFrame({"threshold": [54, 70, 180, 250], + "duration": [15, 60, 120, 120], + "percentReadings": [75, 75, 75, 75], + "episodeName": ["extreme-hypo", "hypo", + "hyper", "extreme-hyper"]}) + episodes = pd.DataFrame() + for 
episodeType in range(0,len(episodeCriteria)): + + # first find all of the cross points + episodeThreshold = episodeCriteria.loc[episodeType, "threshold"] + episodeDurationRequirement = episodeCriteria.loc[episodeType, "duration"] + episodePercentOfReadings = episodeCriteria.loc[episodeType, "percentReadings"] + episodeName = episodeCriteria.loc[episodeType, "episodeName"] + + if episodeThreshold > 110: + + df["startCrossPoint"] = ((df.mg_dL.shift(1) <= episodeThreshold) & + (df.mg_dL > episodeThreshold)) + + df["endCrossPoint"] = ((df.mg_dL.shift(1) > episodeThreshold) & + (df.mg_dL <= episodeThreshold)) + + else: + df["startCrossPoint"] = ((df.mg_dL.shift(1) >= episodeThreshold) & + (df.mg_dL < episodeThreshold)) + + df["endCrossPoint"] = ((df.mg_dL.shift(1) < episodeThreshold) & + (df.mg_dL >= episodeThreshold)) + + + startList = pd.DataFrame(df[df.startCrossPoint].roundedLocalTime) + endList = pd.DataFrame(df[df.endCrossPoint].roundedLocalTime) + if len(startList) > len(endList): + endList = endList.append( + df.loc[df.roundedLocalTime == df.roundedLocalTime.max(), + ["roundedLocalTime"]] + ) + elif len(startList) < len(endList): + startList = startList.append( + df.loc[df.roundedLocalTime == df.roundedLocalTime.min(), + ["roundedLocalTime"]] + ).sort_index() + + if len(startList) == len(endList): + + episodes = pd.concat([startList.reset_index().add_prefix("start."), + endList.reset_index().add_prefix("end.")], axis=1) + + episodes["durationMinutes"] = \ + (episodes["end.roundedLocalTime"] - episodes["start.roundedLocalTime"]).dt.seconds / 60 + + episodes["totalPoints"] = episodes["end.index"] - episodes["start.index"] + episodes["totalPossiblePoints"] = episodes["durationMinutes"] / cgmFrequency + episodes["percentOfReadings"] = episodes["totalPoints"] / episodes["totalPossiblePoints"] * 100 + + else: + "figure out how to resolve this case if it exists" + pdb.set_trace() + + episodes = episodes[(episodes.durationMinutes >= episodeDurationRequirement) & + 
(episodes.percentOfReadings >= episodePercentOfReadings)].reset_index(drop=True) + episodes["criterion.name"] = episodeName + episodes["criterion.threshold"] = episodeThreshold + episodes["criterion.duration"] = episodeDurationRequirement + episodes["criterion.percentOfReadings"] = episodePercentOfReadings + + allEpisodes = pd.concat([allEpisodes, episodes]).reset_index(drop=True) + + return allEpisodes + + +def get_cgmStats(df): + + statDF = pd.Series(df.mg_dL.describe()) + statDF.rename(index={"count":"totalNumberCBGValues"}, inplace=True) + + statDF["mean_mgdL"] = df.mg_dL.mean() + statDF["std_mgdL"] = df.mg_dL.std() + statDF["cov_mgdL"] = statDF["std_mgdL"] / statDF["mean_mgdL"] + + statDF["totalBelow54"] = sum(df.mg_dL < 54) + statDF["totalBelow70"] = sum(df.mg_dL < 70) + statDF["total54to70"] = sum((df.mg_dL >= 54) & (df.mg_dL < 70)) + statDF["total70to140"] = sum((df.mg_dL >= 70) & (df.mg_dL <= 140)) + statDF["total70to180"] = sum((df.mg_dL >= 70) & (df.mg_dL <= 180)) + statDF["total180to250"] = sum((df.mg_dL > 180) & (df.mg_dL <= 250)) + statDF["totalAbove180"] = sum(df.mg_dL > 180) + statDF["totalAbove250"] = sum(df.mg_dL > 250) + + statDF["percentBelow54"] = statDF["totalBelow54"] / statDF["totalNumberCBGValues"] + statDF["percentBelow70"] = statDF["totalBelow70"] / statDF["totalNumberCBGValues"] + statDF["percent70to140"] = statDF["total70to140"] / statDF["totalNumberCBGValues"] + statDF["percent70to180"] = statDF["total70to180"] / statDF["totalNumberCBGValues"] + statDF["percentAbove180"] = statDF["totalAbove180"] / statDF["totalNumberCBGValues"] + statDF["percentAbove250"] = statDF["totalAbove250"] / statDF["totalNumberCBGValues"] + + statDF["min_mgdL"] = df.mg_dL.min() + statDF["median_mgdL"] = df.mg_dL.describe()["50%"] + statDF["max_mgdL"] = df.mg_dL.max() + + # calculate the start and end time of the cbg data + startTime = df["roundedLocalTime"].min() + statDF["startTime"] = startTime + endTime = df["roundedLocalTime"].max() + statDF["endTime"] = 
endTime + cgmFrequency = np.round((endTime - startTime).seconds / statDF["totalNumberCBGValues"]) + + # sense whether cgm data comes in 5 minute or 15 minute intervals + cgmFrequency = \ + np.nanmedian((df["roundedLocalTime"] - df["roundedLocalTime"].shift(1)).dt.seconds / 60) + + statDF["cgmFrequency"] = cgmFrequency + statDF["totalNumberPossibleCBGvalues"] = len(pd.date_range(startTime, endTime, freq=str(int(cgmFrequency)) + "min")) + statDF["percentCgmValues"] = statDF["totalNumberCBGValues"] / statDF["totalNumberPossibleCBGvalues"] + + return statDF + + # %% DELELET LATER -args.startIndex = 0 +args.startIndex = 46 args.endIndex = 4226 @@ -1357,7 +1479,34 @@ def get_pumpSummary(basalEventsDF, bolusEventsDF, dayDataDF, categories): totalDailyCarbs.rename(columns={"carbInput": "totalDailyCarbs"}, inplace=True) dayData = pd.merge(dayData, totalDailyCarbs, how="left", on="day") - # valid pump should be having exactly 24 hours of basal rate + # get daily cgm stats + cgm.sort_values("localTime", inplace=True) + cgmCountsPerDay = cgm.groupby("day")["mg_dL"].count().reset_index() + cgmCountsPerDay.rename(columns={"mg_dL":"cgmCountsPerDay"}, inplace=True) + cgm = pd.merge(cgm, cgmCountsPerDay, how="left", on="day") + + cgmStats = cgm[cgm["cgmCountsPerDay"] > 1].groupby("day").apply(get_cgmStats) + # fix start and end times (not sure why the get transformed to ints) + cgmStats["startTime"] = pd.to_datetime(cgmStats["startTime"]) + cgmStats["endTime"] = pd.to_datetime(cgmStats["endTime"]) + + cgmStats = cgmStats.add_prefix("cgm.") + cgmStats.reset_index(inplace=True) + dayData = pd.merge(dayData, cgmStats, how="left", on="day") + + # %% get all episodes + allEpisodes = get_episodes(cgm) + allEpisodes["day"] = allEpisodes["start.roundedLocalTime"].dt.date + allEpisodes = pd.merge(allEpisodes, dayData[["age", "ylw", "day"]], how="left", on="day") + + for episodeType in allEpisodes["criterion.name"].unique(): + episodeGroup = allEpisodes[allEpisodes["criterion.name"] == 
episodeType].groupby(["day"]) + episodeDaySummary = episodeGroup["durationMinutes"].describe().add_prefix(episodeType + "-durationMinutes.") + episodeDaySummary.rename(columns={episodeType + "-durationMinutes.count": episodeType + ".count"}, inplace=True) + episodeDaySummary.reset_index(inplace=True) + dayData = pd.merge(dayData, episodeDaySummary, how="left", on="day") + + # %% valid pump should be having exactly 24 hours of basal rate dayData["validPumpData"] = dayData["totalBasalDuration"] == 24 dayData["atLeast3Boluses"] = dayData["numberOfNormalBoluses"] >= 3 @@ -1576,23 +1725,35 @@ def get_pumpSummary(basalEventsDF, bolusEventsDF, dayDataDF, categories): for category in ["age", "ylw", ["age", "ylw"]]: pumpSummary = get_pumpSummary(basalEvents, bolusEvents, dayData, category) - # very low level cgm stats per age - catDF = cgm.groupby(category) - cgmStats = catDF["mg_dL"].describe().add_prefix("cgm.") + # cgm stats per category + catDF = cgm[cgm["cgmCountsPerDay"] > 1].groupby(category) + cgmStats = catDF.apply(get_cgmStats) + # fix start and end times (not sure why the get transformed to ints) + cgmStats["startTime"] = pd.to_datetime(cgmStats["startTime"]) + cgmStats["endTime"] = pd.to_datetime(cgmStats["endTime"]) + + cgmStats = cgmStats.add_prefix("cgm.") pumpCgmSummary = pd.concat([pumpSummary, cgmStats], axis=1) + # get all episodes + for episodeType in allEpisodes["criterion.name"].unique(): + episodeGroup = allEpisodes[allEpisodes["criterion.name"] == episodeType].groupby(category) + episodeDaySummary = episodeGroup["durationMinutes"].describe().add_prefix(episodeType + "-durationMinutes.") + episodeDaySummary.rename(columns={episodeType + "-durationMinutes.count": episodeType + ".count"}, inplace=True) + pumpCgmSummary = pd.concat([pumpCgmSummary, episodeDaySummary], axis=1) + if category == "age": pumpCgmSummary.reset_index(inplace=True) ageSummary = pd.merge(ageSummary, pumpCgmSummary, on=category, how="left") ageSummary["hashID"] = hashID - 
allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True) + allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True, sort=False) allAgeSummaries.to_csv(os.path.join(outputPath, "allAgeSummaries-dIndex-" + str(startIndex) + ".csv")) elif category == "ylw": pumpCgmSummary.reset_index(inplace=True) ylwSummary = pd.merge(ylwSummary, pumpCgmSummary, on=category, how="left") ylwSummary["hashID"] = hashID - allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True) + allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True, sort=False) allYlwSummaries.to_csv(os.path.join(outputPath, "allYlwSummaries-dIndex-" + str(startIndex) + ".csv")) else: @@ -1601,7 +1762,7 @@ def get_pumpSummary(basalEventsDF, bolusEventsDF, dayDataDF, categories): pumpCgmSummary.reset_index(inplace=True) pumpCgmSummary.reset_index(inplace=True) pumpCgmSummary["hashID"] = hashID - allAgeANDylwSummaries = pd.concat([allAgeANDylwSummaries, pumpCgmSummary], ignore_index=True) + allAgeANDylwSummaries = pd.concat([allAgeANDylwSummaries, pumpCgmSummary], ignore_index=True, sort=False) allAgeANDylwSummaries.to_csv(os.path.join(outputPath, "allAgeANDylwSummaries-dIndex-" + str(startIndex) + ".csv")) @@ -1637,16 +1798,18 @@ def get_pumpSummary(basalEventsDF, bolusEventsDF, dayDataDF, categories): bolusEvents.to_csv(os.path.join(outputFolderName_Path, fName)) fName = outputFolderName + "-cgm.csv" cgm.to_csv(os.path.join(outputFolderName_Path, fName)) + fName = outputFolderName + "-allEpisodes.csv" + allEpisodes.to_csv(os.path.join(outputFolderName_Path, fName)) # %% save the processed data (saving this data will take up a lot of space and time) -# data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) -# basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) -# bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) -# 
cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) -# pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) -# allSettings.to_csv(os.path.join(processedDataPath, "allSettings-PHI-" + userID + ".csv")) -# dayData.to_csv(os.path.join(processedDataPath, "dayData-PHI-" + userID + ".csv")) + data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv")) + basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv")) + bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv")) + cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv")) + pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv")) + allSettings.to_csv(os.path.join(processedDataPath, "allSettings-PHI-" + userID + ".csv")) + dayData.to_csv(os.path.join(processedDataPath, "dayData-PHI-" + userID + ".csv")) else: metadata["flags"] = "no bolus wizard data" From 6423aeef340f644e6fd165c62adc275a01b08f3a Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 29 Jan 2019 18:46:08 -0600 Subject: [PATCH 63/78] fix return typo --- projects/predict-simulate/get-users-settings-and-events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index fb523d18..357c7180 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -729,7 +729,7 @@ def get_episodes(df): allEpisodes = pd.concat([allEpisodes, episodes]).reset_index(drop=True) - return allEpisodes + return allEpisodes def get_cgmStats(df): @@ -780,7 +780,7 @@ def get_cgmStats(df): # %% DELELET LATER -args.startIndex = 46 +args.startIndex = 0 args.endIndex = 4226 From 6934f7c3f085f3ebfeabc7decb76bf56df5df5af Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 29 Jan 2019 19:26:58 
-0600 Subject: [PATCH 64/78] turn the try-except back on for run on AWS --- .../get-users-settings-and-events.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 357c7180..892d0e37 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -820,7 +820,7 @@ def get_cgmStats(df): metadata = pd.DataFrame(index=[dIndex]) metadata["hashID"] = hashID - if 1 == 1: # try: + try: # make folder to save data processedDataPath = os.path.join(phiOutputPath, "PHI-" + userID) if not os.path.exists(processedDataPath): @@ -834,7 +834,6 @@ def get_cgmStats(df): # %% LOAD IN DONOR JSON DATA - jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") @@ -950,8 +949,7 @@ def get_cgmStats(df): bolus["duration"].replace(0, np.nan, inplace=True) bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 bolus["rate"] = bolus["extended"] / bolus["durationHours"] -# bolusExtendedCH = ["localTime", "timezone", "roundedTime", "roundedLocalTime", -# "durationHours", "rate", "type"] + bolusExtendedCH = ["hashID", "age", "ylw", "day", "utcTime", "localTime", "timezone", "tzo", "roundedTime", "roundedLocalTime", @@ -1822,9 +1820,9 @@ def get_cgmStats(df): else: metadata["flags"] = "missing bDay/dDay" -# except: -# print("something is broke dIndex=", dIndex) -# metadata["flags"] = "something is broke" + except: + print("something is broke dIndex=", dIndex) + metadata["flags"] = "something is broke" # write metaData to allMetadata From 0f8dd254793012cbad1f33414dc4db9bbcb31b25 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 29 Jan 2019 20:08:24 -0600 Subject: [PATCH 65/78] remove input argument bypass --- projects/predict-simulate/get-users-settings-and-events.py | 5 ----- 1 file 
changed, 5 deletions(-) diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/predict-simulate/get-users-settings-and-events.py index 892d0e37..10b2b7c6 100644 --- a/projects/predict-simulate/get-users-settings-and-events.py +++ b/projects/predict-simulate/get-users-settings-and-events.py @@ -779,11 +779,6 @@ def get_cgmStats(df): return statDF -# %% DELELET LATER -args.startIndex = 0 -args.endIndex = 4226 - - # %% START OF CODE dataPulledDate = args.dateStamp dataPulledDF = pd.DataFrame(pd.to_datetime(dataPulledDate), columns=["day"], index=[0]) From 7d183b047278b13d387401242d02a8636b1a766a Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 31 Jan 2019 07:14:41 -0600 Subject: [PATCH 66/78] gather and combine files for analysis --- projects/predict-simulate/gather-data.py | 71 ++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 projects/predict-simulate/gather-data.py diff --git a/projects/predict-simulate/gather-data.py b/projects/predict-simulate/gather-data.py new file mode 100644 index 00000000..b77bcafc --- /dev/null +++ b/projects/predict-simulate/gather-data.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +description: gather the ouput from get users settings and events +version: 0.0.1 +created: 2019-01-30 +author: Ed Nykaza +dependencies: + * +license: BSD-2-Clause +""" + +# %% REQUIRED LIBRARIES +import pandas as pd +import datetime as dt +import os +import argparse +import glob + + +# %% USER INPUTS (ADD THIS IN LATER) +codeDescription = "Get user's settings and events" +parser = argparse.ArgumentParser(description=codeDescription) + +parser.add_argument("-d", + "--dataPulledDate", + dest="dataPulledDate", + default="2018-09-28", + help="date in '%Y-%m-%d' format of unique donor list" + + "(e.g., PHI-2018-03-02-uniqueDonorList)") + +parser.add_argument("-p", + "--dataProcessedDate", + dest="dataProcessedDate", + default="2019-01-21", + help="date in '%Y-%m-%d' format") + 
+args = parser.parse_args() + + +# %% START OF CODE +dataPulledDate = args.dataPulledDate +dataProcessedDate = pd.to_datetime(args.dataProcessedDate) + +phiDate = "PHI-" + dataPulledDate +donorPath = os.path.join( + "..", "bigdata-processing-pipeline", + "data", phiDate + "-donor-data") + +outputPath = os.path.join(donorPath, "settings-and-events") + +for name in ["allMetadata", "allAgeANDylwSummaries", + "allAgeSummaries", "allYlwSummaries", + "dayData", "pumpEvents"]: + allDF = pd.DataFrame() + if name.startswith("all"): + files = glob.glob(os.path.join(outputPath, name + '*')) + else: + files = glob.glob( + os.path.join(outputPath, "data", "**", "*-" + name + ".csv")) + for f in files: + dateModified = \ + pd.to_datetime(dt.datetime.fromtimestamp(os.path.getmtime(f))) + if dateModified > dataProcessedDate: + tempDF = pd.read_csv(f, low_memory=False) + tempDF.rename( + columns={'Unnamed: 0': 'originalIndex'}, inplace=True) + tempDF["from"] = f + allDF = pd.concat([allDF, tempDF], ignore_index=True, sort=False) + allDF.to_csv(os.path.join(outputPath, "combined-" + name + ".csv")) + print("completed " + name) From f78f128611afd2db1ed7386c8bc7ba1244be2e09 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 31 Jan 2019 08:10:53 -0600 Subject: [PATCH 67/78] pumpEvents no longer exists, change to basal and bolus Events --- projects/predict-simulate/gather-data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/predict-simulate/gather-data.py b/projects/predict-simulate/gather-data.py index b77bcafc..b3c32465 100644 --- a/projects/predict-simulate/gather-data.py +++ b/projects/predict-simulate/gather-data.py @@ -51,7 +51,7 @@ for name in ["allMetadata", "allAgeANDylwSummaries", "allAgeSummaries", "allYlwSummaries", - "dayData", "pumpEvents"]: + "basalEvents", "bolusEvents"]: allDF = pd.DataFrame() if name.startswith("all"): files = glob.glob(os.path.join(outputPath, name + '*')) From ec8b7cb5f01c3b6aa6f82f1bd527f4c3505f5eb0 Mon Sep 17 00:00:00 
2001 From: Ed Nykaza Date: Thu, 31 Jan 2019 08:14:21 -0600 Subject: [PATCH 68/78] add dayData back to output --- projects/predict-simulate/gather-data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/predict-simulate/gather-data.py b/projects/predict-simulate/gather-data.py index b3c32465..d2093b22 100644 --- a/projects/predict-simulate/gather-data.py +++ b/projects/predict-simulate/gather-data.py @@ -51,7 +51,7 @@ for name in ["allMetadata", "allAgeANDylwSummaries", "allAgeSummaries", "allYlwSummaries", - "basalEvents", "bolusEvents"]: + "dayData", "basalEvents", "bolusEvents"]: allDF = pd.DataFrame() if name.startswith("all"): files = glob.glob(os.path.join(outputPath, name + '*')) From 500c124531879c93fee23852aa797e6439d6452a Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Fri, 8 Feb 2019 07:59:56 -0600 Subject: [PATCH 69/78] update packages to include allow static figure recreate with plotly --- environment.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environment.yml b/environment.yml index 7061885d..043d73c7 100644 --- a/environment.yml +++ b/environment.yml @@ -12,6 +12,9 @@ dependencies: - matplotlib - scikit-learn - plotly +- plotly::plotly-orca +- poppler +- psutil - r - r-essentials - pytest From f5f146ecceda14f580b1bb1ad987f3c7e5131bce Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Wed, 13 Feb 2019 09:04:14 -0600 Subject: [PATCH 70/78] add a work in progress (wip) to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ba5690ed..8492155a 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ data figures isf-basal-figures fonts +wip # Test htmlcov From effb6ad73ddeaa32213903998013dcea72ed8bc5 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Mon, 20 May 2019 10:34:13 -0500 Subject: [PATCH 71/78] code used to generate summary tables --- .../gather-data.py | 0 .../get-users-settings-and-events.py | 0 .../visualize-users-settings-and-events-v3.py | 1273 
+++++++++++++++++ 3 files changed, 1273 insertions(+) rename projects/{predict-simulate => get-donors-pump-settings}/gather-data.py (100%) rename projects/{predict-simulate => get-donors-pump-settings}/get-users-settings-and-events.py (100%) create mode 100644 projects/get-donors-pump-settings/visualize-users-settings-and-events-v3.py diff --git a/projects/predict-simulate/gather-data.py b/projects/get-donors-pump-settings/gather-data.py similarity index 100% rename from projects/predict-simulate/gather-data.py rename to projects/get-donors-pump-settings/gather-data.py diff --git a/projects/predict-simulate/get-users-settings-and-events.py b/projects/get-donors-pump-settings/get-users-settings-and-events.py similarity index 100% rename from projects/predict-simulate/get-users-settings-and-events.py rename to projects/get-donors-pump-settings/get-users-settings-and-events.py diff --git a/projects/get-donors-pump-settings/visualize-users-settings-and-events-v3.py b/projects/get-donors-pump-settings/visualize-users-settings-and-events-v3.py new file mode 100644 index 00000000..7f4c38e4 --- /dev/null +++ b/projects/get-donors-pump-settings/visualize-users-settings-and-events-v3.py @@ -0,0 +1,1273 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Jan 22 06:46:33 2019 + +@author: ed +""" + +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +description: visualize users settings and events +version: 0.0.1 +created: 2019-01-11 +author: Ed Nykaza +dependencies: + * +license: BSD-2-Clause +""" + + +# %% REQUIRED LIBRARIES +import pandas as pd +import numpy as np +from pytz import timezone +from datetime import timedelta +import datetime as dt +import os +import argparse +import pdb +import matplotlib.pyplot as plt +import plotly +import plotly.plotly as py +import plotly.graph_objs as go +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot +import plotly.io as pio + + +# %% FUNCTIONS +def make_bold(val_list): + bold_list = 
[] + for val in val_list: + bold_list.append('' + str(val) + '') + return bold_list + +def make_bold_and_round(val_list, nDecimalPlaces): + bold_list = [] + for val in val_list: + if nDecimalPlaces == 0: + bold_list.append('' + str(int(np.round(val, nDecimalPlaces))) + '') + + else: + bold_list.append('' + str(np.round(val, nDecimalPlaces)) + '') + return bold_list + + +def save_fig(fig, plot_name, width, height, scale): + pio.write_image( + fig, + os.path.join( + "..", + "figures", + plot_name + ".png" + ), + width=width, + height=height, + scale=scale) + + return + + +def make_static_plot(field, yLabel, figName, df, yMin, yMax): + + df.sort_values("categories", inplace=True) + + traces = [] + for yd in df.categories.unique(): + traces.append(go.Box( + y=df.loc[df["categories"] == yd, field].values, + x=df.loc[df["categories"] == yd, "categories"].values, + name=yd, + boxpoints="all", + notched=True, + hoverlabel=dict(font=dict(size=22)), + marker=dict( + color=df.loc[df["categories"] == yd, "allColors"].describe()["top"], + opacity=0, + ), + )) + + layout = go.Layout( + font=dict( + size=22 + ), + xaxis=dict( + tickangle=52.5 + ), + yaxis=dict( + title=yLabel, + range=[yMin, yMax], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ), + margin=dict( + l=100, + r=200, + b=250, + t=50, + ), + + boxmode='group', + showlegend=False, + legend=dict(font=dict(size=14)) + ) + + fig = go.Figure(data=traces, layout=layout) + + save_fig(fig, figName + "-boxplot-lowRes", 1800, 1200, 1) + save_fig(fig, figName + "-boxplot-highRes", 1800, 1200, 4) + +def make_static_table(field, figName, filteredDF, nDecimals, return_summaryTable=False): + + # first make an overall table + allCounts = filteredDF.groupby(["hashID"])[field].describe() + allAgeTable = pd.DataFrame(index=[field]) + allAgeTable["min"] = allCounts["min"].min() + allAgeTable["max"] = allCounts["max"].max() + allAgeTable["U"] = len(allCounts) + 
allAgeTable["N"] = allCounts["count"].sum() + + # then make summary per categories + uniqueCounts = filteredDF.groupby(["categories"])["hashID"].describe() + uniqueCounts.reset_index(inplace=True) + summaryTable = filteredDF.groupby("categories")[field].describe() + summaryTable.reset_index(inplace=True) + summaryTable = pd.merge(summaryTable, uniqueCounts[["categories", "unique"]], how="left", on="categories") + summaryTable = pd.merge(summaryTable, catColorDF, how="left", on="categories") + summaryTable["unique"] = summaryTable["unique"].astype(float) + + # add in interquartile range + summaryTable["IQR"] = summaryTable["75%"] - summaryTable["25%"] + + col_headings = make_bold(["Group", "N", "U", "Average", "Stdev", "Min", "Q1", "Median", "Q3", "Max"]) + + trace = go.Table( + header=dict(values=col_headings, + fill = dict(color='white'), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=12)), + columnwidth=[1.5, 1, 1, 1, 1, 1, 1, 1, 1, 1], + cells=dict(values=[make_bold(summaryTable["categories"]), + make_bold_and_round(summaryTable["count"], 0), + make_bold_and_round(summaryTable["unique"], 0), + make_bold_and_round(summaryTable["mean"], nDecimals), + make_bold_and_round(summaryTable["std"], nDecimals), + make_bold_and_round(summaryTable["min"], nDecimals), + make_bold_and_round(summaryTable["25%"], nDecimals), + make_bold_and_round(summaryTable["50%"], nDecimals), + make_bold_and_round(summaryTable["75%"], nDecimals), + make_bold_and_round(summaryTable["max"], nDecimals)], + fill = dict(color = [summaryTable["allColors"]]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=10), + height = 20) + ) + + fig = go.Figure() + fig.add_trace(trace) + + pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-table-highRes.png" + ), + width=1200, + height=1200, + scale=4) + + pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-table-lowRes.png" + ), + width=1200, + 
height=1200, + scale=1) + + summaryTable.to_csv( + os.path.join( + "..", + "figures", + figName + "-table.csv" + ) + ) + allAgeTable.to_csv( + os.path.join( + "..", + "figures", + figName + "-all-age-table.csv" + ) + ) + + if return_summaryTable: + return summaryTable, allAgeTable + else: + return + + +def make_lite_interactive_boxplot(field, yLabel, df, yMin, yMax): + df.sort_values("categories", inplace=True) + + traces = [] + for yd in df.categories.unique(): + yValues = df.loc[df["categories"] == yd, field] + yStats = yValues.describe() + yMinimum = yStats["min"] + yQ1 = yStats["25%"] + yQ2 = yStats["50%"] + yQ3 = yStats["75%"] + yMaximum = yStats["max"] + yIQR = yQ3 - yQ1 + maxWhisker = yIQR * 1.5 + lowWhiskerBound = yQ1 - maxWhisker + highWhiskerBound = yQ3 + maxWhisker + yLowerFence = yValues[yValues >= lowWhiskerBound].min() + yUpperFence = yValues[yValues <= highWhiskerBound].max() + yBoxData = [yMinimum, yLowerFence, yQ1, yQ1, yQ1, yQ1, yQ1, + yQ2, yQ3, yQ3, yQ3, yQ3, yQ3, + yUpperFence, yMaximum] + + # get N and U + nDays = df.loc[df["categories"] == yd, "count"].median().astype(int) + uniqueDonors = df.loc[df["categories"] == yd, "unique"].median().astype(int) + + traces.append(go.Box( + y=yBoxData, + jitter=0, + pointpos=0, + text=list(np.repeat("N=%s, U=%s" % (nDays, uniqueDonors), len(yBoxData))), + hoverinfo="y+text", + name=yd, + boxpoints="all", + notched=False, + marker=dict( + color=df.loc[df["categories"] == yd, "allColors"].describe()["top"], + opacity=0, + ), + )) + + layout = go.Layout( + yaxis=dict( + title=yLabel, + range=[yMin, yMax], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ), + showlegend=True + ) + + fig = go.Figure(data=traces, layout=layout) + plot_url = py.plot(fig, filename="Distribution of " + figName, auto_open=False) + print(figName, plot_url) + + return + + +def filter_data(df, min_days_criteria=7): + + # keep all type1 adn null diagnosis data (not 
specified) + df = df[((df.diagnosisType.isnull()) | (df.diagnosisType == "type1"))] + + # filter out invalid ages and ylw + df = df[((df.age.astype(float) >= 0) & (df.age.astype(float) <= 90))] + df = df[((df.ylw.astype(float) >= 0) & (df.ylw.astype(float) <= 80))] + + # filter out invalid pump and cgm days + df = df[((df["validPumpData"]) & (df["validCGMData"]))] + + # filter out Paradigm Veo Pumps + df = df[~df["pump.top"].str.contains("Paradigm Veo")] + + # filter out omnipod with mg/dL likely settings + df = df[~((df["pump.top"].str.contains("InsOmn-130")) & + (df['pumpSettings.isfLikelyUnits'] == "mg/dL"))] + + # require a minimum number of days of data + dayGroups = pd.DataFrame(df.groupby(["hashID", "age", "ylw"]).day.count()).reset_index() + dayGroups.rename(columns={"day": "nDays"}, inplace=True) + df = pd.merge(df, dayGroups, how="left", on=["hashID", "age", "ylw"]) + + df = df[df["nDays"] >= min_days_criteria] + + return df + + +def merge_dayData(df, dayDF): + + df = pd.merge( + df, + dayDF[[ + "hashID", + "day", + "validPumpData", + "atLeast3Boluses", + "validCGMData", + "diagnosisType", + "pump.top", + "pumpSettings.isfLikelyUnits" + ]], + how="left", + on=["hashID", "day"] + ) + + return df + + +def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min_unique_donors=10): + + # bin data (defined above) + df["ageBins"] = pd.cut(df["age"], ageBins, labels=ageGroupNames) + df["ylwBins"] = pd.cut(df["ylw"], ylwBins, labels=ylwGroupNames) + df["ageCategories"] = df["ageBins"].astype(str) + df["ylwCategories"] = df["ylwBins"].astype(str) + df["categories"] = "age " + df["ageBins"].astype(str) + " ylw " + df["ylwBins"].astype(str) + + # attach bin colors (defined above) + df = pd.merge(df, catColorDF, how="left", on="categories") + df["categories"].astype("category", inplace=True) + + # attach counts per group + dGroups = df.groupby("categories") + groupDF = dGroups["hashID"].describe() + groupDF["ageCategories"] = 
dGroups["ageCategories"].describe()["top"] + groupDF["ylwCategories"] = dGroups["ylwCategories"].describe()["top"] + #groupDF["ylwAlpha"] = dGroups["ylwAlpha"].mean() + groupDF["allColors"] = dGroups["allColors"].describe()["top"] + groupDF.reset_index(inplace=True) + + # attach group counts to the main dataframe + df = pd.merge(df, groupDF[["categories", "count", "unique"]], how="left", on="categories") + + # remove all categories that do NOT have at least 10 unique people + df = df[df["unique"] > min_unique_donors] + groupDF = groupDF[groupDF["unique"] > min_unique_donors] + + # attach N and U to the categories + df["categoriesFull"] = ( + df["categories"].astype(str) + + " (N=" + df["count"].astype(str) + + ", U=" + df["unique"].astype(str) + ")" + ) + + return df, groupDF + + +# %% define age and years living with bins +group_title = "-withYlw0" + + +# next bin the data by age-ylw groups +dataGroupName = "age-ylw-groups" +ageBins = np.array([0,5,8,12,17,24,85]) +ylwBins = np.array([-1,0,1,2,5,10,25,75]) + +# bin by age +ageGroupNames = [] +for x, y in zip(ageBins[:-1]+1, ageBins[1:]): + ageGroupNames.append("%s-%s"%(f"{x:02d}", f"{y:02d}")) + +ylwGroupNames = [] +for x, y in zip(ylwBins[:-1]+1, ylwBins[1:]): + if x == y: + ylwGroupNames.append("%s"%(f"{x:02d}")) + else: + ylwGroupNames.append("%s-%s"%(f"{x:02d}", f"{y:02d}")) + +catColors = [ + '#f0d8e5','#f4bdd8','#f7a0cc','#f781bf', + '#ebc3c1','#f1a095','#f17d6c','#ec5644','#e41a1c', + '#f2d8c3','#fbc299','#ffac6f','#ff9746','#ff7f00', + '#d0e1cc','#b8d8b2','#9fcd97','#86c37e','#6cb964','#4daf4a', + '#c9d6e3','#afc4da','#95b1d2','#7aa0c9','#5b8fc1','#377eb8', + '#dacbde','#d0b6d4','#c5a1ca','#ba8dc0','#af78b7','#a464ad','#984ea3' +] + +finalCategories = [ + 'age 01-05 ylw 00', 'age 01-05 ylw 01', 'age 01-05 ylw 02', + 'age 01-05 ylw 03-05', 'age 06-08 ylw 00', 'age 06-08 ylw 01', + 'age 06-08 ylw 02', 'age 06-08 ylw 03-05', 'age 06-08 ylw 06-10', + 'age 09-12 ylw 00', 'age 09-12 ylw 01', 'age 09-12 ylw 02', 
+ 'age 09-12 ylw 03-05', 'age 09-12 ylw 06-10', 'age 13-17 ylw 00', + 'age 13-17 ylw 01', 'age 13-17 ylw 02', 'age 13-17 ylw 03-05', + 'age 13-17 ylw 06-10', 'age 13-17 ylw 11-25', 'age 18-24 ylw 00', + 'age 18-24 ylw 01', 'age 18-24 ylw 02', 'age 18-24 ylw 03-05', + 'age 18-24 ylw 06-10', 'age 18-24 ylw 11-25', 'age 25-85 ylw 00', + 'age 25-85 ylw 01', 'age 25-85 ylw 02', 'age 25-85 ylw 03-05', + 'age 25-85 ylw 06-10', 'age 25-85 ylw 11-25', + 'age 25-85 ylw 26-75' +] + +catColorDF = pd.DataFrame(data=[finalCategories, catColors], index=["categories", "allColors"]).T + + +# %% load in summary donor data +dataPulledDate = "2019-01-10" +dataProcessedDate = "2019-01-22" + +phiDate = "PHI-" + dataPulledDate +donorPath = os.path.join("..", "..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") +donorList = phiDate + "-uniqueDonorList" +donors = pd.read_csv(os.path.join(donorPath, donorList + ".csv"), low_memory=False) + + +# %% all-donors summary +allAgeSummary = pd.DataFrame() +dataPath = os.path.join(donorPath, "settings-and-events") +d = pd.read_csv(os.path.join(dataPath, "combined-allMetadata.csv"), low_memory=False) + +# attach the donor level data to the +allMetadata = pd.merge( + d, + donors[[ + "hashID", + "userID", + "diagnosisType", + "targetDevices", + "targetTimezone", + "termsAccepted" + ]], + how="left", + on="hashID" +) +allMetadata.to_csv(os.path.join(donorPath, donorList + "-w-metaData.csv")) + + +# %% load data +dayData = pd.read_csv(os.path.join(dataPath, "combined-dayData.csv"), low_memory=False) +bolusData = pd.read_csv(os.path.join(dataPath, "combined-bolusEvents.csv"), low_memory=False) +basalData = pd.read_csv(os.path.join(dataPath, "combined-basalEvents.csv"), low_memory=False) + +# %% attach the diagnosis type to the day data +dayDF = pd.merge( + dayData, + allMetadata[[ + "hashID", + "diagnosisType", + "pump.top", + "pumpSettings.isfLikelyUnits" + ]], + how="left", + on="hashID" +) + +dayDF = filter_data(dayDF, 
min_days_criteria=7) +dayDF, dayDFGroupSummary = ( + bin_data( + dayDF, + ageBins, + ageGroupNames, + ylwBins, + ylwGroupNames, + catColorDF, + min_unique_donors=10 + ) +) + + +# %% all-event level summary (max basal and max bolus) +# attach the day to bolus data and filter data by analysis criteria +# NOTE: seet the filter_data function for details +bolus = merge_dayData(bolusData, dayDF) +bolus = filter_data(bolus, min_days_criteria=7) +bolus, bolusGroupSummary = ( + bin_data( + bolus, + ageBins, + ageGroupNames, + ylwBins, + ylwGroupNames, + catColorDF, + min_unique_donors=10 + ) +) + +# %% overview of bolus data table +figName = "overviewTable-bolus-events" +figName = figName + group_title +trace = go.Table( + header=dict( + values=make_bold(["AGE-YLW Group", + "Age", + "Years Living with T1D", + "N (Bolus Events)", + "U (Unique Donors)"]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=14) + ), + cells=dict( + values=[make_bold(bolusGroupSummary['categories']), + make_bold(bolusGroupSummary['ageCategories']), + make_bold(bolusGroupSummary['ylwCategories']), + make_bold(bolusGroupSummary['count']), + make_bold(bolusGroupSummary['unique'])], + fill = dict(color = [bolusGroupSummary["allColors"]]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=11), + height = 22 + ), +) + +fig = go.Figure() +fig.add_trace(trace) + +pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-highRes.png" + ), + width=1200, + height=1200, + scale=4) + +pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-lowRes.png" + ), + width=1200, + height=1200, + scale=1) + + +# %% max bolus amount () +maxBolus = pd.DataFrame(bolus.groupby(["hashID", "day"])["unitsInsulin"].max()).reset_index() +maxBolus.rename(columns={"unitsInsulin":"maxBolusPerDay"}, inplace=True) + +maxBolus = pd.merge( + maxBolus, + dayDF[[ + "hashID", + "day", + "categories", + "allColors" + ]], + how="left", + 
on=["hashID", "day"] +) + +# remove nans in category as they represent data from days that did not meat the +# acceptable day standard +maxBolus = maxBolus[maxBolus["categories"].notnull()] + +field = 'maxBolusPerDay' +yLabel = "Max Bolus Per Day (U)" +figName = "Max Bolus" +yMin = 0 +yMax = 21 +filteredDF = maxBolus[maxBolus[field] > 0].copy() + +## make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +# add N events and n unique donors +filteredDF = pd.merge( + filteredDF, + summaryTable[[ + "categories", + "count", + "unique" + ]], + how="left", + on="categories" +) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% basal data +basal = merge_dayData(basalData, dayDF) +basal = filter_data(basal, min_days_criteria=7) +basal, basalGroupSummary = ( + bin_data( + basal, + ageBins, + ageGroupNames, + ylwBins, + ylwGroupNames, + catColorDF, + min_unique_donors=10 + ) +) + + +# %% overview of basal data table +figName = "overviewTable-basal-events" +figName = figName + group_title + +trace = go.Table( + header=dict( + values=make_bold(["AGE-YLW Group", + "Age", + "Years Living with T1D", + "N (Basal Events)", + "U (Unique Donors)"]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=14) + ), + cells=dict( + values=[make_bold(basalGroupSummary['categories']), + make_bold(basalGroupSummary['ageCategories']), + make_bold(basalGroupSummary['ylwCategories']), + make_bold(basalGroupSummary['count']), + make_bold(basalGroupSummary['unique'])], + fill = dict(color = [basalGroupSummary["allColors"]]), + align = ['center', 
'center', 'center'], + font = dict(color = 'black', size=11), + height = 22 + ), +) + +fig = go.Figure() +fig.add_trace(trace) + +pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-highRes.png" + ), + width=1200, + height=1200, + scale=4) + +pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-lowRes.png" + ), + width=1200, + height=1200, + scale=1) + + +# %% max basal rate +maxBasal = pd.DataFrame(basal[basal["type"]=="basal"].groupby(["hashID", "day"])["rate"].max()).reset_index() + +maxBasal.rename(columns={"rate":"maxBasalRatePerDay"}, inplace=True) + +maxBasal = pd.merge( + maxBasal, + dayDF[[ + "hashID", + "day", + "categories", + "allColors" + ]], + how="left", + on=["hashID", "day"] +) + +# remove nans in category as they represent data from days that did not meat the +# acceptable day standard +maxBasal = maxBasal[maxBasal["categories"].notnull()] + +field = 'maxBasalRatePerDay' +yLabel = "Max Basal Per Day (U/hr)" +figName = "Max Basal" +yMin = 0 +yMax = 3.25 +filteredDF = maxBasal[maxBasal[field] > 0].copy() + +## make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +# add N events and n unique donors +filteredDF = pd.merge( + filteredDF, + summaryTable[[ + "categories", + "count", + "unique" + ]], + how="left", + on="categories" +) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% overview of day level data table +figName = "overviewTable-day-data" +figName = figName + group_title + +trace = go.Table( + header=dict( + values=make_bold(["AGE-YLW Group", + "Age", + "Years Living with 
T1D", + "N (Days)", + "U (Unique Donors)"]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=14) + ), + cells=dict( + values=[make_bold(dayDFGroupSummary['categories']), + make_bold(dayDFGroupSummary['ageCategories']), + make_bold(dayDFGroupSummary['ylwCategories']), + make_bold(dayDFGroupSummary['count']), + make_bold(dayDFGroupSummary['unique'])], + fill = dict(color = [dayDFGroupSummary["allColors"]]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=11), + height = 22 + ), +) + +fig = go.Figure() +fig.add_trace(trace) + +pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-highRes.png" + ), + width=1200, + height=1200, + scale=4) + +pio.write_image( + fig, + os.path.join( + "..", + "figures", + figName + "-lowRes.png" + ), + width=1200, + height=1200, + scale=1) + + +# %% Average ISF per day +dayDF["isfRounded"] = dayDF['isf.weightedMean'].round(1) +field = 'isfRounded' +yLabel = "Insulin Sensitivity Factor (mg/dL/U)" +figName = "Insulin Sensitivity Factor" +yMin = 0 +yMax = 400 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average CIR per day +field = 'cir.weightedMean' +yLabel = "Carb to Insulin Ratio (g/U)" +figName = "Carb to Insulin Ratio" +yMin = 0 +yMax = 70 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + 
figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Correction Target per day +field = 'ct.target.weightedMean' +yLabel = "Correction Target (mg/dL)" +figName = "Correction Target" +yMin = 70 +yMax = 180 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Basal Rate per day +field = 'sbr.weightedMean' +yLabel = "Scheduled Basal Rate (U/hr)" +figName = "Scheduled Basal Rate" +yMin = 0 +yMax = 2.5 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 3 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Total Daily Dose +field = "totalAmountOfInsulin" +yLabel = "Total Daily Dose (U)" +figName = "Total Daily Dose" +yMin = 0 +yMax = 125 
+filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Percent Basal +dayDF["perecentBasalInPercent"] = dayDF["percentBasal"] * 100 +field = "perecentBasalInPercent" +yLabel = "Basal Proportion of Total Daily Dose (%)" +figName = "Basal Proportion of Total Daily Dose" +yMin = 0 +yMax = 100 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Total Daily Carbs +field = "totalDailyCarbs" +yLabel = "Total Daily Carbs (g)" +figName = "Total Daily Carbs" +yMin = 0 +yMax = 600 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, 
yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Daily Time in Range (70-180 mg/dL) +dayDF["perecentInRange"] = dayDF["cgm.percent70to180"] * 100 +field = "perecentInRange" +yLabel = "Percent of Day in Targe Range (70-180 mg/dL, %)" +figName = "Percent of Day in Targe Range 70-180" +yMin = 0 +yMax = 100 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Mean CGM (mg/dL) +field = "cgm.mean_mgdL" +yLabel = "Daily Average CGM Level (mg/dL)" +figName = "Daily Average CGM Level" +yMin = 50 +yMax = 300 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Cov CGM (mg/dL) +dayDF["covPercent"] = dayDF["cgm.cov_mgdL"] * 100 +field = "covPercent" +yLabel = "Coeffient of Variation (%)" +figName = "Coeffient of Variation" +yMin = 6 +yMax = 62 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title 
+nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Daily Time Below 54 (Percentage) +dayDF["perecentBelow54mgdL"] = dayDF["cgm.percentBelow54"] * 100 +field = "perecentBelow54mgdL" +yLabel = "Percent of Day Below 54 mg/dL (%)" +figName = "Percent of Day in Extreme Hypo Below 54 mgdL" +yMin = 0 +yMax = 5 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 2 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Number of Below 54 mg/dL Episodes per Day +field = "extreme-hypo.count" +dayDF[field].fillna(0, inplace=True) +yLabel = "Number of Extreme Hypo Episodes (Below 54 mg/dL) per Day" +figName = "Number of Extreme Hypo Episodes per Day" +yMin = 0 +yMax = 2 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, 
figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Duration of each Episode Below 54 mg/dL +field = "extreme-hypo-durationMinutes.mean" +yLabel = "Average Duration of each Extreme Hypo Episode (minutes)" +figName = "Average Duration of each Extreme Hypo Episode" +yMin = 15 +yMax = 120 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Daily Time Above 250 (Percentage) +dayDF["perecentAbove250mgdL"] = dayDF["cgm.percentAbove250"] * 100 +field = "perecentAbove250mgdL" +yLabel = "Percent of Day Above 250 mg/dL (%)" +figName = "Percent of Day in Extreme Hyper Above 250 mgdL" +yMin = 0 +yMax = 75 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Number of Above 250 mg/dL Episodes per Day +field = "extreme-hyper.count" +dayDF[field].fillna(0, inplace=True) +yLabel = "Number of Extreme Hyper Episodes (Above 250 mg/dL) per Day" 
+figName = "Number of Extreme Hyper Episodes per Day" +yMin = 0 +yMax = 2 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Duration of each Episode Above 250 mg/dL +dayDF["avgExtremeHyperHours"] = dayDF["extreme-hyper-durationMinutes.mean"] / 60 +field = "avgExtremeHyperHours" +yLabel = "Average Duration of each Extreme Hyper Episode (hours)" +figName = "Average Duration of each Extreme Hyper Episode" +yMin = 2 +yMax = 10 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% save the all age summaries +figName = "allAgeSettingSummary" + group_title +allAgeSummary.to_csv( + os.path.join( + "..", + "figures", + figName + "-all-age-table.csv" + ) +) From 1c9f0fc69e02577d2aee6d84db5e769bd0d56ab0 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Mon, 20 May 2019 10:42:38 -0500 Subject: [PATCH 72/78] exclude parser output in gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore 
index 8492155a..1c0f392a 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ figures isf-basal-figures fonts wip +projects/parsers/output/ # Test htmlcov From 7542bf13e1252dbfdfb64bd73256a6ad075a02a6 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 23 May 2019 11:24:23 -0500 Subject: [PATCH 73/78] change location of data path and file name --- ...-and-events-v3.py => visualize-users-settings-and-events.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename projects/get-donors-pump-settings/{visualize-users-settings-and-events-v3.py => visualize-users-settings-and-events.py} (99%) diff --git a/projects/get-donors-pump-settings/visualize-users-settings-and-events-v3.py b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py similarity index 99% rename from projects/get-donors-pump-settings/visualize-users-settings-and-events-v3.py rename to projects/get-donors-pump-settings/visualize-users-settings-and-events.py index 7f4c38e4..502a41c2 100644 --- a/projects/get-donors-pump-settings/visualize-users-settings-and-events-v3.py +++ b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py @@ -418,7 +418,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min dataProcessedDate = "2019-01-22" phiDate = "PHI-" + dataPulledDate -donorPath = os.path.join("..", "..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") +donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") donorList = phiDate + "-uniqueDonorList" donors = pd.read_csv(os.path.join(donorPath, donorList + ".csv"), low_memory=False) From 07ccf9418823d8ba5a1f93ffdb5f85ff9cdaf5b3 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 23 May 2019 11:52:35 -0500 Subject: [PATCH 74/78] change figure output path --- .../visualize-users-settings-and-events.py | 39 +++++++------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git 
a/projects/get-donors-pump-settings/visualize-users-settings-and-events.py b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py index 502a41c2..652236fc 100644 --- a/projects/get-donors-pump-settings/visualize-users-settings-and-events.py +++ b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py @@ -58,8 +58,7 @@ def save_fig(fig, plot_name, width, height, scale): pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, plot_name + ".png" ), width=width, @@ -174,8 +173,7 @@ def make_static_table(field, figName, filteredDF, nDecimals, return_summaryTable pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-table-highRes.png" ), width=1200, @@ -185,8 +183,7 @@ def make_static_table(field, figName, filteredDF, nDecimals, return_summaryTable pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-table-lowRes.png" ), width=1200, @@ -195,15 +192,13 @@ def make_static_table(field, figName, filteredDF, nDecimals, return_summaryTable summaryTable.to_csv( os.path.join( - "..", - "figures", + figure_path, figName + "-table.csv" ) ) allAgeTable.to_csv( os.path.join( - "..", - "figures", + figure_path, figName + "-all-age-table.csv" ) ) @@ -367,7 +362,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min # %% define age and years living with bins group_title = "-withYlw0" - +figure_path = os.path.join(".", "figures") # next bin the data by age-ylw groups dataGroupName = "age-ylw-groups" @@ -494,6 +489,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min ) ) + # %% overview of bolus data table figName = "overviewTable-bolus-events" figName = figName + group_title @@ -526,8 +522,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-highRes.png" ), width=1200, @@ -537,8 +532,7 @@ def 
bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-lowRes.png" ), width=1200, @@ -655,8 +649,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-highRes.png" ), width=1200, @@ -666,8 +659,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-lowRes.png" ), width=1200, @@ -769,8 +761,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-highRes.png" ), width=1200, @@ -780,8 +771,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min pio.write_image( fig, os.path.join( - "..", - "figures", + figure_path, figName + "-lowRes.png" ), width=1200, @@ -1266,8 +1256,7 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min figName = "allAgeSettingSummary" + group_title allAgeSummary.to_csv( os.path.join( - "..", - "figures", + figure_path, figName + "-all-age-table.csv" ) ) From a8048ca0a63a26553ccea3a2d8ead57bcef18b79 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 23 May 2019 21:35:32 -0500 Subject: [PATCH 75/78] ignore local plotly plot --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 1c0f392a..657b5eef 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,5 @@ htmlcov .pytest_cache + +projects/get-donors-pump-settings/temp-plot\.html From 795d642cc4658d7b26a48aa2c9c128211c754191 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Thu, 23 May 2019 21:36:29 -0500 Subject: [PATCH 76/78] plots of isf and tdd (local only) needs cleaning before pushing to repository --- .../visualize-users-settings-and-events.py | 267 
++++++++++++++++++ 1 file changed, 267 insertions(+) diff --git a/projects/get-donors-pump-settings/visualize-users-settings-and-events.py b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py index 652236fc..cd392754 100644 --- a/projects/get-donors-pump-settings/visualize-users-settings-and-events.py +++ b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py @@ -390,6 +390,8 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min '#dacbde','#d0b6d4','#c5a1ca','#ba8dc0','#af78b7','#a464ad','#984ea3' ] + + finalCategories = [ 'age 01-05 ylw 00', 'age 01-05 ylw 01', 'age 01-05 ylw 02', 'age 01-05 ylw 03-05', 'age 06-08 ylw 00', 'age 06-08 ylw 01', @@ -1260,3 +1262,268 @@ def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min figName + "-all-age-table.csv" ) ) + +# %% make a plot of TDD by ISF + +# Average ISF per day +dayDF["isfRounded"] = dayDF['isf.weightedMean'].round(1) +#field = 'isfRounded' +#yLabel = "Insulin Sensitivity Factor (mg/dL/U)" +#figName = "Insulin Sensitivity Factor" +#yMin = 0 +#yMax = 400 + +## Total Daily Dose +#field = "totalAmountOfInsulin" +#yLabel = "Total Daily Dose (U)" +#figName = "Total Daily Dose" +#yMin = 0 +#yMax = 125 +#filteredDF = dayDF[dayDF[field] > 0].copy() + +filteredDF = dayDF[((dayDF['isfRounded'] > 0) & + (dayDF['totalAmountOfInsulin'] > 0))].copy() + +ylwColors = ["#ffffb2", '#fecc5c', '#fd8d3c', '#f03b20', '#bd0026'] +for f in filteredDF["ylwCategories"].unique(): + if f == '00': + colorCode = 0 + if f == '01': + colorCode = 1 + if f == '02': + colorCode = 2 + if f == '03-05': + colorCode = 3 + else: + colorCode = 4 + + filteredDF.loc[filteredDF["ylwCategories"] == f, "ylwColor"] = ylwColors[colorCode] + + + +from scipy.optimize import curve_fit +def func(x, a, b, c): + return (a * x + b) / (x - 10) + +import statsmodels.api as sm +lowess = sm.nonparametric.lowess +#a * np.exp(-b*x) + c * np.exp(-d * x) +#y = a * np.exp(-b * x) 
+ c +#y = a * np.exp(b*x) + c * np.exp(d * x) + +xdata = filteredDF['totalAmountOfInsulin'].round() +ydata = filteredDF['isfRounded'] +popt, pcov = curve_fit(func, xdata, ydata) + + +x = np.arange(1, 500) +c = pd.DataFrame(columns=["ISF", "TDD"]) +for xi in x: + if sum(filteredDF['isfRounded'] == xi) > 3: + c.loc[xi, "ISF"] = xi + c.loc[xi, "TDD"] = filteredDF.loc[ + filteredDF['isfRounded'] == xi, + "totalAmountOfInsulin"].median() + +asdf2 = c.rolling(25, center=True).mean() +plt.plot(asdf2["TDD"], asdf2["ISF"]) + +x = np.arange(1, 300) +d = pd.DataFrame(columns=["TDD", "ISF"]) +for xi in x: + if sum(filteredDF['totalAmountOfInsulin'].round() == xi) > 3: + d.loc[xi, "TDD"] = xi + d.loc[xi, "ISF"] = filteredDF.loc[ + filteredDF['totalAmountOfInsulin'].round() == xi, + "isfRounded"].median() + +# then smooth out the medians +asdf = d.rolling(10, center=True).mean() +plt.plot(asdf["TDD"], asdf["ISF"]) + + +# try a different approach were we just do a smoothed line + + +z = lowess(ydata, xdata) +#>>> w = lowess(y, x, frac=1./3) +plt.plot(z[:,0], z[:,1]) + +plt.plot( + x, + func(x, *popt), + 'r-', +# label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt) +) + +#df.sort_values("categories", inplace=True) + +traces = [] +traces.append(go.Scatter( + y=ydata, + x=xdata, + name="Scatter", + mode='markers', + marker=dict( + color=filteredDF["allColors"], + opacity=0.125, + ), +)) + +#traces.append(go.Scatter( +# y=z2[:,0], +# x=z2[:,1], +# mode='lines', +#)) +# +#traces.append(go.Scatter( +# y=z[:,1], +# x=z[:,0], +# mode='lines', +# line=dict( +# color="black", +# ), +#)) + +traces.append(go.Scatter( + y=asdf["ISF"], + x=asdf["TDD"], + mode='lines', + name="Trend by TDD", + line=dict( + color="black", + dash="dot", + ), +)) + +traces.append(go.Scatter( + y=asdf2["ISF"], + x=asdf2["TDD"], + mode='lines', + name="Trend by ISF", + line=dict( + color="black", + dash="dash", + ), +)) + +layout = go.Layout( + font=dict( + size=18 + ), + xaxis=dict( + title="TDD", + dtick=20, + 
range=[0, 300], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ), + yaxis=dict( + title="ISF", + dtick=20, + range=[0, 500], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ) +) + +fig = go.Figure(data=traces, layout=layout) +plot(fig) + +for yd in df.categories.unique(): + traces.append(go.Box( + y=df.loc[df["categories"] == yd, field].values, + x=df.loc[df["categories"] == yd, "categories"].values, + name=yd, + boxpoints="all", + notched=True, + hoverlabel=dict(font=dict(size=22)), + marker=dict( + color=df.loc[df["categories"] == yd, "allColors"].describe()["top"], + opacity=0, + ), + )) + +layout = go.Layout( + font=dict( + size=22 + ), + xaxis=dict( + tickangle=52.5 + ), + yaxis=dict( + title=yLabel, + range=[yMin, yMax], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ), + margin=dict( + l=100, + r=200, + b=250, + t=50, + ), + + boxmode='group', + showlegend=False, + legend=dict(font=dict(size=14)) +) + +fig = go.Figure(data=traces, layout=layout) + +save_fig(fig, figName + "-boxplot-lowRes", 1800, 1200, 1) +save_fig(fig, figName + "-boxplot-highRes", 1800, 1200, 4) + + + + + + + + +#filteredDF.plot.scatter(y="isfRounded", x="totalAmountOfInsulin", alpha=0.025) + +# %% make a plot of TDD by max temp basal rate +maxBasal = pd.DataFrame(basal[basal["type"]=="basal"].groupby(["hashID", "day"])["rate"].max()).reset_index() + +maxBasal.rename(columns={"rate":"maxBasalRatePerDay"}, inplace=True) + +maxBasal = pd.merge( + maxBasal, + dayDF[[ + "hashID", + "day", + "categories", + "allColors", + "totalAmountOfInsulin", + 'basal.closedLoopDays' + ]], + how="left", + on=["hashID", "day"] +) + +# remove nans in category as they represent data from days that did not meat the +# acceptable day standard +#maxBasal = maxBasal[maxBasal["categories"].notnull()] + 
+ + + +filteredDF = maxBasal[((maxBasal['totalAmountOfInsulin'] > 0) & + (maxBasal['maxBasalRatePerDay'] > 0))].copy() + + +filteredDF.plot.scatter(y="maxBasalRatePerDay", x="totalAmountOfInsulin", alpha=0.125) + From c709afdc41f4183d8cf838860256652e56809af9 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 28 May 2019 13:51:17 -0500 Subject: [PATCH 77/78] update environment --- environment.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 043d73c7..97b1f82d 100644 --- a/environment.yml +++ b/environment.yml @@ -11,15 +11,13 @@ dependencies: - xlsxwriter - matplotlib - scikit-learn +- pip - plotly - plotly::plotly-orca - poppler - psutil -- r -- r-essentials - pytest - pytest-cov - pip: - - python-dotenv - - -e git+https://github.com/tidepool-org/data-analytics#egg=tidals\&subdirectory=tidepool-analysis-tools + - python-dotenv \ No newline at end of file From 7b4d0f9b80e762792e9dfad230e96255f0e2a554 Mon Sep 17 00:00:00 2001 From: Ed Nykaza Date: Tue, 28 May 2019 19:02:38 -0500 Subject: [PATCH 78/78] plots using jos age and ylw groups --- ...ize-users-settings-and-events-jaeb-ages.py | 1396 +++++++++++++++++ 1 file changed, 1396 insertions(+) create mode 100644 projects/get-donors-pump-settings/visualize-users-settings-and-events-jaeb-ages.py diff --git a/projects/get-donors-pump-settings/visualize-users-settings-and-events-jaeb-ages.py b/projects/get-donors-pump-settings/visualize-users-settings-and-events-jaeb-ages.py new file mode 100644 index 00000000..62eb6969 --- /dev/null +++ b/projects/get-donors-pump-settings/visualize-users-settings-and-events-jaeb-ages.py @@ -0,0 +1,1396 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Jan 22 06:46:33 2019 + +@author: ed +""" + +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +description: visualize users settings and events +version: 0.0.1 +created: 2019-01-11 +author: Ed Nykaza +dependencies: + * +license: BSD-2-Clause +""" 
# %% REQUIRED LIBRARIES
import pandas as pd
import numpy as np
from pytz import timezone
from datetime import timedelta
import datetime as dt
import os
import argparse
import pdb
import matplotlib.pyplot as plt
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.io as pio



# %% FUNCTIONS
def make_bold(val_list):
    """Return every value of ``val_list`` as a string wrapped in ``<b>`` tags.

    The result is used for plotly table headers and cells.
    """
    return ['<b>' + str(val) + '</b>' for val in val_list]


def make_bold_and_round(val_list, nDecimalPlaces):
    """Round every value of ``val_list`` and wrap it in ``<b>`` tags.

    When ``nDecimalPlaces`` is 0 the rounded value is converted to ``int`` so
    that it is rendered without a trailing ``.0``.
    """
    bold_list = []
    for val in val_list:
        rounded = np.round(val, nDecimalPlaces)
        if nDecimalPlaces == 0:
            rounded = int(rounded)
        bold_list.append('<b>' + str(rounded) + '</b>')
    return bold_list


def save_fig(fig, plot_name, width, height, scale):
    """Write ``fig`` to ``<figure_path>/<plot_name>.png``.

    ``figure_path`` is a module-level variable that must be defined before
    this function is called.
    """
    pio.write_image(
        fig,
        os.path.join(
            figure_path,
            plot_name + ".png"
        ),
        width=width,
        height=height,
        scale=scale)


def make_static_plot(field, yLabel, figName, df, yMin, yMax):
    """Save a notched box plot of ``field`` with one box per category.

    ``df`` must contain the columns ``field``, "categories" and "allColors".
    Two PNG files are written through ``save_fig``:
    ``<figName>-boxplot-lowRes`` (scale 1) and ``<figName>-boxplot-highRes``
    (scale 4).
    """
    # sort a copy so that the caller's dataframe is left untouched
    df = df.sort_values("categories")

    traces = []
    for yd in df.categories.unique():
        group = df[df["categories"] == yd]
        traces.append(go.Box(
            y=group[field].values,
            x=group["categories"].values,
            name=yd,
            boxpoints="all",
            notched=True,
            hoverlabel=dict(font=dict(size=22)),
            marker=dict(
                # most frequent color assigned to this category
                color=group["allColors"].describe()["top"],
                opacity=0,
            ),
        ))

    layout = go.Layout(
        font=dict(
            size=22
        ),
        xaxis=dict(
            tickangle=52.5
        ),
        yaxis=dict(
            title=yLabel,
            range=[yMin, yMax],
            showgrid=True,
            gridcolor='#f1f3f4',
            gridwidth=2,
            zeroline=True,
            zerolinecolor='#f1f3f4',
            zerolinewidth=2,
        ),
        margin=dict(
            l=100,
            r=200,
            b=250,
            t=50,
        ),

#        boxmode='group',
        showlegend=False,
        legend=dict(font=dict(size=14))
    )

    fig = go.Figure(data=traces, layout=layout)

    save_fig(fig, figName + "-boxplot-lowRes", 1800, 1200, 1)
    save_fig(fig, figName + "-boxplot-highRes", 1800, 1200, 4)


def make_static_table(field, figName, filteredDF, nDecimals, return_summaryTable=False):
    """Summarize ``field`` overall and per category, and save the results.

    Files written under the module-level ``figure_path``:
      * ``<figName>-table-highRes.png`` and ``<figName>-table-lowRes.png``
      * ``<figName>-table.csv`` (per-category summary)
      * ``<figName>-all-age-table.csv`` (overall summary)

    The per-category colors are taken from the module-level ``catColorDF``.

    Returns ``(summaryTable, allAgeTable)`` when ``return_summaryTable`` is
    True, otherwise ``None``.
    """
    # first make an overall table
    allCounts = filteredDF.groupby(["hashID"])[field].describe()
    allAgeTable = pd.DataFrame(index=[field])
    allAgeTable["min"] = allCounts["min"].min()
    allAgeTable["max"] = allCounts["max"].max()
    allAgeTable["U"] = len(allCounts)           # number of unique hashIDs
    allAgeTable["N"] = allCounts["count"].sum()  # number of observations

    # then make summary per categories
    uniqueCounts = filteredDF.groupby(["categories"])["hashID"].describe()
    uniqueCounts.reset_index(inplace=True)
    summaryTable = filteredDF.groupby("categories")[field].describe()
    summaryTable.reset_index(inplace=True)
    summaryTable = pd.merge(
        summaryTable,
        uniqueCounts[["categories", "unique"]],
        how="left",
        on="categories"
    )
    summaryTable = pd.merge(summaryTable, catColorDF, how="left", on="categories")
    summaryTable["unique"] = summaryTable["unique"].astype(float)

    # add in interquartile range
    summaryTable["IQR"] = summaryTable["75%"] - summaryTable["25%"]

    col_headings = make_bold(
        ["Group", "N", "U", "Average", "Stdev", "Min", "Q1", "Median", "Q3", "Max"]
    )

    # one table column per heading, in the same order as col_headings
    cell_values = [
        make_bold(summaryTable["categories"]),
        make_bold_and_round(summaryTable["count"], 0),
        make_bold_and_round(summaryTable["unique"], 0),
    ]
    for statistic in ["mean", "std", "min", "25%", "50%", "75%", "max"]:
        cell_values.append(make_bold_and_round(summaryTable[statistic], nDecimals))

    trace = go.Table(
        header=dict(
            values=col_headings,
            fill=dict(color='white'),
            align=['center', 'center', 'center'],
            font=dict(color='black', size=12)
        ),
        columnwidth=[1.5, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        cells=dict(
            values=cell_values,
            fill=dict(color=[summaryTable["allColors"]]),
            align=['center', 'center', 'center'],
            font=dict(color='black', size=10),
            height=20
        )
    )

    fig = go.Figure()
    fig.add_trace(trace)

    save_fig(fig, figName + "-table-highRes", 1200, 1200, 4)
    save_fig(fig, figName + "-table-lowRes", 1200, 1200, 1)

    summaryTable.to_csv(
        os.path.join(
            figure_path,
            figName + "-table.csv"
        )
    )
    allAgeTable.to_csv(
        os.path.join(
            figure_path,
            figName + "-all-age-table.csv"
        )
    )

    if return_summaryTable:
        return summaryTable, allAgeTable
    return None


def make_lite_interactive_boxplot(field, yLabel, df, yMin, yMax, figName=None):
    """Upload a lightweight box plot of ``field`` per category via ``py.plot``.

    Instead of sending every observation, each box is described by 15 values
    built from the min, lower fence, Q1, median, Q3, upper fence and max of
    the category (fences use 1.5 * IQR).

    ``df`` must contain the columns ``field``, "categories", "allColors",
    "count" and "unique".

    ``figName`` is used in the plot file name. When it is omitted the
    module-level variable ``figName`` is used, which is what callers that
    pass only five arguments rely on.
    """
    if figName is None:
        # backward compatibility: fall back to the script-level figName
        figName = globals()["figName"]

    # sort a copy so that the caller's dataframe is left untouched
    df = df.sort_values("categories")

    traces = []
    for yd in df.categories.unique():
        group = df[df["categories"] == yd]
        yValues = group[field]
        yStats = yValues.describe()
        yMinimum = yStats["min"]
        yQ1 = yStats["25%"]
        yQ2 = yStats["50%"]
        yQ3 = yStats["75%"]
        yMaximum = yStats["max"]
        yIQR = yQ3 - yQ1
        maxWhisker = yIQR * 1.5
        lowWhiskerBound = yQ1 - maxWhisker
        highWhiskerBound = yQ3 + maxWhisker
        # fences are the most extreme observations inside the whisker bounds
        yLowerFence = yValues[yValues >= lowWhiskerBound].min()
        yUpperFence = yValues[yValues <= highWhiskerBound].max()
        yBoxData = [yMinimum, yLowerFence, yQ1, yQ1, yQ1, yQ1, yQ1,
                    yQ2, yQ3, yQ3, yQ3, yQ3, yQ3,
                    yUpperFence, yMaximum]

        # get N and U
        nDays = int(group["count"].median())
        uniqueDonors = int(group["unique"].median())
        hoverText = "N=%s, U=%s" % (nDays, uniqueDonors)

        traces.append(go.Box(
            y=yBoxData,
            jitter=0,
            pointpos=0,
            text=[hoverText] * len(yBoxData),
            hoverinfo="y+text",
            name=yd,
            boxpoints="all",
            notched=False,
            marker=dict(
                color=group["allColors"].describe()["top"],
                opacity=0,
            ),
        ))

    layout = go.Layout(
        yaxis=dict(
            title=yLabel,
            range=[yMin, yMax],
            showgrid=True,
            gridcolor='#f1f3f4',
            gridwidth=2,
            zeroline=True,
            zerolinecolor='#f1f3f4',
            zerolinewidth=2,
        ),
        showlegend=True
    )

    fig = go.Figure(data=traces, layout=layout)
    plot_url = py.plot(fig, filename="Distribution of " + figName, auto_open=False)
    print(figName, plot_url)


def filter_data(df, min_days_criteria=7):
    """Return the rows of ``df`` that meet the analysis criteria.

    Required columns: diagnosisType, age, ylw, validPumpData, validCGMData,
    "pump.top", "pumpSettings.isfLikelyUnits", hashID and day.

    A column "nDays" is added holding the number of rows that remain per
    (hashID, age, ylw) group; groups with fewer than ``min_days_criteria``
    rows are dropped.
    """
    # keep all type1 and null diagnosis data (not specified)
    df = df[((df.diagnosisType.isnull()) | (df.diagnosisType == "type1"))]

    # filter out invalid ages and ylw (both bounds are inclusive)
    df = df[df.age.astype(float).between(0, 90)]
    df = df[df.ylw.astype(float).between(0, 80)]

    # filter out invalid pump and cgm days
    df = df[((df["validPumpData"]) & (df["validCGMData"]))]

    # filter out Paradigm Veo Pumps
    # na=False: a missing pump name is treated as "not a match", otherwise
    # the NaN result of str.contains cannot be negated with ~
    df = df[~df["pump.top"].str.contains("Paradigm Veo", na=False)]

    # filter out omnipod with mg/dL likely settings
    df = df[~((df["pump.top"].str.contains("InsOmn-130", na=False)) &
              (df['pumpSettings.isfLikelyUnits'] == "mg/dL"))]

    # require a minimum number of days of data
    dayGroups = (
        df.groupby(["hashID", "age", "ylw"])["day"]
        .count()
        .rename("nDays")
        .reset_index()
    )
    df = pd.merge(df, dayGroups, how="left", on=["hashID", "age", "ylw"])

    df = df[df["nDays"] >= min_days_criteria]

    return df


def merge_dayData(df, dayDF):
    """Left-join the day-level validity and donor columns of ``dayDF`` onto ``df``.

    The join keys are "hashID" and "day"; the added columns are the ones
    ``filter_data`` needs.
    """
    dayColumns = [
        "hashID",
        "day",
        "validPumpData",
        "atLeast3Boluses",
        "validCGMData",
        "diagnosisType",
        "pump.top",
        "pumpSettings.isfLikelyUnits"
    ]

    return pd.merge(df, dayDF[dayColumns], how="left", on=["hashID", "day"])


def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min_unique_donors=10):
    """Assign every row to an age / years-living-with (ylw) category.

    Adds the columns ageBins, ylwBins, ageCategories, ylwCategories,
    categories ("age <age bin> ylw <ylw bin>"), the colors from
    ``catColorDF``, the per-category "count" and "unique" hashID statistics
    and "categoriesFull" (category name plus N and U).

    Returns ``(df, groupDF)`` where ``groupDF`` has one row per category.
    Only categories with MORE than ``min_unique_donors`` unique hashIDs are
    kept in both frames.
    """
    # work on a copy so that the caller's dataframe is not modified
    df = df.copy()

    # bin data (defined above)
    df["ageBins"] = pd.cut(df["age"], ageBins, labels=ageGroupNames)
    df["ylwBins"] = pd.cut(df["ylw"], ylwBins, labels=ylwGroupNames)
    df["ageCategories"] = df["ageBins"].astype(str)
    df["ylwCategories"] = df["ylwBins"].astype(str)
    df["categories"] = "age " + df["ageCategories"] + " ylw " + df["ylwCategories"]

    # attach bin colors (defined above)
    # NOTE: "categories" is intentionally left as plain strings; the earlier
    # astype("category", inplace=True) call never changed the column
    df = pd.merge(df, catColorDF, how="left", on="categories")

    # attach counts per group
    dGroups = df.groupby("categories")
    groupDF = dGroups["hashID"].describe()
    groupDF["ageCategories"] = dGroups["ageCategories"].describe()["top"]
    groupDF["ylwCategories"] = dGroups["ylwCategories"].describe()["top"]
    #groupDF["ylwAlpha"] = dGroups["ylwAlpha"].mean()
    groupDF["allColors"] = dGroups["allColors"].describe()["top"]
    groupDF.reset_index(inplace=True)

    # attach group counts to the main dataframe
    df = pd.merge(df, groupDF[["categories", "count", "unique"]], how="left", on="categories")

    # remove all categories that do NOT have more than min_unique_donors
    # unique people
    # NOTE(review): the comparison is strict, so the default of 10 requires
    # at least 11 unique donors; confirm whether >= was intended
    df = df[df["unique"] > min_unique_donors].copy()
    groupDF = groupDF[groupDF["unique"] > min_unique_donors]

    # attach N and U to the categories
    df["categoriesFull"] = (
        df["categories"].astype(str) +
        " (N=" + df["count"].astype(str) +
        ", U=" + df["unique"].astype(str) + ")"
    )

    return df, groupDF


# %% define age and years living with bins
group_title = "-jos-groups"
figure_path = os.path.join(".", "figures")

# next bin the data by age-ylw groups
dataGroupName = "age-ylw-groups"

# original age and ylw bins
#ageBins = np.array([0,5,8,12,17,24,85])
#ylwBins = np.array([-1,0,1,2,5,10,25,75])

#catColors = [
#    '#f0d8e5','#f4bdd8','#f7a0cc','#f781bf',
#    '#ebc3c1','#f1a095','#f17d6c','#ec5644','#e41a1c',
#    '#f2d8c3','#fbc299','#ffac6f','#ff9746','#ff7f00',
#    '#d0e1cc','#b8d8b2','#9fcd97','#86c37e','#6cb964','#4daf4a',
#    '#c9d6e3','#afc4da','#95b1d2','#7aa0c9','#5b8fc1','#377eb8',
#    '#dacbde','#d0b6d4','#c5a1ca','#ba8dc0','#af78b7','#a464ad','#984ea3'
#]
#
#finalCategories = [
#    'age 01-05 ylw 00', 'age 01-05 ylw 01', 'age 01-05 ylw 02',
#    'age 01-05 ylw 03-05',
'age 06-08 ylw 00', 'age 06-08 ylw 01', +# 'age 06-08 ylw 02', 'age 06-08 ylw 03-05', 'age 06-08 ylw 06-10', +# 'age 09-12 ylw 00', 'age 09-12 ylw 01', 'age 09-12 ylw 02', +# 'age 09-12 ylw 03-05', 'age 09-12 ylw 06-10', 'age 13-17 ylw 00', +# 'age 13-17 ylw 01', 'age 13-17 ylw 02', 'age 13-17 ylw 03-05', +# 'age 13-17 ylw 06-10', 'age 13-17 ylw 11-25', 'age 18-24 ylw 00', +# 'age 18-24 ylw 01', 'age 18-24 ylw 02', 'age 18-24 ylw 03-05', +# 'age 18-24 ylw 06-10', 'age 18-24 ylw 11-25', 'age 25-85 ylw 00', +# 'age 25-85 ylw 01', 'age 25-85 ylw 02', 'age 25-85 ylw 03-05', +# 'age 25-85 ylw 06-10', 'age 25-85 ylw 11-25', +# 'age 25-85 ylw 26-75' +#] + +# jaeb obs study bins +ageBins = np.array([-1,6,13,25,85]) +ylwBins = np.array([-1,1,5,75]) + +# bin by age +ageGroupNames = [] +for x, y in zip(ageBins[:-1]+1, ageBins[1:]): + ageGroupNames.append("%s-%s"%(f"{x:02d}", f"{y:02d}")) + +ylwGroupNames = [] +for x, y in zip(ylwBins[:-1]+1, ylwBins[1:]): + if x == y: + ylwGroupNames.append("%s"%(f"{x:02d}")) + else: + ylwGroupNames.append("%s-%s"%(f"{x:02d}", f"{y:02d}")) + +## 7 colors in each +#oranges = ['#fdd0a2','#fdae6b','#fd8d3c','#f16913','#d94801','#a63603','#7f2704'] +#reds = ['#fcbba1','#fc9272','#fb6a4a','#ef3b2c','#cb181d','#a50f15','#67000d'] +#greens = ['#c7e9c0','#a1d99b','#74c476','#41ab5d','#238b45','#006d2c','#00441b'] +#blues = ['#c6dbef','#9ecae1','#6baed6','#4292c6','#2171b5','#08519c','#08306b'] +#purples = ['#dadaeb','#bcbddc','#9e9ac8','#807dba','#6a51a3','#54278f','#3f007d'] +#greys = ['#d9d9d9','#bdbdbd','#969696','#737373','#525252','#252525','#000000'] + +# 3 colors in each +#reds = ['#fcae91','#fb6a4a','#cb181d'] +oranges = ['#fdbe85','#fd8d3c','#d94701'] +greens = ['#bae4b3','#74c476','#238b45'] +blues = ['#bdd7e7','#6baed6','#2171b5'] +purples = ['#cbc9e2','#9e9ac8','#6a51a3'] +#greys = ['#cccccc','#969696','#525252'] + + +color_matrix = pd.DataFrame([oranges, greens, blues, purples]) + +all_colors = np.reshape(color_matrix.values, -1) + +i = 
# %% helpers used by the summary cells below
def _write_overview_table(groupSummary, countHeader, figName):
    """Render one AGE-YLW overview table and save it as two PNG files.

    Args:
        groupSummary: frame returned by ``bin_data``; the columns
            ``categories``, ``ageCategories``, ``ylwCategories``, ``count``,
            ``unique`` and ``allColors`` are read.
        countHeader: header text of the count column, e.g. "N (Days)".
        figName: file-name stem; "-highRes.png" and "-lowRes.png" are
            appended and the files are written into ``figure_path``.

    Returns:
        The ``(trace, fig)`` pair that was rendered.
    """
    trace = go.Table(
        header=dict(
            values=make_bold(["AGE-YLW Group",
                              "Age",
                              "Years Living with T1D",
                              countHeader,
                              "U (Unique Donors)"]),
            align=['center', 'center', 'center'],
            font=dict(color='black', size=14),
        ),
        cells=dict(
            values=[make_bold(groupSummary['categories']),
                    make_bold(groupSummary['ageCategories']),
                    make_bold(groupSummary['ylwCategories']),
                    make_bold(groupSummary['count']),
                    make_bold(groupSummary['unique'])],
            fill=dict(color=[groupSummary["allColors"]]),
            align=['center', 'center', 'center'],
            font=dict(color='black', size=11),
            height=22,
        ),
    )

    fig = go.Figure()
    fig.add_trace(trace)

    # same figure twice: scale=4 for the high resolution copy, scale=1 for low
    for suffix, scale in (("-highRes.png", 4), ("-lowRes.png", 1)):
        pio.write_image(
            fig,
            os.path.join(figure_path, figName + suffix),
            width=1200,
            height=1200,
            scale=scale,
        )

    return trace, fig


def _daily_max(events, valueColumn, maxColumn, dayFrame):
    """Return the per-donor, per-day maximum of ``valueColumn``.

    The maximum is stored under ``maxColumn`` and the ``categories`` and
    ``allColors`` columns of ``dayFrame`` are attached by a left merge on
    ``hashID`` and ``day``.  Rows without a category are dropped: they come
    from days that did not meet the acceptable-day standard.
    """
    dailyMax = events.groupby(["hashID", "day"])[valueColumn].max().reset_index()
    dailyMax = dailyMax.rename(columns={valueColumn: maxColumn})
    dailyMax = pd.merge(
        dailyMax,
        dayFrame[["hashID", "day", "categories", "allColors"]],
        how="left",
        on=["hashID", "day"],
    )
    return dailyMax[dailyMax["categories"].notnull()]


def _metric(field, yLabel, figName, yMin, yMax, nDecimalPlaces, frame,
            derive=None, strictlyPositive=False, addToAllAge=False,
            mergeCounts=False):
    """Bundle the settings of one summary-table / boxplot pair.

    Args:
        field: column that is summarised.
        yLabel, figName, yMin, yMax, nDecimalPlaces: passed on to
            ``make_static_table``, ``make_static_plot`` and
            ``make_lite_interactive_boxplot``.
        frame: data frame that holds (or will hold) ``field``.
        derive: optional callable ``frame -> Series``; when given, its result
            is stored in ``frame[field]`` before the field is summarised.
        strictlyPositive: keep rows with ``field > 0`` instead of ``>= 0``.
        addToAllAge: append the all-age table to ``allAgeSummary``.
        mergeCounts: attach ``count`` and ``unique`` from the summary table
            to the filtered rows before plotting.
    """
    return dict(
        field=field, yLabel=yLabel, figName=figName, yMin=yMin, yMax=yMax,
        nDecimalPlaces=nDecimalPlaces, frame=frame, derive=derive,
        strictlyPositive=strictlyPositive, addToAllAge=addToAllAge,
        mergeCounts=mergeCounts,
    )


def _binned_medians(binValues, binKey, target, binName, targetName, minCount=3):
    """Median of ``target`` for every bin value with more than ``minCount`` rows.

    Args:
        binValues: candidate bin values that are tested one by one.
        binKey: Series compared (``==``) against each bin value.
        target: Series, aligned with ``binKey``, whose median is taken.
        binName, targetName: column names of the result, in that order.
        minCount: a bin is kept only when it has more rows than this.

    Returns:
        A float DataFrame indexed by the kept bin values.  The columns are
        created as float so that ``.rolling(...).mean()`` can be applied to
        the result.
    """
    keptBins = []
    medians = []
    for value in binValues:
        inBin = binKey == value
        if inBin.sum() > minCount:
            keptBins.append(value)
            medians.append(target[inBin].median())
    return pd.DataFrame(
        {binName: keptBins, targetName: medians},
        index=keptBins,
        dtype=float,
    )


def _trend_axis(title, upperLimit):
    """Axis settings shared by both axes of the ISF-by-TDD scatter plot."""
    return dict(
        title=title,
        dtick=20,
        range=[0, upperLimit],
        showgrid=True,
        gridcolor='#f1f3f4',
        gridwidth=2,
        zeroline=True,
        zerolinecolor='#f1f3f4',
        zerolinewidth=2,
    )


# %% one colour per age / years-living-with-T1D (ylw) category
i = 0
catColorDF = pd.DataFrame()
for ageName in ageGroupNames:
    for ylwName in ylwGroupNames:
        catColorDF.loc[i, "categories"] = "age %s ylw %s" % (ageName, ylwName)
        catColorDF.loc[i, "allColors"] = all_colors[i]
        i = i + 1


# %% load in summary donor data
dataPulledDate = "2019-01-10"
dataProcessedDate = "2019-01-22"  # not read anywhere in this section

phiDate = "PHI-" + dataPulledDate
donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data")
donorList = phiDate + "-uniqueDonorList"
donors = pd.read_csv(os.path.join(donorPath, donorList + ".csv"), low_memory=False)


# %% all-donors summary
allAgeSummary = pd.DataFrame()
dataPath = os.path.join(donorPath, "settings-and-events")
d = pd.read_csv(os.path.join(dataPath, "combined-allMetadata.csv"), low_memory=False)

# attach the donor level data to the combined metadata
allMetadata = pd.merge(
    d,
    donors[[
        "hashID",
        "userID",
        "diagnosisType",
        "targetDevices",
        "targetTimezone",
        "termsAccepted",
    ]],
    how="left",
    on="hashID",
)
allMetadata.to_csv(os.path.join(donorPath, donorList + "-w-metaData.csv"))


# %% load data
dayData = pd.read_csv(os.path.join(dataPath, "combined-dayData.csv"), low_memory=False)
bolusData = pd.read_csv(os.path.join(dataPath, "combined-bolusEvents.csv"), low_memory=False)
basalData = pd.read_csv(os.path.join(dataPath, "combined-basalEvents.csv"), low_memory=False)


# %% attach the diagnosis type to the day data
dayDF = pd.merge(
    dayData,
    allMetadata[[
        "hashID",
        "diagnosisType",
        "pump.top",
        "pumpSettings.isfLikelyUnits",
    ]],
    how="left",
    on="hashID",
)

dayDF = filter_data(dayDF, min_days_criteria=7)
dayDF, dayDFGroupSummary = bin_data(
    dayDF,
    ageBins,
    ageGroupNames,
    ylwBins,
    ylwGroupNames,
    catColorDF,
    min_unique_donors=10,
)


# %% bolus events: attach the day data, filter by the analysis criteria, bin
# NOTE: see the filter_data function for details
bolus = merge_dayData(bolusData, dayDF)
bolus = filter_data(bolus, min_days_criteria=7)
bolus, bolusGroupSummary = bin_data(
    bolus,
    ageBins,
    ageGroupNames,
    ylwBins,
    ylwGroupNames,
    catColorDF,
    min_unique_donors=10,
)

# overview of bolus data table
figName = "overviewTable-bolus-events" + group_title
trace, fig = _write_overview_table(bolusGroupSummary, "N (Bolus Events)", figName)

# max bolus amount per donor and day
maxBolus = _daily_max(bolus, "unitsInsulin", "maxBolusPerDay", dayDF)


# %% basal events: same treatment as the bolus events
basal = merge_dayData(basalData, dayDF)
basal = filter_data(basal, min_days_criteria=7)
basal, basalGroupSummary = bin_data(
    basal,
    ageBins,
    ageGroupNames,
    ylwBins,
    ylwGroupNames,
    catColorDF,
    min_unique_donors=10,
)

# overview of basal data table
figName = "overviewTable-basal-events" + group_title
trace, fig = _write_overview_table(basalGroupSummary, "N (Basal Events)", figName)

# max basal rate per donor and day (only rows whose type is "basal")
maxBasal = _daily_max(
    basal[basal["type"] == "basal"], "rate", "maxBasalRatePerDay", dayDF
)


# %% overview of day level data table
figName = "overviewTable-day-data" + group_title
trace, fig = _write_overview_table(dayDFGroupSummary, "N (Days)", figName)


# %% summary table + boxplots for every metric
# NOTE(review): several names below are misspelled ("perecent...", "Targe",
# "Coeffient").  They are kept byte-identical because they are column names
# or are handed to the table/plot helpers, which may use them in file or plot
# names -- fix them together with any consumers.
metrics = [
    # event level metrics (per-day maxima)
    _metric('maxBolusPerDay', "Max Bolus Per Day (U)", "Max Bolus",
            0, 21, 1, maxBolus,
            strictlyPositive=True, addToAllAge=True, mergeCounts=True),
    _metric('maxBasalRatePerDay', "Max Basal Per Day (U/hr)", "Max Basal",
            0, 3.25, 1, maxBasal,
            strictlyPositive=True, addToAllAge=True, mergeCounts=True),
    # pump settings (day level)
    _metric('isfRounded', "Insulin Sensitivity Factor (mg/dL/U)",
            "Insulin Sensitivity Factor", 0, 400, 0, dayDF,
            derive=lambda df: df['isf.weightedMean'].round(1),
            strictlyPositive=True, addToAllAge=True),
    _metric('cir.weightedMean', "Carb to Insulin Ratio (g/U)",
            "Carb to Insulin Ratio", 0, 70, 1, dayDF,
            strictlyPositive=True, addToAllAge=True),
    _metric('ct.target.weightedMean', "Correction Target (mg/dL)",
            "Correction Target", 70, 180, 0, dayDF,
            strictlyPositive=True, addToAllAge=True),
    _metric('sbr.weightedMean', "Scheduled Basal Rate (U/hr)",
            "Scheduled Basal Rate", 0, 2.5, 3, dayDF,
            strictlyPositive=True, addToAllAge=True),
    # insulin and carbs (day level); not added to allAgeSummary
    _metric("totalAmountOfInsulin", "Total Daily Dose (U)",
            "Total Daily Dose", 0, 125, 1, dayDF,
            strictlyPositive=True),
    _metric("perecentBasalInPercent", "Basal Proportion of Total Daily Dose (%)",
            "Basal Proportion of Total Daily Dose", 0, 100, 1, dayDF,
            derive=lambda df: df["percentBasal"] * 100),
    _metric("totalDailyCarbs", "Total Daily Carbs (g)",
            "Total Daily Carbs", 0, 600, 0, dayDF),
    # cgm outcomes (day level); not added to allAgeSummary
    _metric("perecentInRange", "Percent of Day in Targe Range (70-180 mg/dL, %)",
            "Percent of Day in Targe Range 70-180", 0, 100, 1, dayDF,
            derive=lambda df: df["cgm.percent70to180"] * 100),
    _metric("cgm.mean_mgdL", "Daily Average CGM Level (mg/dL)",
            "Daily Average CGM Level", 50, 300, 0, dayDF),
    _metric("covPercent", "Coeffient of Variation (%)",
            "Coeffient of Variation", 6, 62, 1, dayDF,
            derive=lambda df: df["cgm.cov_mgdL"] * 100),
    _metric("perecentBelow54mgdL", "Percent of Day Below 54 mg/dL (%)",
            "Percent of Day in Extreme Hypo Below 54 mgdL", 0, 5, 2, dayDF,
            derive=lambda df: df["cgm.percentBelow54"] * 100),
    # days without an episode count are treated as 0 episodes
    _metric("extreme-hypo.count",
            "Number of Extreme Hypo Episodes (Below 54 mg/dL) per Day",
            "Number of Extreme Hypo Episodes per Day", 0, 2, 1, dayDF,
            derive=lambda df: df["extreme-hypo.count"].fillna(0)),
    _metric("extreme-hypo-durationMinutes.mean",
            "Average Duration of each Extreme Hypo Episode (minutes)",
            "Average Duration of each Extreme Hypo Episode", 15, 120, 0, dayDF),
    _metric("perecentAbove250mgdL", "Percent of Day Above 250 mg/dL (%)",
            "Percent of Day in Extreme Hyper Above 250 mgdL", 0, 75, 0, dayDF,
            derive=lambda df: df["cgm.percentAbove250"] * 100),
    _metric("extreme-hyper.count",
            "Number of Extreme Hyper Episodes (Above 250 mg/dL) per Day",
            "Number of Extreme Hyper Episodes per Day", 0, 2, 1, dayDF,
            derive=lambda df: df["extreme-hyper.count"].fillna(0)),
    _metric("avgExtremeHyperHours",
            "Average Duration of each Extreme Hyper Episode (hours)",
            "Average Duration of each Extreme Hyper Episode", 2, 10, 1, dayDF,
            derive=lambda df: df["extreme-hyper-durationMinutes.mean"] / 60),
]

# The loop deliberately runs at module level and assigns the same global
# names the original cells used (field, yLabel, figName, yMin, yMax,
# filteredDF, nDecimalPlaces, summaryTable, allAgeTable).
# NOTE(review): the table/plot helpers are defined outside this section and
# may read some of these globals (make_lite_interactive_boxplot receives no
# figure name) -- confirm before moving this loop into a function.
for metric in metrics:
    frame = metric["frame"]
    field = metric["field"]
    if metric["derive"] is not None:
        # plain column assignment; chained ``frame[col].fillna(inplace=True)``
        # does not update the frame when pandas copy-on-write is active
        frame[field] = metric["derive"](frame)

    yLabel = metric["yLabel"]
    figName = metric["figName"]
    yMin = metric["yMin"]
    yMax = metric["yMax"]

    # rows whose value is NaN fail either comparison and are dropped as well
    if metric["strictlyPositive"]:
        filteredDF = frame[frame[field] > 0].copy()
    else:
        filteredDF = frame[frame[field] >= 0].copy()

    # make/save static summary table
    figName = figName + group_title
    nDecimalPlaces = metric["nDecimalPlaces"]
    summaryTable, allAgeTable = make_static_table(
        field,
        figName,
        filteredDF,
        nDecimalPlaces,
        return_summaryTable=True,
    )

    # add the ageTable to the allAgeSummary
    if metric["addToAllAge"]:
        allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)

    # add N events and n unique donors
    if metric["mergeCounts"]:
        filteredDF = pd.merge(
            filteredDF,
            summaryTable[["categories", "count", "unique"]],
            how="left",
            on="categories",
        )

    # make/save static boxplot
    make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)

    # make lite interactive plot
    make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)


# %% save the all age summaries
figName = "allAgeSettingSummary" + group_title
allAgeSummary.to_csv(
    os.path.join(
        figure_path,
        figName + "-all-age-table.csv"
    )
)


# %% make a plot of TDD by ISF
# Average ISF per day
dayDF["isfRounded"] = dayDF['isf.weightedMean'].round(1)

filteredDF = dayDF[((dayDF['isfRounded'] > 0) &
                    (dayDF['totalAmountOfInsulin'] > 0))].copy()

# rounded once here; used for the TDD bins and for the scatter x values
tddRounded = filteredDF['totalAmountOfInsulin'].round()

# median TDD for every ISF value 1..499 that occurs more than 3 times,
# then smooth out the medians
c = _binned_medians(
    np.arange(1, 500),
    filteredDF['isfRounded'],
    filteredDF['totalAmountOfInsulin'],
    "ISF",
    "TDD",
)
trend_by_isf = c.rolling(25, center=True).mean()

# median ISF for every rounded TDD value 1..299 that occurs more than 3
# times, then smooth out the medians
d = _binned_medians(
    np.arange(1, 300),
    tddRounded,
    filteredDF['isfRounded'],
    "TDD",
    "ISF",
)
trend_by_tdd = d.rolling(10, center=True).mean()

traces = []

# one marker trace per age/ylw category
for yd in catColorDF.categories.unique():
    inCategory = filteredDF["categories"] == yd
    traces.append(go.Scattergl(
        y=filteredDF.loc[inCategory, 'isfRounded'],
        x=tddRounded[inCategory],
        name=yd,
        mode='markers',
        marker=dict(
            color=filteredDF.loc[inCategory, 'allColors'],
            opacity=0.5,
        ),
    ))

traces.append(go.Scattergl(
    y=trend_by_tdd["ISF"],
    x=trend_by_tdd["TDD"],
    mode='lines',
    name="Trend by TDD",
    line=dict(
        color="black",
        dash="dot",
    ),
))

traces.append(go.Scattergl(
    y=trend_by_isf["ISF"],
    x=trend_by_isf["TDD"],
    mode='lines',
    name="Trend by ISF",
    line=dict(
        color="black",
        dash="dash",
    ),
))

layout = go.Layout(
    font=dict(
        size=18
    ),
    xaxis=_trend_axis("TDD", 300),
    yaxis=_trend_axis("ISF", 500),
)
figName = "ISFbyTDD"
fig = go.Figure(data=traces, layout=layout)
plot_url = py.plot(fig, filename=figName, auto_open=False)
print(figName, plot_url)