#!/usr/bin/env python # -*- coding=utf-8 -*- #Python script to retrieve Netatmo data from stations in a given .csv file #and save each station's data as a netCDF file #Adapted from file by Ben Pickering to save disdrometer data as a netCDF file: #the 'make_file' function contains much of Ben's code ########################################################################## ### USAGE: ### ###python gethistoric_netCDF_JASMIN.py input_directory output_directory### ########################################################################## from netCDF4 import Dataset from datetime import timedelta, date, datetime from time import gmtime, strftime, time import sys import os from pathlib import Path #import pathlib2 from subprocess import call import pandas import requests import warnings warnings.filterwarnings("ignore") import numpy as np def csvread(file_name): """ Import a previously saved csv file Parameters ---------- file_name : str Path to a file containing Netatmo MAC addresses Returns ------- dict Dictionary containing MAC addresses. """ try: file = pandas.read_csv(file_name, index_col=0).to_dict() for k in file: file[k]['module_name'] = (file[k]['module_name']) return file except: print('Check your csv file exists.') return {} def getpayload(): """ Generate payload depending on the time (i.e. after 00 or after 30 minutes past the hour) """ payload1 = {'grant_type': 'password', 'username': "$username", 'password': "$password", 'client_id':"$client_id_1", 'client_secret': "$client_secret_1", 'scope': 'read_station'} payload2 = {'grant_type': 'password', 'username': "$username", 'password': "$password", 'client_id':"$client_id_2", 'client_secret': "$client_secret_2", 'scope': 'read_station'} current_time = time() anom = current_time %3600 if anom > 1800: return payload2 else: return payload1 def gethistoricdata(_id, modules, start_time, end_time): """ Get Historic Netatmo Data for a given station. Max 1024 entries per call of the API Parameters ---------- _id : str MAC address of a Netatmo weather station (usually of the form '70:xx:xx:xx:xx:xx'). modules : list list containing strings of modules associated with Netatmo weather station, e.g.: ['02:xx:xx:xx:xx:xx','05:xx:xx:xx:xx:xx'] for a station with a temperature sensor and a rain gauge. start_time : int/float Time to retrieve data from, in the form of seconds since 1970-01-01T00:00 end_time : int/float Time to retrieve data until, in the form of seconds since 1970-01-01T00:00 Returns ------- output : dict Dictionary containing historical Netatmo data. """ modules = eval(modules) payload = getpayload() try: response = requests.post("https://api.netatmo.com/oauth2/token", data=payload) response.raise_for_status() access_token = response.json()["access_token"] refresh_token = response.json()["refresh_token"] scope = response.json()["scope"] except requests.exceptions.HTTPError as error: print(error.response.status_code, error.response.text) temphum = [n for n in modules if n.startswith('02:')] rain = [n for n in modules if n.startswith('05:')] wind = [n for n in modules if n.startswith('06:')] params_list = [] desc_list = [] params_outdoor = { 'access_token': access_token, 'device_id': _id, 'module_id': temphum[0], 'scale': 'max', 'type' : 'Temperature,Humidity', 'optimize' : 'False', 'date_begin' : start_time, 'date_end' : end_time } params_indoor = { 'access_token': access_token, 'device_id': _id, 'scale': 'max', 'type' : 'pressure', 'optimize' : 'False', 'date_begin' : start_time, 'date_end' : end_time } params_list.append(params_indoor) desc_list.append('indoor') params_list.append(params_outdoor) desc_list.append('outdoor') if len(rain) == 1: params_rain = { 'access_token': access_token, 'device_id': _id, 'module_id': rain[0], 'scale': 'max', 'type' : 'rain', 'optimize' : 'False', 'date_begin' : start_time, 'date_end' : end_time } params_list.append(params_rain) desc_list.append('rain') if len(wind) == 1: params_wind = { 'access_token': access_token, 'device_id': _id, 'module_id': wind[0], 'scale': 'max', 'type' : 'WindStrength,WindAngle,GustStrength,GustAngle', 'optimize' : 'False', 'date_begin' : start_time, 'date_end' : end_time } params_list.append(params_wind) desc_list.append('wind') output = {} item = 0 while item < len(params_list): try: response = requests.post("https://api.netatmo.com/api/getmeasure", params=params_list[item]) response.raise_for_status() data = response.json()["body"] if data == []: data = {'nan':['no data here']} output[desc_list[item]] = data except requests.exceptions.HTTPError as error: print(error.response.status_code, error.response.text) item += 1 return output def doit(file, output_path): data = csvread(file) current_time = time() anom = current_time %86400 finish = current_time - anom start = finish - 86400 for station in data: makefile(station, data[station], start, finish, output_path) def makefile(key, item, start, finish, output_path): """ Create a netCDF file. Function adapted from script written by Ben Pickering to save disdrometer data as a netCDF file. Parameters ---------- key : str MAC address of Netatmo station item : dict Metadata for Netatmo station from csv file. start : int/float Time to retrieve data from, in the form of seconds since 1970-01-01T00:00 finish : int/float Time to retrieve data until, in the form of seconds since 1970-01-01T00:00 output_path : str Desired output directory . Returns ------- None. """ try: site = key #start_date = datetime(int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4])) start_date = datetime.fromtimestamp(int(start)) #end_date = datetime(int(sys.argv[5]), int(sys.argv[6]), int(sys.argv[7])) end_date = datetime.fromtimestamp(int(finish)) print("Site Number %s" %site) print("Start Date %s" %start_date.strftime("%Y-%m-%d")) print("End Date %s" %end_date.strftime("%Y-%m-%d")) end_date = end_date + timedelta(days=1) #the way timedate works means the date range needs to be one day extra #to cover the dates the user entered current_dir = output_path # directory containing all the date directories output_dir = output_path date_counter = 0 missing = np.zeros([(end_date-start_date).days]) # Check OUTPUT YEAR directory exists directory_out_year = Path(output_dir + "/" + (start_date.strftime("%Y"))) if not directory_out_year.is_dir(): print("Directory %s DOESN'T EXIST!" %directory_out_year) os.mkdir(current_dir+'/'+start_date.strftime('%Y')) # Check OUTPUT MONTH directory exists directory_out_month = Path(output_dir + "/" + (start_date.strftime("%Y/%m"))) if not directory_out_month.is_dir(): print("Directory %s DOESN'T EXIST!" %directory_out_month) os.mkdir(current_dir+'/'+start_date.strftime('%Y/%m')) # Check OUTPUT day directory exists directory_out_day = Path(output_dir + "/" + (start_date.strftime("%Y/%m/%d"))) if not directory_out_day.is_dir(): print("Directory %s DOESN'T EXIST!" %directory_out_day) os.mkdir(current_dir+'/'+start_date.strftime('%Y/%m/%d')) ############################### ### Get historic data here: ### ############################### data = gethistoricdata(key, item['full_modules'], start, finish) if len(data) > 0: ################################ ### Create netCDF file here: ### ################################ town = item['city'] if town != item['city']: print(town, item['city']) if type(town) != str: town = 'uk' #print('town') town = town.replace(':', '') town = town.replace(' ', '') town = town.replace('/', '') if town == 'nocity': town = 'uk' fileloc = (output_dir+'/'+start_date.strftime('%Y/%m/%d') + '/' + 'uol-netatmo-' + key.replace(':', '') + '_' + town + '_' + start_date.strftime('%Y%m%d') + '_surfacemet_v1.5.nc' ) dataset = Dataset(fileloc, 'w', format='NETCDF4') # Global Attributes dataset.Conventions = 'CF-1.6, NCAS-AMF-1.0' dataset.source = 'UoL Netatmo unit '+ key.replace(':', '') #site_source[site] dataset.instrument_manufacturer = 'Netatmo' dataset.instrument_model = 'Netatmo Smart Home Weather Station' dataset.creator_name = 'Jonathan Coney' dataset.creator_email = 'mm16jdc@leeds.ac.uk' dataset.creator_url = 'https://orcid.org/0000-0001-7310-8002' dataset.institution = 'National Centre for Atmospheric Science (NCAS)' dataset.processing_software_url = 'https://github.com/jdconey/netatmo' dataset.processing_software_version = '1.0' dataset.calibration_sensitivity = "https://www.netatmo.com/en-gb/weather/weatherstation/specifications" dataset.calibration_certification_date = "unknown" dataset.calibration_certification_url = "https://www.netatmo.com/en-gb/weather/weatherstation/specifications" dataset.sampling_interval = '5 minutes' dataset.averaging_interval = '5 minute' # Interpreted as the frequency of data in the file dataset.product_version = 'v1.0' dataset.processing_level = '1' dataset.last_revised_date = strftime("%Y-%m-%dT%H:%M:%S", gmtime()) dataset.project = 'Goldmine or Bust? Crowdsourced data for atmospheric science' dataset.project_principal_investigator = 'Ben Pickering' dataset.project_principal_investigator_email = 'ben.pickering@ncas.ac.uk' dataset.project_principal_investigator_url = 'https://orcid.org/0000-0002-8474-9005' dataset.licence = 'This work is distributed under the Creative Commons Attribution 4.0 License: https://creativecommons.org/licenses/by/4.0/' dataset.acknowledgement = 'Acknowledgement of Netatmo and NCAS as the data provider is required whenever and wherever these data are used' dataset.platform_type = 'stationary_platform' dataset.deployment_mode = 'land' dataset.title = 'Point measurement of data recorded from a Netatmo home weather station in a single day' dataset.featureType = 'timeSeries' dataset.time_coverage_start = start_date.strftime("%Y%m%d") + "T00:00:00" dataset.time_coverage_end = start_date.strftime("%Y%m%d") + "T23:55:00" dataset.geospatial_bounds = item['location'] dataset.platform_altitude = str(item['altitude'])+' m' dataset.location_keywords = str(town)+', United Kingdom, Europe' dataset.amf_vocabularies_release = "https://github.com/ncasuk/AMF_CVs/releases/tag/v0.2.4" dataset.history = "Collected: " + start_date.strftime("%Y-%m-%d") + "\nProcessed to netCDF: " + strftime("%Y-%m-%dT%H:%M", gmtime()) dataset.comment = 'None' # Dimensions latitude = dataset.createDimension('latitude', 1) longitude = dataset.createDimension('longitude', 1) time = dataset.createDimension('time', 288) # Make coordinate variables times = dataset.createVariable('time', np.float64, ('time',)) latitudes = dataset.createVariable('latitude', np.float64, ('latitude',)) longitudes = dataset.createVariable('longitude', np.float64, ('longitude',)) # Add attributes times.axis = 'T' times.units = 'seconds since 1970-01-01 00:00' # UNIX time. times.standard_name = 'time' times.long_name = 'Time (seconds since 1970-01-01)' times.valid_min = np.float64((start_date - datetime(1970, 1, 1)).total_seconds()) times.valid_max = np.float64(1439.*60. + 59 + (start_date - datetime(1970, 1, 1)).total_seconds()) times.calendar = 'standard' latitudes.units = 'degrees_north' latitudes.standard_name = 'latitude' latitudes.long_name = 'Latitude' longitudes.units = 'degrees_east' longitudes.standard_name = 'longitude' longitudes.long_name = 'Longitude' # Assign coordinate variables latlong = eval(item['location']) latitudes[:] = [latlong[1]][:] longitudes[:] = [latlong[0]][:] times[:] = np.float64(np.linspace(0, 1435*60, 288) +(start_date - datetime(1970, 1, 1)).total_seconds()) ################################################ ### Forge Netatmo data AT = np.zeros([288]) #Air Temperature AP = np.zeros([288]) # Air Pressure H = np.zeros([288]) #Relative Humidity if 'rain' in data.keys(): if len(data['rain']) > 1: PR = np.zeros([288]) # Precipitation Flux if 'wind' in data.keys(): if len(data['wind']) > 1: wind_speed = np.zeros([288]) wind_from_direction = np.zeros([288]) wind_speed_of_gust = np.zeros([288]) wind_gust_from_direction = np.zeros([288]) newdata = {} for i in data: newdata[i] = {} for time in data[i]: if time != 'nan': new_time = (int(time)-(int(time)%300)) if new_time not in newdata[i]: newdata[i][new_time] = data[i][time] else: newdata[i]['nan'] = data[i][time] j = 0 while j < len(times): for sensor in newdata: current = int(times[j]) if current in newdata[sensor].keys(): if sensor == 'indoor': AP[j] = np.float32(newdata[sensor][current][0]) if sensor == 'outdoor': AT[j] = np.float32(newdata[sensor][current][0]+273.15) H[j] = np.float32(newdata[sensor][current][1]) if sensor == 'rain': if len(data['rain']) > 1: PR[j] = np.float32(newdata[sensor][current][0]/300) if sensor == 'wind': if len(data['wind']) > 1: wind_speed[j] = np.float32(newdata[sensor][current][0]/3.6) wind_from_direction[j] = np.float32(newdata[sensor][current][1]) wind_speed_of_gust[j] = np.float32(newdata[sensor][current][2]/3.6) wind_gust_from_direction[j] = np.float32(newdata[sensor][current][3]) else: if sensor == 'indoor': AP[j] = -1e20 if sensor == 'outdoor': AT[j] = -1e20 H[j] = -1e20 if sensor == 'rain': if len(data['rain']) > 1: PR[j] = -1e20 if sensor == 'wind': if len(data['wind']) > 1: wind_speed[j] = -1e20 wind_from_direction[j] = -1e20 wind_speed_of_gust[j] = -1e20 wind_gust_from_direction[j] = -1e20 j = j + 1 ################################################ # Make qc_flag qc = np.full(288, 1) for i in range(0, 288): error = 0 if AT[i] > 273.15+ 60: error = error+1 if AT[i] < 273.15-60: error = error+1 if AP[i] > 1100: error = error+2 if AP[i] < 760: error = error+2 if H[i] > 100: error = error+4 if H[i] < 0: error = error+4 if 'rain' in data.keys(): if len(data['rain']) > 1: if PR[i] < 0: error = error+8 if PR[i] > 150: error = error+8 if 'wind' in data.keys(): if len(data['wind']) > 1: if wind_speed[i] < 0: error = error+16 if wind_speed[i] > 100: error = error+16 if wind_speed_of_gust[i] < 0: error = error+32 if wind_speed_of_gust[i] > 100: error = error+32 if wind_from_direction[i] < 0: error = error+64 if wind_from_direction[i] > 360: error = error+64 if wind_gust_from_direction[i] < 0: error = error+128 if wind_gust_from_direction[i] > 360: error = error+128 qc[i] = error qc_flag = dataset.createVariable( varname='qc_flag', dimensions=('time'), datatype=np.uint8 ) qc_flag[:] = np.uint8(error) ################################################ # Create Netatmo Variables & assign for rain and wind sensors where available Air_Pressure = dataset.createVariable( varname='air_pressure', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) Air_Temperature = dataset.createVariable( varname='air_temperature', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) Rel_Humidity = dataset.createVariable( varname='relative_humidity', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) if 'rain' in data.keys(): if len(data['rain']) > 1: precipitation_flux = dataset.createVariable( varname='precipitation_flux', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) precipitation_flux[:] = PR[:] precipitation_flux.units = 'kg m-2 s-1' precipitation_flux.long_name = 'Precipitation rate for 5 minutes in kg m-2 s-1 (liquid equivalent for solid precipitation)' precipitation_flux.valid_min = np.float64(0.) precipitation_flux.valid_max = np.float64(np.nanmax(PR)) precipitation_flux.cell_methods = 'time: mean' precipitation_flux.coordinates = 'latitude longitude' if 'wind' in data.keys(): if len(data['wind']) > 1: Wind_Spd = dataset.createVariable( varname='wind_speed', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) Wind_Dir = dataset.createVariable( varname='wind_from_direction', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) Gust_Spd = dataset.createVariable( varname='wind_speed_of_gust', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) Gust_Dir = dataset.createVariable( varname='wind_gust_from_direction', dimensions=('time'), fill_value=-1e20, datatype=np.float64 ) Wind_Spd[:] = wind_speed[:] Wind_Dir[:] = wind_from_direction[:] Gust_Spd[:] = wind_speed_of_gust[:] Gust_Dir[:] = wind_gust_from_direction[:] Wind_Spd.units = ' m s-1' Wind_Spd.long_name = 'Magnitude of the wind velocity in m s-1' Wind_Spd.valid_min = np.float64(0.) Wind_Spd.valid_max = np.float64(np.nanmax(wind_speed)) Wind_Spd.type = 'float64' Wind_Spd.cell_methods = 'time: mean' Wind_Spd.coordinates = 'latitude longitude' Wind_Dir.units = 'degree' Wind_Dir.long_name = 'Direction in degrees from which the wind was blowing' Wind_Dir.valid_min = np.float64(0.) Wind_Dir.valid_max = np.float64(360.) Wind_Dir.type = 'float64' Wind_Dir.cell_methods = 'time: mean' Wind_Dir.coordinates = 'latitude longitude' Gust_Spd.units = ' m s-1' Gust_Spd.long_name = 'Magnitude of the wind gust velocity in m s -1' Gust_Spd.valid_min = np.float64(0.) Gust_Spd.valid_max = np.float64(np.nanmax(wind_speed)) Gust_Spd.type = 'float64' Gust_Spd.cell_methods = 'time: mean' Gust_Spd.coordinates = 'latitude longitude' Gust_Dir.units = 'degree' Gust_Dir.long_name = 'Direction in degrees from which the wind was gusting' Gust_Dir.valid_min = np.float64(0.) Gust_Dir.valid_max = np.float64(360.) Gust_Dir.type = 'float64' Gust_Dir.cell_methods = 'time: mean' Gust_Dir.coordinates = 'latitude longitude' ################################################ # Create Time Variables year = dataset.createVariable( varname='year', dimensions=('time'), datatype=np.int32 ) month = dataset.createVariable( varname='month', dimensions=('time'), datatype=np.int32 ) day = dataset.createVariable( varname='day', dimensions=('time'), datatype=np.int32 ) hour = dataset.createVariable( varname='hour', dimensions=('time'), datatype=np.int32 ) minute = dataset.createVariable( varname='minute', dimensions=('time'), datatype=np.int32 ) second = dataset.createVariable( varname='second', dimensions=('time'), datatype=np.float64 ) day_of_year = dataset.createVariable( varname='day_of_year', dimensions=('time'), datatype=np.float64 ) ################################################ # Assign Netatmo weather data for indoor and outdoor sensors Air_Pressure[:] = AP[:] Air_Temperature[:] = AT[:] Rel_Humidity[:] = H[:] Air_Pressure.units = 'hPa' Air_Pressure.long_name = 'Air Pressure at Mean Sea Level' Air_Pressure.standard_name = 'air_pressure_at_mean_sea_level' Air_Pressure.valid_min = np.float64(np.nanmin(AP)) Air_Pressure.valid_max = np.float64(np.nanmax(AP)) Air_Pressure.cell_methods = 'time: mean' Air_Pressure.coordinates = 'latitude longitude' Air_Temperature.units = 'K' Air_Temperature.long_name = 'Temperature of the outside air in K' Air_Temperature.standard_name = 'air_temperature' Air_Temperature.valid_min = np.float64(np.nanmin(AT)) Air_Temperature.valid_max = np.float64(np.nanmax(AT)) Air_Temperature.cell_methods = 'time: mean' Air_Temperature.coordinates = 'latitude longitude' Rel_Humidity.units = '%' Rel_Humidity.long_name = 'Relative Humidity' Rel_Humidity.standard_name = 'relative_humidity' Rel_Humidity.valid_min = np.float64(np.nanmin(H)) Rel_Humidity.valid_max = np.float64(np.nanmax(H)) Rel_Humidity.cell_methods = 'time: mean' Rel_Humidity.coordinates = 'latitude longitude' # Assign time data year[:] = np.full(288, np.int32(start_date.strftime("%Y"))) month[:] = np.full(288, np.int32(start_date.strftime("%m"))) day[:] = np.full(288, np.int32(start_date.strftime("%d"))) hour[:] = np.repeat(np.linspace(0, 23, 24, dtype=np.int32), 12) minute[:] = np.tile(np.linspace(0, 55, 12, dtype=np.int32), 24) second[:] = np.zeros(288, dtype=np.float32) day_of_year[:] = np.linspace(start_date.timetuple().tm_yday, start_date.timetuple().tm_yday+1, 289, dtype=np.float32)[:-1] ################################################ # TIME year.units = "1" year.long_name = 'Year' year.valid_min = int(start_date.strftime("%Y")) year.valid_max = int(start_date.strftime("%Y")) month.units = '1' month.long_name = 'Month' month.valid_min = int(start_date.strftime("%m")) month.valid_max = int(start_date.strftime("%m")) day.units = '1' day.long_name = 'Day' day.valid_min = int(start_date.strftime("%d")) day.valid_max = int(start_date.strftime("%d")) hour.units = '1' hour.long_name = 'Hour' hour.valid_min = int(0) hour.valid_max = int(23) minute.units = '1' minute.long_name = 'Minute' minute.valid_min = int(0) minute.valid_max = int(59) second.units = '1' second.long_name = 'Second' second.valid_min = np.float64(0) second.valid_max = np.float64(59) day_of_year.units = "1" day_of_year.long_name = 'Day of Year' day_of_year.valid_min = np.float64(np.min(day_of_year)) day_of_year.valid_max = np.float64(np.max(day_of_year)) qc_flag.long_name = 'Data Quality Flag' qc_flag.type = 'byte' qc_flag.fill_value = np.uint8(0) numbers = list(range(0, 256)) hexes = [] for j in numbers: hexes.append(hex(j)) qc_flag.flag_values = np.uint8(numbers) qc_flag.flag_meanings = ("1 - temperature < 213.15 C or > 333.15 K; \ 2 - Pressure >1100 hPa or <760 hPa; \ 4 - Humidity < 0 or > 100; 8 - Rain <0 kg m-2 s-1 or > 150 kg m-2 s-1;\ 16 - Wind velocity <0 m s-1 or >100 m s-1;\ 32 - Gust velocity < 0 m s-1 or > 100 m s-1;\ 64 - Wind direction <0 degrees or >360 degrees;\ 128 - Gust direction <0 degrees or >360 degrees." ) ################################################ # Close the file dataset.close() date_counter += 1 else: print('fail. Empty data') except Exception as e: print(e) dataset.close() def filefinder(path): """ Calculates which csv file to read based on the time of day. Parameters ---------- path : str path containing csv files to read. Returns ------- path to filename as str. """ current_time = time() anom = current_time %86400 output = (anom / 1800) - 2 filename = int(np.floor(output)) string = os.path.join(path,str(filename)+'.csv') return string def main(): input_path=sys.argv[1] output_path=sys.argv[2] temp = filefinder(input_path) print(temp) doit(temp,output_path) if __name__ == "__main__": main()