#!/usr/bin/env python
# -*- coding=utf-8 -*-
# Code to split up csv files containing MAC addresses for later use.
############################################################################
###USAGE                                                                 ###
###python nationsplit.py input_stations_modules_csv_file output_directory###
############################################################################

import ast
import os
import sys

import pandas

# Default number of API calls (one per module) after which api_fix2 closes
# the current output file and starts a new one.
MAX_CALLS_PER_FILE = 247


def csvread(file_name):
    """
    import a previously saved csv file

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by
        get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations. Each column of the csv becomes one
    entry (column label -> {row label -> value}); the 'module_name' value
    is replaced by the first element of the list it encodes.
    """
    stations = pandas.read_csv(file_name, index_col=0).to_dict()
    for station in stations:
        # 'module_name' is stored as the text of a Python list. It is parsed
        # with ast.literal_eval rather than eval() so that the contents of
        # the csv file can never be executed as code.
        module_names = ast.literal_eval(stations[station]['module_name'])
        stations[station]['module_name'] = module_names[0]
    return stations


def fix_latlong(file_name):
    """
    import and format data so that location is evaluated into a list

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by
        get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations, as returned by 'csvread', with each
    'location' value parsed from text into a Python object.
    """
    stations = csvread(file_name)
    for station in stations:
        # Parsed with ast.literal_eval instead of eval(): only literals are
        # accepted, so the csv content is never executed.
        stations[station]['location'] = ast.literal_eval(
            stations[station]['location'])
    return stations


def _in_uk(lon, lat):
    """Return True if (lon, lat) falls inside the coarse UK boxes."""
    # One longitude window per latitude band, checked from north to south.
    # NOTE(review): the band above 54 has no eastern or northern limit;
    # presumably the input is already clipped to a bounding box upstream
    # -- confirm against get_ids_modules.py.
    if lat > 54:
        return lon > -9
    if lat > 51:
        return -6 < lon < 1.9
    if lat > 50:
        return lon <= 1
    if lat > 49:
        return -7 <= lon <= -2
    return False


def split(data):
    """
    Remove stations that are outside the UK

    Parameters
    ----------
    data : dict
        dict returned by 'fix_latlong' function. For each station,
        'location'[1] is compared against the latitude bands and
        'location'[0] against the longitude windows (i.e. the pair is read
        as [longitude, latitude]).

    Returns
    -------
    uk_data - dict containing just stations in the UK
    outside_uk - dict containing stations outside the UK, e.g. ROI, France
    """
    uk_data, outside_uk = {}, {}
    # add station to each region, or others if it doesn't fit.
    for station in data:
        location = data[station]['location']
        if _in_uk(location[0], location[1]):
            uk_data[station] = data[station]
        else:
            outside_uk[station] = data[station]
    return (uk_data, outside_uk)


def _write_chunk(chunk, output, index):
    """Write one batch of stations to '<index>.csv' inside 'output'."""
    file_name = str(index) + '.csv'
    pandas.DataFrame.from_dict(chunk).to_csv(os.path.join(output, file_name))
    print('file ' + file_name + ' written')


def api_fix2(data, output, max_calls=MAX_CALLS_PER_FILE):
    """
    Splits data into sections. The API has a limit of 500 calls per hour.
    Since pressure data is recorded by the indoor sensor, a station with an
    indoor sensor, outdoor sensor, rain gauge and anemometer requires 4 calls
    of the API. Hence, this function loops through stations until 247 calls
    is reached, at which point a csv file is created, and then the process
    starts again until each station has been written to some csv file.

    The call count is checked after a station has been added, so a file can
    hold slightly more than 'max_calls' calls. Files are named '0.csv',
    '1.csv', ... No file is written for an empty remainder, so empty input
    produces no files at all.

    Parameters
    ----------
    data : dict
        dict returned by 'split' function. Each station needs a
        'full_modules' value holding the text of a list; its length is the
        number of API calls counted for that station.
    output : str
        path where output csv files should be saved
    max_calls : int, optional
        number of calls after which the current file is closed.
        Defaults to 247.

    Returns
    -------
    None.
    """
    iteration = 0
    chunk = {}
    calls = 0
    for station in data:
        chunk[station] = data[station]
        # ast.literal_eval instead of eval(): the csv text is parsed as a
        # literal and never executed.
        calls += len(ast.literal_eval(data[station]['full_modules']))
        if calls > max_calls:
            _write_chunk(chunk, output, iteration)
            chunk = {}
            calls = 0
            iteration += 1
    # Only write the remainder if there is one; otherwise an empty csv file
    # would be produced whenever the last station closed a batch.
    if chunk:
        _write_chunk(chunk, output, iteration)
    print('complete')


def main():
    """Read the stations csv, keep UK stations and write them in batches."""
    if len(sys.argv) < 3:
        sys.exit('usage: python nationsplit.py '
                 'input_stations_modules_csv_file output_directory')
    inp = sys.argv[1]
    out = sys.argv[2]
    # Create the output directory up front so the first write cannot fail
    # just because the directory is missing.
    if not os.path.isdir(out):
        os.makedirs(out)
    data = fix_latlong(inp)
    uk_data, _ = split(data)
    api_fix2(uk_data, out)


if __name__ == "__main__":
    main()