Index | About | Computer Project | Files
Computer Project Introduction | Get IDs | Split up IDs | Get Netatmo weather data
This script splits the station list produced by the previous step, stations_modules.csv, into chunks and writes them into separate (roughly 40) .csv files.
An example of stations_modules.csv can be found here, and one of the "chunked" files can be found here.
This code requires the following core modules: sys and os,
and the following package: pandas 0.25.1, which is used to read and write .csv files.
Example input file
Example output file (in reality far more than 1 file is generated)
Script
#!/usr/bin/env python
# -*- coding=utf-8 -*-
# Code to split up csv files containing MAC addresses for later use.
############################################################################
###USAGE                                                                 ###
###python nationsplit.py input_stations_modules_csv_file output_directory###
############################################################################
import ast
import os
import sys

import pandas


def _literal(value):
    """Parse a Python literal stored as text; pass other values through.

    ``ast.literal_eval`` is used instead of ``eval`` so that text read from
    a csv file can never execute arbitrary code.
    """
    if isinstance(value, str):
        return ast.literal_eval(value)
    return value


def csvread(file_name):
    """
    import a previously saved csv file

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by
        get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations. Each csv column becomes one entry
    (keyed by the column header) and 'module_name' is reduced to the first
    element of the list stored in the file.
    """
    stations = pandas.read_csv(file_name, index_col=0).to_dict()
    for station in stations.values():
        station['module_name'] = _literal(station['module_name'])[0]
    return stations


def fix_latlong(file_name):
    """
    import and format data so that location is evaluated into a list

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by
        get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations, with 'location' parsed from its
    text form into a list.
    """
    stations = csvread(file_name)
    for station in stations.values():
        station['location'] = _literal(station['location'])
    return stations


def _in_uk(location):
    """Return True if ``location`` falls inside one of the rough UK boxes.

    ``location[0]`` is compared against east/west bounds and ``location[1]``
    against north/south bounds. The bands are tested from north to south and
    only the first band that matches the second coordinate is considered.
    """
    lon, lat = location[0], location[1]
    if lat > 54:
        # NOTE(review): no eastern bound in this band - presumably the input
        # is already restricted to a region around the UK; confirm.
        return lon > -9
    if lat > 51:
        return -6 < lon < 1.9
    if lat > 50:
        return lon <= 1
    if lat > 49:
        return -7 <= lon <= -2
    return False


def split(data):
    """
    Remove stations that are outside the UK

    The UK is approximated by a handful of rectangular boxes, so the
    classification is coarse near the borders.

    Parameters
    ----------
    data : dict
        dict returned by 'fix_latlong' function.

    Returns
    -------
    uk_data - dict containing just stations in the UK
    outside_uk - dict containing stations outside the UK, e.g. ROI, France
    """
    uk_data, outside_uk = {}, {}
    for station_id, station in data.items():
        target = uk_data if _in_uk(station['location']) else outside_uk
        target[station_id] = station
    return (uk_data, outside_uk)


def _write_chunk(chunk, output, iteration):
    """Write one chunk of stations to '<output>/<iteration>.csv'."""
    file_name = str(iteration) + '.csv'
    pandas.DataFrame.from_dict(chunk).to_csv(os.path.join(output, file_name))
    print('file ' + file_name + ' written')


def api_fix2(data, output, max_calls=247):
    """
    Splits data into sections.

    The API has a limit of 500 calls per hour. Since pressure data is
    recorded by the indoor sensor, a station with an indoor sensor, outdoor
    sensor, rain gauge and anemometer requires 4 calls of the API. Hence,
    this function loops through stations until the call budget is exceeded,
    at which point a csv file is created, and then the process starts again
    until each station has been written to some csv file.

    The station that pushes the running total over ``max_calls`` is kept in
    the chunk being written, so a chunk can exceed the budget by the calls
    of that one station. A trailing chunk is only written when it contains
    at least one station, so no empty csv files are produced.

    Parameters
    ----------
    data : dict
        dict returned by 'split' function.
    output : str
        path where output csv files should be saved; created if missing.
    max_calls : int, optional
        number of API calls per chunk after which a file is written
        (default 247).

    Returns
    -------
    None.
    """
    if output:
        os.makedirs(output, exist_ok=True)

    iteration = 0
    chunk = {}
    count = 0
    for station_id, station in data.items():
        chunk[station_id] = station
        # One API call is needed per entry in the station's module list.
        count += len(_literal(station['full_modules']))
        if count > max_calls:
            _write_chunk(chunk, output, iteration)
            chunk = {}
            count = 0
            iteration += 1

    if chunk:
        _write_chunk(chunk, output, iteration)
    print('complete')


def main():
    """Read the station csv, keep the UK stations and write them in chunks."""
    if len(sys.argv) < 3:
        sys.exit('usage: python nationsplit.py '
                 'input_stations_modules_csv_file output_directory')
    inp = sys.argv[1]
    out = sys.argv[2]
    data = fix_latlong(inp)
    uk_data, _ = split(data)
    api_fix2(uk_data, out)


if __name__ == "__main__":
    main()