Jonathan Coney

MRes Student in Climate and Atmospheric Science 2019-20


Split up UK station IDs

This code filters the Netatmo weather stations listed in stations_modules.csv down to those inside the UK, then splits them into chunks and writes each chunk to its own .csv file (roughly 40 files in total). A new file is started once the stations accumulated so far would need more than 247 Netatmo API calls, which keeps each chunk well within the API limit of 500 calls per hour. An example of stations_modules.csv can be found here, and one of the "chunked" files can be found here.

This code requires the following core modules: sys, os

and the following package: pandas

Example input file

stations_modules.csv
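
The real file is produced by get_ids_modules.py; the sketch below is not actual data, but illustrates the layout that nationsplit.py expects when it reads the file with index_col=0: station device IDs as columns, attribute names in the first column, and list-valued fields stored as Python string literals. Only the fields used by this script are shown, and the device IDs and values are invented.

,70:ee:50:xx:xx:01,70:ee:50:xx:xx:02
module_name,"['Outdoor Module']","['Outdoor Module']"
location,"[-1.56, 53.81]","[-0.13, 51.51]"
full_modules,"['02:00:00:xx:xx:01']","['02:00:00:xx:xx:02', '05:00:00:xx:xx:02']"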

Example output file (in practice many such files are generated)

0.csv
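
Each chunked file has the same layout as the input, so it can be loaded back into the same dict-of-dicts structure the script works with. A minimal sketch of doing so (not part of the project code; the file path is illustrative):

import pandas

# Read one chunk back in: columns are station IDs, rows are attributes.
chunk = pandas.read_csv('output_directory/0.csv', index_col=0).to_dict()

for station_id, attrs in chunk.items():
    # eval mirrors the script's own approach to stringified lists;
    # ast.literal_eval would be a safer alternative.
    modules = eval(attrs['full_modules'])
    print(station_id, 'needs', len(modules), 'API calls')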

Script

nationsplit.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Code to split up csv files containing MAC addresses for later use.

############################################################################
###USAGE                                                                 ###
###python nationsplit.py input_stations_modules_csv_file output_directory###
############################################################################

import sys
import pandas
import os

def csvread(file_name):
    """
    import a previously saved csv file

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations

    """
    file = pandas.read_csv(file_name, index_col=0).to_dict()
    for station in file:
        file[station]['module_name'] = (eval(file[station]['module_name'])[0])
    return(file)

def fix_latlong(file_name):
    """
    import and format data so that location is evaluated into a list

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations

    """
    file = csvread(file_name)
    for station in file:
        file[station]['location'] = eval(file[station]['location'])
    return(file)

def split(data):
    """
    Remove stations that are outside the UK  

    Parameters
    ----------
    data : dict
        dict returned by 'fix_latlong' function.

    Returns
    -------
    uk_data    - dict containing just stations in the UK
    outside_uk - dict containing stations outside the UK, e.g. ROI, France 

    """
    uk_data, outside_uk = {}, {}
    #flag the station as inside the UK if its longitude/latitude fall inside
    #one of these crude bounding boxes (location is stored as [lon, lat]).
    for j in data:
        in_uk = 0
        if data[j]['location'][1] > 54:
            if data[j]['location'][0] > -9:
                in_uk = 1
        elif data[j]['location'][1] > 51:
            if data[j]['location'][0] > -6:
                if data[j]['location'][0] < 1.9:
                    in_uk = 1
        elif data[j]['location'][1] > 50:
            if data[j]['location'][0] <= 1:
                in_uk = 1
        elif data[j]['location'][1] > 49:
            if data[j]['location'][0] <= -2:
                if data[j]['location'][0] >= -7:
                    in_uk = 1
            
        if in_uk == 1:
            uk_data[j] = data[j]
        else:
            outside_uk[j] = data[j]
    return(uk_data, outside_uk)
                    
def api_fix2(data, output):
    """
    Splits data into sections. The API has a limit of 500 calls per hour. Since
    pressure data is recorded by the indoor sensor, a station with an indoor sensor,
    outdoor sensor, rain gauge and anemometer requires 4 calls of the API. Hence,
    this function loops through stations until 247 calls are exceeded, at which point
    a csv file is created, and then the process starts again until every station has
    been written to some csv file.

    Parameters
    ----------
    data : dict
        dict returned by 'split' function.
    
    output : str
        path where output csv files should be saved

    Returns
    -------
    None.

    """
    iteration = 0
    temp_data = data
    new_dataset = {}
    count = 0
    for item in temp_data:
        new_dataset[item] = temp_data[item]
        count = count + len(eval(temp_data[item]['full_modules']))
        if count > 247:
            df = pandas.DataFrame.from_dict(new_dataset)
            df.to_csv(os.path.join(output,str(iteration)+'.csv'))
            print('file ' + str(iteration) + '.csv written')
            new_dataset = {}
            count = 0
            iteration += 1
    df = pandas.DataFrame.from_dict(new_dataset)
    df.to_csv(os.path.join(output,str(iteration)+'.csv'))
    print('file ' + str(iteration) + '.csv written')
    print('complete')
         

def main():
    inp = sys.argv[1]    # path to stations_modules.csv
    out = sys.argv[2]    # directory in which to write the chunked csv files
    data = fix_latlong(inp)
    uk_data, outside_uk = split(data)
    api_fix2(uk_data, out)
    
if __name__ == "__main__":
    main()
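
As the usage comment at the top of the script indicates, it takes the input csv file and an output directory as command-line arguments, for example (the paths are illustrative, and the output directory must already exist):

python nationsplit.py stations_modules.csv output_directory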