Jonathan Coney

MRes Student in Climate and Atmospheric Science 2019-20


Split up UK station IDs

This code filters the Netatmo weather stations listed in stations_modules.csv down to those inside the UK, then splits them into chunks and writes each chunk to its own .csv file (roughly 40 files in total). A new file is started once the stations accumulated so far would need more than 247 Netatmo API calls, which keeps each chunk well within the API limit of 500 calls per hour. An example of stations_modules.csv can be found here, and one of the "chunked" files can be found here.

This code requires the following core modules: sys, os

and the following package: pandas

Example input file

stations_modules.csv
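
The real file is produced by get_ids_modules.py; the sketch below is not actual data, but illustrates the layout that nationsplit.py expects when it reads the file with index_col=0: station device IDs as columns, attribute names in the first column, and list-valued fields stored as Python string literals. Only the fields used by this script are shown, and the device IDs and values are invented.

,70:ee:50:xx:xx:01,70:ee:50:xx:xx:02
module_name,"['Outdoor Module']","['Outdoor Module']"
location,"[-1.56, 53.81]","[-0.13, 51.51]"
full_modules,"['02:00:00:xx:xx:01']","['02:00:00:xx:xx:02', '05:00:00:xx:xx:02']"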

Example output file (in practice many such files are generated)

0.csv
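
Each chunked file has the same layout as the input, so it can be loaded back into the same dict-of-dicts structure the script works with. A minimal sketch of doing so (not part of the project code; the file path is illustrative):

import pandas

# Read one chunk back in: columns are station IDs, rows are attributes.
chunk = pandas.read_csv('output_directory/0.csv', index_col=0).to_dict()

for station_id, attrs in chunk.items():
    # eval mirrors the script's own approach to stringified lists;
    # ast.literal_eval would be a safer alternative.
    modules = eval(attrs['full_modules'])
    print(station_id, 'needs', len(modules), 'API calls')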

Script

nationsplit.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Code to split up csv files containing MAC addresses for later use.

############################################################################
###USAGE                                                                 ###
###python nationsplit.py input_stations_modules_csv_file output_directory###
############################################################################

import sys
import pandas
import os

def csvread(file_name):
    """
    import a previously saved csv file

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations

    """
    file = pandas.read_csv(file_name, index_col=0).to_dict()
    for station in file:
        file[station]['module_name'] = (eval(file[station]['module_name'])[0])
    return(file)

def fix_latlong(file_name):
    """
    import and format data so that location is evaluated into a list

    Parameters
    ----------
    file_name : str
        Path to csv file containing Netatmo stations, generated by get_ids_modules.py.

    Returns
    -------
    dict containing Netatmo stations

    """
    file = csvread(file_name)
    for station in file:
        file[station]['location'] = eval(file[station]['location'])
    return(file)

def split(data):
    """
    Remove stations that are outside the UK  

    Parameters
    ----------
    data : dict
        dict returned by 'fix_latlong' function.

    Returns
    -------
    uk_data    - dict containing just stations in the UK
    outside_uk - dict containing stations outside the UK, e.g. ROI, France 

    """
    uk_data, outside_uk = {}, {}
    #flag the station as inside the UK if its longitude/latitude fall inside
    #one of these crude bounding boxes (location is stored as [lon, lat]).
    for j in data:
        in_uk = 0
        if data[j]['location'][1] > 54:
            if data[j]['location'][0] > -9:
                in_uk = 1
        elif data[j]['location'][1] > 51:
            if data[j]['location'][0] > -6:
                if data[j]['location'][0] < 1.9:
                    in_uk = 1
        elif data[j]['location'][1] > 50:
            if data[j]['location'][0] <= 1:
                in_uk = 1
        elif data[j]['location'][1] > 49:
            if data[j]['location'][0] <= -2:
                if data[j]['location'][0] >= -7:
                    in_uk = 1
            
        if in_uk == 1:
            uk_data[j] = data[j]
        else:
            outside_uk[j] = data[j]
    return(uk_data, outside_uk)
                    
def api_fix2(data, output):
    """
    Splits data into sections. The API has a limit of 500 calls per hour. Since
    pressure data is recorded by the indoor sensor, a station with an indoor sensor,
    outdoor sensor, rain gauge and anemometer requires 4 calls of the API. Hence,
    this function loops through stations until 247 calls are exceeded, at which point
    a csv file is created, and then the process starts again until every station has
    been written to some csv file.

    Parameters
    ----------
    data : dict
        dict returned by 'split' function.
    
    output : str
        path where output csv files should be saved

    Returns
    -------
    None.

    """
    iteration = 0
    temp_data = data
    new_dataset = {}
    count = 0
    for item in temp_data:
        new_dataset[item] = temp_data[item]
        count = count + len(eval(temp_data[item]['full_modules']))
        if count > 247:
            df = pandas.DataFrame.from_dict(new_dataset)
            df.to_csv(os.path.join(output,str(iteration)+'.csv'))
            print('file ' + str(iteration) + '.csv written')
            new_dataset = {}
            count = 0
            iteration += 1
    df = pandas.DataFrame.from_dict(new_dataset)
    df.to_csv(os.path.join(output,str(iteration)+'.csv'))
    print('file ' + str(iteration) + '.csv written')
    print('complete')
         

def main():
    inp = sys.argv[1]    # path to stations_modules.csv
    out = sys.argv[2]    # directory in which to write the chunked csv files
    data = fix_latlong(inp)
    uk_data, outside_uk = split(data)
    api_fix2(uk_data, out)
    
if __name__ == "__main__":
    main()
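
As the usage comment at the top of the script indicates, it takes the input csv file and an output directory as command-line arguments, for example (the paths are illustrative, and the output directory must already exist):

python nationsplit.py stations_modules.csv output_directory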