Source code for VMS_DFO_Oracle

import pandas as pd
import numpy as np
import datetime
import xarray as xr
import yaml

import ship_mapper as sm

def convert(file_in, file_out, data_info_file):
    '''
    This is a csv to nc converter for VMS data

    The input table must provide the columns ``POSITION_UTC_DATE``
    (formatted as ``%Y-%m-%d %H:%M:%S``), ``VR_NUMBER``, ``LATITUDE`` and
    ``LONGITUDE``. For every ship (unique ``VR_NUMBER``) the positions are
    ordered by time and each position after the first is written out
    together with an apparent speed estimated from the previous position.

    :param str file_in: Input file (.csv)

    :param str file_out: Output file (.nc)

    :param str data_info_file: YAML file describing other conversion parameters (.yml)

    :return: A netCDF file (.nc) in a format standard for "ship_mapper"
             containing the original data, but also containing metadata included in the data_info.yml file

    :raises ValueError: If no ship has at least two positions, or none of
                        the resulting track points has a valid date
    '''
    # Load data
    print(file_in)
    data = pd.read_csv(file_in, sep=',')

    # STAGE 1 - Check date-strings and calculate "serial-date" (i.e. SeqNum)
    origin_time = datetime.datetime.strptime('1/1/1980 00:00', "%m/%d/%Y %H:%M")
    date_times = []
    seq_nums = []
    date_flags = []
    error = 0

    for date_string in data['POSITION_UTC_DATE'].values:
        try:
            parsed = datetime.datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
        except (ValueError, TypeError):
            # ValueError: malformed date-string.
            # TypeError: missing cell, which read_csv delivers as a float NaN.
            date_times.append(pd.NaT)
            seq_nums.append(np.nan)
            date_flags.append(0)
            error += 1
            continue

        date_times.append(parsed)
        seq_nums.append(sm.elapsed_days(parsed - origin_time))
        # Flag every row (1 = date parsed, 0 = date rejected) so that the
        # flag list stays the same length as the table.
        date_flags.append(1)

    print('error = ' + str(error))

    # Add new columns to DataFrame (explicit index keeps them row-aligned)
    data['DateTime'] = pd.Series(date_times, index=data.index)
    data['SeqNum'] = pd.Series(seq_nums, index=data.index)
    # NOTE: DateFlag is informational only; it is not written to the output
    data['DateFlag'] = pd.Series(date_flags, index=data.index)

    # STAGE 2 - Calculate Apparent Speed
    # Sorting by SeqNum puts rows in chronological order; rows whose date
    # could not be parsed (SeqNum = NaN) end up last.
    data = data.sort_values(by=['SeqNum']).reset_index(drop=True)

    # Chores: create empty lists (one entry per output track point)
    VRN = []
    LATITUDE = []
    LONGITUDE = []
    DateTime = []
    SeqNum = []
    ApparentSpeed = []

    # Find unique ships
    unis = pd.unique(data['VR_NUMBER'])
    print('Number of Unique Ships = ' + str(len(unis)))

    # Loop over each ship
    for counter, ship in enumerate(unis, start=1):
        print('Ship: ' + str(counter) + '('+ str(ship) + ')')
        singleship = data[data['VR_NUMBER'] == ship]
        labels = singleship.index

        # Walk consecutive pairs of positions. The first point of each ship
        # is not counted, so a track with a single point yields nothing.
        for idx1, idx2 in zip(labels[:-1], labels[1:]):

            # Time
            current_time = data.at[idx2, 'DateTime']
            elapsed_days = sm.elapsed_days(current_time - data.at[idx1, 'DateTime'])

            # Get lat/lons
            lon1 = data.at[idx1, 'LONGITUDE']
            lat1 = data.at[idx1, 'LATITUDE']
            lon2 = data.at[idx2, 'LONGITUDE']
            lat2 = data.at[idx2, 'LATITUDE']

            # Estimate elapsed distance and apparent speed
            elapsed_distance = sm.distance(lat1, lon1, lat2, lon2)
            if elapsed_days > 0:
                # 86400 converts the elapsed days into seconds
                speed = sm.estimate_velocity(elapsed_days * 86400, elapsed_distance)
            else:
                # No time elapsed (or unknown time): speed is undefined
                speed = np.nan

            # Done! Append information to lists
            DateTime.append(current_time)
            ApparentSpeed.append(speed)
            VRN.append(int(data.at[idx2, 'VR_NUMBER']))
            LATITUDE.append(lat2)
            LONGITUDE.append(lon2)
            SeqNum.append(data.at[idx2, 'SeqNum'])

    # Metadata
    # safe_load only builds plain YAML types; yaml.load() without an explicit
    # Loader is unsafe on old PyYAML versions and an error on recent ones.
    with open(data_info_file, 'r') as info_file:
        dinfo = yaml.safe_load(info_file) or {}

    # Ignore undated points when working out the covered period
    valid_times = [stamp for stamp in DateTime if not pd.isnull(stamp)]
    if not valid_times:
        raise ValueError('No track points with a valid date were found in ' + str(file_in))
    dinfo['startdate'] = min(valid_times).strftime('%Y-%m-%d %H:%M:%S')
    dinfo['enddate'] = max(valid_times).strftime('%Y-%m-%d %H:%M:%S')

    D = xr.Dataset({'ship_id_vrn':(['Dindex'],VRN),
                    'latitude':(['Dindex'],LATITUDE),
                    'longitude':(['Dindex'],LONGITUDE),
                    'DateTime':(['Dindex'],DateTime),
                    'SeqNum':(['Dindex'],SeqNum),
                    'ApparentSpeed':(['Dindex'],ApparentSpeed)},
                   coords={'Dindex':(['Dindex'],pd.RangeIndex(len(VRN)))},
                   attrs=dinfo)

    # Compress every variable with zlib
    encoding = {name: {'zlib': True} for name in D.data_vars}

    D.to_netcdf(file_out,format='NETCDF4',engine='netcdf4',encoding=encoding)

    print('Good!')