import pandas as pd
import numpy as np
import datetime
import xarray as xr
import yaml
import ship_mapper as sm
def convert(file_in, file_out, data_info_file):
    '''
    Convert a VMS position table (.csv) into a "ship_mapper" netCDF file (.nc).

    The csv is read with pandas and must provide the columns ``VR_NUMBER``,
    ``LATITUDE``, ``LONGITUDE`` and ``POSITION_UTC_DATE`` (date-strings in the
    form ``%Y-%m-%d %H:%M:%S``). Rows are sorted by time and, for every ship,
    each point after its first one is written out together with an apparent
    speed estimated from the previous point of the same ship. The first point
    of every ship is therefore not part of the output, and ships with a
    single point contribute nothing.

    Rows whose date-string cannot be parsed are kept, but carry a missing
    ``DateTime``/``SeqNum`` and a ``NaN`` apparent speed.

    :param str file_in: Input file (.csv)
    :param str file_out: Output file (.nc)
    :param str data_info_file: YAML file describing other conversion parameters (.yml)
    :return: None. A netCDF file (.nc) in a format standard for "ship_mapper"
             is written to ``file_out``; it contains the original data plus
             the metadata found in ``data_info_file`` (as global attributes),
             extended with ``startdate`` and ``enddate``.
    :raises ValueError: if no output point has a valid date, so that the
             ``startdate``/``enddate`` attributes cannot be determined.
    '''
    date_format = "%Y-%m-%d %H:%M:%S"
    variable_names = ('ship_id_vrn', 'latitude', 'longitude',
                      'DateTime', 'SeqNum', 'ApparentSpeed')

    # Load data
    print(file_in)
    data = pd.read_csv(file_in, sep=',')

    # STAGE 1 - Check date-strings and calculate "serial-date" (i.e. SeqNum)
    origin_time = datetime.datetime.strptime('1/1/1980 00:00', "%m/%d/%Y %H:%M")
    parsed_times = []
    serial_dates = []
    error = 0
    for raw_date in data['POSITION_UTC_DATE'].values:
        try:
            parsed = datetime.datetime.strptime(raw_date, date_format)
        except (ValueError, TypeError):
            # ValueError: malformed date-string.
            # TypeError: empty csv cell, which pandas hands over as float NaN.
            parsed_times.append(pd.NaT)
            serial_dates.append(np.nan)
            error += 1
        else:
            parsed_times.append(parsed)
            serial_dates.append(sm.elapsed_days(parsed - origin_time))
    print('error = ' + str(error))

    # Add new columns to DataFrame
    data['DateTime'] = pd.Series(parsed_times, index=data.index)
    data['SeqNum'] = pd.Series(serial_dates, index=data.index)

    # STAGE 2 - Calculate Apparent Speed
    # Sorting by time first means that each ship's rows are visited in
    # chronological order below (rows without a valid date sort last).
    data = data.sort_values(by=['SeqNum']).reset_index(drop=True)

    # Chores: create empty lists (one entry per output point)
    VRN = []
    LATITUDE = []
    LONGITUDE = []
    DateTime = []
    SeqNum = []
    ApparentSpeed = []

    # Find unique ships
    unis = pd.unique(data['VR_NUMBER'])
    print('Number of Unique Ships = ' + str(len(unis)))

    # Loop over each ship
    for counter, ship in enumerate(unis, start=1):
        print('Ship: ' + str(counter) + '('+ str(ship) + ')')
        singleship = data[data['VR_NUMBER'] == ship]

        # Pull the columns out once per ship instead of doing a label
        # lookup on the DataFrame for every single point.
        times = list(singleship['DateTime'])
        lats = singleship['LATITUDE'].values
        lons = singleship['LONGITUDE'].values
        seqs = singleship['SeqNum'].values
        vrns = singleship['VR_NUMBER'].values

        # Note that the first point of each ship is not counted; a ship
        # track with only one point yields an empty range and is skipped.
        for i in range(1, len(singleship)):
            previous_time = times[i - 1]
            current_time = times[i]

            # Estimate elapsed time, elapsed distance and apparent speed
            if pd.isnull(previous_time) or pd.isnull(current_time):
                # No usable time difference when either date is missing
                elapsed_days = np.nan
            else:
                elapsed_days = sm.elapsed_days(current_time - previous_time)
            elapsed_distance = sm.distance(lats[i - 1], lons[i - 1],
                                           lats[i], lons[i])
            if elapsed_days > 0:
                # estimate_velocity is given the elapsed time in seconds
                speed = sm.estimate_velocity(elapsed_days * 86400,
                                             elapsed_distance)
            else:
                # Zero, negative or unknown (NaN) elapsed time
                speed = np.nan

            # Done! Append information of the current point to lists
            VRN.append(int(vrns[i]))
            LATITUDE.append(lats[i])
            LONGITUDE.append(lons[i])
            DateTime.append(current_time)
            SeqNum.append(seqs[i])
            ApparentSpeed.append(speed)

    # Metadata
    # NOTE(review): yaml.safe_load is used on purpose. A bare
    # yaml.load(stream) without a Loader is rejected by PyYAML >= 6 and, on
    # older versions, may build arbitrary Python objects from the file.
    # An empty YAML file yields None, hence the fallback to an empty dict.
    with open(data_info_file, 'r') as info_stream:
        dinfo = yaml.safe_load(info_stream) or {}

    # Only valid dates take part in the date range: missing dates do not
    # order reliably in min()/max() and cannot be formatted.
    valid_times = [t for t in DateTime if not pd.isnull(t)]
    if not valid_times:
        raise ValueError('No output points with a valid date in ' + str(file_in)
                         + '; cannot determine startdate/enddate')
    dinfo['startdate'] = min(valid_times).strftime('%Y-%m-%d %H:%M:%S')
    dinfo['enddate'] = max(valid_times).strftime('%Y-%m-%d %H:%M:%S')

    D = xr.Dataset({'ship_id_vrn': (['Dindex'], VRN),
                    'latitude': (['Dindex'], LATITUDE),
                    'longitude': (['Dindex'], LONGITUDE),
                    'DateTime': (['Dindex'], DateTime),
                    'SeqNum': (['Dindex'], SeqNum),
                    'ApparentSpeed': (['Dindex'], ApparentSpeed)},
                   coords={'Dindex': (['Dindex'], pd.RangeIndex(len(VRN)))},
                   attrs=dinfo)

    # Compress every variable with zlib
    encoding = {name: {'zlib': True} for name in variable_names}

    D.to_netcdf(file_out, format='NETCDF4', engine='netcdf4', encoding=encoding)
    print('Good!')
    return