# Source code for ship_mapper.gridder

'''
Functions that condense a list of "pings" (i.e. vessel positions) into a
2-dimensional grid.
'''
import pandas as pd
import numpy as np
import xarray as xr
import os
import copy
import ship_mapper as sm



def gridder(info, data_in, filename_out, overwrite=False):
    '''
    Counts "pings" inside a grid-cell and computes "Ship minutes per km2"

    Arguments:
        info (info): ``info`` object containing metadata
        data_in (xarray.Dataset): Data to be gridded
        filename_out (str): Name of file that will be writen as output

    Keyword arguments:
        overwrite (bool): If ``True`` older files will be overwritten.
            If ``False``, only new files will be processed

    See also:
        :py:func:`gridder_pingsPerCell <ship_mapper.gridder_pingsPerCelll>`
        :py:func:`grid_merger <ship_mapper.grid_merger>`
    '''
    print('gridder ---------------------------------------------')
    file_out = os.path.join(str(info.dirs.gridded_data), filename_out)
    # Max speed (in knots) between two consecutive pings for which we still
    # interpolate a track between them; anything faster is treated as bad data.
    interp_threshold = 40

    # Skip work when the output already exists, unless overwrite was requested
    if not os.path.isfile(file_out) or overwrite:
        ship_id = data_in.attrs['ship_id']
        data = data_in

        # Make grid (cell edges along lon/lat)
        x = np.arange(info.grid.minlon, info.grid.maxlon,
                      info.grid.bin_size, dtype=np.float64)
        y = np.arange(info.grid.minlat, info.grid.maxlat,
                      info.grid.bin_size, dtype=np.float64)
        # FIX: np.ceil returns floats; np.histogram2d requires integer bin
        # counts, so cast explicitly to int.
        info.grid.bin_number = (
            int(np.ceil((info.grid.maxlon - info.grid.minlon) / info.grid.bin_size)),
            int(np.ceil((info.grid.maxlat - info.grid.minlat) / info.grid.bin_size)))

        # Find unique ships
        unis = pd.unique(data[ship_id].values)
        print('Number of Unique Ships = ' + str(len(unis)))

        # Accumulators: grid indices of every (interpolated) ping and the
        # minutes-per-cell weight associated with each one.
        iix, iiy, iit = [], [], []
        counter = 0
        for ship in unis:
            counter += 1
            print('Ship: ' + str(counter) + ' (id:' + str(ship) + ')')
            indxship = (data[ship_id] == ship)
            singleship = data.sel(Dindex=indxship)

            # Determine "trips": a gap of more than 1 in SeqNum (presumably a
            # day-number timestamp -- TODO confirm) splits the track.
            indxtrip = (singleship['SeqNum'].diff('Dindex') > 1)  # find > 1-day gaps
            trip_gaps = indxtrip[indxtrip == True]['Dindex'].values
            trip_gaps = np.insert(trip_gaps, 0, 0)
            trip_gaps = np.append(trip_gaps, singleship['Dindex'].values[-1] + 1)

            # Loop over trips
            for k in range(1, len(trip_gaps)):
                print('trip = ' + str(k))
                index_gap = ((singleship['Dindex'] >= trip_gaps[k - 1]) &
                             (singleship['Dindex'] < trip_gaps[k]))
                singleship_trip = singleship.sel(Dindex=index_gap)

                # Get lat/lons/seqNums
                lons = singleship_trip['longitude'].values.tolist()
                lats = singleship_trip['latitude'].values.tolist()
                seqNums = singleship_trip['SeqNum'].values.tolist()

                num_of_pings = len(lons)
                if num_of_pings > 1:
                    for j in range(1, num_of_pings):
                        lon1 = lons[j - 1]
                        lat1 = lats[j - 1]
                        lon2 = lons[j]
                        lat2 = lats[j]
                        x1, y1 = sm.align_with_grid(x, y, lon1, lat1)
                        x2, y2 = sm.align_with_grid(x, y, lon2, lat2)

                        # Interpolate between known points
                        elapsed_days = abs(seqNums[j] - seqNums[j - 1])

                        # Estimate distance and only interpolate when the
                        # implied speed is plausible (threshold converted:
                        # knots * knots_to_km/h conversion * km_to_m conversion
                        # = meters)
                        dist = sm.distance(lat1, lon1, lat2, lon2)
                        if dist < (interp_threshold * 1.852 * 1000):
                            ix, iy = sm.interp2d(x1, y1, x2, y2)
                            # Spread the elapsed time evenly over every cell
                            # visited between the two pings (1440 min/day)
                            minutes_per_cell = elapsed_days / len(ix) * 1440
                            iix.extend(ix)
                            iiy.extend(iy)
                            iit.extend([minutes_per_cell] * len(ix))

        # Project pings to grid, weighting each ping by its minutes-per-cell
        H0, xedges, yedges = np.histogram2d(
            iix, iiy, bins=info.grid.bin_number,
            range=[[0, info.grid.bin_number[0]],
                   [0, info.grid.bin_number[1]]],
            weights=iit)

        # Normalize by each cell's area (km^2) -> "Ship minutes per km2"
        ship_density = H0 / info.grid.areas

        D = xr.Dataset({'ship_density': (['x', 'y'], ship_density)},
                       coords={'lon': (['x'], x),
                               'lat': (['y'], y)})

        # Metadata: inherit from input, drop what no longer applies
        D.attrs = copy.deepcopy(data.attrs)
        del D.attrs['ship_id']
        D.attrs['minlat'] = info.grid.minlat
        D.attrs['maxlat'] = info.grid.maxlat
        D.attrs['minlon'] = info.grid.minlon
        D.attrs['maxlon'] = info.grid.maxlon
        D.attrs['bin_number'] = info.grid.bin_number
        D.attrs['bin_size'] = info.grid.bin_size
        D.attrs['time_bin'] = info.grid.time_bin
        D.attrs['interpolation'] = 'Linear'
        D.attrs['interp_threshold'] = interp_threshold
        D.attrs['units'] = 'Ship minutes per km$^2$'
        D.attrs['unit_description'] = ('Minutes spent by vessels\n' +
                                       'inside the area of a gricell\n' +
                                       '(note it is LOG scale)')

        # Print NetCDF file
        sm.checkDir(str(info.dirs.gridded_data))
        print('Writting...')
        print('...' + file_out)
        D.to_netcdf(path=file_out)
    return
def gridder_pingsPerCell(info, data_in, file_name, overwrite=False):
    '''
    Counts "pings" inside a gridcell and computes
    "No. of vessels within grid-cell"

    Arguments:
        info (info): ``info`` object containing metadata
        data_in (xarray.Dataset): Data to be gridded
        file_name (str): Name of file that will be writen as output

    Keyword arguments:
        overwrite (bool): If ``True`` older files will be overwritten.
            If ``False``, only new files will be processed

    See also:
        :py:func:`gridder <ship_mapper.gridder>`
        :py:func:`grid_merger <ship_mapper.grid_merger>`
    '''
    print('gridder_pingsPerCell ----------------------------')
    file_out = os.path.join(str(info.dirs.gridded_data), file_name)
    # Max plausible speed (knots) between consecutive pings; faster pairs are
    # not interpolated (treated as bad data).
    interp_threshold = 40

    # Skip work when the output already exists, unless overwrite was requested
    if not os.path.isfile(file_out) or overwrite:
        ship_id = data_in.attrs['ship_id']
        data = data_in

        # Make grid (cell edges along lon/lat)
        x = np.arange(info.grid.minlon, info.grid.maxlon,
                      info.grid.bin_size, dtype=np.float64)
        y = np.arange(info.grid.minlat, info.grid.maxlat,
                      info.grid.bin_size, dtype=np.float64)
        # FIX: np.ceil returns floats; np.histogram2d requires integer bin
        # counts, so cast explicitly to int.
        info.grid.bin_number = (
            int(np.ceil((info.grid.maxlon - info.grid.minlon) / info.grid.bin_size)),
            int(np.ceil((info.grid.maxlat - info.grid.minlat) / info.grid.bin_size)))

        # Find unique ships
        unis = pd.unique(data[ship_id].values)
        print('Number of Unique Ships = ' + str(len(unis)))

        # Accumulators for the grid indices of every counted position
        iiix, iiiy = [], []
        counter = 0
        for ship in unis:
            counter += 1
            print('Ship: ' + str(counter) + ' (id:' + str(ship) + ')')
            indxship = (data[ship_id] == ship)
            singleship = data.sel(Dindex=indxship)

            # Determine "trips": a gap of more than 1 in SeqNum (presumably a
            # day-number timestamp -- TODO confirm) splits the track.
            indxtrip = (singleship['SeqNum'].diff('Dindex') > 1)  # find > 1-day gaps
            trip_gaps = indxtrip[indxtrip == True]['Dindex'].values
            trip_gaps = np.insert(trip_gaps, 0, 0)
            trip_gaps = np.append(trip_gaps, singleship['Dindex'].values[-1] + 1)

            # Loop over trips
            for k in range(1, len(trip_gaps)):
                print(k)
                index_gap = ((singleship['Dindex'] >= trip_gaps[k - 1]) &
                             (singleship['Dindex'] < trip_gaps[k]))
                singleship_trip = singleship.sel(Dindex=index_gap)

                # Split data into "time_bins" (units converted to days)
                time_bin = 1 / 144  # info.grid.bin_size / (60*24)
                MinSeqNum = singleship_trip['SeqNum'].values.min()
                MaxSeqNum = singleship_trip['SeqNum'].values.max() + time_bin
                # 1/144 = once every 10 minutes
                time_bins = np.arange(MinSeqNum, MaxSeqNum, time_bin)

                # Loop over each ship's time_bin
                for i in range(1, len(time_bins)):
                    iix, iiy = [], []
                    indx = ((singleship_trip['SeqNum'] >= time_bins[i - 1]) &
                            (singleship_trip['SeqNum'] <= time_bins[i]))
                    singleship_trip_bin = singleship_trip.sel(Dindex=indx)

                    # Get lat/lons
                    lons = singleship_trip_bin['longitude'].values.tolist()
                    lats = singleship_trip_bin['latitude'].values.tolist()

                    # Insert last bin's lat/lon so segments are continuous
                    # across bin boundaries
                    if i > 1 and len(lons) > 0:
                        lons.insert(0, last_lon)
                        lats.insert(0, last_lat)

                    num_of_pings = len(lons)
                    if num_of_pings == 0:
                        pass
                    elif num_of_pings == 1:
                        # Single ping: just snap it to the grid
                        lon2 = lons[0]
                        lat2 = lats[0]
                        x2, y2 = sm.align_with_grid(x, y, lon2, lat2)
                        iix.append(x2)
                        iiy.append(y2)
                    elif num_of_pings > 1:
                        for j in range(1, num_of_pings):
                            # Interpolate between known points
                            lon1 = lons[j - 1]
                            lat1 = lats[j - 1]
                            lon2 = lons[j]
                            lat2 = lats[j]
                            # Estimate distance; only interpolate when implied
                            # speed is plausible (threshold converted:
                            # knots * knots_to_km/h conversion *
                            # km_to_m conversion = meters)
                            dist = sm.distance(lat1, lon1, lat2, lon2)
                            if dist < (interp_threshold * 1.852 * 1000):
                                x1, y1 = sm.align_with_grid(x, y, lon1, lat1)
                                x2, y2 = sm.align_with_grid(x, y, lon2, lat2)
                                ix, iy = sm.interp2d(x1, y1, x2, y2)
                                iix.extend(ix)
                                iiy.extend(iy)

                    # Drop duplicates so a vessel counts at most once per cell
                    # within this time bin.  (Removed dead "df = {}" that was
                    # immediately overwritten.)
                    df = pd.DataFrame({'x': iix, 'y': iiy}).drop_duplicates(keep='last')

                    # Append
                    iiix.extend(df['x'].tolist())
                    iiiy.extend(df['y'].tolist())

                    # Save last lat/lon for the next bin
                    if len(lats) > 0:
                        last_lat = lats[-1]
                        last_lon = lons[-1]

        # Project pings to grid (plain counts; no weights)
        H0, xedges, yedges = np.histogram2d(
            iiix, iiiy, bins=info.grid.bin_number,
            range=[[0, info.grid.bin_number[0]],
                   [0, info.grid.bin_number[1]]])

        D = xr.Dataset({'ship_density': (['x', 'y'], H0)},
                       coords={'lon': (['x'], x),
                               'lat': (['y'], y)})

        # Metadata: inherit from input, drop what no longer applies
        D.attrs = copy.deepcopy(data.attrs)
        del D.attrs['ship_id']
        D.attrs['minlat'] = info.grid.minlat
        D.attrs['maxlat'] = info.grid.maxlat
        D.attrs['minlon'] = info.grid.minlon
        D.attrs['maxlon'] = info.grid.maxlon
        D.attrs['bin_number'] = info.grid.bin_number
        D.attrs['bin_size'] = info.grid.bin_size
        D.attrs['time_bin'] = info.grid.time_bin
        D.attrs['interpolation'] = 'Linear'
        D.attrs['interp_threshold'] = interp_threshold
        D.attrs['units'] = 'No. of vessels within grid-cell'
        D.attrs['unit_description'] = ('Number of vessels inside\n' +
                                       'a gridcell within the time range of\n' +
                                       'observations (note it is LOG scale)')

        # Print NetCDF file
        sm.checkDir(str(info.dirs.gridded_data))
        print('Writting...')
        print('...' + file_out)
        D.to_netcdf(path=file_out)
    return
def grid_merger(info, files=None, filename_out='auto'):
    '''
    Combines several gridded files into one

    Arguments:
        info (info): ``info`` object containing metadata

    Keyword Arguments:
        files (str): Directory where gridded files recide
        filename_out (str): Name of output file

    Returns:
        One gridded file
    '''
    from datetime import datetime
    print('grid_merger ---------------------------------------------')

    if files is None:
        all_files = sm.get_all_files(info.dirs.gridded_data)
    else:
        all_files = files

    # Process 1st grid: it seeds the density sum, the coordinate axes and
    # the metadata for the merged output
    data0 = xr.open_dataset(all_files[0])
    H0 = data0['ship_density'].values
    startdate = datetime.strptime(data0.attrs['startdate'], '%Y-%m-%d %H:%M:%S')
    enddate = datetime.strptime(data0.attrs['enddate'], '%Y-%m-%d %H:%M:%S')

    # Pre-build metadata
    metadata = data0.attrs

    # Process the rest of the grid files (best-effort: a file that cannot be
    # read or lacks the expected variables/attrs is skipped with a warning
    # instead of being silently swallowed by a bare except)
    for file_in in all_files[1:]:
        try:
            print(file_in)
            dataX = xr.open_dataset(file_in)
            try:
                startdate = min(startdate,
                                datetime.strptime(dataX.attrs['startdate'],
                                                  '%Y-%m-%d %H:%M:%S'))
                enddate = max(enddate,
                              datetime.strptime(dataX.attrs['enddate'],
                                                '%Y-%m-%d %H:%M:%S'))
                H = dataX['ship_density'].values
                H0 = H0 + H
            finally:
                dataX.close()
        except Exception as err:
            print('WARNING: skipping ' + str(file_in) + ' (' + str(err) + ')')

    # Create dataset with the summed densities on the first file's axes
    D = xr.Dataset({'ship_density': (['x', 'y'], H0)},
                   coords={'lon': (['x'], data0['lon'].values),
                           'lat': (['y'], data0['lat'].values)})

    # Save metadata; date range covers every merged file
    D.attrs = metadata
    D.attrs['startdate'] = startdate.strftime('%Y-%m-%d %H:%M:%S')
    D.attrs['enddate'] = enddate.strftime('%Y-%m-%d %H:%M:%S')

    # Save merged file
    sm.checkDir(str(info.dirs.merged_grid))
    if filename_out == 'auto':
        filename_OUT = 'merged_grid.nc'
    else:
        filename_OUT = filename_out
    file_out = os.path.join(str(info.dirs.merged_grid), filename_OUT)
    D.to_netcdf(path=file_out)
    data0.close()
    print('Merging completed!')
    return D
def getWKT_PRJ(epsg_code):
    '''
    Downloads and returns geospatial parameters given an epsg code

    Arguments:
        epsg_code (str): Code referring to an entry in the EPSG Geodetic
            Parameter Dataset, which is a collection of definitions of
            coordinate reference systems and coordinate transformations

    Returns:
        str: geospatial parameters
    '''
    import urllib.request
    # FIX: close the HTTP response (was leaked); urlopen supports the
    # context-manager protocol.
    # NOTE(review): plain-http endpoint and no timeout -- consider https and
    # a timeout= argument if the service supports them.
    with urllib.request.urlopen(
            "http://spatialreference.org/ref/epsg/{0}/prettywkt/".format(epsg_code)) as wkt:
        wkt_bytes = wkt.read()
    wkt_decoded = wkt_bytes.decode("utf8")
    # Strip all whitespace so the WKT fits on a single .prj line
    remove_spaces = wkt_decoded.replace(" ", "")
    output = remove_spaces.replace("\n", "")
    return output
def mergedgrid_to_shp(info, file_in=None):
    '''
    Converts a gridded file into a shapefile

    Arguments:
        info (info): ``info`` object containing metadata

    Keyword Arguments:
        file_in (str): Gridded file to convert. If ``None`` it will use
            for ``merged_grid.nc``

    Returns:
        A set of shape files
    '''
    import shapefile
    print('mergedgrid_to_shp ------------------------------------------------')

    # Load data
    if file_in is None:
        file_in = os.path.join(str(info.dirs.merged_grid), 'merged_grid.nc')
    d = xr.open_dataset(file_in)

    # NOTE(review): this uses the pyshp 1.x API (Writer() with no target,
    # w.save); pyshp 2.x renamed save -- confirm the pinned version.
    w = shapefile.Writer()
    w.field('Vessel Density', 'C')

    # One rectangular polygon per grid-cell with non-zero density; the cell
    # spans consecutive lon/lat coordinates.
    for i in range(0, len(d['lon'].values) - 1):
        print(i)
        for j in range(0, len(d['lat'].values) - 1):
            lon1 = float(d['lon'].values[i])
            lat1 = float(d['lat'].values[j])
            lon2 = float(d['lon'].values[i + 1])
            lat2 = float(d['lat'].values[j + 1])
            density = float(d['ship_density'].values[i, j])
            if density > 0:
                # Closed ring: last vertex repeats the first
                w.poly(parts=[[[lon1, lat1], [lon2, lat1], [lon2, lat2],
                               [lon1, lat2], [lon1, lat1]]])
                w.record(density)

    shapefile_name = os.path.join(str(info.dirs.shapefiles), info.project_name)
    w.save(shapefile_name)

    # FIX: write the .prj companion file inside a with-block so the handle is
    # closed even if the EPSG download fails.
    epsg = getWKT_PRJ(info.grid.epsg_code)
    with open(shapefile_name + '.prj', 'w') as prj:
        prj.write(epsg)
    return
def calculate_gridcell_areas(info):
    '''
    Calculates the area of each of the grid-cells in the domain (in km^2)

    Arguments:
        info (info): ``info`` object WITHOUT ``info.grid.areas``

    Returns:
        info: ``info`` object WITH ``info.grid.areas``
    '''
    # FIX: removed redundant function-local "import ship_mapper as sm";
    # the module already imports it at top level.
    print('Calculating grid-cell areas........................')

    # Cell-center coordinates along lon (x) and lat (y)
    x = np.arange(info.grid.minlon, info.grid.maxlon,
                  info.grid.bin_size, dtype=np.float64)
    y = np.arange(info.grid.minlat, info.grid.maxlat,
                  info.grid.bin_size, dtype=np.float64)

    # u and v are the cell EDGES (midpoints between centers, extrapolated at
    # the two ends), hence one more entry than centers.
    u = np.empty((len(x) + 1,))
    v = np.empty((len(y) + 1,))
    areas = np.empty((len(x), len(y)))

    for i in range(1, len(x)):
        u[i] = x[i] - ((x[i] - x[i - 1]) / 2)
    u[0] = x[0] - ((x[1] - x[0]) / 2)
    u[-1] = x[-1] + ((x[-1] - x[-2]) / 2)

    for i in range(1, len(y)):
        v[i] = y[i] - ((y[i] - y[i - 1]) / 2)
    v[0] = y[0] - ((y[1] - y[0]) / 2)
    v[-1] = y[-1] + ((y[-1] - y[-2]) / 2)

    # Each cell is treated as a trapezoid: areas = ((a+b)/2)*h, where a and b
    # are the top/bottom edge lengths and h the cell height, all in km.
    for i in range(0, len(x)):
        for j in range(0, len(y)):
            # divided by 1000 to convert to km
            a = sm.distance(v[j + 1], u[i], v[j + 1], u[i + 1]) / 1000
            b = sm.distance(v[j], u[i], v[j], u[i + 1]) / 1000
            h = sm.distance(v[j], x[i], v[j + 1], x[i]) / 1000
            areas[i, j] = ((a + b) / 2) * h

    info.grid.areas = areas
    return info
def grid_to_esriascii(info, file_in=None):
    '''
    Writes ESRI ASCII file from gridded data

    See more info about `ESRI ASCII
    <http://resources.esri.com/help/9.3/arcgisdesktop/com/gp_toolref/spatial_analyst_tools/esri_ascii_raster_format.htm>`_

    Arguments:
        info (info): ``info`` object containing metadata

    Keyword Arguments:
        file_in (str): Gridded file to convert. If ``None`` it will use
            for ``/merged_grid/merged_grid.nc``

    Returns:
        ESRI ASCII file (.asc)
    '''
    print('grid_to_esriascii ------------------------------------------------')

    # Load data
    if file_in is None:
        file_in = os.path.join(str(info.dirs.merged_grid), 'merged_grid.nc')

    # Outputfile
    sm.checkDir(str(info.dirs.shapefiles))
    ascii_name = os.path.join(str(info.dirs.shapefiles), info.project_name)

    d = xr.open_dataset(file_in)
    DataShape = d['ship_density'].shape

    # FIX: open the output with a with-block so the handle is closed even if
    # a write fails (was open()/close() with no protection).
    with open(ascii_name + ".asc", "w") as TheFile:
        # Header: x is lon (columns), y is lat (rows)
        TheFile.write('ncols ' + str(DataShape[0]) + '\n')
        TheFile.write('nrows ' + str(DataShape[1]) + '\n')
        TheFile.write('xllcenter ' + str(info.grid.minlon) + '\n')
        TheFile.write('yllcenter ' + str(info.grid.minlat) + '\n')
        TheFile.write('cellsize ' + str(info.grid.bin_size) + '\n')
        TheFile.write('nodata_value 0.0\n')
        print('Printing esri ascii file')
        # Rows are written north-to-south (descending y), values separated by
        # a space (chr(32))
        for i in range(DataShape[1] - 1, -1, -1):
            for j in range(0, DataShape[0]):
                TheFile.write(str(d['ship_density'].values[j, i]) + chr(32))
            TheFile.write("\n")
    return