[python] GHCNV4 Analysis

Viewer

copydownloadembedprintName: GHCNV4 Analysis
  1. import matplotlib.pyplot as plt
  2. import numpy as nm
  3. import pandas as pd
  4.  
  5. GHCNDat = "C:/Users/ALAJON/Desktop/Climate Science/GHCNV4 Data/ghcnm.tavg.v4.0.1.20210416.qcu.dat"
  6. GHCNmeta = "C:/Users/ALAJON/Desktop/Climate Science/GHCNV4 Data/ghcnm.tavg.v4.0.1.20210416.qcu.inv"
  7. landmask = "C:/Users/ALAJON/Desktop/Climate Science/GHCNV4 Data/landmask.dta"
  8.  
  9. # load the GHCNV4 monthly with column names
  10. colspecs = [(0, 2), (0, 11), (11, 15), (15, 19)]
  11. names = ['country_code', 'station', 'year', 'variable']
  12. = 19
  13. for m in range(1, 13):
  14.     mon = str(m)
  15.     colspecs_tmp = (i, i + 5)
  16.     names_tmp = 'VALUE' + mon
  17.  
  18.     colspecs.append(colspecs_tmp)
  19.     names.append(names_tmp)
  20.  
  21.     i = i + 8
  22.  
  23. ghcnv4 = pd.read_fwf(GHCNDat,
  24.                      colspecs=colspecs, names=names)
  25.  
  26. ghcnv4 = ghcnv4[~ghcnv4.eq(-9999).any(1)]
  27. # load landmask
  28. lndmsk = pd.read_stata(landmask)
  29.  
  30. # Load station metadata
  31. stnMeta = pd.read_fwf(GHCNmeta, colspecs=[(0, 2), (0, 12), (12, 21), (21, 31),
  32.                                           (31, 38), (38, 69)],
  33.                       names=['country_code', 'station',
  34.                              'lat', 'lon', 'elev', 'name'])
  35.  
  36. # create grids
  37. grid_size = 5
  38. count = -90 + (grid_size / 2)
  39. stnMeta['latgrid'] = 0
  40.  
  41. for x in range(-90, 90, 5):
  42.     stnMeta.loc[stnMeta['lat'].between(x, x + 5), 'latgrid'] = count
  43.     count = count + grid_size
  44.  
  45. count = -180 + (grid_size / 2)
  46. stnMeta['longrid'] = 0
  47.  
  48. for x in range(-180, 180, 5):
  49.     stnMeta.loc[stnMeta['lon'].between(x, x + 5), 'longrid'] = count
  50.     count = count + grid_size
  51.  
  52. stnMeta['gridbox'] = stnMeta['latgrid'].map(str) + " lat " + stnMeta['longrid'].map(str) + " lon"
  53. stnMetaGrid = stnMeta.merge(lndmsk, on='gridbox')
  54. stnMetaGrid['grid_weight'] = nm.sin((stnMetaGrid['latgrid'] + grid_size / 2) * nm.pi / 180) - nm.sin(
  55.     (stnMetaGrid['latgrid'] - grid_size / 2) * nm.pi / 180)
  56. stnMetaGrid['grid_weight'] = stnMetaGrid['grid_weight'] * stnMetaGrid['land_percent']
  57.  
  58. # clean ghcn and create anomalies
  59. ghcnv4NoNullYears = ghcnv4[ghcnv4.year.notnull()]
  60. ghcnv4NoNullYears = ghcnv4NoNullYears.replace(-9999, None)
  61. for m in range(1, 13):
  62.     ghcnv4NoNullYears['VALUE' + str(m)] = ghcnv4NoNullYears['VALUE' + str(m)] / 100
  63.  
  64. ghcnlong = ghcnv4NoNullYears.set_index('station')
  65. ghcnlong = ghcnlong.reset_index()
  66. ghcnlong = pd.melt(ghcnlong, id_vars=['station', 'year'],
  67.                    value_vars=['VALUE1', 'VALUE2', 'VALUE3', 'VALUE4', 'VALUE5', 'VALUE6',
  68.                                'VALUE7', 'VALUE8', 'VALUE9', 'VALUE10', 'VALUE11', 'VALUE12'])
  69.  
  70. ghcnBaselines = ghcnlong[ghcnlong['year'].between(1961, 1990)]
  71. ghcnBaselines = ghcnBaselines.drop(columns='year')
  72. ghcnBaselines = ghcnBaselines.groupby(['station', 'variable']).mean()
  73. ghcnBaselines = ghcnBaselines.rename(columns={"value""baseline"})
  74.  
  75. ghcnAnoms = ghcnlong.merge(ghcnBaselines, on=['station', 'variable'])
  76. ghcnAnoms['anomalies'] = ghcnAnoms['value'] - ghcnAnoms['baseline']
  77.  
  78. # merge on the metadata
  79. ghcnAnomsGrid = ghcnAnoms.merge(stnMetaGrid, on=['station'])
  80. ghcnAnomsGrid = ghcnAnomsGrid[ghcnAnomsGrid.anomalies.notnull()]
  81. ghcnAnomsGrid = ghcnAnomsGrid.drop(columns=['baseline', 'lat', 'lon', 'elev', 'latgrid', 'longrid', 'land_percent',
  82.                                             'ocean_percent', 'value'])
  83. ghcnAnomsGrid = ghcnAnomsGrid.groupby(['gridbox', 'variable', 'year']).mean().reset_index()
  84.  
  85.  
  86. def weighted_average(group):
  87.     weights = group['grid_weight']
  88.     anomalies = group['anomalies']
  89.     return nm.average(anomalies, weights=weights)
  90.  
  91.  
  92. ghcnAnomsGrid = ghcnAnomsGrid[ghcnAnomsGrid["variable"].isin(['VALUE1', 'VALUE2', 'VALUE3'])]
  93. ghcnAnomsWtd = ghcnAnomsGrid.groupby(['variable', 'year']).apply(func=weighted_average).reset_index()
  94. ghcnAnomsWtd.columns.values[2] = "anomalies"
  95.  
  96. simpleTemp = ghcnAnomsWtd[ghcnAnomsWtd['year'].between(1900, 2020)]
  97.  
  98. simpleTemp = simpleTemp.groupby('year').mean('anomalies').reset_index()
  99.  
  100. simpleTemp['rollAvg'] = simpleTemp['anomalies'].rolling(window=5).mean()
  101.  
  102. simpleTemp.to_excel(r'C:\Users\ALAJON\Desktop\Climate Science\Alps Analysis\annual.xlsx', index=False)
  103.  
  104. coef = nm.polyfit(simpleTemp['year'], simpleTemp['anomalies'], 1)
  105. poly1d_fn = nm.poly1d(coef)
  106.  
  107. # plot temperature record
  108. plt.plot(simpleTemp['year'], simpleTemp['anomalies'], simpleTemp['year'], poly1d_fn(simpleTemp['year']), '--k')
  109. plt.plot(simpleTemp['year'], simpleTemp['rollAvg'], 'r')
  110. plt.xlabel("Year")
  111. plt.ylabel("Anomaly (degrees C)")
  112. plt.show()
  113.  

Editor

You can edit this paste and save as new:


File Description
  • GHCNV4 Analysis
  • Paste Code
  • 05 May-2021
  • 4.41 Kb
You can Share it: