[rsplus] czlsfinal
Viewer
*** This page was generated with the meta tag "noindex, nofollow". This happened because you selected this option before saving or the system detected it as spam. This means that this page will never get into the search engines and the search bot will not crawl it. There is nothing to worry about, you can still share it with anyone.
- ### SETUP ###
- library(qwraps2)
- library(ggridges)
- library(ggplot2)
- library(sp)
- library(tidyverse)
- library(ggplot2)
- library(readxl)
- getwd()
### Reading data and filtering ###
# Load the GFW records and the FAO table; only rows coded "CHN" are kept
# from the FAO table.
china <- read_csv("00_Data/GFW-CHN.csv")
fao_chn <- filter(read_csv("00_Data/FAO-fspfsc_CHN.csv"), Code == "CHN")
# Hold `date` as a Date so it can be compared against as.Date() bounds below.
china[["date"]] <- as.Date(china[["date"]])
# Split the records into one table per calendar year.

# Return the rows of `records` whose `date` falls inside calendar year `year`.
# Both bounds are inclusive: a strict `<` against 31 December would silently
# drop the last day of the year.
records_in_year <- function(records, year) {
  first_day <- as.Date(paste0(year, "-01-01"))
  last_day <- as.Date(paste0(year, "-12-31"))
  filter(records, .data$date >= first_day, .data$date <= last_day)
}

# 2012 has no lower bound: it keeps every record dated on or before 2012-12-31.
china_2012 <- filter(china, date <= as.Date("2012-12-31"))
china_2013 <- records_in_year(china, 2013)
china_2014 <- records_in_year(china, 2014)
china_2015 <- records_in_year(china, 2015)
china_2016 <- records_in_year(china, 2016)
china_2017 <- records_in_year(china, 2017)
china_2018 <- records_in_year(china, 2018)
china_2019 <- records_in_year(china, 2019)
china_2020 <- records_in_year(china, 2020)
# Get random samples from everything.
# Each row receives a Uniform(0, 1) draw, stored in `random`; rows whose draw
# is <= 0.1 form that year's sample (roughly 10% of the rows).
# The seed is fixed so the samples, and every statistic derived from them,
# are identical from one run to the next.
set.seed(2020)
china_2012$random <- runif(nrow(china_2012))
samples_2012 <- filter(china_2012, random <= 0.1)
china_2013$random <- runif(nrow(china_2013))
samples_2013 <- filter(china_2013, random <= 0.1)
china_2014$random <- runif(nrow(china_2014))
samples_2014 <- filter(china_2014, random <= 0.1)
china_2015$random <- runif(nrow(china_2015))
samples_2015 <- filter(china_2015, random <= 0.1)
china_2016$random <- runif(nrow(china_2016))
samples_2016 <- filter(china_2016, random <= 0.1)
china_2017$random <- runif(nrow(china_2017))
samples_2017 <- filter(china_2017, random <= 0.1)
china_2018$random <- runif(nrow(china_2018))
samples_2018 <- filter(china_2018, random <= 0.1)
china_2019$random <- runif(nrow(china_2019))
samples_2019 <- filter(china_2019, random <= 0.1)
china_2020$random <- runif(nrow(china_2020))
samples_2020 <- filter(china_2020, random <= 0.1)
# Distance of every sampled position from the reference point `chinacenter`,
# followed by the per-year means used to build `chn_distances`.
chinacenter <- c(104.195, 35.86) # longitude, latitude

# Add the reference coordinates, the haversine distance to them and the same
# distance in miles to one year's sample.
#   samples: one of the samples_20XX tables.
#   returns: `samples` with center_lon, center_lat, distance and dist_mi added.
add_center_distance <- function(samples) {
  samples$center_lon <- chinacenter[1]
  samples$center_lat <- chinacenter[2]
  # Columns 4 and 3 are read as (longitude, latitude), exactly as before.
  # NOTE(review): positional lookup; confirm against the GFW column order.
  # distHaversine lives in geosphere, which the setup section never attaches,
  # so it is called through its namespace. With the default Earth radius the
  # result is in metres.
  samples$distance <-
    geosphere::distHaversine(as.matrix(samples[, 4:3]), chinacenter)
  samples$dist_mi <- samples$distance / 1609.34
  samples
}

samples_2012 <- add_center_distance(samples_2012)
samples_2013 <- add_center_distance(samples_2013)
samples_2014 <- add_center_distance(samples_2014)
samples_2015 <- add_center_distance(samples_2015)
samples_2016 <- add_center_distance(samples_2016)
samples_2017 <- add_center_distance(samples_2017)
samples_2018 <- add_center_distance(samples_2018)
samples_2019 <- add_center_distance(samples_2019)
samples_2020 <- add_center_distance(samples_2020)

# Mean distance per year. These are means of `dist_mi`, i.e. already in miles.
dist_2012 <- mean(samples_2012$dist_mi)
dist_2013 <- mean(samples_2013$dist_mi)
dist_2014 <- mean(samples_2014$dist_mi)
dist_2015 <- mean(samples_2015$dist_mi)
dist_2016 <- mean(samples_2016$dist_mi)
dist_2017 <- mean(samples_2017$dist_mi)
dist_2018 <- mean(samples_2018$dist_mi)
dist_2019 <- mean(samples_2019$dist_mi)
dist_2020 <- mean(samples_2020$dist_mi)

# Mean of the `hours` column per year.
hrs_2012 <- mean(samples_2012$hours)
hrs_2013 <- mean(samples_2013$hours)
hrs_2014 <- mean(samples_2014$hours)
hrs_2015 <- mean(samples_2015$hours)
hrs_2016 <- mean(samples_2016$hours)
hrs_2017 <- mean(samples_2017$hours)
hrs_2018 <- mean(samples_2018$hours)
hrs_2019 <- mean(samples_2019$hours)
hrs_2020 <- mean(samples_2020$hours)
# Assemble the per-year table used by the regressions: one row per year with
# the mean distance, the FAO series and the mean hours.
survey_years <- as.numeric(2012:2020)
# Rows 52 to 60 of `fao_chn` are paired with 2012-2020.
# NOTE(review): positional pairing; confirm against the FAO table's year column.
fao_rows <- 52:60
yearly_mean_dist <- c(
  dist_2012, dist_2013, dist_2014, dist_2015, dist_2016,
  dist_2017, dist_2018, dist_2019, dist_2020
)
yearly_mean_hrs <- c(
  hrs_2012, hrs_2013, hrs_2014, hrs_2015, hrs_2016,
  hrs_2017, hrs_2018, hrs_2019, hrs_2020
)
chn_distances <- data.frame(
  Year = survey_years,
  m_dist = yearly_mean_dist,
  fsp = fao_chn$fspc[fao_rows],
  fsq = fao_chn$fsqc[fao_rows],
  pop = fao_chn$Population[fao_rows],
  m_hrs = yearly_mean_hrs
)
# Stack the nine yearly samples into a single table.
all_samples <- rbind(
  samples_2012, samples_2013, samples_2014,
  samples_2015, samples_2016, samples_2017,
  samples_2018, samples_2019, samples_2020
)
# Four-digit year as a character label. `date` is already a Date, so it is
# formatted directly with no parsing step.
all_samples$year <- format(all_samples$date, "%Y")
# Distance in miles.
all_samples$dist_mi <- all_samples$distance / 1609.34
# Distance bin 1-5: below 1501, 1501 to below 3001, 3001 to below 4501,
# 4501 to below 6001, and 6001 or more. findInterval() counts how many edges
# are <= the value, so adding 1 gives the bin number; NA stays NA.
bin_edges <- c(1501, 3001, 4501, 6001)
all_samples$bins <- findInterval(all_samples$dist_mi, bin_edges) + 1
# Gap between the two FAO series.
chn_distances$diffs <- chn_distances$fsp - chn_distances$fsq
# `m_dist` is built from the dist_20XX values, which are means of `dist_mi`
# and therefore already in miles. Dividing by 1609.34 again would convert
# twice and shrink the values about 1600-fold, so the column is copied as is.
chn_distances$m_dist_mi <- chn_distances$m_dist
### ANALYSIS BELOW ###
# Descriptive statistics for the FAO series: summaries first, then standard
# deviations, each in the order fsqc, fspc, Population.
summary(fao_chn[["fsqc"]])
summary(fao_chn[["fspc"]])
summary(fao_chn[["Population"]])
sd(fao_chn[["fsqc"]])
sd(fao_chn[["fspc"]])
sd(fao_chn[["Population"]])
# Scatter of sampled distance (miles) against date, one panel per year with
# free axis scales, saved as an SVG.
distbyyear <- ggplot(all_samples, aes(x = date, y = dist_mi)) +
  geom_point(color = "dodgerblue4", pch = ".") +
  labs(x = "Date", y = "Distance") +
  facet_wrap(~year, nrow = 3, scales = "free")
# `filename` is spelled out in full; `file` only worked through partial
# argument matching.
ggsave(filename = "distbyyear.svg", plot = distbyyear, width = 12, height = 10)
# Histogram of sampled distance (miles) in 100-mile bins, one panel per year
# with free axis scales. The plot is displayed and then saved as an SVG.
hist_byyear <- ggplot(all_samples, aes(x = dist_mi)) +
  geom_histogram(binwidth = 100, color = "dodgerblue4") +
  labs(x = "Distance (mi)", y = "Frequency") +
  facet_wrap(~year, nrow = 3, scales = "free")
hist_byyear
# `filename` is spelled out in full; `file` only worked through partial
# argument matching.
ggsave(filename = "histbyyear.svg", plot = hist_byyear, width = 12, height = 10)
# Stacked binned histograms (ridgelines) of fishing hours, one ridge per
# distance bin. The plot is saved as an SVG and then displayed.
hg_stack <- ggplot(
  all_samples,
  aes(x = fishing_hours, y = factor(bins), fill = bins)
) +
  geom_density_ridges(
    stat = "binline",
    bins = 25,
    scale = .9,
    draw_baseline = TRUE,
    show.legend = TRUE
  ) +
  theme_minimal() +
  labs(
    x = "Fishing Hours",
    y = "by Distance",
    title = "Fishing Hours by Distance"
  )
# `filename` is spelled out in full; `file` only worked through partial
# argument matching.
ggsave(filename = "histstack.svg", plot = hg_stack, width = 12, height = 10)
hg_stack
# Histogram of the samples farther than 1250 miles, then box plots of
# distance by year for all samples (outliers hidden) and for that subset.
samples_over1250 <- filter(all_samples, dist_mi > 1250)
# The argument expression is left as written because hist() uses it for the
# default title and axis label.
hist(samples_over1250$dist_mi)
boxplot <- ggplot(data = all_samples, mapping = aes(year, dist_mi)) +
  geom_boxplot(outlier.shape = NA)
boxplot_o <- ggplot(data = samples_over1250, mapping = aes(year, dist_mi)) +
  geom_boxplot()
# One-way ANOVA of distance (miles) on year, followed by Tukey's HSD
# pairwise comparisons between years.
chn.aov <- aov(formula = dist_mi ~ year, data = all_samples)
summary(chn.aov)
TukeyHSD(chn.aov)
# Linear models on the FAO table.
# fspc and fsqc are each regressed on log(Population).
lm.fao_prod <- lm(formula = fspc ~ log(Population), data = fao_chn)
summary(lm.fao_prod)
lm.fao_cons <- lm(formula = fsqc ~ log(Population), data = fao_chn)
summary(lm.fao_cons)
# fspc regressed on fsqc.
lm.fao_cp <- lm(formula = fspc ~ fsqc, data = fao_chn)
summary(lm.fao_cp)
# Mean distance (miles) against fsp with a fitted straight line, saved as SVG.
chn_dist_svg <- ggplot(chn_distances, aes(x = fsp, y = m_dist_mi)) +
  # The colour is set on the point layer; passed to ggplot() it was ignored.
  geom_point(color = "red", size = 1) +
  geom_smooth(method = "lm", color = "blue") +
  theme_minimal() +
  labs(
    x = "Fish Production",
    y = "Mean Distance (mi)",
    title = "Mean Distance ~ Fish Production"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# `filename` is spelled out in full; `file` only worked through partial
# argument matching.
ggsave(filename = "chn_dist.svg", plot = chn_dist_svg, width = 10, height = 10)
# Mean distance against mean hours with a fitted straight line, saved as SVG.
chn_dist_hrs <- ggplot(chn_distances, aes(x = m_hrs, y = m_dist)) +
  # The colour is set on the point layer; passed to ggplot() it was ignored.
  geom_point(color = "red", size = 1) +
  geom_smooth(method = "lm", color = "blue") +
  theme_minimal() +
  labs(
    x = "Mean Hours Fishing",
    y = "Mean Distance (mi)",
    title = "Mean Distance ~ Mean Hours"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# `filename` is spelled out in full; `file` only worked through partial
# argument matching.
ggsave(
  filename = "chn_dist_hrs.svg",
  plot = chn_dist_hrs,
  width = 10,
  height = 10
)
# Linear fits matching the two scatter plots: mean distance on fsp, and
# mean distance on mean hours.
lm.dists_fsp <- lm(formula = m_dist_mi ~ fsp, data = chn_distances)
summary(lm.dists_fsp)
lm.dists_hrs <- lm(formula = m_dist ~ m_hrs, data = chn_distances)
summary(lm.dists_hrs)
# Mean distance (miles) against fsq: scatter with a fitted straight line,
# the matching linear model, and the plot saved as SVG.
# NOTE(review): the axis and title call fsq "Fish Stock"; confirm that this
# is the intended name for the fsqc series.
chn_fsq_svg <- ggplot(chn_distances, aes(x = fsq, y = m_dist_mi)) +
  # The colour is set on the point layer; passed to ggplot() it was ignored.
  geom_point(color = "red", size = 1) +
  geom_smooth(method = "lm", color = "red") +
  theme_minimal() +
  labs(
    x = "Fish Stock",
    y = "Mean Distance (mi)",
    title = "Mean Distance ~ Fish Stock"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
lm.dists_fsq <- lm(m_dist_mi ~ fsq, data = chn_distances)
summary(lm.dists_fsq)
# `filename` is spelled out in full; `file` only worked through partial
# argument matching.
ggsave(filename = "chn_fsq.svg", plot = chn_fsq_svg, width = 10, height = 10)
# We observe that fsq has greater correlation than fsp.
# Natural log of sampled distance (miles) against date, one panel per year
# with free axis scales. The plot is built layer by layer, then displayed.
distbyyearln <- ggplot(
  data = all_samples,
  mapping = aes(x = date, y = log(dist_mi))
)
distbyyearln <- distbyyearln +
  geom_point(colour = "dodgerblue4", shape = ".")
distbyyearln <- distbyyearln +
  labs(x = "Date", y = "Distance")
distbyyearln <- distbyyearln +
  facet_wrap(vars(year), nrow = 3, scales = "free")
distbyyearln
Editor
You can edit this paste and save as new: