[rsplus] czlsfinal

Viewer

*** This page was generated with the meta tag "noindex, nofollow". This happened because you selected this option before saving or the system detected it as spam. This means that this page will never get into the search engines and the search bot will not crawl it. There is nothing to worry about, you can still share it with anyone.

copy download embed print

Name: czlsfinal

### SETUP ###
library(qwraps2)
library(ggridges)
library(ggplot2)
library(sp)
library(tidyverse)
library(ggplot2)
library(readxl)
# Show the working directory; the relative "00_Data/" paths below are
# resolved against it.
getwd()

### Reading data and filtering ###
# NOTE(review): GFW-CHN.csv is presumably a Global Fishing Watch effort
# extract and FAO-fspfsc_CHN.csv an FAO table -- confirm against the data.
china <- read_csv("00_Data/GFW-CHN.csv")
fao_chn <- read_csv("00_Data/FAO-fspfsc_CHN.csv")
fao_chn <- fao_chn %>% filter(Code == "CHN")
china$date <- as.Date(china$date)

# Rows of `records` dated within calendar year `yr`, with both 1 January and
# 31 December included. (The previous per-year filters used a strict
# `date < 'YYYY-12-31'` bound and so silently dropped every 31 December.)
filter_calendar_year <- function(records, yr) {
  first_day <- as.Date(paste0(yr, "-01-01"))
  last_day <- as.Date(paste0(yr, "-12-31"))
  filter(records, date >= first_day, date <= last_day)
}

# 2012 deliberately has no lower bound: it keeps everything up to and
# including 2012-12-31.
china_2012 <- filter(china, date <= as.Date("2012-12-31"))
china_2013 <- filter_calendar_year(china, 2013)
china_2014 <- filter_calendar_year(china, 2014)
china_2015 <- filter_calendar_year(china, 2015)
china_2016 <- filter_calendar_year(china, 2016)
china_2017 <- filter_calendar_year(china, 2017)
china_2018 <- filter_calendar_year(china, 2018)
china_2019 <- filter_calendar_year(china, 2019)
china_2020 <- filter_calendar_year(china, 2020)
# Get random samples from everything.
# Seed the generator so the samples -- and every statistic and figure derived
# from them -- are identical from one run to the next.
set.seed(2022)

# A row is kept when its Uniform(0, 1) draw is at or below this value, so each
# sample holds roughly (not exactly) this share of the year's rows.
sample_share <- 0.1

# Return `records` with a `random` column holding one Uniform(0, 1) draw per row.
with_random_draw <- function(records) {
  records$random <- runif(nrow(records), min = 0, max = 1)
  records
}

# The `random` column is kept on both the yearly tables and the samples.
china_2012 <- with_random_draw(china_2012)
china_2013 <- with_random_draw(china_2013)
china_2014 <- with_random_draw(china_2014)
china_2015 <- with_random_draw(china_2015)
china_2016 <- with_random_draw(china_2016)
china_2017 <- with_random_draw(china_2017)
china_2018 <- with_random_draw(china_2018)
china_2019 <- with_random_draw(china_2019)
china_2020 <- with_random_draw(china_2020)

samples_2012 <- china_2012 %>% filter(random <= sample_share)
samples_2013 <- china_2013 %>% filter(random <= sample_share)
samples_2014 <- china_2014 %>% filter(random <= sample_share)
samples_2015 <- china_2015 %>% filter(random <= sample_share)
samples_2016 <- china_2016 %>% filter(random <= sample_share)
samples_2017 <- china_2017 %>% filter(random <= sample_share)
samples_2018 <- china_2018 %>% filter(random <= sample_share)
samples_2019 <- china_2019 %>% filter(random <= sample_share)
samples_2020 <- china_2020 %>% filter(random <= sample_share)
# Distance from every sampled position to a fixed reference point for China,
# stored as c(longitude, latitude).
chinacenter <- c(104.195, 35.86)

# Return `samples` with four columns appended: the reference point
# (center_lon, center_lat), the haversine distance to it (distance) and that
# distance divided by 1609.34 (dist_mi).
add_center_distance <- function(samples, center = chinacenter) {
  samples$center_lon <- center[1]
  samples$center_lat <- center[2]
  # geosphere is not attached by the setup section, so the function is called
  # through its namespace.
  # NOTE(review): columns 4 and 3 are assumed to be longitude and latitude, in
  # that order -- confirm against the CSV header. The reference point is picked
  # by name so it no longer depends on the table having exactly nine columns.
  samples$distance <- geosphere::distHaversine(
    samples[, 4:3],
    samples[, c("center_lon", "center_lat")]
  )
  # Assumes `distance` is in metres (distHaversine's default radius is given in
  # metres); 1609.34 m per statute mile.
  samples$dist_mi <- samples$distance / 1609.34
  samples
}

# Every year gets the same columns; the later rbind() of the yearly samples
# requires identical column sets.
samples_2012 <- add_center_distance(samples_2012)
samples_2013 <- add_center_distance(samples_2013)
samples_2014 <- add_center_distance(samples_2014)
samples_2015 <- add_center_distance(samples_2015)
samples_2016 <- add_center_distance(samples_2016)
samples_2017 <- add_center_distance(samples_2017)
samples_2018 <- add_center_distance(samples_2018)
samples_2019 <- add_center_distance(samples_2019)
samples_2020 <- add_center_distance(samples_2020)

# Mean distance per year, in miles.
dist_2012 <- mean(samples_2012$dist_mi)
dist_2013 <- mean(samples_2013$dist_mi)
dist_2014 <- mean(samples_2014$dist_mi)
dist_2015 <- mean(samples_2015$dist_mi)
dist_2016 <- mean(samples_2016$dist_mi)
dist_2017 <- mean(samples_2017$dist_mi)
dist_2018 <- mean(samples_2018$dist_mi)
dist_2019 <- mean(samples_2019$dist_mi)
dist_2020 <- mean(samples_2020$dist_mi)

# Mean of the `hours` column per year.
hrs_2012 <- mean(samples_2012$hours)
hrs_2013 <- mean(samples_2013$hours)
hrs_2014 <- mean(samples_2014$hours)
hrs_2015 <- mean(samples_2015$hours)
hrs_2016 <- mean(samples_2016$hours)
hrs_2017 <- mean(samples_2017$hours)
hrs_2018 <- mean(samples_2018$hours)
hrs_2019 <- mean(samples_2019$hours)
hrs_2020 <- mean(samples_2020$hours)
# Year-level analysis table: one row per year from 2012 to 2020, combining the
# sampled yearly means with three FAO columns.
chn_distances <- local({
  # NOTE(review): rows 52 to 60 of fao_chn are presumed to be the 2012-2020
  # records -- confirm; selection is purely positional.
  fao_rows <- 52:60

  yearly_mean_dist <- c(
    dist_2012, dist_2013, dist_2014,
    dist_2015, dist_2016, dist_2017,
    dist_2018, dist_2019, dist_2020
  )
  yearly_mean_hrs <- c(
    hrs_2012, hrs_2013, hrs_2014,
    hrs_2015, hrs_2016, hrs_2017,
    hrs_2018, hrs_2019, hrs_2020
  )

  data.frame(
    Year = as.numeric(2012:2020),
    m_dist = yearly_mean_dist,
    fsp = fao_chn$fspc[fao_rows],
    fsq = fao_chn$fsqc[fao_rows],
    pop = fao_chn$Population[fao_rows],
    m_hrs = yearly_mean_hrs
  )
})
# Collapsing all samples into one.
all_samples <- rbind(
  samples_2012,
  samples_2013,
  samples_2014,
  samples_2015,
  samples_2016,
  samples_2017,
  samples_2018,
  samples_2019,
  samples_2020
)

# Calendar year of each observation as a four-character string. `date` was
# converted to Date right after import, so no parsing format is needed here.
all_samples$year <- format(as.Date(all_samples$date), "%Y")

# Converting to miles (assumes `distance` is in metres; 1609.34 m per mile).
all_samples$dist_mi <- all_samples$distance / 1609.34

# Creating bins by distance. Conditions are tried top to bottom, so each row
# lands in the first band whose upper bound it is below; a missing distance
# yields a missing bin.
all_samples <- all_samples %>%
  mutate(
    bins = case_when(
      dist_mi < 1501 ~ 1,
      dist_mi < 3001 ~ 2,
      dist_mi < 4501 ~ 3,
      dist_mi < 6001 ~ 4,
      dist_mi > 6000 ~ 5
    )
  )

# Gap between the fsp and fsq columns for each year.
chn_distances$diffs <- chn_distances$fsp - chn_distances$fsq

# m_dist is the mean of dist_mi and is therefore already in miles. Dividing it
# by 1609.34 a second time shrank the values by that factor while the plots
# still labelled them "(mi)". The column is kept because later code refers to
# it by this name.
chn_distances$m_dist_mi <- chn_distances$m_dist
### ANALYSIS BELOW ###
# Five-number summaries, then standard deviations, of the three FAO columns.
summary(fao_chn[["fsqc"]])
summary(fao_chn[["fspc"]])
summary(fao_chn[["Population"]])
sd(fao_chn[["fsqc"]])
sd(fao_chn[["fspc"]])
sd(fao_chn[["Population"]])

# Sampled distance (miles) against date, one panel per year with free axes.
distbyyear <- all_samples %>%
  ggplot(mapping = aes(x = date, y = dist_mi)) +
  geom_point(shape = ".", colour = "dodgerblue4") +
  facet_wrap(vars(year), nrow = 3, scales = "free") +
  labs(x = "Date", y = "Distance")
ggsave(filename = "distbyyear.svg", plot = distbyyear, width = 12, height = 10)

# Distribution of sampled distances in 100-mile bins, one panel per year.
hist_byyear <- all_samples %>%
  ggplot(mapping = aes(x = dist_mi)) +
  geom_histogram(binwidth = 100, colour = "dodgerblue4") +
  facet_wrap(vars(year), nrow = 3, scales = "free") +
  labs(x = "Distance (mi)", y = "Frequency")
hist_byyear
ggsave(filename = "histbyyear.svg", plot = hist_byyear, width = 12, height = 10)
# Stacked ridgeline histograms of fishing hours, one ridge per distance bin.
# The bin is used twice: as a factor to separate the ridges and as a number to
# drive the fill.
hg_stack <- all_samples %>%
  ggplot(mapping = aes(x = fishing_hours, y = factor(bins), fill = bins)) +
  geom_density_ridges(
    stat = "binline",
    bins = 25,
    scale = 0.9,
    draw_baseline = TRUE,
    show.legend = TRUE
  ) +
  labs(
    x = "Fishing Hours",
    y = "by Distance",
    title = "Fishing Hours by Distance"
  ) +
  theme_minimal()
ggsave(filename = "histstack.svg", plot = hg_stack, width = 12, height = 10)
hg_stack
# Plot box plot & histograms
# Subset of samples more than 1250 miles from the reference point.
samples_over1250 <- all_samples %>% filter(dist_mi > 1250)
hist(samples_over1250$dist_mi)

# Both box plots are only built here; nothing in this section prints or saves
# them.
boxplot <- ggplot(all_samples, aes(x = year, y = dist_mi)) +
  geom_boxplot(outlier.shape = NA)
boxplot_o <- ggplot(samples_over1250, aes(x = year, y = dist_mi)) +
  geom_boxplot()

# ANOVA + HSD
# `year` is stored as character. TukeyHSD() only compares levels of factor
# terms and stops with "no factors in the fitted model" when the grouping
# variable is a plain character column, so the model is fitted on a copy in
# which `year` is a factor. all_samples itself is left unchanged.
chn.aov <- aov(
  dist_mi ~ year,
  data = all_samples %>% mutate(year = factor(year))
)
summary(chn.aov)
TukeyHSD(chn.aov)

# Effect of % change in population on production and consumption per capita.
lm.fao_prod <- lm(fspc ~ log(Population), data = fao_chn)
summary(lm.fao_prod)
lm.fao_cons <- lm(fsqc ~ log(Population), data = fao_chn)
summary(lm.fao_cons)

# Effect of consumption on how much is fished.
lm.fao_cp <- lm(fspc ~ fsqc, data = fao_chn)
summary(lm.fao_cp)
# Year-level scatter plots with a fitted straight line, each saved as SVG,
# followed by the matching linear models.
#
# `color = "red"` used to be passed to ggplot(), which silently discards it,
# so the points were drawn in the default colour. It now sits on geom_point(),
# where it takes effect.

# Regression between mean dist in miles and fsp.
chn_dist_svg <- chn_distances %>%
  ggplot(aes(x = fsp, y = m_dist_mi)) +
  geom_point(size = 1, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  theme_minimal() +
  labs(
    x = "Fish Production",
    y = "Mean Distance (mi)",
    title = "Mean Distance ~ Fish Production"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(filename = "chn_dist.svg", plot = chn_dist_svg, width = 10, height = 10)

# Regression between mean dists and mean hrs.
chn_dist_hrs <- chn_distances %>%
  ggplot(aes(x = m_hrs, y = m_dist)) +
  geom_point(size = 1, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  theme_minimal() +
  labs(
    x = "Mean Hours Fishing",
    y = "Mean Distance (mi)",
    title = "Mean Distance ~ Mean Hours"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  filename = "chn_dist_hrs.svg",
  plot = chn_dist_hrs,
  width = 10,
  height = 10
)

lm.dists_fsp <- lm(m_dist_mi ~ fsp, data = chn_distances)
summary(lm.dists_fsp)
lm.dists_hrs <- lm(m_dist ~ m_hrs, data = chn_distances)
summary(lm.dists_hrs)

# Regression between mean dist in miles and fsq.
# NOTE(review): fsq is taken from fsqc, which the FAO regressions in this
# script treat as consumption; the "Fish Stock" wording may be a mislabel --
# confirm before publishing the figure.
chn_fsq_svg <- chn_distances %>%
  ggplot(aes(x = fsq, y = m_dist_mi)) +
  geom_point(size = 1, color = "red") +
  geom_smooth(method = "lm", color = "red") +
  theme_minimal() +
  labs(
    x = "Fish Stock",
    y = "Mean Distance (mi)",
    title = "Mean Distance ~ Fish Stock"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
lm.dists_fsq <- lm(m_dist_mi ~ fsq, data = chn_distances)
summary(lm.dists_fsq)
ggsave(filename = "chn_fsq.svg", plot = chn_fsq_svg, width = 10, height = 10)
# We observe that fsq has greater correlation than fsp.
# Natural log of sampled distance (miles) against date, one panel per year
# with free axes. The y axis label is unchanged even though the values are
# logged.
distbyyearln <- all_samples %>%
  ggplot(mapping = aes(x = date, y = log(dist_mi))) +
  geom_point(shape = ".", colour = "dodgerblue4") +
  facet_wrap(vars(year), nrow = 3, scales = "free") +
  labs(x = "Date", y = "Distance")
distbyyearln

Editor

You can edit this paste and save as new:

fullscreen copy clear

Syntax Highlighting

Title / Paste Name

Meta robots tag

reCaptcha

File Description

czlsfinal
Paste Code
03 Dec-2022
8.78 Kb

You can Share it:

Latest Code Pastes

Full list

Tools

[rsplus] czlsfinal

Viewer

Editor