LSA-Mielnicka-vignette

Author

Monika Mielnicka

Published

March 3, 2024

R Markdown

Function made by Francesco for pretty graphs

ggtheme <- function(legend.position = "bottom") {
  theme_classic(base_size = 14) %+replace%
    theme(aspect.ratio = 1,
          legend.position = legend.position,
          legend.text = element_text(size = 14),
          axis.title = element_text(size = 15),
          legend.title = element_text(size = 15),
          plot.title = element_text(size = 16),
          strip.text = element_text(size = 14))
}

summary function

data_summary <- function(x) {
  m <- mean(x)
  ymin <- m-sd(x)
  ymax <- m+sd(x)
  return(c(y=m,ymin=ymin,ymax=ymax))
}

data load gfp/gfp line

gfp_breeding_table = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 7)
head(gfp_breeding_table)

# A tibble: 6 × 18
  maleID     genotype cage  pregnancy birth_date          no_pups_born males...7
  <chr>      <chr>    <chr>     <dbl> <dttm>                     <dbl> <chr>    
1 MBO-000971 gfp/gfp  bmbo…         1 2019-10-27 00:00:00            7 4        
2 MBO-000971 gfp/gfp  bmbo…         2 2019-11-17 00:00:00            9 4        
3 MBO-000971 gfp/gfp  bmbo…         3 2020-01-20 00:00:00           11 4        
4 MBO-000971 gfp/gfp  bmbo…         4 2020-02-16 00:00:00            9 4        
5 MBO-000971 gfp/gfp  bmbo…         5 2020-04-02 00:00:00            8 NA       
6 MBO-000972 gfp/gfp  bmbo…         1 2019-11-04 00:00:00            9 2        
# ℹ 11 more variables: females...8 <chr>, weight_at_birth <chr>,
#   no_of_pups_weaned <dbl>, males...11 <chr>, females...12 <chr>,
#   weight_at_weaning <chr>, end_of_breeding <dttm>, DOB <dttm>,
#   breeding_start_date <dttm>, time_of_breeding <dbl>, fathers_age <dbl>

birth weights for progeny sired by gfp/gfp males vs WT

# graph 
gfp_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  ggplot(aes(x = genotype, y = weight_at_birth, fill = genotype)) +
  geom_violin(trim = F) +
  geom_boxplot(width = 0.2) +
  stat_summary(fun = mean, geom = "point", shape = 5, size = 2) +
  labs(y="Weight of pups at birth (g)", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

# testing



gfp_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  t_test(weight_at_birth ~ genotype, alternative = "two.sided") %>%
  add_significance()

# A tibble: 1 × 9
  .y.             group1  group2    n1    n2 statistic    df          p p.signif
  <chr>           <chr>   <chr>  <int> <int>     <dbl> <dbl>      <dbl> <chr>   
1 weight_at_birth gfp/gfp WT       188   199      4.59  380. 0.00000611 ****

gfp_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  dplyr::rename(genotype = genotype) %>%
  group_by(genotype) %>%
  summarise(mean = mean(weight_at_birth), median = median(weight_at_birth), sd = sd(weight_at_birth))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        1.31    1.3 0.176
2 gfp/gfp   1.40    1.4 0.187

weaning weights for progeny sired by gfp/gfp males vs WT

#graph
gfp_breeding_table %>%
  separate_rows(weight_at_weaning, sep = "-") %>%
  type_convert(col_types = cols(weight_at_weaning = col_double())) %>%
  drop_na(weight_at_weaning) %>%
  ggplot(aes(x = genotype, y = weight_at_weaning, fill = genotype)) +
  geom_violin(trim = F) +
  geom_boxplot(width = 0.2) +
  stat_summary(fun = mean, shape = 5, size = 0.5) +
  labs(y="Weight of pups at weening (g)", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

#testing

gfp_breeding_table %>%
  separate_rows(weight_at_weaning, sep = "-") %>%
  type_convert(col_types = cols(weight_at_weaning = col_double())) %>%
  drop_na(weight_at_weaning) %>%
  t_test(weight_at_weaning ~ genotype, alternative = "greater") %>%
  add_significance()

# A tibble: 1 × 9
  .y.               group1  group2    n1    n2 statistic    df     p p.signif
  <chr>             <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 weight_at_weaning gfp/gfp WT       169   193      1.53  353. 0.064 ns

gfp_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  drop_na(weight_at_birth) %>%
  group_by(genotype) %>%
  dplyr::summarise(mean = mean(weight_at_birth), median = median(weight_at_birth), sd = sd(weight_at_birth))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        1.31    1.3 0.176
2 gfp/gfp   1.40    1.4 0.187

data load for phd/phd line

phd_breeding_table =  readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 8)
head(phd_breeding_table)

# A tibble: 6 × 18
  cage    genotype pregnancy father_ID birth_date          n_pups_born males...7
  <chr>   <chr>        <dbl> <chr>     <dttm>                    <dbl> <chr>    
1 bmbo-0… phd/phd          1 MBO-0014… 2020-06-15 00:00:00           7 5        
2 bmbo-0… phd/phd          2 MBO-0014… 2020-09-09 00:00:00           8 5        
3 bmbo-0… phd/phd          3 MBO-0014… 2020-10-05 00:00:00           8 NA       
4 bmbo-0… phd/phd          1 MBO-0014… 2020-06-12 00:00:00          11 NA       
5 bmbo-0… phd/phd          1 MBO-0014… 2020-06-26 00:00:00           8 4        
6 bmbo-0… phd/phd          2 MBO-0014… 2020-08-08 00:00:00          10 5        
# ℹ 11 more variables: females...8 <chr>, weight_at_birth <chr>,
#   n_of_pups_weaned <dbl>, males...11 <chr>, females...12 <chr>,
#   weight_at_weaning <chr>, end_of_breeding <dttm>, DOB <dttm>,
#   breeding_start_date <dttm>, time_of_breeding <dbl>, fathers_age <dbl>

birth weights for progeny sired by phd/phd males vs WT

phd_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  drop_na(weight_at_birth) %>%
  ggplot(aes(x = genotype, y = weight_at_birth, fill = genotype)) +
  geom_violin(trim = F) +
  geom_boxplot(width=0.1) +
  stat_summary(fun=mean, geom="point", shape=23, size=2) +
  labs(y="Weight of pups at birth (g)", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

#testing
phd_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  drop_na(weight_at_birth) %>%
  t_test(weight_at_birth ~ genotype) %>%
  add_significance()

# A tibble: 1 × 9
  .y.             group1  group2    n1    n2 statistic    df      p p.signif
  <chr>           <chr>   <chr>  <int> <int>     <dbl> <dbl>  <dbl> <chr>   
1 weight_at_birth phd/phd WT       200   359      2.35  435. 0.0192 *

phd_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  drop_na(weight_at_birth) %>%
  group_by(genotype) %>%
  dplyr::summarise(mean = mean(weight_at_birth), median = median(weight_at_birth), sd = sd(weight_at_birth))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        1.34   1.3  0.205
2 phd/phd   1.38   1.38 0.192

ween weights for progeny sired by phd/phd males vs WT

#graph
phd_breeding_table %>%
  separate_rows(weight_at_weaning, sep = "-") %>%
  type_convert(col_types = cols(weight_at_weaning = col_double())) %>%
  drop_na(weight_at_weaning) %>%
  ggplot(aes(x = genotype, y = weight_at_weaning, fill = genotype)) +
  geom_violin(trim = F) +
  geom_boxplot(width = 0.1) +
  stat_summary(fun.y=mean, geom="point", shape=23, size=2) +
  labs(y="Weight of pups at weaning (g)", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
ℹ Please use the `fun` argument instead.

#testing
phd_breeding_table %>%
  separate_rows(weight_at_weaning, sep = "-") %>%
  type_convert(col_types = cols(weight_at_weaning = col_double())) %>%
  drop_na(weight_at_weaning) %>%
  t_test(weight_at_weaning ~ genotype, alternative = "greater") %>%
  add_significance()

# A tibble: 1 × 9
  .y.               group1  group2    n1    n2 statistic    df     p p.signif
  <chr>             <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 weight_at_weaning phd/phd WT       135   274     0.691  251. 0.245 ns

phd_breeding_table %>%
  separate_rows(weight_at_weaning, sep = "-") %>%
  type_convert(col_types = cols(weight_at_weaning = col_double())) %>%
  drop_na(weight_at_weaning) %>%
  group_by(genotype) %>%
  summarise(mean = mean(weight_at_weaning), median = median(weight_at_weaning), sd = sd(weight_at_weaning))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        8.62   8.4   1.66
2 phd/phd   8.74   8.67  1.78

litter sizes gfp/gfp

gfp_breeding_table %>%
  drop_na(no_pups_born) %>%
  ggplot(aes(x = genotype, y = no_pups_born, fill = genotype)) +
  geom_boxplot() +
  geom_dotplot(size = 3, binaxis='y', stackdir='center') +
  stat_summary(fun.y=mean, geom="point", shape=23, size=2) +
  labs(y="litter size distribution", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Warning in geom_dotplot(size = 3, binaxis = "y", stackdir = "center"): Ignoring
unknown parameters: `size`

Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

gfp_breeding_table %>%
  drop_na(no_pups_born) %>%
  t_test(no_pups_born ~ genotype) %>%
  add_significance()

# A tibble: 1 × 9
  .y.          group1  group2    n1    n2 statistic    df      p p.signif
  <chr>        <chr>   <chr>  <int> <int>     <dbl> <dbl>  <dbl> <chr>   
1 no_pups_born gfp/gfp WT        20    24      1.74  41.7 0.0894 ns

gfp_breeding_table %>%
  group_by(genotype) %>%
  summarise(mean = mean(no_pups_born), median = median(no_pups_born), sd = sd(no_pups_born))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        8.29    8.5  2.40
2 gfp/gfp   9.4     9.5  1.82

litter sizes phd/phd

phd_breeding_table %>%
  drop_na(n_pups_born) %>%
  ggplot(aes(x = genotype, y = n_pups_born, fill = genotype)) +
  geom_boxplot() +
  geom_dotplot(size = 3, binaxis='y', stackdir='center') +
  stat_summary(fun.y=mean, geom="point", shape=23, size=2) +
  labs(y="litter size distribution", x = "") +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Warning in geom_dotplot(size = 3, binaxis = "y", stackdir = "center"): Ignoring
unknown parameters: `size`

Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

phd_breeding_table %>%
  drop_na(n_pups_born) %>%
  t_test(n_pups_born ~ genotype) %>%
  add_significance()

# A tibble: 1 × 9
  .y.         group1  group2    n1    n2 statistic    df     p p.signif
  <chr>       <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 n_pups_born phd/phd WT        30    51    -0.621  57.0 0.537 ns

phd_breeding_table %>%
  group_by(genotype) %>%
  summarise(mean = mean(n_pups_born), median = median(n_pups_born), sd = sd(n_pups_born))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        7.04      7  2.47
2 phd/phd   6.67      7  2.68

female breeding

fem_breeding_table = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 9)
head(fem_breeding_table)

# A tibble: 6 × 18
  femaleID   genotype cage  pregnancy birth_date          no_pups_born males...7
  <chr>      <chr>    <chr>     <dbl> <dttm>                     <dbl>     <dbl>
1 MBO-000977 gfp/gfp  BMBO…         1 2019-10-27 00:00:00            6         5
2 MBO-000977 gfp/gfp  BMBO…         2 2019-11-20 00:00:00           11         5
3 MBO-000976 gfp/gfp  bmbo…         1 2019-10-27 00:00:00            9         5
4 MBO-000976 gfp/gfp  bmbo…         2 2019-11-20 00:00:00           10         6
5 MBO-000975 gfp/gfp  BMBO…         1 2019-10-30 00:00:00            8         2
6 MBO-000975 gfp/gfp  BMBO…         2 2019-11-23 00:00:00           10         5
# ℹ 11 more variables: females...8 <dbl>, weight_at_birth <chr>,
#   no_of_pups_weaned <dbl>, males...11 <chr>, females...12 <chr>,
#   weight_at_weaning <chr>, end_of_breeding <dttm>, DOB <dttm>,
#   breeding_start_date <dttm>, fem_breeding <dbl>, mothers_age <dbl>

birth weights females gfp/gfp

fem_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  drop_na(weight_at_birth) %>%
  ggplot(aes(x = genotype, y = weight_at_birth, fill = genotype)) +
  geom_violin(trim = F) +
  geom_boxplot(width=0.1) +
  stat_summary(fun=mean, geom="point", shape=23, size=2) +
  labs(y="Weight of pups at birth (g)", x = "") +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

# testing

fem_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  drop_na(weight_at_birth) %>%
  t_test(weight_at_birth ~ genotype) %>%
  add_significance()

# A tibble: 1 × 9
  .y.             group1  group2    n1    n2 statistic    df     p p.signif
  <chr>           <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 weight_at_birth gfp/gfp WT        54    71     0.912  91.6 0.364 ns

fem_breeding_table %>%
  separate_rows(weight_at_birth, sep = "-") %>%
  type_convert(col_types = cols(weight_at_birth = col_double())) %>%
  drop_na(weight_at_birth) %>%
  group_by(genotype) %>%
  dplyr::summarise(mean = mean(weight_at_birth), median = median(weight_at_birth), sd = sd(weight_at_birth))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        1.31    1.3 0.187
2 gfp/gfp   1.35    1.3 0.262

gfp/gfp fem littersizes

fem_breeding_table %>%
  drop_na(no_pups_born) %>%
  ggplot(aes(x = genotype, y = no_pups_born, fill = genotype)) +
  geom_boxplot() +
  geom_dotplot(size = 3, binaxis='y', stackdir='center') +
  stat_summary(fun.y=mean, geom="point", shape=23, size=2) +
  labs(y="litter size distribution", x = "") +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Warning in geom_dotplot(size = 3, binaxis = "y", stackdir = "center"): Ignoring
unknown parameters: `size`

Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

fem_breeding_table %>%
  drop_na(no_pups_born) %>%
  t_test(no_pups_born ~ genotype) %>%
  add_significance()

# A tibble: 1 × 9
  .y.          group1  group2    n1    n2 statistic    df      p p.signif
  <chr>        <chr>   <chr>  <int> <int>     <dbl> <dbl>  <dbl> <chr>   
1 no_pups_born gfp/gfp WT         6    10      1.96  11.8 0.0746 ns

fem_breeding_table %>%
  group_by(genotype) %>%
  summarise(mean = mean(no_pups_born), median = median(no_pups_born), sd = sd(no_pups_born))

# A tibble: 2 × 4
  genotype  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT         7.1    7    2.02
2 gfp/gfp    9      9.5  1.79

testis weight gfp/gfp data load

gfp_testis_table = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 1)

gfp_testis_table = gfp_testis_table %>%
  pivot_longer(cols = testis_1_weight_g:testis_2_weight_g, names_to = "testis_num", values_to = "testis_weight")
head(gfp_testis_table)

# A tibble: 6 × 8
  replicate DOB                 genotype   age sort_date              ID
      <dbl> <dttm>              <chr>    <dbl> <dttm>              <dbl>
1         1 2021-01-08 00:00:00 WT          23 2021-06-17 00:00:00  1558
2         1 2021-01-08 00:00:00 WT          23 2021-06-17 00:00:00  1558
3         2 2021-01-08 00:00:00 gfp/gfp     23 2021-06-18 00:00:00  1562
4         2 2021-01-08 00:00:00 gfp/gfp     23 2021-06-18 00:00:00  1562
5         3 2021-01-08 00:00:00 gfp/gfp     23 2021-06-22 00:00:00  1561
6         3 2021-01-08 00:00:00 gfp/gfp     23 2021-06-22 00:00:00  1561
# ℹ 2 more variables: testis_num <chr>, testis_weight <dbl>

tesits weight gfp/gfp test

gfp_testis_table %>%
  ggplot(aes(x = genotype, y = testis_weight*1000, fill = genotype)) +
  geom_boxplot() +
  geom_dotplot(size = 3, binaxis='y', stackdir='center') +
  stat_summary(fun.y=mean, geom="point", shape=23, size=2) +
  labs(y="testis weight (mg)", x = "") +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Warning in geom_dotplot(size = 3, binaxis = "y", stackdir = "center"): Ignoring
unknown parameters: `size`

Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

gfp_testis_table %>%
  drop_na(testis_weight) %>%
  t_test(testis_weight ~ genotype) %>%
  add_significance()

# A tibble: 1 × 9
  .y.           group1  group2    n1    n2 statistic    df     p p.signif
  <chr>         <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 testis_weight gfp/gfp WT        12    12    -0.138  18.4 0.891 ns

gfp_testis_table %>%
  group_by(genotype) %>%
  summarise(mean = mean(testis_weight), median = median(testis_weight), sd = sd(testis_weight))

# A tibble: 2 × 4
  genotype  mean median     sd
  <chr>    <dbl>  <dbl>  <dbl>
1 WT       0.109  0.11  0.0121
2 gfp/gfp  0.108  0.106 0.0195

sperm parameters gfp/gfp data load

sperm_parameters_gfp = gfp_testis_table = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 2)
head(sperm_parameters_gfp)

# A tibble: 6 × 7
  Mutation ID      DOB                 Total Number million…¹ `% Total Motility`
  <chr>    <chr>   <dttm>                               <dbl>              <dbl>
1 WT       MBO-00… 2020-08-27 00:00:00                     37                 51
2 WT       MBO-00… 2021-07-26 00:00:00                     38                 32
3 WT       MBO-00… 2021-07-26 00:00:00                     32                 39
4 WT       MBO-00… 2021-08-09 00:00:00                     29                 32
5 gfp/gfp  MBO-00… 2020-06-17 00:00:00                     38                 32
6 gfp/gfp  MBO-00… 2020-06-17 00:00:00                     30                 47
# ℹ abbreviated name: ¹`Total Number million /mL`
# ℹ 2 more variables: `% Progressive Motility` <dbl>,
#   `% Vibrating Motility` <dbl>

sperm_parameters_gfp = sperm_parameters_gfp %>%
  pivot_longer(cols = `Total Number million /mL`:`% Vibrating Motility`, values_to = "measurments",  names_to = "parameter")
head(sperm_parameters_gfp)

# A tibble: 6 × 5
  Mutation ID         DOB                 parameter                measurments
  <chr>    <chr>      <dttm>              <chr>                          <dbl>
1 WT       MBO-006795 2020-08-27 00:00:00 Total Number million /mL          37
2 WT       MBO-006795 2020-08-27 00:00:00 % Total Motility                  51
3 WT       MBO-006795 2020-08-27 00:00:00 % Progressive Motility            32
4 WT       MBO-006795 2020-08-27 00:00:00 % Vibrating Motility              19
5 WT       MBO-008419 2021-07-26 00:00:00 Total Number million /mL          38
6 WT       MBO-008419 2021-07-26 00:00:00 % Total Motility                  32

ivf_gfp = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 3)
head(ivf_gfp)

# A tibble: 6 × 6
  Mutation ID    DOB                 `N. Oocytes` `N. 2-Cells` `% Fertilization`
  <chr>    <chr> <dttm>                     <dbl>        <dbl>             <dbl>
1 gfp/gfp  MBO-… 2021-03-11 00:00:00           81           72              88.9
2 gfp/gfp  MBO-… 2021-03-11 00:00:00           80           67              83.8
3 gfp/gfp  MBO-… 2021-03-11 00:00:00           79           68              86.1
4 gfp/gfp  MBO-… 2021-03-11 00:00:00           56           49              87.5
5 gfp/gfp  MBO-… 2021-03-11 00:00:00           59           53              89.8
6 gfp/gfp  MBO-… 2021-03-11 00:00:00          100           88              88

ivf_gfp = ivf_gfp %>%
   pivot_longer(cols = `N. Oocytes`:`% Fertilization`, values_to = "measurments",  names_to = "parameter")
head(ivf_gfp)

# A tibble: 6 × 5
  Mutation ID         DOB                 parameter       measurments
  <chr>    <chr>      <dttm>              <chr>                 <dbl>
1 gfp/gfp  MBO-007550 2021-03-11 00:00:00 N. Oocytes             81  
2 gfp/gfp  MBO-007550 2021-03-11 00:00:00 N. 2-Cells             72  
3 gfp/gfp  MBO-007550 2021-03-11 00:00:00 % Fertilization        88.9
4 gfp/gfp  MBO-007550 2021-03-11 00:00:00 N. Oocytes             80  
5 gfp/gfp  MBO-007550 2021-03-11 00:00:00 N. 2-Cells             67  
6 gfp/gfp  MBO-007550 2021-03-11 00:00:00 % Fertilization        83.8

sperm parameters & IVF gfp/gfp

sperm count

sperm_parameters_gfp %>%
  dplyr::filter(parameter == "Total Number million /mL") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "gfp/gfp")) +
  labs(y="total number million/ml", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

sperm_parameters_gfp %>%
  dplyr::filter(parameter == "Total Number million /mL") %>%
  t_test(measurments ~ Mutation) %>%
  add_significance()

# A tibble: 1 × 9
  .y.         group1  group2    n1    n2 statistic    df     p p.signif
  <chr>       <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 measurments gfp/gfp WT        10     4     0.146  7.21 0.888 ns

sperm_parameters_gfp %>%
  dplyr::filter(parameter == "Total Number million /mL") %>%
  group_by(Mutation) %>%
  summarise(mean = mean(measurments), median = median(measurments), sd = sd(measurments))

# A tibble: 2 × 4
  Mutation  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        34     34.5  4.24
2 gfp/gfp   34.4   33    5.44

sperm motility in % for gfp/gfp

sperm_parameters_gfp %>%
  dplyr::filter(!parameter == "Total Number million /mL") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  facet_wrap(~factor(parameter, levels = c("% Progressive Motility", "% Vibrating Motility", "% Total Motility"))) +
  geom_boxplot() +
  stat_summary(fun = mean, shape = 5, size = 0.4) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "gfp/gfp")) +
  labs(y="sperm paramters", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

sperm_parameters_gfp %>%
  dplyr::filter(!parameter == "Total Number million /mL") %>%
  group_by(parameter) %>%
  t_test(measurments ~ Mutation) %>%
  add_significance()

# A tibble: 3 × 10
  parameter       .y.   group1 group2    n1    n2 statistic    df     p p.signif
  <chr>           <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 % Progressive … meas… gfp/g… WT        10     4    -0.710  5.20 0.508 ns      
2 % Total Motili… meas… gfp/g… WT        10     4     0.157  4.88 0.882 ns      
3 % Vibrating Mo… meas… gfp/g… WT        10     4     1.56   8.82 0.153 ns

sperm_parameters_gfp %>%
  dplyr::filter(!parameter == "Total Number million /mL") %>%
  group_by(parameter, Mutation) %>%
  summarise(mean = mean(measurments), median = median(measurments), sd = sd(measurments))

`summarise()` has grouped output by 'parameter'. You can override using the
`.groups` argument.

# A tibble: 6 × 5
# Groups:   parameter [3]
  parameter              Mutation  mean median    sd
  <chr>                  <chr>    <dbl>  <dbl> <dbl>
1 % Progressive Motility WT        24     23    6.32
2 % Progressive Motility gfp/gfp   21.4   21    5.85
3 % Total Motility       WT        38.5   35.5  8.96
4 % Total Motility       gfp/gfp   39.3   39    7.67
5 % Vibrating Motility   WT        14.5   13.5  3.11
6 % Vibrating Motility   gfp/gfp   17.9   17    4.82

IVF results gfp/gfp

ivf_gfp %>%
  dplyr::filter(parameter == "% Fertilization") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  stat_summary(fun = mean, shape = 5, size = 0.5) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "gfp/gfp")) +
  labs(y="percentage of oocytes", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

ivf_gfp %>%
  dplyr::filter(parameter == "% Fertilization") %>%
  t_test(measurments ~ Mutation) %>%
  add_significance()

# A tibble: 1 × 9
  .y.         group1  group2    n1    n2 statistic    df     p p.signif
  <chr>       <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 measurments gfp/gfp WT        21    12      1.70  20.0 0.104 ns

# number of oocytes and fertilised eggs reaching 2cell stage (2N)

ivf_gfp %>%
  dplyr::filter(!parameter == "% Fertilization") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  stat_summary(fun = mean, shape = 5, size = 0.5) +
  facet_wrap(~factor(parameter, levels =c("N. Oocytes", "N. 2-Cells"))) +
  scale_x_discrete(limits = c("WT", "gfp/gfp"), labels = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "gfp/gfp")) +
  labs(y="percentage of oocytes", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "gfp/gfp")) +
  scale_fill_discrete(name = "genotype", labels = c("gfp/gfp", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

testis weight phd/phd

phd_testis_table = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 4)

phd_testis_table = phd_testis_table %>%
  pivot_longer(cols = testis_1_weight:testis_2_weight, names_to = "testis_num", values_to = "testis_weight")
head(phd_testis_table)

# A tibble: 6 × 5
  male_ID    total_body_weight genotype testis_num      testis_weight
  <chr>                  <dbl> <chr>    <chr>                   <dbl>
1 MBO-001492              36.5 phd/phd  testis_1_weight         0.114
2 MBO-001492              36.5 phd/phd  testis_2_weight         0.119
3 MBO-001498              37.7 phd/phd  testis_1_weight         0.105
4 MBO-001498              37.7 phd/phd  testis_2_weight         0.114
5 MBO-001499              33.1 phd/phd  testis_1_weight         0.12 
6 MBO-001499              33.1 phd/phd  testis_2_weight         0.11

phd_testis_table %>%
  ggplot(aes(x = genotype, y = testis_weight*1000, fill = genotype)) +
  geom_boxplot() +
  geom_dotplot(size = 3, binaxis='y', stackdir='center') +
  stat_summary(fun.y=mean, geom="point", shape=23, size=2) +
  labs(y="testis weight (mg)", x = "") +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Warning in geom_dotplot(size = 3, binaxis = "y", stackdir = "center"): Ignoring
unknown parameters: `size`

Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

phd_testis_table %>%
  drop_na(testis_weight) %>%
  t_test(testis_weight ~ genotype) %>%
  add_significance()

# A tibble: 1 × 9
  .y.           group1  group2    n1    n2 statistic    df     p p.signif
  <chr>         <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 testis_weight phd/phd WT        18    20     -1.65  34.4 0.108 ns

phd_testis_table %>%
  group_by(genotype) %>%
  summarise(mean = mean(testis_weight), median = median(testis_weight), sd = sd(testis_weight))

# A tibble: 2 × 4
  genotype  mean median      sd
  <chr>    <dbl>  <dbl>   <dbl>
1 WT       0.116  0.116 0.0133 
2 phd/phd  0.110  0.11  0.00958

sperm parameters phd/phd data load

sperm_parameters_phd = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 5)
head(sperm_parameters_phd)

# A tibble: 6 × 7
  Mutation ID      DOB                 Total Number million…¹ `% Total Motility`
  <chr>    <chr>   <dttm>                               <dbl>              <dbl>
1 WT       MBO-00… 2021-04-06 00:00:00                     35                 23
2 WT       MBO-00… 2021-04-06 00:00:00                     44                 23
3 WT       MBO-00… 2021-04-06 00:00:00                     32                 29
4 WT       MBO-00… 2021-07-19 00:00:00                     40                 21
5 WT       MBO-00… 2021-07-19 00:00:00                     35                 29
6 WT       MBO-00… 2021-08-09 00:00:00                     39                 25
# ℹ abbreviated name: ¹`Total Number million /mL`
# ℹ 2 more variables: `% Progressive Motility` <dbl>,
#   `% Vibrating Motility` <dbl>

sperm_parameters_phd = sperm_parameters_phd %>%
  pivot_longer(cols = `Total Number million /mL`:`% Vibrating Motility`, values_to = "measurments",  names_to = "parameter")
head(sperm_parameters_phd)

# A tibble: 6 × 5
  Mutation ID         DOB                 parameter                measurments
  <chr>    <chr>      <dttm>              <chr>                          <dbl>
1 WT       MBO-007723 2021-04-06 00:00:00 Total Number million /mL          35
2 WT       MBO-007723 2021-04-06 00:00:00 % Total Motility                  23
3 WT       MBO-007723 2021-04-06 00:00:00 % Progressive Motility            14
4 WT       MBO-007723 2021-04-06 00:00:00 % Vibrating Motility               9
5 WT       MBO-007737 2021-04-06 00:00:00 Total Number million /mL          44
6 WT       MBO-007737 2021-04-06 00:00:00 % Total Motility                  23

ivf_phd = readxl::read_xlsx(path = "Mielnicka_source_data_16042024_updated_names.xlsx", sheet = 6)
head(ivf_phd)

# A tibble: 6 × 7
  Mutation ID         DOB                 `Lab ID` `N. Oocytes` `N. 2-Cells`
  <chr>    <chr>      <dttm>                 <dbl>        <dbl>        <dbl>
1 WT       MBO-007723 2021-04-06 00:00:00     4117           79           74
2 WT       MBO-007723 2021-04-06 00:00:00     4117           60           57
3 WT       MBO-007723 2021-04-06 00:00:00     4117           90           84
4 WT       MBO-007737 2021-04-06 00:00:00     4106           90           78
5 WT       MBO-007737 2021-04-06 00:00:00     4106           93           87
6 WT       MBO-007737 2021-04-06 00:00:00     4106           65           60
# ℹ 1 more variable: `% Fertilization` <dbl>

ivf_phd = ivf_phd %>%
   pivot_longer(cols = `N. Oocytes`:`% Fertilization`, values_to = "measurments",  names_to = "parameter")
head(ivf_phd)

# A tibble: 6 × 6
  Mutation ID         DOB                 `Lab ID` parameter       measurments
  <chr>    <chr>      <dttm>                 <dbl> <chr>                 <dbl>
1 WT       MBO-007723 2021-04-06 00:00:00     4117 N. Oocytes             79  
2 WT       MBO-007723 2021-04-06 00:00:00     4117 N. 2-Cells             74  
3 WT       MBO-007723 2021-04-06 00:00:00     4117 % Fertilization        93.7
4 WT       MBO-007723 2021-04-06 00:00:00     4117 N. Oocytes             60  
5 WT       MBO-007723 2021-04-06 00:00:00     4117 N. 2-Cells             57  
6 WT       MBO-007723 2021-04-06 00:00:00     4117 % Fertilization        95

sperm count phd/phd

sperm_parameters_phd %>%
  dplyr::filter(parameter == "Total Number million /mL") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "phd/phd")) +
  labs(y="total number million/ml", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

sperm_parameters_phd %>%
  dplyr::filter(parameter == "Total Number million /mL") %>%
  t_test(measurments ~ Mutation) %>%
  add_significance()

# A tibble: 1 × 9
  .y.         group1  group2    n1    n2 statistic    df     p p.signif
  <chr>       <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 measurments phd/phd WT         6     6     0.405  9.42 0.694 ns

sperm_parameters_phd %>%
  dplyr::filter(parameter == "Total Number million /mL") %>%
  group_by(Mutation) %>%
  summarise(mean = mean(measurments), median = median(measurments), sd = sd(measurments))

# A tibble: 2 × 4
  Mutation  mean median    sd
  <chr>    <dbl>  <dbl> <dbl>
1 WT        37.5     37  4.32
2 phd/phd   38.7     42  5.57

sperm motility in % phd/phd

sperm_parameters_phd %>%
  dplyr::filter(!parameter == "Total Number million /mL") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  facet_wrap(~factor(parameter, levels = c("% Progressive Motility", "% Vibrating Motility", "% Total Motility"))) +
  geom_boxplot() +
  stat_summary(fun = mean, shape = 5, size = 0.4) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "phd/phd")) +
  labs(y="sperm paramters", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

sperm_parameters_phd %>%
  dplyr::filter(!parameter == "Total Number million /mL") %>%
  group_by(parameter) %>%
  t_test(measurments ~ Mutation) %>%
  add_significance()

# A tibble: 3 × 10
  parameter       .y.   group1 group2    n1    n2 statistic    df     p p.signif
  <chr>           <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 % Progressive … meas… phd/p… WT         6     6     0.558  9.09 0.59  ns      
2 % Total Motili… meas… phd/p… WT         6     6     0.939  9.70 0.37  ns      
3 % Vibrating Mo… meas… phd/p… WT         6     6     1.4    9.98 0.192 ns

sperm_parameters_phd %>%
  dplyr::filter(!parameter == "Total Number million /mL") %>%
  group_by(parameter, Mutation) %>%
  summarise(mean = mean(measurments), median = median(measurments), sd = sd(measurments))

`summarise()` has grouped output by 'parameter'. You can override using the
`.groups` argument.

# A tibble: 6 × 5
# Groups:   parameter [3]
  parameter              Mutation  mean median    sd
  <chr>                  <chr>    <dbl>  <dbl> <dbl>
1 % Progressive Motility WT       15.2    15    2.14
2 % Progressive Motility phd/phd  16      15.5  2.97
3 % Total Motility       WT       25      24    3.35
4 % Total Motility       phd/phd  27      26.5  4   
5 % Vibrating Motility   WT        9.83    9.5  1.47
6 % Vibrating Motility   phd/phd  11      11    1.41

IVF results phd/phd

ivf_phd %>%
  dplyr::filter(parameter == "% Fertilization") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  stat_summary(fun = mean, shape = 5, size = 0.5) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "phd/phd")) +
  labs(y="percentage of oocytes", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

ivf_phd %>%
  dplyr::filter(parameter == "% Fertilization") %>%
  t_test(measurments ~ Mutation) %>%
  add_significance()

# A tibble: 1 × 9
  .y.         group1  group2    n1    n2 statistic    df     p p.signif
  <chr>       <chr>   <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 measurments phd/phd WT        18    18      1.19  32.0 0.243 ns

# number of oocytes and fertilised eggs reaching 2cell stage (2N)

ivf_phd %>%
  dplyr::filter(!parameter == "% Fertilization") %>%
  ggplot(aes(x = Mutation, y = measurments, fill = Mutation)) +
  geom_boxplot() +
  stat_summary(fun = mean, shape = 5, size = 0.5) +
  facet_wrap(~factor(parameter, levels =c("N. Oocytes", "N. 2-Cells"))) +
  scale_x_discrete(limits = c("WT", "phd/phd"), labels = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("WT", "phd/phd")) +
  labs(y="percentage of oocytes", x = "") +
  guides(fill=guide_legend(title="genotype")) +
  scale_x_discrete(limits = c("WT", "phd/phd")) +
  scale_fill_discrete(name = "genotype", labels = c("phd/phd", "WT")) +
  guides(fill = guide_legend(reverse=TRUE)) +
  ggtheme()

Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

ivf_phd %>%
  dplyr::filter(!parameter == "% Fertilization") %>%
  group_by(parameter) %>%
  t_test(measurments ~ Mutation) %>%
  add_significance()

# A tibble: 2 × 10
  parameter  .y.        group1 group2    n1    n2 statistic    df     p p.signif
  <chr>      <chr>      <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>   
1 N. 2-Cells measurmen… phd/p… WT        18    18    0.674   33.8 0.505 ns      
2 N. Oocytes measurmen… phd/p… WT        18    18    0.0926  33.8 0.927 ns