# import all packages at top, even if you need them somewhere later
library(tidyverse)
library(psych)
library(patchwork)
# Keep the data folder in one variable so the path is written only once
# and every file below is located relative to it.
data_in <- 'C:\\Users\\Aleksandre Asatiani\\Desktop\\R_course\\'

# Read the first part of the data set and print it for a quick look.
subjects <- paste0(data_in, 'subinfo_for_demo.csv') %>%
  read_csv()
subjects

# Read the second part; the string 'NaN' in this file is treated as missing.
bonds <- paste0(data_in, 'bonds_for_demo.csv') %>%
  read_csv(na = 'NaN')
bonds

# Combine both tibbles on the shared 'subid' column, keeping rows that
# appear in either table.
data <- subjects %>%
  full_join(bonds, by = 'subid')
# Reshape from wide to long: the columns from Partner through M_Stranger
# become 'person' / 'emotional_bond' pairs.
# pivot_longer() replaces the superseded gather(). It emits rows subject by
# subject rather than column by column, so only the row order differs.
data_long <- data %>%
  pivot_longer(
    cols = Partner:M_Stranger,
    names_to = 'person',
    values_to = 'emotional_bond'
  )

# Load one more file, reshape it the same way and combine it with the rest.
touch <- read_csv(paste0(data_in, 'touch_allowance_for_demo.csv'))
touch_long <- touch %>%
  pivot_longer(
    cols = Partner:M_Stranger,
    names_to = 'person',
    values_to = 'touch_allowance'
  )

# Match rows on subid and person so each row carries both measures.
full_data <- full_join(data_long, touch_long, by = c('subid', 'person'))
## Manipulating data
# Mostly with the dplyr package
# Tools from last time for getting an overview of the data
summary(full_data)
describe(full_data)
full_data

# dplyr makes it easy to pick out a specific subset to look at (or save):
# rows where age is above 60 and person is 'Partner', complete rows only
full_data %>%
  filter(age > 60 & person == 'Partner') %>%
  na.omit()

# keep only rows that have a touch_allowance value, then show four columns
full_data %>%
  filter(!is.na(touch_allowance)) %>%
  select(subid, person, emotional_bond, touch_allowance)
# Clean up problematic values: -1 in weight and height is recoded to NA,
# sex becomes a labelled factor (0 = 'female', 1 = 'male'), and bmi is
# derived from the cleaned weight and height.
# NOTE(review): weight / height^2 is the usual BMI only for kg and metres;
# the units are not visible here -- confirm.
full_data_2 <- full_data %>%
  mutate(
    weight = na_if(weight, -1),
    height = na_if(height, -1)
  ) %>%
  mutate(
    sex = factor(sex, levels = c(0, 1), labels = c('female', 'male')),
    bmi = weight / height^2
  ) %>%
  select(
    subid, sex, age, height, weight, bmi,
    person, emotional_bond, touch_allowance
  )
# Group and summarise: mean height per sex.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned; naming the result gives the column a readable header.
full_data_2 %>%
  group_by(sex) %>%
  summarise(mean_height = mean(height, na.rm = TRUE))

# A more complete example: mean emotional bond and touch allowance for
# every sex/person combination, ignoring missing values.
# summarise_all() only peels off the last grouping level, so ungroup()
# removes the remaining grouping by sex before the table is reused.
avg_by_sex <- full_data_2 %>%
  group_by(sex, person) %>%
  select(sex, person, emotional_bond, touch_allowance) %>%
  summarise_all(mean, na.rm = TRUE) %>%
  ungroup()

# Save the summary table to file
write_csv(avg_by_sex, paste0(data_in, 'avg_by_sex.csv'))
## Plotting
# feat. ggplot2
# Create a plot and assign it to a variable: the averaged touch allowance
# against the averaged emotional bond, coloured by sex, with a straight-line
# fit per sex.
p1 <- ggplot(
  data = avg_by_sex,
  aes(x = emotional_bond, y = touch_allowance, col = sex)
) +
  geom_point(size = 3) +
  # FALSE is spelled out: F is a reassignable variable, not a constant
  stat_smooth(method = 'lm', se = FALSE) +
  theme_classic() +
  # blank y-axis title
  ylab(' ') +
  xlab('Emotional Bond') +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(limits = c(1, 10), breaks = 1:10) +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    legend.position = 'top'
  )

# Call the variable name to see the plot
p1
# Same axes as above, but drawn from the individual rows of full_data_2:
# jittered, semi-transparent points plus a straight-line fit per sex.
p2 <- ggplot(
  data = full_data_2,
  aes(x = emotional_bond, y = touch_allowance, col = sex)
) +
  geom_jitter(size = 3, alpha = 0.2) +
  # FALSE is spelled out: F is a reassignable variable, not a constant
  stat_smooth(method = 'lm', se = FALSE) +
  theme_classic() +
  ylab('Touchability Index') +
  xlab('Emotional Bond') +
  scale_x_continuous(limits = c(1, 10), breaks = 1:10) +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    legend.position = 'top'
  )
# Save both plots side by side in one PNG.
# The patchwork package lets you combine plots into a single figure
# with + and /.
png(paste0(data_in, 'side_by_side_comparison.png'),
    width = 800, height = 800)
# Draw explicitly with print(): a bare `p2 + p1` is only rendered when R
# auto-prints top-level values, which source() does not do by default, so
# the file would come out blank.
# dev.off() sits in `finally` so the device is closed even if drawing fails.
tryCatch(
  print(p2 + p1),
  finally = dev.off()
)
# Violin plot
# An example of a different figure type that also shows how to re-order
# a discrete axis: the factor levels set the left-to-right order.
preferred_order <- c(
  'Partner', 'Mother', 'Father', 'Sister', 'Brother',
  'Aunt', 'Uncle', 'F_Cousin', 'M_Cousin',
  'F_Friend', 'M_Friend', 'F_Acq', 'M_Acq',
  'F_Stranger', 'M_Stranger'
)
ggplot(
  data = full_data_2,
  aes(
    # `levels` is written in full instead of relying on partial argument
    # matching of `level`
    x = factor(person, levels = preferred_order),
    y = touch_allowance
  )
) +
  geom_violin(col = 'blue', fill = 'red', alpha = 0.5) +
  theme_bw()
# Created by Juulia Suvilehto
# juulia.suvilehto@iki.fi
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License