# R as a calculator ----
# R can be used directly as a calculator: each expression below is evaluated
# and its value is printed.

# Addition
1 + 1
# Multiplication
5 * 6
# Division
9 / 3
# Exponentiation
3^2
# Square root
sqrt(16)
# A line break is fine right after an operator: the expression is incomplete
# at the trailing `+`, so R keeps reading on the next line.
80 * 200 / 8 + 66 * 200 / 8 +
  80 * 200 / 8
# Sample data set with 10 subjects and 8 variables.
# Column meanings (taken from the label table defined later in this file):
#   id       - subject id
#   nenrei   - age
#   seibetu  - sex (0 = male, 1 = female)
#   sincho   - height
#   taiju    - weight
#   taishibo - body fat percentage
#   ketuatu  - systolic blood pressure
#   k_kiou   - history of hypertension (0 = no, 1 = yes)
data <- data.frame(
  id = 1:10,
  nenrei = c(52, 55, 52, 56, 56, 41, 44, 56, 54, 57),
  seibetu = c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1),
  sincho = c(169.9, 172.1, 175.7, 173.1, 165.3, 155.1, 155.2, 157.2, 148.2, 156.4),
  taiju = c(77.1, 60.3, 74.0, 67.5, 70.8, 54.7, 62.2, 57.7, 51.1, 53.6),
  taishibo = c(24.0, 21.8, 23.8, 27.1, 30.0, 28.7, 34.8, 30.0, 34.6, 25.4),
  ketuatu = c(93, 119, 115, 113, 123, 139, 120, 108, 114, 102),
  k_kiou = c(0, 1, 1, 1, 1, 1, 0, 0, 1, 0)
)

# Print the data frame to check its contents. The expected console output is
# kept below as comments so the script stays runnable.
data
#>    id nenrei seibetu sincho taiju taishibo ketuatu k_kiou
#> 1   1     52       0  169.9  77.1     24.0      93      0
#> 2   2     55       0  172.1  60.3     21.8     119      1
#> 3   3     52       0  175.7  74.0     23.8     115      1
#> 4   4     56       0  173.1  67.5     27.1     113      1
#> 5   5     56       0  165.3  70.8     30.0     123      1
#> 6   6     41       1  155.1  54.7     28.7     139      1
#> 7   7     44       1  155.2  62.2     34.8     120      0
#> 8   8     56       1  157.2  57.7     30.0     108      0
#> 9   9     54       1  148.2  51.1     34.6     114      1
#> 10 10     57       1  156.4  53.6     25.4     102      0
# Lookup table that maps each variable name of the data set to a
# human-readable (Japanese) label. The label strings are runtime data and are
# kept exactly as written.
label <- data.frame(
  variable = c(
    "id", "nenrei", "seibetu", "sincho",
    "taiju", "taishibo", "ketuatu", "k_kiou"
  ),
  label = c(
    "id",
    "年齢",
    "性別(0=男, 1=女)",
    "身長",
    "体重",
    "体脂肪率",
    "収縮期血圧",
    "高血圧既往歴(0=無、1=有)"
  )
)

# View() opens a GUI data viewer, which may be unavailable when the script is
# run in batch mode; fall back to printing the table in that case.
if (interactive()) {
  View(label)
} else {
  print(label)
}
# Measures of central tendency ----
# Restrict the data set to its continuous variables.
data2 <- data[, c("nenrei", "sincho", "taiju", "taishibo", "ketuatu")]

# Mean of each column (vapply guarantees one numeric value per column).
vapply(data2, mean, numeric(1))
# Median of each column
vapply(data2, median, numeric(1))
# Mode of the hypertension-history flag: tabulate the values, take the name of
# the most frequent one and convert it back to a number. which.max() returns
# the first maximum, so a tie resolves to the value that comes first in the
# table.
as.numeric(names(which.max(table(data$k_kiou))))

# Measures of dispersion ----
# Dispersion statistics for every column of data2 (the continuous variables);
# each call returns a named numeric vector with one value per column.

# Standard deviation
vapply(data2, sd, numeric(1))
# Interquartile range
vapply(data2, IQR, numeric(1))
# Minimum
vapply(data2, min, numeric(1))
# Maximum
vapply(data2, max, numeric(1))

# Graphs ----
# Graph (blood pressure): histogram ----
hist(data$ketuatu)

# Graph (blood pressure): box plot ----
boxplot(data$ketuatu)

# Graph (blood pressure): scatter plot ----
# Scatter plot of systolic blood pressure by sex.
# ggplot2 is attached here because ggplot() is called at this point, before
# the library(ggplot2) call that appears further down in the file.
library(ggplot2)

ggplot(
  data,
  aes(
    # seibetu is coded 0/1; convert it to a factor so the x axis is discrete.
    x = factor(seibetu, levels = c(0, 1)),
    y = ketuatu,
    color = factor(seibetu)
  )
) +
  geom_point(size = 2) +
  # Colours and axis labels are keyed by the factor levels "0" and "1".
  scale_color_manual(values = c("0" = "blue", "1" = "red")) +
  scale_x_discrete(labels = c("0" = "男性", "1" = "女性")) +
  theme_classic() +
  xlab("性別") +
  ylab("血圧 (mmHg)") +
  # The x axis already identifies the groups, so the colour legend is hidden.
  theme(legend.position = "none")

# Graph (blood pressure): error bar chart ----
# Mean systolic blood pressure by sex with standard-error bars.
library(dplyr)
library(ggplot2)

# Recode sex (0/1) as a labelled factor. The guard keeps the step idempotent:
# re-running it on the already converted column would otherwise turn every
# value into NA, because the labels no longer match the levels 0 and 1.
if (!is.factor(data$seibetu)) {
  data$seibetu <- factor(
    data$seibetu,
    levels = c(0, 1),
    labels = c("男性", "女性")
  )
}

# Summarise per sex: mean, standard deviation, group size and standard error.
# `se` is computed from the `sd` and `n` columns created just above it in the
# same summarise() call.
summary_data <- data %>%
  group_by(seibetu) %>%
  summarise(
    mean = mean(ketuatu, na.rm = TRUE),
    sd = sd(ketuatu, na.rm = TRUE),
    n = n(),
    se = sd / sqrt(n)
  )

# Plot the group means as points with error bars of mean +/- one standard
# error.
ggplot(summary_data, aes(x = seibetu, y = mean, color = seibetu)) +
  geom_point(size = 4) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.15
  ) +
  scale_color_manual(values = c("男性" = "blue", "女性" = "red")) +
  theme_classic() +
  ylab("血圧 (mmHg)") +
  xlab("性別") +
  theme(legend.position = "none")

# Scatter plot (relationship between age and blood pressure) ----
# Scatter plot of age (x axis) against systolic blood pressure (y axis).
plot(data$nenrei, data$ketuatu)

# Frequency table and pie chart ----
