研究方法論2026

Rは計算機

R
# R can be used as a calculator
# Addition
1 + 1
# Multiplication
5 * 6
# Division
9 / 3
# Exponentiation
3^2
# Square root
sqrt(16)
# A line break is allowed right after an operator: R keeps reading the
# expression on the next line
80 * 200 / 8 + 66 * 200 / 8 +
  80 * 200 / 8
R
# Sample dataset: ten subjects, one row per subject.
# Columns (meanings taken from the `label` table defined in this file):
#   id       - subject id
#   nenrei   - age
#   seibetu  - sex (0 = male, 1 = female)
#   sincho   - height
#   taiju    - body weight
#   taishibo - body fat percentage
#   ketuatu  - systolic blood pressure
#   k_kiou   - history of hypertension (0 = no, 1 = yes)
data <- data.frame(
  id = seq_len(10),
  nenrei = c(52, 55, 52, 56, 56, 41, 44, 56, 54, 57),
  # The first five subjects are coded 0, the last five are coded 1
  seibetu = rep(c(0, 1), each = 5),
  sincho = c(
    169.9, 172.1, 175.7, 173.1, 165.3,
    155.1, 155.2, 157.2, 148.2, 156.4
  ),
  taiju = c(
    77.1, 60.3, 74.0, 67.5, 70.8,
    54.7, 62.2, 57.7, 51.1, 53.6
  ),
  taishibo = c(
    24.0, 21.8, 23.8, 27.1, 30.0,
    28.7, 34.8, 30.0, 34.6, 25.4
  ),
  ketuatu = c(
    93, 119, 115, 113, 123,
    139, 120, 108, 114, 102
  ),
  k_kiou = c(0, 1, 1, 1, 1, 1, 0, 0, 1, 0)
)

# Typing the object name at the console prints the whole data frame
data
> data
   id nenrei seibetu sincho taiju taishibo ketuatu k_kiou
1   1     52       0  169.9  77.1     24.0      93      0
2   2     55       0  172.1  60.3     21.8     119      1
3   3     52       0  175.7  74.0     23.8     115      1
4   4     56       0  173.1  67.5     27.1     113      1
5   5     56       0  165.3  70.8     30.0     123      1
6   6     41       1  155.1  54.7     28.7     139      1
7   7     44       1  155.2  62.2     34.8     120      0
8   8     56       1  157.2  57.7     30.0     108      0
9   9     54       1  148.2  51.1     34.6     114      1
10 10     57       1  156.4  53.6     25.4     102      0
R
# Lookup table pairing each column name of `data` with its Japanese
# description. The two vectors are positionally aligned: the n-th entry of
# `label` describes the n-th entry of `variable`.
label <- data.frame(
  variable = c("id", "nenrei", "seibetu", "sincho",
               "taiju", "taishibo", "ketuatu", "k_kiou"),
  label = c("id", "年齢", "性別(0=男, 1=女)", "身長",
            "体重", "体脂肪率", "収縮期血圧", "高血圧既往歴(0=無、1=有)")
)

# Open the table in the spreadsheet-style data viewer
View(label)

代表値

R
# Keep only the continuous measurements for the summary statistics
data2 <- data[c("nenrei", "sincho", "taiju", "taishibo", "ketuatu")]
# Mean of each column (one number per column)
vapply(data2, mean, FUN.VALUE = numeric(1))
# Median of each column
vapply(data2, median, FUN.VALUE = numeric(1))
# Mode of the hypertension-history flag: tabulate the values, pick the most
# frequent one (the first such value if there is a tie), then convert its
# name back to a number
as.numeric(names(which.max(table(data[["k_kiou"]]))))

散布度

R
# Standard deviation of each column
vapply(data2, sd, FUN.VALUE = numeric(1))
# Interquartile range of each column
vapply(data2, IQR, FUN.VALUE = numeric(1))
# Minimum of each column
vapply(data2, min, FUN.VALUE = numeric(1))
# Maximum of each column
vapply(data2, max, FUN.VALUE = numeric(1))

グラフ

グラフ(血圧)ヒストグラム

R
# Histogram of systolic blood pressure (ketuatu). No title or axis label is
# given, so R derives both from the expression `data$ketuatu`.
hist(data$ketuatu)

グラフ(血圧)箱ひげ図

R
# Box plot of systolic blood pressure (ketuatu) over all rows of `data`
boxplot(data$ketuatu)

グラフ(血圧)散布図(男女別)

R
# Plot of systolic blood pressure by sex, one point per subject.
# ggplot2 has to be attached before ggplot() can be called; without this
# line the snippet fails with "could not find function" when the page is run
# from top to bottom, because the only other library(ggplot2) call comes in a
# later section.
library(ggplot2)

# NOTE: this expects `seibetu` to still hold the raw 0/1 codes
# (0 = male, 1 = female).
ggplot(data, aes(x = factor(seibetu, levels = c(0, 1)),
                 y = ketuatu,
                 color = factor(seibetu))) +
  geom_point(size = 2) +
  # Fixed colour per sex code: 0 -> blue, 1 -> red
  scale_color_manual(values = c("0" = "blue", "1" = "red")) +
  # Show readable category names on the x-axis instead of 0/1
  scale_x_discrete(labels = c("0" = "男性", "1" = "女性")) +
  theme_classic() +
  xlab("性別") +
  ylab("血圧 (mmHg)") +
  # The x-axis already identifies the groups, so the colour legend is hidden
  theme(legend.position = "none")

グラフ(血圧)エラーバーグラフ

R
# Error-bar chart: mean systolic blood pressure per sex, with bars showing
# mean +/- one standard error.
library(dplyr)
library(ggplot2)

# Recode sex from 0/1 to labelled factor levels.
# The conversion is guarded so the snippet can be run more than once: once
# `seibetu` is already a factor its values are the labels, no longer 0/1, and
# converting again against levels 0/1 would turn every value into NA.
if (!is.factor(data$seibetu)) {
  data$seibetu <- factor(data$seibetu,
                         levels = c(0, 1),
                         labels = c("男性", "女性"))
}

# Per-sex summary: mean, standard deviation, group size and standard error.
# `se` is computed from the `sd` and `n` columns created just above it in the
# same summarise() call.
summary_data <- data %>%
  group_by(seibetu) %>%
  summarise(
    mean = mean(ketuatu, na.rm = TRUE),
    sd   = sd(ketuatu, na.rm = TRUE),
    n    = n(),
    se   = sd / sqrt(n)
  )

# Plot the group means as points with mean +/- SE error bars
ggplot(summary_data, aes(x = seibetu, y = mean, color = seibetu)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
                width = 0.15) +
  # Fixed colour per sex label
  scale_color_manual(values = c("男性" = "blue", "女性" = "red")) +
  theme_classic() +
  ylab("血圧 (mmHg)") +
  xlab("性別") +
  # The x-axis already identifies the groups, so the colour legend is hidden
  theme(legend.position = "none")

散布図(年齢と血圧の関係)

R
# Scatter plot with age (nenrei) on the x-axis and systolic blood pressure
# (ketuatu) on the y-axis. No axis labels are given, so R derives them from
# the argument expressions.
plot(data$nenrei, data$ketuatu)

度数分布表と円グラフ

タイトルとURLをコピーしました