第 1 章 直方图

1.1 基本直方图

# Draw 100 random values with rnorm() and keep them in a one-column data frame.
data <- data.frame(value = rnorm(100))

# Histogram of `value`, relying on geom_histogram()'s default binning.
ggplot(data = data, mapping = aes(x = value)) +
  geom_histogram()

1.1.1 控制bin宽度

# Load the data set; the code below expects it to provide a `price` column.
data1 <- read.table("Datas/1_OneNum.csv", header = TRUE)

# Bin widths to compare, one histogram per width.
binwidths <- c(3, 15, 30, 100)

# The price filter does not depend on the bin width, so apply it only once
# instead of repeating it for every plot.
prices_below_300 <- data1 %>%
  filter(price < 300)

# Build one histogram per bin width. The list is named by bin width
# ("3", "15", "30", "100") so individual plots can be looked up later.
plots <- lapply(binwidths, function(bin) {
  ggplot(prices_below_300, aes(x = price)) +
    geom_histogram(
      binwidth = bin,
      fill = "#69b3a2",
      color = "#e9ecef",
      alpha = 0.9
    ) +
    ggtitle(paste0("Bin size = ", bin)) +
    theme_ipsum() +
    theme(plot.title = element_text(size = 15))
})
names(plots) <- as.character(binwidths)

# Arrange all histograms in a single figure.
ggarrange(plotlist = plots)

1.2 分组直方图

# Two simulated groups of 1000 draws each; the second group uses mean = 4.
data2 <- data.frame(
  type = rep(c("variable 1", "variable 2"), each = 1000),
  value = c(rnorm(1000), rnorm(1000, mean = 4))
)

# Overlay both groups in one panel: position = "identity" stops the bars
# from being stacked, and the partial transparency keeps the overlap visible.
ggplot(data = data2, mapping = aes(x = value, fill = type)) +
  geom_histogram(position = "identity", alpha = 0.6, color = "#e9ecef") +
  scale_fill_manual(values = c("#69b3a2", "#404080")) +
  theme_ipsum() +
  labs(fill = "")

1.3 分面直方图

# Read the comma-separated input file.
# NOTE(review): presumably every column holds the numeric answers for one
# phrase -- confirm against the data file.
data3 <- read.table("Datas/probly.csv", header = TRUE, sep = ",")

# Reshape to long format: one row per (column name, value) pair.
# pivot_longer() replaces the superseded gather(); values_transform coerces
# every column to numeric before the columns are combined, which is what the
# previous as.numeric() call did after reshaping.
data3 <- data3 %>%
  pivot_longer(
    cols = everything(),
    names_to = "text",
    values_to = "value",
    values_transform = list(value = as.numeric)
  ) %>%
  mutate(
    # read.table() turns spaces in column names into dots; undo that.
    text = gsub("\\.", " ", text),
    value = round(value, 0)
  )

# One histogram panel per phrase, with panels ordered by fct_reorder()
# (median value by default).
data3 %>%
  mutate(text = fct_reorder(text, value)) %>%
  ggplot(aes(x = value, color = text, fill = text)) +
  geom_histogram(alpha = 0.6, binwidth = 5) +
  scale_fill_viridis(discrete = TRUE) + # from library(viridis)
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(
    legend.position = "none",
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8)
  ) +
  # The plotted variable is on the x axis; the y axis is the bin count.
  xlab("Assigned Probability (%)") +
  ylab("Count") +
  facet_wrap(~text)

1.4 镜像直方图

# Two simulated variables of 1000 draws each; var2 uses mean = 2.
data4 <- data.frame(
  var1 = rnorm(1000),
  var2 = rnorm(1000, mean = 2)
)

# Mirrored density histograms: var1 is drawn above the axis and var2 below it
# (its density is negated).
# - No plot-level x mapping is set, because data4 has no column named `x`;
#   each layer maps its own variable.
# - after_stat(density) replaces the deprecated `..density..` notation.
# - annotate() draws each text label exactly once; geom_label() with constant
#   aesthetics would draw one label per data row.
ggplot(data4) +
  geom_histogram(aes(x = var1, y = after_stat(density)), fill = "#69b3a2") +
  annotate("label", x = 4.5, y = 0.25, label = "variable1", color = "#69b3a2") +
  geom_histogram(aes(x = var2, y = -after_stat(density)), fill = "#404080") +
  annotate("label", x = 4.5, y = -0.25, label = "variable2", color = "#404080") +
  theme_ipsum() +
  xlab("value of x")

1.5 带边缘的直方图

# Base scatter plot of mpg against wt from mtcars; `cyl` is mapped to both
# point colour and point size, and the legend is hidden.
p <- ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = cyl, size = cyl)
) +
  geom_point() +
  theme(legend.position = "none")

# The same scatter plot with three kinds of marginal plots attached.
p1 <- ggMarginal(p, type = "histogram") # marginal histograms
p2 <- ggMarginal(p, type = "density") # marginal density curves
p3 <- ggMarginal(p, type = "boxplot") # marginal boxplots

# Combine the three variants into one figure, labelled by variable name.
ggarrange(p1, p2, p3, labels = c("p1", "p2", "p3"))

1.5.1 边缘样式

# Relative size of the marginal plots: the main plot is 10x bigger than
# the marginals.
p1 <- ggMarginal(p, size = 10, type = "histogram")

# Customised marginals: slateblue fill, and 10 bins for the x-axis marginal.
p2 <- ggMarginal(
  p,
  type = "histogram",
  fill = "slateblue",
  xparams = list(bins = 10)
)

# Marginal plot on the x axis only, drawn in purple.
p3 <- ggMarginal(p, margins = "x", color = "purple", size = 4)

# Combine the three styled variants into one labelled figure.
ggarrange(p1, p2, p3, labels = c("p1", "p2", "p3"))