#ELGL19 On Twitter: A Brief Synthesis

Posted on May 24, 2019 | 4 minute read

The Boring Bits

library(tidyverse)
library(lubridate)
library(tidytext)
library(kableExtra)

# Load the saved tweet data from disk and make sure it is a tibble.
tweets <- as_tibble(read_rds("data/tweets.rds"))

#' Shift a date-time vector back by a fixed number of hours.
#'
#' The charts in this file call this on hourly tweet timestamps before
#' plotting. NOTE(review): the 4-hour default presumably converts UTC to US
#' Eastern Daylight Time -- confirm; a fixed offset does not follow
#' daylight-saving changes.
#'
#' @param time A date-time vector accepted by lubridate's `hour()` accessor.
#' @param offset_hours Number of hours to subtract. Defaults to 4, the value
#'   that used to be hard-coded.
#' @return `time` with its hour component reduced by `offset_hours`.
f <- function(time, offset_hours = 4) {
  shifted <- time
  # Go through lubridate's `hour<-` setter; this relies on the setter rolling
  # negative hours over into the previous day.
  hour(shifted) <- hour(shifted) - offset_hours
  shifted
}


24 Hours of Tweets

# Column chart of the running total of original (non-retweet) tweets by hour.
tweets %>%
  filter(!is_retweet) %>%
  # Bucket every tweet into its hour, then shift the bucket with f().
  mutate(created_at = f(floor_date(created_at, unit = "hour"))) %>%
  group_by(created_at) %>%
  summarise(tweet_count = n()) %>%
  ungroup() %>%
  arrange(created_at) %>%
  mutate(running_total = cumsum(tweet_count)) %>%
  # Keep the 24 hourly buckets with the largest running totals.
  top_n(24, running_total) %>%
  ggplot(aes(x = created_at, y = running_total)) +
  geom_col(fill = "#112E51") +
  geom_label(
    aes(label = scales::comma(running_total)),
    nudge_y = -50,
    size = 2
  ) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "New Tweets: #ELGL19",
    caption = "Author: Jason Jones, @packpridejones",
    x = NULL,
    y = NULL
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.y = element_line(color = "light grey")
  )


Retweets Are Tweets Too

# Column chart of the running total of retweets by hour.
tweets %>%
  filter(is_retweet) %>%
  # Bucket every retweet into its hour, then shift the bucket with f().
  mutate(created_at = f(floor_date(created_at, unit = "hour"))) %>%
  group_by(created_at) %>%
  summarise(tweet_count = n()) %>%
  ungroup() %>%
  arrange(created_at) %>%
  mutate(running_total = cumsum(tweet_count)) %>%
  # Keep the 24 hourly buckets with the largest running totals.
  top_n(24, running_total) %>%
  ggplot(aes(x = created_at, y = running_total)) +
  geom_col(fill = "#FF7043") +
  geom_label(
    aes(label = scales::comma(running_total)),
    nudge_y = -50,
    size = 2
  ) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "Retweets: #ELGL19",
    caption = "Author: Jason Jones, @packpridejones",
    x = NULL,
    y = NULL
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.y = element_line(color = "light grey")
  )


Biggest Fans

Most Original Tweets

# Table of the accounts that posted the most original (non-retweet) tweets.
tweets %>%
  filter(!is_retweet) %>%
  group_by(screen_name) %>%
  summarise(tweets = n()) %>%
  # top_n() keeps ties, so the table can hold more than 10 rows.
  top_n(10, tweets) %>%
  arrange(desc(tweets)) %>%
  kable(col.names = c("Twitter ID", "Tweet Count")) %>%
  kable_styling()
Twitter ID Tweet Count
kwyatt23 134
kowyatt 97
TheBaconDiaries 76
acornsandnuts 54
RealMaggieJones 54
benkittelson56 41
JBStephens1 37
Josh_Edwards11 35
kimstric 34
77ccampbell 32
BarkmanSusan 32


Cheering Section

Most Retweets

# Table of the accounts that posted the most retweets.
tweets %>%
  filter(is_retweet) %>%
  group_by(screen_name) %>%
  summarise(tweets = n()) %>%
  # top_n() keeps ties, so the table can hold more than 10 rows.
  top_n(10, tweets) %>%
  arrange(desc(tweets)) %>%
  kable(col.names = c("Twitter ID", "Retweet Count")) %>%
  kable_styling()
Twitter ID Retweet Count
ELGL50 243
SEELGL 125
77ccampbell 75
MountainELGL 67
NWELGL 58
kowyatt 56
danwein 51
acornsandnuts 45
CALELGL 38
SWELGL 37


Enough Already!

Top Average Tweet Length

# Horizontal bar chart of the 15 accounts with the highest mean
# display_text_width across their original tweets.
tweets %>%
  filter(!is_retweet) %>%
  group_by(screen_name) %>%
  summarise(avg_length = mean(display_text_width)) %>%
  top_n(15, avg_length) %>%
  # Order the bars by average length instead of alphabetically.
  mutate(screen_name = reorder(screen_name, avg_length)) %>%
  ggplot(aes(x = screen_name, y = avg_length)) +
  geom_col(fill = "#0095A8") +
  coord_flip() +
  labs(
    title = "Average Tweet Length",
    subtitle = "Top 15",
    caption = "Author: Jason Jones, @packpridejones",
    x = NULL,
    y = NULL
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_line(color = "light grey")
  )


Short and Sweet

# Horizontal bar chart of the 15 accounts with the lowest mean
# display_text_width across their original tweets.
tweets %>%
  filter(!is_retweet) %>%
  group_by(screen_name) %>%
  summarise(avg_length = mean(display_text_width)) %>%
  # A negative n makes top_n() keep the smallest values.
  top_n(-15, avg_length) %>%
  # Negate the average so the bars are ordered from largest to smallest.
  mutate(screen_name = reorder(screen_name, -avg_length)) %>%
  ggplot(aes(x = screen_name, y = avg_length)) +
  geom_col(fill = "#0095A8") +
  coord_flip() +
  labs(
    title = "Average Tweet Length",
    subtitle = "Lowest 15",
    caption = "Author: Jason Jones, @packpridejones",
    x = NULL,
    y = NULL
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_line(color = "light grey")
  )


iPhone or Android?

Twitter tool of choice

# Table of the most common `source` values among original tweets.
tweets %>%
  filter(!is_retweet) %>%
  group_by(source) %>%
  summarise(Count = n()) %>%
  # top_n() keeps ties, so the table can hold more than 10 rows.
  top_n(10, Count) %>%
  arrange(desc(Count)) %>%
  kable() %>%
  kable_styling()
source Count
Twitter for iPhone 866
Twitter for Android 353
Twitter Web Client 99
Twitter Web App 71
Twitter for iPad 12
Instagram 7
HubSpot 4
IFTTT 2
LinkedIn 2
Sprout Social 2


Stealing Thunder?

Retweet has more favorites than original tweet

# Table counting, per account, the rows where favorite_count is lower than
# retweet_favorite_count.
# NOTE(review): confirm which of the two columns belongs to the retweet and
# which to the original tweet, so the comparison direction matches the
# section heading.
tweets %>%
  filter(retweet_favorite_count > favorite_count) %>%
  group_by(screen_name) %>%
  summarise(thunder_stolen = n()) %>%
  # top_n() keeps ties, so the table can hold more than 10 rows.
  top_n(10, thunder_stolen) %>%
  arrange(desc(thunder_stolen)) %>%
  kable(col.names = c("Screen Name", "Count Of Thunder Steals")) %>%
  kable_styling()
Screen Name Count Of Thunder Steals
ELGL50 238
SEELGL 121
77ccampbell 75
MountainELGL 65
NWELGL 57
kowyatt 56
danwein 51
acornsandnuts 45
CALELGL 37
MidwestELGL 36


Language Is Important

Scoring Tweets by Language Sentiment

# Line chart of the cumulative AFINN sentiment score of tweet text by hour.
tweets %>%
  # NOTE(review): as.POSIXct() is called without `tz`, so the cutoff is read
  # in the session's time zone -- confirm that matches created_at's zone.
  filter(created_at > as.POSIXct("2019-05-15 23:59:59")) %>%
  # One row per word; only the timestamp and the word are needed downstream.
  unnest_tokens("word", text) %>%
  select(created_at, word) %>%
  # Explicit join keys: no reliance on whichever columns happen to share a
  # name, and no "Joining, by = ..." message.
  anti_join(stop_words, by = "word") %>%
  # Bucket every word into its hour, then shift the bucket with f().
  mutate(created_at = f(floor_date(created_at, unit = "hour"))) %>%
  inner_join(get_sentiments(lexicon = "afinn"), by = "word") %>%
  group_by(created_at) %>%
  summarise(score = sum(value)) %>%
  ungroup() %>%
  arrange(created_at) %>%
  mutate(sent_flow = cumsum(score)) %>%
  ggplot(aes(x = created_at, y = sent_flow)) +
  geom_line() +
  geom_point(color = "#FF7043", size = 3) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "#ELGL19: Twitter Cumulative Sentiment",
    subtitle = "Y'all Some Positive People!",
    caption = "Author: Jason Jones, @packpridejones",
    x = NULL,
    y = NULL
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.y = element_line(color = "light grey")
  )


Most Used Words

# Count how often each word from the "bing" sentiment lexicon appears in
# tweet text after the cutoff, then draw the counts as a word cloud.
sentiment <- tweets %>%
  # NOTE(review): as.POSIXct() is called without `tz`, so the cutoff is read
  # in the session's time zone -- confirm that matches created_at's zone.
  filter(created_at > as.POSIXct("2019-05-15 23:59:59")) %>%
  unnest_tokens("word", text) %>%
  # Only the word matters for the per-word count; the hourly bucketing of
  # created_at that used to run here never affected the result.
  select(word) %>%
  # Explicit join keys: no reliance on whichever columns happen to share a
  # name, and no "Joining, by = ..." message.
  anti_join(stop_words, by = "word") %>%
  inner_join(get_sentiments(lexicon = "bing"), by = "word") %>%
  group_by(word) %>%
  summarise(word_count = n()) %>%
  ungroup()

wordcloud::wordcloud(
  sentiment$word,
  sentiment$word_count,
  colors = c("#0095A8", "#112E51", "#FF7043")
)


Want To Play With The Data Too?




Share via

Tags:
comments powered by Disqus