.Rhistory

select(created_at))
# Print the created_at timestamps of Wylie's tweets in tweets_20181225.
tweets_20181225 %>%
  filter(screen_name == "Wylie_H_Dallas") %>%
  select(created_at)

# Earliest and latest Wylie tweet in each data set. These bound the window
# that every account's tweets are restricted to below.
wylie_first_tweet_20181225 <- tweets_20181225 %>%
  filter(screen_name == "Wylie_H_Dallas") %>%
  pull(created_at) %>%
  min()
wylie_last_tweet_20181225 <- tweets_20181225 %>%
  filter(screen_name == "Wylie_H_Dallas") %>%
  pull(created_at) %>%
  max()
wylie_first_tweet_20190507 <- tweets_20190507 %>%
  filter(screen_name == "Wylie_H_Dallas") %>%
  pull(created_at) %>%
  min()
wylie_last_tweet_20190507 <- tweets_20190507 %>%
  filter(screen_name == "Wylie_H_Dallas") %>%
  pull(created_at) %>%
  max()

# Keep only tweets posted inside Wylie's first-to-last window (inclusive).
tweets_20181225_filtered <- tweets_20181225 %>%
  filter(
    created_at >= wylie_first_tweet_20181225,
    created_at <= wylie_last_tweet_20181225
  )
tweets_20190507_filtered <- tweets_20190507 %>%
  filter(
    created_at >= wylie_first_tweet_20190507,
    created_at <= wylie_last_tweet_20190507
  )
# Build a one-row-per-word table (screen_name, word) from both filtered
# tweet sets.
tweet_words <- bind_rows(tweets_20181225_filtered, tweets_20190507_filtered) %>%
  # Retweets are somebody else's words, so they are dropped.
  filter(!is_retweet) %>%
  # Order chronologically by posting time.
  arrange(created_at) %>%
  # Only the author and the tweet text are needed from here on.
  select(screen_name, text) %>%
  # Keep a single row per distinct tweet text.
  distinct(text, .keep_all = TRUE) %>%
  # Strip t.co links (they appear when a tweet references another tweet) and
  # "&amp;" entity references; both are noise for this analysis.
  mutate(text = str_replace_all(text, "https?://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  # Split each tweet into individual words; words, not tweets, are analyzed.
  # NOTE(review): token = "tweets" is reportedly no longer supported by newer
  # tidytext releases -- confirm against the installed version.
  unnest_tokens(output = word, input = text, token = "tweets") %>%
  filter(
    # Keep tokens with at least one letter. unnest_tokens has already
    # lowercased everything, hence no A-Z in the pattern.
    str_detect(word, "[a-z]"),
    # Stop words contribute nothing meaningful, so they are removed.
    !word %in% stop_words$word
  )
# Horizontal bar chart of the 16 most frequent words across all accounts.
tweet_words %>%
  count(word, sort = TRUE) %>%
  head(n = 16) %>%
  # Turn word into a factor ordered by count so the bars are sorted.
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Most commonly used words",
    subtitle = "from tweets from Dallas-area media personalities and Wylie H. Dallas",
    y = "# of uses among all accounts"
  )
# First ten (alphabetically) of Wylie's word rows that contain "dallas",
# leaving out @-mentions and #-hashtags.
tweet_words %>%
  filter(
    screen_name == "Wylie_H_Dallas",
    str_detect(word, "dallas"),
    !str_detect(word, "[@#]")
  ) %>%
  arrange(word) %>%
  head(n = 10) %>%
  kable() %>%
  kable_styling(full_width = FALSE)

# Number of times each account used each word, most frequent first.
word_counts <- count(tweet_words, screen_name, word, sort = TRUE)

# Same "dallas" listing as above, now taken from the per-account counts.
word_counts %>%
  filter(
    screen_name == "Wylie_H_Dallas",
    str_detect(word, "dallas"),
    !str_detect(word, "[@#]")
  ) %>%
  arrange(word) %>%
  head(n = 10) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
# Compute TF-IDF using "word" as term and "screen_name" as document, sorted so
# the most distinctive words come first.
word_tf_idf <- word_counts %>%
  bind_tf_idf(word, screen_name, n) %>%
  arrange(desc(tf_idf))

# Ten highest-TF-IDF words for Wylie H. Dallas.
word_tf_idf %>%
  filter(screen_name == "Wylie_H_Dallas") %>%
  arrange(desc(tf_idf)) %>%
  select(screen_name, word, tf_idf) %>%
  head(10) %>%
  kable(caption = "Wylie H. Dallas's most distinct words") %>%
  kable_styling(full_width = FALSE)

# Ten highest-TF-IDF words for Jim Schutze.
word_tf_idf %>%
  filter(screen_name == "JimSchutze") %>%
  arrange(desc(tf_idf)) %>%
  select(screen_name, word, tf_idf) %>%
  head(10) %>%
  kable(caption = "Jim Schutze's most distinct words") %>%
  kable_styling(full_width = FALSE)

# Pairwise similarity between accounts based on their TF-IDF word vectors.
# upper = FALSE is passed so each pair is listed once; sort = TRUE puts the
# most similar pairs first. Computed once: an identical second run only
# repeats the work.
similarity <- word_tf_idf %>%
  pairwise_similarity(screen_name, word, tf_idf, upper = FALSE, sort = TRUE)
However, these are only the top ten words. Wylie, for example, has `r word_tf_idf %>% filter(screen_name == "Wylie_H_Dallas") %>% count()` words total, so we need to do something more sophisticated. That will be a pairwise similarity calculation between all of each author's words, taking into account their TF-IDF statistics:
```{r}
# Similarity of every pair of accounts, using screen_name as the item, word as
# the feature and tf_idf as the value.
similarity <- pairwise_similarity(
  word_tf_idf,
  item = screen_name,
  feature = word,
  value = tf_idf,
  upper = FALSE,
  sort = TRUE
)
```
Let's look at the top 10 matches:
```{r}
# Table of the ten account pairs with the highest similarity score.
similarity %>%
  arrange(desc(.data$similarity)) %>%
  head(n = 10) %>%
  kable() %>%
  kable_styling(full_width = FALSE)
```
This makes sense. What you are seeing is a high degree of similarity of distinct-word use between NBC5 accounts. Remember above, when I observed how NBC-related keywords rank high in total counts? NBC5 may be closely managing its Twitter accounts, which means they may have similar use of words that are relatively distinct across all 51 journalists + Wylie.
```{r}
# Limit the list to just comparisons with Wylie. Wylie can be on either side of
# a pair, so the two item columns are glued together and Wylie's own name is
# then removed, leaving only the other account's screen name.
similarity_to_wylie <- similarity %>%
  filter(item1 == "Wylie_H_Dallas" | item2 == "Wylie_H_Dallas") %>%
  unite(account, item1, item2, sep = "") %>%
  mutate(account = str_replace(account, "Wylie_H_Dallas", ""))

# First ten rows of the Wylie comparisons.
similarity_to_wylie %>%
  head(10) %>%
  kable() %>%
  kable_styling(full_width = FALSE)

# Turning the account column into a factor so that ggplot doesn't reorder
# everything.
similarity_to_wylie <- similarity_to_wylie %>%
  mutate(account = reorder(account, similarity))

# Similarity to Wylie for every account.
similarity_to_wylie %>%
  ggplot(aes(x = account, y = similarity)) +
  geom_col() +
  coord_flip()

# Same chart, restricted to the 20 highest similarity scores. The weighting
# column is named explicitly rather than relying on top_n() picking the last
# column of the table.
similarity_to_wylie %>%
  top_n(20, similarity) %>%
  ggplot(aes(x = account, y = similarity)) +
  geom_col() +
  coord_flip()
# Fractional hour of day (hour + minutes / 60) at which each tweet was posted.
# NOTE(review): strftime() formats in the session's time zone -- confirm that
# is the intended zone for created_at.
tweets <- tweets %>%
  mutate(
    hour = as.numeric(strftime(created_at, format = "%H")) +
      as.numeric(strftime(created_at, format = "%M")) / 60
  )

# Merge the two filtered tweet sets, then add the same hour-of-day column.
tweets_filtered <- bind_rows(tweets_20181225_filtered, tweets_20190507_filtered) %>%
  mutate(
    hour = as.numeric(strftime(created_at, format = "%H")) +
      as.numeric(strftime(created_at, format = "%M")) / 60
  )
# Roster of accounts: the members of the Advocamentum "news-media" Twitter
# list plus two accounts added by hand. It is rebuilt from the list in one
# step so that re-running this section never appends duplicate rows.
advocamentum_news_media <- lists_members(owner_user = "Advocamentum", slug = "news-media") %>%
  add_row(name = "Wylie H. Dallas",
          screen_name = "Wylie_H_Dallas") %>%
  add_row(name = "Philip Kingston",
          screen_name = "PhilipTKingston")

# One tweet-time histogram per account, in alphabetical order of screen name.
# Each plot is printed and also written to "<screen_name>.png" in the working
# directory.
for (i in sort(unique(tweets_filtered$screen_name))) {
  plot <- tweets_filtered %>%
    filter(screen_name == i) %>%
    ggplot(aes(x = hour)) +
    # 24 * 6 bins over the day, i.e. ten-minute bins when hour spans 0-24.
    geom_histogram(bins = 24 * 6) +
    scale_x_continuous(breaks = seq(0, 24, 4)) +
    xlab("hour of day") +
    ylab("count of tweets") +
    # Title from the loop value itself: a roster lookup yields a zero-length
    # title for accounts missing from the roster.
    ggtitle(paste0(i, "'s tweet time histogram"))
  print(plot)
  # paste0() keeps a stray space out of the file name (paste() would produce
  # "name .png"); the plot is passed explicitly instead of via last_plot().
  ggsave(paste0(i, ".png"), plot = plot)
}
# Number of rows in tweets_filtered that belong to JimSchutze.
count(tweets_filtered %>% filter(screen_name == "JimSchutze"))
# Attempts to put Wylie's and Jim Schutze's tweets side by side with spread().
# NOTE(review): in the first two attempts the `value` argument is left empty,
# so the call is incomplete as written.
tweets %>%
filter(screen_name == "Wylie_H_Dallas" |
screen_name == "JimSchutze") %>%
spread(key = screen_name, value = )
# Same attempt, starting from tweets_filtered instead of tweets.
tweets_filtered %>%
filter(screen_name == "Wylie_H_Dallas" |
screen_name == "JimSchutze") %>%
spread(key = screen_name, value = )
# Third attempt, spreading created_at into one column per screen name.
# NOTE(review): no pipe follows spread(), so the ggplot() call below is a
# separate statement and receives no data -- presumably a %>% was intended;
# confirm before reusing.
tweets_filtered %>%
filter(screen_name == "Wylie_H_Dallas" |
screen_name == "JimSchutze") %>%
spread(key = screen_name, value = created_at)
ggplot(aes(x = hour)) +
geom_histogram(bins = 24 * 6) +
scale_x_continuous(breaks = seq(0, 24, 4)) +
xlab("hour of day") +
ylab("count of tweets")
# Runs RStudio's active (unsaved) document, echoing each expression.
source('~/.active-rstudio-document', echo=TRUE)
library(httr)
library(jsonlite)
library(dplyr)
library(lubridate)

# Facebook app credentials.
# They are read from the environment (for example from ~/.Renviron) so that
# the app secret never appears in source or history files. Any secret that was
# previously written in plain text should be treated as compromised and
# regenerated in the Facebook developer console.
app_id <- Sys.getenv("FB_APP_ID")
app_secret <- Sys.getenv("FB_APP_SECRET")
if (!nzchar(app_id) || !nzchar(app_secret)) {
  stop("Set the FB_APP_ID and FB_APP_SECRET environment variables.",
       call. = FALSE)
}

# Define the app
fb_app <- oauth_app(appname = "facebook",
                    key = app_id,
                    secret = app_secret)

# Get OAuth user access token. cache = FALSE requests a fresh token instead of
# reusing one saved by an earlier run.
# Alternative that was also tried: fbOAuth(app_id, app_secret), from
# https://bigdataenthusiast.wordpress.com/tag/facebook-data-extraction-using-r/
# (see Step 2).
fb_token <- oauth2.0_token(oauth_endpoints("facebook"),
                           fb_app,
                           scope = "public_profile",
                           type = "application/x-www-form-urlencoded",
                           cache = FALSE)

# GET request for your user information
response <- GET("https://graph.facebook.com",
                path = "/me",
                config = config(token = fb_token))
# Cookies set by the response, then the content returned
cookies(response)
content(response)

# Same request for a specific user id
response <- GET("https://graph.facebook.com",
                path = "/681877972",
                config = config(token = fb_token))
# Show content returned
content(response)
# based on http://varianceexplained.org/r/op-ed-text-analysis/
library(rtweet)
library(tidyverse)
library(tidytext)
library(knitr)
library(kableExtra)
library(widyr)
# Members of the Advocamentum "news-media" Twitter list, with Wylie H. Dallas
# and Philip Kingston appended by hand.
advocamentum_news_media <- lists_members(owner_user = "Advocamentum", slug = "news-media") %>%
  add_row(
    name = "Wylie H. Dallas",
    screen_name = "Wylie_H_Dallas"
  ) %>%
  add_row(
    name = "Philip Kingston",
    screen_name = "PhilipTKingston"
  )
# Install the packages this analysis needs, but only those not already
# available, so re-running this section does not reinstall everything.
cran_packages <- c(
  "kableExtra", "rtweet", "tidytext", "tidyverse", "widyr",
  "openssl", "httr", "devtools", "knitr"
)
# requireNamespace() reports availability without attaching the package.
is_installed <- vapply(cran_packages, requireNamespace, logical(1), quietly = TRUE)
if (any(!is_installed)) {
  install.packages(cran_packages[!is_installed])
}

# RWordPress and XMLRPC are installed from the duncantl GitHub repositories
# rather than with install.packages().
if (!requireNamespace("RWordPress", quietly = TRUE)) {
  devtools::install_github(c("duncantl/XMLRPC", "duncantl/RWordPress"))
}
library(knitr)
library(RWordPress)

# keyring keeps the blog credentials in the operating system's credential
# store, so no password is written in source or history files. Any password
# that was previously written in plain text should be treated as compromised
# and changed.
if (!requireNamespace("keyring", quietly = TRUE)) {
  install.packages("keyring")
}
library(keyring)

# Prompt for the credentials only when they have not been stored yet.
stored_services <- key_list()$service
if (!"blog_username" %in% stored_services) {
  key_set("blog_username")
}
if (!"blog_password" %in% stored_services) {
  key_set("blog_password")
}

# WordpressLogin must be a named vector of the form c(username = password).
# c() cannot take a function call as an element name, so the name is attached
# with setNames() instead.
options(
  WordpressLogin = setNames(key_get("blog_password"), key_get("blog_username")),
  WordpressURL = "https://www.arencambre.com/xmlrpc.php"
)

# Knit index.Rmd and upload it as a new, unpublished WordPress page.
knit2wp("index.Rmd",
        title = "Who is Wylie H. Dallas?",
        publish = FALSE,
        action = "newPage")