library(tidyverse)
library(stringr)
library(lubridate)
library(here)

# Read the raw tweets from talks/trump-tweets.csv; the path is built with
# here() rather than being hard-coded relative to the working directory.
tb_raw <- here("talks", "trump-tweets.csv") %>%
  read_csv()

# Print the freshly loaded tibble for a first look.
tb_raw

# The spreadsheet-style viewer is only opened in an interactive session.
if (interactive()) {
  View(tb_raw)
}

# Patterns used throughout the script: plain alternations of lowercase words.
source_regex <- "android|iphone"
tword_regex <- "badly|crazy|weak|spent|strong|dumb|joke|guns|funny|dead"

# Build the working tibble from the raw data:
#   source - replaced by the first `source_regex` hit (NA when nothing matches)
#   twords - list-column holding every `tword_regex` hit found in `tweet`
tb <- mutate(
  tb_raw,
  source = str_extract(string = source, pattern = source_regex),
  twords = str_extract_all(string = tweet, pattern = tword_regex)
)

# Print the tweet text on its own.
tb[["tweet"]]

# Print the remaining columns without the tweet text, then open the viewer
# (interactive sessions only).
select(tb, -tweet)
if (interactive()) {
  View(tb)
}

# Show each tweet with its `tword_regex` matches highlighted (interactive
# sessions only).
# NOTE(review): newer stringr releases deprecate str_view_all() in favour of
# str_view() -- confirm against the installed version before switching.
if (interactive()) {
  str_view_all(tb$tweet, tword_regex)
}

# Add per-tweet summaries of the matches:
#   n     - number of entries collected in `twords`
#   hour  - hour component of the `created` timestamp
#   start - gregexpr() result for `tword_regex` in `tweet`, i.e. the start
#           position of each match (gregexpr() reports -1 for "no match")
tb <- tb %>%
  mutate(n = map_int(twords, length)) %>%
  mutate(hour = hour(created)) %>%
  mutate(start = gregexpr(pattern = tword_regex, text = tweet))
# Another possibility for `start`, which would require more processing and is
# therefore less useful for a talk example, but more useful in real life:
# start = str_locate_all(tweet, tword_regex)

# Open the viewer again now that the derived columns exist (interactive
# sessions only).
if (interactive()) {
  View(tb)
}

# Print the matched words for rows 4 and 7, then the gregexpr() result
# stored for row 7.
tb[["twords"]][c(4, 7)]
tb[["start"]][[7]]

# Keep tweets that satisfy all three conditions:
#   1. created before 2pm (hour < 14)
#   2. containing 1 or 2 matched words
#   3. earliest match starting within the first 30 characters
tb %>%
  filter(hour < 14) %>%
  filter(n >= 1, n <= 2) %>%
  filter(between(map_int(start, min), 0, 30))

# Keep tweets whose matched words include both "strong" and "weak".
filter(
  tb,
  map_lgl(twords, function(hits) all(c("strong", "weak") %in% hits))
)