#import library
library(tidyverse)
library(dplyr)
library(plotly)
library(haven)
library(viridis)

knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  fig.width = 10,
  fig.height = 6,
  out.width = "90%"
)

options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)
scale_colour_discrete = scale_colour_viridis_d
scale_fill_discrete = scale_fill_viridis_d
theme_set(theme_minimal() + theme(legend.position = "right"))

# specify some map projection/options
g = list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showlakes = TRUE,
  lakecolor = toRGB('white')
)

According to CDC, fewer than one in ten adult cigarette smokers succeed in quitting smoking each year. In 2018, only 7.5% of adult smokers successfully quit smoking in the past year. In this page, we aim to learn more about the situations of the group of people who are willing or trying to quit smoking in 2020.

Tried Quit Smoking?

smk_df = read_csv("./data/adult20.csv")

smk_df = 
  smk_df %>% 
  janitor::clean_names() %>% 
  rename(try_quit = smkqt12m_a, 
         doctor_quit = smktlk_a, 
         time_quit = smkqtnp_a) %>%
  select(try_quit, doctor_quit, time_quit) %>% 
  filter(if_any(everything(), ~ !is.na(.)))

try_quit_plot = 
  smk_df %>% 
  select(try_quit) %>% 
  drop_na() %>% 
  mutate(
    try_quit = as.character(try_quit),
    try_quit = str_replace(try_quit, "1", "Yes"),
    try_quit = str_replace(try_quit, "2", "No"),
    try_quit = str_replace(try_quit, "7", "Refused"),
    try_quit = str_replace(try_quit, "8", "Not Ascertained"),
    try_quit = str_replace(try_quit, "9", "Don't Know")
  ) %>% 
  group_by(try_quit) %>% 
  count() %>% 
  plot_ly(labels = ~try_quit, values = ~n, type = "pie", width = 500, 
          height = 500,marker = list(colors = c("#9ACD32", "#87CEEB", "#FA8072", 
                           "#FFFACD", " #F5DEB3"))) %>% 
  layout(title = "Have you tried to stop smoking in the past year? ")

try_quit_plot

This pie chart illustrates the willingness to quit smoking of the smokers who answered the survey. Less than half, 46.1%, of the people have tried to stop smoking in 2020. Interested in this outcome, we investigate the number of days that these people persist in stop smoking, which is presented in the bar chart shown below:

Time since quitting smoking

time_quit_plot = 
  smk_df %>% 
  select(time_quit) %>% 
  mutate(
    period = cut(time_quit, breaks = seq(0, 365, by = 7))
  ) %>% 
  drop_na() %>% 
  group_by(period) %>% 
  count() %>% 
  plot_ly(y = ~n, type = "bar", color = ~n,
          #add text above the bars
          text = ~n, textposition = 'auto' ) %>% 
  layout(title = "Time since quitting (Weeks)", 
         xaxis = list(title = "Weeks"),
         yaxis = list(title = "Number of People")) %>% 
  colorbar(title = "Number of \npeople")
  
time_quit_plot

As shown in the bar chart, it is can be seemed that only a few people have quitted smoking for more than 10 weeks when they answered this survey. However, it usually takes around 12 weeks to withdraw all nicotine symptoms, despite heavy or light smokers. Many of them tried to stop smoking for less than 3 weeks. Due to limited data points, we were unable to examine whether these people quit smoking successfully in 2021. Therefore, the number of people who quit smoking in 2020 may increase.

In the sections below, we explore two factors that may be related to quit smoking: Doctor’s advice and tobacco tax. The plots listed below are current situations related to these factors.

Doctors’ Advice

doctor_quit = 
  smk_df %>% 
  select(doctor_quit) %>% 
  drop_na() %>% 
  mutate(
    doctor_quit = as.character(doctor_quit),
    doctor_quit = str_replace(doctor_quit, "1", "Yes"),
    doctor_quit = str_replace(doctor_quit, "2", "No"),
    doctor_quit = str_replace(doctor_quit, "7", "Refused"),
    doctor_quit = str_replace(doctor_quit, "8", "Not Ascertained"),
    doctor_quit = str_replace(doctor_quit, "9", "Don't Know")
  ) %>% 
  group_by(doctor_quit) %>% 
  count()  %>% 
  plot_ly(labels = ~doctor_quit, values = ~n, type = "pie", width = 500, 
          height = 500, marker = list(colors = c("#9ACD32", "#87CEEB", 
                           "#11AA22", "#FA8072",    "#F5DEB3"))) %>% 
  layout(title = "Has a doctor advised you about stop smoking 
         last year? ")

doctor_quit

From the NISH dataset, we plotted the variable related to the question: Has a doctor, dentist, or other health professional advised you about ways to stop smoking or prescribed medication to help you quit? The pie chart shows that 52.5% of respondents said “Yes” to this question. To further explore this situation, we study the dataset related to the quitline calls received per tobacco users.

Cigarette tax by state

#data cleaning
tax_df = read.csv("data/CDC_STATE_System_Tobacco_Legislation_-_Tax.csv") %>% 
  janitor::clean_names() %>% 
  select(year, location_abbr:topic_desc, measure_desc, provision_desc, 
         provision_alt_value, data_type, geo_location) %>% 
  filter(!(provision_alt_value == 0)) %>% 
  filter(data_type %in% c("Money", "Number")) %>% 
  distinct() %>% 
  filter(year == 2021)

#create plots
tax_2021 = tax_df %>% 
  filter(topic_desc == "Legislation - Tax Combustible Tobacco",
         !(provision_desc == "Percent Value"),
         measure_desc == "Cigarette") %>% 
  group_by(location_abbr) %>%
  mutate(location_abbr = reorder(location_abbr, provision_alt_value)) %>% 
  ggplot(aes(y = reorder(location_abbr, provision_alt_value), 
             x = provision_alt_value, fill = location_abbr)) + 
  geom_col(alpha = 0.5) +
  labs(title = "Cigarette Tax Value by State",
       x = "Tax value ($ per pack)",
       y = "State",
       fill = "Location Abbreviation") +
  theme(axis.text.y = element_text(size = 5.5),
        plot.title = element_text(hjust = 0.5)) 


ggplotly(tax_2021)
Legislation on Tobacco includes three categories:
  • Combustible Tobacco Tax
  • Non-Combustible Tobacco
  • Stamp Tax
  • Combustible Tobacco covers a majority number of smokers in the country and among five sub-groups in Combustible Tobacco, cigarette plays dominate role (more than 95%), so we mainly focus the cigarette tax among regions in the U.S.

    The graph shows the amount of tax ($ per pack) for 58 regions in the US (50 states and 8 territories that belongs to the U.S.). American Samoa (AS) has the highest cigarette tax, with 6 dollars per pack, followed by Puerto Rico (PR), Palau (PW), and District Colombia (DC). According to our previous findings, these four regions have a higher decrease of people who smoke, and DC has the highest decrease. Among 50 states in the country, New York (NY) has the highest tax, with 4.35 dollars per pack.

    As for the states with low cigarette tax, 35 regions have cigarette tax lower than 2 dollars and 14 regions lower than 1 dollar. Ohio and Texas have the largest increase in smokers from 2017 to 2020, both of which have cigarette lower than 2 dollars.