Content Analysis Employing the LSS Dictionary

This is sample R code for dictionary-based content analysis. You need to install the quanteda package (and the readtext package, which is used below to load the text file) before running it.
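If the packages are not installed yet, they are both on CRAN, for example:

install.packages(c('quanteda', 'readtext')) # run once; readtext is used below to load the text file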

calc_scores <- function(mx, df_dict){
  
  # Keep only the words that also appear in the dictionary, as a plain matrix
  mx <- as.matrix(mx[, colnames(mx) %in% rownames(df_dict)])
  
  # Dictionary scores in the same order as the remaining columns
  dict <- list()
  for(word in colnames(mx)){
    dict[[word]] <- df_dict$score[rownames(df_dict) == word]
  }
  scores <- as.matrix(unlist(dict)) # One-column matrix of word scores
  
  # Based on Wordscores (Benoit & Laver 2003)
  mx_tf <- mx / rowSums(mx) # Convert counts to within-document proportions
  mns <- mx_tf %*% scores # Frequency-weighted mean score of each document
  mx_binary <- (mx > 0) + 0 # Indicator: word occurs in the document
  mx_score <- mx_binary * t(scores[, rep(1, nrow(mx_tf))]) # Word scores spread over documents
  mx_dev <- mx_score - mns[, rep(1, ncol(mx_binary))] # Difference from the document mean
  mx_error <- (mx_dev ^ 2) * mx_tf # Squared deviations weighted by relative frequency
  vars <- rowSums(mx_error) # Variances
  sds <- sqrt(vars) # Standard deviations
  ses <- sds / sqrt(rowSums(mx)) # SD divided by sqrt of the total number of words
  return(list('mean' = mns[, 1], 'se' = ses))
}
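Before pointing the function at real data, a quick sanity check with a couple of made-up documents can be useful. The words and scores below are invented purely for illustration:

library(quanteda)
toy_dfm <- dfm(tokens(c(d1 = 'protest rally police arrest',
                        d2 = 'government reform police')))
toy_dict <- data.frame(score = c(1, 1, -1, -1),
                       row.names = c('protest', 'rally', 'police', 'arrest'))
calc_scores(toy_dfm, toy_dict) # Returns a mean score and a standard error per document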

library(quanteda)
cp <- corpus(readtext::readtext('Russian news.txt')) # Your texts; textfile() from older quanteda now lives in the readtext package
mx <- dfm(tokens(cp))
df_dict <- read.csv("protest_framing_dictionary_v3.csv", header=FALSE, sep='\t', # Our dictionary
                    col.names=c('word', 'score'), row.names=1)
calc_scores(mx, df_dict)
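The function returns one mean score and one standard error per document. As a rough sketch of how the output could be inspected (the object name res and the 95% interval are my own additions, not part of the original code):

res <- calc_scores(mx, df_dict)
plot(res$mean, pch = 19, xlab = 'Document', ylab = 'Dictionary score',
     ylim = range(res$mean - 2 * res$se, res$mean + 2 * res$se))
segments(x0 = seq_along(res$mean),
         y0 = res$mean - 1.96 * res$se,
         y1 = res$mean + 1.96 * res$se) # Approximate 95% confidence intervals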
