Word clouds help us understand and visualize the important keywords in a given textual data set. R is a powerful programming language used for the exploration and visualization of data. The following code snippet can be used to generate a word cloud using the R programming language.
[js]
# Build a word cloud from one text column of a CSV file.
#
# Pipeline: read CSV -> build tm corpus -> clean text -> term frequencies
# -> draw word cloud. Replace "data.csv" and `col_name` with your own
# file name and column name.

# One-time setup: install the required packages (skip if already installed).
install.packages("tm")           # text mining
install.packages("wordcloud")    # word cloud generator
install.packages("RColorBrewer") # colour palettes for the word cloud

library(tm)
library(RColorBrewer)
library(wordcloud)

# Read the data with base R so that no extra package is required.
text_data <- read.csv("data.csv", stringsAsFactors = FALSE)
text <- text_data$col_name # extract the text from column 'col_name'

# Create a corpus in which every element of `text` is one document.
text_corpus <- Corpus(VectorSource(text))
inspect(text_corpus) # view the corpus contents

# Transformer that replaces every match of `pattern` with a space.
# `pattern` is interpreted as a regular expression by gsub().
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))

# Each step must start from the result of the previous one; otherwise the
# earlier replacements are thrown away.
formatted_text <- tm_map(text_corpus, toSpace, "/")
formatted_text <- tm_map(formatted_text, toSpace, "@")
# "\\|" is the regex for a literal pipe; a bare "|" would be an alternation
# and "\|" is not a valid escape inside an R string.
formatted_text <- tm_map(formatted_text, toSpace, "\\|")

# Base functions such as tolower() must be wrapped in content_transformer()
# so that the corpus keeps holding proper text documents.
formatted_text <- tm_map(formatted_text, content_transformer(tolower))
formatted_text <- tm_map(formatted_text, removeWords, stopwords("english"))
formatted_text <- tm_map(formatted_text, removePunctuation)
formatted_text <- tm_map(formatted_text, stripWhitespace)

# Build a table of word frequencies, most frequent first.
text_tdm <- TermDocumentMatrix(formatted_text)
text_m <- as.matrix(text_tdm)
text_v <- sort(rowSums(text_m), decreasing = TRUE)
text_d <- data.frame(word = names(text_v), freq = text_v)
head(text_d, 10) # show the 10 most frequent words

# Fix the random seed so that the word layout is reproducible.
set.seed(1234)

# Draw the word cloud.
wordcloud(
  words = text_d$word,
  freq = text_d$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
[/js]