# Example script for the text2emotion package. The `## ----` lines look like
# knitr chunk headers (this appears to be code extracted from a vignette);
# chunks labelled eval=FALSE are kept as commented-out code and are not run
# when this file is sourced.

## ----include = FALSE----------------------------------------------------------
# Chunk display options: merge source and output, prefix output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(text2emotion)

## ----eval=FALSE---------------------------------------------------------------
# install.packages("text2emotion")

## -----------------------------------------------------------------------------
# Additional packages attached by the example.
library(stringr)
library(textclean)
library(magrittr)
library(text2vec)
library(ranger)
library(caret)
library(parallel)
library(stats)

## -----------------------------------------------------------------------------
# Sample text with contractions, slang, and emoticons
text <- "I'm so excited!! 2nite we go 4 a gg :)"

# Preprocess the text
cleaned_text <- preprocess_text(text)

# View the processed text
cleaned_text

## -----------------------------------------------------------------------------
# Define custom slang terms
custom_slang <- c(
  "bff" = "best friend forever",
  "omg" = "oh my god"
)

# Preprocess the text with custom slang
text_with_custom_slang <- preprocess_text(
  "omg! My bff is here!",
  custom_slang = custom_slang
)

# View the processed text
text_with_custom_slang

## ----eval=FALSE---------------------------------------------------------------
# predict_emotion_with_emoji("I'm feeling great today!")
# #> I'm feeling great today! 😊
#
# predict_emotion_with_emoji("He's super angry!!", output_type = "emoji")
# #> 😡
#
# predict_emotion_with_emoji("I feel scared", output_type = "emotion")
# #> fear
#

# NOTE(review): the commented-out chunks below reference `tfidf_result`,
# `train_labels`, `train_df_cache_path`, `preprocessed_test_texts`,
# `test_labels` and `stopwords`, none of which are created in the visible part
# of this script -- they must exist before these chunks are uncommented.

## ----eval=FALSE---------------------------------------------------------------
# best_params <- tune_rf_model(
#   train_matrix = tfidf_result$tfidf_matrix,
#   train_labels = train_labels,
#   mtry_grid = c(5, 10, 20),
#   ntree_grid = c(100, 200, 300),
#   seed = 123,
#   verbose = TRUE
# )

## ----eval=FALSE---------------------------------------------------------------
# rf_model <- train_rf_model(
#   train_matrix = tfidf_result$tfidf_matrix,
#   train_labels = train_labels,
#   ntree = best_params$ntree,
#   mtry = best_params$mtry,
#   seed = 123,
#   verbose = TRUE,
#   train_df_cache_path = train_df_cache_path
# )

## ----eval=FALSE---------------------------------------------------------------
# eval_result <- evaluate_rf_model(
#   rf_model = rf_model,
#   test_texts = preprocessed_test_texts,
#   test_labels = test_labels,
#   tfidf_model = tfidf_result$tfidf_model,
#   vectorizer = tfidf_result$vectorizer,
#   stopwords = stopwords,
#   verbose = TRUE
# )

## ----eval=FALSE---------------------------------------------------------------
# eval_result$text_accuracy
# eval_result$macro_f1
# eval_result$confusion