dc.Rd
Classify documents.
dc_(model, documents, output = NULL)
dc(model, documents)
model | Model to use, generally returned by `dc_train`. |
---|---|
documents | Documents to classify. |
output | Full path to output file. |
# NOT RUN {
# The model file must be given as a full path, so start from the
# current working directory.
wd <- getwd()

# Small labelled corpus: one class label per document.
data <- data.frame(
  class = c(
    "Sport", "Business", "Sport", "Sport",
    "Business", "Politics", "Politics", "Politics"
  ),
  doc = c(
    "Football, tennis, golf and, bowling and, score.",
    "Marketing, Finance, Legal and, Administration.",
    "Tennis, Ski, Golf and, gym and, match.",
    "football, climbing and gym.",
    "Marketing, Business, Money and, Management.",
    "This document talks politics and Donal Trump.",
    "Donald Trump is the President of the US, sadly.",
    "Article about politics and president Trump."
  )
)

# Inflate the training set: draw 3 random rows, 20 times over, and
# stack the draws into a single data frame.
# Obviously do not do that in the real world.
draws <- replicate(
  20,
  data[sample(nrow(data), 3), ],
  simplify = FALSE
)
data <- do.call(rbind, draws)

# Train the model, writing it to classifier.bin in the working directory.
model_path <- file.path(wd, "classifier.bin")
model <- dc_train(model_path, "en", data)

# Unlabelled documents to run through the classifier.
documents <- data.frame(
  docs = c(
    "This discusses golf which is a sport.",
    "This document is about business administration.",
    "This is about people who do sport, go to the gym and play tennis.",
    "Some play tennis and work in Finance",
    "This documents discusses finance and money management."
  )
)

# Classify the documents and print the result.
classified <- dc(model, documents)
cat(classified)
# }