Supervised learning
M. Benesty
2019-10-27
library(fastrtext)
# Load the example corpora shipped with the package.
data("train_sentences", "test_sentences")

# Training set ----
# Each line written to the training file is the class name prefixed with
# "__label__", a single space, then the lower-cased sentence.
tmp_file_model <- tempfile()
train_texts <- tolower(train_sentences[, "text"])
train_labels <- paste0("__label__", train_sentences[, "class.text"])
train_to_write <- paste(train_labels, train_texts, sep = " ")
train_tmp_file_txt <- tempfile()
writeLines(train_to_write, con = train_tmp_file_txt)

# Test set ----
# Built the same way as the training lines; the raw class names are kept
# separately in `test_labels_without_prefix`.
# NOTE(review): nothing in this section consumes the test_* objects --
# presumably they feed the prediction / evaluation steps; confirm.
test_labels_without_prefix <- test_sentences[, "class.text"]
test_labels <- paste0("__label__", test_labels_without_prefix)
test_texts <- tolower(test_sentences[, "text"])
test_to_write <- paste(test_labels, test_texts, sep = " ")

# Train the model ----
# The command is a single character vector: c() coerces the numeric option
# values to strings alongside the flag names.
execute(
  commands = c(
    "supervised",
    "-input", train_tmp_file_txt,
    "-output", tmp_file_model,
    "-dim", 20,
    "-lr", 1,
    "-epoch", 20,
    "-wordNgrams", 2,
    "-verbose", 1
  )
)
## Read 0M words
## Number of words: 5060
## Number of labels: 15
## Progress: 100.0% words/sec/thread: 1105085 lr: 0.000000 avg.loss: 0.351986 ETA: 0h 0m 0s
## add .bin extension to the path
## [[1]]
## OWNX
## 0.999851
##
## [[2]]
## MISC
## 0.9858458
##
## [[3]]
## MISC
## 0.9926952
##
## [[4]]
## OWNX
## 0.9089149
##
## [[5]]
## AIMX
## 0.991272
## [1] 0.83
## [1] 0.83
## [[1]]
## OWNX
## 0.999851
##
## [[2]]
## MISC
## 0.9858458
##
## [[3]]
## MISC
## 0.9926952
##
## [[4]]
## OWNX
## 0.9089149
##
## [[5]]
## AIMX
## 0.991272
## OWNX MISC MISC OWNX AIMX MISC--the
## 0.9998510 0.9858458 0.9926952 0.9089149 0.9912720 0.4533829
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 601082 32.2 1222758 65.4 1222758 65.4
## Vcells 1257842 9.6 8388608 64.0 1802458 13.8