Vectors

# decipher provides the named-entity helpers used below:
# tnf_train()/tnf_train_() to train a model, tnf()/tnf_() to apply it,
# and get_names() to parse the annotated output.
library(decipher)

# get working directory
# need to pass full path
# (the decipher functions below take absolute paths to the model and
# input files, so we build them from the current working directory)
wd <- getwd()

# Training to find "WEF"
# One annotated training sentence: each mention of the entity is wrapped
# in <START:wef> ... <END> tags.
data <- paste0(
  "This organisation is called the <START:wef> World Economic Forum <END> ",
  "It is often referred to as <START:wef> Davos <END> or the <START:wef> WEF <END> ."
)

# Train the model; tnf_train() returns the full path to the saved model file.
model <- tnf_train(
  model = paste0(wd, "/wef.bin"),
  lang = "en",
  data = data,
  type = "wef"
)
model
#> Indexing events with TwoPass using cutoff of 0
#> 
#>  Computing event counts...  done. 19 events
#>  Indexing...  done.
#> Collecting events... Done indexing in 0.02 s.
#> Incorporating indexed data for training...  
#> done.
#>  Number of Event Tokens: 19
#>      Number of Outcomes: 3
#>    Number of Predicates: 256
#> Computing model parameters...
#> Performing 300 iterations.
#>   1:  . (13/19) 0.6842105263157895
#>   2:  . (17/19) 0.8947368421052632
#>   3:  . (19/19) 1.0
#>   4:  . (19/19) 1.0
#>   5:  . (19/19) 1.0
#>   6:  . (19/19) 1.0
#> Stopping: change in training set accuracy less than 1.0E-5
#> Stats: (19/19) 1.0
#> ...done.
#> 
#> Training data summary:
#> #Sentences: 1
#> #Tokens: 19
#> #wef entities: 3
#> [1] "/home/jp/news-r/decipher/vignettes/wef.bin"

# Create sentences to test our model
# Plain, untagged text mentioning the entity in three different ways.
sentences <- paste0(
  "This sentence mentions the World Economic Forum the annual meeting ",
  "of which takes place in Davos. Note that the forum is often shortened to WEF."
)

# Apply the trained model; tnf() returns the text with entity tags added.
results <- tnf(model = model, sentences = sentences)
results
#> [1] "This sentence mentions the <START:wef> World Economic <END> Forum the annual meeting of which takes place in <START:wef> Davos. <END> Note that the forum is often shortened to <START:wef> WEF. <END>"
#> [1] "This sentence mentions the <START:wef> World Economic <END> Forum the annual meeting of which takes place in <START:wef> Davos. <END> Note that the forum is often shortened to <START:wef> WEF. <END>"

# get names from results
# get_names() parses the tagged output into a tibble with the raw tagged
# string, the entity type, and the cleaned-up name.
names <- get_names(results)
names
#> # A tibble: 3 x 3
#>   string                           type  name          
#>   <chr>                            <chr> <chr>         
#> 1 <START:wef> World Economic <END> wef   World Economic
#> 2 <START:wef> Davos. <END>         wef   Davos         
#> 3 <START:wef> WEF. <END>           wef   WEF

Text files

You can also train and run your model from .txt files.

# same with .txt files
# Training to find "WEF": identical annotated sentence as before, this
# time persisted to disk so the file-based training function can read it.
data <- paste0(
  "This organisation is called the <START:wef> World Economic Forum <END> ",
  "It is often referred to as <START:wef> Davos <END> or the <START:wef> WEF <END> ."
)

# Save the training data as a plain-text file in the working directory.
write(data, file = "input.txt")

# Trains the model from the .txt file and returns the full path to the model
model <- tnf_train_(
  model = paste0(wd, "/wef.bin"),
  lang = "en",
  data = paste0(wd, "/input.txt"),
  type = "wef"
)
model
#> Indexing events with TwoPass using cutoff of 0
#> 
#>  Computing event counts...  done. 19 events
#>  Indexing...  done.
#> Collecting events... Done indexing in 0.02 s.
#> Incorporating indexed data for training...  
#> done.
#>  Number of Event Tokens: 19
#>      Number of Outcomes: 3
#>    Number of Predicates: 256
#> Computing model parameters...
#> Performing 300 iterations.
#>   1:  . (13/19) 0.6842105263157895
#>   2:  . (17/19) 0.8947368421052632
#>   3:  . (19/19) 1.0
#>   4:  . (19/19) 1.0
#>   5:  . (19/19) 1.0
#>   6:  . (19/19) 1.0
#> Stopping: change in training set accuracy less than 1.0E-5
#> Stats: (19/19) 1.0
#> ...done.
#> 
#> Training data summary:
#> #Sentences: 1
#> #Tokens: 19
#> #wef entities: 3
#> [1] "/home/jp/news-r/decipher/vignettes/wef.bin"

# Create sentences to test our model
sentences <- paste(
  "This sentence mentions the World Economic Forum the annual meeting",
  "of which takes place in Davos. Note that the forum is often called the WEF."
)

# Save sentences
# FIX: previously this wrote `data` (the annotated training text) instead of
# `sentences`, so the model was then run on its own training data — which is
# what produced the doubled <START:wef> tags in the output further down.
write(sentences, file = "sentences.txt")

# Extract names
# Without specifying an output file the extracted names appear in the console
# FIX: the original assigned the result back to `model` — but as the captured
# output below shows, tnf_() returns NULL here, so that assignment clobbered
# the model path. Call it without assigning (as done later in this document).
tnf_(model = model, sentences = paste0(wd, "/sentences.txt"))
#> This organisation is called the <START:wef> <START:wef> World Economic Forum <END> <END> It is often referred to as <START:wef> <START:wef> <END> Davos <END> or the <START:wef> <START:wef> <END> WEF <END> .
#> NULL
# NOTE(review): the doubled tags above come from sentences.txt having been
# written from the annotated training `data` rather than `sentences` — the
# output should be regenerated after fixing that write() call.
  
# You can train slightly more sophisticated models too
# Training to find sentiments: two entity sub-types are annotated in the
# same text, sentiment.neg and sentiment.pos.
data <- paste0(
  "This sentence is <START:sentiment.neg> very bad <END> ! ",
  "This sentence is <START:sentiment.pos> rather good <END> . ",
  "This sentence on the other hand, is <START:sentiment.neg> horrible <END> ."
)

# Save the training data as a plain-text file in the working directory.
write(data, file = "input.txt")

# Trains the model from the .txt file and returns the full path to the model
model <- tnf_train_(
  model = paste0(wd, "/sentiment.bin"),
  lang = "en",
  data = paste0(wd, "/input.txt"),
  type = "sentiment"
)
model
#> Indexing events with TwoPass using cutoff of 0
#> 
#>  Computing event counts...  done. 21 events
#>  Indexing...  done.
#> Collecting events... Done indexing in 0.03 s.
#> Incorporating indexed data for training...  
#> done.
#>  Number of Event Tokens: 21
#>      Number of Outcomes: 3
#>    Number of Predicates: 227
#> Computing model parameters...
#> Performing 300 iterations.
#>   1:  . (15/21) 0.7142857142857143
#>   2:  . (20/21) 0.9523809523809523
#>   3:  . (21/21) 1.0
#>   4:  . (21/21) 1.0
#>   5:  . (21/21) 1.0
#>   6:  . (21/21) 1.0
#> Stopping: change in training set accuracy less than 1.0E-5
#> Stats: (21/21) 1.0
#> ...done.
#> 
#> Training data summary:
#> #Sentences: 1
#> #Tokens: 21
#> #sentiment.neg entities: 2
#> #sentiment.pos entities: 1
#> [1] "/home/jp/news-r/decipher/vignettes/sentiment.bin"

# Untagged test sentences for the sentiment model.
sentences <- paste(
  "The first half of this sentence is a bad and negative while",
  "the second half is great and positive."
)

# Save sentences
# FIX: previously this wrote `data` (the annotated sentiment training text)
# instead of `sentences`, so the model was run on its own training data —
# which is why the output below re-tags the training sentences.
write(sentences, file = "sentences.txt")

# Extract names
# Without specifying an output file the extracted names appear in the console
(tnf_(model = model, sentences = file.path(wd, "sentences.txt")))
#> This sentence is <START:sentiment.neg> <START:sentiment> very bad <END> <END> ! This sentence is <START:sentiment.pos> rather <START:sentiment> good <END> <END> . This sentence on the other hand, is <START:sentiment> <START:sentiment.neg> horrible <END> <END> .
#> NULL