library(tensorflow)
library(keras)
library(data.table)
library(tfdatasets)
library(tfaddons)
# Preprocessing -----------------------------------------------------------
# download_data() below fetches one of the bilingual datasets offered at
# http://www.manythings.org/anki/ and unzips it into a directory "data".
# This example translates English to Dutch.
download_data = function() {
  if (!dir.exists('data')) {
    dir.create('data')
  }
  if (!file.exists('data/nld-eng.zip')) {
    download.file('http://www.manythings.org/anki/nld-eng.zip',
                  destfile = file.path('data', 'nld-eng.zip'))
    unzip('data/nld-eng.zip', exdir = 'data')
  }
}
download_data()
filepath <- file.path("data", "nld.txt")
# read the tab-separated sentence pairs: column 1 is English, column 2 is Dutch
df = data.table::fread(filepath, header = FALSE, encoding = 'UTF-8',
                       select = c(1, 2), nrows = -1)
text_cleaner <- function(text) {
  text %>%
    # replace non-ASCII characters with their ASCII equivalents
    textclean::replace_non_ascii() %>%
    # strip punctuation and apostrophes, keeping letters and spaces
    # (strip() also lower-cases the text by default)
    textclean::strip(apostrophe.remove = TRUE) %>%
    # add the start / end markers used by the decoder
    paste('<start> ', ., ' <end>')
}
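# A quick, illustrative sanity check (not part of the original pipeline):
# cleaning a made-up sentence should yield a lower-cased, punctuation-free
# string wrapped in the <start>/<end> tokens, roughly:
# text_cleaner("Hello, world! Don't panic.")
# #> "<start>  hello world dont panic  <end>"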
df = sapply(1:2, function(x) text_cleaner(df[[x]])) %>% as.data.table()
text_tok <- function(text) {
  tokenizer = text_tokenizer(filters = '')
  tokenizer %>% fit_text_tokenizer(text)
  # + 1 because index 0 is reserved for padding
  vocab_size = length(tokenizer$word_index) + 1L
  data = tokenizer %>%
    texts_to_sequences(text) %>%
    pad_sequences(padding = 'post')
  list(vocab_size, data, tokenizer)
}
c(input_vocab_size, data_en, tokenizer_en) %<-% text_tok(df[['V1']])
c(output_vocab_size, data_de, tokenizer_de) %<-% text_tok(df[['V2']])
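# Optional check (illustrative): the padded matrices should have shape
# n_sentences x max_sequence_length, and each tokenizer exposes its fitted
# word index directly.
dim(data_en)                       # English sequences
dim(data_de)                       # Dutch sequences
head(tokenizer_en$word_index, 3)   # token -> integer index mapping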
# Split the dataset
indices_to_take = sample.int(n = nrow(df), size = floor(0.8*nrow(df)), replace = FALSE)
split_data <- function(data) {
  list(data[indices_to_take, ], data[-indices_to_take, ])
}
c(en_train, en_test, de_train, de_test) %<-% c(split_data(data_en), split_data(data_de))
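# Quick consistency check (illustrative): the 80/20 split must keep source and
# target rows aligned, so the row counts have to match on both sides.
stopifnot(nrow(en_train) == nrow(de_train), nrow(en_test) == nrow(de_test))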
rm(df, filepath, indices_to_take, download_data, split_data, text_cleaner, text_tok)
batch_size = 64L
buffer_size = nrow(en_train)
steps_per_epoch = buffer_size %/% batch_size
embedding_dims = 256L
rnn_units = 1024L
dense_units = 1024L
dtype = tf$float32 #used to initialize DecoderCell Zero state
dataset = tensor_slices_dataset(list(en_train, de_train)) %>%
  dataset_shuffle(buffer_size) %>%
  dataset_batch(batch_size, drop_remainder = TRUE)
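# Optional (illustrative): pull a single batch to confirm that both tensors
# are shaped (batch_size, sequence_length).
example_batch <- dataset %>% dataset_take(1) %>% iterate()
example_batch[[1]][[1]]$shape   # English batch
example_batch[[1]][[2]]$shape   # Dutch batch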
EncoderNetwork = reticulate::PyClass(
  'EncoderNetwork',
  inherit = tf$keras$Model,
  defs = list(
    `__init__` = function(self, input_vocab_size, embedding_dims, rnn_units) {
      super()$`__init__`()
      # the embedding and LSTM layers are called directly in train_step()
      self$encoder_embedding = layer_embedding(input_dim = input_vocab_size,
                                               output_dim = embedding_dims)
      self$encoder_rnnlayer = layer_lstm(units = rnn_units,
                                         return_sequences = TRUE,
                                         return_state = TRUE)
      NULL
    }
  )
)
DecoderNetwork = reticulate::PyClass(
  'DecoderNetwork',
  inherit = tf$keras$Model,
  defs = list(
    `__init__` = function(self, output_vocab_size, embedding_dims, rnn_units) {
      super()$`__init__`()
      self$decoder_embedding = layer_embedding(input_dim = output_vocab_size,
                                               output_dim = embedding_dims)
      # projects decoder outputs onto the target vocabulary
      self$dense_layer = layer_dense(units = output_vocab_size)
      self$decoder_rnncell = tf$keras$layers$LSTMCell(rnn_units)
      # Sampler: feeds the ground-truth target token at each step (teacher forcing)
      self$sampler = sampler_training()
      # Create the attention mechanism with memory = NULL; the encoder output is
      # plugged in later via setup_memory()
      self$attention_mechanism = self$build_attention_mechanism(
        dense_units, NULL, c(rep(ncol(data_en), batch_size)))
      self$rnn_cell = self$build_rnn_cell(batch_size)
      self$decoder = decoder_basic(cell = self$rnn_cell,
                                   sampler = self$sampler,
                                   output_layer = self$dense_layer)
      NULL
    },
    build_attention_mechanism = function(self, units, memory, memory_sequence_length) {
      attention_luong(units = units, memory = memory,
                      memory_sequence_length = memory_sequence_length)
    },
    build_rnn_cell = function(self, batch_size) {
      attention_wrapper(cell = self$decoder_rnncell,
                        attention_mechanism = self$attention_mechanism,
                        attention_layer_size = dense_units)
    },
    build_decoder_initial_state = function(self, batch_size, encoder_state, dtype) {
      decoder_initial_state = self$rnn_cell$get_initial_state(batch_size = batch_size,
                                                              dtype = dtype)
      decoder_initial_state$clone(cell_state = encoder_state)
    }
  )
)
encoderNetwork = EncoderNetwork(input_vocab_size, embedding_dims, rnn_units)
decoderNetwork = DecoderNetwork(output_vocab_size, embedding_dims, rnn_units)
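# Optional shape check (illustrative): run one batch through the encoder and
# confirm that the attention memory has shape (batch_size, Tx, rnn_units).
c(ex_input, ex_target) %<-% (dataset %>% dataset_take(1) %>% iterate())[[1]]
c(ex_memory, ex_h, ex_c) %<-% encoderNetwork$encoder_rnnlayer(
  encoderNetwork$encoder_embedding(ex_input),
  initial_state = list(tf$zeros(c(batch_size, rnn_units)),
                       tf$zeros(c(batch_size, rnn_units))))
ex_memory$shape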
optimizer = tf$keras$optimizers$Adam()
loss_function <- function(y_pred, y) {
  # y:      (batch_size, Ty)                      integer target ids
  # y_pred: (batch_size, Ty, output_vocab_size)   raw logits from the dense layer
  loss = tf$keras$losses$sparse_categorical_crossentropy(y, y_pred, from_logits = TRUE)
  # mask out the padding positions: 0 where y == 0, 1 elsewhere
  mask = tf$logical_not(tf$math$equal(y, 0L))
  mask = tf$cast(mask, dtype = loss$dtype)
  loss = mask * loss
  tf$reduce_mean(loss)
}
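# Illustrative check of the padding mask (made-up values): the two trailing
# zeros in y_demo are padding positions and contribute nothing to the loss.
y_demo = tf$constant(matrix(c(5L, 3L, 0L, 0L), nrow = 1))
logits_demo = tf$random$uniform(c(1L, 4L, output_vocab_size))
loss_function(logits_demo, y_demo)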
train_step <- function(input_batch, output_batch, encoder_initial_cell_state) {
  loss = 0L
  with(tf$GradientTape() %as% tape, {
    encoder_emb_inp = encoderNetwork$encoder_embedding(input_batch)
    # a:    encoder activations for every timestep (the attention memory)
    # a_tx: last hidden state, c_tx: last cell state -- both handed to the decoder
    c(a, a_tx, c_tx) %<-% encoderNetwork$encoder_rnnlayer(encoder_emb_inp,
                                                          initial_state = encoder_initial_cell_state)
    # Prepare the decoder input & output sequences: the input drops the final
    # <end> token, the output drops the initial <start> token, so the decoder
    # learns to predict the next token at every position.
    decoder_input = tf$convert_to_tensor(output_batch %>% as.array() %>% .[, 1:(ncol(data_de) - 1)])
    decoder_output = tf$convert_to_tensor(output_batch %>% as.array() %>% .[, 2:ncol(data_de)])
    # Decoder embeddings
    decoder_emb_inp = decoderNetwork$decoder_embedding(decoder_input)
    # Point the attention mechanism at the encoder output and build the initial
    # AttentionWrapperState from the encoder's final states
    decoderNetwork$attention_mechanism$setup_memory(a)
    decoder_initial_state = decoderNetwork$build_decoder_initial_state(batch_size,
                                                                       encoder_state = list(a_tx, c_tx),
                                                                       dtype = tf$float32)
    # BasicDecoderOutput
    c(outputs, res1, res2) %<-% decoderNetwork$decoder(decoder_emb_inp,
                                                       initial_state = decoder_initial_state,
                                                       sequence_length = c(rep(ncol(data_de) - 1L, batch_size)))
    logits = outputs$rnn_output
    # calculate the masked loss
    loss = loss_function(logits, decoder_output)
  })
  # all trainable weights of both networks
  variables = c(encoderNetwork$trainable_variables, decoderNetwork$trainable_variables)
  # differentiate the loss w.r.t. the variables
  gradients = tape$gradient(loss, variables)
  # apply_gradients() expects a list of (gradient, variable) pairs
  grads_and_vars = purrr::transpose(list(gradients, variables))
  optimizer$apply_gradients(grads_and_vars)
  loss
}
initialize_initial_state = function() {
  # zero hidden state and cell state for the encoder LSTM
  list(tf$zeros(c(batch_size, rnn_units)), tf$zeros(c(batch_size, rnn_units)))
}
epochs = 1
for (i in seq_len(epochs)) {
  encoder_initial_cell_state = initialize_initial_state()
  total_loss = 0.0
  res = dataset %>% dataset_take(steps_per_epoch) %>% iterate()
  for (batch in seq_along(res)) {
    c(input_batch, output_batch) %<-% res[[batch]]
    batch_loss = train_step(input_batch, output_batch, encoder_initial_cell_state)
    total_loss = total_loss + batch_loss
    if (batch %% 5 == 0) {
      print(paste('batch loss:', batch_loss$numpy(), 'epoch', i, 'batch', batch))
    }
  }
  print(paste('epoch', i, 'mean loss:', (total_loss / length(res))$numpy()))
}
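# Optionally persist the trained weights so both networks can be restored later
# without retraining (illustrative; the paths are arbitrary). The TF checkpoint
# format is used because the models are subclassed.
dir.create('checkpoints', showWarnings = FALSE)
encoderNetwork$save_weights(file.path('checkpoints', 'encoder'))
decoderNetwork$save_weights(file.path('checkpoints', 'decoder'))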