README

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

R2PMML

Features

News and Updates

Prerequisites

Installation

install.packages("r2pmml")

Alternatively, installing the latest snapshot version from GitHub using the devtools package:

library("devtools")

install_github("jpmml/r2pmml")

Usage

Base functionality

library("randomForest")
library("r2pmml")

data(iris)

# Train a model using raw Iris dataset
iris.rf = randomForest(Species ~ ., data = iris, ntree = 7)
print(iris.rf)

# Export the model to PMML
r2pmml(iris.rf, "iris_rf.pmml")

Data pre-processing

The r2pmml function takes an optional argument preProcess, which associates the model with data pre-processing transformations.

library("caret")
library("randomForest")
library("r2pmml")

data(iris)

# Create a preprocessor
iris.preProcess = preProcess(iris, method = c("range"))

# Use the preprocessor to transform raw Iris dataset to pre-processed Iris dataset
iris.transformed = predict(iris.preProcess, newdata = iris)

# Train a model using pre-processed Iris dataset
iris.rf = randomForest(Species ~., data = iris.transformed, ntree = 7)
print(iris.rf)

# Export the model to PMML.
# Pass the preprocessor as the `preProcess` argument
r2pmml(iris.rf, "iris_rf.pmml", preProcess = iris.preProcess)

Model formulae

Alternatively, it is possible to associate lm, glm and randomForest models with data pre-processing transformations using model formulae.

library("plyr")
library("r2pmml")

# Load and prepare the Auto-MPG dataset
auto = read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data", quote = "\"", header = FALSE, na.strings = "?", row.names = NULL, col.names = c("mpg", "cylinders", "displacement", "horsepower", "weight", "acceleration", "model_year", "origin", "car_name"))
auto$origin = as.factor(auto$origin)
auto$car_name = NULL
auto = na.omit(auto)

# Train a model
auto.glm = glm(mpg ~ (. - horsepower - weight - origin) ^ 2 + I(displacement / cylinders) + cut(horsepower, breaks = c(0, 50, 100, 150, 200, 250)) + I(log(weight)) + revalue(origin, replace = c("1" = "US", "2" = "Europe", "3" = "Japan")), data = auto)

# Export the model to PMML
r2pmml(auto.glm, "auto_glm.pmml")

Package ranger

library("ranger")
library("r2pmml")

data(iris)

# Train a model.
# Keep the forest data structure by specifying `write.forest = TRUE`
iris.ranger = ranger(Species ~ ., data = iris, num.trees = 7, write.forest = TRUE)
print(iris.ranger)

# Export the model to PMML.
# Pass the training dataset as the `data` argument
r2pmml(iris.ranger, "iris_ranger.pmml", data = iris)

Package xgboost

library("xgboost")
library("r2pmml")

data(iris)

iris_X = iris[, 1:4]
iris_y = as.integer(iris[, 5]) - 1

# Generate R model matrix
iris.matrix = model.matrix(~ . - 1, data = iris_X)

# Generate XGBoost DMatrix and feature map based on R model matrix
iris.DMatrix = xgb.DMatrix(iris.matrix, label = iris_y)
iris.fmap = as.fmap(iris.matrix)

# Train a model
iris.xgb = xgboost(data = iris.DMatrix, missing = NULL, objective = "multi:softmax", num_class = 3, nrounds = 13)

# Export the model to PMML.
# Pass the feature map as the `fmap` argument.
# Pass the name and category levels of the target field as `response_name` and `response_levels` arguments, respectively.
# Pass the value of missing value as the `missing` argument
# Pass the optimal number of trees as the `ntreelimit` argument (analogous to the `ntreelimit` argument of the `xgb::predict.xgb.Booster` function)
r2pmml(iris.xgb, "iris_xgb.pmml", fmap = iris.fmap, response_name = "Species", response_levels = c("setosa", "versicolor", "virginica"), missing = NULL, ntreelimit = 7, compact = TRUE)

Advanced functionality

Sys.setenv(JAVA_TOOL_OPTIONS = "-Xms4G -Xmx8G")

r2pmml(iris.rf, "iris_rf.pmml")

r2pmml(iris.rf, "iris_rf.pmml", converter = "com.mycompany.MyRandomForestConverter", converter_classpath = "/path/to/myconverter-1.0-SNAPSHOT.jar")

De-installation

remove.packages("r2pmml")

Documentation

License

If you would like to use R2PMML in a proprietary software project, then it is possible to enter into a licensing agreement which makes R2PMML available under the terms and conditions of the BSD 3-Clause License instead.

Additional information

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.

R2PMML

Features

News and Updates

Prerequisites

Installation

Usage

Base functionality

Data pre-processing

Model formulae

Package `ranger`

Package `xgboost`

Advanced functionality

De-installation

Documentation

License

Additional information

R2PMML

Features

News and Updates

Prerequisites

Installation

Usage

Base functionality

Data pre-processing

Model formulae

Package ranger

Package xgboost

Advanced functionality

De-installation

Documentation

License

Additional information

Package `ranger`

Package `xgboost`