Large Scale Machine Learning Projects with R Suite


> rsuite install
Detecting repositories ...
Will use repositories:
    CRAN.CRAN = https://mran.microsoft.com/snapshot/2017-10-15
    CRAN.CRANextra = http://www.stats.ox.ac.uk/pub/RWin
    Other = http://wlog-rsuite.s3.amazonaws.com
Installing RSuite(v0.17x) package ...
installing the source package 'RSuite'
All done.

> rsuite proj start -n spmf

Commands:
  update   Checks if the newest version of RSuite CLI is installed. If not, the installer for the newest version is downloaded and installation is initiated.
  install  Install RSuite with all the dependencies.
  proj     Use it to manage the project, its dependencies, and build project packages.
  repo     Use to manage repositories, e.g. upload packages.
  pkgzip   Use to create PKGZIP packages to fill up a remote repository.
  version  Show RSuite CLI version.
  help     Show this message and exit.

Call 'rsuite [command] help' to get information on acceptable [args].
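A typical end-to-end session chains these commands; a sketch, using the project and package names from this talk ('rsuite [command] help' is the authoritative reference for the exact options):

> rsuite proj start -n spmf        (create the project skeleton)
> rsuite proj pkgadd -n predmodel  (add a package to the project)
> rsuite proj depsinst             (install dependencies into the project libs)
> rsuite proj build                (build the project packages)
> rsuite proj zip                  (produce a deployment zip, e.g. spmf_0.1_001.zip)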

logs/.gitignore

PARAMETERS


LogLevel: INFO
N_days: 365
solver_max_iterations: 10
solver_opt_horizon: 8
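These 'Key: value' entries are plain DCF, so a master script can read them back directly; a minimal sketch, assuming the parameters sit in a config.txt next to the R/ directory (the accessor RSuite actually generates may differ):

cfg <- read.dcf(file.path(script_path, "../config.txt"))  # character matrix, one column per key
log_level <- cfg[1, "LogLevel"]                           # "INFO"
n_days <- as.integer(cfg[1, "N_days"])                    # 365
max_iter <- as.integer(cfg[1, "solver_max_iterations"])   # 10
opt_horizon <- as.integer(cfg[1, "solver_opt_horizon"])   # 8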


○ main
○ if __name__ == "__main__":

predmodel

● ==
● >=
● <=
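These are the version constraints accepted in dependency declarations; in a package's DESCRIPTION the same operators appear as, for example (version numbers here are illustrative):

Imports:
    data.table (>= 1.10.0),
    h2o (== 3.14.0.7),
    logging (<= 0.7-103)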

master.R

spmf/libs

packages_import.R

master.R
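A master script is the R counterpart of a Python if __name__ == "__main__": entry point: it resolves its own location, attaches the project libraries from spmf/libs, and then runs the workflow. A hypothetical sketch of the bootstrap header (the real one is generated by RSuite; set_env.R and the args object it provides are assumptions based on that template):

# resolve the directory the script is run from, so relative paths work
script_path <- (function() {
  cargs <- commandArgs(trailingOnly = FALSE)
  fidx <- grep("--file=", cargs)
  if (length(fidx) == 0) return(".")
  dirname(normalizePath(sub("--file=", "", cargs[fidx][1])))
})()

# point .libPaths() at spmf/libs and set up logging and the args parser
source(file.path(script_path, "set_env.R"), chdir = TRUE)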

import_training.R (I)

● import/<session_id>/
● work/<session_id>/

library(predmodel)

import_path <- file.path(script_path, "../import")
work_path <- file.path(script_path, "../work")

# required
session_id <- args$get(name = "session_id", default = "201711122000", required = FALSE)

loginfo("--> Session id:%s", session_id)

session_work <- file.path(work_path, session_id)

if (!dir.exists(session_work)) {
  dir.create(session_work)
}

import_training_data(file.path(import_path, session_id), session_work)

import_training.R (II)

devtools

import_training_data

#' @export
import_training_data <- function(import_path, work_path) {
  pkg_loginfo("Importing from %s into %s", import_path, work_path)

  n <- 10000
  dt <- data.table(feature1 = rnorm(n), feature2 = rnorm(n))
  m <- round(n * 0.3)
  dt[, resp := c(rep(1, m), rep(0, n - m))]
  fwrite(x = dt, file = file.path(work_path, "training.csv"), sep = ";")
}
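After a run the generated file can be sanity-checked; a quick sketch using the session id from the slides:

library(data.table)
dt <- fread("work/201711122000/training.csv", sep = ";")
stopifnot(nrow(dt) == 10000)                 # n rows written
stopifnot(abs(mean(dt$resp) - 0.3) < 1e-12)  # ~30% positive responses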

estimate_model.R (I)

library(h2o)
library(magrittr)
library(predmodel)

work_path <- file.path(script_path, "../work")

# required
session_id <- args$get(name = "session_id", required = FALSE, default = "201710111655")
loginfo("--> Session id:%s", session_id)
session_work <- file.path(work_path, session_id)

h2o.init(max_mem_size = "4g", nthreads = 2)

logdebug("---> H2O started")

train_file <- file.path(session_work, "training.csv")

stopifnot(file.exists(train_file))

train_file %>%
  transform_training() %>%
  estimate_model(session_id) %>%
  save_model(session_work)

transform_training

#' @export
transform_training <- function(train_file) {
  dt <- h2o.importFile(path = train_file, destination_frame = "train_dt",
                       parse = TRUE, header = TRUE, sep = ";")
  dt$resp <- as.factor(dt$resp)
  dt <- h2o.assign(data = dt, key = "train_dt")
  return(dt)
}

estimate_model

#' @export
estimate_model <- function(dt, session_id) {
  model <- h2o.gbm(x = colnames(dt), y = "resp", training_frame = dt,
                   model_id = sprintf("gbm_%s", session_id),
                   ntrees = 10, learn_rate = 0.1)
  model  # return explicitly so the result pipes into save_model()
}

save_model

#' @export
save_model <- function(model, session_work) {
  h2o.saveModel(model, path = session_work, force = TRUE)
}
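h2o.saveModel() names the file on disk after the model_id, so the model lands at a predictable location; schematically, this is the contract score_model.R relies on later:

# estimate_model.R leaves the model at:
#   work/<train_session_id>/gbm_<train_session_id>
# so the scoring script can reconstruct the path and reload it:
model_file <- file.path(train_session_work, sprintf("gbm_%s", train_session_id))
model <- h2o.loadModel(model_file)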

import_test.R (I)

● import/<session_id>/
● work/<session_id>/

library(predmodel)

import_path <- file.path(script_path, "../import")
work_path <- file.path(script_path, "../work")

# required
session_id <- args$get(name = "session_id", default = "201711122000", required = FALSE)

loginfo("--> Session id:%s", session_id)

session_work <- file.path(work_path, session_id)

if (!dir.exists(session_work)) {
  dir.create(session_work)
}

import_test_data(file.path(import_path, session_id), session_work)

import_test_data

#' @export
import_test_data <- function(import_path, work_path) {
  pkg_loginfo("Importing from %s into %s", import_path, work_path)

  n <- 1000
  dt <- data.table(feature1 = rnorm(n), feature2 = rnorm(n))
  fwrite(x = dt, file = file.path(work_path, "test.csv"), sep = ";")
}

score_model.R (I)

● work/<score_session_id>
● work/<train_session_id>
● export/<score_session_id>

score_model.R (II)

library(h2o)
library(magrittr)
library(predmodel)

work_path <- file.path(script_path, "../work")
export_path <- file.path(script_path, "../export")

# required
train_session_id <- args$get(name = "train_session_id", required = FALSE, default = "201710111655")
score_session_id <- args$get(name = "score_session_id", required = FALSE, default = "201710111655")

loginfo("--> train session id:%s", train_session_id)loginfo("--> score session id:%s", score_session_id)

score_session_export <- export_path
train_session_work <- file.path(work_path, train_session_id)
score_session_work <- file.path(work_path, score_session_id)

h2o.init(max_mem_size = "4g", nthreads = 2)

logdebug("---> H2O started")

test_file <- file.path(score_session_work, "test.csv")
model_file <- file.path(train_session_work, sprintf("gbm_%s", train_session_id))

stopifnot(file.exists(test_file))
stopifnot(file.exists(model_file))

test_dt <- test_file %>% transform_test()

score_model(test_dt = test_dt, model_path = model_file) %>%
  export_score(export_path = export_path, score_session_id = score_session_id)

transform_test

#' @export
transform_test <- function(test_file) {
  h2o.importFile(path = test_file, destination_frame = "test_dt",
                 parse = TRUE, header = TRUE, sep = ";")
}

score_model

#' @export
score_model <- function(test_dt, model_path) {
  model <- h2o.loadModel(model_path)
  pred_dt <- h2o.predict(model, test_dt)
  pred_dt
}

export_score

#' @export
export_score <- function(score_dt, score_session_id, export_path) {
  score_dt <- as.data.table(score_dt)
  score_dt[, score_session_id := score_session_id]
  fwrite(x = score_dt, file = file.path(export_path, "score.csv"),
         sep = ";", append = TRUE)
}
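Because of append = TRUE, successive scoring runs accumulate in a single export/score.csv, each block of rows tagged with its score_session_id; a sketch of reading it back (assuming the header row from the first write is present):

library(data.table)
scores <- fread("export/score.csv", sep = ";")
scores[, .N, by = score_session_id]  # row count per scoring run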

Production deployment: the built spmf_0.1_001.zip is unpacked into Production/spmf, which contains the import, export, and work directories.

Production/spmf/R

a. Rscript import_training.R
b. Rscript estimate_model.R
c. Rscript import_test.R
d. Rscript score_model.R
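With explicit session ids the four steps chain into the full batch run; the flag syntax below is an assumption about the generated args parser (the ids match the log output shown later):

Rscript import_training.R --session_id=201711122000
Rscript estimate_model.R --session_id=201711122000
Rscript import_test.R --session_id=201711131000
Rscript score_model.R --train_session_id=201711122000 --score_session_id=201711131000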

Production/spmf/export

print

loginfo("Phase 1 passed")

logdebug("Iter %d done", i)

logtrace("Iter %d done", i)

logwarning("Are you sure?")

logerror("I failed :(")

Packages

pkg_loginfo("Phase 1 passed")

pkg_logdebug("Iter %d done", i)

pkg_logtrace("Iter %d done", i)

pkg_logwarning("Are you sure?")

pkg_logerror("I failed :(")

2017-11-13 13:47:03 INFO::--> Session id:201711122000

2017-11-13 13:47:03 INFO:predmodel:Importing from C:/Workplace/Sandbox/Production/spmf/R/../import/201711122000 into C:/Workplace/Sandbox/Production/spmf/R/../work/201711122000

2017-11-13 13:47:14 INFO::--> Session id:201711122000

2017-11-13 13:47:51 INFO::--> Session id:201711131000

2017-11-13 13:47:51 INFO:predmodel:Importing from C:/Workplace/Sandbox/Production/spmf/R/../import/201711131000 into C:/Workplace/Sandbox/Production/spmf/R/../work/201711131000

2017-11-13 13:47:57 INFO::--> train session id:201711122000

2017-11-13 13:47:57 INFO::--> score session id:201711131000

LogLevel: INFO

LogLevel: DEBUG

LogLevel: TRACE
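LogLevel filters the calls above by severity: INFO shows loginfo and anything more severe, DEBUG adds logdebug, TRACE shows everything. A minimal sketch with the underlying logging package (assuming RSuite wires the config's LogLevel into it):

library(logging)
basicConfig()
setLevel("DEBUG")             # as if the config said LogLevel: DEBUG
loginfo("Phase 1 passed")     # printed
logdebug("Iter %d done", 1)   # printed at DEBUG, suppressed at INFO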

import_training.R

tests/test_spmf.R

library(predmodel)
library(testthat)

context("Testing context")

test_that(desc = "Test", code = {
  expect_true(5 > 3)
  expect_true(pi < 3)
})
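These tests run with testthat straight from the project root; a minimal sketch (RSuite may also hook tests into its own build step). Note that the second expectation, pi < 3, is false, presumably included to demonstrate what a failing test report looks like:

library(testthat)
test_dir("tests")  # runs tests/test_spmf.R; expect_true(pi < 3) fails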