The following is the fourth set of demo code from my video on using R in the Oracle Database. Check out the video before using the following code. The blog post for the video will be updated to contain links to all blog posts that have the various demo code.
The following code example illustrate how you can build a Data Mining model using the in-database data mining algorithms. In this example a Decision Tree model is created. This model is then applied to new data, scoring this data with the predicted values.
> # > # Build am in-database ODM Decision Tree > # > dtData <- ore.get("MINING_DATA_BUILD_V") > # Create a ODM DT model in the DB : Only a temporary model. It is deleted when you logout > dtModel <- ore.odmDT(AFFINITY_CARD ~ ., dtData) > # View the details of the ODM model > #summary(dtModel) > names(dtModel) [1] "name" "settings" "attributes" "costs" "distributions” [6] "nodes" "formula" "extRef" "call" > dtModel$name [1] "ORE$208_210” > dtModel$settings value prep.auto on impurity.metric impurity.gini term.max.depth 7 term.minpct.node 0.05 term.minpct.split 0.1 term.minrec.node 10 term.minrec.split 20 > dtModel$attributes name type data.type data.length precision scale is.target 1 AFFINITY_CARD categorical number 22 0 0 TRUE 2 AGE numerical number 22 NA NA FALSE 3 CUST_MARITAL_STATUS categorical varchar2 20 NA NA FALSE 4 EDUCATION categorical varchar2 21 NA NA FALSE 5 HOUSEHOLD_SIZE categorical varchar2 21 NA NA FALSE 6 OCCUPATION categorical varchar2 21 NA NA FALSE 7 YRS_RESIDENCE numerical number 22 NA NA FALSE > > ## Compute the Compusion Matrix > dtResults <- predict(dtModel, dtData, "AFFINITY_CARD") > with(dtResults, table(AFFINITY_CARD, PREDICTION)) PREDICTION AFFINITY_CARD 0 1 0 1056 64 1 201 179 > ## How do you persist the model in the DB > ## Rename and save the model in the database > dtModel$name [1] "ORE$208_210" > ## Save the ODM model in the in-database R datastore > ore.save(dtModel, name = "ORE_MODELS", overwrite=TRUE) > ore.load(name = "ORE_MODELS") [1] "dtModel" > ## Score new data using the DM Model > ore.sync(table = c("MINING_DATA_APPLY")) > ore.ls() [1] "DEMO_R_APPLY_RESULT" "DEMO_R_TABLE" "DEMO_SUBSET_TABLE" [4] "INSUR_CUST_LTV_SAMPLE" "MINING_DATA_APPLY" "MINING_DATA_BUILD_V" [7] "MINING_DATA_TEST_V" > dtApply <- ore.get("MINING_DATA_APPLY") > dim(dtApply) [1] 1500 18 > class(dtApply) [1] "ore.frame” attr(,"package") [1] "OREbase” > DTAPPLY <- ore.push(dtApply) > dtApplyResult <- predict(dtModel, DTAPPLY) > dtApplyResult <- predict(dtModel, DTAPPLY) > head(dtApplyResult) '0' '1' PREDICTION 100001 0.9521912 0.04780876 0 100002 0.9521912 0.04780876 0 100003 0.9521912 0.04780876 0 100004 0.9521912 0.04780876 0 100005 0.2633745 0.73662551 1 100006 0.9521912 0.04780876 0 > dim(dtApplyResult) [1] 1500 3 > dim(dtApply) [1] 1500 18 > dtResults <- cbind(dtApply, dtApplyResult) > dim(dtResults) [1] 1500 21 > ore.drop(table = "DEMO_R_APPLY_RESULT") > ore.create(dtApplyResult, table="DEMO_R_APPLY_RESULT") > ## Run the following for the first time you will rename a mode > # ore.exec(paste("BEGIN> # DBMS_DATA_MINING.RENAME_MODEL(model_name => '", dtModel$name, "',> # new_model_name => 'DEMO_R_DT_MODEL'); END;",sep=""))> ## Run the following to refresh an existing model > ore.exec(paste("BEGIN + DBMS_DATA_MINING.DROP_MODEL('DEMO_R_DT_MODEL'); + DBMS_DATA_MINING.RENAME_MODEL(model_name => '", dtModel$name,"', + new_model_name => 'DEMO_R_DT_MODEL'); + END;",sep=""))