library(ggplot2)
library(dplyr)
library(class)
library(MASS)
library(caret)
library(devtools)
# countreg is installed from the R-Forge repository. Install it only when it
# is missing, and do so *before* attaching it: the original called
# install.packages() unconditionally after library(countreg), which
# re-installed on every run and could not help when the package was absent.
if (!requireNamespace("countreg", quietly = TRUE)) {
  install.packages("countreg", repos = "http://R-Forge.R-project.org")
}
library(countreg)
library(forcats)
library(AER)
library(pscl)
# Attaching data for modeling
# NOTE(review): none of the packages attached at the top of this script
# obviously ships `dataCar` (it is presumably provided by the insuranceData
# package) -- confirm where the data set is loaded from.
data(dataCar)
data1 <- dataCar

# Data cleaning & pre-processing
# Drop exact duplicate rows.
data2 <- unique(data1)

# Trim extreme vehicle values in two passes: keep rows strictly above the
# 0.0001 quantile of veh_value, then rows strictly below the 0.999 quantile
# of what remains after the first pass.
lower_cut <- quantile(data2$veh_value, 0.0001)
data3 <- data2[data2$veh_value > lower_cut, ]
upper_cut <- quantile(data3$veh_value, 0.999)
data4 <- data3[data3$veh_value < upper_cut, ]
# Regrouping vehicle categories: keep the nine body types listed in `top9`
# and collapse every other level of veh_body into a single "other" level.
top9 <- c(
  "SEDAN", "HBACK", "STNWG", "UTE", "TRUCK",
  "HDTOP", "COUPE", "PANVN", "MIBUS"
)
data4$veh_body <- fct_other(data4$veh_body, keep = top9, other_level = "other")

# Converting categorical variables into factors
# NOTE(review): `names` shadows the name of base::names(); the variable name
# is kept unchanged in case code further down the file refers to it.
names <- c("veh_body", "veh_age", "gender", "area", "agecat")
data4[, names] <- lapply(data4[, names], factor)
str(data4)
## Data partition - original data
# Split the cleaned data 80/20 into training and testing sets based on
# numclaims. With list = FALSE, createDataPartition() returns the selected
# row indices, which are used directly for positive/negative row indexing.
# NOTE(review): no set.seed() call is visible before this point, so the split
# will differ from run to run -- confirm whether that is intended.
data <- data4
data_partition <- createDataPartition(
  data$numclaims,
  times = 1,
  p = 0.8,
  list = FALSE
)
str(data_partition)
training <- data[data_partition, ]
testing <- data[-data_partition, ]
# Re-sampling
# Rows with at least one claim are kept exactly as they are.
sample1 <- subset(data4, numclaims != 0)

# Row positions of the zero-claim records, computed once and reused below.
zero_rows <- which(data4$numclaims == 0)

# Two random draws from the zero-claim rows: 90% that stay at zero claims and
# 10% whose claim counts are replaced by simulated values below.
# NOTE(review): the two draws are independent, so the same row can end up in
# both sample2 and sample3 (and some zero-claim rows in neither) -- confirm
# whether a disjoint 90/10 split was intended.
sample2 <- data4[sample(zero_rows, round(0.9 * length(zero_rows))), ]
sample3 <- data4[sample(zero_rows, round(0.1 * length(zero_rows))), ]

# Overwrite the claim counts of sample3 with negative-binomial draws
# (mu = 1, size = 3). The number of draws is taken from sample3 itself
# instead of the previously hard-coded 6323, so the assignment stays valid
# whenever the upstream filtering changes the row count.
y <- rnbinom(n = nrow(sample3), mu = 1, size = 3)
sample3$numclaims <- y

# Final re-sampled data set: claim rows + untouched zeros + simulated rows.
df_sample <- rbind(sample1, sample2, sample3)
我在學習怎麼用 R 去模擬 claim frequency,在網上看見這個例子。我的問題出在最後的這一段:
y <- rnbinom(n = 6323, mu = 1, size = 3) # n value should be equal to sample 3
sample3\$numclaims <- y
df_sample <- rbind(sample1,sample2,sample3)
我在學習怎麼用 R 去模擬 claim frequency,在網上看見這個例子。問題從下面這幾行開始:
y <- rnbinom(n = 6323, mu = 1, size = 3) # n value should be equal to sample 3
sample3\$numclaims <- y
df_sample <- rbind(sample1,sample2,sample3)
我在學習怎麼用 R 去模擬 claim frequency,在網上看見這個例子。在最後的 random negative binomial simulation 裡面,mu = 1 和 size = 3 是怎麼得出的呢?
還有,df_sample 是什麼意思?
有大神可以指教一下嗎?抱歉,我是 R 新手,正在嘗試自己理解。