Fit observations with skew normal distribution or two truncated normal distributions

 The skew normal distribution is based on the work of:

Azzalini, A., 1985. A class of distributions which includes the normal ones. Scand. J. Statist. 12, 171-178.

The two truncated normal distributions is based on the work of:

Sicard, O., 2013. La loi normale asymétrique. Définition, premières propriétés, estimation des paramètres. Pour que l’asymétrie ne soit plus considérée comme anormale... Laboratoire d'informatique et de mathématiques, La Réunion.

The skew normal distribution is available in sn package in CRAN. The asymetric Gaussian distribution is not available. It is easy to create this function; it has 3 parameters, mean, sdlow and sdhigh. If sdlow==sdhigh, the asymetric Gaussian distribution is equal to Gaussian distribution.

dnorm2 <- function(x, mean = 0, sdlow = 1, sdhigh = 1, sd=NULL, log = FALSE) {
  if (!is.null(sd)) {
    sdlow <- sd
    sdhigh <- sd
  }
  return(dnorm(x = x, mean=mean, sd=ifelse(x < mean, sdlow, sdhigh), log = log))
}

fitnorm2 <- function(par, x) {
  if (any(names(par)=="sd")) 
    par <- c(par[names(par) != "sd"], sdlow=unname(par["sd"]), sdhigh=unname(par["sd"]))
  if (par["sdlow"] <= 0 | par["sdhigh"] <= 0) return(+1E6)
  return(-sum(dnorm2(x=x, mean=par["mean"], sdlow = par["sdlow"], sdhigh = par["sdhigh"], log=TRUE)))
}


Let do an example with Gaussian random numbers: 

x <- rnorm(100)
hist(x, freq = FALSE, xlim=c(-3, 3), breaks = seq(from=-5, to=5, by=0.5))
g0_norm2 <- optim(par = c(mean=0, sdlow=1, sdhigh=1), fn=fitnorm2, x=x)
g1_norm2 <- optim(par = c(mean=0, sd=1), fn=fitnorm2, x=x)
library("HelpersMG")
compare_AIC(symetric=g1_norm2, asymetric=g0_norm2)

x <- seq(from=-5, to=5, length.out=101)
y <- dnorm2(x=x, mean=g1$par["mean"], sd=g1$par["sd"], log = FALSE)
x <- c(x, x[1])
y <- c(y, y[1])
polygon(x, y, border = NA, col=rgb(red = 1, blue = 0, green = 0, alpha = 0.2))

ok. It works. Now let do an example with asymetric distribution:

x <- rlnorm(100)
hist(x, freq = FALSE, xlim=c(0, 15), breaks = seq(from=0, to=15, by=0.5))
g0_l_norm2 <- optim(par = c(mean=0, sdlow=1, sdhigh=1), fn=fitnorm2, x=x)
g1_l_norm2 <- optim(par = c(mean=0, sd=1), fn=fitnorm2, x=x)

compare_AIC(symetric=g1_l_norm2, asymetric=g0_l_norm2)

x <- seq(from=0, to=15, length.out=101)
y <- dnorm2(x=x, mean=g0_l_norm2$par["mean"], sdlow = g0_l_norm2$par["sdlow"],  
            sdhigh = g0_l_norm2$par["sdhigh"], log = FALSE)
x <- c(x, x[1])
y <- c(y, 0)
polygon(x, y, border = NA, col=rgb(red = 1, blue = 0, green = 0, alpha = 0.2))

It is not so nice. There is a large scale change below and above fitted mean.

Let try now with the skew distribution.

library("sn")
library("HelpersMG")

fitsnorm <- function(par, x, fixed.parameter=NULL) {
  if (is.na(par["alpha"])) par <- c(par, alpha=0)
  par <- c(par, fixed.parameter)
  return(-sum(dsn(x=x, xi=par["xi"], omega = par["omega"], alpha = par["alpha"], log=TRUE)))
}

x <- rnorm(100)
hist(x, freq = FALSE, xlim=c(-3, 3), breaks = seq(from=-5, to=5, by=0.5))

g0_sn <- optim(par = c(xi=0, omega=1, alpha=0), fn=fitsnorm, x=x)
g1_sn <- optim(par = c(xi=0, omega=1), fn=fitsnorm, x=x, fixed.parameter=c(alpha=0))

compare_AIC(symetric=g1_sn, asymetric=g0_sn)

x <- seq(from=-5, to=5, length.out=101)
y <- dsn(x=x, xi=g1_sn$par["xi"], omega=g1_sn$par["omega"], log = FALSE)
x <- c(x, x[1])
y <- c(y, y[1])
polygon(x, y, border = NA, col=rgb(red = 1, blue = 0, green = 0, alpha = 0.2))


x <- rlnorm(100)
x[x>15] <-  NA
x <- na.omit(x)
hist(x, freq = FALSE, xlim=c(0, 15), breaks = seq(from=0, to=15, by=0.5))

g0_l_sn <- optim(par = c(xi=0, omega=1, alpha=0), fn=fitsnorm, x=x)
g1_l_sn <- optim(par = c(xi=0, omega=1), fn=fitsnorm, x=x, fixed.parameter=c(alpha=0))

compare_AIC(symetric=g1_l_sn, asymetric=g0_l_sn)

x <- seq(from=0, to=15, length.out=101)
y <- dsn(x=x, xi=g0_l_sn$par["xi"], omega=g0_l_sn$par["omega"], 
         alpha=g0_l_sn$par["alpha"], log = FALSE)
x <- c(x, x[1])
y <- c(y, 0)
polygon(x, y, border = NA, col=rgb(red = 1, blue = 0, green = 0, alpha = 0.2))

And finally which one is better:

> compare_AIC(doublenormal=g1_norm2 , skewnormal=g1_sn)
[1] "The lowest AIC (285.359) is for series skewnormal with Akaike weight=1.000"
                AIC DeltaAIC Akaike_weight
doublenormal 513.64   228.29          0.00
skewnormal   285.36     0.00          1.00


Another example with x <- rlnorm(1000, meanlog = 1.5, sdlog = 0.4)



Commentaires

Posts les plus consultés de ce blog

Standard error from Hessian Matrix... what can be done when problem occurs

stepAIC from package MASS with AICc

Install treemix in ubuntu 20.04