{-# LANGUAGE FunctionalDependencies #-}
{-|
Module      : Example.Regression
Description : Example of usage for Symbolic Regression
Copyright   : (c) Fabricio Olivetti de Franca, 2020
License     : GPL-3
Maintainer  : fabricio.olivetti@gmail.com
Stability   : experimental
Portability : POSIX

Configuration parsing and report generation.
-}
module ITEA.Config where

import IT
import IT.Algorithms
import IT.Mutation
import IT.Metrics
import IT.Random

import qualified Numeric.LinearAlgebra as LA
import qualified MachineLearning as ML
import Data.List.Split (splitOn)

-- | Class of types that can be validate
class Monoid a => Valid a b | a -> b, b -> a where
  validateConfig :: a -> b

-- | A parameter is either empty (None) or Has something
data Param a = None | Has a deriving Int -> Param a -> ShowS
[Param a] -> ShowS
Param a -> String
(Int -> Param a -> ShowS)
-> (Param a -> String) -> ([Param a] -> ShowS) -> Show (Param a)
forall a. Show a => Int -> Param a -> ShowS
forall a. Show a => [Param a] -> ShowS
forall a. Show a => Param a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Param a] -> ShowS
$cshowList :: forall a. Show a => [Param a] -> ShowS
show :: Param a -> String
$cshow :: forall a. Show a => Param a -> String
showsPrec :: Int -> Param a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Param a -> ShowS
Show

-- | Extract parameter. This is a partial function.
fromParam :: Param a -> a
fromParam :: Param a -> a
fromParam (Has a
x) = a
x
fromParam Param a
None    = String -> a
forall a. HasCallStack => String -> a
error String
"fromParam: empty value"

instance Semigroup (Param a) where
  Param a
p <> :: Param a -> Param a -> Param a
<> Param a
None = Param a
p
  Param a
_ <> Param a
p    = Param a
p
instance Monoid (Param a) where
  mempty :: Param a
mempty = Param a
forall a. Param a
None

-- | Unchecked mutation config 
data UncheckedMutationCfg = UMCfg { UncheckedMutationCfg -> Param (Int, Int)
_expLim   :: Param (Int, Int)
                                  , UncheckedMutationCfg -> Param (Int, Int)
_termLim  :: Param (Int, Int)
                                  , UncheckedMutationCfg -> Param Int
_nzExp    :: Param Int
                                  , UncheckedMutationCfg -> Param [Transformation]
_transFun :: Param [Transformation]
                                  , UncheckedMutationCfg -> Param [String]
_measures :: Param [String]
                                  }

-- | Validated mutation config 
data MutationCfg = MCfg (Int, Int) (Int, Int) Int [Transformation] [String] deriving Int -> MutationCfg -> ShowS
[MutationCfg] -> ShowS
MutationCfg -> String
(Int -> MutationCfg -> ShowS)
-> (MutationCfg -> String)
-> ([MutationCfg] -> ShowS)
-> Show MutationCfg
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MutationCfg] -> ShowS
$cshowList :: [MutationCfg] -> ShowS
show :: MutationCfg -> String
$cshow :: MutationCfg -> String
showsPrec :: Int -> MutationCfg -> ShowS
$cshowsPrec :: Int -> MutationCfg -> ShowS
Show

instance Semigroup UncheckedMutationCfg where
  (UMCfg Param (Int, Int)
p1 Param (Int, Int)
p2 Param Int
p3 Param [Transformation]
p4 Param [String]
p5) <> :: UncheckedMutationCfg
-> UncheckedMutationCfg -> UncheckedMutationCfg
<> (UMCfg Param (Int, Int)
q1 Param (Int, Int)
q2 Param Int
q3 Param [Transformation]
q4 Param [String]
q5) = Param (Int, Int)
-> Param (Int, Int)
-> Param Int
-> Param [Transformation]
-> Param [String]
-> UncheckedMutationCfg
UMCfg (Param (Int, Int)
p1Param (Int, Int) -> Param (Int, Int) -> Param (Int, Int)
forall a. Semigroup a => a -> a -> a
<>Param (Int, Int)
q1) (Param (Int, Int)
p2Param (Int, Int) -> Param (Int, Int) -> Param (Int, Int)
forall a. Semigroup a => a -> a -> a
<>Param (Int, Int)
q2) (Param Int
p3Param Int -> Param Int -> Param Int
forall a. Semigroup a => a -> a -> a
<>Param Int
q3) (Param [Transformation]
p4Param [Transformation]
-> Param [Transformation] -> Param [Transformation]
forall a. Semigroup a => a -> a -> a
<>Param [Transformation]
q4) (Param [String]
p5Param [String] -> Param [String] -> Param [String]
forall a. Semigroup a => a -> a -> a
<>Param [String]
q5)
instance Monoid UncheckedMutationCfg where
  mempty :: UncheckedMutationCfg
mempty = Param (Int, Int)
-> Param (Int, Int)
-> Param Int
-> Param [Transformation]
-> Param [String]
-> UncheckedMutationCfg
UMCfg Param (Int, Int)
forall a. Monoid a => a
mempty Param (Int, Int)
forall a. Monoid a => a
mempty Param Int
forall a. Monoid a => a
mempty Param [Transformation]
forall a. Monoid a => a
mempty Param [String]
forall a. Monoid a => a
mempty

-- | Generates a configuration with only '_expLim' holding a value.
exponents :: Int -> Int -> UncheckedMutationCfg
exponents :: Int -> Int -> UncheckedMutationCfg
exponents Int
x Int
y = UncheckedMutationCfg
forall a. Monoid a => a
mempty { _expLim :: Param (Int, Int)
_expLim   = (Int, Int) -> Param (Int, Int)
forall a. a -> Param a
Has (Int
x,Int
y) }

-- | Generates a configuration with only '_termLim' holding a value.
termLimit :: Int -> Int -> UncheckedMutationCfg
termLimit :: Int -> Int -> UncheckedMutationCfg
termLimit   Int
x Int
y = UncheckedMutationCfg
forall a. Monoid a => a
mempty { _termLim :: Param (Int, Int)
_termLim  = (Int, Int) -> Param (Int, Int)
forall a. a -> Param a
Has (Int
x,Int
y) }

-- | Generates a configuration with only '_nzExp' holding a value.
nonzeroExps :: Int -> UncheckedMutationCfg
nonzeroExps :: Int -> UncheckedMutationCfg
nonzeroExps Int
x = UncheckedMutationCfg
forall a. Monoid a => a
mempty { _nzExp :: Param Int
_nzExp = Int -> Param Int
forall a. a -> Param a
Has Int
x }

-- | Generates a configuration with only '_transFun' holding a value.
transFunctions :: [Transformation] -> UncheckedMutationCfg
transFunctions :: [Transformation] -> UncheckedMutationCfg
transFunctions  [Transformation]
fs  = UncheckedMutationCfg
forall a. Monoid a => a
mempty { _transFun :: Param [Transformation]
_transFun = [Transformation] -> Param [Transformation]
forall a. a -> Param a
Has [Transformation]
fs }

-- | Generates a configuration with only '_measures' holding a value.
measures :: [String] -> UncheckedMutationCfg
measures :: [String] -> UncheckedMutationCfg
measures [String]
ms = UncheckedMutationCfg
forall a. Monoid a => a
mempty { _measures :: Param [String]
_measures = [String] -> Param [String]
forall a. a -> Param a
Has [String]
ms }

instance Valid UncheckedMutationCfg MutationCfg where
  -- validateConfig :: UncheckedMutationCfg -> MutationCfg
  validateConfig :: UncheckedMutationCfg -> MutationCfg
validateConfig (UMCfg Param (Int, Int)
None Param (Int, Int)
_ Param Int
_ Param [Transformation]
_ Param [String]
_) = String -> MutationCfg
forall a. HasCallStack => String -> a
error String
"No exponent limits set"
  validateConfig (UMCfg Param (Int, Int)
_ Param (Int, Int)
None Param Int
_ Param [Transformation]
_ Param [String]
_) = String -> MutationCfg
forall a. HasCallStack => String -> a
error String
"No expression size limits set"
  validateConfig (UMCfg Param (Int, Int)
_ Param (Int, Int)
_ Param Int
None Param [Transformation]
_ Param [String]
_) = String -> MutationCfg
forall a. HasCallStack => String -> a
error String
"No maximum non-zero exponents set"
  validateConfig (UMCfg Param (Int, Int)
_ Param (Int, Int)
_ Param Int
_ Param [Transformation]
None Param [String]
_) = String -> MutationCfg
forall a. HasCallStack => String -> a
error String
"No transformation functions chosen"
  validateConfig (UMCfg Param (Int, Int)
_ Param (Int, Int)
_ Param Int
_ (Has []) Param [String]
_) = String -> MutationCfg
forall a. HasCallStack => String -> a
error String
"No transformation functions chosen"
  validateConfig (UMCfg Param (Int, Int)
_ Param (Int, Int)
_ Param Int
_ Param [Transformation]
_ Param [String]
None) = String -> MutationCfg
forall a. HasCallStack => String -> a
error String
"No error functions chosen"
  validateConfig (UMCfg Param (Int, Int)
_ Param (Int, Int)
_ Param Int
_ Param [Transformation]
_ (Has [])) = String -> MutationCfg
forall a. HasCallStack => String -> a
error String
"No error functions chosen"
  validateConfig UncheckedMutationCfg
c = (Int, Int)
-> (Int, Int) -> Int -> [Transformation] -> [String] -> MutationCfg
MCfg (UncheckedMutationCfg -> (Int, Int)
pexpLim UncheckedMutationCfg
c) (UncheckedMutationCfg -> (Int, Int)
ptermLim UncheckedMutationCfg
c) (UncheckedMutationCfg -> Int
pnzExp UncheckedMutationCfg
c) (UncheckedMutationCfg -> [Transformation]
ptransFun UncheckedMutationCfg
c) (UncheckedMutationCfg -> [String]
pmeasure UncheckedMutationCfg
c)
    where
      pexpLim :: UncheckedMutationCfg -> (Int, Int)
pexpLim   = Param (Int, Int) -> (Int, Int)
forall a. Param a -> a
fromParam (Param (Int, Int) -> (Int, Int))
-> (UncheckedMutationCfg -> Param (Int, Int))
-> UncheckedMutationCfg
-> (Int, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UncheckedMutationCfg -> Param (Int, Int)
_expLim
      ptermLim :: UncheckedMutationCfg -> (Int, Int)
ptermLim  = Param (Int, Int) -> (Int, Int)
forall a. Param a -> a
fromParam (Param (Int, Int) -> (Int, Int))
-> (UncheckedMutationCfg -> Param (Int, Int))
-> UncheckedMutationCfg
-> (Int, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UncheckedMutationCfg -> Param (Int, Int)
_termLim
      pnzExp :: UncheckedMutationCfg -> Int
pnzExp    = Param Int -> Int
forall a. Param a -> a
fromParam (Param Int -> Int)
-> (UncheckedMutationCfg -> Param Int)
-> UncheckedMutationCfg
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UncheckedMutationCfg -> Param Int
_nzExp
      ptransFun :: UncheckedMutationCfg -> [Transformation]
ptransFun = Param [Transformation] -> [Transformation]
forall a. Param a -> a
fromParam (Param [Transformation] -> [Transformation])
-> (UncheckedMutationCfg -> Param [Transformation])
-> UncheckedMutationCfg
-> [Transformation]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UncheckedMutationCfg -> Param [Transformation]
_transFun
      pmeasure :: UncheckedMutationCfg -> [String]
pmeasure  = Param [String] -> [String]
forall a. Param a -> a
fromParam (Param [String] -> [String])
-> (UncheckedMutationCfg -> Param [String])
-> UncheckedMutationCfg
-> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UncheckedMutationCfg -> Param [String]
_measures

getMaxTerms :: MutationCfg -> Int
getMaxTerms :: MutationCfg -> Int
getMaxTerms (MCfg (Int, Int)
_ (Int
_, Int
maxTerms) Int
_ [Transformation]
_ [String]
_) = Int
maxTerms

getMeasure :: MutationCfg -> [Measure]
getMeasure :: MutationCfg -> [Measure]
getMeasure  (MCfg (Int, Int)
_ (Int, Int)
_ Int
_ [Transformation]
_ [String]
m) = (String -> Measure) -> [String] -> [Measure]
forall a b. (a -> b) -> [a] -> [b]
map String -> Measure
toMeasure [String]
m

-- | Parse a numerical csv file into predictors and target variables
parseFile :: String -> (LA.Matrix Double, Vector)
parseFile :: String -> (Matrix Double, Vector)
parseFile String
css = Matrix Double -> (Matrix Double, Vector)
forall t. Element t => Matrix t -> (Matrix t, Vector t)
ML.splitToXY (Matrix Double -> (Matrix Double, Vector))
-> ([[Double]] -> Matrix Double)
-> [[Double]]
-> (Matrix Double, Vector)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[Double]] -> Matrix Double
forall t. Element t => [[t]] -> Matrix t
LA.fromLists ([[Double]] -> (Matrix Double, Vector))
-> [[Double]] -> (Matrix Double, Vector)
forall a b. (a -> b) -> a -> b
$ ([String] -> [Double]) -> [[String]] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map ((String -> Double) -> [String] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map String -> Double
forall a. Read a => String -> a
read) [[String]]
dat
  where
    dat :: [[String]]
dat = (String -> [String]) -> [String] -> [[String]]
forall a b. (a -> b) -> [a] -> [b]
map (String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
",") ([String] -> [[String]]) -> [String] -> [[String]]
forall a b. (a -> b) -> a -> b
$ String -> [String]
lines String
css

-- | Creates the mutation function and also returns the random term generator (for initialization)
withMutation :: MutationCfg -> Int -> (Mutation, Rnd Term)
withMutation :: MutationCfg -> Int -> (Mutation, Rnd Term)
withMutation (MCfg (Int, Int)
elim (Int, Int)
tlim Int
nzExp [Transformation]
transfun [String]
_) Int
dim = (Int
-> (Int, Int)
-> (Int, Int)
-> Rnd Term
-> Rnd Transformation
-> Mutation
mutFun Int
dim (Int, Int)
elim (Int, Int)
tlim Rnd Term
rndTerm Rnd Transformation
rndTrans, Rnd Term
rndTerm)
  where
    (Int
minExp, Int
maxExp) = (Int, Int)
elim
    rndInter :: Rnd Interaction
rndInter = Int -> Int -> Int -> Int -> Rnd Interaction
sampleInterMax Int
dim Int
nzExp Int
minExp Int
maxExp
    rndTrans :: Rnd Transformation
rndTrans = [Transformation] -> Rnd Transformation
sampleTrans [Transformation]
transfun -- (map read transfun)
    rndTerm :: Rnd Term
rndTerm  = Rnd Transformation -> Rnd Interaction -> Rnd Term
sampleTerm Rnd Transformation
rndTrans Rnd Interaction
rndInter

-- * Datasets configuration

data UncheckedDatasets = UD { UncheckedDatasets -> Param String
_trainset :: Param String, UncheckedDatasets -> Param String
_testset :: Param String }  deriving Int -> UncheckedDatasets -> ShowS
[UncheckedDatasets] -> ShowS
UncheckedDatasets -> String
(Int -> UncheckedDatasets -> ShowS)
-> (UncheckedDatasets -> String)
-> ([UncheckedDatasets] -> ShowS)
-> Show UncheckedDatasets
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UncheckedDatasets] -> ShowS
$cshowList :: [UncheckedDatasets] -> ShowS
show :: UncheckedDatasets -> String
$cshow :: UncheckedDatasets -> String
showsPrec :: Int -> UncheckedDatasets -> ShowS
$cshowsPrec :: Int -> UncheckedDatasets -> ShowS
Show
data Datasets = D String String deriving Int -> Datasets -> ShowS
[Datasets] -> ShowS
Datasets -> String
(Int -> Datasets -> ShowS)
-> (Datasets -> String) -> ([Datasets] -> ShowS) -> Show Datasets
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Datasets] -> ShowS
$cshowList :: [Datasets] -> ShowS
show :: Datasets -> String
$cshow :: Datasets -> String
showsPrec :: Int -> Datasets -> ShowS
$cshowsPrec :: Int -> Datasets -> ShowS
Show

-- | sets the training and test data set names 
trainingset, testset :: String -> UncheckedDatasets
trainingset :: String -> UncheckedDatasets
trainingset String
name = UncheckedDatasets
forall a. Monoid a => a
mempty { _trainset :: Param String
_trainset = String -> Param String
forall a. a -> Param a
Has String
name }
testset :: String -> UncheckedDatasets
testset     String
name = UncheckedDatasets
forall a. Monoid a => a
mempty { _testset :: Param String
_testset = String -> Param String
forall a. a -> Param a
Has String
name }

instance Semigroup UncheckedDatasets where
  (UD Param String
p1 Param String
p2) <> :: UncheckedDatasets -> UncheckedDatasets -> UncheckedDatasets
<> (UD Param String
q1 Param String
q2) = Param String -> Param String -> UncheckedDatasets
UD (Param String
p1Param String -> Param String -> Param String
forall a. Semigroup a => a -> a -> a
<>Param String
q1) (Param String
p2Param String -> Param String -> Param String
forall a. Semigroup a => a -> a -> a
<>Param String
q2)
instance Monoid UncheckedDatasets where
  mempty :: UncheckedDatasets
mempty = Param String -> Param String -> UncheckedDatasets
UD Param String
forall a. Monoid a => a
mempty Param String
forall a. Monoid a => a
mempty

instance Valid UncheckedDatasets Datasets where
  validateConfig :: UncheckedDatasets -> Datasets
validateConfig (UD Param String
None Param String
_) = String -> Datasets
forall a. HasCallStack => String -> a
error String
"No training data was set"
  validateConfig (UD Param String
_ Param String
None) = String -> Datasets
forall a. HasCallStack => String -> a
error String
"No test data was set"
  validateConfig (UD Param String
tr Param String
te) = String -> String -> Datasets
D (Param String -> String
forall a. Param a -> a
fromParam Param String
tr) (Param String -> String
forall a. Param a -> a
fromParam Param String
te)