{-# LANGUAGE TypeFamilies #-}
module ITEA.Regression where
import IT
import IT.Random
import IT.ITEA
import IT.Algorithms
import IT.FI2POP
import IT.Regression
import IT.Shape
import ITEA.Config
import ITEA.Report
import Data.List.NonEmpty hiding (map)
import qualified Numeric.LinearAlgebra as LA
import qualified Data.Vector as V
import Control.Monad.State
import System.Random
type AlgRunner = Rnd Population -> Mutation -> Fitness -> StdGen -> [Population]
readAndParse :: FilePath -> IO (LA.Matrix Double, Column Double)
readAndParse :: FilePath -> IO (Matrix Double, Column Double)
readAndParse FilePath
f = do (Matrix Double
xss, Column Double
ys) <- FilePath -> (Matrix Double, Column Double)
parseFile (FilePath -> (Matrix Double, Column Double))
-> IO FilePath -> IO (Matrix Double, Column Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> IO FilePath
readFile FilePath
f
(Matrix Double, Column Double) -> IO (Matrix Double, Column Double)
forall (m :: * -> *) a. Monad m => a -> m a
return (Matrix Double
1.0 Matrix Double -> Matrix Double -> Matrix Double
forall t. Element t => Matrix t -> Matrix t -> Matrix t
LA.||| Matrix Double
xss, Column Double
ys)
toVecOfColumns :: LA.Matrix Double -> Dataset Double
toVecOfColumns :: Matrix Double -> Dataset Double
toVecOfColumns = [Column Double] -> Dataset Double
forall a. [a] -> Vector a
V.fromList ([Column Double] -> Dataset Double)
-> (Matrix Double -> [Column Double])
-> Matrix Double
-> Dataset Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Double -> [Column Double]
forall t. Element t => Matrix t -> [Vector t]
LA.toColumns
takeNRows, dropNRows :: Int -> LA.Matrix Double -> LA.Matrix Double
takeNRows :: Int -> Matrix Double -> Matrix Double
takeNRows Int
n Matrix Double
xss = Matrix Double
xss Matrix Double -> (Extractor, Extractor) -> Matrix Double
forall t.
Element t =>
Matrix t -> (Extractor, Extractor) -> Matrix t
LA.?? (Int -> Extractor
LA.Take Int
n, Extractor
LA.All)
dropNRows :: Int -> Matrix Double -> Matrix Double
dropNRows Int
n Matrix Double
xss = Matrix Double
xss Matrix Double -> (Extractor, Extractor) -> Matrix Double
forall t.
Element t =>
Matrix t -> (Extractor, Extractor) -> Matrix t
LA.?? (Int -> Extractor
LA.Drop Int
n, Extractor
LA.All)
splitValidation :: Double -> LA.Matrix Double -> LA.Vector Double
-> (Dataset Double, Column Double, Dataset Double, Column Double)
splitValidation :: Double
-> Matrix Double
-> Column Double
-> (Dataset Double, Column Double, Dataset Double, Column Double)
splitValidation Double
ratio Matrix Double
xss Column Double
ys
| Int
nRows Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
20 = (Matrix Double -> Dataset Double
toVecOfColumns Matrix Double
xss, Column Double
ys, Matrix Double -> Dataset Double
toVecOfColumns Matrix Double
xss, Column Double
ys)
| Bool
otherwise = (Dataset Double
xss_train, Column Double
y_train, Dataset Double
xss_val, Column Double
y_val)
where
nRows :: Int
nRows = Matrix Double -> Int
forall t. Matrix t -> Int
LA.rows Matrix Double
xss
nRowsTrain :: Int
nRowsTrain = Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nRows Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
ratio)
nRowsVal :: Int
nRowsVal = Int
nRows Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nRowsTrain
xss_train :: Dataset Double
xss_train = Matrix Double -> Dataset Double
toVecOfColumns (Matrix Double -> Dataset Double)
-> Matrix Double -> Dataset Double
forall a b. (a -> b) -> a -> b
$ Int -> Matrix Double -> Matrix Double
takeNRows Int
nRowsTrain Matrix Double
xss
xss_val :: Dataset Double
xss_val = Matrix Double -> Dataset Double
toVecOfColumns (Matrix Double -> Dataset Double)
-> Matrix Double -> Dataset Double
forall a b. (a -> b) -> a -> b
$ Int -> Matrix Double -> Matrix Double
dropNRows Int
nRowsTrain Matrix Double
xss
y_train :: Column Double
y_train = Int -> Int -> Column Double -> Column Double
forall t. Storable t => Int -> Int -> Vector t -> Vector t
LA.subVector Int
0 Int
nRowsTrain Column Double
ys
y_val :: Column Double
y_val = Int -> Int -> Column Double -> Column Double
forall t. Storable t => Int -> Int -> Vector t -> Vector t
LA.subVector Int
nRowsTrain Int
nRowsVal Column Double
ys
run :: AlgRunner
-> Datasets
-> MutationCfg
-> Output
-> Int
-> Int
-> Task
-> Penalty
-> [Shape]
-> Domains
-> IO ()
run :: AlgRunner
-> Datasets
-> MutationCfg
-> Output
-> Int
-> Int
-> Task
-> Penalty
-> [Shape]
-> Domains
-> IO ()
run AlgRunner
alg (D FilePath
tr FilePath
te) MutationCfg
mcfg Output
output Int
nPop Int
nGens Task
task Penalty
penalty [Shape]
shapes Domains
domains =
do StdGen
g <- IO StdGen
forall (m :: * -> *). MonadIO m => m StdGen
newStdGen
(Matrix Double
trainX, Column Double
trainY) <- FilePath -> IO (Matrix Double, Column Double)
readAndParse FilePath
tr
(Matrix Double
testX, Column Double
testY ) <- FilePath -> IO (Matrix Double, Column Double)
readAndParse FilePath
te
let
xss_all :: Dataset Double
xss_all = Matrix Double -> Dataset Double
toVecOfColumns Matrix Double
trainX
xss_test :: Dataset Double
xss_test = Matrix Double -> Dataset Double
toVecOfColumns Matrix Double
testX
(Dataset Double
xss_train, Column Double
y_train, Dataset Double
xss_val, Column Double
y_val) = Double
-> Matrix Double
-> Column Double
-> (Dataset Double, Column Double, Dataset Double, Column Double)
splitValidation Double
0.5 Matrix Double
trainX Column Double
trainY
measureList :: NonEmpty Measure
measureList = [Measure] -> NonEmpty Measure
forall a. [a] -> NonEmpty a
fromList ([Measure] -> NonEmpty Measure) -> [Measure] -> NonEmpty Measure
forall a b. (a -> b) -> a -> b
$ MutationCfg -> [Measure]
getMeasure MutationCfg
mcfg
fitTrain :: Expr -> Maybe Solution
fitTrain = Task
-> NonEmpty Measure
-> Constraint
-> Penalty
-> Dataset Double
-> Column Double
-> Dataset Double
-> Column Double
-> Expr
-> Maybe Solution
evalTrain Task
task NonEmpty Measure
measureList ([Shape] -> Domains -> Constraint
fromShapes [Shape]
shapes Domains
domains) Penalty
penalty Dataset Double
xss_train Column Double
y_train Dataset Double
xss_val Column Double
y_val
refit :: Expr -> Maybe Solution
refit = Task
-> NonEmpty Measure
-> Constraint
-> Penalty
-> Dataset Double
-> Column Double
-> Dataset Double
-> Column Double
-> Expr
-> Maybe Solution
evalTrain Task
task NonEmpty Measure
measureList ([Shape] -> Domains -> Constraint
fromShapes [Shape]
shapes Domains
domains) Penalty
penalty Dataset Double
xss_all Column Double
trainY Dataset Double
xss_all Column Double
trainY
fitTest :: Solution -> Maybe [Double]
fitTest = Task
-> NonEmpty Measure
-> Dataset Double
-> Column Double
-> Solution
-> Maybe [Double]
evalTest Task
task NonEmpty Measure
measureList Dataset Double
xss_test Column Double
testY
dim :: Int
dim = Matrix Double -> Int
forall t. Matrix t -> Int
LA.cols Matrix Double
trainX Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
(Mutation
mutFun, Rnd Term
rndTerm) = MutationCfg -> Int -> (Mutation, Rnd Term)
withMutation MutationCfg
mcfg Int
dim
p0 :: Rnd Population
p0 = Int
-> Int -> Rnd Term -> (Expr -> Maybe Solution) -> Rnd Population
initialPop (MutationCfg -> Int
getMaxTerms MutationCfg
mcfg) Int
nPop Rnd Term
rndTerm Expr -> Maybe Solution
fitTrain
gens :: [Population]
gens = AlgRunner
alg Rnd Population
p0 Mutation
mutFun Expr -> Maybe Solution
fitTrain StdGen
g
Output
-> NonEmpty Measure
-> [Population]
-> Int
-> (Solution -> Maybe [Double])
-> (Expr -> Maybe Solution)
-> IO ()
genReports Output
output NonEmpty Measure
measureList [Population]
gens Int
nGens Solution -> Maybe [Double]
fitTest Expr -> Maybe Solution
refit
runITEA, runFI2POP :: AlgRunner
runITEA :: AlgRunner
runITEA Rnd Population
p0 Mutation
mutFun Expr -> Maybe Solution
fitTrain StdGen
g = (Rnd Population
p0 Rnd Population
-> (Population -> StateT StdGen Identity [Population])
-> StateT StdGen Identity [Population]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Mutation
-> (Expr -> Maybe Solution)
-> Population
-> StateT StdGen Identity [Population]
itea Mutation
mutFun Expr -> Maybe Solution
fitTrain) StateT StdGen Identity [Population] -> StdGen -> [Population]
forall s a. State s a -> s -> a
`evalState` StdGen
g
runFI2POP :: AlgRunner
runFI2POP Rnd Population
p0 Mutation
mutFun Expr -> Maybe Solution
fitTrain StdGen
g =
let p0' :: StateT StdGen Identity (Population, Population)
p0' = Population -> (Population, Population)
splitPop (Population -> (Population, Population))
-> Rnd Population
-> StateT StdGen Identity (Population, Population)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Rnd Population
p0
p :: [(Population, Population)]
p = (StateT StdGen Identity (Population, Population)
p0' StateT StdGen Identity (Population, Population)
-> ((Population, Population)
-> StateT StdGen Identity [(Population, Population)])
-> StateT StdGen Identity [(Population, Population)]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Mutation
-> (Expr -> Maybe Solution)
-> (Population, Population)
-> StateT StdGen Identity [(Population, Population)]
fi2pop Mutation
mutFun Expr -> Maybe Solution
fitTrain) StateT StdGen Identity [(Population, Population)]
-> StdGen -> [(Population, Population)]
forall s a. State s a -> s -> a
`evalState` StdGen
g
in ((Population, Population) -> Population)
-> [(Population, Population)] -> [Population]
forall a b. (a -> b) -> [a] -> [b]
map (Population, Population) -> Population
forall a b. (a, b) -> a
fst [(Population, Population)]
p