{-|
Module      : MachineLearning.Utils.Report
Description : TIR expression data structures
Copyright   : (c) Fabricio Olivetti de Franca, 2022
License     : GPL-3
Maintainer  : fabricio.olivetti@gmail.com
Stability   : experimental
Portability : POSIX

Report generation.
-}
module MachineLearning.Utils.Report where

import           System.Directory
import           System.IO
import           System.Clock
import           Data.Maybe                  (fromMaybe)
import           Data.List                   (intercalate, foldl')
import           Data.List.Split             (splitOn)
import           Data.Int                    (Int64)
import qualified Data.Vector           as V
import qualified Data.Vector.Storable  as VS
import qualified Numeric.LinearAlgebra as LA

import Control.Evolution (Population)
import MachineLearning.Model.Measure                         (Measure(..))
import MachineLearning.TIR                         (Individual(..), assembleTree, replaceConsts)
import Data.SRTree.Print                            (showDefault, showPython)
import MachineLearning.Utils.Config                          (Config(..), Output(..), IOCfg(..), Task(..), AlgorithmCfg(..), getLogType, getMeasures)
import Data.SRTree (SRTree(..))

-- | Creates a file if it does not exist
createIfDoesNotExist :: FilePath -> IO Handle
createIfDoesNotExist :: FilePath -> IO Handle
createIfDoesNotExist FilePath
fname = do
  Bool
isCreated <- FilePath -> IO Bool
doesFileExist FilePath
fname
  if   Bool
isCreated
  then FilePath -> IOMode -> IO Handle
openFile FilePath
fname IOMode
AppendMode
  else FilePath -> IOMode -> IO Handle
openFile FilePath
fname IOMode
WriteMode

-- | writes all the stats from the final champion solution
writeChampionStats :: Config -> (Individual -> Maybe [Double]) -> Int64 -> Individual -> IO ()
writeChampionStats :: Config
-> (Individual -> Maybe [Double]) -> Int64 -> Individual -> IO ()
writeChampionStats Config
cfg Individual -> Maybe [Double]
fitTest Int64
totTime Individual
champion = do
  let dirname :: FilePath
dirname = case Config -> Output
getLogType Config
cfg of
                  Output
Screen         -> FilePath
"results/"
                  PartialLog FilePath
dir -> FilePath
dir
                  EvoLog FilePath
dir     -> FilePath
dir
      statsFname :: FilePath
statsFname = FilePath
dirname FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"/stats.json"
      exprFname :: FilePath
exprFname  = FilePath
dirname FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"/exprs.csv"
      cfgFname :: FilePath
cfgFname   = FilePath
dirname FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"/parameters.csv"
      task :: Task
task       = AlgorithmCfg -> Task
_task (AlgorithmCfg -> Task) -> AlgorithmCfg -> Task
forall a b. (a -> b) -> a -> b
$ Config -> AlgorithmCfg
_algorithmCfg Config
cfg
  Bool -> FilePath -> IO ()
createDirectoryIfMissing Bool
True FilePath
dirname
  FilePath -> Config -> IO ()
writeCfg FilePath
cfgFname Config
cfg
  Task -> FilePath -> Individual -> IO ()
writeExprs Task
task FilePath
exprFname Individual
champion
  FilePath
-> (Individual -> Maybe [Double])
-> [Measure]
-> Int64
-> Individual
-> IO ()
writeStats FilePath
statsFname Individual -> Maybe [Double]
fitTest (Config -> [Measure]
getMeasures Config
cfg) Int64
totTime Individual
champion

-- | writes the stats to a file
writeStats :: FilePath -> (Individual -> Maybe [Double]) -> [Measure] -> Int64 -> Individual -> IO ()
writeStats :: FilePath
-> (Individual -> Maybe [Double])
-> [Measure]
-> Int64
-> Individual
-> IO ()
writeStats FilePath
statsFname Individual -> Maybe [Double]
fitTest [Measure]
measures Int64
totTime Individual
champion = do
    Handle
h <- FilePath -> IO Handle
createIfDoesNotExist FilePath
statsFname
    Handle -> FilePath -> IO ()
hPutStrLn Handle
h (FilePath -> IO ()) -> FilePath -> IO ()
forall a b. (a -> b) -> a -> b
$ FilePath
"{" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath -> [FilePath] -> FilePath
forall a. [a] -> [[a]] -> [a]
intercalate FilePath
"," [FilePath]
json FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"}"
    Handle -> IO ()
hClose Handle
h
  where
    mNames :: [FilePath]
mNames     = (Measure -> FilePath) -> [Measure] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map Measure -> FilePath
_name [Measure]
measures
    trainNames :: [FilePath]
trainNames = (FilePath -> FilePath) -> [FilePath] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map (FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++FilePath
"_train") [FilePath]
mNames
    trainErrors :: [FilePath]
trainErrors = (Double -> FilePath) -> [Double] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map Double -> FilePath
forall a. Show a => a -> FilePath
show ([Double] -> [FilePath]) -> [Double] -> [FilePath]
forall a b. (a -> b) -> a -> b
$ Individual -> [Double]
_fit Individual
champion
    testErrors :: [FilePath]
testErrors  = (Double -> FilePath) -> [Double] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map Double -> FilePath
forall a. Show a => a -> FilePath
show ([Double] -> [FilePath]) -> [Double] -> [FilePath]
forall a b. (a -> b) -> a -> b
$ [Double] -> Maybe [Double] -> [Double]
forall a. a -> Maybe a -> a
fromMaybe [Double]
nans (Maybe [Double] -> [Double]) -> Maybe [Double] -> [Double]
forall a b. (a -> b) -> a -> b
$ Individual -> Maybe [Double]
fitTest Individual
champion
    testNames :: [FilePath]
testNames  = (FilePath -> FilePath) -> [FilePath] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map (FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++FilePath
"_test")  [FilePath]
mNames
    fields :: [FilePath]
fields     = FilePath
"tot_time" FilePath -> [FilePath] -> [FilePath]
forall a. a -> [a] -> [a]
: ([FilePath]
trainNames [FilePath] -> [FilePath] -> [FilePath]
forall a. [a] -> [a] -> [a]
++ [FilePath]
testNames)
    nans :: [Double]
nans       = Int -> Double -> [Double]
forall a. Int -> a -> [a]
replicate ([FilePath] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [FilePath]
testNames) (Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
0)
    errors :: [FilePath]
errors     = Int64 -> FilePath
forall a. Show a => a -> FilePath
show Int64
totTime FilePath -> [FilePath] -> [FilePath]
forall a. a -> [a] -> [a]
: ([FilePath]
trainErrors [FilePath] -> [FilePath] -> [FilePath]
forall a. [a] -> [a] -> [a]
++ [FilePath]
testErrors)
    toJson :: a -> FilePath -> FilePath
toJson a
k FilePath
v = a -> FilePath
forall a. Show a => a -> FilePath
show a
k FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
" : " FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
v
    json :: [FilePath]
json       = (FilePath -> FilePath -> FilePath)
-> [FilePath] -> [FilePath] -> [FilePath]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith FilePath -> FilePath -> FilePath
forall {a}. Show a => a -> FilePath -> FilePath
toJson [FilePath]
fields [FilePath]
errors

-- | creates a report file
writeCfg :: FilePath -> Config -> IO ()
writeCfg :: FilePath -> Config -> IO ()
writeCfg FilePath
cfgFname Config
cfg = do
  Handle
h <- FilePath -> IO Handle
createIfDoesNotExist FilePath
cfgFname
  Handle -> Config -> IO ()
forall a. Show a => Handle -> a -> IO ()
hPrint Handle
h Config
cfg
  Handle -> IO ()
hClose Handle
h

-- | pretty write the expressions
writeExprs :: Task -> FilePath -> Individual -> IO ()
writeExprs :: Task -> FilePath -> Individual -> IO ()
writeExprs Task
task FilePath
exprFname Individual
champion = do
    Handle
h <- FilePath -> IO Handle
createIfDoesNotExist FilePath
exprFname
    Handle -> FilePath -> IO ()
hPutStrLn Handle
h (FilePath -> IO ()) -> FilePath -> IO ()
forall a b. (a -> b) -> a -> b
$ [FilePath] -> FilePath
unlines [FilePath]
logs
    Handle -> FilePath -> IO ()
hPutStrLn Handle
h FilePath
"======================================"
    Handle -> IO ()
hClose Handle
h
  where
    tir :: TIR
tir    = Individual -> TIR
_chromo Individual
champion
    trees :: [SRTree Int Double]
trees  = (Vector Double -> SRTree Int Double)
-> [Vector Double] -> [SRTree Int Double]
forall a b. (a -> b) -> [a] -> [b]
map Vector Double -> SRTree Int Double
getTree [Vector Double]
ws
    ws :: [Vector Double]
ws     = Individual -> [Vector Double]
_weights Individual
champion
    bias :: Double
bias   = Vector Double -> Double
forall a. Vector a -> a
V.head (Vector Double -> Double) -> Vector Double -> Double
forall a b. (a -> b) -> a -> b
$ Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VS.convert (Vector Double -> Vector Double) -> Vector Double -> Vector Double
forall a b. (a -> b) -> a -> b
$ [Vector Double] -> Vector Double
forall a. [a] -> a
head [Vector Double]
ws
    logs :: [FilePath]
logs   = (SRTree Int Double -> Vector Double -> FilePath)
-> [SRTree Int Double] -> [Vector Double] -> [FilePath]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\SRTree Int Double
t Vector Double
w -> FilePath
"\"" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ SRTree Int Double -> FilePath
forall {ix} {val}. (Show ix, Show val) => SRTree ix val -> FilePath
showDefault SRTree Int Double
t FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"\",\"" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Vector Double -> FilePath
forall a. Show a => a -> FilePath
show Vector Double
w FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"\",\"" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ SRTree Int Double -> FilePath
forall {ix} {val}. (Show ix, Show val) => SRTree ix val -> FilePath
showPython SRTree Int Double
t FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"\"") [SRTree Int Double]
trees [Vector Double]
ws 

    getTree :: LA.Vector Double -> SRTree Int Double
    getTree :: Vector Double -> SRTree Int Double
getTree Vector Double
w = let bias :: Double
bias   = Vector Double -> Double
forall a. Vector a -> a
V.head (Vector Double -> Double) -> Vector Double -> Double
forall a b. (a -> b) -> a -> b
$ Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VS.convert Vector Double
w
                    consts :: Vector Double
consts = Vector Double -> Vector Double
forall a. Vector a -> Vector a
V.tail (Vector Double -> Vector Double) -> Vector Double -> Vector Double
forall a b. (a -> b) -> a -> b
$ Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VS.convert Vector Double
w
                    sigm :: a -> a
sigm a
z = a
1 a -> a -> a
forall a. Fractional a => a -> a -> a
/ (a
1a -> a -> a
forall a. Num a => a -> a -> a
+a -> a
forall a. Floating a => a -> a
exp(-a
z))
                in  case Task
task of 
                      Classification Int
_ -> SRTree Int Double -> SRTree Int Double
forall a. Floating a => a -> a
sigm (SRTree Int Double -> SRTree Int Double)
-> SRTree Int Double -> SRTree Int Double
forall a b. (a -> b) -> a -> b
$ Double -> TIR -> SRTree Int Double
assembleTree Double
bias (TIR -> SRTree Int Double) -> TIR -> SRTree Int Double
forall a b. (a -> b) -> a -> b
$ TIR -> Vector Double -> TIR
replaceConsts TIR
tir Vector Double
consts
                      ClassMult      Int
_ -> SRTree Int Double -> SRTree Int Double
forall a. Floating a => a -> a
sigm (SRTree Int Double -> SRTree Int Double)
-> SRTree Int Double -> SRTree Int Double
forall a b. (a -> b) -> a -> b
$ Double -> TIR -> SRTree Int Double
assembleTree Double
bias (TIR -> SRTree Int Double) -> TIR -> SRTree Int Double
forall a b. (a -> b) -> a -> b
$ TIR -> Vector Double -> TIR
replaceConsts TIR
tir Vector Double
consts
                      Task
_                -> Double -> TIR -> SRTree Int Double
assembleTree Double
bias (TIR -> SRTree Int Double) -> TIR -> SRTree Int Double
forall a b. (a -> b) -> a -> b
$ TIR -> Vector Double -> TIR
replaceConsts TIR
tir Vector Double
consts

-- | creates a log of the evolution process
evoLog :: Handle -> (Individual -> Maybe [Double]) -> Population Individual -> IO ()
evoLog :: Handle
-> (Individual -> Maybe [Double]) -> Population Individual -> IO ()
evoLog Handle
h Individual -> Maybe [Double]
fitness Population Individual
pop = do
  let fitTrain :: [Double]
fitTrain   = Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (Individual -> Double) -> Population Individual -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map ([Double] -> Double
forall a. [a] -> a
head([Double] -> Double)
-> (Individual -> [Double]) -> Individual -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Individual -> [Double]
_fit) Population Individual
pop
      fitTest :: [Double]
fitTest    = Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (Individual -> Double) -> Population Individual -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map (Maybe [Double] -> Double
replaceWithNan (Maybe [Double] -> Double)
-> (Individual -> Maybe [Double]) -> Individual -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Individual -> Maybe [Double]
fitness) Population Individual
pop
      statsTrain :: [Double]
statsTrain = [Double] -> [Double]
getStats [Double]
fitTrain
      statsTest :: [Double]
statsTest  = [Double] -> [Double]
getStats [Double]
fitTest
  Handle -> FilePath -> IO ()
hPutStrLn Handle
h (FilePath -> IO ()) -> FilePath -> IO ()
forall a b. (a -> b) -> a -> b
$ [Double] -> FilePath
forall a. Show a => [a] -> FilePath
statsToStr [Double]
statsTrain FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"," FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ [Double] -> FilePath
forall a. Show a => [a] -> FilePath
statsToStr [Double]
statsTest

getStats :: [Double] -> [Double]
getStats :: [Double] -> [Double]
getStats = [Double] -> [Double]
postAgg ([Double] -> [Double])
-> ([Double] -> [Double]) -> [Double] -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Double] -> Double -> [Double])
-> [Double] -> [Double] -> [Double]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' [Double] -> Double -> [Double]
aggregate []

statsToStr :: Show a => [a] -> String
statsToStr :: forall a. Show a => [a] -> FilePath
statsToStr = FilePath -> [FilePath] -> FilePath
forall a. [a] -> [[a]] -> [a]
intercalate FilePath
"," ([FilePath] -> FilePath) -> ([a] -> [FilePath]) -> [a] -> FilePath
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> FilePath) -> [a] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map a -> FilePath
forall a. Show a => a -> FilePath
show

replaceWithNan :: Maybe [Double] -> Double
replaceWithNan :: Maybe [Double] -> Double
replaceWithNan Maybe [Double]
Nothing      = Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
0
replaceWithNan (Just [])    = Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
0
replaceWithNan (Just (Double
x:[Double]
_)) = Double
x

-- | Opens the first file available in the format "name.{i}.csv"
-- where 'i' follows a sequence from 0 onward.
openNext :: String -> IO Handle
openNext :: FilePath -> IO Handle
openNext FilePath
fname = [FilePath] -> IO Handle
go [FilePath
fname FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"." FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Int -> FilePath
forall a. Show a => a -> FilePath
show Int
n FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
".csv" | Int
n <- [Int
0 :: Int ..]]
  where
    -- this is a partial function applied to an infinite list
    -- so, what harm can it do?
    go :: [FilePath] -> IO Handle
go []       = FilePath -> IO Handle
forall a. HasCallStack => FilePath -> a
error FilePath
"end of inifinity stream"
    go (FilePath
fn:[FilePath]
fns) = do Bool
b <- FilePath -> IO Bool
doesFileExist FilePath
fn
                     if Bool
b
                        then [FilePath] -> IO Handle
go [FilePath]
fns
                        else FilePath -> IOMode -> IO Handle
openFile FilePath
fn IOMode
WriteMode

postAgg :: [Double] -> [Double]
postAgg :: [Double] -> [Double]
postAgg [Double
best, Double
worst, Double
tot, Double
count] = [Double
best, Double
worst, Double
totDouble -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
count]
postAgg [Double]
_ = FilePath -> [Double]
forall a. HasCallStack => FilePath -> a
error FilePath
"wrong parameters count"

-- | aggregates the best, worst and average solutions
aggregate :: [Double] -> Double -> [Double]
aggregate :: [Double] -> Double -> [Double]
aggregate [] Double
train = [Double
train,Double
train,Double
train,Double
1]
aggregate [Double
best, Double
worst, Double
tot, Double
count] Double
train = [Double -> Double -> Double
forall a. Ord a => a -> a -> a
min Double
best Double
train, Double -> Double -> Double
forall a. Ord a => a -> a -> a
max Double
worst Double
train, Double
totDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
train, Double
countDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
1]
aggregate [Double]
_ Double
_ = FilePath -> [Double]
forall a. HasCallStack => FilePath -> a
error FilePath
"wrong parameters count in aggregate"

-- | creates a log name instead of overwritting last one
createLogName :: Config -> FilePath
createLogName :: Config -> FilePath
createLogName Config
cfg = FilePath
fileDir FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"/" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
dataset FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"_evo"
  where
    fileDir :: FilePath
fileDir = case Config -> Output
getLogType Config
cfg of
                EvoLog FilePath
dir -> FilePath
dir
                Output
_           -> FilePath
""
    dataset :: FilePath
dataset = [FilePath] -> FilePath
forall a. [a] -> a
last ([FilePath] -> FilePath) -> [FilePath] -> FilePath
forall a b. (a -> b) -> a -> b
$ FilePath -> FilePath -> [FilePath]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn FilePath
"/" (FilePath -> [FilePath]) -> FilePath -> [FilePath]
forall a b. (a -> b) -> a -> b
$ (IOCfg -> FilePath
_trainFilename (IOCfg -> FilePath) -> (Config -> IOCfg) -> Config -> FilePath
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Config -> IOCfg
_ioCfg) Config
cfg

-- | creates an IO logger function
makeLogger :: Config -> (Individual -> Maybe [Double]) -> IO (Population Individual -> IO (), Maybe Handle)
makeLogger :: Config
-> (Individual -> Maybe [Double])
-> IO (Population Individual -> IO (), Maybe Handle)
makeLogger Config
cfg Individual -> Maybe [Double]
fitTest = case Config -> Output
getLogType Config
cfg of
                           EvoLog FilePath
dir -> do Bool -> FilePath -> IO ()
createDirectoryIfMissing Bool
True FilePath
dir
                                            Handle
h <- FilePath -> IO Handle
openNext (Config -> FilePath
createLogName Config
cfg)
                                            Handle -> FilePath -> IO ()
hPutStrLn Handle
h FilePath
"bestTrain,worstTrain,avgTrain,bestTest,worstTest,avgTest"
                                            (Population Individual -> IO (), Maybe Handle)
-> IO (Population Individual -> IO (), Maybe Handle)
forall (m :: * -> *) a. Monad m => a -> m a
return (Handle
-> (Individual -> Maybe [Double]) -> Population Individual -> IO ()
evoLog Handle
h Individual -> Maybe [Double]
fitTest, Handle -> Maybe Handle
forall a. a -> Maybe a
Just Handle
h)
                           Output
_          -> (Population Individual -> IO (), Maybe Handle)
-> IO (Population Individual -> IO (), Maybe Handle)
forall (m :: * -> *) a. Monad m => a -> m a
return (\Population Individual
_ -> () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return (), Maybe Handle
forall a. Maybe a
Nothing)

-- | closes a file
closeIfJust :: Maybe Handle -> IO ()
closeIfJust :: Maybe Handle -> IO ()
closeIfJust Maybe Handle
Nothing  = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
closeIfJust (Just Handle
h) = Handle -> IO ()
hClose Handle
h