{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TypeApplications #-}
{-|
Module      : Example.Regression
Description : Example of usage for Symbolic Regression
Copyright   : (c) Fabricio Olivetti de Franca, 2020
License     : GPL-3
Maintainer  : fabricio.olivetti@gmail.com
Stability   : experimental
Portability : POSIX

Configuration parsing and report generation.
-}
module ITEA.Report where

import System.Directory
import System.IO
import System.Clock

import IT
import IT.Algorithms
import IT.Metrics

import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NE
import qualified Numeric.LinearAlgebra as LA
import Data.Maybe
import Data.List (intercalate, foldl')
import Control.Monad

-- | Output configuration  
data Output = Screen | PartialLog String | FullLog String deriving ReadPrec [Output]
ReadPrec Output
Int -> ReadS Output
ReadS [Output]
(Int -> ReadS Output)
-> ReadS [Output]
-> ReadPrec Output
-> ReadPrec [Output]
-> Read Output
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Output]
$creadListPrec :: ReadPrec [Output]
readPrec :: ReadPrec Output
$creadPrec :: ReadPrec Output
readList :: ReadS [Output]
$creadList :: ReadS [Output]
readsPrec :: Int -> ReadS Output
$creadsPrec :: Int -> ReadS Output
Read

-- | Get best solution from all generations
getBest :: Int  -> [Population] -> Solution
getBest :: Int -> [Population] -> Solution
getBest Int
n [Population]
ps     = Population -> Solution
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum (Population -> Solution) -> Population -> Solution
forall a b. (a -> b) -> a -> b
$ Int -> [Population] -> Population
getAllBests Int
n [Population]
ps

getBestMaybe :: Int -> [Population] -> Maybe Solution
getBestMaybe :: Int -> [Population] -> Maybe Solution
getBestMaybe Int
n [Population]
ps = case Int -> [Population] -> Population
getAllBests Int
n [Population]
ps of
                       [] -> Maybe Solution
forall a. Maybe a
Nothing
                       Population
xs -> Solution -> Maybe Solution
forall a. a -> Maybe a
Just (Population -> Solution
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum Population
xs)

getAllBests :: Int -> [Population] -> [Solution]
getAllBests :: Int -> [Population] -> Population
getAllBests Int
n [Population]
ps = (Population -> Population) -> [Population] -> Population
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Population -> Population
forall a. Ord a => [a] -> [a]
minimum' ([Population] -> Population) -> [Population] -> Population
forall a b. (a -> b) -> a -> b
$ Int -> [Population] -> [Population]
forall a. Int -> [a] -> [a]
take Int
n [Population]
ps
  where
    minimum' :: [a] -> [a]
minimum' [] = []
    minimum' [a]
xs = [[a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [a]
xs]

-- | Creates a file if it does not exist
createIfDoesNotExist :: String -> FilePath -> IO Handle
createIfDoesNotExist :: String -> String -> IO Handle
createIfDoesNotExist String
headReport String
fname = do
  Bool
isCreated <- String -> IO Bool
doesFileExist String
fname
  Handle
h <- if   Bool
isCreated
       then String -> IOMode -> IO Handle
openFile String
fname IOMode
AppendMode
       else String -> IOMode -> IO Handle
openFile String
fname IOMode
WriteMode
  if Bool
isCreated then Handle -> String -> IO ()
hPutStrLn Handle
h String
"" else Handle -> String -> IO ()
hPutStrLn Handle
h String
headReport
  Handle -> IO Handle
forall (m :: * -> *) a. Monad m => a -> m a
return Handle
h

interleave :: [a] -> [a] -> [a]
interleave :: [a] -> [a] -> [a]
interleave [a]
xs' [a]
ys' = [a] -> [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a] -> [a]
getLeft [a]
xs' [a]
ys' []
  where
    getLeft :: [a] -> [a] -> [a] -> [a]
getLeft [] [a]
ys [a]
zs      = [a]
zs [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a]
ys
    getLeft (a
x:[a]
xs) [a]
ys [a]
zs  = [a] -> [a] -> [a] -> [a]
getRight [a]
xs [a]
ys (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
zs)
    getRight :: [a] -> [a] -> [a] -> [a]
getRight [a]
xs [] [a]
zs     = [a]
zs [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a]
xs
    getRight [a]
xs (a
y:[a]
ys) [a]
zs = [a] -> [a] -> [a] -> [a]
getLeft [a]
xs [a]
ys (a
ya -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
zs)


-- | Generates the reports into the output
genReports :: Output -> NonEmpty Measure -> [Population] -> Int -> (Solution -> Maybe [Double]) -> (Expr -> Maybe Solution) -> IO ()
genReports :: Output
-> NonEmpty Measure
-> [Population]
-> Int
-> (Solution -> Maybe [Double])
-> (Expr -> Maybe Solution)
-> IO ()
genReports Output
Screen NonEmpty Measure
_ [Population]
pop Int
n Solution -> Maybe [Double]
fitTest Expr -> Maybe Solution
refit = do
  let best :: Solution
best  = Int -> [Population] -> Solution
getBest Int
n [Population]
pop
      best' :: Solution
best' = case Expr -> Maybe Solution
refit (Solution -> Expr
_expr Solution
best) of
                Maybe Solution
Nothing -> Solution
best
                Just Solution
x  -> Solution
x
  String -> IO ()
putStrLn String
"Best expression applied to the training set:\n"
  Solution -> IO ()
forall a. Show a => a -> IO ()
print Solution
best'
  String -> IO ()
putStrLn String
"Best expression applied to the test set:\n"
  Maybe [Double] -> IO ()
forall a. Show a => a -> IO ()
print (Solution -> Maybe [Double]
fitTest Solution
best')

genReports (PartialLog String
dirname) NonEmpty Measure
measures [Population]
pop Int
n Solution -> Maybe [Double]
fitTest Expr -> Maybe Solution
refit = do
  Bool -> String -> IO ()
createDirectoryIfMissing Bool
True String
dirname
  let
    fname :: String
fname      = String
dirname String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"/stats.csv"
    fnameExpr :: String
fnameExpr  = String
dirname String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"/exprs.csv"
    mNames :: NonEmpty String
mNames     = (Measure -> String) -> NonEmpty Measure -> NonEmpty String
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map Measure -> String
_name NonEmpty Measure
measures
    trainNames :: [String]
trainNames = NonEmpty String -> [String]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty String -> [String]) -> NonEmpty String -> [String]
forall a b. (a -> b) -> a -> b
$ (String -> String) -> NonEmpty String -> NonEmpty String
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map (String -> String -> String
forall a. [a] -> [a] -> [a]
++String
"_train") NonEmpty String
mNames
    testNames :: [String]
testNames  = NonEmpty String -> [String]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty String -> [String]) -> NonEmpty String -> [String]
forall a b. (a -> b) -> a -> b
$ (String -> String) -> NonEmpty String -> NonEmpty String
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map (String -> String -> String
forall a. [a] -> [a] -> [a]
++String
"_test") NonEmpty String
mNames
    headReport :: String
headReport = String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"," ([String
"name", String
"time", String
"length"] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
interleave [String]
trainNames [String]
testNames)
    headExpr :: String
headExpr   = String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"," [String
"expr", String
"weights", String
"python"]
    best' :: Solution
best'      = Int -> [Population] -> Solution
getBest Int
n [Population]
pop
    best :: Solution
best       = case Expr -> Maybe Solution
refit (Solution -> Expr
_expr Solution
best') of
                   Maybe Solution
Nothing -> Solution
best'
                   Just Solution
x  -> Solution
x

  Handle
hStats     <- String -> String -> IO Handle
createIfDoesNotExist String
headReport String
fname
  Handle
hStatsExpr <- String -> String -> IO Handle
createIfDoesNotExist String
headExpr String
fnameExpr

  TimeSpec
t0 <- Clock -> IO TimeSpec
getTime Clock
Realtime
  Solution -> IO ()
forall a. Show a => a -> IO ()
print Solution
best -- force evaluation. Don't be lazy.
  TimeSpec
t1 <- Clock -> IO TimeSpec
getTime Clock
Realtime

  let
    totTime :: String
totTime         = Int64 -> String
forall a. Show a => a -> String
show (Int64 -> String) -> Int64 -> String
forall a b. (a -> b) -> a -> b
$ TimeSpec -> Int64
sec TimeSpec
t1 Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
- TimeSpec -> Int64
sec TimeSpec
t0
    nFit :: Int
nFit            = [Double] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (Solution -> [Double]
_fit Solution
best)
    bestTest :: [Double]
bestTest        = [Double] -> Maybe [Double] -> [Double]
forall a. a -> Maybe a -> a
fromMaybe (Int -> Double -> [Double]
forall a. Int -> a -> [a]
replicate Int
nFit (Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
0)) (Maybe [Double] -> [Double]) -> Maybe [Double] -> [Double]
forall a b. (a -> b) -> a -> b
$ Solution -> Maybe [Double]
fitTest Solution
best
    measuresResults :: [String]
measuresResults = (Double -> String) -> [Double] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Double -> String
forall a. Show a => a -> String
show ([Double] -> [String]) -> [Double] -> [String]
forall a b. (a -> b) -> a -> b
$ [Double] -> [Double] -> [Double]
forall a. [a] -> [a] -> [a]
interleave (Solution -> [Double]
_fit Solution
best) [Double]
bestTest
    exprWithWeight :: String
exprWithWeight  = String
"\"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Expr -> String
forall a. Show a => a -> String
show Expr
expr String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\",\"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Double] -> String
forall a. Show a => a -> String
show [Double]
ws String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\",\"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Expr -> [Double] -> String
toPython Expr
expr [Double]
ws String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\""
    ws :: [Double]
ws              = Vector Double -> [Double]
forall a. Storable a => Vector a -> [a]
LA.toList (Vector Double -> [Double])
-> ([Vector Double] -> Vector Double)
-> [Vector Double]
-> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Vector Double] -> Vector Double
forall a. [a] -> a
head ([Vector Double] -> [Double]) -> [Vector Double] -> [Double]
forall a b. (a -> b) -> a -> b
$ Solution -> [Vector Double]
_weights Solution
best
    expr :: Expr
expr            = Solution -> Expr
_expr Solution
best

    stats :: String
stats           = String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"," ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [String
dirname, String
totTime, Int -> String
forall a. Show a => a -> String
show (Solution -> Int
_len Solution
best)] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String]
measuresResults

  Handle -> String -> IO ()
hPutStr Handle
hStats String
stats
  Handle -> String -> IO ()
hPutStr Handle
hStatsExpr String
exprWithWeight
  Handle -> IO ()
hClose Handle
hStats
  Handle -> IO ()
hClose Handle
hStatsExpr

-- FullLog is the same as PartialLog plus the best, worst, avg fit for every generation.
--
-- TODO: this code is ugly, but it's low priority to fix it.
genReports (FullLog String
dirname) NonEmpty Measure
measures [Population]
pop Int
n Solution -> Maybe [Double]
fitTest Expr -> Maybe Solution
refit =
  do
    let
        pop' :: [Population]
pop'      = Int -> [Population] -> [Population]
forall a. Int -> [a] -> [a]
take Int
n [Population]
pop

        statsTrain :: [[Double]]
statsTrain = (Population -> [Double]) -> [Population] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map ([Double] -> [Double]
postAgg ([Double] -> [Double])
-> (Population -> [Double]) -> Population -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Double] -> Double -> [Double])
-> [Double] -> [Double] -> [Double]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' [Double] -> Double -> [Double]
aggregate [] ([Double] -> [Double])
-> (Population -> [Double]) -> Population -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Solution -> Double) -> Population -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map ([Double] -> Double
forall a. [a] -> a
head([Double] -> Double)
-> (Solution -> [Double]) -> Solution -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Solution -> [Double]
_fit)) [Population]
pop'
        statsTest :: [[Double]]
statsTest  = (Population -> [Double]) -> [Population] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map ([Double] -> [Double]
postAgg ([Double] -> [Double])
-> (Population -> [Double]) -> Population -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Double] -> Double -> [Double])
-> [Double] -> [Double] -> [Double]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' [Double] -> Double -> [Double]
aggregate [] ([Double] -> [Double])
-> (Population -> [Double]) -> Population -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Solution -> Double) -> Population -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Solution -> Double
getTest) [Population]
pop'

        getTest :: Solution -> Double
getTest      = Maybe [Double] -> Double
forall p. Fractional p => Maybe [p] -> p
replaceWithNan (Maybe [Double] -> Double)
-> (Solution -> Maybe [Double]) -> Solution -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Solution -> Maybe [Double]
fitTest

        replaceWithNan :: Maybe [p] -> p
replaceWithNan Maybe [p]
Nothing      = p
1p -> p -> p
forall a. Fractional a => a -> a -> a
/p
0
        replaceWithNan (Just [])    = p
1p -> p -> p
forall a. Fractional a => a -> a -> a
/p
0
        replaceWithNan (Just (p
x:[p]
_)) = p
x

        statsNamesTrain :: [String]
statsNamesTrain = [String
"TrainBest", String
"TrainWorst", String
"TrainAvg"]
        statsNamesTest :: [String]
statsNamesTest  = [String
"TestBest", String
"TestWorse", String
"TestAvg"]
        fulldirname :: String
fulldirname     = String
dirname String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"/FullLog"
        fnamesTrain :: [String]
fnamesTrain     = (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\String
s -> String
fulldirnameString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"/"String -> String -> String
forall a. [a] -> [a] -> [a]
++String
s) [String]
statsNamesTrain
        fnamesTest :: [String]
fnamesTest      = (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\String
s -> String
fulldirnameString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"/"String -> String -> String
forall a. [a] -> [a] -> [a]
++String
s) [String]
statsNamesTest

    Bool -> String -> IO ()
createDirectoryIfMissing Bool
True String
fulldirname
    [Handle]
hs <- [IO Handle] -> IO [Handle]
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence ([IO Handle] -> IO [Handle]) -> [IO Handle] -> IO [Handle]
forall a b. (a -> b) -> a -> b
$ String -> IO Handle
openNext (String -> IO Handle) -> [String] -> [IO Handle]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
fnamesTrain [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String]
fnamesTest
    ([Double] -> IO ()) -> [[Double]] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ ([Handle] -> [Double] -> IO ()
toFile [Handle]
hs) ([[Double]] -> IO ()) -> [[Double]] -> IO ()
forall a b. (a -> b) -> a -> b
$ ([Double] -> [Double] -> [Double])
-> [[Double]] -> [[Double]] -> [[Double]]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith [Double] -> [Double] -> [Double]
forall a. [a] -> [a] -> [a]
(++) [[Double]]
statsTrain [[Double]]
statsTest
    (Handle -> IO ()) -> [Handle] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Handle -> IO ()
hClose [Handle]
hs

    Output
-> NonEmpty Measure
-> [Population]
-> Int
-> (Solution -> Maybe [Double])
-> (Expr -> Maybe Solution)
-> IO ()
genReports (String -> Output
PartialLog String
dirname) NonEmpty Measure
measures [Population]
pop Int
n Solution -> Maybe [Double]
fitTest Expr -> Maybe Solution
refit

-- | Opens the first file available in the format "name.{i}.csv"
-- where 'i' follows a sequence from 0 onward.
openNext :: String -> IO Handle
openNext :: String -> IO Handle
openNext String
fname = [String] -> IO Handle
go [String
fname String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"." String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
n String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
".csv" | Int
n <- [Int
0 :: Int ..]]
  where
    -- this is a partial function applied to an infinite list
    -- so, what harm can it do?
    go :: [String] -> IO Handle
go []       = String -> IO Handle
forall a. HasCallStack => String -> a
error String
"end of inifinity stream"
    go (String
fn:[String]
fns) = do Bool
b <- String -> IO Bool
doesFileExist String
fn
                     if Bool
b
                        then [String] -> IO Handle
go [String]
fns
                        else String -> IOMode -> IO Handle
openFile String
fn IOMode
WriteMode

postAgg :: [Double] -> [Double]
postAgg :: [Double] -> [Double]
postAgg [Double
best, Double
worst, Double
tot, Double
count] = [Double
best, Double
worst, Double
totDouble -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
count]
postAgg [Double]
_ = String -> [Double]
forall a. HasCallStack => String -> a
error String
"wrong parameters count"

aggregate :: [Double] -> Double -> [Double]
aggregate :: [Double] -> Double -> [Double]
aggregate [] Double
train = [Double
train,Double
train,Double
train,Double
1]
aggregate [Double
best, Double
worst, Double
tot, Double
count] Double
train = [Double -> Double -> Double
forall a. Ord a => a -> a -> a
min Double
best Double
train, Double -> Double -> Double
forall a. Ord a => a -> a -> a
max Double
worst Double
train, Double
totDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
train, Double
countDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
1]
aggregate [Double]
_ Double
_ = String -> [Double]
forall a. HasCallStack => String -> a
error String
"wrong parameters count in aggregate"

-- | Write a sequence to a sequence of opened files
toFile :: [Handle] -> [Double] -> IO ()
toFile :: [Handle] -> [Double] -> IO ()
toFile [Handle]
hs [Double]
ps = (Handle -> String -> IO ()) -> [Handle] -> [String] -> IO ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ Handle -> String -> IO ()
hPutStrLn [Handle]
hs
             ([String] -> IO ()) -> [String] -> IO ()
forall a b. (a -> b) -> a -> b
$ (Double -> String) -> [Double] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Double -> String
forall a. Show a => a -> String
show [Double]
ps