{-# LANGUAGE TypeFamilies #-}
{-|
Module      : IT.Regression
Description : Specific functions for Symbolic Regression
Copyright   : (c) Fabricio Olivetti de Franca, 2020
License     : GPL-3
Maintainer  : fabricio.olivetti@gmail.com
Stability   : experimental
Portability : POSIX

Definitions of IT data structure and support functions.
-}
module IT.Regression where

import IT
import IT.Algorithms
import IT.Eval
import IT.Metrics

import Data.List
import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NE
import qualified Data.Vector.Storable as V
import qualified Numeric.LinearAlgebra as LA
import qualified MachineLearning.Classification.Binary as BC
import qualified MachineLearning.LogisticModel as LM
import qualified MachineLearning.Classification.OneVsAll as OVA

-- * IT specific stuff
data Task = Regression | Classification | ClassMult
         deriving (Task -> Task -> Bool
(Task -> Task -> Bool) -> (Task -> Task -> Bool) -> Eq Task
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Task -> Task -> Bool
$c/= :: Task -> Task -> Bool
== :: Task -> Task -> Bool
$c== :: Task -> Task -> Bool
Eq, ReadPrec [Task]
ReadPrec Task
Int -> ReadS Task
ReadS [Task]
(Int -> ReadS Task)
-> ReadS [Task] -> ReadPrec Task -> ReadPrec [Task] -> Read Task
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Task]
$creadListPrec :: ReadPrec [Task]
readPrec :: ReadPrec Task
$creadPrec :: ReadPrec Task
readList :: ReadS [Task]
$creadList :: ReadS [Task]
readsPrec :: Int -> ReadS Task
$creadsPrec :: Int -> ReadS Task
Read)

type FitFun = Vector -> Vector -> Double

data Penalty = NoPenalty | Len Double | Shape Double deriving (Int -> Penalty -> ShowS
[Penalty] -> ShowS
Penalty -> String
(Int -> Penalty -> ShowS)
-> (Penalty -> String) -> ([Penalty] -> ShowS) -> Show Penalty
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Penalty] -> ShowS
$cshowList :: [Penalty] -> ShowS
show :: Penalty -> String
$cshow :: Penalty -> String
showsPrec :: Int -> Penalty -> ShowS
$cshowsPrec :: Int -> Penalty -> ShowS
Show, ReadPrec [Penalty]
ReadPrec Penalty
Int -> ReadS Penalty
ReadS [Penalty]
(Int -> ReadS Penalty)
-> ReadS [Penalty]
-> ReadPrec Penalty
-> ReadPrec [Penalty]
-> Read Penalty
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Penalty]
$creadListPrec :: ReadPrec [Penalty]
readPrec :: ReadPrec Penalty
$creadPrec :: ReadPrec Penalty
readList :: ReadS [Penalty]
$creadList :: ReadS [Penalty]
readsPrec :: Int -> ReadS Penalty
$creadsPrec :: Int -> ReadS Penalty
Read)


-- | Predict a linear model
predict :: LA.Matrix Double -> Vector -> Vector
predict :: Matrix Double -> Vector -> Vector
predict Matrix Double
xs Vector
w = Matrix Double
xs Matrix Double -> Vector -> Vector
forall t. Numeric t => Matrix t -> Vector t -> Vector t
LA.#> Vector
w

-- | Solve the OLS *zss*w = ys*
solveOLS :: LA.Matrix Double -> Vector -> Vector
solveOLS :: Matrix Double -> Vector -> Vector
solveOLS Matrix Double
zss = Matrix Double -> Vector
forall t. Element t => Matrix t -> Vector t
LA.flatten (Matrix Double -> Vector)
-> (Vector -> Matrix Double) -> Vector -> Vector
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Double -> Matrix Double -> Matrix Double
forall t. Field t => Matrix t -> Matrix t -> Matrix t
LA.linearSolveSVD Matrix Double
zss (Matrix Double -> Matrix Double)
-> (Vector -> Matrix Double) -> Vector -> Matrix Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector -> Matrix Double
forall a. Storable a => Vector a -> Matrix a
LA.asColumn

isInvalidMatrix :: LA.Matrix Double -> Bool
isInvalidMatrix :: Matrix Double -> Bool
isInvalidMatrix Matrix Double
zss = Matrix Double -> Int
forall t. Matrix t -> Int
LA.rows Matrix Double
zss Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| (Double -> Bool) -> Vector -> Bool
forall a. Storable a => (a -> Bool) -> Vector a -> Bool
V.any Double -> Bool
isInvalid (Matrix Double -> Vector
forall t. Element t => Matrix t -> Vector t
LA.flatten Matrix Double
zss)

-- | Applies OLS and returns a Solution
-- if the expression is invalid, it returns Infinity as a fitness
regress :: LA.Matrix Double -> Vector -> [Vector]
regress :: Matrix Double -> Vector -> [Vector]
regress Matrix Double
zss Vector
ys = [Matrix Double -> Vector -> Vector
solveOLS Matrix Double
zss Vector
ys]

classify :: LA.Matrix Double -> Vector -> [Vector]
classify :: Matrix Double -> Vector -> [Vector]
classify Matrix Double
zss Vector
ys
    = let ws0 :: Vector
ws0     = Double -> Int -> Vector
forall e d (c :: * -> *). Konst e d c => e -> d -> c e
LA.konst Double
0 (Matrix Double -> Int
forall t. Matrix t -> Int
LA.cols Matrix Double
zss)
          (Vector
ws, Matrix Double
_) = MinimizeMethod
-> Double
-> Int
-> Regularization
-> Matrix Double
-> Vector
-> Vector
-> (Vector, Matrix Double)
BC.learn (Double -> Double -> MinimizeMethod
BC.ConjugateGradientPR Double
0.1 Double
0.1) Double
0.0001 Int
500 Regularization
BC.RegNone Matrix Double
zss Vector
ys Vector
ws0
      in  [Vector
ws]

classifyMult :: LA.Matrix Double -> Vector -> [Vector]
classifyMult :: Matrix Double -> Vector -> [Vector]
classifyMult Matrix Double
zss Vector
ys
    = let ws0 :: [Vector]
ws0       = Int -> Vector -> [Vector]
forall a. Int -> a -> [a]
replicate Int
numLabels (Vector -> [Vector]) -> Vector -> [Vector]
forall a b. (a -> b) -> a -> b
$ Double -> Int -> Vector
forall e d (c :: * -> *). Konst e d c => e -> d -> c e
LA.konst Double
0 (Matrix Double -> Int
forall t. Matrix t -> Int
LA.cols Matrix Double
zss)
          numLabels :: Int
numLabels = [Double] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Double] -> Int) -> [Double] -> Int
forall a b. (a -> b) -> a -> b
$ [Double] -> [Double]
forall a. Eq a => [a] -> [a]
nub ([Double] -> [Double]) -> [Double] -> [Double]
forall a b. (a -> b) -> a -> b
$ Vector -> [Double]
forall a. Storable a => Vector a -> [a]
LA.toList Vector
ys
          ([Vector]
ws, [Matrix Double]
_)   = MinimizeMethod
-> Double
-> Int
-> Regularization
-> Int
-> Matrix Double
-> Vector
-> [Vector]
-> ([Vector], [Matrix Double])
OVA.learn (Double -> Double -> MinimizeMethod
OVA.ConjugateGradientPR Double
0.1 Double
0.1) Double
0.0001 Int
500 Regularization
OVA.RegNone Int
numLabels Matrix Double
zss Vector
ys [Vector]
ws0
      in  [Vector]
ws

-- | Fitness function for regression
-- 
--  Split the dataset into twice the available cores
--  evaluate the expressions in parallel
--  run a Linear regression on the evaluated expressions
--  Remove from the population any expression that leads to NaNs or Infs
-- it was fitnessReg

fitTask :: Task -> LA.Matrix Double -> Vector -> [Vector]
fitTask :: Task -> Matrix Double -> Vector -> [Vector]
fitTask Task
Regression     = Matrix Double -> Vector -> [Vector]
regress
fitTask Task
Classification = Matrix Double -> Vector -> [Vector]
classify
fitTask Task
ClassMult      = Matrix Double -> Vector -> [Vector]
classifyMult

predictTask :: Task -> LA.Matrix Double -> [Vector] -> Vector
predictTask :: Task -> Matrix Double -> [Vector] -> Vector
predictTask Task
_ Matrix Double
_ []                   = String -> Vector
forall a. HasCallStack => String -> a
error String
"predictTask: empty coefficients matrix"
predictTask Task
Regression Matrix Double
zss (Vector
w:[Vector]
_)     = Matrix Double -> Vector -> Vector
predict Matrix Double
zss Vector
w
predictTask Task
Classification Matrix Double
zss (Vector
w:[Vector]
_) = LogisticModel -> Matrix Double -> Vector -> Vector
forall a. Model a => a -> Matrix Double -> Vector -> Vector
LM.hypothesis LogisticModel
LM.Logistic Matrix Double
zss Vector
w
predictTask Task
ClassMult Matrix Double
zss [Vector]
ws         = Matrix Double -> [Vector] -> Vector
OVA.predict Matrix Double
zss [Vector]
ws

evalPenalty :: Penalty -> Int -> Double -> Double
evalPenalty :: Penalty -> Int -> Double -> Double
evalPenalty Penalty
NoPenalty Int
_   Double
_   = Double
0.0
evalPenalty (Len Double
c)   Int
len Double
_   = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
c
evalPenalty (Shape Double
c) Int
_   Double
val = Double
valDouble -> Double -> Double
forall a. Num a => a -> a -> a
*Double
c

applyMeasures :: NonEmpty Measure -> Vector -> Vector -> [Double]
applyMeasures :: NonEmpty Measure -> Vector -> Vector -> [Double]
applyMeasures NonEmpty Measure
measures Vector
ysHat Vector
ys = NonEmpty Double -> [Double]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty Double -> [Double]) -> NonEmpty Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (Measure -> Double) -> NonEmpty Measure -> NonEmpty Double
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map (((Vector -> Vector -> Double) -> (Vector, Vector) -> Double
forall a b c. (a -> b -> c) -> (a, b) -> c
`uncurry` (Vector
ysHat, Vector
ys)) ((Vector -> Vector -> Double) -> Double)
-> (Measure -> Vector -> Vector -> Double) -> Measure -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Measure -> Vector -> Vector -> Double
_fun) NonEmpty Measure
measures

evalTrain :: Task
          -> NonEmpty Measure
          -> Constraint
          -> Penalty
          -> Dataset Double
          -> Vector
          -> Dataset Double
          -> Vector
          -> Expr
          -> Maybe Solution
evalTrain :: Task
-> NonEmpty Measure
-> Constraint
-> Penalty
-> Dataset Double
-> Vector
-> Dataset Double
-> Vector
-> Expr
-> Maybe Solution
evalTrain Task
task NonEmpty Measure
measures Constraint
cnstrFun Penalty
penalty Dataset Double
xss_train Vector
ys_train Dataset Double
xss_val Vector
ys_val Expr
expr
  | Expr -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null Expr
expr' = Maybe Solution
forall a. Maybe a
Nothing 
  | Bool
otherwise  = Solution -> Maybe Solution
forall a. a -> Maybe a
Just (Solution -> Maybe Solution) -> Solution -> Maybe Solution
forall a b. (a -> b) -> a -> b
$ Expr -> [Double] -> Double -> Int -> Double -> [Vector] -> Solution
Sol Expr
expr' [Double]
fit Double
cnst Int
len Double
pnlty [Vector]
ws
  where
    ws :: [Vector]
ws    = Task -> Matrix Double -> Vector -> [Vector]
fitTask Task
task Matrix Double
zss Vector
ys_train
    ysHat :: Vector
ysHat = Task -> Matrix Double -> [Vector] -> Vector
predictTask Task
task Matrix Double
zss_val [Vector]
ws
    fit :: [Double]
fit   = NonEmpty Measure -> Vector -> Vector -> [Double]
applyMeasures NonEmpty Measure
measures Vector
ysHat Vector
ys_val
    ws' :: [Double]
ws'   = Vector -> [Double]
forall a. Storable a => Vector a -> [a]
V.toList (Vector -> [Double]) -> Vector -> [Double]
forall a b. (a -> b) -> a -> b
$ [Vector] -> Vector
forall a. [a] -> a
head [Vector]
ws
    len :: Int
len   = Expr -> [Double] -> Int
exprLength Expr
expr' [Double]
ws'
    cnst :: Double
cnst  = Constraint
cnstrFun Expr
expr' [Double]
ws'
    pnlty :: Double
pnlty = Penalty -> Int -> Double -> Double
evalPenalty Penalty
penalty Int
len Double
cnst

    (Expr
expr', Matrix Double
zss) = Dataset Double -> Expr -> (Expr, Matrix Double)
cleanExpr Dataset Double
xss_train Expr
expr
    zss_val :: Matrix Double
zss_val      = Dataset Double -> Expr -> Matrix Double
exprToMatrix Dataset Double
xss_val Expr
expr'

-- | Evaluates an expression into the test set. This is different from `fitnessReg` since
-- it doesn't apply OLS.
-- It was: fitnessTest
evalTest :: Task -> NonEmpty Measure -> Dataset Double -> Vector -> Solution -> Maybe [Double]
evalTest :: Task
-> NonEmpty Measure
-> Dataset Double
-> Vector
-> Solution
-> Maybe [Double]
evalTest Task
task NonEmpty Measure
measures Dataset Double
xss Vector
ys Solution
sol
  | Vector -> Int
forall a. Storable a => Vector a -> Int
V.length ([Vector] -> Vector
forall a. [a] -> a
head [Vector]
ws) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Matrix Double -> Int
forall t. Matrix t -> Int
LA.cols Matrix Double
zss = Maybe [Double]
forall a. Maybe a
Nothing
  | Bool
otherwise                         = [Double] -> Maybe [Double]
forall a. a -> Maybe a
Just [Double]
fit
  where
    zss :: Matrix Double
zss   = Dataset Double -> Expr -> Matrix Double
exprToMatrix Dataset Double
xss (Solution -> Expr
_expr Solution
sol)
    ws :: [Vector]
ws    = Solution -> [Vector]
_weights Solution
sol
    ysHat :: Vector
ysHat = Task -> Matrix Double -> [Vector] -> Vector
predictTask Task
task Matrix Double
zss [Vector]
ws 
    fit :: [Double]
fit   = NonEmpty Measure -> Vector -> Vector -> [Double]
applyMeasures NonEmpty Measure
measures Vector
ysHat Vector
ys 

{-
-- | Experimental: round off floating point to the 1e-10 place.
roundoff :: RealFrac a => a -> a
roundoff x
  | abs x < thr   = 0.0
  | abs x > 1e200 = x
  | otherwise = fromInteger (round (x / thr)) * thr
  where thr = 1e-15

-- what to do with you?
tryToRound :: (Vector -> Double) -> LA.Matrix Double -> (Vector, [Vector]) -> (Vector, [Vector])
tryToRound f zss (ysHat, (ws:_)) =
  let ws'         = V.map roundoff ws
      ysHat'      = predict zss ws'
  in  (ysHat', [ws']) --if abs (f ysHat' - f ysHat) < 0.01
          --then (ysHat', [ws'])
          --else (ysHat, [ws])
tryToRound _ _ _ = error "empty weight list"
-}