In the backend, I encapsulated the preceding Q-learning implementation and additionally created a Scala controller that controls the model behavior from the frontend. The structure is given here:
import java.nio.file.Paths
import org.codehaus.janino.Java
import ml.stats.TSeries.{normalize, zipWithShift}
import ml.workflow.data.DataSource
import ml.trading.OptionModel
import ml.Predef.{DblPair, DblVec}
import ml.reinforcement.qlearning.{QLConfig, QLModel, QLearning}
import scala.util.{Failure, Success, Try}
import play.api._
import play.api.data.Form
import play.api.libs.json._
import play.api.mvc._
import scala.util.{Failure, Success, Try}
/** Play controller that exposes the Q-learning option-trading model to the frontend.
  *
  * The run parameters are kept in mutable fields: `compute` overwrites them with whatever the
  * submitted form provides, then `run` and `createModel` append their results to `ret`, which
  * `compute` finally returns as a JSON document.
  *
  * NOTE(review): this state is per-instance and unsynchronized, so concurrent requests served
  * by the same controller instance would interleave their parameters and results.
  */
class API extends Controller {
  protected val name: String = "Q-learning"

  // Default input files; replaced by the uploaded files when the form provides them.
  private var sPath = Paths.get("public/data/IBM.csv").toAbsolutePath.toString
  private var oPath = Paths.get("public/data/IBM_O.csv").toAbsolutePath.toString

  // Run configuration parameters
  private var STRIKE_PRICE = 190.0 // Option strike price
  private var MIN_TIME_EXPIRATION = 6 // Minimum expiration time for the option recorded
  private var QUANTIZATION_STEP = 32 // Quantization step (Double => Int)
  private var ALPHA = 0.2 // Learning rate
  private var DISCOUNT = 0.6 // Discount rate used in the Q-Value update equation
  private var MAX_EPISODE_LEN = 128 // Maximum number of iteration for an episode
  private var NUM_EPISODES = 20 // Number of episodes used for training.
  private var MIN_COVERAGE = 0.1 // Minimum coverage handed to QLConfig
  private var NUM_NEIGHBOR_STATES = 3 // Number of states accessible from any other state
  private var REWARD_TYPE = "Maximum reward"

  // JSON document returned by `compute`; results and errors are appended to it.
  private var ret = JsObject(Seq())
  // Number of failed model-creation attempts for the request being processed.
  private var retry = 0

  /** Trains the model on the current stock/option files and appends the outcome to `ret`.
    *
    * @param rewardType reward selection sent by the UI; "Random" skips the optimal-policy output
    * @param quantizeR  quantization step handed to the option model
    * @param alpha      learning rate
    * @param gamma      discount rate
    */
  private def run(rewardType: String, quantizeR: Int, alpha: Double, gamma: Double): Unit = {
    // Fix: the retry budget is per run. Without this reset, failures from earlier requests
    // would be counted against the current one.
    retry = 0

    val stockSource = DataSource(sPath, false, false, 1).get
    val optionModel = createOptionModel(stockSource, quantizeR)
    val optionPrices = DataSource(oPath, false, false, 1).get.extract.get

    createModel(optionModel, optionPrices, alpha, gamma).foreach { model =>
      if (rewardType != "Random") {
        val optimal = model.bestPolicy.EQ.distinct.foldLeft(JsArray(Seq())) { (points, p) =>
          points.append(JsObject(Seq("x" -> JsNumber(p._1), "y" -> JsNumber(p._2))))
        }
        ret = ret + ("OPTIMAL" -> optimal)
      }
    }
  }

  /** Create an option model for a given stock with default strike and minimum expiration
    * time parameters.
    *
    * @param src       data source of the underlying stock prices
    * @param quantizeR quantization step
    */
  private def createOptionModel(src: DataSource, quantizeR: Int): OptionModel =
    new OptionModel("IBM", STRIKE_PRICE, src, MIN_TIME_EXPIRATION, quantizeR)

  /** Create a model for the profit and loss on an option given
    * the underlying security. The profit and loss is adjusted to
    * produce positive values.
    *
    * Diagnostics (goal state, coverage, Q-values, errors) are appended to `ret` as a side effect.
    * When no model is produced the training is retried after a 500 ms pause, until `retry`
    * exceeds 5.
    *
    * @param ibmOption option model used to quantize the option prices
    * @param oPrice    option prices
    * @param alpha     learning rate
    * @param gamma     discount rate
    * @return the trained model, or None once the retries are exhausted
    * @throws IllegalStateException if the state-transition constraints are rejected by
    *                               `QLearning.validateConstraints`
    */
  private def createModel(
      ibmOption: OptionModel,
      oPrice: Seq[Double],
      alpha: Double,
      gamma: Double): Option[QLModel] = {
    val qPriceMap = ibmOption.quantize(oPrice.toArray)
    val numStates = qPriceMap.size

    // States reachable from state n: every index within NUM_NEIGHBOR_STATES of n (clamped to
    // the valid range), excluding n itself. Prepending while folding yields descending order.
    val neighbors = (n: Int) => {
      def getProximity(idx: Int, radius: Int): List[Int] = {
        val upper = math.min(idx + radius, numStates - 1)
        val lower = math.max(idx - radius, 0)
        (lower to upper)
          .filter(_ != idx)
          .foldLeft(List.empty[Int])((acc, i) => i :: acc)
      }
      getProximity(n, NUM_NEIGHBOR_STATES)
    }

    // Compute the minimum value for the profit, loss so the maximum loss is converted to a null profit
    val qPrice: DblVec = qPriceMap.values.toVector
    val profit: DblVec = normalize(zipWithShift(qPrice, 1).map { case (x, y) => y - x }).get
    val maxProfitIndex = profit.zipWithIndex.maxBy(_._1)._2

    val reward = (x: Double, y: Double) => Math.exp(30.0 * (y - x))
    val probabilities = (x: Double, y: Double) => if (y < 0.3 * x) 0.0 else 1.0

    ret = ret + ("GOAL_STATE_INDEX" -> JsNumber(maxProfitIndex))

    if (!QLearning.validateConstraints(profit.size, neighbors)) {
      ret = ret + ("error" -> JsString("QLearningEval Incorrect states transition constraint"))
      throw new IllegalStateException("QLearningEval Incorrect states transition constraint")
    }

    val instances = qPriceMap.keySet.toSeq.drop(1)
    val config = QLConfig(alpha, gamma, MAX_EPISODE_LEN, NUM_EPISODES, MIN_COVERAGE)
    val qLearning = QLearning[Array[Int]](
      config,
      Array[Int](maxProfitIndex),
      profit,
      reward,
      probabilities,
      instances,
      Some(neighbors))

    qLearning.getModel match {
      case trained @ Some(model) =>
        val numTransitions = numStates * (numStates - 1)
        ret = ret + ("COVERAGE" -> JsNumber(model.coverage))
        ret = ret + ("COVERAGE_STATES" -> JsNumber(numStates))
        ret = ret + ("COVERAGE_TRANSITIONS" -> JsNumber(numTransitions))
        val qValues = qLearning._counters.last._2.distinct.foldLeft(JsArray()) { (values, v) =>
          values.append(JsNumber(v))
        }
        ret = ret + ("Q_VALUE" -> qValues)
        trained

      case None if retry > 5 =>
        ret = ret + ("error" -> JsString(s"$name model undefined"))
        None

      case None =>
        // Training did not produce a model: wait briefly and train again.
        retry += 1
        Thread.sleep(500)
        createModel(ibmOption, oPrice, alpha, gamma)
    }
  }

  /** Action invoked by the UI: reads the optional multipart form (uploaded price files and
    * run parameters), trains the model and returns the parameters together with the results
    * as a JSON object. Any exception is reported under the "exception" key.
    */
  def compute = Action(parse.anyContent) { request =>
    try {
      // Fix: start from an empty document so a failure while parsing the form cannot return
      // the results of a previous request.
      ret = JsObject(Seq())

      request.body.asMultipartFormData.foreach { formData =>
        // An upload is only taken into account when it carries a file name.
        formData.file("STOCK_PRICES").filter(_.filename.nonEmpty)
          .foreach(upload => sPath = upload.ref.file.toString)
        formData.file("OPTION_PRICES").filter(_.filename.nonEmpty)
          .foreach(upload => oPath = upload.ref.file.toString)

        val parts = formData.dataParts
        def part(key: String): Option[String] = parts.get(key).map(_.mkString(""))

        // A malformed number throws here and is reported by the catch clause below.
        part("STRIKE_PRICE").foreach(v => STRIKE_PRICE = v.toDouble)
        part("MIN_TIME_EXPIRATION").foreach(v => MIN_TIME_EXPIRATION = v.toInt)
        part("QUANTIZATION_STEP").foreach(v => QUANTIZATION_STEP = v.toInt)
        part("ALPHA").foreach(v => ALPHA = v.toDouble)
        part("DISCOUNT").foreach(v => DISCOUNT = v.toDouble)
        part("MAX_EPISODE_LEN").foreach(v => MAX_EPISODE_LEN = v.toInt)
        part("NUM_EPISODES").foreach(v => NUM_EPISODES = v.toInt)
        part("MIN_COVERAGE").foreach(v => MIN_COVERAGE = v.toDouble)
        part("NUM_NEIGHBOR_STATES").foreach(v => NUM_NEIGHBOR_STATES = v.toInt)
        part("REWARD_TYPE").foreach(v => REWARD_TYPE = v)
      }

      // Echo the effective parameters back to the UI before adding the results.
      ret = JsObject(Seq(
        "STRIKE_PRICE" -> JsNumber(STRIKE_PRICE),
        "MIN_TIME_EXPIRATION" -> JsNumber(MIN_TIME_EXPIRATION),
        "QUANTIZATION_STEP" -> JsNumber(QUANTIZATION_STEP),
        "ALPHA" -> JsNumber(ALPHA),
        "DISCOUNT" -> JsNumber(DISCOUNT),
        "MAX_EPISODE_LEN" -> JsNumber(MAX_EPISODE_LEN),
        "NUM_EPISODES" -> JsNumber(NUM_EPISODES),
        "MIN_COVERAGE" -> JsNumber(MIN_COVERAGE),
        "NUM_NEIGHBOR_STATES" -> JsNumber(NUM_NEIGHBOR_STATES),
        "REWARD_TYPE" -> JsString(REWARD_TYPE)))

      run(REWARD_TYPE, QUANTIZATION_STEP, ALPHA, DISCOUNT)
    } catch {
      case e: Exception =>
        ret = ret + ("exception" -> JsString(e.toString))
    }
    Ok(ret)
  }
}
Look at the preceding code carefully; it has more or less the same structure as the QLearningMain.scala file. There are only two important things here, as follows:
- `compute` is defined as a Play `Action` that takes the input submitted from the UI and runs the computation
- The result is then returned as a JSON object, built with `JsObject()`, to be shown on the UI (see the following)