6 Code
We aim to determine the optimal policy using bandit algorithms. We will focus on Nvidia and AT&T assets, seeking to maximize rewards while minimizing risk. Our simulation will span 100 days, with an initial capital of $100,000. We will compare the portfolio’s performance against a no-risk benchmark simulation.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import lognorm
# Function to sample from a log-normal distribution for the returns
def sample_dynamic_return(mu, sigma, size=None):
    """Draw gross return(s) from a log-normal distribution.

    Args:
        mu (float): Mean of the underlying normal distribution.
        sigma (float): Standard deviation of the underlying normal
            distribution; must be non-negative.
        size (int | tuple | None): Output shape forwarded to NumPy. ``None``
            (the default) yields a single scalar draw, which is what the
            original two-argument call produced.

    Returns:
        float | ndarray: Positive sample(s). Callers subtract 1 to turn the
        gross return into a simple return.
    """
    return np.random.lognormal(mean=mu, sigma=sigma, size=size)
# Cost function calculation: Expected return + Risk (variance + covariance)
def cost_function(risky_proportions, r, sigma, covariance_matrix):
    """
    Calculate the cost for portfolio optimization, considering only risky assets.

    The value returned is
    ``w . r + 0.5 * p . (sigma - covariance_matrix @ p)``
    where ``p`` are the risky proportions and ``w`` is ``p`` extended with the
    no-risk proportion ``1 - sum(p)``.

    Args:
        risky_proportions (array-like): Proportions of the risky assets at step i-1.
        r (array-like): Returns at step i, one entry per risky asset followed
            by the no-risk asset's return (length ``len(risky_proportions) + 1``).
        sigma (array-like): Per-risky-asset term of the risk adjustment, same
            length as ``risky_proportions``.
            NOTE(review): presumably the asset variances (the diagonal of
            ``covariance_matrix``) - confirm against the caller.
        covariance_matrix (array-like): Covariance matrix of the risky asset returns.

    Returns:
        float: The total cost.
    """
    # Accept plain lists as well as ndarrays.
    risky_proportions = np.asarray(risky_proportions, dtype=float)
    sigma = np.asarray(sigma, dtype=float)

    # No-risk asset proportion is the remainder of 1 - sum(risky_proportions)
    no_risk_proportion = 1 - np.sum(risky_proportions)

    # Full weight vector: risky assets first, no-risk asset last
    weights = np.concatenate([risky_proportions, [no_risk_proportion]])

    # First term: weighted sum of returns over all assets
    expected_return = np.dot(weights, r)

    # Second term: risk adjustment computed from the risky assets only
    portfolio_risk = 0.5 * np.dot(
        risky_proportions,
        sigma - np.dot(covariance_matrix, risky_proportions),
    )

    # Total cost
    return expected_return + portfolio_risk
# Bandit algorithm with dynamic portfolio optimization (using cost function)
def bandit_algorithm_with_dynamic_returns(stock_params, no_risk_return, initial_money, num_steps, epsilon, covariance_matrix, verbose=True):
    """
    Epsilon-greedy bandit for dynamic portfolio allocation.

    At every step the portfolio is either spread at random over all assets
    (with probability ``epsilon``) or placed entirely on the asset with the
    highest estimated return. Returns of all assets are observed at every
    step, so every entry of ``Q`` is updated each step as a running mean.

    Args:
        stock_params (list of tuples): Each tuple contains (mu, sigma) for a stock's log-normal distribution.
        no_risk_return (float): Fixed return of the no-risk asset.
        initial_money (float): Initial amount of money to allocate.
        num_steps (int): Number of steps in the simulation.
        epsilon (float): Exploration probability.
        covariance_matrix (ndarray): Covariance matrix for the assets. Kept in
            the signature for callers; this function does not read it.
        verbose (bool): Whether to log actions and rewards at each step.
            When False the function prints nothing.

    Returns:
        tuple: ``(portfolio_value, total_rewards, allocations)`` where
            portfolio_value (list): Portfolio value over time, starting with
                ``initial_money`` (``num_steps + 1`` entries).
            total_rewards (list): Portfolio return at each step, starting with
                a leading 0 (``num_steps + 1`` entries).
            allocations (list): Risky-asset proportions chosen at each step
                (``num_steps`` entries, each of length ``len(stock_params)``).
        The estimates ``Q`` are only logged, not returned.
    """
    num_risky_assets = len(stock_params)  # Number of risky assets
    num_assets = num_risky_assets + 1  # Including the no-risk asset (last arm)
    Q = np.zeros(num_assets)  # Estimated rewards for each arm
    N = np.zeros(num_assets)  # Number of times each arm has been updated
    portfolio_value = [initial_money]  # Store portfolio value over time
    total_rewards = [0]  # Store total rewards over time (portfolio return)
    allocations = []  # Store allocations at each step

    for step in range(num_steps):
        # Epsilon-greedy action selection
        if np.random.random() < epsilon:
            # Explore: random split over all assets
            proportions = np.random.dirichlet(np.ones(num_assets), size=1).flatten()
        else:
            # Exploit: allocate everything to the best action
            proportions = np.zeros(num_assets)
            best_action = np.argmax(Q)
            proportions[best_action] = 1

        # Normalise so the proportions over all assets sum to 1
        proportions = proportions / proportions.sum()
        # Every entry except the last (no-risk) one, for any number of stocks
        risky_proportions = list(proportions[:num_risky_assets])

        # Sample new returns for each asset (risky assets and no-risk asset)
        rewards = np.zeros(num_assets)
        for i, (mu, sigma) in enumerate(stock_params):
            # Risky assets: gross log-normal draw converted to a simple return
            rewards[i] = sample_dynamic_return(mu, sigma) - 1
        # The no-risk asset has a fixed return
        rewards[-1] = no_risk_return
        if verbose:
            print(step, rewards, '\n')

        # Portfolio return is the allocation-weighted sum of the asset returns
        portfolio_return = np.dot(proportions, rewards)

        # Update portfolio value
        new_value = portfolio_value[-1] * (1 + portfolio_return)
        portfolio_value.append(new_value)
        if verbose:
            print(portfolio_value)

        # Track total rewards (portfolio return at this step)
        total_rewards.append(portfolio_return)

        # Update the action-value estimates: incremental mean over all arms
        N += 1
        Q += (rewards - Q) / N

        # Log allocations and portfolio value
        allocations.append(risky_proportions)

        # Print step details if verbose is enabled
        if verbose:
            print(f"Step {step + 1}: Allocations={risky_proportions}, Portfolio Value={new_value:.4f}, Total Reward={portfolio_return:.4f}, Q={Q}, N={N}")

    if verbose:
        print('Q=', Q[-1])
    return portfolio_value, total_rewards, allocations
# Example stock parameters with dynamic mu and sigma
stock_params = [
    (0.005, 0.03),   # Risky asset 1 (Nvidia)
    (-0.002, 0.02),  # Risky asset 2 (Losing stock)
]

# Riskless asset return, per day.
# The source quotes ~12% annual; 1.0003 ** 365 is about 1.116.
no_risk_return = .0003

# Portfolio parameters
initial_money = 100000  # Starting amount of money
num_steps = 100  # Number of steps (time periods)
epsilon = 0.01  # Exploration rate for the bandit algorithm

# Covariance for risky assets: diagonal, i.e. the two stocks are treated as
# uncorrelated, with variances taken from stock_params
covariance_matrix = np.array([[stock_params[0][1]**2, 0],
                              [0, stock_params[1][1]**2]])

# Run the bandit algorithm with dynamic returns
portfolio_value, total_rewards, allocations = bandit_algorithm_with_dynamic_returns(
    stock_params, no_risk_return, initial_money, num_steps, epsilon, covariance_matrix)

# Plot the results
plt.figure(figsize=(12, 6))

# Plot total portfolio value over time.
# The benchmark compounds the same no-risk return and has one point per
# portfolio value (initial value plus one per step) so both curves align.
No_risk_simulation = [initial_money * (1 + no_risk_return)**x for x in range(num_steps + 1)]
plt.subplot(1, 2, 1)
plt.plot(portfolio_value, label='Portfolio Value')
plt.plot(No_risk_simulation, label='No risk simulation')
plt.title('Total Portfolio Value Over Time')
plt.xlabel('Steps')
plt.ylabel('Portfolio Value')
plt.legend()

# Plot the asset allocations
allocations = np.array(allocations)
plt.subplot(1, 2, 2)
plt.plot(allocations[:, 0], label='Nvidia Proportion', color='green')
plt.plot(allocations[:, 1], label='Losing Stock Proportion', color='red')
plt.title('Risky Asset Allocations Over Time')
plt.xlabel('Steps')
plt.ylabel('Proportion')
plt.legend()
plt.tight_layout()
plt.show()

# Same allocations as a bar chart, on a separate figure
plt.figure()
plt.bar(range(num_steps), height=allocations[:, 0], label='Nvidia Proportion', color='green')
plt.bar(range(num_steps), height=allocations[:, 1], label='Losing Stock Proportion', color='red')
plt.title('Risky Asset Allocations Over Time')
plt.xlabel('Steps')
plt.ylabel('Proportion')
plt.legend()
plt.tight_layout()
plt.show()
We can see in the graph that the portfolio value is greater than that of the no-risk simulation.
We can see that the optimal policy is to put all the money in the Nvidia asset most of the time.