Leveraging the Power of Reinforcement Learning in Digital Marketing

AI in Digital Marketing
Sourish Dey
#importing necessary libraries
import pandas as pd
import os as os
import numpy as np
import matplotlib.pyplot as plt
import random
import math
# Dataset reading.
# NOTE(review): presumably one row per round/user and one 0/1 click column per
# ad version -- the CSV schema is not visible here; confirm against the data.
ucb = pd.read_csv('ucb_data.csv')
# In a notebook, check out the first 10 data points with ucb.head(10).
user = ucb.user
ucb = ucb.drop(['user'], axis=1)  # Dropping user field

N = 10000  # no of iterations (assumes the dataset has at least N rows)
d = 6  # no of ad versions (assumes at least d columns remain after the drop)
total_reward_rand = 0  # total rewards (clicks) accumulated over the iterations

# Baseline: loop through the iterations and show a uniformly random ad version.
ad_selected = []  # ad version shown at each round
rewards = ucb.values  # build the reward matrix once instead of on every round
for i in range(N):
    ad = random.randrange(d)
    ad_selected.append(ad)  # record the choice so its distribution can be plotted
    reward = rewards[i, ad]  # click / no click of round i for the chosen ad
    total_reward_rand = total_reward_rand + reward

print('total clicks without any strategy: {}'.format(total_reward_rand))

# plot - Distribution of ad_selected
plt.hist(ad_selected)
plt.title('Distribution of ads')
plt.show()

Let’s see whether the UCB algorithm improves the click rate by exploiting the ad version that has the highest probability of being clicked. The code is commented line by line and should be self-explanatory; however, feel free to ask if anything is unclear.

# Upper Confidence Bound (UCB) ad selection.
# At every round each ad version gets an optimistic score
#     average reward + sqrt(3/2 * ln(round) / times shown)
# and the ad with the highest score is displayed. Ads that have never been
# shown get a very large score so that each one is tried at least once.
N = 10000  # no of iterations (assumes `ucb` has at least N rows)
d = 6  # no of ad versions (assumes `ucb` has at least d columns)
ad_selected = []  # ad version selected at each round
no_selection = [0] * d  # no of times each ad version has been selected
sum_reward = [0] * d  # no of clicks each ad version has received so far
total_reward_ucb = 0  # total no of clicks up to round N
rewards = ucb.values  # build the reward matrix once instead of on every round

# loop through no of rounds
for i in range(N):
    max_upper_bnd = 0  # highest UCB seen in this round; resets at each round
    ad = 0  # ad version having the highest UCB; starts with the first ad
    for j in range(d):  # indices 0 .. d-1
        if no_selection[j] > 0:
            avg_reward = sum_reward[j] / no_selection[j]
            # Confidence radius: the square root covers the whole ratio, so
            # the bonus shrinks as ad j is shown more often.
            delta_j = math.sqrt(3 / 2 * math.log(i + 1) / no_selection[j])
            upper_bnd = avg_reward + delta_j  # UCB of ad version j at round i
        else:
            upper_bnd = 1e5  # high value forces every ad to be tried once
        if upper_bnd > max_upper_bnd:
            max_upper_bnd = upper_bnd
            ad = j
    ad_selected.append(ad)  # record the ad selected at this round
    no_selection[ad] = no_selection[ad] + 1
    reward = rewards[i, ad]  # user action (click / no click) for the selected ad
    sum_reward[ad] = sum_reward[ad] + reward  # clicks for this ad version
    total_reward_ucb = total_reward_ucb + reward  # total clicks after this round

print('total clicks with UCB Algorithm: {}'.format(total_reward_ucb))

# plot - Distribution of ad_selected
plt.hist(ad_selected)
plt.title('Distribution of ads')
plt.show()
# How many times each ad version was displayed from round 1000 until the end
# (value_counts orders the ads from most to least displayed).
no_displayed = pd.Series(ad_selected[1000:N]).value_counts()
select_rate = no_displayed / len(ad_selected[1000:N])

# Click-through rate of each ad version over all rounds. sum_reward and
# no_selection are both indexed by ad version, so they are divided element-wise
# WITHOUT sorting: sorting them independently would pair one ad's clicks with
# another ad's impressions (and would reorder the lists in place).
ctr_by_ad = pd.Series(sum_reward) / pd.Series(no_selection)
# Put the CTRs in the same ad order as no_displayed so every row of the table
# describes a single ad version.
click_probability = ctr_by_ad.reindex(no_displayed.index)

ucb_output = pd.DataFrame({
    'Ad Version': no_displayed.index,
    'No Displayed': no_displayed.values,
    'Display Rate': select_rate.values,
    'Click probability(CTR)': click_probability.values,
})