def configure_plotly_browser_state():
import IPython
display(IPython.core.display.HTML('''
<script src="/static/components/requirejs/require.js"></script>
<script>
requirejs.config({
paths: {
base: '/static/base',
plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
},
});
</script>
'''))
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from datetime import datetime, date, time, timedelta
from scipy import optimize,stats
from dateutil import relativedelta
from functools import reduce
import operator
plt.rcParams['figure.figsize'] = [20, 10]
init_notebook_mode(connected=True)
datetime.today().strftime("%m.%d.%Y")
from google.colab import auth
auth.authenticate_user()
print('Authenticated')
%%bigquery --project dashappsanalytics df
select * from `dashappsanalytics.app_store.SUBSCRIPTION_EVENT` ;
df.info()
def data_preparation(df):
#df[df['app_apple_id'].isin([ubeats_app_id])]
df['event_date'] = pd.to_datetime(df['event_date'])
df['original_start_date'] = pd.to_datetime(df['original_start_date'])
df['time_range'] = df['event_date'] - df['original_start_date']
return df
df = data_preparation(df)
ubeats_app_id = '1238153568'
mmm_app_id = '1355347116'
piano_app_id= '1276147378'
weekly_subscriptions_price_7_99 = 7.99
weekly_subscriptions_price_8_99 = 8.99
weekly_subscriptions_price_3_99 = 3.99
monthly_subscriptions_price = 16.99
annual_subscriptions_price = 39.99
weekly_subscription_names = [
'Weekly subscription with 3-day trial',
'Week subscription with 3 days trial',
'Weekly subscription with 3-day trial by 8.99',
'Week subscription with 7 days trial',
'Week subscription with 3 days trial by 3.99',
'Week subscription with 3 days trial by 0.99',
]
weekly_subscription_names_7_99 = [
'Weekly subscription with 3-day trial',
'Week subscription with 3 days trial',
'Week subscription with 7 days trial',
]
weekly_subscription_names_8_99 = [
'Weekly subscription with 3-day trial by 8.99',
]
weekly_subscription_names_3_99 = [
'Week subscription with 3 days trial by 3.99',
]
monthly_subscription_names = [
'Month subscription new',
'Month subscription with 7 days trial',
'Month subscription',
]
annual_subscription_names = [
'Year subscription new',
'Year subscription',
]
billing_retry_events = [
'Billing Retry from Introductory Offer',
'Billing Retry from Introductory Price',
'Billing Retry from Paid Subscription',
]
payment_events = [
# 'Billing Retry from Introductory Offer',
# 'Billing Retry from Introductory Price',
# 'Billing Retry from Paid Subscription',
# 'Cancel',
# 'Cancelled from Billing Retry',
# 'Crossgrade',
# 'Crossgrade from Billing Retry',
# 'Crossgrade from Introductory Price',
# 'Introductory Price Crossgrade from Billing Retry',
# 'Introductory Price from Introductory Price',
'Paid Subscription from Introductory Offer',
'Paid Subscription from Introductory Price',
'Reactivate',
'Reactivate with Crossgrade',
# 'Reactivate with Introductory Price',
# 'Refund',
'Renew',
'Renewal from Billing Retry',
# 'Start Introductory Offer',
# 'Start Introductory Price',
# 'Subscribe'
]
trial_started_events = [
"Start Introductory Offer",
"Start Introductory Price",
]
subscription_names = [
'Month subscription',
'Month subscription new',
'Month subscription with 7 days trial',
'Week subscription with 3 days trial',
'Week subscription with 3 days trial by 0.99',
'Week subscription with 3 days trial by 3.99',
'Week subscription with 7 days trial',
'Week with 3 days trial',
'Week with 3 days trial by 2.99',
'Weekly subscription with 3-day trial',
'Weekly subscription with 3-day trial by 8.99',
'Year subscription',
'Year subscription new',
]
ubeats = df[df['app_apple_id']== ubeats_app_id]
df.head(4).T
ubeats['event'].value_counts()
ubeats[ubeats['event'] == 'Subscribe'].head(5).T
ubeats[ubeats['time_range'] == timedelta(0)]['event'].value_counts()
# arr = []
# for original_start_date, group in ubeats[ubeats['event'] == 'Cancel'].groupby('original_start_date'):
# arr.append(ubeats[(ubeats['original_start_date'] == original_start_date) & (ubeats['event_date'] > original_start_date)]['event'].value_counts())
# reduce(lambda a, x: a.add(x, fill_value=0), arr)
Предположение: Subscribe - подписка без триала
# np.sort(ubeats[ubeats['event'] == 'Start Introductory Price']['event_date'].value_counts().keys())
# np.sort(ubeats[ubeats['event'] == 'Start Introductory Price']['original_start_date'].value_counts().keys())
# np.sort(ubeats[ubeats['event'] == 'Start Introductory Offer']['original_start_date'].value_counts().keys())
# np.sort(ubeats[ubeats['event'] == 'Start Introductory Offer']['event_date'].value_counts().keys())
2019-03-25 ивент Start Introductory Price сменился на Start Introductory Offer
def show_plot(start_date, days_count):
data = [[], [], []]
for i in range(days_count):
data[0].append(ubeats[(ubeats['event'] == 'Start Introductory Price') & (ubeats['original_start_date'] == start_date + timedelta(i))]['quantity'].sum())
data[1].append(ubeats[(ubeats['event'] == 'Start Introductory Offer') & (ubeats['original_start_date'] == start_date + timedelta(i))]['quantity'].sum())
data[2].append(start_date + timedelta(i))
data = [
go.Scatter(x = data[2], y = data[0], name='Price'),
go.Scatter(x = data[2], y = data[1], name='Offer'),
]
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_plot(datetime(2019, 3, 1), 70)
Start Introductory Price = Start Introductory Offer
ubeats['subscription_name'].value_counts()
# points = [[days],[values]]
def calc_points(start_date, end_date, df, events):
counter = 0
points = [[],[]]
cohort = df[(df['original_start_date'] >= start_date) & (df['original_start_date'] < end_date)]
cohort_size = cohort[cohort['event'].isin(trial_started_events)]['quantity'].sum()
for time_range, group in cohort[cohort['event'].isin(events)].groupby('time_range'):
counter += group['quantity'].sum() / cohort_size
points[0].append(time_range.days)
points[1].append(counter)
return points
def getPointByDay(points, day):
for i in range(len(points[0])):
if points[0][i] >= day:
return [points[0][i], points[1][i]]
return [[day], [0]]
def getPointByValue(points, value):
for i in range(len(points[0])):
if points[1][i] >= value:
return [points[0][i], points[1][i]]
return [['not exist'], ['not exist']]
Данные с 01-03-2018 по 01-05-2019
points = calc_points(datetime(2018, 3, 1), datetime(2019, 5, 1), ubeats, payment_events)
data = [go.Scatter(x = points[0], y = points[1])]
layout=go.Layout(xaxis={'title':'Dx'}, yaxis={'title':'Purchases / Start Introductory Price'})
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
Данные с 01-06-2018 по 01-05-2019
june_points = calc_points(datetime(2018, 6, 1), datetime(2018, 7, 1), ubeats, payment_events)
july_points = calc_points(datetime(2018, 7, 1), datetime(2018, 8, 1), ubeats, payment_events)
aug_points = calc_points(datetime(2018, 8, 1), datetime(2018, 9, 1), ubeats, payment_events)
sep_points = calc_points(datetime(2018, 9, 1), datetime(2018, 10, 1), ubeats, payment_events)
oct_points = calc_points(datetime(2018, 10, 1), datetime(2018, 11, 1), ubeats, payment_events)
nov_points = calc_points(datetime(2018, 11, 1), datetime(2018, 12, 1), ubeats, payment_events)
dec_points = calc_points(datetime(2018, 12, 1), datetime(2019, 1, 1), ubeats, payment_events)
jan_points = calc_points(datetime(2019, 1, 1), datetime(2019, 2, 1), ubeats, payment_events)
feb_points = calc_points(datetime(2019, 2, 1), datetime(2019, 3, 1), ubeats, payment_events)
mar_points = calc_points(datetime(2019, 3, 1), datetime(2019, 4, 1), ubeats, payment_events)
apr_points = calc_points(datetime(2019, 4, 1), datetime(2019, 5, 1), ubeats, payment_events)
data = [
go.Scatter(x = june_points[0], y = june_points[1], name='June'),
go.Scatter(x = july_points[0], y = july_points[1], name='July'),
go.Scatter(x = aug_points[0], y = aug_points[1], name='Aug'),
go.Scatter(x = sep_points[0], y = sep_points[1], name='Sept'),
go.Scatter(x = oct_points[0], y = oct_points[1], name='Oct'),
go.Scatter(x = nov_points[0], y = nov_points[1], name='Nov'),
go.Scatter(x = dec_points[0], y = dec_points[1], name='Dec'),
go.Scatter(x = jan_points[0], y = jan_points[1], name='Jan'),
go.Scatter(x = feb_points[0], y = feb_points[1], name='Feb'),
go.Scatter(x = mar_points[0], y = mar_points[1], name='Mar'),
go.Scatter(x = apr_points[0], y = apr_points[1], name='Apr'),
]
layout=go.Layout(xaxis={'title':'Dx'}, yaxis={'title':'Purchases / Start Introductory Price'})
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
График описывает скорость окупаемости триалов.
В апреле упала конверсия триалов.
на графике представлены 1ая и 2ая неделя марта и апреля
mar_points_week_1 = calc_points(datetime(2019, 3, 1), datetime(2019, 3, 8), ubeats, payment_events)
mar_points_week_2 = calc_points(datetime(2019, 3, 8), datetime(2019, 3, 15), ubeats, payment_events)
mar_points_week_3 = calc_points(datetime(2019, 3, 15), datetime(2019, 3, 22), ubeats, payment_events)
mar_points_week_4 = calc_points(datetime(2019, 3, 22), datetime(2019, 3, 29), ubeats, payment_events)
apr_points_week_1 = calc_points(datetime(2019, 4, 1), datetime(2019, 4, 8), ubeats, payment_events)
apr_points_week_2 = calc_points(datetime(2019, 4, 8), datetime(2019, 4, 15), ubeats, payment_events)
apr_points_week_3 = calc_points(datetime(2019, 4, 15), datetime(2019, 4, 22), ubeats, payment_events)
data = [
go.Scatter(x = mar_points_week_1[0], y = mar_points_week_1[1], name='Mar_1'),
go.Scatter(x = mar_points_week_2[0], y = mar_points_week_2[1], name='Mar_2'),
go.Scatter(x = mar_points_week_3[0], y = mar_points_week_3[1], name='Mar_3'),
go.Scatter(x = mar_points_week_4[0], y = mar_points_week_4[1], name='Mar_4'),
go.Scatter(x = apr_points_week_1[0], y = apr_points_week_1[1], name='Apr_1'),
go.Scatter(x = apr_points_week_2[0], y = apr_points_week_2[1], name='Apr_2'),
go.Scatter(x = apr_points_week_3[0], y = apr_points_week_3[1], name='Apr_3'),
]
layout=go.Layout(xaxis={'title':'Dx'}, yaxis={'title':'Purchases / Start Introductory Price'})
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
Апрель ~ Март
Данные с 01-03-2018 по 01-05-2019
def calc_points_paid_only(start_date, end_date, df, events):
counter = 0
points = [[],[]]
cohort = df[(df['original_start_date'] >= start_date) & (df['original_start_date'] < end_date)]
cohort_size = cohort[(cohort['consecutive_paid_periods'] == 1) & (cohort['event'].isin(events))]['quantity'].sum()
for time_range, group in cohort[cohort['event'].isin(events)].groupby('time_range'):
counter += group['quantity'].sum() / cohort_size
points[0].append(time_range.days)
points[1].append(counter)
return points
points_paid_only = calc_points_paid_only(datetime(2018, 3, 1), datetime(2019, 5, 1), ubeats, payment_events)
data = [go.Scatter(x = points_paid_only[0], y = points_paid_only[1])]
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
Данные с 01-06-2018 по 01-05-2019
june_points = calc_points_paid_only(datetime(2018, 6, 1), datetime(2018, 7, 1), ubeats, payment_events)
july_points = calc_points_paid_only(datetime(2018, 7, 1), datetime(2018, 8, 1), ubeats, payment_events)
aug_points = calc_points_paid_only(datetime(2018, 8, 1), datetime(2018, 9, 1), ubeats, payment_events)
sep_points = calc_points_paid_only(datetime(2018, 9, 1), datetime(2018, 10, 1), ubeats, payment_events)
oct_points = calc_points_paid_only(datetime(2018, 10, 1), datetime(2018, 11, 1), ubeats, payment_events)
nov_points = calc_points_paid_only(datetime(2018, 11, 1), datetime(2018, 12, 1), ubeats, payment_events)
dec_points = calc_points_paid_only(datetime(2018, 12, 1), datetime(2019, 1, 1), ubeats, payment_events)
jan_points = calc_points_paid_only(datetime(2019, 1, 1), datetime(2019, 2, 1), ubeats, payment_events)
feb_points = calc_points_paid_only(datetime(2019, 2, 1), datetime(2019, 3, 1), ubeats, payment_events)
mar_points = calc_points_paid_only(datetime(2019, 3, 1), datetime(2019, 4, 1), ubeats, payment_events)
apr_points = calc_points_paid_only(datetime(2019, 4, 1), datetime(2019, 5, 1), ubeats, payment_events)
data = [
go.Scatter(x = june_points[0], y = june_points[1], name='June'),
go.Scatter(x = july_points[0], y = july_points[1], name='July'),
go.Scatter(x = aug_points[0], y = aug_points[1], name='Aug'),
go.Scatter(x = sep_points[0], y = sep_points[1], name='Sept'),
go.Scatter(x = oct_points[0], y = oct_points[1], name='Oct'),
go.Scatter(x = nov_points[0], y = nov_points[1], name='Nov'),
go.Scatter(x = dec_points[0], y = dec_points[1], name='Dec'),
go.Scatter(x = jan_points[0], y = jan_points[1], name='Jan'),
go.Scatter(x = feb_points[0], y = feb_points[1], name='Feb'),
go.Scatter(x = mar_points[0], y = mar_points[1], name='Mar'),
go.Scatter(x = apr_points[0], y = apr_points[1], name='Apr'),
]
layout = go.Layout()
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig, filename='evts-by-month-paid')
График описывает скорость окупаемости платящий пользователей.
Качество платящих пользователей в марте - апреле возросло.
print('Количество пользователей совершивших покупки в апреле (первая неделя):', ubeats[(ubeats['original_start_date'] >= datetime(2019, 4, 1)) & (ubeats['original_start_date'] < datetime(2019, 4, 7)) & (ubeats['event_date'] < datetime(2019, 4, 15)) & (ubeats['event'].isin(payment_events)) & ubeats['consecutive_paid_periods'] == 1]['quantity'].sum())
print('Количество пользователей совершивших покупки в марте (первая неделя):', ubeats[(ubeats['original_start_date'] >= datetime(2019, 3, 1)) & (ubeats['original_start_date'] < datetime(2019, 3, 7)) & (ubeats['event_date'] < datetime(2019, 3, 15)) & (ubeats['event'].isin(payment_events)) & ubeats['consecutive_paid_periods'] == 1]['quantity'].sum())
print('Количество триалов в апреле (первая неделя):', ubeats[(ubeats['original_start_date'] >= datetime(2019, 4, 1)) & (ubeats['original_start_date'] < datetime(2019, 4, 7)) & (ubeats['event_date'] < datetime(2019, 4, 15)) & (ubeats['event'].isin(trial_started_events))]['quantity'].sum())
print('Количество триалов в марте (первая неделя):', ubeats[(ubeats['original_start_date'] >= datetime(2019, 3, 1)) & (ubeats['original_start_date'] < datetime(2019, 3, 7)) & (ubeats['event_date'] < datetime(2019, 3, 15)) & (ubeats['event'].isin(trial_started_events))]['quantity'].sum())
y = list(map(lambda x: x.days, ubeats[(ubeats['consecutive_paid_periods'] == 1) & (ubeats['event'].isin(payment_events))]['time_range']))
data = [go.Box(y=y)]
layout = go.Layout(title='')
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig, filename='1')
print('8: ', len(list(filter(lambda x: x <= 8, y)))/ len(y))
print('5: ', len(list(filter(lambda x: x <= 5, y)))/ len(y))
print('3: ', len(list(filter(lambda x: x <= 3, y)))/ len(y))
Первая покупка совершается на 0-ой день.
На первые 3 дня приходится 69% первых покупок.
На первые 5 дней приходится 76% первых покупок.
На первые 8 дней приходится 81% всех первых покупок, остальные совершаются в течении 67 дней.
def get_points(df, start_date, end_date):
group = df[(df['original_start_date'] >= start_date) & (df['original_start_date'] < end_date)]
points = [[], []]
for pay_period, data in group[(group['event'].isin(events))].groupby('consecutive_paid_periods'):
points[0].append(pay_period)
points[1].append(data['quantity'].sum())
return points
def probability(points):
result = [[], [], []]
for i in range(1, len(points[0])):
result[0].append(points[0][i])
result[2].append(points[1][i])
result[1].append(points[1][i] / points[1][i-1])
return result
def hist(title, _x, _y):
print(title)
data = [go.Bar(x=_x, y=_y)]
layout = go.Layout(title=title)
fig = go.Figure(data=data, layout=layout)
return iplot(fig, filename='1')
x = {}
y = {}
for name, group in df.groupby('subscription_name'):
x[name] = []
y[name] = []
for pay_period, data in group[(group['event'].isin(payment_events))].groupby('consecutive_paid_periods'):
x[name].append(pay_period)
y[name].append(data['quantity'].sum())
за все время
configure_plotly_browser_state()
hist(subscription_names[0], x[subscription_names[0]], y[subscription_names[0]])
Аномалии в наблюдениях, покупок второго периода больше чем первого, горбы в наблюдениях. TODO: Найти причину
за все время
configure_plotly_browser_state()
hist(subscription_names[1], x[subscription_names[1]], y[subscription_names[1]])
Аномалии в наблюдениях, покупок второго периода больше чем первого. TODO: Найти причину
за все время
configure_plotly_browser_state()
hist(subscription_names[2], x[subscription_names[2]], y[subscription_names[2]])
Аномалии в наблюдениях, встречаются горбы. TODO: Найти причину
за все время
configure_plotly_browser_state()
hist(subscription_names[3], x[subscription_names[3]], y[subscription_names[3]])
Распределение количества покупок по периодам оплаты имеет вид $P(\lambda)$. TODO: определить $\lambda$
за все время
configure_plotly_browser_state()
hist(subscription_names[4], x[subscription_names[4]], y[subscription_names[4]])
Аномалии в наблюдениях, покупок второго периода больше чем первого, горбы в наблюдениях. TODO: Найти причину
за все время
configure_plotly_browser_state()
hist(subscription_names[5], x[subscription_names[5]], y[subscription_names[5]])
Аномалии в наблюдениях (горбы). TODO: Найти причину
за все время
configure_plotly_browser_state()
hist(subscription_names[6], x[subscription_names[6]], y[subscription_names[6]])
Распределение количества покупок по периодам оплаты имеет вид $P(\lambda)$. TODO: определить $\lambda$
за все время
configure_plotly_browser_state()
hist(subscription_names[8], x[subscription_names[8]], y[subscription_names[8]])
Распределение количества покупок по периодам оплаты имеет вид $P(\lambda)$. TODO: определить $\lambda$
за все время
configure_plotly_browser_state()
hist(subscription_names[9], x[subscription_names[9]], y[subscription_names[9]])
Распределение количества покупок по периодам оплаты имеет вид $P(\lambda)$. TODO: определить $\lambda$
за все время
configure_plotly_browser_state()
hist(subscription_names[10], x[subscription_names[10]], y[subscription_names[10]])
Распределение количества покупок по периодам оплаты имеет вид $P(\lambda)$. TODO: определить $\lambda$
за все время
configure_plotly_browser_state()
hist(subscription_names[11], x[subscription_names[11]], y[subscription_names[11]])
Возможно, есть часть годовых подписок, которых нет в данных (первые покупки).
Возможно, пользователь сменил тип потписки.
TODO: Проверить original_start_date на 2ой оплате.
за все время
configure_plotly_browser_state()
hist(subscription_names[12], x[subscription_names[12]], y[subscription_names[12]])
нельзя сделать какие-либо выводы
на 24.04.19 подписок = 29
за все время
def points_comparator(x):
return -x[1]
group = ubeats[ubeats['event'].isin(trial_started_events)]
start_introductory_price_points = list(map(lambda x: [x[0], x[1]['quantity'].sum()], group.groupby('country')))
start_introductory_price_points = sorted(start_introductory_price_points, key=points_comparator)
trace = go.Pie(labels=list(map(lambda x: x[0], start_introductory_price_points)), values=list(map(lambda x: x[1], start_introductory_price_points)))
configure_plotly_browser_state()
iplot([trace])
На диаграмме представлено соотношение стран по количеству триалов.
Из диаграммы можем сделать вывод откуда приходит большая часть трафика.
за все время
group = ubeats[(ubeats['consecutive_paid_periods'] == 1) & (ubeats['event'].isin(payment_events))]
points = list(map(lambda x: [x[0], x[1]['quantity'].sum()], group.groupby('country')))
points = sorted(points, key=points_comparator)
trace = go.Pie(labels=list(map(lambda x: x[0], points)), values=list(map(lambda x: x[1], points)))
configure_plotly_browser_state()
iplot([trace])
На диаграмме представлено соотношение стран по количеству платящих пользователей.
По диаграмме можем сказать в каких странах находится большая часть платящих пользователей.
график конверсии по неделям (для стран).
график построен для стран, в которых не менее 100 триалов за рассматриваемое время.
с 01.01.19 по 01.05.19
def get_convert_by_week(df, start_date, week_count):
data = []
for i in range(week_count):
trials = df[(df['original_start_date'] >= start_date + timedelta(7*i)) & (df['original_start_date'] < start_date + timedelta( 7*(i+1) )) & (df['event'].isin(trial_started_events))]['quantity'].sum()
payments = df[(df['original_start_date'] >= start_date + timedelta(7*i)) & (df['original_start_date'] < start_date + timedelta( 7*(i+1) )) & (df['consecutive_paid_periods'] == 1) ]['quantity'].sum()
if trials == 0:
data.append([0, start_date + timedelta(7*i)])
if trials > 0:
data.append([payments / trials, start_date + timedelta(7*i)])
return data
def show_bar(df, start_date, week_count):
data = []
for name, group in df.groupby('country'):
trials = group[(ubeats['event'].isin(trial_started_events))]['quantity'].sum()
if trials > 100:
points = get_convert_by_week(group, start_date, week_count)
data.append(go.Scatter(x = list(map(lambda x: x[1], points)) , y = list(map(lambda x: x[0], points)), name=name, visible='legendonly'))
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_bar(ubeats[(ubeats['original_start_date'] >= datetime(2019, 1, 1)) & (ubeats['original_start_date'] < datetime(2019, 5, 1))], datetime(2019, 2, 10), 12)
График построен по данным с 01.01.19 по 01.05.19
Из графика видно, как менялась конверсия по странам в течении времени.
По графику можем сравнить конверсию между странами.
По умолчанию графики спрятаны. Нажмите на страну, чтобы открыть/скрыть график.
для стран, в которых > 100 триалов
def get_data(df):
d = {}
for name, group in df.groupby('country'):
billing_retry = group[group['event'].isin(billing_retry_events)]['quantity'].sum()
trials = group[group['event'].isin(trial_started_events)]['quantity'].sum()
if trials > 100:
d[name] = billing_retry / trials
return d
d = get_data(ubeats)
d = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
trace1 = go.Bar(
x=list(map(lambda x: x[0], d)),
y=list(map(lambda x: x[1], d)),
name='with refund'
)
data = [trace1]
layout = go.Layout(
barmode='group'
)
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
Billing Retry from Introductory Offer & Billing Retry from Introductory Price к количеству триалов
Для стран > 100 триалов
def get_data(df):
events = [
'Billing Retry from Introductory Offer',
'Billing Retry from Introductory Price',
]
d = {}
for name, group in df.groupby('country'):
billing_retry = group[group['event'].isin(events)]['quantity'].sum()
trials = group[group['event'].isin(trial_started_events)]['quantity'].sum()
if trials > 100:
d[name] = billing_retry / trials
return d
d = get_data(ubeats)
d = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
trace1 = go.Bar(
x=list(map(lambda x: x[0], d)),
y=list(map(lambda x: x[1], d)),
name='with refund'
)
data = [trace1]
layout = go.Layout(
barmode='group'
)
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
Отношение Billing Retry from Paid Subscription к количеству платящих пользователей.
Для стран, в которых > 30 платящих пользователей.
def get_billing_retry_from_paid_subscription_data(df):
events = [
'Billing Retry from Paid Subscription',
]
d = {}
for name, group in df.groupby('country'):
billing_retry = group[group['event'].isin(events)]['quantity'].sum()
payments = group[(group['event'].isin(payment_events)) & (group['consecutive_paid_periods'] == 1)]['quantity'].sum()
if payments > 30:
d[name] = billing_retry / payments
return d
d = get_billing_retry_from_paid_subscription_data(ubeats)
d = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
trace1 = go.Bar(
x=list(map(lambda x: x[0], d)),
y=list(map(lambda x: x[1], d)),
name='with refund'
)
data = [trace1]
layout = go.Layout(
barmode='group'
)
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
Oy - количество ивентов.
Ox - дата, в которою произошел ивент.
def getEventsByDay(df, day, events):
return df[(df['event'].isin(events)) & (df['event_date'] == day)]['quantity'].sum()
def show_plot(df, start_date, end_date, events):
data = [[],[]]
current_date = start_date
while current_date < end_date:
data[0].append(current_date)
data[1].append(getEventsByDay(df, current_date, events))
current_date += timedelta(1)
print(data)
fig = go.Figure(data=[go.Scatter(x = data[0], y = data[1])], layout=layout)
configure_plotly_browser_state()
iplot(fig)
show_plot(ubeats, datetime(2018, 3, 1), datetime.today(), billing_retry_events)
Чаще всего в billing retry уходят по четвергам.
Не понятна причина, почему именно четверги.
Oy - количество ивентов.
Ox - дата, в которою произошел ивент.
show_plot(ubeats, datetime(2018, 3, 1), datetime.today(), ['Billing Retry from Paid Subscription'])
Чаще всего в billing retry уходят по четвергам.
Не понятна причина, почему именно четверги.
Oy - количество ивентов.
Ox - дата, в которою произошел ивент.
show_plot(ubeats, datetime(2018, 3, 1), datetime.today(), ['Renewal from Billing Retry'])
Чаще всего в billing retry уходят по четвергам.
Вероятно, выходят из billing retry в тот же день (проверить)
Oy - количество ивентов.
Ox - дата, в которою произошел ивент.
show_plot(ubeats, datetime(2018, 3, 1), datetime.today(), ['Cancelled from Billing Retry'])
Наблюдается аномалия 26-04-2018.
На первый взгляд, график не имеет периодической структуры.
def show_bar(df, start_date, events):
c_d = start_date
data = [[],[]]
while c_d < datetime.today():
e_d = c_d + relativedelta.relativedelta(months=1)
billing_retry = df[(df['original_start_date'] > c_d) & (df['original_start_date'] < e_d) & (df['event'].isin(billing_retry_events))]['quantity'].sum()
cancelled_from_billing_retry = df[(df['original_start_date'] > c_d) & (df['original_start_date'] < e_d) & (df['event'].isin(events))]['quantity'].sum()
data[0].append(c_d.strftime("%m.%d.%Y"))
data[1].append(cancelled_from_billing_retry/billing_retry)
c_d = e_d
# data.append()
fig = go.Figure(data=[go.Bar(x=data[0], y=data[1])])
configure_plotly_browser_state()
iplot(fig)
show_bar(ubeats, datetime(2018, 5, 1), ['Cancelled from Billing Retry'])
На диаграмме представлено какая часть пользователей, пришедших за определенный период (месяц), отменит подписку, в случае ухода в billing retry.
show_bar(ubeats, datetime(2018, 5, 1), ['Renewal from Billing Retry'])
не информативно
def getAliveCount(df):
alive = (
df[(df['event'] == "Start Introductory Price")]["quantity"].sum() +
df[(df['event'] == "Start Introductory Offer")]["quantity"].sum() +
df[(df['event'] == "Reactivate")]["quantity"].sum() +
df[(df['event'] == "Reactivate with Crossgrade")]["quantity"].sum() +
df[(df['event'] == "Reactivate with Introductory Price")]["quantity"].sum() +
df[(df['event'] == "Subscribe")]["quantity"].sum()
) - (
df[(df['event'] == "Cancel")]["quantity"].sum() +
df[(df['event'] == "Cancelled from Billing Retry")]["quantity"].sum()
)
return alive
def getBillingRetryCount(df):
count = (
df[(df['event'].isin(billing_retry_events))]["quantity"].sum()
) - (
df[(df['event'] == "Cancelled from Billing Retry")]["quantity"].sum() +
df[(df['event'] == "Renewal from Billing Retry")]["quantity"].sum()
)
return count
def show_bar(df, start_date, end_date):
info = [{'alive': [], 'billing_retry': []}, []]
data = []
current_date = start_date
while current_date < end_date:
s_d = current_date
e_d = current_date + relativedelta.relativedelta(months=1)
info[1].append(s_d)
info[0]['alive'].append(getAliveCount(df[(df['original_start_date'] >= s_d) & (df['original_start_date'] < e_d)]))
info[0]['billing_retry'].append(getBillingRetryCount(df[(df['original_start_date'] >= s_d) & (df['original_start_date'] < e_d)]))
current_date = e_d
for name in info[0].keys():
data.append(go.Bar(x=info[1], y=info[0][name], name=name))
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_bar(ubeats, datetime(2018, 4, 1), datetime.today())
Диаграмма показывает сколько активных пользователей осталось и сколько из них в billing retry. (по месяцам)
по данным за все время
# without Refunds
data = []
group = ubeats[(ubeats['event'].isin(payment_events)) ]
group_refund = ubeats[(ubeats['event'] == 'Refund')]
trials = df[df['event'] == 'Start Introductory Price']['quantity'].sum()
values = [1, ]
for j in range(1, 30):
count_2 = group[(group['consecutive_paid_periods'] == j)]['quantity'].sum()
refund = group_refund[(group_refund['consecutive_paid_periods'] == j)]['quantity'].sum()
values.append((count_2 - refund) / trials)
data.append(go.Scatter(x = list(range(len(values))), y = values, name='trials'))
for i in range(1, 15):
count = group[(group['consecutive_paid_periods'] == i)]['quantity'].sum()
count_refund = group_refund[(group_refund['consecutive_paid_periods'] == i)]['quantity'].sum()
values = []
for j in range(i, 30):
count_2 = group[(group['consecutive_paid_periods'] == j)]['quantity'].sum()
refund = group_refund[(group_refund['consecutive_paid_periods'] == j)]['quantity'].sum()
values.append((count_2 - refund) / (count - count_refund))
data.append(go.Scatter(x = list(range(len(values))), y = values, name='{0}'.format(i)))
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
Номер графика -- сколько подписок было куплено ранее.
Будем считать, что постоянный пользователь -- пользователь, который оплатил 4-е периода подписки (После 4-ого перидоа подписки, вероятность покупки следующей растет не больше чем на 1.5% )
По графику можно предположить, какая чать пользователей проживет еще N периодов.
качеством страны будем считать:
Будем рассматривать страны, в которых не менее 500 триалов.
groups = ubeats[(ubeats['event'].isin(trial_started_events))]
d = {}
for c, group in groups.groupby('country'):
d[c] = group['quantity'].sum()
trials = pd.Series(d).sort_values(ascending=False)
countries = trials[trials > 500].keys()
trials = trials[countries]
# ubeats = df[(df['app_apple_id']== ubeats_app_id) & (df['event'].isin(events)) ]
# ubeats_refund = df[(df['app_apple_id']== ubeats_app_id) & (df['event'] == 'Refund')]
groups = ubeats[(ubeats['event'].isin(payment_events)) & (ubeats['consecutive_paid_periods'] == 1)]
groups_refund = ubeats[(ubeats['event'] == 'Refund') & (ubeats['consecutive_paid_periods'] == 1)]
d = {}
d_refund = {}
for c, group in groups.groupby('country'):
d[c] = group['quantity'].sum()
for c, group in groups_refund.groupby('country'):
d_refund[c] = group['quantity'].sum()
first_payments = pd.Series(d)[countries].sort_values(ascending=False).fillna(0)
first_payments_refund = pd.Series(d_refund)[countries].sort_values(ascending=False).fillna(0)
groups = ubeats[(ubeats['consecutive_paid_periods'] == 7) & (ubeats['event'].isin(payment_events))]
groups_refund = ubeats[(ubeats['event'] == 'Refund') & (ubeats['consecutive_paid_periods'] == 7)]
d = {}
d_refund = {}
for c, group in groups.groupby('country'):
d[c] = group['quantity'].sum()
for c, group in groups_refund.groupby('country'):
d_refund[c] = group['quantity'].sum()
regular_customers = pd.Series(d)[countries].sort_values(ascending=False).fillna(0)
regular_customers_refund = pd.Series(d_refund)[countries].fillna(0)
за все время.
data = (regular_customers / trials).fillna(0).sort_values(ascending=False)
data_2 = ((regular_customers - regular_customers_refund) / trials).fillna(0).sort_values(ascending=False)
trace1 = go.Bar(
x=data.keys(),
y=data.values,
name='with refund'
)
trace2 = go.Bar(
x=data_2.keys(),
y=data_2.values,
name='without refund'
)
data = [trace1, trace2]
layout = go.Layout(
barmode='group'
)
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
На диаграмме представлены вероятности перехода триала в постоянного пользователя (по странам)
за все время.
data = (first_payments / trials).fillna(0).sort_values(ascending=False)
data_2 = ((first_payments - first_payments_refund) / trials).fillna(0).sort_values(ascending=False)
trace1 = go.Bar(
x=data.keys(),
y=data.values,
name='with refund'
)
trace2 = go.Bar(
x=data_2.keys(),
y=data_2.values,
name='without refund'
)
data = [trace1, trace2]
layout = go.Layout(
barmode='group'
)
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
На диаграмме представлены вероятности перехода триала в покупку (по странам)
за все время
data = (regular_customers / first_payments).fillna(0).sort_values(ascending=False)
# data = (regular_customers / trials).fillna(0).sort_values(ascending=False)
data_2 = ((regular_customers - regular_customers_refund) / first_payments).fillna(0).sort_values(ascending=False)
trace1 = go.Bar(
x=data.keys(),
y=data.values,
name='with refund'
)
trace2 = go.Bar(
x=data_2.keys(),
y=data_2.values,
name='without refund'
)
data = [trace1, trace2]
layout = go.Layout(
barmode='group'
)
fig = go.Figure(data=data, layout=layout)
configure_plotly_browser_state()
iplot(fig)
trials_mar = ubeats[(ubeats['original_start_date'] >= datetime(2019, 3, 1)) & (ubeats['original_start_date'] < datetime(2019, 4, 1)) & (ubeats['event'].isin(trial_started_events))]['quantity'].sum()
trials_apr = ubeats[(ubeats['original_start_date'] >= datetime(2019, 4, 1)) & (ubeats['original_start_date'] < datetime(2019, 4, 14)) & (ubeats['event'].isin(trial_started_events))]['quantity'].sum()
paying_users_mar = ubeats[(ubeats['original_start_date'] >= datetime(2019, 3, 1)) & (ubeats['original_start_date'] < datetime(2019, 4, 1)) & (ubeats['event'].isin(payment_events)) & (ubeats['consecutive_paid_periods'] == 1)]['quantity'].sum()
paying_users_apr = ubeats[(ubeats['original_start_date'] >= datetime(2019, 4, 1)) & (ubeats['original_start_date'] < datetime(2019, 4, 14)) & (ubeats['event'].isin(payment_events)) & (ubeats['consecutive_paid_periods'] == 1)]['quantity'].sum()
print('Конверсия в марте: ', paying_users_mar / trials_mar)
print('Конверсия в апреле: ', paying_users_apr / trials_apr)
В рассмотрение взяты данные за март (01.03.19 - 01.04.19) и за апрель (01.04.19-14.01.19)
def calcLTV(df, start_date, end_date):
share = 0.7
payments = (
float(df[(df['subscription_name'].isin(weekly_subscription_names_7_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_7_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_8_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_8_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_3_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_3_99 +
float(df[(df['subscription_name'].isin(monthly_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * monthly_subscriptions_price +
float(df[(df['subscription_name'].isin(annual_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * annual_subscriptions_price
)
payments_refund = (
float(df[(df['subscription_name'].isin(weekly_subscription_names_7_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_7_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_8_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_8_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_3_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_3_99 +
float(df[(df['subscription_name'].isin(monthly_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * monthly_subscriptions_price +
float(df[(df['subscription_name'].isin(annual_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * annual_subscriptions_price
)
paying_users_count = float(
df[(df['original_start_date'] >= start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events)) & (df['consecutive_paid_periods'] == 1)]['quantity'].sum() -
df[(df['original_start_date'] >= start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund') & (df['consecutive_paid_periods'] == 1)]['quantity'].sum()
)
if paying_users_count == 0:
return 0
return share * float(( payments - payments_refund ) / paying_users_count )
def show_bar(df, start_date, month_count):
d = {}
for i in range(month_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
d[s_d.strftime("%m/%d/%Y")] = calcLTV(df, s_d, e_d)
trace = go.Bar(x=list(d.keys()), y=list(d.values()), name = s_d.strftime("%m/%d/%Y"))
fig = go.Figure(data=[trace])
configure_plotly_browser_state()
iplot(fig)
show_bar(ubeats, datetime(2018, 3, 1), 14)
print('LTV 03.18: ', calcLTV(ubeats, datetime(2018, 3, 1), datetime(2018, 4, 1)))
print('LTV 04.18: ', calcLTV(ubeats, datetime(2018, 4, 1), datetime(2018, 5, 1)))
print('LTV 05.18: ', calcLTV(ubeats, datetime(2018, 5, 1), datetime(2018, 6, 1)))
print('LTV 06.18: ', calcLTV(ubeats, datetime(2018, 6, 1), datetime(2018, 7, 1)))
print('LTV 07.18: ', calcLTV(ubeats, datetime(2018, 7, 1), datetime(2018, 8, 1)))
print('LTV 08.18: ', calcLTV(ubeats, datetime(2018, 8, 1), datetime(2018, 9, 1)))
print('LTV 09.18: ', calcLTV(ubeats, datetime(2018, 9, 1), datetime(2018, 10, 1)))
print('LTV 10.18: ', calcLTV(ubeats, datetime(2018, 10, 1), datetime(2018, 11, 1)))
print('LTV 11.18: ', calcLTV(ubeats, datetime(2018, 11, 1), datetime(2018, 12, 1)))
print('LTV 12.18: ', calcLTV(ubeats, datetime(2018, 12, 1), datetime(2019, 1, 1)))
print('LTV 01.19: ', calcLTV(ubeats, datetime(2019, 1, 1), datetime(2019, 2, 1)))
print('LTV 02.19: ', calcLTV(ubeats, datetime(2019, 2, 1), datetime(2019, 3, 1)))
print('LTV 03.19: ', calcLTV(ubeats, datetime(2019, 3, 1), datetime(2019, 4, 1)))
print('LTV 04.19: ', calcLTV(ubeats, datetime(2019, 4, 1), datetime(2019, 5, 1)))
24.04.2019 (без учета разных видов подписок)
LTV 03.18: 41.47262264150944
LTV 04.18: 44.53098051372896
LTV 05.18: 43.149011306532664
LTV 06.18: 38.62239329268292
LTV 07.18: 38.54275531011045
LTV 08.18: 36.302018766756035
LTV 09.18: 31.96379572446556
LTV 10.18: 30.267810810810808
LTV 11.18: 27.64907740585774
LTV 12.18: 24.458327878103834
LTV 01.19: 22.658964788732394
LTV 02.19: 19.131297106109326
LTV 03.19: 14.012302841918293
LTV 04.19: 8.036897826086957
24.04.2019 (с учетом разных видов подписок)
LTV 03.18: 42.61306595744681
LTV 04.18: 46.41545429864254
LTV 05.18: 44.5779220945083
LTV 06.18: 40.085287656903766
LTV 07.18: 39.61326666666667
LTV 08.18: 37.27651092896175
LTV 09.18: 33.00666992068334
LTV 10.18: 31.468456483126108
LTV 11.18: 28.62864727011494
LTV 12.18: 25.435714202561112
LTV 01.19: 23.554221413721418
LTV 02.19: 19.865958139534882
LTV 03.19: 14.4964738771769
LTV 04.19: 8.246946784922393
ubeats['subscription_name'].value_counts()
Рассматриваются только месяца, в которых было больше 5 подписок.
def show_bar_by_countries(df, start_date, week_count):
data = []
for i in range(week_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
d = {}
for name, group in df.groupby('country'):
if group[(group['original_start_date'] >= s_d) & (group['original_start_date'] < e_d) & (group['event'].isin(payment_events)) & (group['consecutive_paid_periods'] == 1)]['quantity'].sum() > 5:
d[name] = calcLTV(group, s_d, e_d)
trace = go.Bar(x=list(d.keys()), y=list(d.values()), name = s_d.strftime("%m/%d/%Y"))
data.append(trace)
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_bar_by_countries(ubeats, datetime(2018, 3, 1), 14)
диаграмма показывает LTV по месяцам (для стран)
def show_bar_by_subscriptions(df, start_date, week_count):
data = []
for i in range(week_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
d = {}
for name, group in df.groupby('subscription_name'):
if group[(group['original_start_date'] >= s_d) & (group['original_start_date'] < e_d) & (group['event'].isin(payment_events)) & (group['consecutive_paid_periods'] == 1)]['quantity'].sum() > 20:
d[name] = calcLTV(group, s_d, e_d)
trace = go.Bar(x=list(d.keys()), y=list(d.values()), name = s_d.strftime("%m/%d/%Y"))
data.append(trace)
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
def show_bar_by_subscription_payments(df, start_date, month_count):
data = []
for i in range(month_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
d = {}
for name, group in df.groupby('subscription_name'):
payments = group[(group['original_start_date'] >= s_d) & (group['original_start_date'] < e_d) & (group['event'].isin(payment_events)) & (group['consecutive_paid_periods'] == 1)]['quantity'].sum()
if payments > 20:
d[name] = payments
trace = go.Bar(x=list(d.keys()), y=list(d.values()), name = s_d.strftime("%m/%d/%Y"))
data.append(trace)
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
диаграмма по периодам, в которых больше 20 платящих пользователей.
show_bar_by_subscriptions(ubeats, datetime(2018, 3, 1), 14)
Количество покупающих пользователей по месяцам (по ивентам)
show_bar_by_subscription_payments(ubeats, datetime(2018, 3, 1), 14)
Диаграмма показывает, какие подписки можно сравнивать между собой.
def calc_points(df_cohort, start_date, end_date):
data = []
ed = start_date
while ed < end_date:
data.append(calcLTV(df_cohort[df_cohort['event_date'] <= ed], start_date, ed))
ed += timedelta(7)
return data
def show_plot(df, start_date, month_count):
data = []
for i in range(month_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
points = calc_points(df[(df['original_start_date'] >= s_d) & (df['original_start_date'] < e_d)], s_d, s_d + relativedelta.relativedelta(months=(month_count - i)))
data.append(go.Scatter(x = list(range(len(points))), y = points, name=s_d.strftime("%m/%d/%Y")))
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_plot(ubeats, datetime(2018, 4, 1), 13)
На графике представлено изменение LTV с течением времени (по месяцам).
Ось х = количество прошедших недель с начала месяца.
Ось у = LTV.
def calc_trials_LTV(df, start_date, end_date):
share = 0.7
payments = (
float(df[(df['subscription_name'].isin(weekly_subscription_names_7_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_7_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_8_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_8_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_3_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_3_99 +
float(df[(df['subscription_name'].isin(monthly_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * monthly_subscriptions_price +
float(df[(df['subscription_name'].isin(annual_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * annual_subscriptions_price
)
payments_refund = (
float(df[(df['subscription_name'].isin(weekly_subscription_names_7_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_7_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_8_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_8_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_3_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_3_99 +
float(df[(df['subscription_name'].isin(monthly_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * monthly_subscriptions_price +
float(df[(df['subscription_name'].isin(annual_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * annual_subscriptions_price
)
trials_count = float(
df[(df['original_start_date'] >= start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(trial_started_events))]['quantity'].sum()
)
if trials_count == 0:
return 0
return share * float(( payments - payments_refund ) / trials_count )
def show_bar(df, start_date, month_count):
d = {}
for i in range(month_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
d[s_d.strftime("%m/%d/%Y")] = calc_trials_LTV(df, s_d, e_d)
trace = go.Bar(x=list(d.keys()), y=list(d.values()), name = s_d.strftime("%m/%d/%Y"))
fig = go.Figure(data=[trace])
configure_plotly_browser_state()
iplot(fig)
show_bar(ubeats, datetime(2018, 3, 1), 14)
Рассматриваются только месяца, в которых было больше 5 подписок.
def show_bar_by_countries(df, start_date, week_count):
data = []
for i in range(week_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
d = {}
for name, group in df.groupby('country'):
if group[(group['original_start_date'] >= s_d) & (group['original_start_date'] < e_d) & (group['event'].isin(payment_events)) & (group['consecutive_paid_periods'] == 1)]['quantity'].sum() > 20:
d[name] = calc_trials_LTV(group, s_d, e_d)
trace = go.Bar(x=list(d.keys()), y=list(d.values()), name = s_d.strftime("%m/%d/%Y"))
data.append(trace)
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_bar_by_countries(ubeats, datetime(2018, 3, 1), 14)
диаграмма показывает LTV(trials) по месяцам (для стран)
def calc_points(df_cohort, start_date, end_date):
data = []
ed = start_date
while ed < end_date:
data.append(calc_trials_LTV(df_cohort[df_cohort['event_date'] <= ed], start_date, ed))
ed += timedelta(7)
return data
def show_plot(df, start_date, month_count):
data = []
for i in range(month_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
points = calc_points(df[(df['original_start_date'] >= s_d) & (df['original_start_date'] < e_d)], s_d, s_d + relativedelta.relativedelta(months=(month_count - i)))
data.append(go.Scatter(x = list(range(len(points))), y = points, name=s_d.strftime("%m/%d/%Y")))
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_plot(ubeats, datetime(2018, 4, 1), 13)
На графике представлено изменение LTV (trials) с течением времени (по месяцам).
Ось х = количество прошедших недель с начала месяца.
Ось у = LTV (trials).
def calc_revenue(df, start_date, end_date):
share = 0.7
payments = (
float(df[(df['subscription_name'].isin(weekly_subscription_names_7_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_7_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_8_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_8_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_3_99) ) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * weekly_subscriptions_price_3_99 +
float(df[(df['subscription_name'].isin(monthly_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * monthly_subscriptions_price +
float(df[(df['subscription_name'].isin(annual_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'].isin(payment_events))]['quantity'].sum()) * annual_subscriptions_price
)
payments_refund = (
float(df[(df['subscription_name'].isin(weekly_subscription_names_7_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_7_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_8_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_8_99 +
float(df[(df['subscription_name'].isin(weekly_subscription_names_3_99)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * weekly_subscriptions_price_3_99 +
float(df[(df['subscription_name'].isin(monthly_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * monthly_subscriptions_price +
float(df[(df['subscription_name'].isin(annual_subscription_names)) & (df['original_start_date'] >=start_date) & (df['original_start_date'] < end_date) & (df['event'] == 'Refund')]['quantity'].sum()) * annual_subscriptions_price
)
return share * float(( payments - payments_refund ))
def show_bar(df, start_date, month_count):
d = {}
for i in range(month_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
d[s_d.strftime("%m/%d/%Y")] = calc_revenue(df, s_d, e_d)
trace = go.Bar(x=list(d.keys()), y=list(d.values()), name = s_d.strftime("%m/%d/%Y"))
fig = go.Figure(data=[trace])
configure_plotly_browser_state()
iplot(fig)
show_bar(ubeats, datetime(2018, 4, 1), 13)
На диаграмме представлена выручка по месяцам.
def calc_points(df_cohort, start_date, end_date):
data = []
ed = start_date
while ed < end_date:
data.append(calc_revenue(df_cohort[df_cohort['event_date'] <= ed], start_date, ed))
ed += timedelta(7)
return data
def show_plot(df, start_date, month_count):
data = []
for i in range(month_count):
s_d = start_date + relativedelta.relativedelta(months=i)
e_d = start_date + relativedelta.relativedelta(months=i+1)
points = calc_points(df[(df['original_start_date'] >= s_d) & (df['original_start_date'] < e_d)], s_d, s_d + relativedelta.relativedelta(months=(month_count - i)))
data.append(go.Scatter(x = list(range(len(points))), y = points, name=s_d.strftime("%m/%d/%Y")))
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
show_plot(ubeats, datetime(2018, 3, 1), 14)
На графике представлено изменение выручки с течением времени (по месяцам).
Ось х = количество прошедших недель с начала месяца.
Ось у = выручка.
прогноз на основании половины подписок.
def get_predict(df, current_df):
trials = df[df['event'].isin(trial_started_events)]['quantity'].sum()
current_trials = current_df[current_df['event'].isin(trial_started_events)]['quantity'].sum()
predict = []
arr_2 = []
arr = []
data = [1, ]
current_data = [1, ]
paid_current = current_df[current_df['event'].isin(payment_events)]
paid = df[df['event'].isin(payment_events)]
for name, group in paid.groupby('consecutive_paid_periods'):
data.append( group[group['event'].isin(payment_events)]['quantity'].sum() / trials)
for name, group in paid_current.groupby('consecutive_paid_periods'):
current_data.append(group[group['event'].isin(payment_events)]['quantity'].sum() / current_trials)
arr = (np.array(data[1:len(current_data) // 2]) / np.array(current_data[1:len(current_data) // 2]))
for i in range(1, len(arr)):
arr_2.append(arr[i-1] / arr[i])
mean = np.mean(arr_2)
for i in range(len(arr), len(data)-1):
arr = np.append(arr, arr[i-1] / mean)
return [(data[1:] / arr), current_data[1:]]
predict_1, real_1 = get_predict(ubeats, ubeats[(ubeats['original_start_date'] >= datetime(2018, 7, 1)) & (ubeats['original_start_date'] <= datetime(2018, 8, 7))])
predict_2, real_2 = get_predict(ubeats, ubeats[(ubeats['original_start_date'] >= datetime(2019, 2, 1)) & (ubeats['original_start_date'] <= datetime(2019, 3, 1))])
data = [
go.Scatter(x = list(range(len(predict_1))), y = predict_1, name='predict_1'),
go.Scatter(x = list(range(len(predict_2))), y = predict_2, name='predict_2'),
# go.Scatter(x = list(range(len(predict_3))), y = predict_3, name='predict_3'),
# go.Scatter(x = list(range(len(real_1))), y = real_1, name='real_1'),
# go.Scatter(x = list(range(len(real_2))), y = real_2, name='real_2'),
# go.Scatter(x = list(range(len(real_3))), y = real_3, name='real_3'),
]
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
с 01.01.2019
refunds = ubeats[(ubeats['event'] == 'Refund') & (ubeats['event_date'] > datetime(2019,1,1))]['event_date'].value_counts().sort_index()
data = [go.Scatter(x = refunds.keys(), y = refunds.values)]
fig = go.Figure(data=data)
configure_plotly_browser_state()
iplot(fig)
c 01.05.2019
ubeats[(ubeats['event'] == 'Refund') & (ubeats['event_date'] > datetime(2019,5,1))]['original_start_date'].value_counts().sort_index()