Commit 0a597634 by 柴鹏飞

埋点数据管理类

parent 1f87ec70
# -*- coding: utf-8 -*-
import pandas as pd
from ydl_ai_recommender.src.core.manager import DatabaseDataManager
from ydl_ai_recommender.src.core.manager import UserProfileManager
from ydl_ai_recommender.src.utils.log import create_logger
# Session-level exposure/click query template.
#
# Placeholders (filled with ``str.format``): ``start_date`` and ``end_date``,
# applied as the half-open partition range ``dt >= start_date AND dt < end_date``
# to all three event tables.
#
#   t1 - dwd.dwd_event_doctor_ae: one row per session with the distinct user
#        ids, distinct doctor ids, event count and the span between the first
#        and last ``client_time`` (divided by 1000 and 60, rounded to 1 decimal).
#   t2 - dwd.dwd_event_doctor_card_click: per-session distinct doctor ids and
#        click count.
#   t3 - dwd.dwd_event_private_chat_click: per-session distinct doctor ids and
#        click count.
#
# t2 and t3 are LEFT JOINed onto t1, so their columns are NULL for sessions
# without that kind of click. The final WHERE keeps sessions that have at least
# one click of either kind. It previously tested ``t2.cnt`` twice, which
# silently dropped sessions that only had private-chat clicks.
EXPOSE_CLICK_SQL = """
SELECT
t1.session_id,
t1.user_ids,
t1.duration,
t1.cnt AS ae_cnt,
t2.cnt AS doctor_card_cnt,
t3.cnt AS private_chat_cnt,
t1.doctor_ids AS ae_doctor_ids,
t2.doctor_ids AS doctor_card_doctor_ids,
t3.doctor_ids AS private_chat_doctor_ids
FROM
(
SELECT
session_id,
GROUP_CONCAT(DISTINCT(uid)) AS user_ids,
GROUP_CONCAT(DISTINCT(doctor_id)) AS doctor_ids,
ROUND((MAX(client_time) - MIN(client_time)) / 1000 / 60, 1) AS duration,
COUNT(1) AS cnt
FROM
dwd.dwd_event_doctor_ae
WHERE
dt >= '{start_date}'
AND dt < '{end_date}'
AND uid IS NOT NULL
AND uid <> 0
GROUP BY
session_id
) t1
LEFT JOIN (
SELECT
session_id,
GROUP_CONCAT(DISTINCT(doctor_id)) AS doctor_ids,
COUNT(1) AS cnt
FROM
dwd.dwd_event_doctor_card_click
WHERE
dt >= '{start_date}'
AND dt < '{end_date}'
GROUP BY
session_id
) t2 ON t1.session_id = t2.session_id
LEFT JOIN (
SELECT
session_id,
GROUP_CONCAT(DISTINCT(doctor_id)) AS doctor_ids,
COUNT(1) AS cnt
FROM
dwd.dwd_event_private_chat_click
WHERE
dt >= '{start_date}'
AND dt < '{end_date}'
GROUP BY
session_id
) t3 ON t1.session_id = t3.session_id
WHERE
t2.cnt IS NOT NULL
OR t3.cnt IS NOT NULL
"""
# Placeholder for a user-level query; currently holds only a newline.
EXPOSE_CLICK_USER_SQL = """
"""
class UserEventManager(DatabaseDataManager):
    """
    Manager for tracking-event (exposure / click) data.

    Query results are written to, and read back from, xlsx files whose
    names embed the requested ``start_date`` and ``end_date``.
    """

    def __init__(self, client=None) -> None:
        event_logger = create_logger(__name__, 'user_event_manager.log')
        super().__init__(client, event_logger)

    def _dump_query_to_xlsx(self, query: str, file_name: str) -> None:
        """Run ``query`` and save the fetched rows, as a DataFrame, to ``file_name``."""
        _, rows = self.fetch_data_from_db(query)
        self.save_xlsx_data(pd.DataFrame(rows), file_name)

    def update_data(self, start_date: str = '2023-01-01', end_date: str = '2023-02-01'):
        """
        Pull exposure/click data from the database and save it.

        Two files are written: the session-level exposure/click rows, and the
        profile rows of users appearing in ``dwd.dwd_event_doctor_ae`` for the
        same range. Both queries filter on ``dt >= start_date AND dt < end_date``.

        Args:
            start_date: Inclusive lower bound interpolated into the ``dt`` filter.
            end_date: Exclusive upper bound interpolated into the ``dt`` filter.
        """
        expose_click_query = EXPOSE_CLICK_SQL.format(start_date=start_date, end_date=end_date)
        self._dump_query_to_xlsx(
            expose_click_query,
            f'expose_click_{start_date}_{end_date}.xlsx',
        )

        # The profile data of the related users has to be downloaded as well.
        profile_manager = UserProfileManager()
        profile_query = (
            'SELECT {} FROM ads.ads_register_user_profiles'.format(profile_manager.select_items_str)
            + f" WHERE uid IN (SELECT uid FROM dwd.dwd_event_doctor_ae WHERE dt >= '{start_date}' AND dt < '{end_date}')"
        )
        self._dump_query_to_xlsx(
            profile_query,
            f'expose_click_user_{start_date}_{end_date}.xlsx',
        )

    def load_data(self, start_date: str, end_date: str) -> list:
        """Load the exposure/click file previously saved for this date range."""
        file_name = f'expose_click_{start_date}_{end_date}.xlsx'
        return self.load_xlsx_data(file_name)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment