Commit a65552b2 by 柴鹏飞

召回模型获取用户画像改为从 DMP 获取

parent ae5af70b
......@@ -5,3 +5,6 @@ host = am-bp1w063w3g4908x1890650o.ads.aliyuncs.com
port = 3306
user =
password =
[DMP]
url =
......@@ -114,7 +114,11 @@ class UserPreferenceCateProfile(BaseProfile):
def convert(self, value):
ret = [0.] * 8
if pd.isnull(value):
if isinstance(value, list):
if not value:
return ret
elif pd.isnull(value):
return ret
if isinstance(value, str):
......
......@@ -2,9 +2,11 @@
import os
import json
from typing import List, Dict
import configparser
from typing import List
import faiss
import requests
import numpy as np
from ydl_ai_recommender.src.core.indexer import (
......@@ -15,7 +17,6 @@ from ydl_ai_recommender.src.core.indexer import (
CounselorCounselorCFIndexer,
)
from ydl_ai_recommender.src.core.profile import encode_user_profile
from ydl_ai_recommender.src.data.mysql_client import MySQLClientPool
from ydl_ai_recommender.src.utils import get_conf_path, get_data_path
from ydl_ai_recommender.src.utils.log import create_logger
......@@ -40,33 +41,33 @@ class Recommender():
class UserCFRecommender(Recommender):
def __init__(self, top_n=5, k=20, is_use_db=True, u2c='combination', c2c=None) -> None:
def __init__(self, top_n=5, k=20, u2c='combination', c2c=None) -> None:
"""
params:
top_n: 每个召回的用户获取的相关咨询师个数
k: 召回的相似用户数
is_use_db: 是否使用数据库
u2c: [用户->咨询师] 索引方法
c2c: [咨询师->咨询师] 索引方法,None 表示不使用咨询师拓展
"""
super().__init__()
self.select_items_str = ', '.join([
select_items = [
'uid', 'country_code', 'channel_id_type', 'ffrom_login', 'user_preference_cate', 'consult_pay_money',
'listen_pay_money', 'test_items_pay_money', 'course_pay_money', 'consult_order_num',
'listen_order_num', 'test_items_order_num', 'course_order_num', 'aidi_cst_bias_city',
'aidi_cst_bias_sex', 'aidi_cst_bias_price', 'aidi_cst_bias_server_type', 'user_login_city',
'd30_inquire_order_num', 'd30_session_num',
])
]
self.select_fields = {k: True for k in select_items}
# 召回 top_n 个相似用户
self.top_n = top_n
# 每个召回的用户取 k 个相关咨询师
self.k = k
self.logger = create_logger(__name__, 'recommender.log')
if is_use_db:
self.client = MySQLClientPool.create_from_config_file(get_conf_path())
else:
self.logger.warn('未连接数据库')
config = configparser.RawConfigParser()
config.read(get_conf_path())
self.dmp_url = config.get('DMP', 'url')
if u2c == 'chat':
self.indexer = UserCounselorChatIndexer(self.logger)
......@@ -107,18 +108,32 @@ class UserCFRecommender(Recommender):
if user_id == '0':
return []
sql = 'SELECT {} FROM ads.ads_register_user_profiles'.format(self.select_items_str)
sql += ' WHERE uid={}'.format(user_id)
headers = {
'X-App-Id': 'plough_cloud',
'Content-Type': 'application/json'
}
payload = {
"filter": {
"uid": user_id,
},
"fields": self.select_fields,
"limit": 10
}
try:
_, all_data = self.client.query(sql)
if len(all_data) == 0:
return []
return all_data[0]
response = requests.request('POST', self.dmp_url, headers=headers, json=payload)
resp = response.json()
return resp['data']['objects'][0]
except Exception as e:
self.logger.error('获取用户画像数据失败', exc_info=True)
self.logger.error('获取用户画像数据失败: %s', e, exc_info=True)
try:
self.logger.exception('response json data %s', resp)
except:
pass
return []
def _recommend(self, user_embedding):
D, I = self.index.search(np.array([user_embedding]), self.k)
counselors = []
......@@ -227,5 +242,5 @@ class ItemCFRecommender(Recommender):
if __name__ == '__main__':
recommender = ItemCFRecommender()
print(recommender.recommend('10957910'))
\ No newline at end of file
recommender = UserCFRecommender()
print(recommender.recommend('12047'))
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment