Commit a65552b2 by 柴鹏飞

召回模型获取用户画像改为从DMP

parent ae5af70b
...@@ -5,3 +5,6 @@ host = am-bp1w063w3g4908x1890650o.ads.aliyuncs.com ...@@ -5,3 +5,6 @@ host = am-bp1w063w3g4908x1890650o.ads.aliyuncs.com
port = 3306 port = 3306
user = user =
password = password =
[DMP]
url =
...@@ -114,7 +114,11 @@ class UserPreferenceCateProfile(BaseProfile): ...@@ -114,7 +114,11 @@ class UserPreferenceCateProfile(BaseProfile):
def convert(self, value): def convert(self, value):
ret = [0.] * 8 ret = [0.] * 8
if pd.isnull(value):
if isinstance(value, list):
if not value:
return ret
elif pd.isnull(value):
return ret return ret
if isinstance(value, str): if isinstance(value, str):
......
...@@ -2,9 +2,11 @@ ...@@ -2,9 +2,11 @@
import os import os
import json import json
from typing import List, Dict import configparser
from typing import List
import faiss import faiss
import requests
import numpy as np import numpy as np
from ydl_ai_recommender.src.core.indexer import ( from ydl_ai_recommender.src.core.indexer import (
...@@ -15,7 +17,6 @@ from ydl_ai_recommender.src.core.indexer import ( ...@@ -15,7 +17,6 @@ from ydl_ai_recommender.src.core.indexer import (
CounselorCounselorCFIndexer, CounselorCounselorCFIndexer,
) )
from ydl_ai_recommender.src.core.profile import encode_user_profile from ydl_ai_recommender.src.core.profile import encode_user_profile
from ydl_ai_recommender.src.data.mysql_client import MySQLClientPool
from ydl_ai_recommender.src.utils import get_conf_path, get_data_path from ydl_ai_recommender.src.utils import get_conf_path, get_data_path
from ydl_ai_recommender.src.utils.log import create_logger from ydl_ai_recommender.src.utils.log import create_logger
...@@ -40,33 +41,33 @@ class Recommender(): ...@@ -40,33 +41,33 @@ class Recommender():
class UserCFRecommender(Recommender): class UserCFRecommender(Recommender):
def __init__(self, top_n=5, k=20, is_use_db=True, u2c='combination', c2c=None) -> None: def __init__(self, top_n=5, k=20, u2c='combination', c2c=None) -> None:
""" """
params: params:
top_n: 每个召回的用户获取的相关咨询师个数 top_n: 每个召回的用户获取的相关咨询师个数
k: 召回的相似用户数 k: 召回的相似用户数
is_use_db: 是否使用数据库
u2c: [用户->咨询师] 索引方法 u2c: [用户->咨询师] 索引方法
c2c: [咨询师->咨询师] 索引方法,None 表示不使用咨询师拓展 c2c: [咨询师->咨询师] 索引方法,None 表示不使用咨询师拓展
""" """
super().__init__() super().__init__()
self.select_items_str = ', '.join([ select_items = [
'uid', 'country_code', 'channel_id_type', 'ffrom_login', 'user_preference_cate', 'consult_pay_money', 'uid', 'country_code', 'channel_id_type', 'ffrom_login', 'user_preference_cate', 'consult_pay_money',
'listen_pay_money', 'test_items_pay_money', 'course_pay_money', 'consult_order_num', 'listen_pay_money', 'test_items_pay_money', 'course_pay_money', 'consult_order_num',
'listen_order_num', 'test_items_order_num', 'course_order_num', 'aidi_cst_bias_city', 'listen_order_num', 'test_items_order_num', 'course_order_num', 'aidi_cst_bias_city',
'aidi_cst_bias_sex', 'aidi_cst_bias_price', 'aidi_cst_bias_server_type', 'user_login_city', 'aidi_cst_bias_sex', 'aidi_cst_bias_price', 'aidi_cst_bias_server_type', 'user_login_city',
'd30_inquire_order_num', 'd30_session_num', 'd30_inquire_order_num', 'd30_session_num',
]) ]
self.select_fields = {k: True for k in select_items}
# 召回 top_n 个相似用户 # 召回 top_n 个相似用户
self.top_n = top_n self.top_n = top_n
# 每个召回的用户取 k 个相关咨询师 # 每个召回的用户取 k 个相关咨询师
self.k = k self.k = k
self.logger = create_logger(__name__, 'recommender.log') self.logger = create_logger(__name__, 'recommender.log')
if is_use_db:
self.client = MySQLClientPool.create_from_config_file(get_conf_path()) config = configparser.RawConfigParser()
else: config.read(get_conf_path())
self.logger.warn('未连接数据库') self.dmp_url = config.get('DMP', 'url')
if u2c == 'chat': if u2c == 'chat':
self.indexer = UserCounselorChatIndexer(self.logger) self.indexer = UserCounselorChatIndexer(self.logger)
...@@ -107,18 +108,32 @@ class UserCFRecommender(Recommender): ...@@ -107,18 +108,32 @@ class UserCFRecommender(Recommender):
if user_id == '0': if user_id == '0':
return [] return []
sql = 'SELECT {} FROM ads.ads_register_user_profiles'.format(self.select_items_str) headers = {
sql += ' WHERE uid={}'.format(user_id) 'X-App-Id': 'plough_cloud',
'Content-Type': 'application/json'
}
payload = {
"filter": {
"uid": user_id,
},
"fields": self.select_fields,
"limit": 10
}
try: try:
_, all_data = self.client.query(sql) response = requests.request('POST', self.dmp_url, headers=headers, json=payload)
if len(all_data) == 0: resp = response.json()
return [] return resp['data']['objects'][0]
return all_data[0]
except Exception as e: except Exception as e:
self.logger.error('获取用户画像数据失败', exc_info=True) self.logger.error('获取用户画像数据失败: %s', e, exc_info=True)
try:
self.logger.exception('response json data %s', resp)
except:
pass
return [] return []
def _recommend(self, user_embedding): def _recommend(self, user_embedding):
D, I = self.index.search(np.array([user_embedding]), self.k) D, I = self.index.search(np.array([user_embedding]), self.k)
counselors = [] counselors = []
...@@ -227,5 +242,5 @@ class ItemCFRecommender(Recommender): ...@@ -227,5 +242,5 @@ class ItemCFRecommender(Recommender):
if __name__ == '__main__': if __name__ == '__main__':
recommender = ItemCFRecommender() recommender = UserCFRecommender()
print(recommender.recommend('10957910')) print(recommender.recommend('12047'))
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment