Commit 9121e5a1 by 王金柱

更新xgboost模型预测过程中各阶段的耗时统计

parent 321b7e4c
...@@ -25,6 +25,18 @@ from ydl_ai_recommender.src.utils.log import create_logger ...@@ -25,6 +25,18 @@ from ydl_ai_recommender.src.utils.log import create_logger
logger = create_logger(__name__, 'service_xgb.log', is_rotating=True) logger = create_logger(__name__, 'service_xgb.log', is_rotating=True)
def cost_time(desc):
    """Build a decorator that logs the wall-clock duration of each call.

    Args:
        desc: Label placed in the log line right after the function name.

    Returns:
        A decorator. The wrapper it produces forwards all positional and
        keyword arguments to the wrapped function, returns its result
        unchanged, and emits one ``logger.info`` record containing the
        elapsed time in milliseconds rounded to two decimal places.
        Nothing is logged when the wrapped function raises.
    """
    # Function-scope import keeps this block self-contained.
    import functools

    def the_func_cost_time(func):
        # functools.wraps copies __name__/__doc__ onto the wrapper and sets
        # __wrapped__, so decorated methods no longer all appear as "fun".
        @functools.wraps(func)
        def fun(*args, **kwargs):
            t = time.perf_counter()
            result = func(*args, **kwargs)
            logger.info('函数:{},{} 耗时:{} ms'.format(str(func.__name__), desc, round((time.perf_counter()-t)*1000, 2)))
            return result
        return fun
    return the_func_cost_time
class Recommender(): class Recommender():
def __init__(self) -> None: def __init__(self) -> None:
...@@ -266,20 +278,22 @@ class RecommendByXgboost(Recommender): ...@@ -266,20 +278,22 @@ class RecommendByXgboost(Recommender):
self.model = xgb.XGBClassifier(objective='binary:logistic', nthread=-1, **self.params) self.model = xgb.XGBClassifier(objective='binary:logistic', nthread=-1, **self.params)
self.model.load_model(os.path.join(get_project_path(), 'model_data/xgb_model.bin')) self.model.load_model(os.path.join(get_project_path(), 'model_data/xgb_model.bin'))
@cost_time(desc='')
def recall_data(self, user_id, size=0, is_merge=True):
    """Run the recall step by delegating to ``self.recommender.recommend``.

    Args:
        user_id: Passed through as the first positional argument.
        size: Forwarded as the ``size`` keyword.
        is_merge: Forwarded as the ``is_merge`` keyword.

    Returns:
        Whatever ``self.recommender.recommend`` returns.
    """
    # Forward the caller's flag: it used to be hard-coded to True here, which
    # made the is_merge parameter of this method have no effect.
    return self.recommender.recommend(user_id, size=size, is_merge=is_merge)
@cost_time(desc='模型推荐整个流程')
def recommend(self, user_id, size=0, is_merge=True): def recommend(self, user_id, size=0, is_merge=True):
s_u_profile_time = time.time() s_u_profile_time = time.time()
user_profile = self.get_user_profile(user_id) user_profile = self.get_user_profile(user_id)
if not user_profile: if not user_profile:
return self._recommend_top(size) return self._recommend_top(size)
recall_start = time.time()
# recommend_result = self.recommender.recommend(user_id, size=size, is_merge=True)
recommend_result = None
logger.info('recall call time:{}ms'.format(int((time.time()-recall_start)*1000)))
# recommend_result = self.recall_data(user_id, size=size, is_merge=True)
recommend_result = None
data_time = time.time() data_time = time.time()
predit_data = self.trans_feature_data(user_id, recommend_result) predit_data = self.trans_feature_data(user_id, user_profile, recommend_result)
logger.info('data_time: {}ms'.format(int((time.time()-data_time)*1000)))
doctor_ids = predit_data.pop('doctor_id') doctor_ids = predit_data.pop('doctor_id')
doctor_ids = doctor_ids.to_numpy() doctor_ids = doctor_ids.to_numpy()
...@@ -290,13 +304,14 @@ class RecommendByXgboost(Recommender): ...@@ -290,13 +304,14 @@ class RecommendByXgboost(Recommender):
result_dict = sorted(result_dict.items(), key=lambda x:x[1], reverse=True) result_dict = sorted(result_dict.items(), key=lambda x:x[1], reverse=True)
recommend_data = [{ recommend_data = [{
'counselor': int(c_id), 'counselor': int(c_id),
'score': float(proba), 'score': round(float(proba), 4),
'from': 'similar_users {}'.format(user_id), 'from': 'similar_users {}'.format(user_id),
} for (c_id, proba) in result_dict[0:50]] } for (c_id, proba) in result_dict[0:50]]
return recommend_data return recommend_data
def trans_feature_data(self, user_id, counselor_data): @cost_time(desc='')
user_feature_data = self.trans_user_feature_data(user_id) def trans_feature_data(self, user_id, user_profile, counselor_data):
user_feature_data = self.trans_user_feature_data(user_id, user_profile)
counselor_feature_data = self.trans_counselor_feature_data(counselor_data) counselor_feature_data = self.trans_counselor_feature_data(counselor_data)
counselor_num = len(counselor_feature_data) counselor_num = len(counselor_feature_data)
...@@ -307,9 +322,9 @@ class RecommendByXgboost(Recommender): ...@@ -307,9 +322,9 @@ class RecommendByXgboost(Recommender):
predit_feature_data = pd.concat([user_feature_data_dataframe, counselor_feature_data], axis=1) predit_feature_data = pd.concat([user_feature_data_dataframe, counselor_feature_data], axis=1)
return predit_feature_data return predit_feature_data
def trans_user_feature_data(self, user_id): def trans_user_feature_data(self, user_id, user_profile):
if not user_profile:
user_profile = self.get_user_profile(user_id) user_profile = self.get_user_profile(user_id)
from_login_encoder = self.get_encoder_from_dict('ffrom_login', user_profile['ffrom_login']) from_login_encoder = self.get_encoder_from_dict('ffrom_login', user_profile['ffrom_login'])
user_login_city_encoder = self.get_encoder_from_dict('user_login_city', user_profile['user_login_city']) user_login_city_encoder = self.get_encoder_from_dict('user_login_city', user_profile['user_login_city'])
...@@ -327,6 +342,7 @@ class RecommendByXgboost(Recommender): ...@@ -327,6 +342,7 @@ class RecommendByXgboost(Recommender):
# return counselor_profiles # return counselor_profiles
return self.all_counselors return self.all_counselors
@cost_time(desc='获取用户画像')
def get_user_profile(self, user_id): def get_user_profile(self, user_id):
if user_id == '0': if user_id == '0':
return [] return []
...@@ -347,7 +363,6 @@ class RecommendByXgboost(Recommender): ...@@ -347,7 +363,6 @@ class RecommendByXgboost(Recommender):
try: try:
get_profile_time = time.time() get_profile_time = time.time()
response = requests.request('POST', self.dmp_url, headers=headers, json=payload) response = requests.request('POST', self.dmp_url, headers=headers, json=payload)
logger.info('get user profile cost: {}ms'.format(int((time.time()-get_profile_time)*1000)))
resp = response.json() resp = response.json()
return resp['data']['objects'][0] return resp['data']['objects'][0]
except Exception as e: except Exception as e:
...@@ -398,11 +413,11 @@ if __name__ == '__main__': ...@@ -398,11 +413,11 @@ if __name__ == '__main__':
print() print()
s_time = time.time() s_time = time.time()
recommender = RecommendByXgboost() recommender = RecommendByXgboost()
recommender.recommend('12047') recommender.recommend('3251227')
print('all cost time: '.format(time.time()-s_time), recommender.recommend('12047')) print('all cost time: '.format(time.time()-s_time), recommender.recommend('12047'))
# print() # print()
# print() # print()
# s_time = time.time() # s_time = time.time()
# recommender.recommend('30004410') # recommender.recommend('37298')
# print('all cost time: '.format(time.time() - s_time), recommender.recommend('12047')) # print('all cost time: '.format(time.time() - s_time), recommender.recommend('12047'))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment