Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
Y
ydl_ai_recommender
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
闫发泽
ydl_ai_recommender
Commits
20d378e5
Commit
20d378e5
authored
Feb 28, 2023
by
柴鹏飞
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加咨询师画像
parent
b0845767
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
388 additions
and
28 deletions
+388
-28
update.py
bin/update.py
+30
-13
counselor_profile.py
src/core/counselor_profile.py
+285
-0
__init__.py
src/core/manager/__init__.py
+5
-3
profile_manager.py
src/core/manager/profile_manager.py
+63
-7
profile.py
src/core/profile.py
+3
-3
recommender.py
src/core/recommender.py
+2
-2
No files found.
bin/update.py
View file @
20d378e5
...
...
@@ -7,7 +7,9 @@ from datetime import datetime
from
ydl_ai_recommender.src.core.manager
import
(
OrderDataManager
,
ChatDataManager
,
ProfileManager
,
UserProfileManager
,
CounselorProfileManager
,
UserEventManager
,
)
from
ydl_ai_recommender.src.core.indexer
import
(
UserCounselorDefaultIndexer
,
...
...
@@ -26,7 +28,11 @@ logger = create_logger(__name__, 'update.log')
parser
=
argparse
.
ArgumentParser
(
description
=
'壹点灵 咨询师推荐 算法召回 离线更新数据模型'
)
parser
.
add_argument
(
'-t'
,
'--task'
,
type
=
str
,
required
=
True
,
choices
=
(
'load_db_data'
,
'make_embedding'
,
'make_index'
),
help
=
'执行任务名称'
choices
=
(
'load_db_data'
,
'make_embedding'
,
'make_index'
,
'load_user_event_data'
),
help
=
'执行任务名称'
)
parser
.
add_argument
(
'--only_update'
,
default
=
False
,
action
=
'store_true'
,
help
=
'从数据库导出数据到本地是否覆盖原数据,默认不覆盖'
)
parser
.
add_argument
(
'--index_last_date'
,
default
=
None
,
type
=
str
,
help
=
'构建索引最后日期,超过该日期的数据不使用'
)
...
...
@@ -61,15 +67,17 @@ if __name__ == '__main__':
logger
.
info
(
''
)
if
args
.
task
==
'load_db_data'
:
initialize_dir
()
if
args
.
only_update
is
False
:
initialize_dir
()
logger
.
info
(
'开始从数据库中更新数据'
)
client
=
MySQLClient
.
create_from_config_file
(
get_conf_path
())
managers
=
[
[
'画像数据'
,
ProfileManager
(
client
)],
[
'画像数据'
,
User
ProfileManager
(
client
)],
[
'订单数据'
,
OrderDataManager
(
client
)],
[
'询单数据'
,
ChatDataManager
(
client
)],
[
'咨询师画像'
,
CounselorProfileManager
(
client
)],
]
for
[
name
,
manager
]
in
managers
:
...
...
@@ -84,9 +92,15 @@ if __name__ == '__main__':
logger
.
info
(
''
)
logger
.
info
(
'--'
*
50
)
logger
.
info
(
'开始构建用户特征 embedding'
)
manager
=
ProfileManager
()
manager
=
User
ProfileManager
()
manager
.
make_embeddings
()
logger
.
info
(
'用户特征 embedding 构建完成'
)
logger
.
info
(
'--'
*
50
)
logger
.
info
(
'开始构建咨询师特征 embedding'
)
manager
=
CounselorProfileManager
()
manager
.
make_embeddings
()
logger
.
info
(
'咨询师特征 embedding 构建完成'
)
if
args
.
task
==
'make_index'
:
...
...
@@ -108,10 +122,13 @@ if __name__ == '__main__':
logger
.
info
(
'所有索引更新数据完成'
)
# if args.task == 'make_virtual_embedding':
# logger.info('')
# logger.info('开始构建用户特征虚拟embedding')
# manager = ProfileManager()
# manager.make_virtual_embedding()
# logger.info('用户特征虚拟 embedding 构建完成')
\ No newline at end of file
# Task: download user event-tracking data through UserEventManager.
if args.task == 'load_user_event_data':
    logger.info('')
    logger.info('开始下载用户埋点数据')

    client = MySQLClient.create_from_config_file(get_conf_path())
    manager = UserEventManager(client)

    # Training data window.
    # NOTE(review): the date ranges are hard-coded; whether end_date is
    # inclusive depends on UserEventManager.update_data -- confirm there.
    manager.update_data(start_date='2023-01-01', end_date='2023-02-01')
    # Test data window.
    manager.update_data(start_date='2023-02-01', end_date='2023-02-10')

    # The original message here was copied from the virtual-embedding task
    # and reported the wrong operation; report what this task actually did.
    logger.info('用户埋点数据下载完成')
\ No newline at end of file
src/core/counselor_profile.py
0 → 100644
View file @
20d378e5
# -*- coding: utf-8 -*-
import
json
from
typing
import
Dict
,
List
,
Any
,
Union
import
pandas
as
pd
from
.profile
import
BaseProfile
from
.profile
import
CityProfile
,
NumClassProfile
class GenderProfile(BaseProfile):
    """Two-slot one-hot encoder for the counselor ``gender`` field.

    Code ``1`` is encoded as ``[1, 0]`` (decoded as ``'male'``), code ``2``
    as ``[0, 1]`` (decoded as ``'feminine'``); every other input is encoded
    as ``[0, 0]`` (decoded as ``'unknown_sex'``).
    """

    def __init__(self) -> None:
        super().__init__()
        # Number of embedding slots this converter produces.
        self.dim = 2

    def convert(self, value):
        """Encode a raw gender code into a 2-element one-hot list.

        Args:
            value: Anything ``int()`` accepts; values that cannot be
                converted are treated as unknown.

        Returns:
            ``[1, 0]`` for 1, ``[0, 1]`` for 2, otherwise ``[0, 0]``.
        """
        try:
            value = int(value)
        except Exception:
            # Best effort: missing / malformed cells become "unknown".
            return [0, 0]

        if value == 1:
            return [1, 0]
        if value == 2:
            return [0, 1]
        return [0, 0]

    def inconvert(self, embedding):
        """Decode a 2-element gender embedding back into a label.

        Args:
            embedding: The slice produced by :meth:`convert`.

        Returns:
            ``'male'``, ``'feminine'`` or ``'unknown_sex'``.
        """
        if embedding[0] == 1:
            return 'male'
        # convert() marks the second slot with 1; the previous comparison
        # against 2 could never match, so this branch was unreachable.
        if embedding[1] == 1:
            return 'feminine'
        return 'unknown_sex'
class NumScaleProfile(BaseProfile):
    """Single-slot encoder that rescales a number into the range [0, 1].

    The value is shifted by ``min_val``, clipped to ``[0, scale]`` and
    divided by ``scale``.
    """

    def __init__(self, min_val=0, scale=1) -> None:
        """
        Args:
            min_val: Offset subtracted from the raw value before scaling.
            scale: Width of the accepted range; also the divisor.
        """
        super().__init__()
        # Number of embedding slots this converter produces.
        self.dim = 1
        self.min_val = min_val
        self.scale = scale

    def convert(self, value):
        """Encode a raw number into a 1-element list in [0, 1].

        Args:
            value: Anything ``int()`` accepts; values that cannot be
                converted yield ``[0]``.

        Returns:
            ``[clip(int(value) - min_val, 0, scale) / scale]``.
        """
        try:
            value = int(value)
        except Exception:
            # Best effort: missing / malformed cells become 0.
            return [0]

        value -= self.min_val
        value = min(value, self.scale)
        value = max(value, 0)
        return [value / self.scale]

    def inconvert(self, embedding: List[float]) -> str:
        """Decode a 1-element embedding back into the original number.

        A zero embedding is reported as ``'unknown'``; note that values at
        or below ``min_val`` also encode to zero, so they cannot be told
        apart from missing data.

        Args:
            embedding: The slice produced by :meth:`convert`.

        Returns:
            ``'unknown'`` or the decoded integer rendered as a string.
        """
        if embedding[0] == 0:
            return 'unknown'
        # Undo convert(): multiply by scale AND add the offset back (the
        # offset was previously dropped). round() instead of plain int()
        # avoids losing one unit to float error (e.g. 0.29 * 100 -> 28.99..).
        return str(int(round(embedding[0] * self.scale + self.min_val)))
class CertificationProfile(BaseProfile):
    """One-hot encoder for the counselor ``certification`` field (4 slots)."""

    def __init__(self) -> None:
        super().__init__()
        self.dim = 4
        # Per the original author's note, these four certificate types
        # cover roughly 95% of the data.
        self.certification_names = [
            '二级心理咨询师',
            '三级心理咨询师',
            '中级心理咨询师',
            '社会心理师',
        ]

    def convert(self, value):
        """Return a 4-element one-hot list for a certificate name.

        Names that are not in ``certification_names`` yield all zeros.
        """
        one_hot = [0, 0, 0, 0]
        try:
            position = self.certification_names.index(value)
        except ValueError:
            return one_hot
        one_hot[position] = 1
        return one_hot

    def inconvert(self, embedding):
        """Return the certificate name of the first slot equal to 1.

        Falls back to ``'其他证书'`` when no slot is set.
        """
        return next(
            (
                self.certification_names[slot]
                for slot, flag in enumerate(embedding)
                if flag == 1
            ),
            '其他证书',
        )
class EducationProfile(BaseProfile):
    """One-hot encoder for the counselor ``education`` code (6 slots).

    Codes 3..8 map to slots 0..5; the meaning of each code is defined by
    the upstream data and is not visible here.
    """

    def __init__(self) -> None:
        super().__init__()
        self.dim = 6

    def convert(self, value):
        """Return a 6-element one-hot list for an education code.

        Inputs that ``int()`` rejects, or codes outside 3..8, yield zeros.
        """
        one_hot = [0] * 6
        try:
            level = int(value)
        except Exception:
            return one_hot

        if level < 3 or level > 8:
            return one_hot
        one_hot[level - 3] = 1
        return one_hot

    def inconvert(self, embedding):
        """Return the education code (as a string) of the first set slot.

        Falls back to ``'unknown_education'`` when no slot equals 1.
        """
        return next(
            (str(slot + 3) for slot, flag in enumerate(embedding) if flag == 1),
            'unknown_education',
        )
class OrderCateProfile(BaseProfile):
    """Multi-hot encoder for order category data (10 slots).

    The original note describes this as "order_tag_sum: order tag
    categories".
    NOTE(review): the converter table registers this class under the
    ``order_cate_sum`` column -- confirm which column is intended.
    """

    def __init__(self) -> None:
        super().__init__()
        self.dim = 10
        # Slot order of the embedding; do not reorder without rebuilding
        # every stored embedding.
        self.cate_list = [
            '恋爱情感',
            '情绪压力',
            '婚姻家庭',
            '个人成长',
            '亲子教育',
            '人际关系',
            '职场发展',
            '心理健康',
            '人际社交',
            '两性心理',
        ]

    def convert(self, value):
        """Encode a comma-separated run of JSON objects as a multi-hot list.

        Args:
            value: A string holding JSON objects separated by commas
                (without the enclosing brackets); each object is expected
                to carry a ``cate_name`` key.

        Returns:
            A list of length ``dim`` with ``1.0`` in the slot of every
            recognised category and ``0`` elsewhere. Unparseable input
            yields all zeros.
        """
        ret = [0] * self.dim
        try:
            # The stored text lacks the surrounding brackets, so add them
            # to obtain a valid JSON array.
            items = json.loads('[' + value + ']')
        except Exception:
            # Best effort: non-string cells (e.g. NaN) or malformed JSON.
            return ret

        for item in items:
            try:
                slot = self.cate_list.index(item['cate_name'])
            except (KeyError, TypeError, ValueError):
                # Item is not a dict, lacks 'cate_name', or names a
                # category outside cate_list -- skip it. (A bare `except`
                # here would also hide KeyboardInterrupt / SystemExit.)
                continue
            ret[slot] = 1.0
        return ret

    def inconvert(self, embedding):
        """Return the category names whose slot equals 1.0, in slot order."""
        return [
            self.cate_list[idx]
            for idx, val in enumerate(embedding)
            if val == 1.0
        ]
class TagGoodAtDirectionProfile(BaseProfile):
    """Multi-hot encoder for ``tag_good_at_direction`` (10 slots).

    Encodes the counselor's "good at" direction tags.
    """

    def __init__(self) -> None:
        super().__init__()
        self.dim = 10
        # Slot order of the embedding; do not reorder without rebuilding
        # every stored embedding.
        self.cate_list = [
            '情绪压力',
            '亲子教育',
            '恋爱情感',
            '婚姻家庭',
            '心理健康',
            '个人成长',
            '人际关系',
            '职场发展',
            '人际社交',
            '两性心理',
        ]

    def convert(self, value):
        """Encode a JSON array of category objects as a multi-hot list.

        Args:
            value: JSON text; expected to decode to a list of objects that
                each carry a ``cate_name`` key.

        Returns:
            A list of length ``dim`` with ``1.0`` in the slot of every
            recognised category and ``0`` elsewhere. Unparseable input or
            a payload that is not a JSON array yields all zeros.
        """
        ret = [0] * self.dim
        try:
            items = json.loads(value)
        except Exception:
            # Best effort: non-string cells (e.g. NaN) or malformed JSON.
            return ret

        # Valid JSON such as `null` or `3` is not iterable and used to
        # crash the loop below; treat any non-array payload as "no tags".
        if not isinstance(items, list):
            return ret

        for item in items:
            try:
                slot = self.cate_list.index(item['cate_name'])
            except (KeyError, TypeError, ValueError):
                # Item is not a dict, lacks 'cate_name', or names a
                # category outside cate_list -- skip it. (A bare `except`
                # here would also hide KeyboardInterrupt / SystemExit.)
                continue
            ret[slot] = 1.0
        return ret

    def inconvert(self, embedding):
        """Return the category names whose slot equals 1.0, in slot order."""
        return [
            self.cate_list[idx]
            for idx, val in enumerate(embedding)
            if val == 1.0
        ]
class TagGoodAtSubclassProfile(BaseProfile):
    """Multi-hot encoder for the ``tag_good_at_*`` counselor tag columns.

    According to the original note these cover the counselor's:
    target-group tags, consulting-style tags, service tags, language
    tags and therapy-school tags. The vocabulary is supplied by the caller.
    """

    def __init__(self, cate_list) -> None:
        """
        Args:
            cate_list: Ordered tag vocabulary; its length becomes ``dim``
                and its order defines the embedding slots. The list is
                stored by reference, not copied.
        """
        super().__init__()
        self.dim = len(cate_list)
        self.cate_list = cate_list

    def convert(self, value):
        """Encode a JSON array of tag objects as a multi-hot list.

        Args:
            value: JSON text; expected to decode to a list of objects that
                each carry a ``tag_name`` key.

        Returns:
            A list of length ``dim`` with ``1.0`` in the slot of every
            recognised tag and ``0`` elsewhere. Unparseable input or a
            payload that is not a JSON array yields all zeros.
        """
        ret = [0] * self.dim
        try:
            items = json.loads(value)
        except Exception:
            # Best effort: non-string cells (e.g. NaN) or malformed JSON.
            return ret

        # Valid JSON such as `null` or `3` is not iterable and used to
        # crash the loop below; treat any non-array payload as "no tags".
        if not isinstance(items, list):
            return ret

        for item in items:
            try:
                slot = self.cate_list.index(item['tag_name'])
            except (KeyError, TypeError, ValueError):
                # Item is not a dict, lacks 'tag_name', or names a tag
                # outside cate_list -- skip it. (A bare `except` here
                # would also hide KeyboardInterrupt / SystemExit.)
                continue
            ret[slot] = 1.0
        return ret

    def inconvert(self, embedding):
        """Return the tag names whose slot equals 1.0, in slot order."""
        return [
            self.cate_list[idx]
            for idx, val in enumerate(embedding)
            if val == 1.0
        ]
# Tag vocabularies for the tag_good_at_* columns. The position of a tag in
# its list is the slot it occupies in the embedding, so the order matters.

# Vocabulary for tag_good_at_group.
good_at_group_tag_list = [
    '青少年', '职场人', '家庭关系', '大学生', '伴侣',
    '夫妻', '中学生', '公务员', '留学生', '精神康复者',
    '小学生', '孕产妇', '老年人', '成年人', '中年人',
]

# Vocabulary for tag_good_at_style.
good_at_style_tag_list = [
    '温暖', '温和而真诚', '耐心', '专业', '抱持',
    '赋能', '温和而坚定', '深刻而有力', '沉稳', '真实',
    '深入', '富于创造性', '清晰', '简洁', '接纳包容',
    '真诚', '敏锐专业', '温和', '坚定',
]

# Vocabulary for tag_good_at_service.
good_at_service_tag_list = [
    '情绪疏导', '关系改善', '情绪管理', '情感分析', '情感陪伴',
    '心理分析', '动力提升', '创伤修复', '恋爱指导', '行为矫正',
    '离婚指导', '认知调整', '心态调整', '经验指导', '身心减压',
    '性格改善', '失眠改善', '绘画分析', '情感修复', '梦境解析',
    '习惯养成', '危机消除',
]

# Vocabulary for tag_good_at_language.
good_at_language_tag_list = [
    '普通话', '英语', '粤语', '四川话', '江西方言',
    '闽南语', '日语', '韩语', '德语', '俄语',
    '西班牙语', '法语', '意大利语',
]

# Vocabulary for tag_good_at_school.
good_at_school_tag_list = [
    '认知行为疗法', '叙事疗法', '整合疗法', '精神分析',
    '焦点解决短期疗法', '人本主义治疗', '婚姻与家庭治疗', '催眠疗法',
    '森田疗法', '格式塔疗法', '心理动力学疗法', '沙盘治疗',
    '其他疗法', '表达性艺术疗法', '接纳承诺疗法', '心理危机干预',
    '辩证行为疗法', '游戏治疗', '情感关注疗法',
]
# Ordered [column name, converter] pairs. The order fixes the layout of the
# counselor embedding: each converter contributes `dim` consecutive slots.
profile_converters = [
    ['gender', GenderProfile()],
    ['age', NumScaleProfile(min_val=0, scale=100)],
    ['certification', CertificationProfile()],
    ['education', EducationProfile()],
    ['city', CityProfile(level=4)],
    ['work_years', NumScaleProfile(min_val=0, scale=20)],
    # Price is shifted by 10 and scaled over the remaining 1200 - 10 range.
    ['product_min_price', NumScaleProfile(min_val=10, scale=1200 - 10)],
    ['order_cate_sum', OrderCateProfile()],
    ['tag_good_at_direction', TagGoodAtDirectionProfile()],
    ['tag_good_at_group', TagGoodAtSubclassProfile(good_at_group_tag_list)],
    ['tag_good_at_style', TagGoodAtSubclassProfile(good_at_style_tag_list)],
    ['tag_good_at_service', TagGoodAtSubclassProfile(good_at_service_tag_list)],
    ['tag_good_at_language', TagGoodAtSubclassProfile(good_at_language_tag_list)],
    ['tag_good_at_school', TagGoodAtSubclassProfile(good_at_school_tag_list)],
]
def encode_counselor_profile(profile):
    """Convert a counselor profile into a flat embedding vector.

    Each entry of ``profile_converters`` encodes ``profile[name]`` and the
    pieces are concatenated in table order.

    Args:
        profile: Mapping-like object indexable by the column names listed
            in ``profile_converters``.

    Returns:
        A flat list holding every converter's output, back to back.
    """
    return [
        component
        for field_name, converter in profile_converters
        for component in converter.convert(profile[field_name])
    ]
def decode_counselor_profile(embedding):
    """Convert an embedding vector back into a counselor profile dict.

    Walks ``profile_converters`` in order, handing each converter the next
    ``dim`` elements of ``embedding``.

    Args:
        embedding: Sliceable sequence laid out in ``profile_converters``
            order.

    Returns:
        Dict mapping each column name to its converter's decoded value.
    """
    decoded = {}
    offset = 0
    for field_name, converter in profile_converters:
        width = converter.dim
        decoded[field_name] = converter.inconvert(embedding[offset:offset + width])
        offset += width
    return decoded
\ No newline at end of file
src/core/manager/__init__.py
View file @
20d378e5
...
...
@@ -3,6 +3,8 @@
from
.manager
import
Manager
from
.database_manager
import
DatabaseDataManager
from
.profile_manager
import
ProfileManager
from
.profile_manager
import
UserProfileManager
from
.profile_manager
import
CounselorProfileManager
from
.chat_data_manager
import
ChatDataManager
from
.order_data_manager
import
OrderDataManager
\ No newline at end of file
from
.order_data_manager
import
OrderDataManager
from
.user_event_manager
import
UserEventManager
\ No newline at end of file
src/core/manager/profile_manager.py
View file @
20d378e5
...
...
@@ -6,18 +6,19 @@ from typing import List
import
pandas
as
pd
from
ydl_ai_recommender.src.core.profile
import
encode_profile
from
ydl_ai_recommender.src.core.profile
import
encode_user_profile
from
ydl_ai_recommender.src.core.counselor_profile
import
encode_counselor_profile
from
ydl_ai_recommender.src.core.manager
import
DatabaseDataManager
from
ydl_ai_recommender.src.utils.log
import
create_logger
class
ProfileManager
(
DatabaseDataManager
):
class
User
ProfileManager
(
DatabaseDataManager
):
"""
订单用户画像数据管理
"""
def
__init__
(
self
,
client
=
None
)
->
None
:
super
()
.
__init__
(
client
,
create_logger
(
__name__
,
'profile_manager.log'
))
super
()
.
__init__
(
client
,
create_logger
(
__name__
,
'
user_
profile_manager.log'
))
self
.
select_items_str
=
', '
.
join
([
'uid'
,
'country_code'
,
'channel_id_type'
,
'ffrom_login'
,
'user_preference_cate'
,
'consult_pay_money'
,
'listen_pay_money'
,
'test_items_pay_money'
,
'course_pay_money'
,
'consult_order_num'
,
...
...
@@ -67,7 +68,7 @@ class ProfileManager(DatabaseDataManager):
self
.
logger
.
info
(
'开始构建订单用户的用户画像向量'
)
for
_
,
profile
in
user_profiles
.
iterrows
():
user_ids
.
append
(
str
(
profile
[
'uid'
]))
embeddings
.
append
(
encode_profile
(
profile
))
embeddings
.
append
(
encode_
user_
profile
(
profile
))
self
.
logger
.
info
(
'用户画像向量构建完成,共构建
%
s 用户'
,
len
(
user_ids
))
...
...
@@ -109,7 +110,62 @@ class ProfileManager(DatabaseDataManager):
json
.
dump
(
v_embedding_list
,
f
,
ensure_ascii
=
False
)
class CounselorProfileManager(DatabaseDataManager):
    """Data manager for counselor profiles.

    Exports counselor profile rows from the database to a local xlsx file
    and turns them into embedding vectors stored next to it.
    """

    def __init__(self, client=None) -> None:
        manager_logger = create_logger(__name__, 'counselor_profile_manager.log')
        super().__init__(client, manager_logger)

        # Columns pulled from the counselor statistics table.
        columns = [
            'doctor_id',
            'uid',
            'gender',
            'age',
            'certification',
            'education',
            'city',
            'work_years',
            'product_min_price',
            'order_tag_sum',
            'order_cate_sum',
            'tag_good_at_direction',
            'tag_good_at_group',
            'tag_good_at_style',
            'tag_good_at_service',
            'tag_good_at_language',
            'tag_good_at_school',
            'cate_good_at',
        ]
        self.select_items_str = ', '.join(columns)

    def _make_query_sql(self):
        """Build the SELECT statement for all non-deleted counselor rows."""
        select_clause = 'SELECT {} FROM ads.ads_doctor_stats'.format(self.select_items_str)
        return select_clause + ' WHERE is_deleted = 0'

    def update_data(self):
        """Pull the latest profile features from the database and save them."""
        query = self._make_query_sql()
        # fetch_data_from_db returns a pair; only the second element (the
        # rows) is used here.
        _, rows = self.fetch_data_from_db(query)
        self.save_xlsx_data(pd.DataFrame(rows), 'all_counselor_profile.xlsx')

    def _load_profile_data(self):
        """Load the profile table previously written by update_data."""
        return self.load_xlsx_data('all_counselor_profile.xlsx')

    def make_embeddings(self):
        """Encode every stored counselor profile and write the results.

        Writes ``counselor_embeddings_ids.txt`` (one ``uid`` per line) and
        ``counselor_embeddings.json`` (list of vectors, same order) into
        ``self.local_file_dir``.

        Returns:
            The list of embedding vectors.
        """
        counselor_profiles = self._load_profile_data()
        self.logger.info('咨询师画像数据加载完成,共加载 %s 条', len(counselor_profiles))

        counselor_ids = []
        embeddings = []
        self.logger.info('开始构建咨询师画像向量')
        for _, row in counselor_profiles.iterrows():
            # Rows are keyed by the 'uid' column (not 'doctor_id').
            counselor_ids.append(str(row['uid']))
            embeddings.append(encode_counselor_profile(row))
        self.logger.info('咨询师画像向量构建完成,共构建 %s 咨询师', len(counselor_ids))

        ids_path = os.path.join(self.local_file_dir, 'counselor_embeddings_ids.txt')
        with open(ids_path, 'w', encoding='utf-8') as ids_file:
            ids_file.write('\n'.join(counselor_ids))

        vectors_path = os.path.join(self.local_file_dir, 'counselor_embeddings.json')
        with open(vectors_path, 'w', encoding='utf-8') as vectors_file:
            json.dump(embeddings, vectors_file, ensure_ascii=False)

        return embeddings
if
__name__
==
'__main__'
:
manager
=
ProfileManager
()
# manager = User
ProfileManager()
# manager.make_embeddings()
manager
.
make_virtual_embedding
()
\ No newline at end of file
# manager.make_virtual_embedding()
manager
=
CounselorProfileManager
()
manager
.
update_data
()
manager
.
make_embeddings
()
\ No newline at end of file
src/core/profile.py
View file @
20d378e5
...
...
@@ -271,7 +271,7 @@ class CityProfile(BaseProfile):
def
__init__
(
self
,
level
=
2
)
->
None
:
"""
level: 级别,2-省/直辖市 ;
3-区;
4-区;6-投递区
level: 级别,2-省/直辖市 ;4-区;6-投递区
"""
super
()
.
__init__
()
...
...
@@ -359,7 +359,7 @@ profile_converters = [
]
def
encode_profile
(
profile
):
def
encode_
user_
profile
(
profile
):
"""
将用户画像信息转换为向量
"""
...
...
@@ -369,7 +369,7 @@ def encode_profile(profile):
return
embedding
def
decode_profile
(
embedding
):
def
decode_
user_
profile
(
embedding
):
"""
向量转换为用户画像
"""
...
...
src/core/recommender.py
View file @
20d378e5
...
...
@@ -14,7 +14,7 @@ from ydl_ai_recommender.src.core.indexer import (
UserCounselorCombinationIndexer
,
CounselorCounselorCFIndexer
,
)
from
ydl_ai_recommender.src.core.profile
import
encode_profile
from
ydl_ai_recommender.src.core.profile
import
encode_
user_
profile
from
ydl_ai_recommender.src.data.mysql_client
import
MySQLClientPool
from
ydl_ai_recommender.src.utils
import
get_conf_path
,
get_data_path
from
ydl_ai_recommender.src.utils.log
import
create_logger
...
...
@@ -151,7 +151,7 @@ class UserCFRecommender(Recommender):
def
recommend_with_profile
(
self
,
user_profile
,
size
=
0
,
is_merge
=
True
):
user_embedding
=
encode_profile
(
user_profile
)
user_embedding
=
encode_
user_
profile
(
user_profile
)
counselors
=
self
.
_recommend
(
user_embedding
)
# size == 0 时,不追加默认推荐咨询师
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment