Commit 3d58e296 by 柴鹏飞

Merge branch 'YDL-REC-XGB' into 'master'

增加基于树模型XGBoost排序

See merge request chaipengfei/ydl_ai_recommender!1
parents a65552b2 487789ef
...@@ -10,3 +10,5 @@ dependencies: ...@@ -10,3 +10,5 @@ dependencies:
- pip - pip
- pip: - pip:
- -r requirements.txt - -r requirements.txt
- -i http://pypi.mirrors.ustc.edu.cn/simple/
- --trusted-host pypi.mirrors.ustc.edu.cn
This source diff could not be displayed because it is too large. You can view the blob instead.
{"ffrom_login_encoder": {"android_xiaomi": 76, "android_vivo": 74, "android_oppo": 72, "android_huawei": 62, "0": 0, "ATK_7_android_huawei": 18, "m_login_default": 88, "TK_yyjlcs": 55, "app_ios": 79, "m_ydl": 91, "TK_4": 53, "android_10": 57, "yj_listeners_tengxun": 102, "ATK_android_yyjlcs_huawei": 31, "ATK_7_android_oppo": 22, "login_miss_ffrom_default": 86, "AppletWechatListen": 47, "ATK_4_android_yyb": 12, "ATK_7_android_xiaomi": 26, "zj": 106, "yj_listeners_douyin": 101, "ATK_android_yyjlcs_oppo": 34, "ATK_android_yyjlcs_vivo": 35, "ATK_android_yyjlcs_xiaomi": 37, "ios_1": 81, "android_yyb": 77, "login_api_sms_missing_default": 85, "android_m": 64, "ATK_7_android_360": 16, "ATK_4_android_vivo": 9, "ATK_7_android_yyb": 27, "m_wx_app": 90, "AppletWechatYdlPsyConsult": 48, "ATK_7_android_baidu": 17, "ATK_4_android_baidu": 4, "AppletWechatAiXiaoyi": 45, "ATK_android_yyjlcs_lenovo": 32, "ATK_android_yyjlcs_baidu": 30, "mini_shiyebu": 93, "ATK_7_android_vivo": 24, "AppletWechatCourseFuLi": 46, "login_api_phone_missing_default": 84, "android_sanxing": 73, "ATK_android_yyjlcs_wdj": 36, "android_medical_xiaomi": 69, "pinganbaoxian": 95, "ATK_7_android_letv": 20, "zhonganhuyi": 105, "BG-platform": 49, "android_zhuzhan": 78, "ATK_android_yyjlcs_yyb": 38, "ATK_7_android_zhuzhan": 28, "ATK_4_android_meizu": 6, "android_baidu": 61, "ATK_android_yyjlcs_360": 29, "ATK_4_android_xiaomi": 11, "m": 87, "ATK_7_android_sanxing": 23, "ydl-dhzx": 97, "ydl-pro": 99, "TK_qinggan": 54, "m_wechat": 89, "yj_listeners_ydlxlgzh": 103, "android_meizu": 71, "BG-xinting": 50, "android_appdouyin": 60, "android_medical_doctor": 65, "mini_shrink": 94, "swan": 96, "ATK_6_android_huawei": 14, "ATK_android_yyjlcs_meizu": 33, "android_medical_huawei": 66, "android_360": 58, "ATK_7_android_meizu": 21, "ATK_7_android_lenovo": 19, "ATK_4_android_huawei": 5, "android_medical_vivo": 68, "ATK_4_android_oppo": 7, "ATK_4_android_sanxing": 8, "ATK_7_android_wdj": 25, "ATK_yyjlcs_360cn": 42, 
"yj_consultant_tengxun": 100, "android_medical_oppo": 67, "ios_medical_user": 83, "ATK_4_android_360": 3, "ATK_4_android_wdj": 10, "ios_medical_doctor": 82, "ATK_qinggan": 40, "android_Cpd_Honor": 59, "ydl-emotion": 98, "BaiduSem_AppAndroidYdl": 51, "ATK_3_android_baidu": 2, "android_lenovo": 63, "baidu": 80, "yyb": 104, "android_wdj": 75, "alipay": 56, "ATK_6_android_baidu": 13, "ATK_yyjlcs_xiaomi": 44, "ATK_10_android_huawei": 1, "ATK_6_android_meizu": 15, "ATK_yyjlcs_null": 43, "android_medical_yyb": 70, "ATK_ylcs_xiaomi": 41, "meizu": 92, "ATK_aqcs_yyb": 39, "BaiduSem_zhuzhan": 52}, "user_login_city_encoder": {"0": 0, "440800": 200, "370200": 131, "411500": 161, "440300": 195, "530100": 267, "451400": 228, "120000": 3, "500000": 245, "310000": 69, "330700": 89, "440100": 193, "330200": 84, "610100": 276, "420100": 164, "330100": 83, "520100": 264, "410100": 146, "450200": 216, "441500": 205, "450300": 217, "370600": 135, "110000": 1, "410400": 149, "340300": 96, "360700": 125, "330600": 88, "331000": 92, "320100": 70, "370100": 130, "510100": 246, "441200": 202, "630100": 296, "130100": 4, "230100": 57, "320200": 71, "130900": 12, "410300": 148, "440700": 199, "451100": 225, "451300": 227, "650800": 310, "650500": 307, "370800": 137, "220100": 49, "445200": 213, "431200": 191, "330300": 85, "460100": 229, "450800": 222, "511100": 255, "320500": 74, "411100": 157, "441901": 210, "430100": 180, "411400": 160, "140800": 22, "450700": 221, "520300": 266, "370500": 134, "130700": 10, "610600": 281, "610500": 280, "220500": 53, "320300": 72, "350100": 110, "360100": 119, "440500": 197, "410800": 153, "131000": 13, "140900": 23, "440600": 198, "540100": 275, "150500": 30, "370700": 136, "420600": 168, "340100": 94, "340400": 97, "431000": 189, "371700": 145, "130600": 9, "630200": 297, "350800": 117, "610400": 279, "340700": 100, "510900": 253, "630300": 298, "421100": 173, "340200": 95, "330500": 87, "140300": 17, "150100": 26, "231000": 66, "620100": 284, "220700": 
55, "130800": 11, "442000": 211, "210200": 36, "361000": 128, "140500": 19, "610700": 282, "341300": 105, "350600": 115, "450100": 215, "421300": 175, "450500": 219, "441400": 204, "320700": 76, "140100": 15, "510400": 248, "370900": 138, "320400": 73, "130200": 5, "371300": 141, "360400": 122, "230500": 61, "650100": 303, "431300": 192, "130500": 8, "430200": 181, "511700": 260, "341700": 108, "370400": 133, "530800": 273, "371500": 143, "210100": 35, "350900": 118, "431100": 190, "340600": 99, "141000": 24, "420300": 166, "150600": 31, "610800": 283, "341200": 104, "511500": 258, "350200": 111, "511300": 256, "210800": 42, "230300": 59, "210900": 43, "140700": 21, "350500": 114, "350700": 116, "141100": 25, "441300": 203, "421200": 174, "445100": 212, "PHBWH00": 362, "530400": 269, "150400": 29, "210400": 38, "441600": 206, "510700": 251, "450900": 223, "441800": 208, "130300": 6, "430900": 188, "360300": 121, "441900": 209, "640100": 301, "371400": 142, "621200": 295, "211400": 48, "510600": 250, "530300": 268, "610300": 278, "231200": 68, "621000": 293, "371600": 144, "150900": 34, "211300": 47, "430500": 184, "430400": 183, "410500": 150, "530900": 274, "411600": 162, "211100": 45, "620900": 292, "211200": 46, "320900": 78, "441700": 207, "512000": 263, "620600": 289, "361100": 129, "510800": 252, "620500": 288, "330400": 86, "411300": 159, "231100": 67, "130400": 7, "341000": 102, "320600": 75, "650300": 305, "230800": 64, "420200": 165, "440400": 196, "350400": 113, "330900": 91, "440900": 201, "210700": 41, "420900": 171, "320800": 77, "371000": 139, "321300": 82, "429005": 177, "360900": 127, "430800": 187, "340500": 98, "520200": 265, "350300": 112, "210600": 40, "140200": 16, "410700": 152, "651000": 311, "511600": 259, "42A2100": 179, "420500": 167, "450400": 218, "511000": 254, "230200": 58, "651300": 314, "411000": 156, "410200": 147, "360200": 120, "430300": 182, "220800": 56, "430600": 185, "NSHF00": 354, "445300": 214, "370300": 132, "410900": 155, 
"131100": 14, "410600": 151, "510500": 249, "650700": 309, "421000": 172, "510300": 247, "621100": 294, "330800": 90, "NSWHBS00": 355, "321000": 79, "140400": 18, "620700": 290, "331100": 93, "341500": 106, "220200": 50, "440200": 194, "150800": 33, "630400": 299, "511900": 262, "371100": 140, "321100": 80, "411700": 163, "420700": 169, "PKTAS00": 364, "430700": 186, "410881": 154, "511800": 261, "650600": 308, "420800": 170, "NY0000": 357, "651500": 316, "341600": 107, "46A3100": 239, "QLDBNE00": 365, "651200": 313, "469005": 233, "110105": 2, "WASEA00": 380, "650200": 304, "360800": 126, "CASFO00": 330, "511400": 257, "150300": 28, "451000": 224, "651400": 315, "450600": 220, "620800": 291, "210300": 37, "150200": 27, "411200": 158, "451200": 226, "230400": 60, "429004": 176, "610200": 277, "321200": 81, "630500": 300, "460200": 230, "230600": 62, "360600": 124, "341800": 109, "651100": 312, "340800": 101, "530500": 270, "530600": 271, "620400": 287, "MOSTL00": 352, "429006": 178, "341100": 103, "46A3300": 240, "210500": 39, "46A2500": 235, "220300": 51, "KAZAKS00": 346, "140600": 20, "SCTARD00": 369, "150700": 32, "211000": 44, "230900": 65, "620200": 285, "JPN2700": 345, "640200": 302, "JPN1300": 344, "620300": 286, "PHLMNL00": 363, "650400": 306, "530700": 272, "220400": 52, "ILCHI00": 339, "CANTOR00": 328, "KLKUL00": 348, "VICMEL00": 375, "360500": 123, "ITAROM00": 342, "46A3500": 242, "230700": 63, "KL0000": 347, "810100": 319, "PAPHL00": 361, "SPE0000": 370, "RUSMOW00": 366, "810300": 320, "46A2700": 236, "DEUNUE00": 334, "NZLAUK00": 358, "469006": 234, "CALAX00": 323, "CANVAN00": 329, "46A2800": 237, "FRAPAR00": 338, "SAADL00": 368, "MEXMEX00": 350, "930100": 321, "IRLDB00": 341, "220600": 54, "ENGLND00": 335, "46A3400": 241, "AREDU00": 322, "VNMHI00": 376, "469002": 231, "WAPER00": 379, "DEUHH00": 332, "WA0000": 377, "TH-100000": 371, "CANSUD00": 327, "CANMTR00": 325, "469003": 232, "CANLOD00": 324, "THA1000": 372, "ESPVLL00": 337, "NZLHLZ00": 359, 
"TXAUS00": 373, "CHLRM00": 331, "49SEO00": 244, "NVLAS00": 356, "RUSSPE00": 367, "TXDAL00": 374, "ESPMAD00": 336, "MNG6100": 351, "NLDAMS00": 353, "OR0000": 360, "IRLCK00": 340, "JK0000": 343, "WAALH00": 378, "651600": 317, "MABZD00": 349, "46A3600": 243, "CANOTT00": 326, "DEUKEL00": 333, "651700": 318, "46A3000": 238}, "cate_id_1_encoder": {"26": 5, "23": 3, "0": 0, "27": 6, "1": 1, "452": 7, "22": 2, "699": 8, "25": 4}, "cate_id_2_encoder": {"22": 2, "25": 4, "0": 0, "27": 6, "452": 7, "26": 5, "699": 8, "1": 1, "23": 3}, "cate_id_3_encoder": {"27": 6, "452": 7, "0": 0, "22": 2, "1": 1, "25": 4, "23": 3, "26": 5, "699": 8}, "cate_id_4_encoder": {"25": 4, "1": 1, "0": 0, "23": 3, "452": 7, "699": 8, "26": 5, "27": 6, "22": 2}, "cate_id_5_encoder": {"452": 7, "27": 6, "0": 0, "1": 1, "26": 5, "22": 2, "23": 3, "25": 4, "699": 8}}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"ffrom_login_encoder": {"0": 0, "m_login_default": 85, "android_huawei": 59, "ATK_7_android_360": 17, "ATK_7_android_huawei": 19, "android_oppo": 69, "android_xiaomi": 73, "android_yyb": 74, "AppletWechatListen": 44, "m_ydl": 88, "android_vivo": 71, "app_ios": 76, "ATK_android_yyjlcs_oppo": 34, "ATK_android_yyjlcs_huawei": 31, "ATK_7_android_oppo": 22, "m_wechat": 86, "zj": 103, "ATK_android_yyjlcs_vivo": 35, "AppletWechatAiXiaoyi": 42, "yj_listeners_douyin": 98, "ATK_7_android_xiaomi": 26, "yj_listeners_tengxun": 99, "android_10": 54, "TK_4": 50, "android_sanxing": 70, "ATK_android_yyjlcs_xiaomi": 37, "ios_1": 78, "AppletWechatYdlPsyConsult": 45, "ATK_7_android_yyb": 27, "android_m": 61, "ATK_7_android_zhuzhan": 28, "AppletWechatCourseFuLi": 43, "m": 84, "ATK_7_android_vivo": 24, "login_api_sms_missing_default": 82, "zhonganhuyi": 102, "TK_yyjlcs": 52, "android_medical_vivo": 65, "login_api_phone_missing_default": 81, "ATK_android_yyjlcs_baidu": 30, "pinganbaoxian": 92, "yj_consultant_tengxun": 97, "ATK_7_android_baidu": 18, "android_zhuzhan": 75, "login_miss_ffrom_default": 83, "BG-platform": 46, "ATK_4_android_vivo": 11, "ATK_4_android_yyb": 14, "android_appdouyin": 57, "ATK_android_yyjlcs_yyb": 38, "ATK_android_yyjlcs_meizu": 33, "ydl-emotion": 95, "ATK_7_android_letv": 20, "android_baidu": 58, "ydl-dhzx": 94, "android_medical_huawei": 63, "android_medical_doctor": 62, "ATK_4_android_360": 4, "ATK_7_android_wdj": 25, "swan": 93, "ATK_4_android_wdj": 12, "android_medical_xiaomi": 66, "ATK_4_android_xiaomi": 13, "mini_shrink": 91, "android_meizu": 68, "m_wx_app": 87, "BG-xinting": 47, "ydl-pro": 96, "mini_shiyebu": 90, "ATK_android_yyjlcs_lenovo": 32, "ATK_7_android_sanxing": 23, "ATK_4_android_meizu": 8, "baidu": 77, "yj_listeners_ydlxlgzh": 100, "ios_medical_doctor": 79, "ATK_4_android_baidu": 5, "android_lenovo": 60, "alipay": 53, "ATK_4_android_oppo": 9, "android_medical_oppo": 64, "BaiduSem_AppAndroidYdl": 48, "ATK_android_yyjlcs_360": 29, 
"ATK_4_android_lenovo": 7, "yyb": 101, "ATK_3_android_baidu": 3, "TK_qinggan": 51, "android_360": 55, "meizu": 89, "ios_medical_user": 80, "ATK_4_android_sanxing": 10, "android_medical_yyb": 67, "ATK_6_android_huawei": 15, "ATK_qinggan": 40, "android_Cpd_Honor": 56, "ATK_yyjlcs_xiaomi": 41, "BaiduSem_zhuzhan": 49, "ATK_10_android_huawei": 1, "android_wdj": 72, "ATK_4_android_huawei": 6, "ATK_android_yyjlcs_wdj": 36, "ATK_10_android_vivo": 2, "ATK_7_android_meizu": 21, "ATK_aqcs_yyb": 39, "ATK_6_android_meizu": 16}, "user_login_city_encoder": {"0": 0, "140100": 15, "500000": 244, "310000": 70, "371300": 142, "110000": 1, "410700": 153, "320500": 75, "150100": 26, "420100": 165, "442000": 212, "340600": 100, "330100": 84, "361100": 130, "440200": 195, "512000": 262, "130600": 9, "370600": 136, "441500": 206, "530100": 266, "210100": 35, "320700": 77, "210800": 42, "140300": 17, "320100": 71, "320400": 74, "460100": 230, "120000": 3, "410500": 151, "370200": 132, "130100": 4, "220100": 49, "520100": 263, "230100": 57, "610300": 277, "610100": 275, "510100": 245, "530900": 273, "410100": 147, "421100": 174, "130400": 7, "450100": 216, "361000": 129, "340800": 102, "511700": 259, "320200": 72, "340300": 97, "410800": 154, "350500": 115, "320600": 76, "210200": 36, "441800": 209, "430800": 188, "360100": 120, "130200": 5, "440100": 194, "430100": 181, "450300": 218, "420200": 166, "610500": 279, "370100": 131, "530500": 269, "350900": 119, "341500": 107, "130300": 6, "640100": 300, "420800": 171, "360900": 128, "320900": 79, "370700": 137, "330300": 86, "441900": 210, "210400": 38, "411500": 162, "411300": 160, "440300": 196, "AREDU00": 320, "620100": 283, "360800": 127, "510300": 246, "350700": 117, "140700": 21, "410600": 152, "340100": 95, "445300": 215, "341700": 109, "370500": 135, "KL0000": 346, "130800": 11, "370800": 138, "340200": 96, "330800": 91, "330200": 85, "445200": 214, "131000": 13, "431100": 191, "350200": 112, "530800": 272, "430700": 187, "150600": 
31, "440400": 197, "320800": 78, "341800": 110, "321200": 82, "530300": 267, "431300": 193, "341200": 105, "411000": 157, "441300": 204, "511900": 261, "140800": 22, "510600": 249, "330500": 88, "431000": 190, "PKTAS00": 362, "JAMHAN00": 341, "410400": 150, "140400": 18, "371600": 145, "441901": 211, "210300": 37, "511300": 255, "370400": 134, "340500": 99, "450200": 217, "410900": 156, "321000": 80, "340400": 98, "410300": 149, "441400": 205, "440500": 198, "GATAT00": 335, "370300": 133, "350300": 113, "610700": 281, "370900": 139, "140900": 23, "610400": 278, "411600": 163, "150400": 29, "230800": 64, "371700": 146, "441700": 208, "511500": 257, "150700": 32, "650100": 302, "430200": 182, "630100": 295, "630300": 297, "330900": 92, "331100": 94, "371500": 144, "530600": 270, "350600": 116, "350100": 111, "511600": 258, "DEUHH00": 330, "210700": 41, "150200": 27, "321300": 83, "411400": 161, "451000": 225, "130700": 10, "320300": 73, "330600": 89, "621000": 292, "610800": 282, "650700": 308, "210900": 43, "651200": 312, "440700": 200, "450500": 220, "411700": 164, "330700": 90, "445100": 213, "350400": 114, "420900": 172, "410881": 155, "410200": 148, "341600": 108, "511000": 253, "441200": 203, "429006": 179, "341000": 103, "450700": 222, "511800": 260, "420600": 169, "651100": 311, "NSWHBS00": 354, "650500": 306, "430600": 186, "421200": 175, "360600": 125, "420700": 170, "340700": 101, "211400": 48, "220200": 50, "620500": 287, "620600": 288, "150900": 34, "469002": 232, "520300": 265, "411200": 159, "330400": 87, "131100": 14, "360400": 123, "510800": 251, "510500": 248, "511400": 256, "431200": 192, "150500": 30, "130900": 12, "331000": 93, "141000": 24, "321100": 81, "430900": 189, "440800": 201, "450600": 221, "371000": 140, "469006": 235, "520200": 264, "440600": 199, "230700": 63, "130500": 8, "440900": 202, "530400": 268, "450800": 223, "460200": 231, "211200": 46, "651000": 310, "230600": 62, "210600": 40, "451200": 227, "150300": 28, "220300": 51, 
"511100": 254, "341100": 104, "141100": 25, "640200": 301, "510400": 247, "341300": 106, "220700": 55, "140500": 19, "421000": 173, "420300": 167, "220800": 56, "430500": 185, "211100": 45, "420500": 168, "650300": 304, "230900": 65, "371400": 143, "620300": 285, "510900": 252, "360200": 121, "CANSUD00": 326, "210500": 39, "540100": 274, "371100": 141, "360700": 126, "651400": 314, "650200": 303, "610600": 280, "230300": 59, "451300": 228, "CANMTR00": 324, "510700": 250, "VICMEL00": 374, "PHBWH00": 360, "450900": 224, "441600": 207, "451400": 229, "451100": 226, "360500": 124, "110105": 2, "230500": 61, "140200": 16, "231200": 68, "211300": 47, "651300": 313, "360300": 122, "220400": 52, "630200": 296, "450400": 219, "430400": 184, "NY0000": 356, "230200": 58, "220500": 53, "430300": 183, "411100": 158, "469003": 233, "530700": 271, "231000": 66, "220600": 54, "231100": 67, "KLKUL00": 347, "620200": 284, "140600": 20, "651500": 315, "211000": 44, "620900": 291, "CANTOR00": 327, "620800": 290, "620700": 289, "230400": 60, "610200": 276, "SAADL00": 366, "650800": 309, "MEXMEX00": 349, "150800": 33, "421300": 176, "JPN1300": 343, "651700": 317, "46A2700": 237, "FRAPAR00": 334, "350800": 118, "469005": 234, "620400": 286, "621200": 294, "TXAUS00": 372, "429004": 177, "PAPHL00": 359, "630500": 299, "TH-100000": 370, "650600": 307, "VNMHI00": 375, "NVLAS00": 355, "621100": 293, "630400": 298, "KAZAKS00": 345, "THA1000": 371, "SCTARD00": 367, "ENGLND00": 331, "TXDAL00": 373, "NZLAUK00": 357, "42A2100": 180, "651600": 316, "46A3400": 240, "MABZD00": 348, "CANLOD00": 323, "429005": 178, "PHLMNL00": 361, "ITAROM00": 340, "CALAX00": 322, "JK0000": 342, "650400": 305, "46A3100": 238, "IDMJL00": 336, "SCTEDH00": 368, "ILCHI00": 337, "JPN2700": 344, "OR0000": 358, "WA0000": 376, "CANVAN00": 328, "QLDBNE00": 363, "MNG6100": 350, "NLDAMS00": 352, "46A3300": 239, "CHLRM00": 329, "ESPMAD00": 333, "810100": 318, "810300": 319, "46A2500": 236, "260000": 69, "ITAMIL00": 339, 
"QLDTWB00": 364, "WAPER00": 377, "SPE0000": 369, "RUSMOW00": 365, "ENGTRU00": 332, "46A3500": 241, "49SEO00": 243, "46A3600": 242, "CANOTT00": 325, "NSHF00": 353, "AZPHX00": 321, "MOSTL00": 351, "IRLDB00": 338}, "cate_id_1_encoder": {"0": 0, "452": 7, "26": 5, "23": 3, "25": 4, "22": 2, "699": 8, "1": 1, "27": 6}, "cate_id_2_encoder": {"0": 0, "25": 4, "452": 7, "699": 8, "26": 5, "1": 1, "23": 3, "22": 2, "27": 6}, "cate_id_3_encoder": {"0": 0, "22": 2, "27": 6, "25": 4, "23": 3, "452": 7, "1": 1, "26": 5, "699": 8}, "cate_id_4_encoder": {"0": 0, "26": 5, "699": 8, "452": 7, "1": 1, "25": 4, "22": 2, "23": 3, "27": 6}, "cate_id_5_encoder": {"0": 0, "1": 1, "23": 3, "25": 4, "699": 8, "27": 6, "22": 2, "26": 5, "452": 7}}
\ No newline at end of file
...@@ -5,9 +5,12 @@ import json ...@@ -5,9 +5,12 @@ import json
import configparser import configparser
from typing import List from typing import List
import xgboost as xgb
import time
import faiss import faiss
import requests import requests
import numpy as np import numpy as np
import pandas as pd
from ydl_ai_recommender.src.core.indexer import ( from ydl_ai_recommender.src.core.indexer import (
UserCounselorChatIndexer, UserCounselorChatIndexer,
...@@ -17,11 +20,24 @@ from ydl_ai_recommender.src.core.indexer import ( ...@@ -17,11 +20,24 @@ from ydl_ai_recommender.src.core.indexer import (
CounselorCounselorCFIndexer, CounselorCounselorCFIndexer,
) )
from ydl_ai_recommender.src.core.profile import encode_user_profile from ydl_ai_recommender.src.core.profile import encode_user_profile
from ydl_ai_recommender.src.utils import get_conf_path, get_data_path from ydl_ai_recommender.src.utils import get_conf_path, get_data_path, read_user_encoder_dict, read_counselors, get_project_path
from ydl_ai_recommender.src.utils.log import create_logger from ydl_ai_recommender.src.utils.log import create_logger
logger = create_logger(__name__, 'recommender.log', is_rotating=True)
class Recommender():
def cost_time(desc):
    """Build a decorator that logs the wall-clock duration of each call.

    Args:
        desc: Free-text description inserted into the log line next to the
            wrapped function's name.

    Returns:
        A decorator. The decorated function returns exactly what the wrapped
        function returns; the elapsed time (milliseconds, rounded to two
        decimals) is written to the module-level ``logger`` after a
        successful call. If the wrapped function raises, the exception
        propagates and nothing is logged.
    """
    # Function-scope import keeps this block self-contained.
    from functools import wraps

    def the_func_cost_time(func):
        # wraps() preserves __name__/__doc__ of the decorated function, so
        # introspection and log output do not all show the wrapper's name.
        @wraps(func)
        def fun(*args, **kwargs):
            t = time.perf_counter()
            result = func(*args, **kwargs)
            logger.info('函数:{},{} 耗时:{} ms'.format(str(func.__name__), desc, round((time.perf_counter()-t)*1000, 2)))
            return result
        return fun
    return the_func_cost_time
class Recommender:
def __init__(self) -> None: def __init__(self) -> None:
self.logger = create_logger(__name__, 'recommender.log') self.logger = create_logger(__name__, 'recommender.log')
...@@ -241,6 +257,162 @@ class ItemCFRecommender(Recommender): ...@@ -241,6 +257,162 @@ class ItemCFRecommender(Recommender):
return counselors return counselors
class RecommendByXgboost(Recommender):
    """Score counselors for a user with a pre-trained XGBoost classifier.

    For each request the user's profile is fetched over HTTP, encoded into
    seven integer features, combined with the feature row of every counselor
    loaded by ``read_counselors()``, and scored with ``predict_proba``. The
    highest-scoring counselors are returned.
    """

    # Names of the user feature columns, in the order the values are produced
    # by trans_user_feature_data().
    USER_FEATURE_COLUMNS = [
        'ffrom_login_encoder',
        'user_login_city_encoder',
        'cate_id_1_encoder',
        'cate_id_2_encoder',
        'cate_id_3_encoder',
        'cate_id_4_encoder',
        'cate_id_5_encoder',
    ]
    # Number of preference categories encoded per user (cate_id_1..cate_id_5).
    PREFERENCE_CATE_NUM = 5
    # Number of top-scored counselors returned by recommend().
    MAX_RESULT_NUM = 50

    def __init__(self, top_n=5, k=20, is_use_db=True, u2c='combination', c2c=None) -> None:
        """Load configuration, encoder tables, counselor features and the model.

        Args:
            top_n, k, u2c: Kept for the recall stage; they are passed to
                ``UserCFRecommender`` the first time ``recall_data`` is used.
            is_use_db, c2c: Accepted for signature compatibility; not used by
                this class.
        """
        super().__init__()
        config = configparser.RawConfigParser()
        config.read(get_conf_path())
        self.dmp_url = config.get('DMP', 'url')

        select_items = ['uid', 'ffrom_login', 'user_login_city', 'user_preference_cate']
        self.select_fields = {item: True for item in select_items}

        self.user_encoder_convert = read_user_encoder_dict()
        # The user feature frame built per request has a fresh 0..n-1 index and
        # pd.concat(axis=1) aligns rows on index, so the counselor frame must
        # carry the same positional index.
        self.all_counselors = read_counselors().reset_index(drop=True)

        # The recall recommender is created lazily (see recall_data) so that
        # constructing this class does not pay for it when recall is unused.
        self._recall_kwargs = {'top_n': top_n, 'k': k, 'u2c': u2c}

        self.params = {'n_estimators': 150, 'max_depth': 7, 'min_child_weight': 5, 'gamma': 0, 'subsample': 0.9,
                       'colsample_bytree': 0.5, 'reg_alpha': 0, 'reg_lambda': 1, 'learning_rate': 0.1,
                       'max_delta_step': 0,
                       'scale_pos_weight': 1}
        self.model = xgb.XGBClassifier(objective='binary:logistic', nthread=-1, **self.params)
        self.model.load_model(os.path.join(get_project_path(), 'model_data/xgb_model.bin'))

    @cost_time(desc='召回咨询师')
    def recall_data(self, user_id, size=0, is_merge=True):
        """Return recall candidates for ``user_id`` from a ``UserCFRecommender``.

        The recommender is built on first use; previously this method read an
        attribute that was never assigned and always raised AttributeError.
        """
        if getattr(self, 'recommender', None) is None:
            self.recommender = UserCFRecommender(**self._recall_kwargs)
        return self.recommender.recommend(user_id, size=size, is_merge=is_merge)

    @cost_time(desc='模型推荐整个流程')
    def recommend(self, user_id, size=0, is_merge=True):
        """Return the top-scored counselors for ``user_id``.

        Args:
            user_id: User identifier used to look up the profile.
            size: Only forwarded to ``_recommend_top`` when no profile is
                available. NOTE(review): the model path always returns at most
                ``MAX_RESULT_NUM`` items regardless of ``size`` - confirm
                whether that is intended.
            is_merge: Unused here; kept for interface compatibility.

        Returns:
            A list of dicts with keys ``counselor``, ``score`` (probability of
            the positive class rounded to 4 decimals) and ``from``, ordered by
            descending score.
        """
        user_profile = self.get_user_profile(user_id)
        if not user_profile:
            return self._recommend_top(size)

        # The recall stage is currently disabled: every counselor is scored.
        recommend_result = None
        predit_data = self.trans_feature_data(user_id, user_profile, recommend_result)
        doctor_ids = predit_data.pop('doctor_id').to_numpy()

        pre_time = time.time()
        predit_result = self.model.predict_proba(predit_data)[:, 1]
        self.logger.info('predit_time:{}ms'.format(int((time.time()-pre_time)*1000)))

        # Going through a dict keeps one score per doctor id (the last one seen).
        score_by_doctor = dict(zip(doctor_ids, predit_result))
        ranked = sorted(score_by_doctor.items(), key=lambda x: x[1], reverse=True)
        return [{
            'counselor': c_id,
            'score': round(float(proba), 4),
            'from': 'similar_users {}'.format(user_id),
        } for (c_id, proba) in ranked[:self.MAX_RESULT_NUM]]

    @cost_time(desc='')
    def trans_feature_data(self, user_id, user_profile, counselor_data):
        """Build the prediction frame: user features repeated next to each counselor row."""
        user_feature_data = self.trans_user_feature_data(user_id, user_profile)
        # reset_index guarantees positional alignment with the user frame below.
        counselor_feature_data = self.trans_counselor_feature_data(counselor_data).reset_index(drop=True)
        counselor_num = len(counselor_feature_data)
        user_feature_data_dataframe = pd.DataFrame(
            [user_feature_data] * counselor_num,
            columns=self.USER_FEATURE_COLUMNS,
        )
        return pd.concat([user_feature_data_dataframe, counselor_feature_data], axis=1)

    def trans_user_feature_data(self, user_id, user_profile):
        """Encode a user profile into the list of seven integer user features.

        Missing profile fields are encoded as 0 instead of raising.
        """
        if not user_profile:
            user_profile = self.get_user_profile(user_id)
        if not user_profile:
            # get_user_profile returns [] on failure; treat it as "no fields".
            user_profile = {}
        from_login_encoder = self.get_encoder_from_dict('ffrom_login', user_profile.get('ffrom_login'))
        user_login_city_encoder = self.get_encoder_from_dict('user_login_city', user_profile.get('user_login_city'))
        user_preference_cate = user_profile.get('user_preference_cate')
        user_preference_cate_top_5_encoder = self.process_user_preference_cate(user_preference_cate)
        user_feature_data = [from_login_encoder, user_login_city_encoder]
        user_feature_data.extend(user_preference_cate_top_5_encoder)
        return user_feature_data

    def trans_counselor_feature_data(self, counselor_data):
        """Return the counselor feature frame to score.

        ``counselor_data`` is ignored for now: filtering to recalled counselors
        is disabled and the full table is returned.
        """
        return self.all_counselors

    @cost_time(desc='获取用户画像')
    def get_user_profile(self, user_id):
        """Fetch the profile of ``user_id`` from the URL configured under ``[DMP] url``.

        Returns:
            The first object of ``resp['data']['objects']``, or ``[]`` when the
            user id is missing/'0', the request fails, or the response does
            not have that shape.
        """
        # A missing id or 0 cannot be looked up; skip the remote call.
        if user_id is None or str(user_id) == '0':
            return []
        headers = {
            'X-App-Id': 'plough_cloud',
            'Content-Type': 'application/json'
        }
        payload = {
            "filter": {
                "uid": user_id,
            },
            "fields": self.select_fields,
            "limit": 10
        }
        resp = None
        try:
            # NOTE(review): no timeout is set, so a stalled server blocks the
            # calling thread indefinitely - consider passing timeout=...
            response = requests.request('POST', self.dmp_url, headers=headers, json=payload)
            resp = response.json()
            return resp['data']['objects'][0]
        except Exception as e:
            self.logger.error('获取用户画像数据失败: %s', e, exc_info=True)
            if resp is not None:
                # Only available when the failure happened after JSON decoding.
                self.logger.error('response json data %s', resp)
            return []

    def process_user_preference_cate(self, preference_cate):
        """Encode the user's first five preferred category ids.

        Args:
            preference_cate: JSON string holding a list of objects with a
                ``cate_id`` key. Any other value yields all zeros.

        Returns:
            A list of exactly ``PREFERENCE_CATE_NUM`` ints; position ``i`` is
            encoded with the ``cate_id_{i+1}_encoder`` table, and unknown ids
            or missing tables map to 0.
        """
        cate_ids = []
        if isinstance(preference_cate, str):
            try:
                pref_data = json.loads(preference_cate)
            except ValueError:
                self.logger.warning('user_preference_cate is not valid JSON: %s', preference_cate)
                pref_data = []
            if isinstance(pref_data, list):
                cate_ids = [
                    info['cate_id'] for info in pref_data
                    if isinstance(info, dict) and 'cate_id' in info
                ]
        cate_ids = cate_ids[:self.PREFERENCE_CATE_NUM]
        cate_ids.extend([0] * (self.PREFERENCE_CATE_NUM - len(cate_ids)))

        encoder_result = []
        for position, cate_id in enumerate(cate_ids, start=1):
            value_convert_dict = self.user_encoder_convert.get('cate_id_{}_encoder'.format(position)) or {}
            # Encoder keys are strings (the tables come from JSON), so the id
            # is stringified exactly as get_encoder_from_dict does. A missing
            # table contributes 0 in place, keeping the columns aligned.
            encoder_result.append(value_convert_dict.get(str(cate_id), 0))
        return encoder_result

    def get_encoder_from_dict(self, feature_name, feature_value):
        """Return the integer code of ``feature_value`` from the ``{feature_name}_encoder`` table, or 0."""
        value_convert_dict = self.user_encoder_convert.get('{}_encoder'.format(feature_name))
        if value_convert_dict is None:
            return 0
        return value_convert_dict.get(str(feature_value), 0)
if __name__ == '__main__': if __name__ == '__main__':
recommender = UserCFRecommender() # s_time = time.time()
print(recommender.recommend('12047')) # recommender1 = UserCFRecommender()
\ No newline at end of file # recommender1.recommend('30004410')
# print('all cost time: {}'.format(time.time() - s_time), recommender1.recommend('12047'))
# Manual smoke run: build the XGBoost recommender, warm it up with one request,
# then print the elapsed time together with the result of a second request.
print()
print()
s_time = time.time()
recommender = RecommendByXgboost()
recommender.recommend('3251227')
# The format string needs a '{}' placeholder, otherwise the elapsed time is dropped.
print('all cost time: {}'.format(time.time()-s_time), recommender.recommend('12047'))
# print()
# print()
# s_time = time.time()
# recommender.recommend('37298')
# print('all cost time: '.format(time.time() - s_time), recommender.recommend('12047'))
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json import json
import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import tornado.web import tornado.web
...@@ -11,11 +13,12 @@ from tornado.concurrent import run_on_executor ...@@ -11,11 +13,12 @@ from tornado.concurrent import run_on_executor
from ydl_ai_recommender.src.utils.log import create_logger from ydl_ai_recommender.src.utils.log import create_logger
from ydl_ai_recommender.src.core.recommender import UserCFRecommender from ydl_ai_recommender.src.core.recommender import UserCFRecommender
from ydl_ai_recommender.src.core.recommender import RecommendByXgboost
logger = create_logger(__name__, 'service.log', is_rotating=True) logger = create_logger(__name__, 'service.log', is_rotating=True)
recommender = UserCFRecommender(top_n=2, k=50, u2c='order') recommender = UserCFRecommender(top_n=2, k=50, u2c='order')
recommenderByXgb = RecommendByXgboost()
class RecommendHandler(tornado.web.RequestHandler): class RecommendHandler(tornado.web.RequestHandler):
executor = ThreadPoolExecutor(1) executor = ThreadPoolExecutor(1)
...@@ -73,13 +76,71 @@ class RecommendHandler(tornado.web.RequestHandler): ...@@ -73,13 +76,71 @@ class RecommendHandler(tornado.web.RequestHandler):
logger.info('response@@uid=%s@@ret=%s', uid, ret_str) logger.info('response@@uid=%s@@ret=%s', uid, ret_str)
return ret return ret
class RecommendXgbHandler(tornado.web.RequestHandler):
    """Tornado handler that returns recommendations from ``recommenderByXgb``.

    GET reads ``uid`` and ``size`` from the query arguments; POST reads them
    from a JSON body. Both delegate to ``run`` and write its result dict.
    """

    # run() is submitted to this executor via @run_on_executor; a single
    # worker means recommendation calls are processed one at a time.
    executor = ThreadPoolExecutor(1)

    @staticmethod
    def _parse_size(size, default=100):
        """Return ``size`` as an int, or ``default`` (with a warning) if it is not a number."""
        try:
            return int(size)
        except (TypeError, ValueError, OverflowError):
            logger.warning('size=%s 不是数字', size)
            return default

    @tornado.gen.coroutine
    def get(self):
        """Handle GET: ``uid`` and optional ``size`` (default 100) from the query string."""
        uid = self.get_argument('uid', None)
        if uid is None:
            # The request is still served; only the problem is logged.
            logger.warning('请求参数不正确,无uid')

        size = self._parse_size(self.get_argument('size', 100))

        ret = yield self.run(uid, size)
        self.write(ret)

    @tornado.gen.coroutine
    def post(self):
        """Handle POST: ``uid`` and optional ``size`` (default 100) from the JSON body."""
        param = json.loads(self.request.body.decode('utf-8'))
        uid = param.get('uid', None)
        # Coerce size the same way GET does so both entry points pass an int.
        size = self._parse_size(param.get('size', 100))

        if uid is None:
            logger.warning('请求参数不正确,无uid')

        ret = yield self.run(uid, size)
        self.write(ret)

    @run_on_executor
    def run(self, uid, size=100):
        """Run the recommender and wrap the outcome in a response dict.

        Returns:
            ``{'status': 'success', 'code': 0, 'data': [...], 'total_count': n}``
            on success, or the same keys with ``'error'``, ``1``, ``[]`` and
            ``0`` when the recommender raises.
        """
        logger.info('request@@uid=%s@@size=%s', uid, size)
        try:
            start_time = time.time()
            recommend_result = recommenderByXgb.recommend(uid, size=size, is_merge=True)
            logger.info('request@@uid=%s@@size=%s, cost %s ms', uid, size, (time.time()-start_time)*1000)
            ret = {
                'status': 'success',
                'code': 0,
                'data': recommend_result,
                'total_count': len(recommend_result),
            }
        except Exception:
            # Any failure is reported to the client as an error payload; the
            # traceback goes to the log.
            logger.error('执行推荐函数报错', exc_info=True)
            ret = {
                'status': 'error',
                'code': 1,
                'data': [],
                'total_count': 0,
            }
        ret_str = json.dumps(ret, ensure_ascii=False)
        logger.info('response@@uid=%s@@ret=%s', uid, ret_str)
        return ret
if __name__ == '__main__': if __name__ == '__main__':
tornado.options.define('port', default=8868, type=int, help='服务启动的端口号') tornado.options.define('port', default=8868, type=int, help='服务启动的端口号')
tornado.options.parse_command_line() tornado.options.parse_command_line()
app = tornado.web.Application(handlers=[(r'/ai_counselor_recommend', RecommendHandler)], autoreload=False, debug=False) app = tornado.web.Application(handlers=[(r'/ai_counselor_recommend', RecommendHandler),
(r'/ai_counselor_recommend/xgb/v1', RecommendXgbHandler)]
, autoreload=True, debug=False)
http_server = tornado.httpserver.HTTPServer(app) http_server = tornado.httpserver.HTTPServer(app)
http_server.listen(tornado.options.options.port) http_server.listen(tornado.options.options.port)
tornado.ioloop.IOLoop.instance().start() tornado.ioloop.IOLoop.instance().start()
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os import os
import json
import pandas as pd
def get_project_path(): def get_project_path():
...@@ -22,3 +24,14 @@ def get_conf_path(): ...@@ -22,3 +24,14 @@ def get_conf_path():
def get_model_path(): def get_model_path():
project_path = get_project_path() project_path = get_project_path()
return os.path.join(project_path, 'model') return os.path.join(project_path, 'model')
def read_user_encoder_dict():
    """Load the user feature encoder tables.

    Reads ``model_data/user_encoder_json_data.json`` under the project path.

    Returns:
        The object decoded from that JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    encoder_path = os.path.join(get_project_path(), 'model_data/user_encoder_json_data.json')
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(encoder_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def read_counselors():
    """Read the counselor feature table into a DataFrame.

    The table is ``model_data/doctor_profile_selected_feature.csv`` under the
    project path. Its first column is used as the index and ``doctor_id`` is
    read as a string column.
    """
    csv_path = os.path.join(get_project_path(), 'model_data/doctor_profile_selected_feature.csv')
    counselors = pd.read_csv(
        csv_path,
        sep=',',
        index_col=0,
        dtype={'doctor_id': str},
    )
    return counselors
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment