Commit 3d58e296 by 柴鹏飞

Merge branch 'YDL-REC-XGB' into 'master'

增加基于树模型XGBoost排序

See merge request chaipengfei/ydl_ai_recommender!1
parents a65552b2 487789ef
......@@ -10,3 +10,5 @@ dependencies:
- pip
- pip:
- -r requirements.txt
- -i http://pypi.mirrors.ustc.edu.cn/simple/
- --trusted-host pypi.mirrors.ustc.edu.cn
This source diff could not be displayed because it is too large. You can view the blob instead.
{"ffrom_login_encoder": {"android_xiaomi": 76, "android_vivo": 74, "android_oppo": 72, "android_huawei": 62, "0": 0, "ATK_7_android_huawei": 18, "m_login_default": 88, "TK_yyjlcs": 55, "app_ios": 79, "m_ydl": 91, "TK_4": 53, "android_10": 57, "yj_listeners_tengxun": 102, "ATK_android_yyjlcs_huawei": 31, "ATK_7_android_oppo": 22, "login_miss_ffrom_default": 86, "AppletWechatListen": 47, "ATK_4_android_yyb": 12, "ATK_7_android_xiaomi": 26, "zj": 106, "yj_listeners_douyin": 101, "ATK_android_yyjlcs_oppo": 34, "ATK_android_yyjlcs_vivo": 35, "ATK_android_yyjlcs_xiaomi": 37, "ios_1": 81, "android_yyb": 77, "login_api_sms_missing_default": 85, "android_m": 64, "ATK_7_android_360": 16, "ATK_4_android_vivo": 9, "ATK_7_android_yyb": 27, "m_wx_app": 90, "AppletWechatYdlPsyConsult": 48, "ATK_7_android_baidu": 17, "ATK_4_android_baidu": 4, "AppletWechatAiXiaoyi": 45, "ATK_android_yyjlcs_lenovo": 32, "ATK_android_yyjlcs_baidu": 30, "mini_shiyebu": 93, "ATK_7_android_vivo": 24, "AppletWechatCourseFuLi": 46, "login_api_phone_missing_default": 84, "android_sanxing": 73, "ATK_android_yyjlcs_wdj": 36, "android_medical_xiaomi": 69, "pinganbaoxian": 95, "ATK_7_android_letv": 20, "zhonganhuyi": 105, "BG-platform": 49, "android_zhuzhan": 78, "ATK_android_yyjlcs_yyb": 38, "ATK_7_android_zhuzhan": 28, "ATK_4_android_meizu": 6, "android_baidu": 61, "ATK_android_yyjlcs_360": 29, "ATK_4_android_xiaomi": 11, "m": 87, "ATK_7_android_sanxing": 23, "ydl-dhzx": 97, "ydl-pro": 99, "TK_qinggan": 54, "m_wechat": 89, "yj_listeners_ydlxlgzh": 103, "android_meizu": 71, "BG-xinting": 50, "android_appdouyin": 60, "android_medical_doctor": 65, "mini_shrink": 94, "swan": 96, "ATK_6_android_huawei": 14, "ATK_android_yyjlcs_meizu": 33, "android_medical_huawei": 66, "android_360": 58, "ATK_7_android_meizu": 21, "ATK_7_android_lenovo": 19, "ATK_4_android_huawei": 5, "android_medical_vivo": 68, "ATK_4_android_oppo": 7, "ATK_4_android_sanxing": 8, "ATK_7_android_wdj": 25, "ATK_yyjlcs_360cn": 42, 
"yj_consultant_tengxun": 100, "android_medical_oppo": 67, "ios_medical_user": 83, "ATK_4_android_360": 3, "ATK_4_android_wdj": 10, "ios_medical_doctor": 82, "ATK_qinggan": 40, "android_Cpd_Honor": 59, "ydl-emotion": 98, "BaiduSem_AppAndroidYdl": 51, "ATK_3_android_baidu": 2, "android_lenovo": 63, "baidu": 80, "yyb": 104, "android_wdj": 75, "alipay": 56, "ATK_6_android_baidu": 13, "ATK_yyjlcs_xiaomi": 44, "ATK_10_android_huawei": 1, "ATK_6_android_meizu": 15, "ATK_yyjlcs_null": 43, "android_medical_yyb": 70, "ATK_ylcs_xiaomi": 41, "meizu": 92, "ATK_aqcs_yyb": 39, "BaiduSem_zhuzhan": 52}, "user_login_city_encoder": {"0": 0, "440800": 200, "370200": 131, "411500": 161, "440300": 195, "530100": 267, "451400": 228, "120000": 3, "500000": 245, "310000": 69, "330700": 89, "440100": 193, "330200": 84, "610100": 276, "420100": 164, "330100": 83, "520100": 264, "410100": 146, "450200": 216, "441500": 205, "450300": 217, "370600": 135, "110000": 1, "410400": 149, "340300": 96, "360700": 125, "330600": 88, "331000": 92, "320100": 70, "370100": 130, "510100": 246, "441200": 202, "630100": 296, "130100": 4, "230100": 57, "320200": 71, "130900": 12, "410300": 148, "440700": 199, "451100": 225, "451300": 227, "650800": 310, "650500": 307, "370800": 137, "220100": 49, "445200": 213, "431200": 191, "330300": 85, "460100": 229, "450800": 222, "511100": 255, "320500": 74, "411100": 157, "441901": 210, "430100": 180, "411400": 160, "140800": 22, "450700": 221, "520300": 266, "370500": 134, "130700": 10, "610600": 281, "610500": 280, "220500": 53, "320300": 72, "350100": 110, "360100": 119, "440500": 197, "410800": 153, "131000": 13, "140900": 23, "440600": 198, "540100": 275, "150500": 30, "370700": 136, "420600": 168, "340100": 94, "340400": 97, "431000": 189, "371700": 145, "130600": 9, "630200": 297, "350800": 117, "610400": 279, "340700": 100, "510900": 253, "630300": 298, "421100": 173, "340200": 95, "330500": 87, "140300": 17, "150100": 26, "231000": 66, "620100": 284, "220700": 
55, "130800": 11, "442000": 211, "210200": 36, "361000": 128, "140500": 19, "610700": 282, "341300": 105, "350600": 115, "450100": 215, "421300": 175, "450500": 219, "441400": 204, "320700": 76, "140100": 15, "510400": 248, "370900": 138, "320400": 73, "130200": 5, "371300": 141, "360400": 122, "230500": 61, "650100": 303, "431300": 192, "130500": 8, "430200": 181, "511700": 260, "341700": 108, "370400": 133, "530800": 273, "371500": 143, "210100": 35, "350900": 118, "431100": 190, "340600": 99, "141000": 24, "420300": 166, "150600": 31, "610800": 283, "341200": 104, "511500": 258, "350200": 111, "511300": 256, "210800": 42, "230300": 59, "210900": 43, "140700": 21, "350500": 114, "350700": 116, "141100": 25, "441300": 203, "421200": 174, "445100": 212, "PHBWH00": 362, "530400": 269, "150400": 29, "210400": 38, "441600": 206, "510700": 251, "450900": 223, "441800": 208, "130300": 6, "430900": 188, "360300": 121, "441900": 209, "640100": 301, "371400": 142, "621200": 295, "211400": 48, "510600": 250, "530300": 268, "610300": 278, "231200": 68, "621000": 293, "371600": 144, "150900": 34, "211300": 47, "430500": 184, "430400": 183, "410500": 150, "530900": 274, "411600": 162, "211100": 45, "620900": 292, "211200": 46, "320900": 78, "441700": 207, "512000": 263, "620600": 289, "361100": 129, "510800": 252, "620500": 288, "330400": 86, "411300": 159, "231100": 67, "130400": 7, "341000": 102, "320600": 75, "650300": 305, "230800": 64, "420200": 165, "440400": 196, "350400": 113, "330900": 91, "440900": 201, "210700": 41, "420900": 171, "320800": 77, "371000": 139, "321300": 82, "429005": 177, "360900": 127, "430800": 187, "340500": 98, "520200": 265, "350300": 112, "210600": 40, "140200": 16, "410700": 152, "651000": 311, "511600": 259, "42A2100": 179, "420500": 167, "450400": 218, "511000": 254, "230200": 58, "651300": 314, "411000": 156, "410200": 147, "360200": 120, "430300": 182, "220800": 56, "430600": 185, "NSHF00": 354, "445300": 214, "370300": 132, "410900": 155, 
"131100": 14, "410600": 151, "510500": 249, "650700": 309, "421000": 172, "510300": 247, "621100": 294, "330800": 90, "NSWHBS00": 355, "321000": 79, "140400": 18, "620700": 290, "331100": 93, "341500": 106, "220200": 50, "440200": 194, "150800": 33, "630400": 299, "511900": 262, "371100": 140, "321100": 80, "411700": 163, "420700": 169, "PKTAS00": 364, "430700": 186, "410881": 154, "511800": 261, "650600": 308, "420800": 170, "NY0000": 357, "651500": 316, "341600": 107, "46A3100": 239, "QLDBNE00": 365, "651200": 313, "469005": 233, "110105": 2, "WASEA00": 380, "650200": 304, "360800": 126, "CASFO00": 330, "511400": 257, "150300": 28, "451000": 224, "651400": 315, "450600": 220, "620800": 291, "210300": 37, "150200": 27, "411200": 158, "451200": 226, "230400": 60, "429004": 176, "610200": 277, "321200": 81, "630500": 300, "460200": 230, "230600": 62, "360600": 124, "341800": 109, "651100": 312, "340800": 101, "530500": 270, "530600": 271, "620400": 287, "MOSTL00": 352, "429006": 178, "341100": 103, "46A3300": 240, "210500": 39, "46A2500": 235, "220300": 51, "KAZAKS00": 346, "140600": 20, "SCTARD00": 369, "150700": 32, "211000": 44, "230900": 65, "620200": 285, "JPN2700": 345, "640200": 302, "JPN1300": 344, "620300": 286, "PHLMNL00": 363, "650400": 306, "530700": 272, "220400": 52, "ILCHI00": 339, "CANTOR00": 328, "KLKUL00": 348, "VICMEL00": 375, "360500": 123, "ITAROM00": 342, "46A3500": 242, "230700": 63, "KL0000": 347, "810100": 319, "PAPHL00": 361, "SPE0000": 370, "RUSMOW00": 366, "810300": 320, "46A2700": 236, "DEUNUE00": 334, "NZLAUK00": 358, "469006": 234, "CALAX00": 323, "CANVAN00": 329, "46A2800": 237, "FRAPAR00": 338, "SAADL00": 368, "MEXMEX00": 350, "930100": 321, "IRLDB00": 341, "220600": 54, "ENGLND00": 335, "46A3400": 241, "AREDU00": 322, "VNMHI00": 376, "469002": 231, "WAPER00": 379, "DEUHH00": 332, "WA0000": 377, "TH-100000": 371, "CANSUD00": 327, "CANMTR00": 325, "469003": 232, "CANLOD00": 324, "THA1000": 372, "ESPVLL00": 337, "NZLHLZ00": 359, 
"TXAUS00": 373, "CHLRM00": 331, "49SEO00": 244, "NVLAS00": 356, "RUSSPE00": 367, "TXDAL00": 374, "ESPMAD00": 336, "MNG6100": 351, "NLDAMS00": 353, "OR0000": 360, "IRLCK00": 340, "JK0000": 343, "WAALH00": 378, "651600": 317, "MABZD00": 349, "46A3600": 243, "CANOTT00": 326, "DEUKEL00": 333, "651700": 318, "46A3000": 238}, "cate_id_1_encoder": {"26": 5, "23": 3, "0": 0, "27": 6, "1": 1, "452": 7, "22": 2, "699": 8, "25": 4}, "cate_id_2_encoder": {"22": 2, "25": 4, "0": 0, "27": 6, "452": 7, "26": 5, "699": 8, "1": 1, "23": 3}, "cate_id_3_encoder": {"27": 6, "452": 7, "0": 0, "22": 2, "1": 1, "25": 4, "23": 3, "26": 5, "699": 8}, "cate_id_4_encoder": {"25": 4, "1": 1, "0": 0, "23": 3, "452": 7, "699": 8, "26": 5, "27": 6, "22": 2}, "cate_id_5_encoder": {"452": 7, "27": 6, "0": 0, "1": 1, "26": 5, "22": 2, "23": 3, "25": 4, "699": 8}}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"ffrom_login_encoder": {"0": 0, "m_login_default": 85, "android_huawei": 59, "ATK_7_android_360": 17, "ATK_7_android_huawei": 19, "android_oppo": 69, "android_xiaomi": 73, "android_yyb": 74, "AppletWechatListen": 44, "m_ydl": 88, "android_vivo": 71, "app_ios": 76, "ATK_android_yyjlcs_oppo": 34, "ATK_android_yyjlcs_huawei": 31, "ATK_7_android_oppo": 22, "m_wechat": 86, "zj": 103, "ATK_android_yyjlcs_vivo": 35, "AppletWechatAiXiaoyi": 42, "yj_listeners_douyin": 98, "ATK_7_android_xiaomi": 26, "yj_listeners_tengxun": 99, "android_10": 54, "TK_4": 50, "android_sanxing": 70, "ATK_android_yyjlcs_xiaomi": 37, "ios_1": 78, "AppletWechatYdlPsyConsult": 45, "ATK_7_android_yyb": 27, "android_m": 61, "ATK_7_android_zhuzhan": 28, "AppletWechatCourseFuLi": 43, "m": 84, "ATK_7_android_vivo": 24, "login_api_sms_missing_default": 82, "zhonganhuyi": 102, "TK_yyjlcs": 52, "android_medical_vivo": 65, "login_api_phone_missing_default": 81, "ATK_android_yyjlcs_baidu": 30, "pinganbaoxian": 92, "yj_consultant_tengxun": 97, "ATK_7_android_baidu": 18, "android_zhuzhan": 75, "login_miss_ffrom_default": 83, "BG-platform": 46, "ATK_4_android_vivo": 11, "ATK_4_android_yyb": 14, "android_appdouyin": 57, "ATK_android_yyjlcs_yyb": 38, "ATK_android_yyjlcs_meizu": 33, "ydl-emotion": 95, "ATK_7_android_letv": 20, "android_baidu": 58, "ydl-dhzx": 94, "android_medical_huawei": 63, "android_medical_doctor": 62, "ATK_4_android_360": 4, "ATK_7_android_wdj": 25, "swan": 93, "ATK_4_android_wdj": 12, "android_medical_xiaomi": 66, "ATK_4_android_xiaomi": 13, "mini_shrink": 91, "android_meizu": 68, "m_wx_app": 87, "BG-xinting": 47, "ydl-pro": 96, "mini_shiyebu": 90, "ATK_android_yyjlcs_lenovo": 32, "ATK_7_android_sanxing": 23, "ATK_4_android_meizu": 8, "baidu": 77, "yj_listeners_ydlxlgzh": 100, "ios_medical_doctor": 79, "ATK_4_android_baidu": 5, "android_lenovo": 60, "alipay": 53, "ATK_4_android_oppo": 9, "android_medical_oppo": 64, "BaiduSem_AppAndroidYdl": 48, "ATK_android_yyjlcs_360": 29, 
"ATK_4_android_lenovo": 7, "yyb": 101, "ATK_3_android_baidu": 3, "TK_qinggan": 51, "android_360": 55, "meizu": 89, "ios_medical_user": 80, "ATK_4_android_sanxing": 10, "android_medical_yyb": 67, "ATK_6_android_huawei": 15, "ATK_qinggan": 40, "android_Cpd_Honor": 56, "ATK_yyjlcs_xiaomi": 41, "BaiduSem_zhuzhan": 49, "ATK_10_android_huawei": 1, "android_wdj": 72, "ATK_4_android_huawei": 6, "ATK_android_yyjlcs_wdj": 36, "ATK_10_android_vivo": 2, "ATK_7_android_meizu": 21, "ATK_aqcs_yyb": 39, "ATK_6_android_meizu": 16}, "user_login_city_encoder": {"0": 0, "140100": 15, "500000": 244, "310000": 70, "371300": 142, "110000": 1, "410700": 153, "320500": 75, "150100": 26, "420100": 165, "442000": 212, "340600": 100, "330100": 84, "361100": 130, "440200": 195, "512000": 262, "130600": 9, "370600": 136, "441500": 206, "530100": 266, "210100": 35, "320700": 77, "210800": 42, "140300": 17, "320100": 71, "320400": 74, "460100": 230, "120000": 3, "410500": 151, "370200": 132, "130100": 4, "220100": 49, "520100": 263, "230100": 57, "610300": 277, "610100": 275, "510100": 245, "530900": 273, "410100": 147, "421100": 174, "130400": 7, "450100": 216, "361000": 129, "340800": 102, "511700": 259, "320200": 72, "340300": 97, "410800": 154, "350500": 115, "320600": 76, "210200": 36, "441800": 209, "430800": 188, "360100": 120, "130200": 5, "440100": 194, "430100": 181, "450300": 218, "420200": 166, "610500": 279, "370100": 131, "530500": 269, "350900": 119, "341500": 107, "130300": 6, "640100": 300, "420800": 171, "360900": 128, "320900": 79, "370700": 137, "330300": 86, "441900": 210, "210400": 38, "411500": 162, "411300": 160, "440300": 196, "AREDU00": 320, "620100": 283, "360800": 127, "510300": 246, "350700": 117, "140700": 21, "410600": 152, "340100": 95, "445300": 215, "341700": 109, "370500": 135, "KL0000": 346, "130800": 11, "370800": 138, "340200": 96, "330800": 91, "330200": 85, "445200": 214, "131000": 13, "431100": 191, "350200": 112, "530800": 272, "430700": 187, "150600": 
31, "440400": 197, "320800": 78, "341800": 110, "321200": 82, "530300": 267, "431300": 193, "341200": 105, "411000": 157, "441300": 204, "511900": 261, "140800": 22, "510600": 249, "330500": 88, "431000": 190, "PKTAS00": 362, "JAMHAN00": 341, "410400": 150, "140400": 18, "371600": 145, "441901": 211, "210300": 37, "511300": 255, "370400": 134, "340500": 99, "450200": 217, "410900": 156, "321000": 80, "340400": 98, "410300": 149, "441400": 205, "440500": 198, "GATAT00": 335, "370300": 133, "350300": 113, "610700": 281, "370900": 139, "140900": 23, "610400": 278, "411600": 163, "150400": 29, "230800": 64, "371700": 146, "441700": 208, "511500": 257, "150700": 32, "650100": 302, "430200": 182, "630100": 295, "630300": 297, "330900": 92, "331100": 94, "371500": 144, "530600": 270, "350600": 116, "350100": 111, "511600": 258, "DEUHH00": 330, "210700": 41, "150200": 27, "321300": 83, "411400": 161, "451000": 225, "130700": 10, "320300": 73, "330600": 89, "621000": 292, "610800": 282, "650700": 308, "210900": 43, "651200": 312, "440700": 200, "450500": 220, "411700": 164, "330700": 90, "445100": 213, "350400": 114, "420900": 172, "410881": 155, "410200": 148, "341600": 108, "511000": 253, "441200": 203, "429006": 179, "341000": 103, "450700": 222, "511800": 260, "420600": 169, "651100": 311, "NSWHBS00": 354, "650500": 306, "430600": 186, "421200": 175, "360600": 125, "420700": 170, "340700": 101, "211400": 48, "220200": 50, "620500": 287, "620600": 288, "150900": 34, "469002": 232, "520300": 265, "411200": 159, "330400": 87, "131100": 14, "360400": 123, "510800": 251, "510500": 248, "511400": 256, "431200": 192, "150500": 30, "130900": 12, "331000": 93, "141000": 24, "321100": 81, "430900": 189, "440800": 201, "450600": 221, "371000": 140, "469006": 235, "520200": 264, "440600": 199, "230700": 63, "130500": 8, "440900": 202, "530400": 268, "450800": 223, "460200": 231, "211200": 46, "651000": 310, "230600": 62, "210600": 40, "451200": 227, "150300": 28, "220300": 51, 
"511100": 254, "341100": 104, "141100": 25, "640200": 301, "510400": 247, "341300": 106, "220700": 55, "140500": 19, "421000": 173, "420300": 167, "220800": 56, "430500": 185, "211100": 45, "420500": 168, "650300": 304, "230900": 65, "371400": 143, "620300": 285, "510900": 252, "360200": 121, "CANSUD00": 326, "210500": 39, "540100": 274, "371100": 141, "360700": 126, "651400": 314, "650200": 303, "610600": 280, "230300": 59, "451300": 228, "CANMTR00": 324, "510700": 250, "VICMEL00": 374, "PHBWH00": 360, "450900": 224, "441600": 207, "451400": 229, "451100": 226, "360500": 124, "110105": 2, "230500": 61, "140200": 16, "231200": 68, "211300": 47, "651300": 313, "360300": 122, "220400": 52, "630200": 296, "450400": 219, "430400": 184, "NY0000": 356, "230200": 58, "220500": 53, "430300": 183, "411100": 158, "469003": 233, "530700": 271, "231000": 66, "220600": 54, "231100": 67, "KLKUL00": 347, "620200": 284, "140600": 20, "651500": 315, "211000": 44, "620900": 291, "CANTOR00": 327, "620800": 290, "620700": 289, "230400": 60, "610200": 276, "SAADL00": 366, "650800": 309, "MEXMEX00": 349, "150800": 33, "421300": 176, "JPN1300": 343, "651700": 317, "46A2700": 237, "FRAPAR00": 334, "350800": 118, "469005": 234, "620400": 286, "621200": 294, "TXAUS00": 372, "429004": 177, "PAPHL00": 359, "630500": 299, "TH-100000": 370, "650600": 307, "VNMHI00": 375, "NVLAS00": 355, "621100": 293, "630400": 298, "KAZAKS00": 345, "THA1000": 371, "SCTARD00": 367, "ENGLND00": 331, "TXDAL00": 373, "NZLAUK00": 357, "42A2100": 180, "651600": 316, "46A3400": 240, "MABZD00": 348, "CANLOD00": 323, "429005": 178, "PHLMNL00": 361, "ITAROM00": 340, "CALAX00": 322, "JK0000": 342, "650400": 305, "46A3100": 238, "IDMJL00": 336, "SCTEDH00": 368, "ILCHI00": 337, "JPN2700": 344, "OR0000": 358, "WA0000": 376, "CANVAN00": 328, "QLDBNE00": 363, "MNG6100": 350, "NLDAMS00": 352, "46A3300": 239, "CHLRM00": 329, "ESPMAD00": 333, "810100": 318, "810300": 319, "46A2500": 236, "260000": 69, "ITAMIL00": 339, 
"QLDTWB00": 364, "WAPER00": 377, "SPE0000": 369, "RUSMOW00": 365, "ENGTRU00": 332, "46A3500": 241, "49SEO00": 243, "46A3600": 242, "CANOTT00": 325, "NSHF00": 353, "AZPHX00": 321, "MOSTL00": 351, "IRLDB00": 338}, "cate_id_1_encoder": {"0": 0, "452": 7, "26": 5, "23": 3, "25": 4, "22": 2, "699": 8, "1": 1, "27": 6}, "cate_id_2_encoder": {"0": 0, "25": 4, "452": 7, "699": 8, "26": 5, "1": 1, "23": 3, "22": 2, "27": 6}, "cate_id_3_encoder": {"0": 0, "22": 2, "27": 6, "25": 4, "23": 3, "452": 7, "1": 1, "26": 5, "699": 8}, "cate_id_4_encoder": {"0": 0, "26": 5, "699": 8, "452": 7, "1": 1, "25": 4, "22": 2, "23": 3, "27": 6}, "cate_id_5_encoder": {"0": 0, "1": 1, "23": 3, "25": 4, "699": 8, "27": 6, "22": 2, "26": 5, "452": 7}}
\ No newline at end of file
......@@ -5,9 +5,12 @@ import json
import configparser
from typing import List
import xgboost as xgb
import time
import faiss
import requests
import numpy as np
import pandas as pd
from ydl_ai_recommender.src.core.indexer import (
UserCounselorChatIndexer,
......@@ -17,11 +20,24 @@ from ydl_ai_recommender.src.core.indexer import (
CounselorCounselorCFIndexer,
)
from ydl_ai_recommender.src.core.profile import encode_user_profile
from ydl_ai_recommender.src.utils import get_conf_path, get_data_path
from ydl_ai_recommender.src.utils import get_conf_path, get_data_path, read_user_encoder_dict, read_counselors, get_project_path
from ydl_ai_recommender.src.utils.log import create_logger
logger = create_logger(__name__, 'recommender.log', is_rotating=True)
class Recommender():
def cost_time(desc):
    """Build a decorator that logs how long each call of a function takes.

    Args:
        desc: Free-form label placed after the function name in the log line.

    Returns:
        A decorator. The decorated callable forwards all positional and
        keyword arguments to the original function and returns its result
        unchanged. After a successful call it writes one ``logger.info``
        record containing the function name, ``desc`` and the elapsed time
        in milliseconds rounded to two decimals. If the function raises,
        the exception propagates and nothing is logged.
    """
    # Imported locally so the block does not rely on the file's import list.
    from functools import wraps

    def the_func_cost_time(func):
        # wraps() copies __name__/__doc__ onto the wrapper; without it every
        # decorated method would report itself as "fun" to introspection.
        @wraps(func)
        def fun(*args, **kwargs):
            t = time.perf_counter()
            result = func(*args, **kwargs)
            elapsed_ms = round((time.perf_counter() - t) * 1000, 2)
            logger.info('函数:{},{} 耗时:{} ms'.format(str(func.__name__), desc, elapsed_ms))
            return result
        return fun
    return the_func_cost_time
class Recommender:
def __init__(self) -> None:
self.logger = create_logger(__name__, 'recommender.log')
......@@ -241,6 +257,162 @@ class ItemCFRecommender(Recommender):
return counselors
class RecommendByXgboost(Recommender):
    """Rank counselors for a user with a pre-trained XGBoost classifier.

    For one request the class fetches the user's profile from the DMP
    service, encodes it with the tables from ``read_user_encoder_dict()``,
    pairs the encoded user features with every row of the counselor table
    from ``read_counselors()`` and scores each pair with ``predict_proba``.
    """

    # Number of best-scored counselors returned by ``recommend``.
    MAX_RESULTS = 50
    # Upper bound, in seconds, for the DMP profile request. Without a timeout
    # a stalled DMP call would block the calling thread indefinitely.
    # NOTE(review): 5 s is a conservative guess; tune to the DMP latency budget.
    DMP_TIMEOUT = 5
    # Column order of the user part of the model input.
    USER_FEATURE_COLUMNS = [
        'ffrom_login_encoder',
        'user_login_city_encoder',
        'cate_id_1_encoder',
        'cate_id_2_encoder',
        'cate_id_3_encoder',
        'cate_id_4_encoder',
        'cate_id_5_encoder',
    ]

    def __init__(self, top_n=5, k=20, is_use_db=True, u2c='combination', c2c=None) -> None:
        """Load configuration, encoder tables, counselor features and the model.

        ``top_n``, ``k``, ``is_use_db``, ``u2c`` and ``c2c`` are accepted for
        interface compatibility but are not used here; the recall recommender
        that would consume them is currently not constructed.
        """
        super().__init__()
        config = configparser.RawConfigParser()
        config.read(get_conf_path())
        self.dmp_url = config.get('DMP', 'url')

        # Profile fields requested from DMP.
        select_items = ['uid', 'ffrom_login', 'user_login_city', 'user_preference_cate']
        self.select_fields = {field: True for field in select_items}

        self.user_encoder_convert = read_user_encoder_dict()
        self.all_counselors = read_counselors()

        self.params = {
            'n_estimators': 150,
            'max_depth': 7,
            'min_child_weight': 5,
            'gamma': 0,
            'subsample': 0.9,
            'colsample_bytree': 0.5,
            'reg_alpha': 0,
            'reg_lambda': 1,
            'learning_rate': 0.1,
            'max_delta_step': 0,
            'scale_pos_weight': 1,
        }
        self.model = xgb.XGBClassifier(objective='binary:logistic', nthread=-1, **self.params)
        self.model.load_model(os.path.join(get_project_path(), 'model_data/xgb_model.bin'))

    @cost_time(desc='召回咨询师')
    def recall_data(self, user_id, size=0, is_merge=True):
        """Return recall candidates from ``self.recommender``.

        Raises:
            AttributeError: if no recall recommender has been assigned. The
                constructor of this class does not create one, so this method
                is only usable after ``self.recommender`` is set elsewhere.
        """
        recall_recommender = getattr(self, 'recommender', None)
        if recall_recommender is None:
            raise AttributeError('RecommendByXgboost.recommender is not configured')
        # Forward the caller's is_merge instead of a hard-coded True.
        return recall_recommender.recommend(user_id, size=size, is_merge=is_merge)

    @cost_time(desc='模型推荐整个流程')
    def recommend(self, user_id, size=0, is_merge=True):
        """Return up to ``MAX_RESULTS`` counselors ordered by predicted score.

        Args:
            user_id: Id of the user to recommend for.
            size: Only used for the fallback ``self._recommend_top(size)``
                when no user profile is available.
            is_merge: Accepted for interface compatibility; not used.

        Returns:
            A list of dicts with keys ``counselor``, ``score`` (probability of
            the positive class, rounded to 4 decimals) and ``from``; or the
            result of ``self._recommend_top(size)`` when the profile is empty.
        """
        user_profile = self.get_user_profile(user_id)
        if not user_profile:
            # _recommend_top is not defined in this class; presumably it is
            # inherited from Recommender.
            return self._recommend_top(size)

        # Recall is disabled: every counselor in the feature table is scored.
        predit_data = self.trans_feature_data(user_id, user_profile, None)
        doctor_ids = predit_data.pop('doctor_id').to_numpy()

        pre_time = time.time()
        predit_result = self.model.predict_proba(predit_data)[:, 1]
        self.logger.info('predit_time:{}ms'.format(int((time.time()-pre_time)*1000)))

        # Going through a dict keeps a single score per counselor id.
        scores = dict(zip(doctor_ids, predit_result))
        ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return [{
            'counselor': c_id,
            'score': round(float(proba), 4),
            'from': 'similar_users {}'.format(user_id),
        } for (c_id, proba) in ranked[:self.MAX_RESULTS]]

    @cost_time(desc='')
    def trans_feature_data(self, user_id, user_profile, counselor_data):
        """Build the model input: one row per counselor, user features first.

        Returns:
            A DataFrame whose first columns are ``USER_FEATURE_COLUMNS``
            (the same encoded user values repeated on every row) followed by
            the columns of the counselor feature table.
        """
        user_feature_data = self.trans_user_feature_data(user_id, user_profile)
        # concat(axis=1) aligns rows by index label. The user frame below has a
        # fresh 0..n-1 index, so give the counselor frame the same one; this
        # also returns a new frame, leaving self.all_counselors untouched.
        counselor_feature_data = self.trans_counselor_feature_data(counselor_data).reset_index(drop=True)
        counselor_num = len(counselor_feature_data)
        user_feature_data_dataframe = pd.DataFrame(
            [user_feature_data] * counselor_num,
            columns=self.USER_FEATURE_COLUMNS,
        )
        return pd.concat([user_feature_data_dataframe, counselor_feature_data], axis=1)

    def trans_user_feature_data(self, user_id, user_profile):
        """Encode a user profile into the 7 integer user features.

        If ``user_profile`` is empty it is fetched with ``get_user_profile``.
        The result is ``[ffrom_login, user_login_city, cate_1 .. cate_5]``
        in encoded form.
        """
        if not user_profile:
            user_profile = self.get_user_profile(user_id)

        from_login_encoder = self.get_encoder_from_dict('ffrom_login', user_profile['ffrom_login'])
        user_login_city_encoder = self.get_encoder_from_dict('user_login_city', user_profile['user_login_city'])
        preference_encoders = self.process_user_preference_cate(user_profile['user_preference_cate'])

        return [from_login_encoder, user_login_city_encoder] + preference_encoders

    def trans_counselor_feature_data(self, counselor_data):
        """Return the counselor feature table.

        ``counselor_data`` is currently ignored: the full table loaded in the
        constructor is returned regardless of the candidates passed in.
        """
        return self.all_counselors

    @cost_time(desc='获取用户画像')
    def get_user_profile(self, user_id):
        """Fetch the user's profile object from the DMP service.

        Returns:
            The first element of ``data.objects`` in the JSON response, or an
            empty list when ``user_id`` is ``0``/``'0'`` or when the request,
            JSON decoding or response lookup fails (the failure is logged).
        """
        # str() so that an integer 0 (e.g. from a JSON request body) is
        # treated the same as the string '0'.
        if str(user_id) == '0':
            return []

        headers = {
            'X-App-Id': 'plough_cloud',
            'Content-Type': 'application/json'
        }
        payload = {
            "filter": {
                "uid": user_id,
            },
            "fields": self.select_fields,
            "limit": 10
        }

        resp = None
        try:
            response = requests.request(
                'POST', self.dmp_url, headers=headers, json=payload, timeout=self.DMP_TIMEOUT,
            )
            resp = response.json()
            return resp['data']['objects'][0]
        except Exception as e:
            # Best effort by design: any failure falls back to "no profile".
            self.logger.error('获取用户画像数据失败: %s', e, exc_info=True)
            # Only log the payload when the response was actually decoded.
            if resp is not None:
                self.logger.exception('response json data %s', resp)
        return []

    def process_user_preference_cate(self, preference_cate):
        """Encode the user's first five preferred category ids.

        Args:
            preference_cate: JSON text of a list of objects that each carry a
                ``cate_id`` key. Any non-string value is treated as "no
                preferences".

        Returns:
            A list of exactly five integers. Position ``i`` is looked up in
            the ``cate_id_{i+1}_encoder`` table; missing ids, unknown ids and
            missing tables encode to ``0``.
        """
        ids = []
        if isinstance(preference_cate, str):
            ids = [info['cate_id'] for info in json.loads(preference_cate)][:5]
        padded = ids + [0] * (5 - len(ids))

        encoder_result = []
        for position, cate_id in enumerate(padded, start=1):
            table = self.user_encoder_convert.get('cate_id_{}_encoder'.format(position))
            if table is None:
                # Keep the slot so later positions are not shifted left.
                encoder_result.append(0)
                continue
            # The encoder tables come from JSON, so their keys are strings;
            # normalise the id the same way get_encoder_from_dict does.
            encoder_result.append(table.get(str(cate_id), 0))
        return encoder_result

    def get_encoder_from_dict(self, feature_name, feature_value):
        """Return the integer code of ``feature_value`` for ``feature_name``.

        Looks up ``str(feature_value)`` in the ``{feature_name}_encoder``
        table; returns ``0`` when the table or the value is unknown.
        """
        value_convert_dict = self.user_encoder_convert.get('{}_encoder'.format(feature_name))
        if value_convert_dict is None:
            return 0
        return value_convert_dict.get(str(feature_value), 0)
if __name__ == '__main__':
recommender = UserCFRecommender()
print(recommender.recommend('12047'))
\ No newline at end of file
# s_time = time.time()
# recommender1 = UserCFRecommender()
# recommender1.recommend('30004410')
# print('all cost time: {}'.format(time.time() - s_time), recommender1.recommend('12047'))
# Manual smoke test: build the XGBoost recommender, run one request, then
# print the elapsed time together with the result of a second request.
print()
print()
s_time = time.time()
recommender = RecommendByXgboost()
recommender.recommend('3251227')
# The format string needs a '{}' placeholder; without it str.format() silently
# discards the elapsed time and only the label is printed.
print('all cost time: {}'.format(time.time()-s_time), recommender.recommend('12047'))
# print()
# print()
# s_time = time.time()
# recommender.recommend('37298')
# print('all cost time: '.format(time.time() - s_time), recommender.recommend('12047'))
\ No newline at end of file
# -*- coding: utf-8 -*-
import json
import time
from concurrent.futures import ThreadPoolExecutor
import tornado.web
......@@ -11,11 +13,12 @@ from tornado.concurrent import run_on_executor
from ydl_ai_recommender.src.utils.log import create_logger
from ydl_ai_recommender.src.core.recommender import UserCFRecommender
from ydl_ai_recommender.src.core.recommender import RecommendByXgboost
# Service-wide logger. create_logger is a project helper; presumably it writes
# to 'service.log' with rotation enabled -- confirm in ydl_ai_recommender.src.utils.log.
logger = create_logger(__name__, 'service.log', is_rotating=True)
# Both recommenders are constructed once, when this module is imported, and the
# same instances are reused for every request.
# presumably used by RecommendHandler; its body is not fully visible here.
recommender = UserCFRecommender(top_n=2, k=50, u2c='order')
# Called by RecommendXgbHandler.run.
recommenderByXgb = RecommendByXgboost()
class RecommendHandler(tornado.web.RequestHandler):
executor = ThreadPoolExecutor(1)
......@@ -73,13 +76,71 @@ class RecommendHandler(tornado.web.RequestHandler):
logger.info('response@@uid=%s@@ret=%s', uid, ret_str)
return ret
class RecommendXgbHandler(tornado.web.RequestHandler):
    """HTTP handler returning counselor recommendations from ``recommenderByXgb``.

    GET reads ``uid`` and ``size`` from the query string; POST reads them from
    a JSON object in the request body. Both reply with a JSON object holding
    ``status``, ``code``, ``data`` and ``total_count``.
    """

    # Value used when ``size`` is absent or cannot be converted to an int.
    DEFAULT_SIZE = 100
    # Executor used by @run_on_executor for the blocking ``run`` method.
    executor = ThreadPoolExecutor(1)

    @classmethod
    def _parse_size(cls, size):
        """Convert ``size`` to int, falling back to ``DEFAULT_SIZE`` with a warning."""
        try:
            return int(size)
        except (TypeError, ValueError, OverflowError):
            logger.warning('size=%s 不是数字', size)
            return cls.DEFAULT_SIZE

    @tornado.gen.coroutine
    def get(self):
        """Handle GET: recommend for query parameters ``uid`` and ``size``."""
        uid = self.get_argument('uid', None)
        if uid is None:
            # Only logged: the request still proceeds with uid=None.
            logger.warning('请求参数不正确,无uid')

        size = self._parse_size(self.get_argument('size', self.DEFAULT_SIZE))

        ret = yield self.run(uid, size)
        self.write(ret)

    @tornado.gen.coroutine
    def post(self):
        """Handle POST: recommend for ``uid`` and ``size`` from the JSON body."""
        param = json.loads(self.request.body.decode('utf-8'))
        uid = param.get('uid', None)
        # Coerce size the same way the GET handler does.
        size = self._parse_size(param.get('size', self.DEFAULT_SIZE))

        if uid is None:
            # Only logged: the request still proceeds with uid=None.
            logger.warning('请求参数不正确,无uid')

        ret = yield self.run(uid, size)
        self.write(ret)

    @run_on_executor
    def run(self, uid, size=100):
        """Run the recommender and build the response payload.

        Any exception raised by the recommender is logged and turned into an
        error payload (``code`` 1, empty ``data``) instead of propagating.
        """
        logger.info('request@@uid=%s@@size=%s', uid, size)
        try:
            start_time = time.time()
            recommend_result = recommenderByXgb.recommend(uid, size=size, is_merge=True)
            logger.info('request@@uid=%s@@size=%s, cost %s ms', uid, size, (time.time()-start_time)*1000)
            ret = {
                'status': 'success',
                'code': 0,
                'data': recommend_result,
                'total_count': len(recommend_result),
            }
        except Exception:
            logger.error('执行推荐函数报错', exc_info=True)
            ret = {
                'status': 'error',
                'code': 1,
                'data': [],
                'total_count': 0,
            }
        ret_str = json.dumps(ret, ensure_ascii=False)
        logger.info('response@@uid=%s@@ret=%s', uid, ret_str)
        return ret
# Script entry point: parse command-line options and start the Tornado HTTP server.
if __name__ == '__main__':
# The listening port comes from the 'port' option (default 8868).
tornado.options.define('port', default=8868, type=int, help='服务启动的端口号')
tornado.options.parse_command_line()
# NOTE(review): this Application is rebound by the next statement before it is
# used; it looks like the pre-change line of the diff -- confirm and drop it.
app = tornado.web.Application(handlers=[(r'/ai_counselor_recommend', RecommendHandler)], autoreload=False, debug=False)
# Routes: /ai_counselor_recommend -> RecommendHandler,
#         /ai_counselor_recommend/xgb/v1 -> RecommendXgbHandler.
# NOTE(review): autoreload=True is combined with debug=False here, while the
# previous statement used autoreload=False -- confirm this is intended.
app = tornado.web.Application(handlers=[(r'/ai_counselor_recommend', RecommendHandler),
(r'/ai_counselor_recommend/xgb/v1', RecommendXgbHandler)]
, autoreload=True, debug=False)
http_server = tornado.httpserver.HTTPServer(app)
http_server.listen(tornado.options.options.port)
# Starts the IOLoop; no statements follow this call in the block.
tornado.ioloop.IOLoop.instance().start()
# -*- coding: utf-8 -*-
import os
import json
import pandas as pd
def get_project_path():
......@@ -21,4 +23,15 @@ def get_conf_path():
def get_model_path():
    """Return the path of the ``model`` entry directly under the project root."""
    return os.path.join(get_project_path(), 'model')
\ No newline at end of file
return os.path.join(project_path, 'model')
def read_user_encoder_dict():
    """Load ``model_data/user_encoder_json_data.json`` from the project root.

    Returns:
        The parsed JSON document (presumably a dict that maps
        ``<feature>_encoder`` names to value -> integer-code tables; verify
        against the data file).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    encoder_path = os.path.join(get_project_path(), 'model_data/user_encoder_json_data.json')
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(encoder_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def read_counselors():
    """Read ``model_data/doctor_profile_selected_feature.csv`` into a DataFrame.

    The first CSV column is used as the index and the ``doctor_id`` column is
    parsed as ``str``.
    """
    csv_path = os.path.join(get_project_path(), 'model_data/doctor_profile_selected_feature.csv')
    return pd.read_csv(csv_path, sep=',', index_col=0, dtype={'doctor_id': str})
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment