数据源配置

PAI-Rec 支持多种数据源的操作,只要有相关的数据源配置,PAI-Rec 启动时就会进行相应的初始化。在其他配置中需要相关的数据源,只需要引用相关的名称即可。

Redis

 "RedisConfs": { // redis 配置
        "redis_default": { // 自定义名称
            "Host": "127.0.0.1",
            "Port": 6379,
            "Password": "xxx",
            "MaxIdle": 3,
            "ConnectTimeout":30,
            "ReadTimeout":50,
            "WriteTimeout":50
        }
    }
  • redis_default 是自定义的数据源名称,其他配置通过该名称引用此数据源

Hologres

 "HologresConfs": { // hologres 配置
      "holo-pai": { // 自定义名称
        "DSN": "postgres://{ACCESSID}:{ACCESSKEY}@{HOST}/{DATABASENAME}?sslmode=disable&connect_timeout=1"
      }
  }

Datahub

  "DatahubConfs": {
    "pairec_callback_dh": {
      "Endpoint": "http://dh-cn-hangzhou-int-vpc.aliyuncs.com",
      "AccessId": "",
      "AccessKey": "",
      "ProjectName": "",
      "TopicName": "pairec_callback_log",
      "Schemas": [
        {
          "Field": "request_id",
          "Type": "string"
        },
        {
          "Field": "module",
          "Type": "string"
        },
        {
          "Field": "scene",
          "Type": "string"
        },
        {
          "Field": "request_time",
          "Type": "integer"
        },
        {
          "Field": "user_features",
          "Type": "string"
        },
        {
          "Field": "item_features",
          "Type": "string"
        },
        {
          "Field": "request_info",
          "Type": "string"
        },
        {
          "Field": "user_id",
          "Type": "string"
        },
        {
          "Field": "item_id",
          "Type": "string"
        },
        {
          "Field": "raw_features",
          "Type": "string"
        },
        {
          "Field": "generate_features",
          "Type": "string"
        },
        {
          "Field": "context_features",
          "Type": "string"
        }
      ]
    }
  }

如果指定了 Schemas,PAI-Rec 会尝试自动创建对应的 topic。如果 topic 已经创建,可以省略 Schemas,简化配置如下:

"DatahubConfs": {
    "pairec_callback_dh": {
      "Endpoint": "http://dh-cn-hangzhou-int-vpc.aliyuncs.com",
      "AccessId": "",
      "AccessKey": "",
      "ProjectName": "",
      "TopicName": "pairec_callback_log"
    }
}

Kafka

  "KafkaConfs": { // kafka 配置
    "kafka": { // 自定义名称
      "BootstrapServers": "172.16.228.104:9092,172.16.228.103:9092,172.16.228.102:9092",
      "Topic": "pairec_log"
    }
  }

SLS(日志)

{
  "SlsConfs": {
    "sls-log": { // 自定义的 sls 数据源名称
      "AccessKeyId": "",
      "AccessKeySecret": "",
      "Endpoint": "cn-beijing-intranet.log.aliyuncs.com",
      "ProjectName": "pairec",
      "LogstoreName": "pairec_log"
    }
  }
}

TableStore(OTS)

  "TableStoreConfs":{ // ots 配置
  	"ots-default":{ // 自定义名称
  		"EndPoint":"",
  		"InstanceName":"",
  		"AccessKeyId":"",
  		"AccessKeySecret":""
  		}
  }

BE 引擎配置

  "BEConfs": { // be 引擎配置
    "be-test": { // 自定义名称
      "Username": "xxx",
      "Password": "",
      "Endpoint": "http://xxx.aime.aliyuncs.com"
    }
  }

GraphCompute(igraph)

{
  "GraphConfs": {
    "graph_test": {
      "Host": "http://xxx.igraph.aliyuncs.com",
      "UserName": "test",
      "Passwd": "123456"
    }
  }
}

FeatureStore(特征平台)

{
  "FeatureStoreConfs": {
    "pairec-fs": {
      "Host": "xxx",
      "Token": "xxx",
      "ProjectName": "demo_rank"
    }
  }
}

数据源的各个 Confs 都是 map 结构,key 为自定义的数据源名称,value 为该数据源各自的配置。如果要在代码中使用已配置的数据源,可以按以下方式通过名称获取:

  • redis : redisdb.GetRedis(redis_name)

  • mysql: mysqldb.GetMysql(mysql_name)

  • hologres: holo.GetPostgres(hologres_name)

  • ots : tablestoredb.GetTableStore(ots_name)

  • hbase: hbase.GetHBase(hbase_name)

  • kafka: datasource.GetKafkaProducer(kafka_name)

  • Clickhouse: clickhouse.GetClickHouse(clickhouse_name)

  • BeEngine: beengine.GetBeClient(be_name)

  • GraphClient: graph.GetGraphClient(graph_name)

  • datahub: datahub.GetDatahub(datahub_name)