实体标注
百度 官方文档
结合上下文,识别文本中的实体并将其关联到百科知识库中的唯一实体对象,同时输出实体在当前上下文最合适的上位概念,以达到对文本中的实体进行辨别的作用
基本说明:
接口地址:https://aip.baidubce.com/rpc/2.0/kg/v1/cognitive/entity_annotation
返回格式:json
请求方式:post
请求示例:https://aip.baidubce.com/rpc/2.0/kg/v1/cognitive/entity_annotation?access_token=test
请求参数说明:
名称 类型 必填 说明
data string 必填 需要进行实体标注的文本字符串(最多64个汉字,utf8编码)
返回参数说明:
名称 类型 说明
log_id int 请求logid
entity_annotation array 实体标注结果的数组集合
_bdbkKgId string 百科newid
_bdbkUrl string 百科url
annoType string 标注类型:Instance | Category | Property
concept array 概念标注结果
+level1 string 一级概念
+level2 string 二级概念
confidence string 实体关联至该@id的置信度
desc string 实体的简介
mainReqRankList array 主需求实体,列出所有候选实体信息,根据热度从高到低排列
importance string 关联的实体在输入中的核心程度,越重要打分越高,打分区间为[0, 1]
mention string 实体在query中的名字
offset string 实体在query中的位置偏移
status string 用于对关联结果进行标识(位于entity_annotation层次下),包括LINKED(正常关联)| LINKED_MAINREQ(主需求关联)| NIL(NIL实体,即不在库中)| UNK(未知)四种状态。只有当status值为LINKED或LINKED_MAINREQ时,才为有效的关联结果;NIL及UNK可以认为是mention识别的结果,但无法和库中已有id进行关联
JSON返回示例:
{
	"log_id": 6367018173853945311,
	"entity_annotation": [{
			"status": "LINKED",
			"confidence": "0.991616",
			"concept": {
				"level1": "人物",
				"level2": "文化人物,娱乐人物"
			},
			"_bdbkKgId": "114923",
			"mention": "刘德华",
			"_bdbkUrl": "http://baike.baidu.com/item/%E5%88%98%E5%BE%B7%E5%8D%8E/114923",
			"offset": "0",
			"desc": "中国香港男演员、歌手、词作人"
		},
		{
			"status": "LINKED",
			"confidence": "0.817889",
			"concept": {
				"level1": "语言文化",
				"level2": "文字词汇"
			},
			"_bdbkKgId": "827",
			"mention": "老婆",
			"_bdbkUrl": "http://baike.baidu.com/item/%E8%80%81%E5%A9%86/827",
			"offset": "4",
			"desc": "汉语词语"
		}
	]
}
服务级错误码参照
错误码 说明
100 包含了无效或错误参数,请检查代码
110 Access Token失效
111 Access token过期
282000 内部错误
282356 输入长度超限
完整教学代码示例
<?php
/**
 * Created by PhpStorm.
 * User: FZS
 * Time: 2019/3/13 17:10
 */
/**
 * Client for the Baidu entity-annotation API.
 *
 * Exchanges an API Key / Secret Key pair for an OAuth access token and then
 * posts text to the entity_annotation endpoint. Failures are reported through
 * return values (false / null), never through exceptions.
 */
class freeApi{
    private $apiKey = false; // Baidu application API Key (sent as OAuth client_id)
    private $secretKey = false; // Baidu application Secret Key (sent as OAuth client_secret)
    private $tokenUrl = 'https://aip.baidubce.com/oauth/2.0/token';
    private $apiUrl = 'https://aip.baidubce.com/rpc/2.0/kg/v1/cognitive/entity_annotation';

    /**
     * @param string $apikey    API Key of the Baidu application
     * @param string $secretkey Secret Key of the Baidu application
     */
    public function __construct($apikey,$secretkey){
        $this->apiKey = $apikey;
        $this->secretKey = $secretkey;
    }

    /**
     * Request an access token using the client_credentials grant.
     *
     * @return array|null decoded token response, or null when the request
     *                    failed or the body was not valid JSON
     */
    public function getToken(){
        $params = [
            'grant_type' => 'client_credentials',
            'client_id'  => $this->apiKey,
            'client_secret' => $this->secretKey,
        ];
        $params = $this->handleUrl($params);
        return $this->returnArray($this->freeApiCurl($this->tokenUrl,$params,1));
    }

    /**
     * Build a "k1=v1&k2=v2" string from a key/value map.
     *
     * Values are url-encoded; keys are emitted as-is.
     *
     * @param array $params
     * @return string empty string when $params is empty
     */
    private function handleUrl($params){
        $pairs = [];
        foreach ($params as $k => $v) {
            $pairs[] = $k . '=' . urlencode($v);
        }
        return implode('&', $pairs);
    }

    /**
     * Decode a JSON document into PHP data (objects become associative arrays).
     *
     * @param string|false|null $content raw response body; false/null/'' (a
     *                                   failed or empty transfer) yields null
     * @return mixed decoded value, or null when there is nothing to decode or
     *               the JSON is invalid
     */
    public function returnArray($content){
        // A failed transfer hands us false; do not feed that to json_decode().
        if ($content === false || $content === null || $content === '') {
            return null;
        }
        return json_decode($content,true);
    }

    /**
     * Run entity annotation on a piece of text.
     *
     * @param string $data text to annotate; defaults to the tutorial sample
     * @return array|null decoded API response. When no access token could be
     *                    obtained, the decoded token-endpoint response is
     *                    returned instead if there is one (so the caller can
     *                    see the error), otherwise null.
     */
    public function getResult($data = "刘德华的老婆"){
        $token = $this->getToken();
        if (!is_array($token) || !isset($token['access_token'])) {
            // Do not call the API with an empty token.
            return is_array($token) ? $token : null;
        }

        $payload = json_encode(['data' => $data]);
        if ($payload === false) {
            // json_encode() fails e.g. on input that is not valid UTF-8.
            return null;
        }

        $url = $this->apiUrl.'?access_token='.urlencode($token['access_token']);
        return $this->returnArray($this->jsonPost($url,$payload));
    }

    /**
     * Perform an HTTP request and return the response body.
     *
     * @param  string $url [URL to request]
     * @param  string|false $params [query string (GET) or request body (POST)]
     * @param  int $ispost [truthy to send a POST, otherwise GET]
     * @return  string|false response body, or false when the transfer failed
     */
    public function freeApiCurl($url,$params=false,$ispost=0){
        // For GET the parameters travel in the query string; for POST in the body.
        $target = (!$ispost && $params) ? $url.'?'.$params : $url;
        $ch = $this->newHandle($target);
        if ($ch === false) {
            return false;
        }
        curl_setopt( $ch, CURLOPT_HTTP_VERSION , CURL_HTTP_VERSION_1_1 );
        curl_setopt( $ch, CURLOPT_USERAGENT , 'chuanshuoapi' );
        if( $ispost )
        {
            curl_setopt( $ch , CURLOPT_POST , true );
            curl_setopt( $ch , CURLOPT_POSTFIELDS , $params );
        }
        return $this->execute($ch);
    }

    /**
     * POST a JSON document and return the response body.
     *
     * @param string $url       [URL to request]
     * @param string $data_json [already-encoded JSON request body]
     * @return string|false response body, or false when the transfer failed
     */
    public function jsonPost($url,$data_json){
        $ch = $this->newHandle($url);
        if ($ch === false) {
            return false;
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/json'));
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS,$data_json);
        return $this->execute($ch);
    }

    /**
     * Create a cURL handle with the options shared by every request.
     *
     * @param string $url
     * @return resource|object|false cURL handle, or false if it could not be created
     */
    private function newHandle($url){
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60);
        curl_setopt($ch, CURLOPT_TIMEOUT, 60);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        return $ch;
    }

    /**
     * Execute a prepared handle and always release it, even on failure.
     *
     * @param resource|object $ch cURL handle
     * @return string|false response body, or false when the transfer failed
     */
    private function execute($ch){
        $response = curl_exec($ch);
        curl_close($ch);
        return $response;
    }
}
// Demo run: annotate the built-in sample sentence and dump the decoded response.
// NOTE(review): the API Key / Secret Key are committed in plain text here —
// consider loading them from configuration instead.
$api = new freeApi(
    'qtBw5PXDmLgl3wHXpM75yiG3',
    'IVOoNrmtHtrzhHkabNcNb85t1M2dSeep'
);
$result = $api->getResult();
var_dump($result);