zl程序教程

您现在的位置是:首页 >  后端

当前栏目

PHP采集程序常用函数

PHP程序 函数 常用 采集
2023-06-13 09:14:07 时间
//获得当前的脚本网址
/**
 * Return the URL (path plus query string) of the current request.
 *
 * Uses $_SERVER['REQUEST_URI'] when it is set and non-empty. Otherwise the
 * value is rebuilt from $_SERVER['PHP_SELF'] and $_SERVER['QUERY_STRING'].
 *
 * @return string The request path, with "?query" appended when a query
 *                string is present.
 */
function get_php_url()
{
    if (!empty($_SERVER['REQUEST_URI'])) {
        return $_SERVER['REQUEST_URI'];
    }

    // Guard the reads so a missing key does not raise an undefined-index warning.
    $scriptName = isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '';
    $query = isset($_SERVER['QUERY_STRING']) ? (string) $_SERVER['QUERY_STRING'] : '';

    // Compare against '' instead of using empty(): a query string of "0" is
    // valid and must not be dropped.
    if ($query === '') {
        return $scriptName;
    }

    return $scriptName . '?' . $query;
}
/**
 * Convert full-width digits to ASCII digits and reduce the input to a
 * plain numeric string.
 *
 * After the digit conversion, every character other than 0-9 and "." is
 * removed, as are zeros at the very start of the string.
 *
 * @param string $fnum Text that may contain full-width (U+FF10..U+FF19) digits.
 * @return string|int The cleaned numeric string, or integer 0 when nothing is left.
 */
function GetAlabNum($fnum)
{
    // Full-width digits U+FF10..U+FF19 are the UTF-8 byte sequences
    // EF BC 90 .. EF BC 99. Building them from bytes keeps the lookup intact
    // even if this file is re-encoded or normalised.
    // NOTE(review): this assumes the input is UTF-8 — confirm against callers.
    for ($digit = 0; $digit <= 9; $digit++) {
        $fullWidth = "\xEF\xBC" . chr(0x90 + $digit);
        $fnum = str_replace($fullWidth, (string) $digit, $fnum);
    }

    // preg_replace stands in for ereg_replace, which was removed in PHP 7.
    // The pattern is the same: drop anything that is not a digit or a dot,
    // and drop zeros anchored at the start of the string.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum === null || $fnum === '') {
        $fnum = 0;
    }

    return $fnum;
}
/**
 * Convert plain text into HTML-safe text with visible line breaks.
 *
 * "<" and ">" are replaced by their entities so the text cannot introduce
 * tags, and every line break becomes "<br/>" followed by CRLF. Ampersands
 * and quotes are left untouched.
 *
 * @param string $txt Plain text.
 * @return string Text with angle brackets escaped and "<br/>" line breaks.
 */
function Text2Html($txt)
{
    // NOTE(review): the original began with str_replace("", " ", $txt). An
    // empty search string makes that call a no-op, so it is omitted here.
    // Its search literal was probably lost when this snippet was copied —
    // confirm against the upstream source whether a whitespace substitution
    // should be restored.

    // The replacement targets must be the entities. Replacing "<" with "<"
    // escaped nothing.
    $txt = str_replace('<', '&lt;', $txt);
    $txt = str_replace('>', '&gt;', $txt);

    // One <br/> per line break. The previous pattern /[\r\n]{1,}/isU was
    // ungreedy, so it matched CR and LF separately and produced two <br/>
    // for each Windows (CRLF) line ending.
    $txt = preg_replace('/\r\n|\r|\n/', "<br/>\r\n", $txt);

    return $txt;
}
/**
 * Neutralise HTML markup by escaping angle brackets.
 *
 * Only "<" and ">" are converted (to "&lt;" and "&gt;"). Ampersands and
 * quotes pass through unchanged, so this is not a substitute for
 * htmlspecialchars() inside attribute values.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with "<" and ">" replaced by entities.
 */
function ClearHtml($str)
{
    // The replacement targets must be the entities. Replacing "<" with "<"
    // left the markup fully intact.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
/**
 * Rewrite root-relative href/src attributes as absolute URLs.
 *
 * The scheme and host are taken from $feed_url and prefixed to every
 * double-quoted href="/..." and src="/..." attribute in $content. The
 * content is returned unchanged when $feed_url has no http/https/ftp
 * scheme or no host can be extracted from it.
 *
 * @param string $content  HTML whose links should be rewritten.
 * @param string $feed_url URL of the document the content came from.
 * @return string The content with root-relative links made absolute.
 */
function relative_to_absolute($content, $feed_url)
{
    // $protocol[0] holds the scheme including "://", e.g. "https://".
    $hasProtocol = preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Host: strip the scheme, then everything from the first "/" onward.
    $server_url = preg_replace('/(http|https|ftp|news):\/\//', '', $feed_url);
    $server_url = preg_replace('/\/.*/', '', $server_url);

    if ($server_url == '' || !$hasProtocol) {
        return $content;
    }

    $base = $protocol[0] . $server_url;

    // The (?!\/) lookahead leaves protocol-relative URLs (href="//cdn...")
    // alone; prefixing the host to those would corrupt them. A callback is
    // used so "$" or "\" in the host is never read as a backreference.
    return preg_replace_callback(
        '/(href|src)="\/(?!\/)/',
        function ($match) use ($base) {
            return $match[1] . '="' . $base . '/';
        },
        $content
    );
}
/**
 * Extract every <a href=...>text</a> link from a block of HTML.
 *
 * The href may be double-quoted, single-quoted or unquoted. Anchors whose
 * text contains ">" (for example nested tags) are not matched.
 *
 * @param string $code HTML source.
 * @return array Two parallel lists: 'name' holds the link texts and 'url'
 *               the href values, in document order.
 */
function get_all_url($code)
{
    // The pattern is single-quoted so both quote characters can sit in the
    // character classes. Whitespace is excluded from the URL capture so an
    // unquoted href stops before the next attribute.
    $pattern = '/<a\s+href=["\']?([^>"\'\s]+)["\']?\s*[^>]*>([^>]+)<\/a>/i';
    preg_match_all($pattern, $code, $arr);

    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Return the text found between a start marker and an end marker.
 *
 * The string is split on $start; the piece that follows the first
 * occurrence is then cut at the first occurrence of $end. If $end does not
 * occur in that piece, the whole piece is returned.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text, '' when $start does not occur in
 *                     $str, or null when either marker is empty.
 */
function get_tag_data($str, $start, $end)
{
    if ($start == "" || $end == "") {
        return null;
    }

    $pieces = explode($start, $str);

    // When the start marker is absent there is no second piece. Reading
    // index 1 would raise an undefined-offset warning and pass null to explode().
    if (!isset($pieces[1])) {
        return '';
    }

    $inner = explode($end, $pieces[1]);

    return $inner[0];
}
/**
 * Convert each row of an HTML table into a CSV-style line.
 *
 * Every <td> cell is wrapped in double quotes and followed by a comma, all
 * other markup is stripped, and the text is split at each closing </tr>.
 * Double quotes inside a cell are not escaped. Closing tags are matched
 * case-sensitively ("</td>", "</tr>").
 *
 * @param string $table HTML containing the table.
 * @return array One string per row, for example '"a","b"'.
 */
function get_tr_array($table)
{
    // An opening <td ...> becomes the opening quote of a CSV field.
    $table = preg_replace('~<td[^>]*?>~si', '"', $table);
    // A closing </td> becomes the closing quote plus the field separator.
    $table = str_replace('</td>', '",', $table);
    // Mark row ends with a placeholder that survives the tag stripping below.
    $table = str_replace('</tr>', '{tr}', $table);
    // Strip all remaining HTML tags.
    $table = preg_replace('~<[\/\!]*?[^<>]*?>~si', '', $table);
    // Remove line breaks together with the whitespace that follows them.
    $table = preg_replace('~([\r\n])[\s]+~', '', $table);
    // NOTE(review): the original had two str_replace("", "", $table) calls
    // here. With an empty search string they were no-ops and are omitted.
    // Their search literals were probably lost when the snippet was copied
    // (possibly a space and "&nbsp;") — confirm against the upstream source
    // before restoring them.

    // Each row ends with ",{tr}" (the trailing field comma plus the marker).
    $rows = explode(',{tr}', $table);
    // Whatever follows the last row marker is not a row.
    array_pop($rows);

    return $rows;
}
/**
 * Convert an HTML table into a two-dimensional array of cell texts.
 *
 * Opening <table>, <tr> and <td> tags are removed, the closing </tr> and
 * </td> tags are turned into split markers, and all other markup is
 * stripped. Closing tags are matched case-sensitively.
 *
 * @param string $table HTML containing the table.
 * @return array List of rows, each a list of cell strings. Empty when the
 *               input contains no </tr>-terminated row.
 */
function get_td_array($table)
{
    // Drop the opening structural tags. Only the closing tags mark boundaries.
    $table = preg_replace('~<table[^>]*?>~si', '', $table);
    $table = preg_replace('~<tr[^>]*?>~si', '', $table);
    $table = preg_replace('~<td[^>]*?>~si', '', $table);
    // Turn row and cell ends into placeholders that survive tag stripping.
    $table = str_replace('</tr>', '{tr}', $table);
    $table = str_replace('</td>', '{td}', $table);
    // Strip all remaining HTML tags.
    $table = preg_replace('~<[\/\!]*?[^<>]*?>~si', '', $table);
    // Remove line breaks together with the whitespace that follows them.
    $table = preg_replace('~([\r\n])[\s]+~', '', $table);
    // NOTE(review): the original had two str_replace("", "", $table) calls
    // here. With an empty search string they were no-ops and are omitted.
    // Their search literals were probably lost when the snippet was copied
    // (possibly a space and "&nbsp;") — confirm against the upstream source
    // before restoring them.

    $rows = explode('{tr}', $table);
    // Whatever follows the last row marker is not a row.
    array_pop($rows);

    // Initialise the result so a table without rows yields an empty array
    // instead of an undefined variable.
    $td_array = array();
    foreach ($rows as $row) {
        $cells = explode('{td}', $row);
        // The piece after the last cell marker is not a cell.
        array_pop($cells);
        $td_array[] = $cells;
    }

    return $td_array;
}
/**
 * Return every run of ASCII letters (a-z, A-Z) in a string, sorted.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When true (the default), duplicate words are
 *                         removed. The comparison is case-sensitive.
 * @return array Sorted, zero-indexed list of the words found.
 */
function split_en_str($str, $distinct = true)
{
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];

    if ($distinct == true) {
        $words = array_unique($words);
    }

    // sort() also reindexes the list, closing the key gaps left by array_unique().
    sort($words);

    return $words;
}