PHP采集程序常用函数
2023-06-13 09:14:07 时间
//取得当前脚本的网址
/**
 * Return the URL (path plus query string) of the currently executing script.
 *
 * Uses $_SERVER['REQUEST_URI'] when it is non-empty; otherwise falls back to
 * $_SERVER['PHP_SELF'], appending "?" and $_SERVER['QUERY_STRING'] when a
 * query string is present.
 *
 * @return string The current script URL, or '' when no usable key is set.
 */
function get_php_url()
{
    if (!empty($_SERVER['REQUEST_URI'])) {
        return $_SERVER['REQUEST_URI'];
    }

    // Guard the lookup so a missing PHP_SELF does not raise an undefined-index notice.
    $scriptName = isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '';
    if (empty($_SERVER['QUERY_STRING'])) {
        return $scriptName;
    }

    return $scriptName . '?' . $_SERVER['QUERY_STRING'];
}
/**
 * Convert full-width digits to half-width (ASCII) digits and strip the rest.
 *
 * After the digit conversion, every character other than 0-9 and "." is
 * removed, together with a run of zeros at the very start of the input.
 *
 * @param string|int $fnum Text that may contain full-width digits.
 * @return string|int The cleaned numeric string, or integer 0 when nothing is left.
 */
function GetAlabNum($fnum)
{
    // The search table must hold the FULL-WIDTH forms (U+FF10..U+FF19);
    // with ASCII digits on both sides the conversion would be a no-op.
    $fullWidth = ['０', '１', '２', '３', '４', '５', '６', '７', '８', '９'];
    $halfWidth = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // preg_replace replaces ereg_replace, which no longer exists as of PHP 7.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum === null || $fnum === '') {
        return 0;
    }

    return $fnum;
}
/**
 * Convert plain text into HTML-safe markup.
 *
 * Angle brackets are turned into the entities &lt; and &gt;, and every run of
 * CR/LF characters becomes a single "<br/>" followed by CRLF.
 *
 * NOTE(review): the first replacement's search string was empty in the
 * received source (whitespace collapsed). It is reconstructed here as
 * "two ASCII spaces -> one ideographic space (U+3000)" - confirm against the
 * upstream original.
 *
 * @param string $txt Plain text.
 * @return string HTML fragment.
 */
function Text2Html($txt)
{
    $txt = str_replace('  ', '　', $txt);
    $txt = str_replace('<', '&lt;', $txt);
    $txt = str_replace('>', '&gt;', $txt);

    // Greedy match on purpose: with the former "U" modifier each CR and LF
    // matched separately, so one CRLF produced two <br/> tags.
    $txt = preg_replace('/[\r\n]+/', "<br/>\r\n", $txt);

    return $txt;
}
/**
 * Neutralise HTML markup by escaping angle brackets.
 *
 * "<" becomes "&lt;" and ">" becomes "&gt;"; no other character is touched.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with angle brackets replaced by entities.
 */
function ClearHtml($str)
{
    // The replacement side must be the entity, otherwise the call is a no-op.
    return str_replace(['<', '>'], ['&lt;', '&gt;'], $str);
}
/**
 * Rewrite root-relative href/src attributes into absolute URLs.
 *
 * The scheme (http, https or ftp) and host are taken from $feed_url and
 * prefixed to every double-quoted href="/..." and src="/..." in $content.
 * The content is returned unchanged when $feed_url yields no host or no
 * recognised scheme.
 *
 * @param string $content  HTML to rewrite.
 * @param string $feed_url URL of the page the HTML came from.
 * @return string The rewritten HTML.
 */
function relative_to_absolute($content, $feed_url)
{
    // $protocol[0] holds the scheme prefix such as "http://", when present.
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Reduce the feed URL to its host: drop the scheme, then everything from the first "/".
    $server_url = preg_replace('/(http|https|ftp|news):\/\//', '', $feed_url);
    $server_url = preg_replace('/\/.*/', '', $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    // Escape "\" and "$" so the host can never be read as a back-reference.
    $base = strtr($protocol[0] . $server_url, ['\\' => '\\\\', '$' => '\\$']);

    // (?!\/) leaves protocol-relative URLs ("//cdn.example.org/x") untouched.
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="' . $base . '/', $content);
}
/**
 * Extract every link (target and anchor text) from an HTML fragment.
 *
 * Only anchors whose first attribute is href are matched, and the anchor
 * text must not contain ">" (so anchors wrapping other tags are skipped).
 * The href value may be double-quoted, single-quoted or unquoted.
 *
 * @param string $code HTML source.
 * @return array{name: string[], url: string[]} Parallel lists of anchor texts and URLs.
 */
function get_all_url($code)
{
    // ["\'] lists only the two quote characters (no stray "|"), and the URL
    // class excludes whitespace so an unquoted href stops at the next attribute.
    preg_match_all(
        '/<a\s+href=["\']?([^>"\'\s]+)["\']?\s*[^>]*>([^>]+)<\/a>/i',
        $code,
        $arr
    );

    return ['name' => $arr[2], 'url' => $arr[1]];
}
/**
 * Return the text found between a start marker and an end marker.
 *
 * The subject is split on every occurrence of $start; the segment right
 * after the first occurrence is then cut at the first $end.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text; '' when $start does not occur;
 *                     null when either marker is empty.
 */
function get_tag_data($str, $start, $end)
{
    if ($start == '' || $end == '') {
        return null;
    }

    $segments = explode($start, $str);
    if (!isset($segments[1])) {
        // $start never occurs: return '' instead of reading an undefined index.
        return '';
    }

    $inner = explode($end, $segments[1]);

    return $inner[0];
}
/**
 * Convert each row of an HTML table into one CSV-style line.
 *
 * Every cell is wrapped in double quotes and cells are joined with commas,
 * e.g. <tr><td>a</td><td>b</td></tr> becomes "a","b". Quotes inside a cell
 * are not escaped. Text after the last </tr> is discarded.
 *
 * NOTE(review): the two whitespace-removal search strings were empty in the
 * received source; they are reconstructed as " " and "&nbsp;" - confirm
 * against the upstream original.
 *
 * @param string $table HTML containing the table.
 * @return string[] One CSV-style string per table row.
 */
function get_tr_array($table)
{
    // Opening <td> becomes the opening quote; </td> closes the quote and adds a comma.
    $table = preg_replace('/<td[^>]*?>/si', '"', $table);
    $table = str_replace('</td>', '",', $table);
    $table = str_replace('</tr>', '{tr}', $table);

    // Strip all remaining HTML tags.
    $table = preg_replace('/<[\/\!]*?[^<>]*?>/si', '', $table);

    // Strip whitespace: line breaks with their trailing blanks, spaces, &nbsp;.
    $table = preg_replace('/([\r\n])[\s]+/', '', $table);
    $table = str_replace(' ', '', $table);
    $table = str_replace('&nbsp;', '', $table);

    // Each row ends with ",{tr}"; the piece after the last marker is not a row.
    $rows = explode(',{tr}', $table);
    array_pop($rows);

    return $rows;
}
/**
 * Convert an HTML table into a two-dimensional array of cell texts.
 *
 * The outer array holds one entry per </tr>; each entry lists that row's
 * cell texts in order. Text after the last </tr> or after a row's last
 * </td> is discarded.
 *
 * NOTE(review): the two whitespace-removal search strings were empty in the
 * received source; they are reconstructed as " " and "&nbsp;" - confirm
 * against the upstream original.
 *
 * @param string $table HTML containing the table.
 * @return array<int, string[]> Rows of cell texts; an empty array when no row is found.
 */
function get_td_array($table)
{
    // Drop the opening tags and turn the closing tags into split markers.
    $table = preg_replace('/<table[^>]*?>/si', '', $table);
    $table = preg_replace('/<tr[^>]*?>/si', '', $table);
    $table = preg_replace('/<td[^>]*?>/si', '', $table);
    $table = str_replace('</tr>', '{tr}', $table);
    $table = str_replace('</td>', '{td}', $table);

    // Strip all remaining HTML tags.
    $table = preg_replace('/<[\/\!]*?[^<>]*?>/si', '', $table);

    // Strip whitespace: line breaks with their trailing blanks, spaces, &nbsp;.
    $table = preg_replace('/([\r\n])[\s]+/', '', $table);
    $table = str_replace(' ', '', $table);
    $table = str_replace('&nbsp;', '', $table);

    $rows = explode('{tr}', $table);
    array_pop($rows);

    // Start from an empty list so a table without rows yields [] instead of
    // an undefined variable.
    $td_array = [];
    foreach ($rows as $tr) {
        $cells = explode('{td}', $tr);
        array_pop($cells);
        $td_array[] = $cells;
    }

    return $td_array;
}
/**
 * Return all ASCII-letter words found in a string, sorted.
 *
 * A word is a maximal run of a-z / A-Z characters. The result is sorted with
 * sort(), so it is re-indexed from 0.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When true (default), duplicate words are removed.
 * @return string[] Sorted list of words.
 */
function split_en_str($str, $distinct = true)
{
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];

    if ($distinct == true) {
        $words = array_unique($words);
    }

    sort($words);

    return $words;
}
// This file defines get_php_url() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('get_php_url')) {
    /**
     * Return the URL (path plus query string) of the currently executing script.
     *
     * Uses $_SERVER['REQUEST_URI'] when non-empty; otherwise $_SERVER['PHP_SELF']
     * with "?" and $_SERVER['QUERY_STRING'] appended when a query string exists.
     *
     * @return string The current script URL, or '' when no usable key is set.
     */
    function get_php_url()
    {
        if (!empty($_SERVER['REQUEST_URI'])) {
            return $_SERVER['REQUEST_URI'];
        }

        // Guard the lookup so a missing PHP_SELF does not raise a notice.
        $scriptName = isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '';
        if (empty($_SERVER['QUERY_STRING'])) {
            return $scriptName;
        }

        return $scriptName . '?' . $_SERVER['QUERY_STRING'];
    }
}
// This file defines GetAlabNum() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('GetAlabNum')) {
    /**
     * Convert full-width digits to half-width (ASCII) digits and strip the rest.
     *
     * After conversion, every character other than 0-9 and "." is removed,
     * together with a run of zeros at the very start of the input.
     *
     * @param string|int $fnum Text that may contain full-width digits.
     * @return string|int The cleaned numeric string, or integer 0 when nothing is left.
     */
    function GetAlabNum($fnum)
    {
        // The search table must hold the FULL-WIDTH forms (U+FF10..U+FF19).
        $fullWidth = ['０', '１', '２', '３', '４', '５', '６', '７', '８', '９'];
        $halfWidth = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
        $fnum = str_replace($fullWidth, $halfWidth, $fnum);

        // preg_replace replaces ereg_replace, which no longer exists as of PHP 7.
        $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

        if ($fnum === null || $fnum === '') {
            return 0;
        }

        return $fnum;
    }
}
// This file defines Text2Html() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('Text2Html')) {
    /**
     * Convert plain text into HTML-safe markup.
     *
     * Angle brackets become &lt; / &gt;, and every run of CR/LF characters
     * becomes a single "<br/>" followed by CRLF.
     *
     * NOTE(review): the first replacement's search string was empty in the
     * received source; reconstructed as "two ASCII spaces -> one ideographic
     * space (U+3000)" - confirm against the upstream original.
     *
     * @param string $txt Plain text.
     * @return string HTML fragment.
     */
    function Text2Html($txt)
    {
        $txt = str_replace('  ', '　', $txt);
        $txt = str_replace('<', '&lt;', $txt);
        $txt = str_replace('>', '&gt;', $txt);

        // Greedy on purpose: with the former "U" modifier one CRLF produced two <br/>.
        $txt = preg_replace('/[\r\n]+/', "<br/>\r\n", $txt);

        return $txt;
    }
}
// This file defines ClearHtml() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('ClearHtml')) {
    /**
     * Neutralise HTML markup by escaping angle brackets.
     *
     * @param string $str Text that may contain HTML tags.
     * @return string The text with "<" and ">" replaced by "&lt;" and "&gt;".
     */
    function ClearHtml($str)
    {
        // The replacement side must be the entity, otherwise the call is a no-op.
        return str_replace(['<', '>'], ['&lt;', '&gt;'], $str);
    }
}
// This file defines relative_to_absolute() a second time here; the
// function_exists() guard keeps the repeated definition from being fatal.
if (!function_exists('relative_to_absolute')) {
    /**
     * Rewrite root-relative href/src attributes into absolute URLs.
     *
     * The scheme (http, https or ftp) and host are taken from $feed_url and
     * prefixed to every double-quoted href="/..." and src="/..." in $content.
     * The content is returned unchanged when $feed_url yields no host or no
     * recognised scheme.
     *
     * @param string $content  HTML to rewrite.
     * @param string $feed_url URL of the page the HTML came from.
     * @return string The rewritten HTML.
     */
    function relative_to_absolute($content, $feed_url)
    {
        // $protocol[0] holds the scheme prefix such as "http://", when present.
        preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

        // Reduce the feed URL to its host.
        $server_url = preg_replace('/(http|https|ftp|news):\/\//', '', $feed_url);
        $server_url = preg_replace('/\/.*/', '', $server_url);

        if ($server_url == '' || !isset($protocol[0])) {
            return $content;
        }

        // Escape "\" and "$" so the host can never be read as a back-reference.
        $base = strtr($protocol[0] . $server_url, ['\\' => '\\\\', '$' => '\\$']);

        // (?!\/) leaves protocol-relative URLs ("//cdn.example.org/x") untouched.
        return preg_replace('/(href|src)="\/(?!\/)/', '$1="' . $base . '/', $content);
    }
}
// This file defines get_all_url() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('get_all_url')) {
    /**
     * Extract every link (target and anchor text) from an HTML fragment.
     *
     * Only anchors whose first attribute is href are matched, and the anchor
     * text must not contain ">".
     *
     * @param string $code HTML source.
     * @return array{name: string[], url: string[]} Parallel lists of anchor texts and URLs.
     */
    function get_all_url($code)
    {
        // Quote classes list only the two quote characters; the URL class
        // excludes whitespace so an unquoted href stops at the next attribute.
        preg_match_all(
            '/<a\s+href=["\']?([^>"\'\s]+)["\']?\s*[^>]*>([^>]+)<\/a>/i',
            $code,
            $arr
        );

        return ['name' => $arr[2], 'url' => $arr[1]];
    }
}
// This file defines get_tag_data() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('get_tag_data')) {
    /**
     * Return the text found between a start marker and an end marker.
     *
     * @param string $str   Text to search.
     * @param string $start Opening marker.
     * @param string $end   Closing marker.
     * @return string|null The enclosed text; '' when $start does not occur;
     *                     null when either marker is empty.
     */
    function get_tag_data($str, $start, $end)
    {
        if ($start == '' || $end == '') {
            return null;
        }

        $segments = explode($start, $str);
        if (!isset($segments[1])) {
            // $start never occurs: return '' instead of reading an undefined index.
            return '';
        }

        $inner = explode($end, $segments[1]);

        return $inner[0];
    }
}
// This file defines get_tr_array() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('get_tr_array')) {
    /**
     * Convert each row of an HTML table into one CSV-style line.
     *
     * Every cell is wrapped in double quotes and cells are joined with
     * commas; quotes inside a cell are not escaped.
     *
     * NOTE(review): the two whitespace-removal search strings were empty in
     * the received source; reconstructed as " " and "&nbsp;" - confirm.
     *
     * @param string $table HTML containing the table.
     * @return string[] One CSV-style string per table row.
     */
    function get_tr_array($table)
    {
        // Opening <td> becomes the opening quote; </td> closes it and adds a comma.
        $table = preg_replace('/<td[^>]*?>/si', '"', $table);
        $table = str_replace('</td>', '",', $table);
        $table = str_replace('</tr>', '{tr}', $table);

        // Strip all remaining HTML tags.
        $table = preg_replace('/<[\/\!]*?[^<>]*?>/si', '', $table);

        // Strip whitespace.
        $table = preg_replace('/([\r\n])[\s]+/', '', $table);
        $table = str_replace(' ', '', $table);
        $table = str_replace('&nbsp;', '', $table);

        // The piece after the last ",{tr}" marker is not a row.
        $rows = explode(',{tr}', $table);
        array_pop($rows);

        return $rows;
    }
}
// This file defines get_td_array() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('get_td_array')) {
    /**
     * Convert an HTML table into a two-dimensional array of cell texts.
     *
     * NOTE(review): the two whitespace-removal search strings were empty in
     * the received source; reconstructed as " " and "&nbsp;" - confirm.
     *
     * @param string $table HTML containing the table.
     * @return array<int, string[]> Rows of cell texts; an empty array when no row is found.
     */
    function get_td_array($table)
    {
        // Drop the opening tags and turn the closing tags into split markers.
        $table = preg_replace('/<table[^>]*?>/si', '', $table);
        $table = preg_replace('/<tr[^>]*?>/si', '', $table);
        $table = preg_replace('/<td[^>]*?>/si', '', $table);
        $table = str_replace('</tr>', '{tr}', $table);
        $table = str_replace('</td>', '{td}', $table);

        // Strip all remaining HTML tags.
        $table = preg_replace('/<[\/\!]*?[^<>]*?>/si', '', $table);

        // Strip whitespace.
        $table = preg_replace('/([\r\n])[\s]+/', '', $table);
        $table = str_replace(' ', '', $table);
        $table = str_replace('&nbsp;', '', $table);

        $rows = explode('{tr}', $table);
        array_pop($rows);

        // Start from an empty list so a table without rows yields [].
        $td_array = [];
        foreach ($rows as $tr) {
            $cells = explode('{td}', $tr);
            array_pop($cells);
            $td_array[] = $cells;
        }

        return $td_array;
    }
}
// This file defines split_en_str() a second time here; the function_exists()
// guard keeps the repeated definition from causing a fatal redeclaration.
if (!function_exists('split_en_str')) {
    /**
     * Return all ASCII-letter words found in a string, sorted.
     *
     * @param string $str      Text to scan.
     * @param bool   $distinct When true (default), duplicate words are removed.
     * @return string[] Sorted, re-indexed list of words.
     */
    function split_en_str($str, $distinct = true)
    {
        preg_match_all('/([a-zA-Z]+)/', $str, $match);
        $words = $match[1];

        if ($distinct == true) {
            $words = array_unique($words);
        }

        sort($words);

        return $words;
    }
}
相关文章
- php://input allow_url_include,php allow_url_include的应用和解释_PHP教程
- 【笔记php】如何使用PHP从JSON提取数据?
- 深入揭示:MySQL与PHP的联系(mysql与php关系)
- MySQL求和算法在PHP中的应用(mysql求和php)
- 开发Linux下PHP模块开发:实现程序性能最大化(linux下php模块)
- 开发环境配置Linux实现PHP开发环境(linux配置php)
- PHP Fatal error: Uncaught Error: Call to undefined function pcntl_fork().. 开启php pcntl扩展实现多进程详解编程语言
- PHP与MySQL数据库之间的连接(php链接mysql)
- 学习Linux开启PHP学习之旅(linux开启php)
- 程序快速学会在Linux中执行PHP程序(linux执行php)
- Oracle与PHP:实现服务器端程序互通(oracle和php)
- 函数Linux下禁用PHP函数的步骤(linux关闭php)
- 安装完美:PHP与MySQL的默契(php和mysql安装)
- LINUX下编写PHP程序的技巧(linux编写php)
- 实现Linux版PHP动态编译——一案子的艰辛实践(php编译linux)
- MySQL封装,PHP解决方案(php封装mysql)
- PHP检测MySQL性能的方法(php检测mysql)
- 使用Linux命令行来优化你的PHP开发(linux命令行php)
- 构建LAMP环境 以增加PHP的MSSQL扩展(增加php扩展mssql)
- 优雅式PHP配置MySQL数据库(php 配置 mysql)
- 用PHP访问本地MSSQL数据库的实现方法(php访问本地mssql)
- 安装PHP扩展程序让网站支持Redis(安装php-redis)
- php-accelerator网站加速PHP缓冲的方法
- php论坛采集程序模拟登陆,抓取页面实现代码
- PHP变量的定义方法
- php下过滤html代码的函数提高程序安全性
- PHP数组操作汇总php数组的使用技巧
- discuz程序的PHP加密函数原理分析
- php多文件上传功能实现原理及代码
- Linux+php+apache+oracle环境搭建之CentOS下源码编译安装PHP