正则提取之–药品篇
[文章作者:陈毓端 若转载请标注原文链接:http://www.woyuw.com/?p=650]
今天帮一位老乡用正则表达式处理抓取来的药品数据,以提取药品的各项属性。由于数据中的关键字比较混乱,大体包括:'成分'、'适应症'、'注意事项'、'规格'、'药理用途'、'用法用量'等。
下面是实现的简单代码,没有做效率优化
- <?php
- /*
- @Author 陈毓端
- @E-mail itw1@163.com
- @Homepage http://blog.woyuw.com
- */
// Open the two database handles used by this script: the SQLite file that
// holds the scraped rows and a MySQL connection.
// NOTE(review): $mysqldbh is not used anywhere in the visible script; it is
// presumably meant for storing the extracted values later - confirm.
try {
    // Make every PDO failure throw instead of returning false silently.
    $pdoOptions = array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION);

    $dbh = new PDO("sqlite:./data.db3", null, null, $pdoOptions);
    $mysqldbh = new PDO("mysql:host=localhost;dbname=xxx", 'root', 'xxx', $pdoOptions);
    $mysqldbh->exec("SET NAMES 'utf8';");
} catch (PDOException $e) {
    echo $e->getMessage();
    // Without a connection nothing below can work; stop here rather than
    // crashing later on an undefined $dbh.
    exit(1);
}
/**
 * Split a text into sections introduced by an ordered list of keywords.
 *
 * The keywords are joined into a single pattern of the form
 * /kw1(.*)kw2(.*)...kwN(.*)$/iUs, so every keyword must occur in $str in the
 * given order for anything to match. The text following each keyword (up to
 * the next keyword, or to the end of the string for the last one) is captured.
 *
 * @param string $str   Text to search.
 * @param array  $array Keywords, in the order they are expected to appear.
 *
 * @return array Map of keyword => list of captured strings, as produced by
 *               preg_match_all() in its default pattern order. Each list is
 *               empty when the text does not match. An empty array is
 *               returned when the pattern could not be executed.
 */
function regular ($str,$array){
    $pattern = '/';
    foreach ($array as $keyword) {
        // Keywords are literal text: escape them so characters such as
        // '.', '(' or the '/' delimiter cannot alter or break the pattern.
        $pattern .= preg_quote((string) $keyword, '/') . '(.*)';
    }
    // i: case-insensitive, U: lazy quantifiers, s: '.' also matches newlines.
    $pattern .= '$/iUs';

    // Cast so a NULL column value is treated as an empty text.
    if (preg_match_all($pattern, (string) $str, $out) === false) {
        return array();
    }

    // $out[0] holds the full matches; capture group N belongs to keyword N-1.
    $sections = array();
    $group = 1;
    foreach ($array as $keyword) {
        $sections[$keyword] = $out[$group];
        $group++;
    }
    return $sections;
}
// Extract drug attributes from the first 30 scraped rows.
//
// Each template is an ordered keyword list handed to regular(). Templates are
// tried in the order listed and the first one that matches a row wins, so the
// more specific layouts must stay ahead of the generic ones.
$templates = array(
    array('成分','适应症','注意事项','规格'),
    array('成分','药理用途','用法用量'),
    array('成分','适应症','不良反应'),
    array('别名','外文名','药理作用','适应症','注意事项'),
    array('别名','药理作用','功能与主治','用法与用量','规格'),
    array('别名','外文名','适应症','注意事项'),
    array('别名','适应症','用量用法','不良反应'),
    array('别名','适应症','用量用法','注意事项','规格'),
    array('别名','适应症'),
);

$sql = "SELECT * FROM Content limit 30";
$rows = $dbh->query($sql);
if ($rows === false) {
    // In PDO's silent error mode a failed query is reported as false.
    $error = $dbh->errorInfo();
    echo 'Query failed: ' . $error[2];
    exit(1);
}

$values = array();
foreach ($rows as $row) {
    foreach ($templates as $keywords) {
        $value = regular($row['otccontent'], $keywords);
        // A template matched when its first keyword captured something.
        if (!empty($value[$keywords[0]])) {
            $values[] = $value;
            break;
        }
    }
    // Rows that fit none of the templates are skipped.
}
print_r($values);
- ?>
Categories: php
