-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrss_php.php
160 lines (148 loc) · 4.59 KB
/
rss_php.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
<?php
/*
RSS_PHP - the PHP DOM based RSS Parser
Author: <rssphp.net>
Published: 200801 :: blacknet :: via rssphp.net
RSS_PHP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY.
Usage:
See the documentation at http://rssphp.net/documentation
Examples:
Can be found online at http://rssphp.net/examples
*/
class rss_php {
public $document;
public $channel;
public $items;
/****************************
public load methods
***/
# load RSS by URL
public function load($url=false, $unblock=true) {
if($url) {
if($unblock) {
$this->loadParser(file_get_contents($url, false, $this->randomContext()));
} else {
$this->loadParser(file_get_contents($url));
}
}
}
# load raw RSS data
public function loadRSS($rawxml=false) {
if($rawxml) {
$this->loadParser($rawxml);
}
}
/****************************
public load methods
@param $includeAttributes BOOLEAN
return array;
***/
# return full rss array
public function getRSS($includeAttributes=false) {
if($includeAttributes) {
return $this->document;
}
return $this->valueReturner();
}
# return channel data
public function getChannel($includeAttributes=false) {
if($includeAttributes) {
return $this->channel;
}
return $this->valueReturner($this->channel);
}
# return rss items
public function getItems($includeAttributes=false) {
if($includeAttributes) {
return $this->items;
}
return $this->valueReturner($this->items);
}
/****************************
internal methods
***/
private function loadParser($rss=false) {
if($rss) {
$this->document = array();
$this->channel = array();
$this->items = array();
$DOMDocument = new DOMDocument;
$DOMDocument->strictErrorChecking = false;
$DOMDocument->loadXML($rss);
$this->document = $this->extractDOM($DOMDocument->childNodes);
}
}
private function valueReturner($valueBlock=false) {
if(!$valueBlock) {
$valueBlock = $this->document;
}
foreach($valueBlock as $valueName => $values) {
if(isset($values['value'])) {
$values = $values['value'];
}
if(is_array($values)) {
$valueBlock[$valueName] = $this->valueReturner($values);
} else {
$valueBlock[$valueName] = $values;
}
}
return $valueBlock;
}
private function extractDOM($nodeList,$parentNodeName=false) {
$itemCounter = 0;
foreach($nodeList as $values) {
if(substr($values->nodeName,0,1) != '#') {
if($values->nodeName == 'item') {
$nodeName = $values->nodeName.':'.$itemCounter;
$itemCounter++;
} else {
$nodeName = $values->nodeName;
}
$tempNode[$nodeName] = array();
if($values->attributes) {
for($i=0;$values->attributes->item($i);$i++) {
$tempNode[$nodeName]['properties'][$values->attributes->item($i)->nodeName] = $values->attributes->item($i)->nodeValue;
}
}
if(!$values->firstChild) {
$tempNode[$nodeName]['value'] = $values->textContent;
} else {
$tempNode[$nodeName]['value'] = $this->extractDOM($values->childNodes, $values->nodeName);
}
if(in_array($parentNodeName, array('channel','rdf:RDF'))) {
if($values->nodeName == 'item') {
$this->items[] = $tempNode[$nodeName]['value'];
} elseif(!in_array($values->nodeName, array('rss','channel'))) {
$this->channel[$values->nodeName] = $tempNode[$nodeName];
}
}
} elseif(substr($values->nodeName,1) == 'text') {
$tempValue = trim(preg_replace('/\s\s+/',' ',str_replace("\n",' ', $values->textContent)));
if($tempValue) {
$tempNode = $tempValue;
}
} elseif(substr($values->nodeName,1) == 'cdata-section'){
$tempNode = $values->textContent;
}
}
return $tempNode;
}
private function randomContext() {
$headerstrings = array();
$headerstrings['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.'.rand(0,2).'; en-US; rv:1.'.rand(2,9).'.'.rand(0,4).'.'.rand(1,9).') Gecko/2007'.rand(10,12).rand(10,30).' Firefox/2.0.'.rand(0,1).'.'.rand(1,9);
$headerstrings['Accept-Charset'] = rand(0,1) ? 'en-gb,en;q=0.'.rand(3,8) : 'en-us,en;q=0.'.rand(3,8);
$headerstrings['Accept-Language'] = 'en-us,en;q=0.'.rand(4,6);
$setHeaders = 'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'."\r\n".
'Accept-Charset: '.$headerstrings['Accept-Charset']."\r\n".
'Accept-Language: '.$headerstrings['Accept-Language']."\r\n".
'User-Agent: '.$headerstrings['User-Agent']."\r\n";
$contextOptions = array(
'http'=>array(
'method'=>"GET",
'header'=>$setHeaders
)
);
return stream_context_create($contextOptions);
}
}
?>