akregator/src/librss

loader.cpp
1 /*
2  * loader.cpp
3  *
4  * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
5  *
6  * This program is distributed in the hope that it will be useful, but WITHOUT
7  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
8  * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
9  * accompanying file 'COPYING'.
10  */
11 #include "loader.h"
12 #include "document.h"
13 #include "feeddetector.h"
14 
15 #include <tdeio/job.h>
16 #include <kprocess.h>
17 #include <kstaticdeleter.h>
18 #include <kurl.h>
19 #include <kdebug.h>
20 
21 #include <tqdom.h>
22 #include <tqbuffer.h>
23 #include <tqregexp.h>
24 #include <tqstring.h>
25 #include <tqstringlist.h>
26 #include <tqtimer.h>
27 
28 using namespace RSS;
29 
31 {
32 }
33 
35 {
36 }
37 
38 class FileRetriever::Private
39 {
40  public:
41 
42  Private()
43  : buffer(NULL),
44  lastError(0), job(NULL)
45  {
46  }
47 
48  ~Private()
49  {
50  delete buffer;
51  }
52 
53  TQBuffer *buffer;
54  int lastError;
55  TDEIO::Job *job;
56  static KStaticDeleter<TQString> userAgentsd;
57  static TQString* userAgent;
58 };
59 
60 KStaticDeleter<TQString> FileRetriever::Private::userAgentsd;
61 TQString* FileRetriever::Private::userAgent = 0L;
63  : d(new Private)
64 {
65 }
66 
68 {
69  delete d;
70 }
71 
72 bool FileRetriever::m_useCache = true;
73 
74 TQString FileRetriever::userAgent()
75 {
76  if (Private::userAgent == 0L)
77  FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString);
78  return *Private::userAgent;
79 }
80 
81 void FileRetriever::setUserAgent(const TQString &ua)
82 {
83  if (Private::userAgent == 0L)
84  FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString);
85  (*Private::userAgent) = ua;
86 }
87 
88 void FileRetriever::setUseCache(bool enabled)
89 {
90  m_useCache = enabled;
91 }
92 
93 void FileRetriever::retrieveData(const KURL &url)
94 {
95  if (d->buffer)
96  return;
97 
98  d->buffer = new TQBuffer;
99  d->buffer->open(IO_WriteOnly);
100 
101  KURL u=url;
102 
103  if (u.protocol()=="feed")
104  u.setProtocol("http");
105 
106  d->job = TDEIO::get(u, false, false);
107  d->job->addMetaData("cache", m_useCache ? "refresh" : "reload");
108 
109  TQString ua = userAgent();
110  if (!ua.isEmpty())
111  d->job->addMetaData("UserAgent", ua);
112 
113 
114  TQTimer::singleShot(1000*90, this, TQ_SLOT(slotTimeout()));
115 
116  connect(d->job, TQ_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
117  TQ_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
118  connect(d->job, TQ_SIGNAL(result(TDEIO::Job *)), TQ_SLOT(slotResult(TDEIO::Job *)));
119  connect(d->job, TQ_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)),
120  TQ_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &)));
121 }
122 
123 void FileRetriever::slotTimeout()
124 {
125  abort();
126 
127  delete d->buffer;
128  d->buffer = NULL;
129 
130  d->lastError = TDEIO::ERR_SERVER_TIMEOUT;
131 
132  emit dataRetrieved(TQByteArray(), false);
133 }
134 
136 {
137  return d->lastError;
138 }
139 
140 void FileRetriever::slotData(TDEIO::Job *, const TQByteArray &data)
141 {
142  d->buffer->writeBlock(data.data(), data.size());
143 }
144 
145 void FileRetriever::slotResult(TDEIO::Job *job)
146 {
147  TQByteArray data = d->buffer->buffer();
148  data.detach();
149 
150  delete d->buffer;
151  d->buffer = NULL;
152 
153  d->lastError = job->error();
154  emit dataRetrieved(data, d->lastError == 0);
155 }
156 
157 void FileRetriever::slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &newUrl)
158 {
159  emit permanentRedirection(newUrl);
160 }
161 
162 void FileRetriever::abort()
163 {
164  if (d->job)
165  {
166  d->job->kill(true);
167  d->job = NULL;
168  }
169 }
170 
171 struct OutputRetriever::Private
172 {
173  Private() : process(NULL),
174  buffer(NULL),
175  lastError(0)
176  {
177  }
178 
179  ~Private()
180  {
181  delete process;
182  delete buffer;
183  }
184 
185  KShellProcess *process;
186  TQBuffer *buffer;
187  int lastError;
188 };
189 
191  d(new Private)
192 {
193 }
194 
196 {
197  delete d;
198 }
199 
200 void OutputRetriever::retrieveData(const KURL &url)
201 {
202  // Ignore subsequent calls if we didn't finish the previous job yet.
203  if (d->buffer || d->process)
204  return;
205 
206  d->buffer = new TQBuffer;
207  d->buffer->open(IO_WriteOnly);
208 
209  d->process = new KShellProcess();
210  connect(d->process, TQ_SIGNAL(processExited(TDEProcess *)),
211  TQ_SLOT(slotExited(TDEProcess *)));
212  connect(d->process, TQ_SIGNAL(receivedStdout(TDEProcess *, char *, int)),
213  TQ_SLOT(slotOutput(TDEProcess *, char *, int)));
214  *d->process << url.path();
215  d->process->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout);
216 }
217 
219 {
220  return d->lastError;
221 }
222 
223 void OutputRetriever::slotOutput(TDEProcess *, char *data, int length)
224 {
225  d->buffer->writeBlock(data, length);
226 }
227 
228 void OutputRetriever::slotExited(TDEProcess *p)
229 {
230  if (!p->normalExit())
231  d->lastError = p->exitStatus();
232 
233  TQByteArray data = d->buffer->buffer();
234  data.detach();
235 
236  delete d->buffer;
237  d->buffer = NULL;
238 
239  delete d->process;
240  d->process = NULL;
241 
242  emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0);
243 }
244 
245 struct Loader::Private
246 {
247  Private() : retriever(NULL),
248  lastError(0)
249  {
250  }
251 
252  ~Private()
253  {
254  delete retriever;
255  }
256 
257  DataRetriever *retriever;
258  int lastError;
259  KURL discoveredFeedURL;
260  KURL url;
261 };
262 
264 {
265  return new Loader;
266 }
267 
268 Loader *Loader::create(TQObject *object, const char *slot)
269 {
270  Loader *loader = create();
271  connect(loader, TQ_SIGNAL(loadingComplete(Loader *, Document, Status)),
272  object, slot);
273  return loader;
274 }
275 
276 Loader::Loader() : d(new Private)
277 {
278 }
279 
280 Loader::~Loader()
281 {
282  delete d;
283 }
284 
285 void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
286 {
287  if (d->retriever != NULL)
288  return;
289 
290  d->url=url;
291  d->retriever = retriever;
292 
293  connect(d->retriever, TQ_SIGNAL(dataRetrieved(const TQByteArray &, bool)),
294  this, TQ_SLOT(slotRetrieverDone(const TQByteArray &, bool)));
295 
296  d->retriever->retrieveData(url);
297 }
298 
299 int Loader::errorCode() const
300 {
301  return d->lastError;
302 }
303 
304 void Loader::abort()
305 {
306  if (d && d->retriever)
307  {
308  d->retriever->abort();
309  delete d->retriever;
310  d->retriever=NULL;
311  }
312  emit loadingComplete(this, TQDomDocument(), Aborted);
313  delete this;
314 }
315 
316 const KURL &Loader::discoveredFeedURL() const
317 {
318  return d->discoveredFeedURL;
319 }
320 
321 void Loader::slotRetrieverDone(const TQByteArray &data, bool success)
322 {
323  d->lastError = d->retriever->errorCode();
324 
325  delete d->retriever;
326  d->retriever = NULL;
327 
328  Document rssDoc;
329  Status status = Success;
330 
331  if (success) {
332  TQDomDocument doc;
333 
334  /* Some servers insert whitespace before the <?xml...?> declaration.
335  * TQDom doesn't tolerate that (and it's right, that's invalid XML),
336  * so we strip that.
337  */
338 
339  const char *charData = data.data();
340  int len = data.count();
341 
342  while (len && TQChar(*charData).isSpace()) {
343  --len;
344  ++charData;
345  }
346 
347  if ( len > 3 && TQChar(*charData) == TQChar(0357) ) { // 0357 0273 0277
348  len -= 3;
349  charData += 3;
350  }
351  TQByteArray tmpData;
352  tmpData.setRawData(charData, len);
353 
354  if (doc.setContent(tmpData))
355  {
356  rssDoc = Document(doc);
357  if (!rssDoc.isValid())
358  {
359  discoverFeeds(tmpData);
360  status = ParseError;
361  }
362  }
363  else
364  {
365  discoverFeeds(tmpData);
366  status = ParseError;
367  }
368 
369  tmpData.resetRawData(charData, len);
370  } else
371  status = RetrieveError;
372 
373  emit loadingComplete(this, rssDoc, status);
374 
375  delete this;
376 }
377 
378 void Loader::discoverFeeds(const TQByteArray &data)
379 {
380  TQString str = TQString(data).simplifyWhiteSpace();
381 
382  TQStringList feeds;
383 
384  FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str);
385 
386  for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it)
387  {
388  feeds += (*it).url();
389  }
390 
391  if (list.isEmpty())
392  feeds = FeedDetector::extractBruteForce(str);
393 
394  TQString feed = feeds.first();
395  TQString host = d->url.host();
396  KURL testURL;
397  // loop through, prefer feeds on same host
398  TQStringList::Iterator end( feeds.end() );
399  for ( TQStringList::Iterator it = feeds.begin(); it != end; ++it)
400  {
401  testURL=*it;
402  if (testURL.host() == host)
403  {
404  feed = *it;
405  break;
406  }
407  }
408 
409  d->discoveredFeedURL = feed.isNull() ? TQString() : FeedDetector::fixRelativeURL(feed, d->url);
410 }
411 
412 #include "loader.moc"
Represents an RSS document and provides all the features and properties stored in it.
Definition: document.h:31
virtual void retrieveData(const KURL &url)
Executes the program referenced by the given URL and retrieves the data which the program prints to s...
Definition: loader.cpp:200
a class providing functions to detect linked feeds in HTML sources
Definition: feeddetector.h:55
void dataRetrieved(const TQByteArray &data, bool success)
Emit this signal to tell the Loader class that the retrieval process was finished.
int errorCode() const
Retrieves the error code of the last loading process (if any), as reported by the employed data retre...
Definition: loader.cpp:299
FileRetriever()
Default constructor.
Definition: loader.cpp:62
void loadFrom(const KURL &url, DataRetriever *retriever)
Loads the RSS file referenced by the given URL using the specified retrieval algorithm.
Definition: loader.cpp:285
virtual void retrieveData(const KURL &url)
Downloads the file referenced by the given URL and passes its contents on to the Loader.
Definition: loader.cpp:93
This class is the preferred way of loading RSS files.
Definition: loader.h:257
virtual ~FileRetriever()
Destructor.
Definition: loader.cpp:67
bool isValid() const
Definition: document.cpp:519
virtual int errorCode() const
Definition: loader.cpp:135
virtual ~DataRetriever()
Destructor.
Definition: loader.cpp:34
Abstract baseclass for all data retriever classes.
Definition: loader.h:35
static FeedDetectorEntryList extractFromLinkTags(const TQString &s)
searches an HTML page for feeds listed in <link> tags <link> tags with rel attribute values alterna...
static Loader * create()
Constructs a Loader instance.
Definition: loader.cpp:263
void permanentRedirection(const KURL &url)
Signals a permanent redirection.
virtual int errorCode() const
Definition: loader.cpp:218
void loadingComplete(Loader *loader, Document doc, Status status)
This signal gets emitted when the loading process triggered by calling loadFrom() finished.
OutputRetriever()
Default constructor.
Definition: loader.cpp:190
virtual ~OutputRetriever()
Destructor.
Definition: loader.cpp:195
DataRetriever()
Default constructor.
Definition: loader.cpp:30
static TQStringList extractBruteForce(const TQString &s)
searches an HTML page for slightly feed-like looking links and catches everything not running away qu...