Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,16 @@ You can also check out this nice [working implementation](https://github.com/scr

- `addmeta` - Set to `false` to override Feedparser's default behavior, which
is to add the feed's `meta` information to each article.
Feed metadata is available as soon as Feedparser has enough information to
emit the first article. Although it is bad practice and borderline
pathological, feeds can legally include additional channel metadata after
articles, so the `meta` object may be enriched until the stream ends. If you
need complete metadata, also handle the `meta` event, keep a reference to the
emitted object, and read it once the stream has ended. If you only need the
metadata available when each article streams, you can use `item.meta` as usual.

- `feedurl` - The url (string) of the feed. FeedParser is very good at
resolving relative urls in feeds, including those embedded in HTML content
fields. But some feeds use relative urls without declaring the `xml:base`
attribute any place in the feed. This is perfectly valid, but we don't know
the feed's url before we start parsing the feed and trying to resolve those
Expand Down
48 changes: 33 additions & 15 deletions lib/feedparser.js
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ FeedParser.prototype.handleProcessingInstruction = function (node) {
* @param {import('sax').QualifiedTag} node
*/
FeedParser.prototype.handleOpenTag = function (node) {
/** @type {ParsedNode} */
var n = {};
n['#name'] = node.name; // Avoid namespace collissions later...
n['#prefix'] = node.prefix; // The current ns prefix
Expand Down Expand Up @@ -243,6 +244,9 @@ FeedParser.prototype.handleCloseTag = function (el) {
, isIllegallyNested = false
;
var n = this.stack.shift();
// Parent nodes receive a string when this node is text-only; keep n as the parsed node.
/** @type {ParsedNode|string} */
var nodeValue = n;
el = el.split(':');

if (el.length > 1 && el[0] === n['#prefix']) {
Expand Down Expand Up @@ -322,7 +326,7 @@ FeedParser.prototype.handleCloseTag = function (el) {
n['#'] = n['#'].trim();
if (Object.keys(n).length === 1) {
// If there is only one text node, hoist it
n = n['#'];
nodeValue = n['#'];
}
}
}
Expand Down Expand Up @@ -359,11 +363,10 @@ FeedParser.prototype.handleCloseTag = function (el) {
}
if (this.meta.author && !item.author) item.author = this.meta.author;
this.push(item);
} else if (!this.meta.title && // We haven't yet parsed all the metadata
(node['#name'] === 'channel' ||
} else if (node['#name'] === 'channel' ||
node['#name'] === 'feed' ||
(node['#local'] === 'channel' && (node['#prefix'] === '' || node['#type'] === 'rdf')) ||
(node['#local'] === 'feed' && (node['#prefix'] === '' || node['#type'] === 'atom')))) {
(node['#local'] === 'feed' && (node['#prefix'] === '' || node['#type'] === 'atom'))) {
_.assign(this.meta, this.handleMeta(n, this.meta['#type'], this.options));
if (!this._emitted_meta) {
this.emit('meta', this.meta);
Expand All @@ -380,11 +383,11 @@ FeedParser.prototype.handleCloseTag = function (el) {
stdEl = node['#local'] || node['#name'];
}
if (!Object.prototype.hasOwnProperty.call(this.stack[0], stdEl)) {
this.stack[0][stdEl] = n;
this.stack[0][stdEl] = nodeValue;
} else if (this.stack[0][stdEl] instanceof Array) {
this.stack[0][stdEl].push(n);
this.stack[0][stdEl].push(nodeValue);
} else {
this.stack[0][stdEl] = [this.stack[0][stdEl], n];
this.stack[0][stdEl] = [this.stack[0][stdEl], nodeValue];
}
}
};
Expand All @@ -411,7 +414,7 @@ FeedParser.prototype.handleText = function (text) {
* @this {FeedParserInstance}
* @param {Object.<string, import('sax').QualifiedAttribute>} attrs
* @param {string} el
* @returns {Object.<string, string>}
* @returns {ParsedAttributes}
*/
FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) {
/*
Expand All @@ -426,7 +429,7 @@ FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) {
*/

var basepath = ''
, simplifiedAttributes = /** @type {Object.<string, string>} */ ({})
, simplifiedAttributes = /** @type {ParsedAttributes} */ ({})
;

if (this.xmlbase && this.xmlbase.length) {
Expand Down Expand Up @@ -499,6 +502,8 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
Object.keys(node).forEach((name) => {
var el = node[name];

if (name === 'item' || name === 'entry' || name.match(/:(item|entry)$/)) return;

if (normalize) {
switch (name) {
case ('title'):
Expand Down Expand Up @@ -1198,12 +1203,25 @@ FeedParser.prototype._flush = function (done) {
};

/**
* @typedef {Object} ParsedNode
* @typedef {Object.<string, string>} ParsedAttributes
*/

/**
* @typedef {{
* '#name'?: string,
* '#prefix'?: string,
* '#local'?: string,
* '#uri'?: string,
* '@'?: ParsedAttributes,
* '#'?: string
* }} ParsedNode
* The internal accumulator object that handleOpenTag builds and pushes onto
* this.stack. Keys accumulate as child elements are parsed. String keys
* '#name', '#prefix', '#local', '#uri' hold element namespace info; '@' holds
* simplified attributes; '#' holds text content. Named keys hold child element
* values which may be strings, nested ParsedNodes, or arrays of either.
* this.stack. The parser-owned keys describe the current element: '#name',
* '#prefix', '#local', '#uri', '@', and '#'. Parsed child elements are attached
* using their element names as keys. For example, a channel node can have
* `title`, `link`, `itunes:category`, and `item` keys; text-only children are
* stored as strings, children with attributes or their own children are stored
* as ParsedNodes, and repeated child names are stored as arrays.
*/

/**
Expand Down Expand Up @@ -1248,7 +1266,7 @@ FeedParser.prototype._flush = function (done) {
* @property {function(import('sax').QualifiedTag): void} handleOpenTag
* @property {function(string): void} handleCloseTag
* @property {function(string): void} handleText
* @property {function(Object.<string, import('sax').QualifiedAttribute>, string): Object.<string, string>} handleAttributes
* @property {function(Object.<string, import('sax').QualifiedAttribute>, string): ParsedAttributes} handleAttributes
* @property {function(ParsedNode, import('../index').Type, import('../index').Options): Object} handleMeta
* @property {function(ParsedNode, import('../index').Type, import('../index').Options): Object} handleItem
*/
Expand Down
14 changes: 14 additions & 0 deletions test/feeds/rss-with-trailing-meta.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0"?>
<!--
  Test fixture: an RSS 2.0 feed whose channel-level <itunes:category> element
  is deliberately placed after the only <item>, so that channel metadata
  trails the items. Keep that ordering when editing this file.
-->
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<channel>
<title>Trailing Meta Feed</title>
<link>https://example.com/</link>
<description>Feed metadata can appear after items.</description>
<item>
<title>First item</title>
<link>https://example.com/items/1</link>
<guid>https://example.com/items/1</guid>
</item>
<itunes:category text="Music"/>
</channel>
</rss>
62 changes: 62 additions & 0 deletions test/trailing-meta.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
 * Tests for feeds whose channel-level metadata continues after an <item>.
 * Both cases parse feeds/rss-with-trailing-meta.xml and inspect the parser's
 * `meta` object once the stream has ended.
 */
describe('trailing metadata', function () {

  var fixturePath = __dirname + '/feeds/rss-with-trailing-meta.xml';

  it('should include channel metadata that appears after items in final meta', function (done) {
    var parser = new FeedParser();
    var articles = [];
    var emittedMeta;
    var metaEventCount = 0;

    function failTest (err) {
      assert.ifError(err);
      done(err);
    }

    // Remember the object delivered with 'meta' and how often the event fired.
    function recordMeta (value) {
      emittedMeta = value;
      metaEventCount += 1;
    }

    // Pull articles until read() reports that nothing more is buffered.
    function drainArticles () {
      var article = this.read();
      while (article !== null) {
        articles.push(article);
        article = this.read();
      }
    }

    function verify () {
      // 'meta' is expected exactly once; the category that follows the item
      // must still show up on the final meta object.
      assert.strictEqual(metaEventCount, 1);
      assert.deepStrictEqual(parser.meta.categories, ['Music']);
      // The emitted object and the item's meta are expected to be the very
      // same object as parser.meta.
      assert.strictEqual(emittedMeta, parser.meta);
      assert.strictEqual(articles.length, 1);
      assert.strictEqual(articles[0].meta, parser.meta);
      assert.deepStrictEqual(articles[0].meta.categories, ['Music']);
      // Items must not be copied into the metadata.
      assert.strictEqual(parser.meta['rss:item'], undefined);
      done();
    }

    fs.createReadStream(fixturePath)
      .pipe(parser)
      .on('error', failTest)
      .on('meta', recordMeta)
      .on('readable', drainArticles)
      .on('end', verify);
  });

  it('should skip items in final native meta when normalize is false', function (done) {
    var parser = new FeedParser({ normalize: false });

    function failTest (err) {
      assert.ifError(err);
      done(err);
    }

    // Consume every article so the stream can reach 'end'.
    function drainArticles () {
      var article = this.read();
      while (article !== null) {
        assert(article);
        article = this.read();
      }
    }

    function verify () {
      // With normalize disabled no normalized `title` is expected; the
      // element-named keys carry the parsed nodes instead.
      assert.strictEqual(parser.meta.title, undefined);
      assert.strictEqual(parser.meta['rss:title']['#'], 'Trailing Meta Feed');
      assert.strictEqual(parser.meta['itunes:category']['@'].text, 'Music');
      // Neither the prefixed nor the bare item key may appear in the metadata.
      assert.strictEqual(parser.meta['rss:item'], undefined);
      assert.strictEqual(parser.meta.item, undefined);
      done();
    }

    fs.createReadStream(fixturePath)
      .pipe(parser)
      .on('error', failTest)
      .on('readable', drainArticles)
      .on('end', verify);
  });

});