From 9a07163e1f789ffb0de2d4c913ad423d62fd0da4 Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Mon, 18 May 2026 10:34:52 -0400 Subject: [PATCH 1/2] Improve node types --- lib/feedparser.js | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/lib/feedparser.js b/lib/feedparser.js index 9be6fb8..2581295 100644 --- a/lib/feedparser.js +++ b/lib/feedparser.js @@ -181,6 +181,7 @@ FeedParser.prototype.handleProcessingInstruction = function (node) { * @param {import('sax').QualifiedTag} node */ FeedParser.prototype.handleOpenTag = function (node) { + /** @type {ParsedNode} */ var n = {}; n['#name'] = node.name; // Avoid namespace collissions later... n['#prefix'] = node.prefix; // The current ns prefix @@ -243,6 +244,9 @@ FeedParser.prototype.handleCloseTag = function (el) { , isIllegallyNested = false ; var n = this.stack.shift(); + // Parent nodes receive a string when this node is text-only; keep n as the parsed node. + /** @type {ParsedNode|string} */ + var nodeValue = n; el = el.split(':'); if (el.length > 1 && el[0] === n['#prefix']) { @@ -322,7 +326,7 @@ FeedParser.prototype.handleCloseTag = function (el) { n['#'] = n['#'].trim(); if (Object.keys(n).length === 1) { // If there is only one text node, hoist it - n = n['#']; + nodeValue = n['#']; } } } @@ -380,11 +384,11 @@ FeedParser.prototype.handleCloseTag = function (el) { stdEl = node['#local'] || node['#name']; } if (!Object.prototype.hasOwnProperty.call(this.stack[0], stdEl)) { - this.stack[0][stdEl] = n; + this.stack[0][stdEl] = nodeValue; } else if (this.stack[0][stdEl] instanceof Array) { - this.stack[0][stdEl].push(n); + this.stack[0][stdEl].push(nodeValue); } else { - this.stack[0][stdEl] = [this.stack[0][stdEl], n]; + this.stack[0][stdEl] = [this.stack[0][stdEl], nodeValue]; } } }; @@ -411,7 +415,7 @@ FeedParser.prototype.handleText = function (text) { * @this {FeedParserInstance} * @param {Object.} attrs * @param {string} el - * @returns {Object.} 
+ * @returns {ParsedAttributes} */ FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) { /* @@ -426,7 +430,7 @@ FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) { */ var basepath = '' - , simplifiedAttributes = /** @type {Object.} */ ({}) + , simplifiedAttributes = /** @type {ParsedAttributes} */ ({}) ; if (this.xmlbase && this.xmlbase.length) { @@ -1198,12 +1202,25 @@ FeedParser.prototype._flush = function (done) { }; /** - * @typedef {Object} ParsedNode + * @typedef {Object.} ParsedAttributes + */ + +/** + * @typedef {{ + * '#name'?: string, + * '#prefix'?: string, + * '#local'?: string, + * '#uri'?: string, + * '@'?: ParsedAttributes, + * '#'?: string + * }} ParsedNode * The internal accumulator object that handleOpenTag builds and pushes onto - * this.stack. Keys accumulate as child elements are parsed. String keys - * '#name', '#prefix', '#local', '#uri' hold element namespace info; '@' holds - * simplified attributes; '#' holds text content. Named keys hold child element - * values which may be strings, nested ParsedNodes, or arrays of either. + * this.stack. The parser-owned keys describe the current element: '#name', + * '#prefix', '#local', '#uri', '@', and '#'. Parsed child elements are attached + * using their element names as keys. For example, a channel node can have + * `title`, `link`, `itunes:category`, and `item` keys; text-only children are + * stored as strings, children with attributes or their own children are stored + * as ParsedNodes, and repeated child names are stored as arrays. 
*/ /** @@ -1248,7 +1265,7 @@ FeedParser.prototype._flush = function (done) { * @property {function(import('sax').QualifiedTag): void} handleOpenTag * @property {function(string): void} handleCloseTag * @property {function(string): void} handleText - * @property {function(Object., string): Object.} handleAttributes + * @property {function(Object., string): ParsedAttributes} handleAttributes * @property {function(ParsedNode, import('../index').Type, import('../index').Options): Object} handleMeta * @property {function(ParsedNode, import('../index').Type, import('../index').Options): Object} handleItem */ From 5707c07c94ae923aad0ecdc762cba2d02e43ef07 Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Mon, 18 May 2026 10:56:25 -0400 Subject: [PATCH 2/2] Allow trailing feed metadata after items --- README.md | 9 +++- lib/feedparser.js | 7 +-- test/feeds/rss-with-trailing-meta.xml | 14 ++++++ test/trailing-meta.js | 62 +++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 test/feeds/rss-with-trailing-meta.xml create mode 100644 test/trailing-meta.js diff --git a/README.md b/README.md index b76b006..e670cda 100644 --- a/README.md +++ b/README.md @@ -113,9 +113,16 @@ You can also check out this nice [working implementation](https://github.com/scr - `addmeta` - Set to `false` to override Feedparser's default behavior, which is to add the feed's `meta` information to each article. + Feed metadata is available as soon as Feedparser has enough information to + emit the first article. Although it is bad practice and borderline pathological, + a feed can legally include additional channel metadata after its articles, so the + `meta` object may be enriched until the stream ends. If you need complete metadata, + also handle the `meta` event and keep the emitted object until the stream ends. + If you only need the metadata available when each article streams, you can + use `item.meta` as usual. - `feedurl` - The url (string) of the feed. 
FeedParser is very good at - resolving relative urls in feeds, including those embedded in HTML content + resolving relative urls in feeds, including those embedded in HTML content fields. But some feeds use relative urls without declaring the `xml:base` attribute any place in the feed. This is perfectly valid, but we don't know the feed's url before we start parsing the feed and trying to resolve those diff --git a/lib/feedparser.js b/lib/feedparser.js index 2581295..edffce2 100644 --- a/lib/feedparser.js +++ b/lib/feedparser.js @@ -363,11 +363,10 @@ FeedParser.prototype.handleCloseTag = function (el) { } if (this.meta.author && !item.author) item.author = this.meta.author; this.push(item); - } else if (!this.meta.title && // We haven't yet parsed all the metadata - (node['#name'] === 'channel' || + } else if (node['#name'] === 'channel' || node['#name'] === 'feed' || (node['#local'] === 'channel' && (node['#prefix'] === '' || node['#type'] === 'rdf')) || - (node['#local'] === 'feed' && (node['#prefix'] === '' || node['#type'] === 'atom')))) { + (node['#local'] === 'feed' && (node['#prefix'] === '' || node['#type'] === 'atom'))) { _.assign(this.meta, this.handleMeta(n, this.meta['#type'], this.options)); if (!this._emitted_meta) { this.emit('meta', this.meta); @@ -503,6 +502,8 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { Object.keys(node).forEach((name) => { var el = node[name]; + if (name === 'item' || name === 'entry' || name.match(/:(item|entry)$/)) return; + if (normalize) { switch (name) { case ('title'): diff --git a/test/feeds/rss-with-trailing-meta.xml b/test/feeds/rss-with-trailing-meta.xml new file mode 100644 index 0000000..bf2f61e --- /dev/null +++ b/test/feeds/rss-with-trailing-meta.xml @@ -0,0 +1,14 @@ + + + + Trailing Meta Feed + https://example.com/ + Feed metadata can appear after items. 
+ + First item + https://example.com/items/1 + https://example.com/items/1 + + + + diff --git a/test/trailing-meta.js b/test/trailing-meta.js new file mode 100644 index 0000000..f709a46 --- /dev/null +++ b/test/trailing-meta.js @@ -0,0 +1,62 @@ +describe('trailing metadata', function () { + + var feed = __dirname + '/feeds/rss-with-trailing-meta.xml'; + + it('should include channel metadata that appears after items in final meta', function (done) { + var feedparser = new FeedParser(); + var items = []; + var meta; + var metaEvents = 0; + + fs.createReadStream(feed).pipe(feedparser) + .on('error', function (err) { + assert.ifError(err); + done(err); + }) + .on('meta', function (_meta) { + meta = _meta; + metaEvents++; + }) + .on('readable', function () { + var item; + while ((item = this.read()) !== null) { + items.push(item); + } + }) + .on('end', function () { + assert.strictEqual(metaEvents, 1); + assert.deepStrictEqual(feedparser.meta.categories, ['Music']); + assert.strictEqual(meta, feedparser.meta); + assert.strictEqual(items.length, 1); + assert.strictEqual(items[0].meta, feedparser.meta); + assert.deepStrictEqual(items[0].meta.categories, ['Music']); + assert.strictEqual(feedparser.meta['rss:item'], undefined); + done(); + }); + }); + + it('should skip items in final native meta when normalize is false', function (done) { + var feedparser = new FeedParser({ normalize: false }); + + fs.createReadStream(feed).pipe(feedparser) + .on('error', function (err) { + assert.ifError(err); + done(err); + }) + .on('readable', function () { + var item; + while ((item = this.read()) !== null) { + assert(item); + } + }) + .on('end', function () { + assert.strictEqual(feedparser.meta.title, undefined); + assert.strictEqual(feedparser.meta['rss:title']['#'], 'Trailing Meta Feed'); + assert.strictEqual(feedparser.meta['itunes:category']['@'].text, 'Music'); + assert.strictEqual(feedparser.meta['rss:item'], undefined); + assert.strictEqual(feedparser.meta.item, undefined); + 
done(); + }); + }); + +});