From dae4c5992e8bb35f23610df8e6f8acc64b4aa477 Mon Sep 17 00:00:00 2001 From: David Cook Date: Sun, 5 Mar 2017 15:33:58 -0600 Subject: [PATCH 01/12] Rewrite to not use innerHTML --- linkify-citations.js | 18703 +++++++++++++++++++++++++++++++++++-- linkify.min.js | 15 +- package.json | 2 +- src/linkify-citations.js | 91 +- 4 files changed, 17958 insertions(+), 853 deletions(-) diff --git a/linkify-citations.js b/linkify-citations.js index 46f5c6c..3c5bd03 100644 --- a/linkify-citations.js +++ b/linkify-citations.js @@ -1,1009 +1,18084 @@ (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o 0) { + throw new Error('Invalid string. Length must be a multiple of 4') + } - // client can apply a filter that pre-processes text before extraction, - // and post-processes citations after extraction - var results; - if (options.filter && Citation.filters[options.filter]) - return Citation.filtered(options.filter, text, options); + // the number of equal signs (place holders) + // if there are two placeholders, than the two characters before it + // represent one byte + // if there is only one, then the three characters before it represent 2 bytes + // this is just a cheap hack to not do indexOf twice + var len = b64.length + placeHolders = '=' === b64.charAt(len - 2) ? 2 : '=' === b64.charAt(len - 1) ? 1 : 0 - // otherwise, do a single pass over the whole text. - else - return Citation.extract(text, options); - }, + // base64 is 4/3 + up to two characters of the original data + arr = new Arr(b64.length * 3 / 4 - placeHolders) - // return an array of matched and filter-mapped cites - filtered: function(name, text, options) { - var results = []; + // if there are placeholders, only get up to the last complete 4 chars + l = placeHolders > 0 ? b64.length - 4 : b64.length - var filter = Citation.filters[name]; + var L = 0 - // filter can break up the text into pieces with accompanying metadata - filter.from(text, options[name], function(piece, metadata) { - var response = Citation.extract(piece, options); + function push (v) { + arr[L++] = v + } - // ignores any replaced text, it falls off the edge of the earth + for (i = 0, j = 0; i < l; i += 4, j += 3) { + tmp = (decode(b64.charAt(i)) << 18) | (decode(b64.charAt(i + 1)) << 12) | (decode(b64.charAt(i + 2)) << 6) | decode(b64.charAt(i + 3)) + push((tmp & 0xFF0000) >> 16) + push((tmp & 0xFF00) >> 8) + push(tmp & 0xFF) + } - var filtered = response.citations.map(function(result) { + if (placeHolders === 2) { + tmp = (decode(b64.charAt(i)) << 2) | (decode(b64.charAt(i + 1)) >> 4) + push(tmp & 0xFF) + } else if (placeHolders === 1) { + tmp = (decode(b64.charAt(i)) << 10) | (decode(b64.charAt(i + 1)) << 4) | (decode(b64.charAt(i + 2)) >> 2) + push((tmp >> 8) & 0xFF) + push(tmp & 0xFF) + } - Object.keys(metadata).forEach(function(key) { - result[key] = metadata[key]; - }); + return arr + } - return result; - }); + function uint8ToBase64 (uint8) { + var i, + extraBytes = uint8.length % 3, // if we have 1 byte left, pad 2 bytes + output = "", + temp, length - results = results.concat(filtered); - }); + function encode (num) { + return lookup.charAt(num) + } - // doesn't return replaced text - return {citations: results}; - }, + function tripletToBase64 (num) { + return encode(num >> 18 & 0x3F) + encode(num >> 12 & 0x3F) + encode(num >> 6 & 0x3F) + encode(num & 0x3F) + } + // go through the array every three bytes, we'll deal with trailing stuff later + for (i = 0, length = uint8.length - extraBytes; i < length; i += 3) { + temp = (uint8[i] << 16) + (uint8[i + 1] << 8) + (uint8[i + 2]) + output += tripletToBase64(temp) + } - // run the citators over the text, return an array of matched cites - extract: function(text, options) { - if (!options) options = {}; + // pad the end with zeros, but make sure to not forget the extra bytes + switch (extraBytes) { + case 1: + temp = uint8[uint8.length - 1] + output += encode(temp >> 2) + output += encode((temp << 4) & 0x3F) + output += '==' + break + case 2: + temp = (uint8[uint8.length - 2] << 8) + (uint8[uint8.length - 1]) + output += encode(temp >> 10) + output += encode((temp >> 4) & 0x3F) + output += encode((temp << 2) & 0x3F) + output += '=' + break + } - // default: no excerpt - var excerpt = options.excerpt ? parseInt(options.excerpt, 10) : 0; + return output + } - // whether to return parent citations - // default: false - var parents = options.parents || false; + exports.toByteArray = b64ToByteArray + exports.fromByteArray = uint8ToBase64 +}(typeof exports === 'undefined' ? (this.base64js = {}) : exports)) - // default: all types, can be filtered to one, or an array of them - var types = Citation.selectedTypes(options); - if (types.length === 0) return null; +},{}],2:[function(require,module,exports){ +},{}],3:[function(require,module,exports){ +(function (global){ +'use strict'; - // The caller can provide a replace callback to alter every found citation. - // this function will be called with each (found and processed) cite object, - // and should return a string to be put in the cite's place. - // - // The resulting transformed string will be in the returned object as a 'text' field. - // this field will only be present if a replace callback was provided. - // - // providing this callback will also cause matched cites not to return the 'index' field, - // as the replace process will completely screw them up. only use the 'index' field if you - // plan on doing your own replacing. - var replace = options.replace; +var buffer = require('buffer'); +var Buffer = buffer.Buffer; +var SlowBuffer = buffer.SlowBuffer; +var MAX_LEN = buffer.kMaxLength || 2147483647; +exports.alloc = function alloc(size, fill, encoding) { + if (typeof Buffer.alloc === 'function') { + return Buffer.alloc(size, fill, encoding); + } + if (typeof encoding === 'number') { + throw new TypeError('encoding must not be number'); + } + if (typeof size !== 'number') { + throw new TypeError('size must be a number'); + } + if (size > MAX_LEN) { + throw new RangeError('size is too large'); + } + var enc = encoding; + var _fill = fill; + if (_fill === undefined) { + enc = undefined; + _fill = 0; + } + var buf = new Buffer(size); + if (typeof _fill === 'string') { + var fillBuf = new Buffer(_fill, enc); + var flen = fillBuf.length; + var i = -1; + while (++i < size) { + buf[i] = fillBuf[i % flen]; + } + } else { + buf.fill(_fill); + } + return buf; +} +exports.allocUnsafe = function allocUnsafe(size) { + if (typeof Buffer.allocUnsafe === 'function') { + return Buffer.allocUnsafe(size); + } + if (typeof size !== 'number') { + throw new TypeError('size must be a number'); + } + if (size > MAX_LEN) { + throw new RangeError('size is too large'); + } + return new Buffer(size); +} +exports.from = function from(value, encodingOrOffset, length) { + if (typeof Buffer.from === 'function' && (!global.Uint8Array || Uint8Array.from !== Buffer.from)) { + return Buffer.from(value, encodingOrOffset, length); + } + if (typeof value === 'number') { + throw new TypeError('"value" argument must not be a number'); + } + if (typeof value === 'string') { + return new Buffer(value, encodingOrOffset); + } + if (typeof ArrayBuffer !== 'undefined' && value instanceof ArrayBuffer) { + var offset = encodingOrOffset; + if (arguments.length === 1) { + return new Buffer(value); + } + if (typeof offset === 'undefined') { + offset = 0; + } + var len = length; + if (typeof len === 'undefined') { + len = value.byteLength - offset; + } + if (offset >= value.byteLength) { + throw new RangeError('\'offset\' is out of bounds'); + } + if (len > value.byteLength - offset) { + throw new RangeError('\'length\' is out of bounds'); + } + return new Buffer(value.slice(offset, offset + len)); + } + if (Buffer.isBuffer(value)) { + var out = new Buffer(value.length); + value.copy(out, 0, 0, value.length); + return out; + } + if (value) { + if (Array.isArray(value) || (typeof ArrayBuffer !== 'undefined' && value.buffer instanceof ArrayBuffer) || 'length' in value) { + return new Buffer(value); + } + if (value.type === 'Buffer' && Array.isArray(value.data)) { + return new Buffer(value.data); + } + } - // accumulate the results - var results = []; + throw new TypeError('First argument must be a string, Buffer, ' + 'ArrayBuffer, Array, or array-like object.'); +} +exports.allocUnsafeSlow = function allocUnsafeSlow(size) { + if (typeof Buffer.allocUnsafeSlow === 'function') { + return Buffer.allocUnsafeSlow(size); + } + if (typeof size !== 'number') { + throw new TypeError('size must be a number'); + } + if (size >= MAX_LEN) { + throw new RangeError('size is too large'); + } + return new SlowBuffer(size); +} +}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) +},{"buffer":4}],4:[function(require,module,exports){ +(function (global){ +/*! + * The buffer module from node.js, for the browser. + * + * @author Feross Aboukhadijeh + * @license MIT + */ +/* eslint-disable no-proto */ - // will hold the calculated context-specific patterns we are to run - // over the given text, tracked by index we expect to find them at. - // nextIndex tracks a running index as we loop through patterns. - // (citators could just be called indexedPatterns) - var citators = {}; - var nextIndex = 0; +'use strict' - // Go through every regex-based citator and prepare a set of patterns, - // indexed by the order of a matched arguments array. - types.forEach(function(type) { - if (Citation.types[type].type != "regex") return; +var base64 = require('base64-js') +var ieee754 = require('ieee754') +var isArray = require('isarray') - // Calculate the patterns this citator will contribute to the parse. - // (individual parsers can opt to make their parsing context-specific) - var patterns = Citation.types[type].patterns; - if (typeof(patterns) == "function") - patterns = patterns(options[type] || {}); +exports.Buffer = Buffer +exports.SlowBuffer = SlowBuffer +exports.INSPECT_MAX_BYTES = 50 +Buffer.poolSize = 8192 // not used by this implementation - // add each pattern, keeping a running tally of what we would - // expect its primary index to be when found in the master regex. - patterns.forEach(function(pattern) { - pattern.type = type; // will be needed later - citators[nextIndex] = pattern; - nextIndex += pattern.fields.length + 1; - }); - }); +var rootParent = {} - // If there are any regex-based patterns being applied, combine them - // and run a find/replace over the string. - var regexes = Object.keys(citators).map(function(key) {return citators[key].regex}); - if (regexes.length > 0) { +/** + * If `Buffer.TYPED_ARRAY_SUPPORT`: + * === true Use Uint8Array implementation (fastest) + * === false Use Object implementation (most compatible, even IE6) + * + * Browsers that support typed arrays are IE 10+, Firefox 4+, Chrome 7+, Safari 5.1+, + * Opera 11.6+, iOS 4.2+. + * + * Due to various browser bugs, sometimes the Object implementation will be used even + * when the browser supports typed arrays. + * + * Note: + * + * - Firefox 4-29 lacks support for adding new properties to `Uint8Array` instances, + * See: https://bugzilla.mozilla.org/show_bug.cgi?id=695438. + * + * - Safari 5-7 lacks support for changing the `Object.prototype.constructor` property + * on objects. + * + * - Chrome 9-10 is missing the `TypedArray.prototype.subarray` function. + * + * - IE10 has a broken `TypedArray.prototype.subarray` function which returns arrays of + * incorrect length in some situations. - // merge all regexes into one, so that each pattern will begin at a predictable place - var regex = new RegExp("(" + regexes.join(")|(") + ")", "ig"); + * We detect these buggy browsers and set `Buffer.TYPED_ARRAY_SUPPORT` to `false` so they + * get the Object implementation, which is slower but behaves correctly. + */ +Buffer.TYPED_ARRAY_SUPPORT = global.TYPED_ARRAY_SUPPORT !== undefined + ? global.TYPED_ARRAY_SUPPORT + : typedArraySupport() - var replaced = text.replace(regex, function() { - var match = arguments[0]; +function typedArraySupport () { + function Bar () {} + try { + var arr = new Uint8Array(1) + arr.foo = function () { return 42 } + arr.constructor = Bar + return arr.foo() === 42 && // typed array instances can be augmented + arr.constructor === Bar && // constructor can be set + typeof arr.subarray === 'function' && // chrome 9-10 lack `subarray` + arr.subarray(1, 1).byteLength === 0 // ie10 has broken `subarray` + } catch (e) { + return false + } +} - // offset is second-to-last argument - var index = arguments[arguments.length - 2]; +function kMaxLength () { + return Buffer.TYPED_ARRAY_SUPPORT + ? 0x7fffffff + : 0x3fffffff +} - // pull out just the regex-captured matches - var captures = Array.prototype.slice.call(arguments, 1, -2); +/** + * Class: Buffer + * ============= + * + * The Buffer constructor returns instances of `Uint8Array` that are augmented + * with function properties for all the node `Buffer` API functions. We use + * `Uint8Array` so that square bracket notation works as expected -- it returns + * a single octet. + * + * By augmenting the instances, we can avoid modifying the `Uint8Array` + * prototype. + */ +function Buffer (arg) { + if (!(this instanceof Buffer)) { + // Avoid going through an ArgumentsAdaptorTrampoline in the common case. + if (arguments.length > 1) return new Buffer(arg, arguments[1]) + return new Buffer(arg) + } - // find the first matched index in the captures - var matchIndex; - for (matchIndex=0; matchIndex 1 ? arguments[1] : 'utf8') + } - // one match can generate one or many citation results (e.g. ranges) - if (!Array.isArray(cites)) cites = [cites]; + // Unusual. + return fromObject(this, arg) +} - // put together the match-level information - var matchInfo = {type: citator.type}; - matchInfo.match = match.toString(); // match data can be converted to the plain string +function fromNumber (that, length) { + that = allocate(that, length < 0 ? 0 : checked(length) | 0) + if (!Buffer.TYPED_ARRAY_SUPPORT) { + for (var i = 0; i < length; i++) { + that[i] = 0 + } + } + return that +} - // store the matched character offset (if we're replacing we need it to handle - // some multiple citations, but the index will be useless to the caller after - // the replacement) so we wipe it out later. - matchInfo.index = index; +function fromString (that, string, encoding) { + if (typeof encoding !== 'string' || encoding === '') encoding = 'utf8' - // use index to grab surrounding excerpt - if (excerpt > 0) { - var proposedLeft = index - excerpt; - var left = proposedLeft > 0 ? proposedLeft : 0; + // Assumption: byteLength() return value is always < kMaxLength. + var length = byteLength(string, encoding) | 0 + that = allocate(that, length) - var proposedRight = index + matchInfo.match.length + excerpt; - var right = (proposedRight <= text.length) ? proposedRight : text.length; + that.write(string, encoding) + return that +} - matchInfo.excerpt = text.substring(left, right); - } +function fromObject (that, object) { + if (Buffer.isBuffer(object)) return fromBuffer(that, object) + if (isArray(object)) return fromArray(that, object) - // if we want parent cites too, make those now - if (parents && Citation.types[type].parents_by) { - cites = Citation._.flatten(cites.map(function(cite) { - return Citation.citeParents(cite, type); - })); - } + if (object == null) { + throw new TypeError('must start with number, buffer, array or string') + } - cites = cites.map(function(cite) { - var result = {}; + if (typeof ArrayBuffer !== 'undefined') { + if (object.buffer instanceof ArrayBuffer) { + return fromTypedArray(that, object) + } + if (object instanceof ArrayBuffer) { + return fromArrayBuffer(that, object) + } + } - // match-level info - Citation._.extend(result, matchInfo); + if (object.length) return fromArrayLike(that, object) - // handle _submatch, which lets the user-level citator override the - // match and index with a sub-part of the whole matched regex - if (cite._submatch) { - result.match = cite._submatch.text; - result.index += cite._submatch.offset; - delete cite._submatch; - } + return fromJsonObject(that, object) +} - // cite-level info, plus ID standardization - result[type] = cite; - result[type].id = Citation.types[type].id(cite); +function fromBuffer (that, buffer) { + var length = checked(buffer.length) | 0 + that = allocate(that, length) + buffer.copy(that, 0, 0, length) + return that +} - results.push(result); +function fromArray (that, array) { + var length = checked(array.length) | 0 + that = allocate(that, length) + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - return result; - }); +// Duplicate of fromArray() to keep fromArray() monomorphic. +function fromTypedArray (that, array) { + var length = checked(array.length) | 0 + that = allocate(that, length) + // Truncating the elements is probably not what people expect from typed + // arrays with BYTES_PER_ELEMENT > 1 but it's compatible with the behavior + // of the old Buffer constructor. + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - // If a replace function is given, replace each matched citation by the - // result of calling the replace function with the citation passed as its - // only argument. - // - // Most citators return only a single citation match per regex match, but - // some return multiple citations for strings like "§§ 32-701 through 32-703". +function fromArrayBuffer (that, array) { + if (Buffer.TYPED_ARRAY_SUPPORT) { + // Return an augmented `Uint8Array` instance, for best performance + array.byteLength + that = Buffer._augment(new Uint8Array(array)) + } else { + // Fallback: Return an object instance of the Buffer class + that = fromTypedArray(that, new Uint8Array(array)) + } + return that +} - // Collect the final match string here. - var finalstring = matchInfo.match; +function fromArrayLike (that, array) { + var length = checked(array.length) | 0 + that = allocate(that, length) + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - // Get the replace function. If options.replace is a function use that, - // or if it is an object mapping the citator type to a function use that. - var replace_func = null; - if (typeof(replace) === "function") - replace_func = replace; - else if ((typeof(replace) === "object") && (typeof(replace[type]) === "function")) - replace_func = replace[type]; - else - replace_func = null; +// Deserialize { type: 'Buffer', data: [1,2,3,...] } into a Buffer object. +// Returns a zero-length buffer for inputs that don't conform to the spec. +function fromJsonObject (that, object) { + var array + var length = 0 - // If there's a replacement function... - if (replace_func) { - // Process the citations in the order they are returned. Assume they are - // ordered from left to right. - var last_index = 0; - var dx = 0; - for (var i = 0; i < cites.length; i++) { - // Skip citations that overlap with the previous citation (e.g. there - // may be two citations for the same text range.) - if (cites[i].index >= last_index) { - // Execute the replacement function. If the return is truth-y, perform - // a replacement. - var replacement = replace_func(cites[i]); - if (replacement) { - // Replace the substring. - finalstring = finalstring.substring(0, cites[i].index-index+dx) + replacement + finalstring.substring(cites[i].index-index+cites[i].match.length+dx); - - // The replacement text may have a different length than the text - // being replaced. Keep track of the total change in string length - // as we go because we have to adjust future citation replacements's - // indexes so that we make the edit to finalstring in the right place. - dx += replacement.length - cites[i].match.length; + if (object.type === 'Buffer' && isArray(object.data)) { + array = object.data + length = checked(array.length) | 0 + } + that = allocate(that, length) - // And track the end of last citation so we can skip any future citations - // that overlap with this text range. - last_index = cites[i].index + cites[i].match.length; - } - } + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - // Per the citation API, delete the index field when doing a replacement. - // After replacements, the index will no longer be useful to the caller - // because the string has been edited. - delete cites[i].index; - } - } - return finalstring; - }); - } +if (Buffer.TYPED_ARRAY_SUPPORT) { + Buffer.prototype.__proto__ = Uint8Array.prototype + Buffer.__proto__ = Uint8Array +} else { + // pre-set for values that may exist in the future + Buffer.prototype.length = undefined + Buffer.prototype.parent = undefined +} - // TODO: do for any external cite types, not just "judicial" - if (types.indexOf("judicial") != -1) - results = results.concat(Citation.types.judicial.extract(text)); +function allocate (that, length) { + if (Buffer.TYPED_ARRAY_SUPPORT) { + // Return an augmented `Uint8Array` instance, for best performance + that = Buffer._augment(new Uint8Array(length)) + that.__proto__ = Buffer.prototype + } else { + // Fallback: Return an object instance of the Buffer class + that.length = length + that._isBuffer = true + } - var response = {citations: results}; - if (options.replace) response.text = replaced; + var fromPool = length !== 0 && length <= Buffer.poolSize >>> 1 + if (fromPool) that.parent = rootParent - return response; - }, + return that +} +function checked (length) { + // Note: cannot use `length < kMaxLength` here because that fails when + // length is NaN (which is otherwise coerced to zero.) + if (length >= kMaxLength()) { + throw new RangeError('Attempt to allocate Buffer larger than maximum ' + + 'size: 0x' + kMaxLength().toString(16) + ' bytes') + } + return length | 0 +} - // for a given set of cite-specific details, - // return itself and its parent citations - citeParents: function(citation, type) { - var field = Citation.types[type].parents_by; - var results = []; +function SlowBuffer (subject, encoding) { + if (!(this instanceof SlowBuffer)) return new SlowBuffer(subject, encoding) - for (var i=citation[field].length; i >= 0; i--) { - var parent = Citation._.extend({}, citation); - parent[field] = parent[field].slice(0, i); - results.push(parent); - } - return results; - }, + var buf = new Buffer(subject, encoding) + delete buf.parent + return buf +} - // given an array of captures *beginning* with values the pattern - // knows how to process, turn it into an object with those keys. - matchFor: function(captures, pattern) { - var match = {}; - for (var i=0; i 0) - types = options.types; - } else - types = [options.types]; - } +Buffer.compare = function compare (a, b) { + if (!Buffer.isBuffer(a) || !Buffer.isBuffer(b)) { + throw new TypeError('Arguments must be Buffers') + } - // only allow valid types - if (types) { - types = types.filter(function(type) { - return Object.keys(Citation.types).indexOf(type) != -1; - }); - } else - types = Object.keys(Citation.types); + if (a === b) return 0 - return types; - }, + var x = a.length + var y = b.length - // small replacement for several functions previously served by - // the `underscore` library. - _: { - extend: function(obj) { - Array.prototype.slice.call(arguments, 1).forEach(function(source) { - if (source) { - for (var prop in source) - obj[prop] = source[prop]; - } - }); - return obj; - }, + var i = 0 + var len = Math.min(x, y) + while (i < len) { + if (a[i] !== b[i]) break - flatten: function(array) { - var impl = function(input, output) { - input.forEach(function(value) { - if (Array.isArray(value)) - impl(value, output); - else - output.push(value); - }); - return output; - } + ++i + } - return impl(array, []); - } + if (i !== len) { + x = a[i] + y = b[i] } -}; + if (x < y) return -1 + if (y < x) return 1 + return 0 +} +Buffer.isEncoding = function isEncoding (encoding) { + switch (String(encoding).toLowerCase()) { + case 'hex': + case 'utf8': + case 'utf-8': + case 'ascii': + case 'binary': + case 'base64': + case 'raw': + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return true + default: + return false + } +} -// TODO: load only the citation types asked for -if (typeof(require) !== "undefined") { - Citation.types.usc = require("./citations/usc"); - Citation.types.law = require("./citations/law"); - Citation.types.cfr = require("./citations/cfr"); - Citation.types.va_code = require("./citations/va_code"); - Citation.types.dc_code = require("./citations/dc_code"); - Citation.types.dc_register = require("./citations/dc_register"); - Citation.types.dc_law = require("./citations/dc_law"); - Citation.types.dc_stat = require("./citations/dc_stat"); - Citation.types.stat = require("./citations/stat"); - Citation.types.reporter = require("./citations/reporter"); +Buffer.concat = function concat (list, length) { + if (!isArray(list)) throw new TypeError('list argument must be an Array of Buffers.') + if (list.length === 0) { + return new Buffer(0) + } - Citation.filters.lines = require("./filters/lines"); + var i + if (length === undefined) { + length = 0 + for (i = 0; i < list.length; i++) { + length += list[i].length + } + } + + var buf = new Buffer(length) + var pos = 0 + for (i = 0; i < list.length; i++) { + var item = list[i] + item.copy(buf, pos) + pos += item.length + } + return buf } -// auto-load in-browser -if (typeof(window) !== "undefined") - window.Citation = Citation; +function byteLength (string, encoding) { + if (typeof string !== 'string') string = '' + string -return Citation; + var len = string.length + if (len === 0) return 0 -})(); + // Use a for loop to avoid recursion + var loweredCase = false + for (;;) { + switch (encoding) { + case 'ascii': + case 'binary': + // Deprecated + case 'raw': + case 'raws': + return len + case 'utf8': + case 'utf-8': + return utf8ToBytes(string).length + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return len * 2 + case 'hex': + return len >>> 1 + case 'base64': + return base64ToBytes(string).length + default: + if (loweredCase) return utf8ToBytes(string).length // assume utf8 + encoding = ('' + encoding).toLowerCase() + loweredCase = true + } + } +} +Buffer.byteLength = byteLength -},{"./citations/cfr":2,"./citations/dc_code":3,"./citations/dc_law":4,"./citations/dc_register":5,"./citations/dc_stat":6,"./citations/law":7,"./citations/reporter":8,"./citations/stat":9,"./citations/usc":10,"./citations/va_code":11,"./filters/lines":12}],2:[function(require,module,exports){ -module.exports = { - type: "regex", +function slowToString (encoding, start, end) { + var loweredCase = false - id: function(data) { - return ["cfr", data.title, (data.section || data.part)] - .concat(data.subsections || []) - .join("/") - }, + start = start | 0 + end = end === undefined || end === Infinity ? this.length : end | 0 - patterns: [ - // done: - // 14 CFR part 25 - // 38 CFR Part 74.2 - // 48 CFR § 9903.201 - // 24 CFR 85.25(h) - // 5 CFR §531.610(f) - // 45 C.F.R. 3009.4 - // 47 CFR 54.506 (c) - // but not: 47 CFR 54.506 (whatever) - // 5CFR, part 575 + if (!encoding) encoding = 'utf8' + if (start < 0) start = 0 + if (end > this.length) end = this.length + if (end <= start) return '' - // maybe: - // 13 CFR Parts 125 and 134 - // 5CFR, part 575, subpart C - // 23 CFR 650, Subpart A - { - regex: - "(\\d+)\\s?" + - "C\\.?\\s?F\\.?\\s?R\\.?" + - "(?:[\\s,]+(?:§+|parts?))?" + - "\\s*((?:\\d+\\.?\\d*(?:\\s*\\((?:[a-zA-Z\\d]{1,2}|[ixvIXV]+)\\))*)+)", + while (true) { + switch (encoding) { + case 'hex': + return hexSlice(this, start, end) - fields: ['title', 'sections'], + case 'utf8': + case 'utf-8': + return utf8Slice(this, start, end) - processor: function(captures) { - var title = captures.title; - var part, section, subsections; + case 'ascii': + return asciiSlice(this, start, end) - // separate subsections for each section being considered - var split = captures.sections.split(/[\(\)]+/).filter(function(x) {return x;}); - section = split[0].trim(); - subsections = split.splice(1); + case 'binary': + return binarySlice(this, start, end) - if (section.indexOf(".") > 0) - part = section.split(".")[0]; - else { - part = section; - section = null; - subsections = null; // don't include empty array - } + case 'base64': + return base64Slice(this, start, end) - return { - title: title, - part: part, - section: section, - subsections: subsections - }; - } + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return utf16leSlice(this, start, end) + + default: + if (loweredCase) throw new TypeError('Unknown encoding: ' + encoding) + encoding = (encoding + '').toLowerCase() + loweredCase = true } + } +} - // todo: - // parts 121 and 135 of Title 14 of the Code of Federal Regulations - // { - // regex: - // "section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*)" + - // "(?:\\s+of|\\,) title (\\d+)", - // fields: ['section', 'subsections', 'title'], - // processor: function(captures) { - // return { - // title: captures.title, - // section: captures.section, - // subsections: captures.subsections.split(/[\(\)]+/).filter(function(x) {return x;}) - // }; - // } - // } - ] -}; +Buffer.prototype.toString = function toString () { + var length = this.length | 0 + if (length === 0) return '' + if (arguments.length === 0) return utf8Slice(this, 0, length) + return slowToString.apply(this, arguments) +} -},{}],3:[function(require,module,exports){ -var base_regex = - "(\\d+A?)" + // title - "\\s?\\-\\s?" + // dash - "([\\w\\d]+(?:\\.?[\\w\\d]+)?)" + // section identifier (letters/numbers/dots) - "((?:\\([^\\)]+\\))*)"; // subsection (any number of adjacent parenthesized subsections) +Buffer.prototype.equals = function equals (b) { + if (!Buffer.isBuffer(b)) throw new TypeError('Argument must be a Buffer') + if (this === b) return true + return Buffer.compare(this, b) === 0 +} -module.exports = { - type: "regex", +Buffer.prototype.inspect = function inspect () { + var str = '' + var max = exports.INSPECT_MAX_BYTES + if (this.length > 0) { + str = this.toString('hex', 0, max).match(/.{2}/g).join(' ') + if (this.length > max) str += ' ... ' + } + return '' +} - // normalize all cites to an ID, with and without subsections - id: function(cite) { - return ["dc-code", cite.title, cite.section] - .concat(cite.subsections) - .join("/"); - }, +Buffer.prototype.compare = function compare (b) { + if (!Buffer.isBuffer(b)) throw new TypeError('Argument must be a Buffer') + if (this === b) return 0 + return Buffer.compare(this, b) +} - // field to calculate parents from - parents_by: "subsections", +Buffer.prototype.indexOf = function indexOf (val, byteOffset) { + if (byteOffset > 0x7fffffff) byteOffset = 0x7fffffff + else if (byteOffset < -0x80000000) byteOffset = -0x80000000 + byteOffset >>= 0 - patterns: function(context) { - // D.C. Official Code 3-1202.04 - // D.C. Official Code § 3-1201.01 - // D.C. Official Code §§ 38-2602(b)(11) - // D.C. Official Code § 3- 1201.01 - // D.C. Official Code § 3 -1201.01 - // - // § 32-701 - // § 32-701(4) - // § 3-101.01 - // § 1-603.01(13) - // § 1- 1163.33 - // § 1 -1163.33 - // section 16-2326.01 + if (this.length === 0) return -1 + if (byteOffset >= this.length) return -1 - var prefix_regex = ""; - var section_regex = "(?:sections?|§+)\\s+"; - var sections_regex = "(?:sections|§§)\\s+"; - if (context.source != "dc_code") { - // Require "DC Official Code" but then make the section symbol optional. - prefix_regex = "D\\.?C\\.? (?:Official )?Code\\s+"; - section_regex = "(?:" + section_regex + ")?"; - sections_regex = "(?:" + sections_regex + ")?"; + // Negative offsets start from the end of the buffer + if (byteOffset < 0) byteOffset = Math.max(this.length + byteOffset, 0) + + if (typeof val === 'string') { + if (val.length === 0) return -1 // special case: looking for empty string always fails + return String.prototype.indexOf.call(this, val, byteOffset) + } + if (Buffer.isBuffer(val)) { + return arrayIndexOf(this, val, byteOffset) + } + if (typeof val === 'number') { + if (Buffer.TYPED_ARRAY_SUPPORT && Uint8Array.prototype.indexOf === 'function') { + return Uint8Array.prototype.indexOf.call(this, val, byteOffset) } + return arrayIndexOf(this, [ val ], byteOffset) + } - return [ - // multiple citations - // has precedence over a single citation - // Unlike the single citation, the matched parts are just the title/section/subsection - // and omits "DC Code" and the section symbols (if present) from the matched text. - { - regex: "(" + prefix_regex + sections_regex + ")(" + base_regex + "(?:(?:,|, and|\\s+and|\\s+through|\\s+to)\\s+" + base_regex + ")+)", + function arrayIndexOf (arr, val, byteOffset) { + var foundIndex = -1 + for (var i = 0; byteOffset + i < arr.length; i++) { + if (arr[byteOffset + i] === val[foundIndex === -1 ? 0 : i - foundIndex]) { + if (foundIndex === -1) foundIndex = i + if (i - foundIndex + 1 === val.length) return byteOffset + foundIndex + } else { + foundIndex = -1 + } + } + return -1 + } - fields: ["prefix", "multicite", "title1", "section1", "subsections1", "title2", "section2", "subsections2"], + throw new TypeError('val must be string, number or Buffer') +} - processor: function(captures) { - var rx = new RegExp(base_regex, "g"); - var matches = new Array(); - var match; - while((match = rx.exec(captures.multicite)) !== null) { - matches.push({ - _submatch: { - text: match[0], - offset: captures.prefix.length + match.index, - }, - title: match[1], - section: match[2], - subsections: split_subsections(match[3]) - }); +// `get` is deprecated +Buffer.prototype.get = function get (offset) { + console.log('.get() is deprecated. Access using array indexes instead.') + return this.readUInt8(offset) +} + +// `set` is deprecated +Buffer.prototype.set = function set (v, offset) { + console.log('.set() is deprecated. Access using array indexes instead.') + return this.writeUInt8(v, offset) +} + +function hexWrite (buf, string, offset, length) { + offset = Number(offset) || 0 + var remaining = buf.length - offset + if (!length) { + length = remaining + } else { + length = Number(length) + if (length > remaining) { + length = remaining + } + } + + // must be an even number of digits + var strLen = string.length + if (strLen % 2 !== 0) throw new Error('Invalid hex string') + + if (length > strLen / 2) { + length = strLen / 2 + } + for (var i = 0; i < length; i++) { + var parsed = parseInt(string.substr(i * 2, 2), 16) + if (isNaN(parsed)) throw new Error('Invalid hex string') + buf[offset + i] = parsed + } + return i +} + +function utf8Write (buf, string, offset, length) { + return blitBuffer(utf8ToBytes(string, buf.length - offset), buf, offset, length) +} + +function asciiWrite (buf, string, offset, length) { + return blitBuffer(asciiToBytes(string), buf, offset, length) +} + +function binaryWrite (buf, string, offset, length) { + return asciiWrite(buf, string, offset, length) +} + +function base64Write (buf, string, offset, length) { + return blitBuffer(base64ToBytes(string), buf, offset, length) +} + +function ucs2Write (buf, string, offset, length) { + return blitBuffer(utf16leToBytes(string, buf.length - offset), buf, offset, length) +} + +Buffer.prototype.write = function write (string, offset, length, encoding) { + // Buffer#write(string) + if (offset === undefined) { + encoding = 'utf8' + length = this.length + offset = 0 + // Buffer#write(string, encoding) + } else if (length === undefined && typeof offset === 'string') { + encoding = offset + length = this.length + offset = 0 + // Buffer#write(string, offset[, length][, encoding]) + } else if (isFinite(offset)) { + offset = offset | 0 + if (isFinite(length)) { + length = length | 0 + if (encoding === undefined) encoding = 'utf8' + } else { + encoding = length + length = undefined + } + // legacy write(string, encoding, offset, length) - remove in v0.13 + } else { + var swap = encoding + encoding = offset + offset = length | 0 + length = swap + } + + var remaining = this.length - offset + if (length === undefined || length > remaining) length = remaining + + if ((string.length > 0 && (length < 0 || offset < 0)) || offset > this.length) { + throw new RangeError('attempt to write outside buffer bounds') + } + + if (!encoding) encoding = 'utf8' + + var loweredCase = false + for (;;) { + switch (encoding) { + case 'hex': + return hexWrite(this, string, offset, length) + + case 'utf8': + case 'utf-8': + return utf8Write(this, string, offset, length) + + case 'ascii': + return asciiWrite(this, string, offset, length) + + case 'binary': + return binaryWrite(this, string, offset, length) + + case 'base64': + // Warning: maxLength not taken into account in base64Write + return base64Write(this, string, offset, length) + + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return ucs2Write(this, string, offset, length) + + default: + if (loweredCase) throw new TypeError('Unknown encoding: ' + encoding) + encoding = ('' + encoding).toLowerCase() + loweredCase = true + } + } +} + +Buffer.prototype.toJSON = function toJSON () { + return { + type: 'Buffer', + data: Array.prototype.slice.call(this._arr || this, 0) + } +} + +function base64Slice (buf, start, end) { + if (start === 0 && end === buf.length) { + return base64.fromByteArray(buf) + } else { + return base64.fromByteArray(buf.slice(start, end)) + } +} + +function utf8Slice (buf, start, end) { + end = Math.min(buf.length, end) + var res = [] + + var i = start + while (i < end) { + var firstByte = buf[i] + var codePoint = null + var bytesPerSequence = (firstByte > 0xEF) ? 4 + : (firstByte > 0xDF) ? 3 + : (firstByte > 0xBF) ? 2 + : 1 + + if (i + bytesPerSequence <= end) { + var secondByte, thirdByte, fourthByte, tempCodePoint + + switch (bytesPerSequence) { + case 1: + if (firstByte < 0x80) { + codePoint = firstByte } - return matches; + break + case 2: + secondByte = buf[i + 1] + if ((secondByte & 0xC0) === 0x80) { + tempCodePoint = (firstByte & 0x1F) << 0x6 | (secondByte & 0x3F) + if (tempCodePoint > 0x7F) { + codePoint = tempCodePoint + } + } + break + case 3: + secondByte = buf[i + 1] + thirdByte = buf[i + 2] + if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80) { + tempCodePoint = (firstByte & 0xF) << 0xC | (secondByte & 0x3F) << 0x6 | (thirdByte & 0x3F) + if (tempCodePoint > 0x7FF && (tempCodePoint < 0xD800 || tempCodePoint > 0xDFFF)) { + codePoint = tempCodePoint + } + } + break + case 4: + secondByte = buf[i + 1] + thirdByte = buf[i + 2] + fourthByte = buf[i + 3] + if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80 && (fourthByte & 0xC0) === 0x80) { + tempCodePoint = (firstByte & 0xF) << 0x12 | (secondByte & 0x3F) << 0xC | (thirdByte & 0x3F) << 0x6 | (fourthByte & 0x3F) + if (tempCodePoint > 0xFFFF && tempCodePoint < 0x110000) { + codePoint = tempCodePoint + } + } + } + } + + if (codePoint === null) { + // we did not generate a valid codePoint so insert a + // replacement char (U+FFFD) and advance only 1 byte + codePoint = 0xFFFD + bytesPerSequence = 1 + } else if (codePoint > 0xFFFF) { + // encode to utf16 (surrogate pair dance) + codePoint -= 0x10000 + res.push(codePoint >>> 10 & 0x3FF | 0xD800) + codePoint = 0xDC00 | codePoint & 0x3FF + } + + res.push(codePoint) + i += bytesPerSequence + } + + return decodeCodePointsArray(res) +} + +// Based on http://stackoverflow.com/a/22747272/680742, the browser with +// the lowest limit is Chrome, with 0x10000 args. +// We go 1 magnitude less, for safety +var MAX_ARGUMENTS_LENGTH = 0x1000 + +function decodeCodePointsArray (codePoints) { + var len = codePoints.length + if (len <= MAX_ARGUMENTS_LENGTH) { + return String.fromCharCode.apply(String, codePoints) // avoid extra slice() + } + + // Decode in chunks to avoid "call stack size exceeded". + var res = '' + var i = 0 + while (i < len) { + res += String.fromCharCode.apply( + String, + codePoints.slice(i, i += MAX_ARGUMENTS_LENGTH) + ) + } + return res +} + +function asciiSlice (buf, start, end) { + var ret = '' + end = Math.min(buf.length, end) + + for (var i = start; i < end; i++) { + ret += String.fromCharCode(buf[i] & 0x7F) + } + return ret +} + +function binarySlice (buf, start, end) { + var ret = '' + end = Math.min(buf.length, end) + + for (var i = start; i < end; i++) { + ret += String.fromCharCode(buf[i]) + } + return ret +} + +function hexSlice (buf, start, end) { + var len = buf.length + + if (!start || start < 0) start = 0 + if (!end || end < 0 || end > len) end = len + + var out = '' + for (var i = start; i < end; i++) { + out += toHex(buf[i]) + } + return out +} + +function utf16leSlice (buf, start, end) { + var bytes = buf.slice(start, end) + var res = '' + for (var i = 0; i < bytes.length; i += 2) { + res += String.fromCharCode(bytes[i] + bytes[i + 1] * 256) + } + return res +} + +Buffer.prototype.slice = function slice (start, end) { + var len = this.length + start = ~~start + end = end === undefined ? len : ~~end + + if (start < 0) { + start += len + if (start < 0) start = 0 + } else if (start > len) { + start = len + } + + if (end < 0) { + end += len + if (end < 0) end = 0 + } else if (end > len) { + end = len + } + + if (end < start) end = start + + var newBuf + if (Buffer.TYPED_ARRAY_SUPPORT) { + newBuf = Buffer._augment(this.subarray(start, end)) + } else { + var sliceLen = end - start + newBuf = new Buffer(sliceLen, undefined) + for (var i = 0; i < sliceLen; i++) { + newBuf[i] = this[i + start] + } + } + + if (newBuf.length) newBuf.parent = this.parent || this + + return newBuf +} + +/* + * Need to make sure that buffer isn't trying to write out of bounds. + */ +function checkOffset (offset, ext, length) { + if ((offset % 1) !== 0 || offset < 0) throw new RangeError('offset is not uint') + if (offset + ext > length) throw new RangeError('Trying to access beyond buffer length') +} + +Buffer.prototype.readUIntLE = function readUIntLE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkOffset(offset, byteLength, this.length) + + var val = this[offset] + var mul = 1 + var i = 0 + while (++i < byteLength && (mul *= 0x100)) { + val += this[offset + i] * mul + } + + return val +} + +Buffer.prototype.readUIntBE = function readUIntBE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) { + checkOffset(offset, byteLength, this.length) + } + + var val = this[offset + --byteLength] + var mul = 1 + while (byteLength > 0 && (mul *= 0x100)) { + val += this[offset + --byteLength] * mul + } + + return val +} + +Buffer.prototype.readUInt8 = function readUInt8 (offset, noAssert) { + if (!noAssert) checkOffset(offset, 1, this.length) + return this[offset] +} + +Buffer.prototype.readUInt16LE = function readUInt16LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + return this[offset] | (this[offset + 1] << 8) +} + +Buffer.prototype.readUInt16BE = function readUInt16BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + return (this[offset] << 8) | this[offset + 1] +} + +Buffer.prototype.readUInt32LE = function readUInt32LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return ((this[offset]) | + (this[offset + 1] << 8) | + (this[offset + 2] << 16)) + + (this[offset + 3] * 0x1000000) +} + +Buffer.prototype.readUInt32BE = function readUInt32BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return (this[offset] * 0x1000000) + + ((this[offset + 1] << 16) | + (this[offset + 2] << 8) | + this[offset + 3]) +} + +Buffer.prototype.readIntLE = function readIntLE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkOffset(offset, byteLength, this.length) + + var val = this[offset] + var mul = 1 + var i = 0 + while (++i < byteLength && (mul *= 0x100)) { + val += this[offset + i] * mul + } + mul *= 0x80 + + if (val >= mul) val -= Math.pow(2, 8 * byteLength) + + return val +} + +Buffer.prototype.readIntBE = function readIntBE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkOffset(offset, byteLength, this.length) + + var i = byteLength + var mul = 1 + var val = this[offset + --i] + while (i > 0 && (mul *= 0x100)) { + val += this[offset + --i] * mul + } + mul *= 0x80 + + if (val >= mul) val -= Math.pow(2, 8 * byteLength) + + return val +} + +Buffer.prototype.readInt8 = function readInt8 (offset, noAssert) { + if (!noAssert) checkOffset(offset, 1, this.length) + if (!(this[offset] & 0x80)) return (this[offset]) + return ((0xff - this[offset] + 1) * -1) +} + +Buffer.prototype.readInt16LE = function readInt16LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + var val = this[offset] | (this[offset + 1] << 8) + return (val & 0x8000) ? val | 0xFFFF0000 : val +} + +Buffer.prototype.readInt16BE = function readInt16BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + var val = this[offset + 1] | (this[offset] << 8) + return (val & 0x8000) ? val | 0xFFFF0000 : val +} + +Buffer.prototype.readInt32LE = function readInt32LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return (this[offset]) | + (this[offset + 1] << 8) | + (this[offset + 2] << 16) | + (this[offset + 3] << 24) +} + +Buffer.prototype.readInt32BE = function readInt32BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return (this[offset] << 24) | + (this[offset + 1] << 16) | + (this[offset + 2] << 8) | + (this[offset + 3]) +} + +Buffer.prototype.readFloatLE = function readFloatLE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + return ieee754.read(this, offset, true, 23, 4) +} + +Buffer.prototype.readFloatBE = function readFloatBE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + return ieee754.read(this, offset, false, 23, 4) +} + +Buffer.prototype.readDoubleLE = function readDoubleLE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 8, this.length) + return ieee754.read(this, offset, true, 52, 8) +} + +Buffer.prototype.readDoubleBE = function readDoubleBE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 8, this.length) + return ieee754.read(this, offset, false, 52, 8) +} + +function checkInt (buf, value, offset, ext, max, min) { + if (!Buffer.isBuffer(buf)) throw new TypeError('buffer must be a Buffer instance') + if (value > max || value < min) throw new RangeError('value is out of bounds') + if (offset + ext > buf.length) throw new RangeError('index out of range') +} + +Buffer.prototype.writeUIntLE = function writeUIntLE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkInt(this, value, offset, byteLength, Math.pow(2, 8 * byteLength), 0) + + var mul = 1 + var i = 0 + this[offset] = value & 0xFF + while (++i < byteLength && (mul *= 0x100)) { + this[offset + i] = (value / mul) & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeUIntBE = function writeUIntBE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkInt(this, value, offset, byteLength, Math.pow(2, 8 * byteLength), 0) + + var i = byteLength - 1 + var mul = 1 + this[offset + i] = value & 0xFF + while (--i >= 0 && (mul *= 0x100)) { + this[offset + i] = (value / mul) & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeUInt8 = function writeUInt8 (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 1, 0xff, 0) + if (!Buffer.TYPED_ARRAY_SUPPORT) value = Math.floor(value) + this[offset] = (value & 0xff) + return offset + 1 +} + +function objectWriteUInt16 (buf, value, offset, littleEndian) { + if (value < 0) value = 0xffff + value + 1 + for (var i = 0, j = Math.min(buf.length - offset, 2); i < j; i++) { + buf[offset + i] = (value & (0xff << (8 * (littleEndian ? i : 1 - i)))) >>> + (littleEndian ? i : 1 - i) * 8 + } +} + +Buffer.prototype.writeUInt16LE = function writeUInt16LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0xffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value & 0xff) + this[offset + 1] = (value >>> 8) + } else { + objectWriteUInt16(this, value, offset, true) + } + return offset + 2 +} + +Buffer.prototype.writeUInt16BE = function writeUInt16BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0xffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 8) + this[offset + 1] = (value & 0xff) + } else { + objectWriteUInt16(this, value, offset, false) + } + return offset + 2 +} + +function objectWriteUInt32 (buf, value, offset, littleEndian) { + if (value < 0) value = 0xffffffff + value + 1 + for (var i = 0, j = Math.min(buf.length - offset, 4); i < j; i++) { + buf[offset + i] = (value >>> (littleEndian ? i : 3 - i) * 8) & 0xff + } +} + +Buffer.prototype.writeUInt32LE = function writeUInt32LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0xffffffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset + 3] = (value >>> 24) + this[offset + 2] = (value >>> 16) + this[offset + 1] = (value >>> 8) + this[offset] = (value & 0xff) + } else { + objectWriteUInt32(this, value, offset, true) + } + return offset + 4 +} + +Buffer.prototype.writeUInt32BE = function writeUInt32BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0xffffffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 24) + this[offset + 1] = (value >>> 16) + this[offset + 2] = (value >>> 8) + this[offset + 3] = (value & 0xff) + } else { + objectWriteUInt32(this, value, offset, false) + } + return offset + 4 +} + +Buffer.prototype.writeIntLE = function writeIntLE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) { + var limit = Math.pow(2, 8 * byteLength - 1) + + checkInt(this, value, offset, byteLength, limit - 1, -limit) + } + + var i = 0 + var mul = 1 + var sub = value < 0 ? 1 : 0 + this[offset] = value & 0xFF + while (++i < byteLength && (mul *= 0x100)) { + this[offset + i] = ((value / mul) >> 0) - sub & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeIntBE = function writeIntBE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) { + var limit = Math.pow(2, 8 * byteLength - 1) + + checkInt(this, value, offset, byteLength, limit - 1, -limit) + } + + var i = byteLength - 1 + var mul = 1 + var sub = value < 0 ? 1 : 0 + this[offset + i] = value & 0xFF + while (--i >= 0 && (mul *= 0x100)) { + this[offset + i] = ((value / mul) >> 0) - sub & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeInt8 = function writeInt8 (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 1, 0x7f, -0x80) + if (!Buffer.TYPED_ARRAY_SUPPORT) value = Math.floor(value) + if (value < 0) value = 0xff + value + 1 + this[offset] = (value & 0xff) + return offset + 1 +} + +Buffer.prototype.writeInt16LE = function writeInt16LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0x7fff, -0x8000) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value & 0xff) + this[offset + 1] = (value >>> 8) + } else { + objectWriteUInt16(this, value, offset, true) + } + return offset + 2 +} + +Buffer.prototype.writeInt16BE = function writeInt16BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0x7fff, -0x8000) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 8) + this[offset + 1] = (value & 0xff) + } else { + objectWriteUInt16(this, value, offset, false) + } + return offset + 2 +} + +Buffer.prototype.writeInt32LE = function writeInt32LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0x7fffffff, -0x80000000) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value & 0xff) + this[offset + 1] = (value >>> 8) + this[offset + 2] = (value >>> 16) + this[offset + 3] = (value >>> 24) + } else { + objectWriteUInt32(this, value, offset, true) + } + return offset + 4 +} + +Buffer.prototype.writeInt32BE = function writeInt32BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0x7fffffff, -0x80000000) + if (value < 0) value = 0xffffffff + value + 1 + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 24) + this[offset + 1] = (value >>> 16) + this[offset + 2] = (value >>> 8) + this[offset + 3] = (value & 0xff) + } else { + objectWriteUInt32(this, value, offset, false) + } + return offset + 4 +} + +function checkIEEE754 (buf, value, offset, ext, max, min) { + if (value > max || value < min) throw new RangeError('value is out of bounds') + if (offset + ext > buf.length) throw new RangeError('index out of range') + if (offset < 0) throw new RangeError('index out of range') +} + +function writeFloat (buf, value, offset, littleEndian, noAssert) { + if (!noAssert) { + checkIEEE754(buf, value, offset, 4, 3.4028234663852886e+38, -3.4028234663852886e+38) + } + ieee754.write(buf, value, offset, littleEndian, 23, 4) + return offset + 4 +} + +Buffer.prototype.writeFloatLE = function writeFloatLE (value, offset, noAssert) { + return writeFloat(this, value, offset, true, noAssert) +} + +Buffer.prototype.writeFloatBE = function writeFloatBE (value, offset, noAssert) { + return writeFloat(this, value, offset, false, noAssert) +} + +function writeDouble (buf, value, offset, littleEndian, noAssert) { + if (!noAssert) { + checkIEEE754(buf, value, offset, 8, 1.7976931348623157E+308, -1.7976931348623157E+308) + } + ieee754.write(buf, value, offset, littleEndian, 52, 8) + return offset + 8 +} + +Buffer.prototype.writeDoubleLE = function writeDoubleLE (value, offset, noAssert) { + return writeDouble(this, value, offset, true, noAssert) +} + +Buffer.prototype.writeDoubleBE = function writeDoubleBE (value, offset, noAssert) { + return writeDouble(this, value, offset, false, noAssert) +} + +// copy(targetBuffer, targetStart=0, sourceStart=0, sourceEnd=buffer.length) +Buffer.prototype.copy = function copy (target, targetStart, start, end) { + if (!start) start = 0 + if (!end && end !== 0) end = this.length + if (targetStart >= target.length) targetStart = target.length + if (!targetStart) targetStart = 0 + if (end > 0 && end < start) end = start + + // Copy 0 bytes; we're done + if (end === start) return 0 + if (target.length === 0 || this.length === 0) return 0 + + // Fatal error conditions + if (targetStart < 0) { + throw new RangeError('targetStart out of bounds') + } + if (start < 0 || start >= this.length) throw new RangeError('sourceStart out of bounds') + if (end < 0) throw new RangeError('sourceEnd out of bounds') + + // Are we oob? + if (end > this.length) end = this.length + if (target.length - targetStart < end - start) { + end = target.length - targetStart + start + } + + var len = end - start + var i + + if (this === target && start < targetStart && targetStart < end) { + // descending copy from end + for (i = len - 1; i >= 0; i--) { + target[i + targetStart] = this[i + start] + } + } else if (len < 1000 || !Buffer.TYPED_ARRAY_SUPPORT) { + // ascending copy from start + for (i = 0; i < len; i++) { + target[i + targetStart] = this[i + start] + } + } else { + target._set(this.subarray(start, start + len), targetStart) + } + + return len +} + +// fill(value, start=0, end=buffer.length) +Buffer.prototype.fill = function fill (value, start, end) { + if (!value) value = 0 + if (!start) start = 0 + if (!end) end = this.length + + if (end < start) throw new RangeError('end < start') + + // Fill 0 bytes; we're done + if (end === start) return + if (this.length === 0) return + + if (start < 0 || start >= this.length) throw new RangeError('start out of bounds') + if (end < 0 || end > this.length) throw new RangeError('end out of bounds') + + var i + if (typeof value === 'number') { + for (i = start; i < end; i++) { + this[i] = value + } + } else { + var bytes = utf8ToBytes(value.toString()) + var len = bytes.length + for (i = start; i < end; i++) { + this[i] = bytes[i % len] + } + } + + return this +} + +/** + * Creates a new `ArrayBuffer` with the *copied* memory of the buffer instance. + * Added in Node 0.12. Only available in browsers that support ArrayBuffer. + */ +Buffer.prototype.toArrayBuffer = function toArrayBuffer () { + if (typeof Uint8Array !== 'undefined') { + if (Buffer.TYPED_ARRAY_SUPPORT) { + return (new Buffer(this)).buffer + } else { + var buf = new Uint8Array(this.length) + for (var i = 0, len = buf.length; i < len; i += 1) { + buf[i] = this[i] + } + return buf.buffer + } + } else { + throw new TypeError('Buffer.toArrayBuffer not supported in this browser') + } +} + +// HELPER FUNCTIONS +// ================ + +var BP = Buffer.prototype + +/** + * Augment a Uint8Array *instance* (not the Uint8Array class!) with Buffer methods + */ +Buffer._augment = function _augment (arr) { + arr.constructor = Buffer + arr._isBuffer = true + + // save reference to original Uint8Array set method before overwriting + arr._set = arr.set + + // deprecated + arr.get = BP.get + arr.set = BP.set + + arr.write = BP.write + arr.toString = BP.toString + arr.toLocaleString = BP.toString + arr.toJSON = BP.toJSON + arr.equals = BP.equals + arr.compare = BP.compare + arr.indexOf = BP.indexOf + arr.copy = BP.copy + arr.slice = BP.slice + arr.readUIntLE = BP.readUIntLE + arr.readUIntBE = BP.readUIntBE + arr.readUInt8 = BP.readUInt8 + arr.readUInt16LE = BP.readUInt16LE + arr.readUInt16BE = BP.readUInt16BE + arr.readUInt32LE = BP.readUInt32LE + arr.readUInt32BE = BP.readUInt32BE + arr.readIntLE = BP.readIntLE + arr.readIntBE = BP.readIntBE + arr.readInt8 = BP.readInt8 + arr.readInt16LE = BP.readInt16LE + arr.readInt16BE = BP.readInt16BE + arr.readInt32LE = BP.readInt32LE + arr.readInt32BE = BP.readInt32BE + arr.readFloatLE = BP.readFloatLE + arr.readFloatBE = BP.readFloatBE + arr.readDoubleLE = BP.readDoubleLE + arr.readDoubleBE = BP.readDoubleBE + arr.writeUInt8 = BP.writeUInt8 + arr.writeUIntLE = BP.writeUIntLE + arr.writeUIntBE = BP.writeUIntBE + arr.writeUInt16LE = BP.writeUInt16LE + arr.writeUInt16BE = BP.writeUInt16BE + arr.writeUInt32LE = BP.writeUInt32LE + arr.writeUInt32BE = BP.writeUInt32BE + arr.writeIntLE = BP.writeIntLE + arr.writeIntBE = BP.writeIntBE + arr.writeInt8 = BP.writeInt8 + arr.writeInt16LE = BP.writeInt16LE + arr.writeInt16BE = BP.writeInt16BE + arr.writeInt32LE = BP.writeInt32LE + arr.writeInt32BE = BP.writeInt32BE + arr.writeFloatLE = BP.writeFloatLE + arr.writeFloatBE = BP.writeFloatBE + arr.writeDoubleLE = BP.writeDoubleLE + arr.writeDoubleBE = BP.writeDoubleBE + arr.fill = BP.fill + arr.inspect = BP.inspect + arr.toArrayBuffer = BP.toArrayBuffer + + return arr +} + +var INVALID_BASE64_RE = /[^+\/0-9A-Za-z-_]/g + +function base64clean (str) { + // Node strips out invalid characters like \n and \t from the string, base64-js does not + str = stringtrim(str).replace(INVALID_BASE64_RE, '') + // Node converts strings with length < 2 to '' + if (str.length < 2) return '' + // Node allows for non-padded base64 strings (missing trailing ===), base64-js does not + while (str.length % 4 !== 0) { + str = str + '=' + } + return str +} + +function stringtrim (str) { + if (str.trim) return str.trim() + return str.replace(/^\s+|\s+$/g, '') +} + +function toHex (n) { + if (n < 16) return '0' + n.toString(16) + return n.toString(16) +} + +function utf8ToBytes (string, units) { + units = units || Infinity + var codePoint + var length = string.length + var leadSurrogate = null + var bytes = [] + + for (var i = 0; i < length; i++) { + codePoint = string.charCodeAt(i) + + // is surrogate component + if (codePoint > 0xD7FF && codePoint < 0xE000) { + // last char was a lead + if (!leadSurrogate) { + // no lead yet + if (codePoint > 0xDBFF) { + // unexpected trail + if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) + continue + } else if (i + 1 === length) { + // unpaired lead + if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) + continue + } + + // valid lead + leadSurrogate = codePoint + + continue + } + + // 2 leads in a row + if (codePoint < 0xDC00) { + if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) + leadSurrogate = codePoint + continue + } + + // valid surrogate pair + codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000 + } else if (leadSurrogate) { + // valid bmp char, but last char was a lead + if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) + } + + leadSurrogate = null + + // encode utf8 + if (codePoint < 0x80) { + if ((units -= 1) < 0) break + bytes.push(codePoint) + } else if (codePoint < 0x800) { + if ((units -= 2) < 0) break + bytes.push( + codePoint >> 0x6 | 0xC0, + codePoint & 0x3F | 0x80 + ) + } else if (codePoint < 0x10000) { + if ((units -= 3) < 0) break + bytes.push( + codePoint >> 0xC | 0xE0, + codePoint >> 0x6 & 0x3F | 0x80, + codePoint & 0x3F | 0x80 + ) + } else if (codePoint < 0x110000) { + if ((units -= 4) < 0) break + bytes.push( + codePoint >> 0x12 | 0xF0, + codePoint >> 0xC & 0x3F | 0x80, + codePoint >> 0x6 & 0x3F | 0x80, + codePoint & 0x3F | 0x80 + ) + } else { + throw new Error('Invalid code point') + } + } + + return bytes +} + +function asciiToBytes (str) { + var byteArray = [] + for (var i = 0; i < str.length; i++) { + // Node's code seems to be doing this and not & 0x7F.. + byteArray.push(str.charCodeAt(i) & 0xFF) + } + return byteArray +} + +function utf16leToBytes (str, units) { + var c, hi, lo + var byteArray = [] + for (var i = 0; i < str.length; i++) { + if ((units -= 2) < 0) break + + c = str.charCodeAt(i) + hi = c >> 8 + lo = c % 256 + byteArray.push(lo) + byteArray.push(hi) + } + + return byteArray +} + +function base64ToBytes (str) { + return base64.toByteArray(base64clean(str)) +} + +function blitBuffer (src, dst, offset, length) { + for (var i = 0; i < length; i++) { + if ((i + offset >= dst.length) || (i >= src.length)) break + dst[i + offset] = src[i] + } + return i +} + +}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) +},{"base64-js":1,"ieee754":56,"isarray":5}],5:[function(require,module,exports){ +var toString = {}.toString; + +module.exports = Array.isArray || function (arr) { + return toString.call(arr) == '[object Array]'; +}; + +},{}],6:[function(require,module,exports){ +/* Citation.js - a legal citation extractor. + * + * Open source, dedicated to the public domain: https://github.com/unitedstates/citation + * + * Originally authored by Eric Mill (@konklone), at the Sunlight Foundation, + * many contributions by https://github.com/unitedstates/citation/graphs/contributors + */ + + +module.exports = (function(Citation) { + +Citation = { + + // will be filled in by individual citation types as available + types: {}, + + // filters that can pre-process text and post-process citations + filters: {}, + + // link sources that add permalink information to citations + links: {}, + + // TODO: document this inline + // check a block of text for citations of a given type - + // return an array of matches, with citation broken out into fields + find: function(text, options) { + if (!options) options = {}; + if (typeof(text) !== "string") return; + + // client can apply a filter that pre-processes text before extraction, + // and post-processes citations after extraction + var results; + if (options.filter && Citation.filters[options.filter]) + return Citation.filtered(options.filter, text, options); + + // otherwise, do a single pass over the whole text. + else + return Citation.extract(text, options); + }, + + // return an array of matched and filter-mapped cites + filtered: function(name, text, options) { + var results = []; + + var filter = Citation.filters[name]; + + // filter can break up the text into pieces with accompanying metadata + filter.from(text, options[name], function(piece, metadata) { + var response = Citation.extract(piece, options); + + // ignores any replaced text, it falls off the edge of the earth + + var filtered = response.citations.map(function(result) { + + Object.keys(metadata).forEach(function(key) { + result[key] = metadata[key]; + }); + + return result; + }); + + results = results.concat(filtered); + }); + + // doesn't return replaced text + return {citations: results}; + }, + + + // run the citators over the text, return an array of matched cites + extract: function(text, options) { + if (!options) options = {}; + + // default: no excerpt + var excerpt = options.excerpt ? parseInt(options.excerpt, 10) : 0; + + // whether to return parent citations + // default: false + var parents = options.parents || false; + + // default: all types, can be filtered to one, or an array of them + var types = Citation.selectedTypes(options); + if (types.length === 0) return null; + + + // The caller can provide a replace callback to alter every found citation. + // this function will be called with each (found and processed) cite object, + // and should return a string to be put in the cite's place. + // + // The resulting transformed string will be in the returned object as a 'text' field. + // this field will only be present if a replace callback was provided. + // + // providing this callback will also cause matched cites not to return the 'index' field, + // as the replace process will completely screw them up. only use the 'index' field if you + // plan on doing your own replacing. + var replace = options.replace; + + // accumulate the results + var results = []; + + + // will hold the calculated context-specific patterns we are to run + // over the given text, tracked by index we expect to find them at. + // nextIndex tracks a running index as we loop through patterns. + // (citators could just be called indexedPatterns) + var citators = {}; + var nextIndex = 0; + + // Go through every regex-based citator and prepare a set of patterns, + // indexed by the order of a matched arguments array. + types.forEach(function(type) { + if (Citation.types[type].type != "regex") return; + + // Calculate the patterns this citator will contribute to the parse. + // (individual parsers can opt to make their parsing context-specific) + var patterns = Citation.types[type].patterns; + if (typeof(patterns) == "function") + patterns = patterns(options[type] || {}); + + // add each pattern, keeping a running tally of what we would + // expect its primary index to be when found in the master regex. + patterns.forEach(function(pattern) { + pattern.type = type; // will be needed later + citators[nextIndex] = pattern; + nextIndex += pattern.fields.length + 1; + }); + }); + + // If there are any regex-based patterns being applied, combine them + // and run a find/replace over the string. + var regexes = Object.keys(citators).map(function(key) {return citators[key].regex}); + if (regexes.length > 0) { + + // merge all regexes into one, so that each pattern will begin at a predictable place + var regex = new RegExp("(" + regexes.join(")|(") + ")", "ig"); + + var replaced = text.replace(regex, function() { + var match = arguments[0]; + + // offset is second-to-last argument + var index = arguments[arguments.length - 2]; + + // pull out just the regex-captured matches + var captures = Array.prototype.slice.call(arguments, 1, -2); + + // find the first matched index in the captures + var matchIndex; + for (matchIndex=0; matchIndex 0) { + var proposedLeft = index - excerpt; + var left = proposedLeft > 0 ? proposedLeft : 0; + + var proposedRight = index + matchInfo.match.length + excerpt; + var right = (proposedRight <= text.length) ? proposedRight : text.length; + + matchInfo.excerpt = text.substring(left, right); + } + + + // if we want parent cites too, make those now + if (parents && Citation.types[type].parents_by) { + cites = Citation._.flatten(cites.map(function(cite) { + return Citation.citeParents(cite, type); + })); + } + + cites = cites.map(function(cite) { + var result = {}; + + // match-level info + Citation._.extend(result, matchInfo); + + // handle _submatch, which lets the user-level citator override the + // match and index with a sub-part of the whole matched regex + if (cite._submatch) { + result.match = cite._submatch.text; + result.index += cite._submatch.offset; + delete cite._submatch; + } + + // since a single text region can match multiple citations, such as when + // a range is given, clarify what this match represents + if ('canonical' in Citation.types[type]) + result.citation = Citation.types[type].canonical(cite); + + // cite-level info, plus ID standardization + result[type] = cite; + result[type].id = Citation.types[type].id(cite); + + // add permalinks if requested and a link source exists for this citation + // type. + if (options.links) + result[type].links = Citation.getLinksForCitation(type, cite); + + results.push(result); + + return result; + }); + + // If a replace function is given, replace each matched citation by the + // result of calling the replace function with the citation passed as its + // only argument. + // + // Most citators return only a single citation match per regex match, but + // some return multiple citations for strings like "§§ 32-701 through 32-703". + + // Collect the final match string here. + var finalstring = matchInfo.match; + + // Get the replace function. If options.replace is a function use that, + // or if it is an object mapping the citator type to a function use that. + var replace_func = null; + if (typeof(replace) === "function") + replace_func = replace; + else if ((typeof(replace) === "object") && (typeof(replace[type]) === "function")) + replace_func = replace[type]; + else + replace_func = null; + + // If there's a replacement function... + if (replace_func) { + // Process the citations in the order they are returned. Assume they are + // ordered from left to right. + var last_index = 0; + var dx = 0; + for (var i = 0; i < cites.length; i++) { + // Skip citations that overlap with the previous citation (e.g. there + // may be two citations for the same text range.) + if (cites[i].index >= last_index) { + // Execute the replacement function. If the return is truth-y, perform + // a replacement. + var replacement = replace_func(cites[i]); + if (replacement) { + // Replace the substring. + finalstring = finalstring.substring(0, cites[i].index-index+dx) + replacement + finalstring.substring(cites[i].index-index+cites[i].match.length+dx); + + // The replacement text may have a different length than the text + // being replaced. Keep track of the total change in string length + // as we go because we have to adjust future citation replacements's + // indexes so that we make the edit to finalstring in the right place. + dx += replacement.length - cites[i].match.length; + + // And track the end of last citation so we can skip any future citations + // that overlap with this text range. + last_index = cites[i].index + cites[i].match.length; + } + } + + // Per the citation API, delete the index field when doing a replacement. + // After replacements, the index will no longer be useful to the caller + // because the string has been edited. + delete cites[i].index; + } + } + return finalstring; + }); + } + + // TODO: do for any external cite types, not just "judicial" + if (types.indexOf("judicial") != -1) + results = results.concat(Citation.types.judicial.extract(text)); + + var response = {citations: results}; + if (options.replace) response.text = replaced; + + return response; + }, + + + // for a given set of cite-specific details, + // return itself and its parent citations + citeParents: function(citation, type) { + var field = Citation.types[type].parents_by; + var results = []; + + for (var i=citation[field].length; i >= 0; i--) { + var parent = Citation._.extend({}, citation); + parent[field] = parent[field].slice(0, i); + results.push(parent); + } + return results; + }, + + // given an array of captures *beginning* with values the pattern + // knows how to process, turn it into an object with those keys. + matchFor: function(captures, pattern) { + var match = {}; + for (var i=0; i 0) + types = options.types; + } else + types = [options.types]; + } + + // only allow valid types + if (types) { + types = types.filter(function(type) { + return Object.keys(Citation.types).indexOf(type) != -1; + }); + } else + types = Object.keys(Citation.types); + + return types; + }, + + getLinksForCitation: function(type, cite) { + // Create a place to store the links. + var links = {}; + + // Check each link source to see if it provides a link for this type + // of citation. + for (var link_source in Citation.links) { + var link_source_module = Citation.links[link_source]; + if (type in link_source_module.citations) { + + // This link source provides link info for this type of citation. + // The function may return null if it doesn't provide a link for + // the particular citation. + var link_info = link_source_module.citations[type](cite); + if (link_info) { + // Add source metadata. + link_info.source = { + name: link_source_module.name, + abbreviation: link_source_module.abbreviation, + link: link_source_module.link, + authoritative: link_source_module.authoritative + }; + + // Add to citation. + links[link_source_module.id] = link_info; + } + } + } + + return links; + }, + + // small replacement for several functions previously served by + // the `underscore` library. + _: { + extend: function(obj) { + Array.prototype.slice.call(arguments, 1).forEach(function(source) { + if (source) { + for (var prop in source) + obj[prop] = source[prop]; + } + }); + return obj; + }, + + flatten: function(array) { + var impl = function(input, output) { + input.forEach(function(value) { + if (Array.isArray(value)) + impl(value, output); + else + output.push(value); + }); + return output; + } + + return impl(array, []); + } + } + +}; + + +// TODO: load only the citation types, filters, and link sources asked for +if (typeof(require) !== "undefined") { + Citation.types.usc = require("./citations/usc"); + Citation.types.law = require("./citations/law"); + Citation.types.cfr = require("./citations/cfr"); + Citation.types.va_code = require("./citations/va_code"); + Citation.types.dc_code = require("./citations/dc_code"); + Citation.types.dc_register = require("./citations/dc_register"); + Citation.types.dc_law = require("./citations/dc_law"); + Citation.types.dc_stat = require("./citations/dc_stat"); + Citation.types.stat = require("./citations/stat"); + Citation.types.reporter = require("./citations/reporter"); + Citation.types.fedreg = require("./citations/fedreg"); + Citation.types.usconst = require("./citations/usconst"); + + + Citation.filters.lines = require("./filters/lines"); + Citation.filters.xpath_html = require("./filters/xpath_html"); + Citation.filters.xpath_xml = require("./filters/xpath_xml"); + + Citation.links.cornell_lii = require("./links/cornell_lii"); + Citation.links.courtlistener = require("./links/courtlistener"); + Citation.links.dc_council = require("./links/dc_council"); + Citation.links.govtrack = require("./links/govtrack"); + Citation.links.gpo = require("./links/gpo"); + Citation.links.house = require("./links/house"); + Citation.links.legislink = require("./links/legislink"); + Citation.links.libraryofcongress = require("./links/libraryofcongress"); + Citation.links.nara = require("./links/nara"); + Citation.links.vadecoded = require("./links/vadecoded"); +} + +// auto-load in-browser +if (typeof(window) !== "undefined") + window.Citation = Citation; + +return Citation; + +})(); + +},{"./citations/cfr":7,"./citations/dc_code":8,"./citations/dc_law":9,"./citations/dc_register":10,"./citations/dc_stat":11,"./citations/fedreg":12,"./citations/law":13,"./citations/reporter":14,"./citations/stat":15,"./citations/usc":16,"./citations/usconst":17,"./citations/va_code":18,"./filters/lines":19,"./filters/xpath_html":20,"./filters/xpath_xml":21,"./links/cornell_lii":22,"./links/courtlistener":23,"./links/dc_council":24,"./links/govtrack":25,"./links/gpo":26,"./links/house":27,"./links/legislink":28,"./links/libraryofcongress":29,"./links/nara":30,"./links/vadecoded":31}],7:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(data) { + return ["cfr", data.title, (data.section || data.part)] + .concat(data.subsections || []) + .join("/") + }, + + patterns: [ + // done: + // 14 CFR part 25 + // 38 CFR Part 74.2 + // 48 CFR § 9903.201 + // 24 CFR 85.25(h) + // 5 CFR §531.610(f) + // 45 C.F.R. 3009.4 + // 47 CFR 54.506 (c) + // but not: 47 CFR 54.506 (whatever) + // 5CFR, part 575 + + // maybe: + // 13 CFR Parts 125 and 134 + // 5CFR, part 575, subpart C + // 23 CFR 650, Subpart A + { + regex: + "(\\d+)\\s?" + // Title number + "C\\.?\\s?F\\.?\\s?R\\.?" + // CFR + "(?:[\\s,]+(?:§+|parts?))?" + // Extra separators (section sign, part) + "\\s*(\\d+(?:(?:[-–—]\\d+)?[a-z]?" + // Part number + "(?:\\.(?:13h[-–—]l|\\d+[-–—]?\\d*\\.5\\d|(?:\\d+T|T|\\d+[-–—]DD[-–—]|\\d+[-–—]WH[-–—]|\\d+[a-z]{1,2}\\d*[-–—])?\\d+)[a-z]{0,2}(?:(?:(?:\\([a-z\\d]{1,2}\\))*[-–—]\\d+)+[a-z]{0,2})?)?" + // Optionally: period and section number + "(?:(?:\\s*\\((?:[a-z\\d]{1,2}|[ixv]+)\\))+)?)?)", // Optionally: subsections, if there was a section number + + fields: ['title', 'sections'], + + processor: function(captures) { + var title = captures.title; + var part, section, subsections; + + // convert all dashes to hyphens, deduplicate hyphens, and look for + // subsections starting after the last hyphen + var hyphen_split = captures.sections.split(/[-–—]+/); + var head, tail; + if (hyphen_split.length > 1) { + head = hyphen_split.slice(0, -1).join("-") + "-"; + tail = hyphen_split[hyphen_split.length - 1]; + } else { + head = ""; + tail = hyphen_split[0]; + } + + // separate subsections for each section being considered + var paren_split = tail.split(/[\(\)]+/).filter(function(x) {return x;}); + section = head + paren_split[0].trim(); + subsections = paren_split.splice(1); + + if (section.indexOf(".") > 0) + part = section.split(".")[0]; + else { + part = section; + section = null; + subsections = null; // don't include empty array + } + + return { + title: title, + part: part, + section: section, + subsections: subsections + }; + } + } + + // todo: + // parts 121 and 135 of Title 14 of the Code of Federal Regulations + // { + // regex: + // "section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*)" + + // "(?:\\s+of|\\,) title (\\d+)", + // fields: ['section', 'subsections', 'title'], + // processor: function(captures) { + // return { + // title: captures.title, + // section: captures.section, + // subsections: captures.subsections.split(/[\(\)]+/).filter(function(x) {return x;}) + // }; + // } + // } + ] +}; + +},{}],8:[function(require,module,exports){ +var base_regex = + "(\\d+A?)" + // title + "\\s?\\-\\s?" + // dash + "([\\w\\d]+(?:\\.?[\\w\\d]+)?)" + // section identifier (letters/numbers/dots) + "((?:\\([^\\)]+\\))*)"; // subsection (any number of adjacent parenthesized subsections) + +module.exports = { + type: "regex", + + // normalize all cites to an ID, with and without subsections + id: function(cite) { + return ["dc-code", cite.title, cite.section] + .concat(cite.subsections) + .join("/"); + }, + + // field to calculate parents from + parents_by: "subsections", + + patterns: function(context) { + // D.C. Official Code 3-1202.04 + // D.C. Official Code § 3-1201.01 + // D.C. Official Code §§ 38-2602(b)(11) + // D.C. Official Code § 3- 1201.01 + // D.C. Official Code § 3 -1201.01 + // + // § 32-701 + // § 32-701(4) + // § 3-101.01 + // § 1-603.01(13) + // § 1- 1163.33 + // § 1 -1163.33 + // section 16-2326.01 + + var prefix_regex = ""; + var section_regex = "(?:sections?\\s+|§+\\s*)"; + var sections_regex = "(?:sections\\s+|§§\\s*)"; + if (context.source != "dc_code") { + // Require "DC Official Code" but then make the section symbol optional. + prefix_regex = "D\\.?C\\.? (?:Official )?Code\\s+"; + section_regex = "(?:" + section_regex + ")?"; + sections_regex = "(?:" + sections_regex + ")?"; + } + + return [ + // multiple citations + // has precedence over a single citation + // Unlike the single citation, the matched parts are just the title/section/subsection + // and omits "DC Code" and the section symbols (if present) from the matched text. + { + regex: "(" + prefix_regex + sections_regex + ")(" + base_regex + "(?:(?:,|, and|\\s+and|\\s+through|\\s+to)\\s+" + base_regex + ")+)", + + fields: ["prefix", "multicite", "title1", "section1", "subsections1", "title2", "section2", "subsections2"], + + processor: function(captures) { + var rx = new RegExp(base_regex, "g"); + var matches = new Array(); + var match; + while((match = rx.exec(captures.multicite)) !== null) { + matches.push({ + _submatch: { + text: match[0], + offset: captures.prefix.length + match.index, + }, + title: match[1], + section: match[2], + subsections: split_subsections(match[3]) + }); + } + return matches; + } + }, + + // a single citation + { + regex: prefix_regex + section_regex + base_regex, + + fields: ["title", "section", "subsections"], + + processor: function(captures) { + var title = captures.title; + var section = captures.section; + var subsections = split_subsections(captures.subsections); + + return { + title: title, + section: section, + subsections: subsections + }; + } + } + ]; + } +}; + +function split_subsections(match) { + if (match) + return match.split(/[\(\)]+/).filter(function(x) {return x}); + else + return []; +} + +},{}],9:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(cite) { + return ["dc-law", cite.period, cite.number].join("/"); + }, + + patterns: function(context) { + // If the context for this citation is the DC Code, then Law XX-YYY can be assumed + // to be a DC law. In other context, require the "DC Law" prefix. In the DC Code + // context also slurp in the "DC" prefix. + var context_regex = "D\\.?\\s*C\\.?\\s+"; + if (context.source == "dc_code") + context_regex = "(?:" + context_regex + ")?" + + return [ + // "D.C. Law 20-17" + // "DC Law 20-17" + // "DC Law 18-135A" + { + regex: + context_regex + "Law\\s+(\\d+)\\s?[-–]+\\s?(\\d+\\w?)", + fields: ["period", "number"], + processor: function(captures) { + return { + period: captures.period, + number: captures.number + }; + } + } + ]; + } +}; + +},{}],10:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(cite) { + return ["dc-register", cite.volume, cite.page].join("/"); + }, + + patterns: [ + // 54 DCR 8014 + { + regex: + "(\\d+)\\s+" + + "DCR" + + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],11:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["dcstat", cite.volume, cite.page].join("/") + }, + + patterns: [ + // "20 DCSTAT 1952" + { + regex: + "(\\d+)\\s+" + + "DCSTAT" + + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],12:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["fedreg", cite.volume, cite.page].join("/") + }, + + + patterns: [ + // "75 Fed. Reg. 28404" + // "69 FR 22135" + { + regex: + "(\\d+)\\s+" + + "(?:Fed\\.?\\sReg?\\.?|F\\.?R\\.?)" + + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],13:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(cite) { + return ["us-law", cite.type, cite.congress, cite.number] + .concat(cite.sections || []) + .join("/"); + }, + + canonical: function(cite) { + if (!cite.sections || cite.sections.length == 0) + // this style matches GPO at http://www.gpo.gov/fdsys/browse/collection.action?collectionCode=PLAW&browsePath=112&isCollapsed=false&leafLevelBrowse=false&ycord=0 + return (cite.type == "public" ? "Pub. L." : "Pvt. L.") + " " + cite.congress + "-" + cite.number; + else + return "Section " + cite.sections[0] + cite.sections.slice(1).map(function(item) { return "(" + item + ")" }).join("") + + " of " + + (cite.type == "public" ? "Public" : "Private") + " Law " + cite.congress + "-" + cite.number; + }, + + // field to calculate parents from + parents_by: "sections", + + patterns: [ + // "Public Law 111-89" + // "Pub. L. 112-56" + // "Pub. L. No. 110-2" + // "Pub.L. 105-33" + // "Private Law 111-72" + // "Priv. L. No. 98-23" + // "section 552 of Public Law 111-89" + // "section 4402(e)(1) of Public Law 110-2" + { + regex: + "(?:section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*) of )?" + + "(pub(?:lic)?|priv(?:ate)?)\\.?\\s*l(?:aw)?\\.?(?:\\s*No\\.?)?" + + " +(\\d+)[-–]+(\\d+)", + fields: ['section', 'subsections', 'type', 'congress', 'number'], + processor: function(captures) { + var sections = []; + if (captures.section) sections.push(captures.section); + if (captures.subsections) sections = sections.concat(captures.subsections.split(/[\(\)]+/).filter(function(x) {return x})); + + return { + type: captures.type.match(/^priv/i) ? "private" : "public", + congress: captures.congress, + number: captures.number, + sections: sections + }; + } + }, + + // "PL 19-4" + // "P.L. 45-78" + // "section 552 of PL 19-4" + // "section 4402(e)(1) of PL 19-4" + { + regex: + "(?:section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*) of )?" + + "P\\.?L\\.? +(\\d+)[-–](\\d+)", + fields: ['section', 'subsections', 'congress', 'number'], + processor: function(captures) { + sections = []; + if (captures.section) sections.push(captures.section); + if (captures.subsections) sections = sections.concat(captures.subsections.split(/[\(\)]+/).filter(function(x) {return x})); + + return { + type: "public", + congress: captures.congress, + number: captures.number, + sections: sections + }; + } + } + ] +}; + +},{}],14:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["reporter", cite.volume, cite.reporter, cite.page].join("/") + }, + + canonical: function(cite) { + return cite.volume + " " + cite.reporter + " " + cite.page; + }, + + patterns: [ + { + regex: + "(\\d{1,3})\\s" + + "(\\w+(?:\\.\\w+(?:\\.)?)?(?:\\.\\dd)?|U\\.?\\s?S\\.?|F\\. Supp\\.(?:\\s\\dd)?)\\s" + + "(\\d{1,4})", + fields: ['volume', 'reporter', 'page'], + processor: function(match) { + return { + volume: match.volume, + reporter: match.reporter, + page: match.page, + }; + } + } + ] +}; + +},{}],15:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["stat", cite.volume, cite.page].join("/") + }, + + canonical: function(cite) { + return cite.volume + " Stat. " + cite.page; + }, + + patterns: [ + // "117 Stat. 1952" + // "77 STAT. 77" + { + regex: + "(\\d+[\\w]*)\\s+" + + "Stat\\.?" + + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],16:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(cite) { + return ["usc", cite.title, cite.section] + .concat(cite.subsections || []) + .join("/"); + }, + + canonical: function(cite) { + // title, which also may specify it is an appendix title + var title = cite.title; + var app = ""; + var title_without_app = cite.title.replace(/-app$/, ''); + if (title != title_without_app) app = "App. "; + + // subsections, possibly with a note/et-seq as a leaf which should + // be rendered differently from a normal subsection item + var subsections = cite.subsections.slice(); // clone + var suffix = ""; + var leaf = subsections.length > 0 ? subsections[subsections.length-1] : null; + if (leaf == "note") { + subsections.pop(); + suffix = " note" + } else if (leaf == "et-seq") { + subsections.pop(); + suffix = " et seq" + } + + return title_without_app + " U.S.C. " + app + cite.section + + subsections.map(function(item) { return "(" + item + ")" }).join("") + + suffix; + }, + + // field to calculate parents from + parents_by: "subsections", + + patterns: [ + // "5 USC 552" + // "5 U.S.C. § 552(a)(1)(E)" + // "7 U.S.C. 612c note" + // "29 U.S.C. 1081 et seq" + // "50 U.S.C. App. 595" + // "45 U.S.C. 10a-10c" + // "50 U.S.C. 404o-1(a)" - single section + // "45 U.S.C. 10a(1)-10c(2)" - range + // "50 U.S.C. App. §§ 451--473" - range + { + regex: + "(\\d+)\\s+" + // title + "U\\.?\\s?S\\.?\\s?C\\.?" + + "(?:\\s+(App)\.?)?\\s+" + // appendix + "(?:(§+)\\s*)?" + // symbol + "((?:[-–—]*\\d+[\\w\\d\\-–—]*(?:\\([^\\)]+\\))*)+)" + // sections + "(?:\\s+(note|et\\s+seq))?", // note + + fields: [ + 'title', 'appendix', + 'symbol', 'sections', 'note' + ], + + processor: function(match) { + // a few titles have distinct appendixes + var title = match.title; + if (match.appendix) title += "-app"; + + var sections = match.sections.split(/[-–—]+/); + var match_sections_normalized = match.sections.replace(/[–—]/g, '-'); + + var range = false; + + // two section symbols is unambiguous + if (match.symbol == "§§") // 2 section symbols + range = true; + + // paren before dash is unambiguous + else { + var dash = match_sections_normalized.indexOf("-"); + var paren = match_sections_normalized.indexOf("("); + if (dash > 0 && paren > 0 && paren < dash) + range = true; + } + + // if there's a hyphen and the range is ambiguous, + // also return the original section string as one + if ((sections.length > 1) && !range) + sections.unshift(match_sections_normalized); + + return sections.map(function(section) { + // separate subsections for each section being considered + var split = section.split(/[\(\)]+/).filter(function(x) {return x}); + section = split[0]; + subsections = split.splice(1); + if (match.note) + subsections.push(match.note.replace(" ", "-")); // "note" or "et seq" + + return { + title: title, + section: section, + subsections: subsections + }; + }); + } + }, + + // "section 552 of title 5" + // "section 552, title 5" + // "section 552(a)(1)(E) of title 5" + // "section 404o-1(a) of title 50" + { + regex: + "section (\\d+[\\w\\d\\-–—]*)((?:\\([^\\)]+\\))*)" + + "(?:\\s+of|\\,) title (\\d+)", + + fields: ['section', 'subsections', 'title'], + + processor: function(match) { + return { + title: match.title, + section: match.section.replace(/[–—]/g, '-'), + subsections: match.subsections.split(/[\(\)]+/).filter(function(x) {return x}) + }; + } + }, + + // "Section 14123(a)(2) of 49 U.S.C." + // "Section 14123(a)(2), 49 U.S.C." + { + regex: + "section (\\d+[\\w\\d\\-–—]*)((?:\\([^\\)]+\\))*)" + + "(?:\\s+of|\\,) (\\d+) " + + "U\\.?\\s?S\\.?\\s?C\\.?", + + fields: ['section', 'subsections', 'title'], + + processor: function(match) { + return { + title: match.title, + section: match.section.replace(/[–—]/g, '-'), + subsections: match.subsections.split(/[\(\)]+/).filter(function(x) {return x}) + }; + } + } + ] +}; + +},{}],17:[function(require,module,exports){ +/* Parses citations to the United States Constitution + * + * like: U.S. CONST., art. I, ¶ 8, cl. 17 + * as seen in http://pdfserver.amlaw.com/nlj/3-18-16%20dc%20council%20v%20mayor%20order%20NLJ.pdf + */ + +var arabic_number = parseInt; +var roman_numeral = require('nomar'); + +// All of the sub-parts that might be found in the citation. +var part_types = { + amendment: { abbrev: "Amdt.", regex: "Amdt\\.?|Amend\\.?", numbering: roman_numeral }, + article: { abbrev: "art.", regex: "art\\.?", numbering: roman_numeral }, + section: { abbrev: "§", regex: "§", numbering: arabic_number }, + paragraph: { abbrev: "¶", regex: "¶", numbering: arabic_number }, + clause: { abbrev: "cl.", regex: "cl\\.?", numbering: arabic_number }, +}; + +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["usconst"].concat((cite.part || []).map(function(part) { + if (!part) return "?"; + return part.type + "-" + part.number; + })).join("/"); + }, + + canonical: function(cite) { + var ret = "U.S. Const."; + for (var i = 0; i < (cite.part || []).length; i++) + if (cite.part[i]) // did this part parse? + ret += ", " + part_types[cite.part[i].type].abbrev + " " + cite.part[i].number_str; + return ret; + }, + + patterns: [ + // "U.S. CONST., art. I, ¶ 8, cl. 17" + { + regex: + "U\\.? ?S\\.? ?C(?:ONST|onst)\\.?" + + "((:?,? ?" + + "(" + + Object.keys(part_types).map(function(type) { return part_types[type].regex; }).join("|") + + ") ?([IVX0-9]+)" + + ")*)", + fields: ['part'], + processor: function(match) { + var part = match.part; + if (part) { + // Split the comma-separated list of parts into the Constitution. + part = part.split(/, ?/); + if (part[0].length == 0) + part.shift(); + part = part.map(process_part); + } + return { + part: part, + }; + } + } + ] +}; + +function process_part(part) { + for (var part_type in part_types) { + var match = new RegExp("(?:" + part_types[part_type].regex + ") ?([IVX0-9]+)" + "$" , 'i').exec(part); + if (match) { + return { + type: part_type, + number_str: match[1], + number: part_types[part_type].numbering(match[1]) + }; + } + } + return null; // somehow didn't match +} + + +},{"nomar":59}],18:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(data) { + return ["va-code", data.title, data.section].join("/"); + }, + + patterns: [ + + // Va. Code Ann. § 19.2-56.2 (2010) + // Va. Code Ann. § 19.2-56.2 (West 2010) + // Va. Code Ann. § 57-1 + // Va. Code Ann. § 57-2.02 + // Va. Code Ann. § 63.2-300 + // Va. Code Ann. § 66-25.1:1 + // Va. Code § 66-25.1:1 + // VA Code § 66-25.1:1 + { + regex: + "Va\\.? Code\\.?" + + "(?:\\s+Ann\\.?)?\\s+" + + "(?:§+\\s*)?" + + "([\\d\\.]+)\\-([\\d\\.:]+)" + + "(?:\\s+\\((?:West )?([12]\\d{3})\\))?", + fields: ['title', 'section', 'year'], + processor: function (captures) { + return { + title: captures.title, + section: captures.section, + year: captures.year + }; + } + } + ] +}; + +},{}],19:[function(require,module,exports){ +module.exports = { + + /* + Filters receive: + * text: the entire input text + * options: any filter-specific options, e.g. delimiter + * extract: execute this function once with every substring the filter + breaks the input text into, e.g. each line, + along with any associated metadata, e.g. the line number. + + */ + + // A line-by-line filter. + // + // Breaks the text up by line, and feeds each line into the extractor. + // Attaches the line number (1-indexed) as metadata to each cite, + // so that any character offsets will be relative to that line. + // + // Accepts options: + // delimiter: override the default delimiter + + from: function(text, options, extract) { + // by default, break lines on any combination of \n\r + var delimiter = (options && options.delimiter) || /[\n\r]+/; + + // split the text into an array of lines + var lines = text.split(new RegExp(delimiter)); + + // for each line, submit it to the extractor along with its line number + lines.forEach(function(line, i) { + extract(line, {line: (i+1)}); + }); + } + +}; + +},{}],20:[function(require,module,exports){ +var parse5 = require("parse5"); + +function recurse(node, partialXpath, extract) { + if (node.nodeName == "#text") { + // Pass contents of text nodes to the extractor + extract(node.value, {xpath: partialXpath}); + } else if (node.nodeName == "#comment" || node.nodeName == "#documentType") { + // Skip doctypes and comments + // (parse5 treats processing instructions, entities, and notations as + // comments) + return; + } else { + for (var i = 0; i < node.childNodes.length; i++) { + var next = node.childNodes[i]; + + // Incrementally build XPath expressions for each node + var nextName = next.nodeName; + var index = 1; // XPath indices are 1-based because reasons + for (var j = 0; j < i; j++) { + if (node.childNodes[j].nodeName == nextName) { + index++; + } + } + var nextXpath; + if (nextName == "#text") { + nextXpath = partialXpath + "/text()[" + index + "]"; + } else { + nextXpath = partialXpath + "/" + nextName + "[" + index + "]"; + } + + // Recurse through each child element node + recurse(next, nextXpath, extract); + } + } +} + +module.exports = { + + /* + Filters receive: + * text: the entire input text + * options: any filter-specific options + * extract: execute this function once with every substring the filter + breaks the input tet into, along with any associated metadata, e.g. + the XPath expression associated with each text fragment. + */ + + // An HTML/XPath filter. + // + // Parses the text as an HTML document, using an HTML5 parser, and feeds + // each text node into the extractor. Attaches an XPath expression that + // locates the text node as metadata to each cite. Character offsets will + // be relative to the beginning of the text node. + + from: function(text, options, extract) { + // Parse the input text + var doc = parse5.parse(text); + + // Hand off to recursive function, which will walk the DOM + recurse(doc, '', extract); + } + +}; + +},{"parse5":37}],21:[function(require,module,exports){ +var DOMParser = require("xmldom").DOMParser; + +function recurse(node, partialXpath, extract) { + if (node.nodeType == node.TEXT_NODE || node.nodeType == node.CDATA_SECTION_NODE) { + extract(node.nodeValue, {xpath: partialXpath}); + } else if (node.nodeType == node.ELEMENT_NODE || node.nodeType == node.DOCUMENT_NODE) { + for (var i = 0; i < node.childNodes.length; i++) { + var next = node.childNodes[i]; + var nextXpath, index, j; + + if (next.nodeType == next.TEXT_NODE || + next.nodeType == next.CDATA_SECTION_NODE) { + index = 1; + for (j = 0; j < i; j++) { + if (node.childNodes[j].nodeType == node.TEXT_NODE || + node.childNodes[j].nodeType == node.CDATA_SECTION_NODE) { + index++; + } + } + nextXpath = partialXpath + "/text()[" + index + "]"; + } else if (next.nodeType == next.ELEMENT_NODE) { + index = 1; + for (j = 0; j < i; j++) { + if (node.childNodes[j].nodeType == node.ELEMENT_NODE && + node.childNodes[j].nodeName == next.nodeName) { + index++; + } + } + nextXpath = partialXpath + "/" + next.nodeName + "[" + index + "]"; + } + + recurse(next, nextXpath, extract); + } + } +} + +module.exports = { + + /* + Filters receive: + * text: the entire input text + * options: any filter-specific options + * extract: execute this function once with every substring the filter + breaks the input tet into, along with any associated metadata, e.g. + the XPath expression associated with each text fragment. + */ + + // An XML/XPath filter. + // + // Parses the text as an XML document, using the "xmldom" parser, and feeds + // each text node into the extractor. Attaches an XPath expression that + // locates the text node as metadata to each cite. Character offsets will + // be relative to the beginning of the text node. + + from: function(text, options, extract) { + // Parse the input text + var parser, doc; + parser = new DOMParser(); + doc = parser.parseFromString(text, "text/xml"); + + // Hand off to recursive function, which will walk the DOM + recurse(doc, '', extract); + } + +}; + +},{"xmldom":80}],22:[function(require,module,exports){ +module.exports = { + id: "cornell_lii", + + name: "Cornell Legal Information Institute", + abbreviation: "Cornell LII", + link: "https://www.law.cornell.edu/uscode/text", + + authoritative: false, + + citations: { + usc: function(cite) { + var title = cite.title.replace(/-app$/, ''); + var is_appendix = cite.title.indexOf("-app") != -1; + + // (for current citations only, i.e. not tied to a publication or effective date) + var subsections = (cite.subsections.slice() || []); // clone + if (subsections.length && subsections[subsections.length-1] == "et-seq") subsections.pop(); // don't include eq-seq in a link + return { + landing: "https://www.law.cornell.edu/uscode/text/" + (title + (is_appendix ? "a" : "")) + + "/" + cite.section + + (subsections.length ? ("#" + subsections.join("_")) : ""), + note: "Link is to most current version of the US Code, as available at law.cornell.edu." + }; + } + } +} + +},{}],23:[function(require,module,exports){ +var form_canonical_cite = require("../citations/reporter").canonical; + +module.exports = { + id: "courtlistener", + + name: "Court Listener", + abbreviation: "CL", + link: "https://www.courtlistener.com", + + authoritative: false, + + citations: { + reporter: function(cite) { + // Create a link to the Court Listener search page for the citation. Citations + // can be ambiguous, and so there is no permalink to a case available without + // querying an API. + // + // The citation is wrapped in quotes in the query to force the CL API to do + // a phrase search (per Solr). Without quotes, a citation search on "410 U.S. 113" + // brings back `410 U.S. 257, 93 S. Ct. 880, 35 L. Ed. 2d 247, 1973 U.S. LEXIS 113` + // and `507 U.S. 410, 113 S. Ct. 1505, 123 L. Ed. 2d 99, 1993 U.S. LEXIS 2401`. + // (They match because "410" "US" and "113" appear somewhere in the whole string.) + // See https://github.com/freelawproject/courtlistener/issues/381, but that's only + // a partial fix because quotes are still needed to ensure the terms appear in + // the right order. + return { + landing: "https://www.courtlistener.com/?citation=" + encodeURIComponent("\"" + form_canonical_cite(cite) + "\"") + }; + } + } +} + +},{"../citations/reporter":14}],24:[function(require,module,exports){ +module.exports = { + id: "dc_council", + + name: "Council of the District of Columbia", + abbreviation: "DC Council", + link: "https://dccode.gov", + + authoritative: true, + + citations: { + dc_law: function(cite) { + return { + landing: "https://beta.code.dccouncil.us/dc/council/laws/" + cite.period + "-" + cite.number + ".html" + }; + }, + dc_code: function(cite) { + return { + landing: "https://beta.code.dccouncil.us/dc/council/code/sections/" + cite.title + "-" + cite.section + ".html" + }; + } + } +}; + +},{}],25:[function(require,module,exports){ +module.exports = { + id: "govtrack", + + name: "GovTrack.us", + abbreviation: "GovTrack.us", + link: "https://www.govtrack.us", + + authoritative: false, + + citations: { + law: function(cite) { + if (cite.congress < 82) return null; + return { + landing: "https://www.govtrack.us/search?q=" + (cite.type=="public"?"Pub":"Priv") + "Law+" + cite.congress + "-" + cite.number + }; + } + } +} + +},{}],26:[function(require,module,exports){ +module.exports = { + id: "usgpo", + + name: "U.S. Government Publishing Office", + abbreviation: "US GPO", + link: "https://www.gpo.gov", + + authoritative: true, + + citations: { + cfr: function(cite) { + var gpo_url = "http://api.fdsys.gov/link?collection=cfr&year=mostrecent" + + "&titlenum=" + cite.title + "&partnum=" + cite.part; + if (cite.section) // section, if present, is of the form PART.SECTION, and for the GPO url only include the (inner) section + gpo_url += "§ionnum=" + cite.section.substring(cite.part.length+1) + ""; + + return { + pdf: gpo_url + }; + }, + + fedreg: function(cite) { + return { + pdf: "http://api.fdsys.gov/link?collection=fr&volume=" + cite.volume + "&page=" + cite.page + }; + }, + + law: function(cite) { + if (cite.congress < 104) return null; + return { + pdf: "http://api.fdsys.gov/link?collection=plaw&congress=" + cite.congress + "&lawtype=" + cite.type + "&lawnum=" + cite.number, + mods: "http://api.fdsys.gov/link?collection=plaw&congress=" + cite.congress + "&lawtype=" + cite.type + "&lawnum=" + cite.number + "&link-type=mods" + }; + }, + + stat: function(cite) { + if (cite.volume < 65 || cite.volume > 125) return null; + var usgpo_url = "http://api.fdsys.gov/link?collection=statute&volume=" + cite.volume + "&page=" + cite.page; + return { + pdf: usgpo_url, + mods: usgpo_url + "&link-type=mods" + }; + }, + + usc: function(cite) { + var title = cite.title.replace(/-app$/, ''); + var is_appendix = cite.title.indexOf("-app") != -1; + + var edition; + for (var i = 0; i < us_code_editions.length; i++) { + if (us_code_editions[i].titles == null || us_code_editions[i].titles.indexOf(title) >= 0) { + // This edition contains the title. + edition = us_code_editions[i] + break; + } + } + + if (!edition) return null; + + var url = "http://api.fdsys.gov/link?collection=uscode&year=" + + edition.edition + "&title=" + title + + "§ion=" + cite.section + + "&type=" + (!is_appendix ? "usc" : "uscappendix"); + + return { + pdf: url, + html: url + "&link-type=html", + landing: url + "&link-type=contentdetail", + note: edition.edition + " edition." + ((cite.subsections && cite.subsections.length) ? " Sub-section citation is not reflected in the link." : "") + }; + } + } +} + + +// Map published editions of the US Code to the titles they contain. Not all +// published editions have the full US Code. Some are updates. This is per +// http://www.gpo.gov/fdsys/browse/collectionUScode.action?collectionCode=USCODE. +// Most recent first. +var us_code_editions = [ + { edition: '2014', titles: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'] }, + { edition: '2013', titles: null }, // all titles available in this edition +]; + +},{}],27:[function(require,module,exports){ +module.exports = { + id: "house", + + name: "Office of the Law Revision Counsel of the United States House of Representatives", + abbreviation: "House OLRC", + link: "http://uscode.house.gov/", + + authoritative: true, + + citations: { + usc: function(cite) { + var title = cite.title.replace(/-app$/, ''); + var is_appendix = cite.title.indexOf("-app") != -1; + return { + note: "Link is to most current version of the US Code.", + html: "http://uscode.house.gov/view.xhtml?req=(" + encodeURIComponent("title:" + (title + (is_appendix ? "a" : "")) + " section:" + cite.section + " edition:prelim") + ")" + } + } + } +} + +},{}],28:[function(require,module,exports){ +module.exports = { + id: "legislink", + + name: "Legislink", + abbreviation: "Legislink", + link: "http://legislink.org/us", + + authoritative: false, + + citations: { + stat: function(cite) { + var legislink_url = "http://legislink.org/us/stat-" + cite.volume + "-" + cite.page; + + // the format differs depending on the volume, and where it is a simple + // redirect to US GPO (and not hosted content) then we can note that. + if (cite.volume >= 125) { + // hosted content is a mirror of US GPO Public and Private Laws in text format + return { + text: legislink_url + }; + + } else if (cite.volume >= 65) { + // redirect to US GPO (so same content as the usgpo link) + return { + pdf: legislink_url, + note: "Link redirects to US GPO Statutes at Large." + }; + + } else { + // original content + return { + pdf: legislink_url + }; + } + } + } +} + +},{}],29:[function(require,module,exports){ +module.exports = { + id: "libraryofcongress", + + name: "Library of Congress", + abbreviation: "LoC", + link: "https://www.loc.gov", + + authoritative: true, + + citations: { + stat: function(cite) { + // LoC organizes the volumes by Congress and, for some Congresses, by chapter + // number. This is well and good but awful for direct linking of citations + // because we don't know the Congress number from a volume (through the 12th + // volume volumes contained more than one Congress) or the chapter number + // (which is a sequential numbering of public and private laws, I think?). + if (cite.volume >= 65) return null; + return { + landing: "https://www.loc.gov/law/help/statutes-at-large/index.php", + note: "Link is to LoC's general Statutes at Large landing page." + }; + }, + + usconst: function(cite) { + return { + landing: "https://www.congress.gov/constitution-annotated", + pdf: get_conan_link(cite), + note: "Link is to the Constitution Annotated." + } + } + } +} + +// Helper routines to get a direct link to the PDF of the Constitution Annotated +// for the cited section. + +var conan_links = { + "article-1": "9-2.pdf", + "article-2": "9-3.pdf", + "article-3": "9-4.pdf", + "article-4": "9-5.pdf", + "article-5": "9-6.pdf", + "article-6": "9-7.pdf", + "article-7": "9-8.pdf", + "amendment-1": "10-2.pdf", + "amendment-2": "10-3.pdf", + "amendment-3": "10-4.pdf", + "amendment-4": "10-5.pdf", + "amendment-5": "10-6.pdf", + "amendment-6": "10-7.pdf", + "amendment-7": "10-8.pdf", + "amendment-8": "10-9.pdf", + "amendment-9": "10-10.pdf", + "amendment-10": "10-11.pdf", + "amendment-11": "10-12.pdf", + "amendment-12": "10-13.pdf", + "amendment-13": "10-14.pdf", + "amendment-14": "10-15.pdf", + "amendment-15": "10-16.pdf", + "amendment-16": "10-17.pdf", + "amendment-17": "10-18.pdf", + "amendment-18": "10-19.pdf", + "amendment-19": "10-20.pdf", + "amendment-20": "10-21.pdf", + "amendment-21": "10-22.pdf", + "amendment-22": "10-23.pdf", + "amendment-23": "10-24.pdf", + "amendment-24": "10-25.pdf", + "amendment-25": "10-26.pdf", + "amendment-26": "10-27.pdf", + "amendment-27": "10-28.pdf" +} + +function get_conan_link(cite) { + for (var sec in conan_links) { + var id_prefix = "usconst/" + sec; + if (cite.id == id_prefix || cite.id.substring(0, id_prefix.length+1) == (id_prefix+"/")) + return "https://www.congress.gov/content/conan/pdf/GPO-CONAN-REV-2014-" + conan_links[sec]; + } + return null; +} +},{}],30:[function(require,module,exports){ +module.exports = { + id: "nara", + + name: "The National Archives and Records Administration", + abbreviation: "NARA", + link: "http://www.archives.gov", + + authoritative: true, + + citations: { + usconst: function(cite) { + return { + landing: "http://www.archives.gov/exhibits/charters/constitution_transcript.html" + } + } + } +} + +},{}],31:[function(require,module,exports){ +module.exports = { + id: "vadecoded", + + name: "Virginia Decoded", + abbreviation: "VACode.org", + link: "https://vacode.org", + + authoritative: false, + + citations: { + va_code: function(cite) { + return { + landing: "https://vacode.org/" + cite.title + "-" + cite.section + "/" + }; + } + } +}; + +},{}],32:[function(require,module,exports){ +'use strict'; + +//Const +var VALID_DOCTYPE_NAME = 'html', + QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd', + QUIRKS_MODE_PUBLIC_ID_PREFIXES = [ + '+//silmaril//dtd html pro v0r11 19970101//en', + '-//advasoft ltd//dtd html 3.0 aswedit + extensions//en', + '-//as//dtd html 3.0 aswedit + extensions//en', + '-//ietf//dtd html 2.0 level 1//en', + '-//ietf//dtd html 2.0 level 2//en', + '-//ietf//dtd html 2.0 strict level 1//en', + '-//ietf//dtd html 2.0 strict level 2//en', + '-//ietf//dtd html 2.0 strict//en', + '-//ietf//dtd html 2.0//en', + '-//ietf//dtd html 2.1e//en', + '-//ietf//dtd html 3.0//en', + '-//ietf//dtd html 3.0//en//', + '-//ietf//dtd html 3.2 final//en', + '-//ietf//dtd html 3.2//en', + '-//ietf//dtd html 3//en', + '-//ietf//dtd html level 0//en', + '-//ietf//dtd html level 0//en//2.0', + '-//ietf//dtd html level 1//en', + '-//ietf//dtd html level 1//en//2.0', + '-//ietf//dtd html level 2//en', + '-//ietf//dtd html level 2//en//2.0', + '-//ietf//dtd html level 3//en', + '-//ietf//dtd html level 3//en//3.0', + '-//ietf//dtd html strict level 0//en', + '-//ietf//dtd html strict level 0//en//2.0', + '-//ietf//dtd html strict level 1//en', + '-//ietf//dtd html strict level 1//en//2.0', + '-//ietf//dtd html strict level 2//en', + '-//ietf//dtd html strict level 2//en//2.0', + '-//ietf//dtd html strict level 3//en', + '-//ietf//dtd html strict level 3//en//3.0', + '-//ietf//dtd html strict//en', + '-//ietf//dtd html strict//en//2.0', + '-//ietf//dtd html strict//en//3.0', + '-//ietf//dtd html//en', + '-//ietf//dtd html//en//2.0', + '-//ietf//dtd html//en//3.0', + '-//metrius//dtd metrius presentational//en', + '-//microsoft//dtd internet explorer 2.0 html strict//en', + '-//microsoft//dtd internet explorer 2.0 html//en', + '-//microsoft//dtd internet explorer 2.0 tables//en', + '-//microsoft//dtd internet explorer 3.0 html strict//en', + '-//microsoft//dtd internet explorer 3.0 html//en', + '-//microsoft//dtd internet explorer 3.0 tables//en', + '-//netscape comm. corp.//dtd html//en', + '-//netscape comm. corp.//dtd strict html//en', + '-//o\'reilly and associates//dtd html 2.0//en', + '-//o\'reilly and associates//dtd html extended 1.0//en', + '-//spyglass//dtd html 2.0 extended//en', + '-//sq//dtd html 2.0 hotmetal + extensions//en', + '-//sun microsystems corp.//dtd hotjava html//en', + '-//sun microsystems corp.//dtd hotjava strict html//en', + '-//w3c//dtd html 3 1995-03-24//en', + '-//w3c//dtd html 3.2 draft//en', + '-//w3c//dtd html 3.2 final//en', + '-//w3c//dtd html 3.2//en', + '-//w3c//dtd html 3.2s draft//en', + '-//w3c//dtd html 4.0 frameset//en', + '-//w3c//dtd html 4.0 transitional//en', + '-//w3c//dtd html experimental 19960712//en', + '-//w3c//dtd html experimental 970421//en', + '-//w3c//dtd w3 html//en', + '-//w3o//dtd w3 html 3.0//en', + '-//w3o//dtd w3 html 3.0//en//', + '-//webtechs//dtd mozilla html 2.0//en', + '-//webtechs//dtd mozilla html//en' + ], + QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [ + '-//w3c//dtd html 4.01 frameset//', + '-//w3c//dtd html 4.01 transitional//' + ], + QUIRKS_MODE_PUBLIC_IDS = [ + '-//w3o//dtd w3 html strict 3.0//en//', + '-/w3c/dtd html 4.0 transitional/en', + 'html' + ]; + + +//Utils +function enquoteDoctypeId(id) { + var quote = id.indexOf('"') !== -1 ? '\'' : '"'; + + return quote + id + quote; +} + + +//API +exports.isQuirks = function (name, publicId, systemId) { + if (name !== VALID_DOCTYPE_NAME) + return true; + + if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) + return true; + + if (publicId !== null) { + publicId = publicId.toLowerCase(); + + if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) + return true; + + var prefixes = QUIRKS_MODE_PUBLIC_ID_PREFIXES; + + if (systemId === null) + prefixes = prefixes.concat(QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES); + + for (var i = 0; i < prefixes.length; i++) { + if (publicId.indexOf(prefixes[i]) === 0) + return true; + } + } + + return false; +}; + +exports.serializeContent = function (name, publicId, systemId) { + var str = '!DOCTYPE '; + + if (name) + str += name; + + if (publicId !== null) + str += ' PUBLIC ' + enquoteDoctypeId(publicId); + + else if (systemId !== null) + str += ' SYSTEM'; + + if (systemId !== null) + str += ' ' + enquoteDoctypeId(systemId); + + return str; +}; + +},{}],33:[function(require,module,exports){ +'use strict'; + +var Tokenizer = require('../tokenizer'), + HTML = require('./html'); + +//Aliases +var $ = HTML.TAG_NAMES, + NS = HTML.NAMESPACES, + ATTRS = HTML.ATTRS; + + +//MIME types +var MIME_TYPES = { + TEXT_HTML: 'text/html', + APPLICATION_XML: 'application/xhtml+xml' +}; + +//Attributes +var DEFINITION_URL_ATTR = 'definitionurl', + ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL', + SVG_ATTRS_ADJUSTMENT_MAP = { + 'attributename': 'attributeName', + 'attributetype': 'attributeType', + 'basefrequency': 'baseFrequency', + 'baseprofile': 'baseProfile', + 'calcmode': 'calcMode', + 'clippathunits': 'clipPathUnits', + 'diffuseconstant': 'diffuseConstant', + 'edgemode': 'edgeMode', + 'filterunits': 'filterUnits', + 'glyphref': 'glyphRef', + 'gradienttransform': 'gradientTransform', + 'gradientunits': 'gradientUnits', + 'kernelmatrix': 'kernelMatrix', + 'kernelunitlength': 'kernelUnitLength', + 'keypoints': 'keyPoints', + 'keysplines': 'keySplines', + 'keytimes': 'keyTimes', + 'lengthadjust': 'lengthAdjust', + 'limitingconeangle': 'limitingConeAngle', + 'markerheight': 'markerHeight', + 'markerunits': 'markerUnits', + 'markerwidth': 'markerWidth', + 'maskcontentunits': 'maskContentUnits', + 'maskunits': 'maskUnits', + 'numoctaves': 'numOctaves', + 'pathlength': 'pathLength', + 'patterncontentunits': 'patternContentUnits', + 'patterntransform': 'patternTransform', + 'patternunits': 'patternUnits', + 'pointsatx': 'pointsAtX', + 'pointsaty': 'pointsAtY', + 'pointsatz': 'pointsAtZ', + 'preservealpha': 'preserveAlpha', + 'preserveaspectratio': 'preserveAspectRatio', + 'primitiveunits': 'primitiveUnits', + 'refx': 'refX', + 'refy': 'refY', + 'repeatcount': 'repeatCount', + 'repeatdur': 'repeatDur', + 'requiredextensions': 'requiredExtensions', + 'requiredfeatures': 'requiredFeatures', + 'specularconstant': 'specularConstant', + 'specularexponent': 'specularExponent', + 'spreadmethod': 'spreadMethod', + 'startoffset': 'startOffset', + 'stddeviation': 'stdDeviation', + 'stitchtiles': 'stitchTiles', + 'surfacescale': 'surfaceScale', + 'systemlanguage': 'systemLanguage', + 'tablevalues': 'tableValues', + 'targetx': 'targetX', + 'targety': 'targetY', + 'textlength': 'textLength', + 'viewbox': 'viewBox', + 'viewtarget': 'viewTarget', + 'xchannelselector': 'xChannelSelector', + 'ychannelselector': 'yChannelSelector', + 'zoomandpan': 'zoomAndPan' + }, + XML_ATTRS_ADJUSTMENT_MAP = { + 'xlink:actuate': {prefix: 'xlink', name: 'actuate', namespace: NS.XLINK}, + 'xlink:arcrole': {prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK}, + 'xlink:href': {prefix: 'xlink', name: 'href', namespace: NS.XLINK}, + 'xlink:role': {prefix: 'xlink', name: 'role', namespace: NS.XLINK}, + 'xlink:show': {prefix: 'xlink', name: 'show', namespace: NS.XLINK}, + 'xlink:title': {prefix: 'xlink', name: 'title', namespace: NS.XLINK}, + 'xlink:type': {prefix: 'xlink', name: 'type', namespace: NS.XLINK}, + 'xml:base': {prefix: 'xml', name: 'base', namespace: NS.XML}, + 'xml:lang': {prefix: 'xml', name: 'lang', namespace: NS.XML}, + 'xml:space': {prefix: 'xml', name: 'space', namespace: NS.XML}, + 'xmlns': {prefix: '', name: 'xmlns', namespace: NS.XMLNS}, + 'xmlns:xlink': {prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS} + + }; + +//SVG tag names adjustment map +var SVG_TAG_NAMES_ADJUSTMENT_MAP = exports.SVG_TAG_NAMES_ADJUSTMENT_MAP = { + 'altglyph': 'altGlyph', + 'altglyphdef': 'altGlyphDef', + 'altglyphitem': 'altGlyphItem', + 'animatecolor': 'animateColor', + 'animatemotion': 'animateMotion', + 'animatetransform': 'animateTransform', + 'clippath': 'clipPath', + 'feblend': 'feBlend', + 'fecolormatrix': 'feColorMatrix', + 'fecomponenttransfer': 'feComponentTransfer', + 'fecomposite': 'feComposite', + 'feconvolvematrix': 'feConvolveMatrix', + 'fediffuselighting': 'feDiffuseLighting', + 'fedisplacementmap': 'feDisplacementMap', + 'fedistantlight': 'feDistantLight', + 'feflood': 'feFlood', + 'fefunca': 'feFuncA', + 'fefuncb': 'feFuncB', + 'fefuncg': 'feFuncG', + 'fefuncr': 'feFuncR', + 'fegaussianblur': 'feGaussianBlur', + 'feimage': 'feImage', + 'femerge': 'feMerge', + 'femergenode': 'feMergeNode', + 'femorphology': 'feMorphology', + 'feoffset': 'feOffset', + 'fepointlight': 'fePointLight', + 'fespecularlighting': 'feSpecularLighting', + 'fespotlight': 'feSpotLight', + 'fetile': 'feTile', + 'feturbulence': 'feTurbulence', + 'foreignobject': 'foreignObject', + 'glyphref': 'glyphRef', + 'lineargradient': 'linearGradient', + 'radialgradient': 'radialGradient', + 'textpath': 'textPath' +}; + +//Tags that causes exit from foreign content +var EXITS_FOREIGN_CONTENT = {}; + +EXITS_FOREIGN_CONTENT[$.B] = true; +EXITS_FOREIGN_CONTENT[$.BIG] = true; +EXITS_FOREIGN_CONTENT[$.BLOCKQUOTE] = true; +EXITS_FOREIGN_CONTENT[$.BODY] = true; +EXITS_FOREIGN_CONTENT[$.BR] = true; +EXITS_FOREIGN_CONTENT[$.CENTER] = true; +EXITS_FOREIGN_CONTENT[$.CODE] = true; +EXITS_FOREIGN_CONTENT[$.DD] = true; +EXITS_FOREIGN_CONTENT[$.DIV] = true; +EXITS_FOREIGN_CONTENT[$.DL] = true; +EXITS_FOREIGN_CONTENT[$.DT] = true; +EXITS_FOREIGN_CONTENT[$.EM] = true; +EXITS_FOREIGN_CONTENT[$.EMBED] = true; +EXITS_FOREIGN_CONTENT[$.H1] = true; +EXITS_FOREIGN_CONTENT[$.H2] = true; +EXITS_FOREIGN_CONTENT[$.H3] = true; +EXITS_FOREIGN_CONTENT[$.H4] = true; +EXITS_FOREIGN_CONTENT[$.H5] = true; +EXITS_FOREIGN_CONTENT[$.H6] = true; +EXITS_FOREIGN_CONTENT[$.HEAD] = true; +EXITS_FOREIGN_CONTENT[$.HR] = true; +EXITS_FOREIGN_CONTENT[$.I] = true; +EXITS_FOREIGN_CONTENT[$.IMG] = true; +EXITS_FOREIGN_CONTENT[$.LI] = true; +EXITS_FOREIGN_CONTENT[$.LISTING] = true; +EXITS_FOREIGN_CONTENT[$.MENU] = true; +EXITS_FOREIGN_CONTENT[$.META] = true; +EXITS_FOREIGN_CONTENT[$.NOBR] = true; +EXITS_FOREIGN_CONTENT[$.OL] = true; +EXITS_FOREIGN_CONTENT[$.P] = true; +EXITS_FOREIGN_CONTENT[$.PRE] = true; +EXITS_FOREIGN_CONTENT[$.RUBY] = true; +EXITS_FOREIGN_CONTENT[$.S] = true; +EXITS_FOREIGN_CONTENT[$.SMALL] = true; +EXITS_FOREIGN_CONTENT[$.SPAN] = true; +EXITS_FOREIGN_CONTENT[$.STRONG] = true; +EXITS_FOREIGN_CONTENT[$.STRIKE] = true; +EXITS_FOREIGN_CONTENT[$.SUB] = true; +EXITS_FOREIGN_CONTENT[$.SUP] = true; +EXITS_FOREIGN_CONTENT[$.TABLE] = true; +EXITS_FOREIGN_CONTENT[$.TT] = true; +EXITS_FOREIGN_CONTENT[$.U] = true; +EXITS_FOREIGN_CONTENT[$.UL] = true; +EXITS_FOREIGN_CONTENT[$.VAR] = true; + +//Check exit from foreign content +exports.causesExit = function (startTagToken) { + var tn = startTagToken.tagName; + var isFontWithAttrs = tn === $.FONT && (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null); + + return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn]; +}; + +//Token adjustments +exports.adjustTokenMathMLAttrs = function (token) { + for (var i = 0; i < token.attrs.length; i++) { + if (token.attrs[i].name === DEFINITION_URL_ATTR) { + token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR; + break; + } + } +}; + +exports.adjustTokenSVGAttrs = function (token) { + for (var i = 0; i < token.attrs.length; i++) { + var adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; + + if (adjustedAttrName) + token.attrs[i].name = adjustedAttrName; + } +}; + +exports.adjustTokenXMLAttrs = function (token) { + for (var i = 0; i < token.attrs.length; i++) { + var adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; + + if (adjustedAttrEntry) { + token.attrs[i].prefix = adjustedAttrEntry.prefix; + token.attrs[i].name = adjustedAttrEntry.name; + token.attrs[i].namespace = adjustedAttrEntry.namespace; + } + } +}; + +exports.adjustTokenSVGTagName = function (token) { + var adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP[token.tagName]; + + if (adjustedTagName) + token.tagName = adjustedTagName; +}; + +//Integration points +function isMathMLTextIntegrationPoint(tn, ns) { + return ns === NS.MATHML && (tn === $.MI || tn === $.MO || tn === $.MN || tn === $.MS || tn === $.MTEXT); +} + +function isHtmlIntegrationPoint(tn, ns, attrs) { + if (ns === NS.MATHML && tn === $.ANNOTATION_XML) { + for (var i = 0; i < attrs.length; i++) { + if (attrs[i].name === ATTRS.ENCODING) { + var value = attrs[i].value.toLowerCase(); + + return value === MIME_TYPES.TEXT_HTML || value === MIME_TYPES.APPLICATION_XML; + } + } + } + + return ns === NS.SVG && (tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE); +} + +exports.isIntegrationPoint = function (tn, ns, attrs, foreignNS) { + if ((!foreignNS || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) + return true; + + if ((!foreignNS || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns)) + return true; + + return false; +}; + +},{"../tokenizer":49,"./html":34}],34:[function(require,module,exports){ +'use strict'; + +var NS = exports.NAMESPACES = { + HTML: 'http://www.w3.org/1999/xhtml', + MATHML: 'http://www.w3.org/1998/Math/MathML', + SVG: 'http://www.w3.org/2000/svg', + XLINK: 'http://www.w3.org/1999/xlink', + XML: 'http://www.w3.org/XML/1998/namespace', + XMLNS: 'http://www.w3.org/2000/xmlns/' +}; + +exports.ATTRS = { + TYPE: 'type', + ACTION: 'action', + ENCODING: 'encoding', + PROMPT: 'prompt', + NAME: 'name', + COLOR: 'color', + FACE: 'face', + SIZE: 'size' +}; + +var $ = exports.TAG_NAMES = { + A: 'a', + ADDRESS: 'address', + ANNOTATION_XML: 'annotation-xml', + APPLET: 'applet', + AREA: 'area', + ARTICLE: 'article', + ASIDE: 'aside', + + B: 'b', + BASE: 'base', + BASEFONT: 'basefont', + BGSOUND: 'bgsound', + BIG: 'big', + BLOCKQUOTE: 'blockquote', + BODY: 'body', + BR: 'br', + BUTTON: 'button', + + CAPTION: 'caption', + CENTER: 'center', + CODE: 'code', + COL: 'col', + COLGROUP: 'colgroup', + + DD: 'dd', + DESC: 'desc', + DETAILS: 'details', + DIALOG: 'dialog', + DIR: 'dir', + DIV: 'div', + DL: 'dl', + DT: 'dt', + + EM: 'em', + EMBED: 'embed', + + FIELDSET: 'fieldset', + FIGCAPTION: 'figcaption', + FIGURE: 'figure', + FONT: 'font', + FOOTER: 'footer', + FOREIGN_OBJECT: 'foreignObject', + FORM: 'form', + FRAME: 'frame', + FRAMESET: 'frameset', + + H1: 'h1', + H2: 'h2', + H3: 'h3', + H4: 'h4', + H5: 'h5', + H6: 'h6', + HEAD: 'head', + HEADER: 'header', + HGROUP: 'hgroup', + HR: 'hr', + HTML: 'html', + + I: 'i', + IMG: 'img', + IMAGE: 'image', + INPUT: 'input', + IFRAME: 'iframe', + + KEYGEN: 'keygen', + + LABEL: 'label', + LI: 'li', + LINK: 'link', + LISTING: 'listing', + + MAIN: 'main', + MALIGNMARK: 'malignmark', + MARQUEE: 'marquee', + MATH: 'math', + MENU: 'menu', + MENUITEM: 'menuitem', + META: 'meta', + MGLYPH: 'mglyph', + MI: 'mi', + MO: 'mo', + MN: 'mn', + MS: 'ms', + MTEXT: 'mtext', + + NAV: 'nav', + NOBR: 'nobr', + NOFRAMES: 'noframes', + NOEMBED: 'noembed', + NOSCRIPT: 'noscript', + + OBJECT: 'object', + OL: 'ol', + OPTGROUP: 'optgroup', + OPTION: 'option', + + P: 'p', + PARAM: 'param', + PLAINTEXT: 'plaintext', + PRE: 'pre', + + RB: 'rb', + RP: 'rp', + RT: 'rt', + RTC: 'rtc', + RUBY: 'ruby', + + S: 's', + SCRIPT: 'script', + SECTION: 'section', + SELECT: 'select', + SOURCE: 'source', + SMALL: 'small', + SPAN: 'span', + STRIKE: 'strike', + STRONG: 'strong', + STYLE: 'style', + SUB: 'sub', + SUMMARY: 'summary', + SUP: 'sup', + + TABLE: 'table', + TBODY: 'tbody', + TEMPLATE: 'template', + TEXTAREA: 'textarea', + TFOOT: 'tfoot', + TD: 'td', + TH: 'th', + THEAD: 'thead', + TITLE: 'title', + TR: 'tr', + TRACK: 'track', + TT: 'tt', + + U: 'u', + UL: 'ul', + + SVG: 'svg', + + VAR: 'var', + + WBR: 'wbr', + + XMP: 'xmp' +}; + +var SPECIAL_ELEMENTS = exports.SPECIAL_ELEMENTS = {}; + +SPECIAL_ELEMENTS[NS.HTML] = {}; +SPECIAL_ELEMENTS[NS.HTML][$.ADDRESS] = true; +SPECIAL_ELEMENTS[NS.HTML][$.APPLET] = true; +SPECIAL_ELEMENTS[NS.HTML][$.AREA] = true; +SPECIAL_ELEMENTS[NS.HTML][$.ARTICLE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.ASIDE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BASE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BASEFONT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BGSOUND] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BLOCKQUOTE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BODY] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BR] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BUTTON] = true; +SPECIAL_ELEMENTS[NS.HTML][$.CAPTION] = true; +SPECIAL_ELEMENTS[NS.HTML][$.CENTER] = true; +SPECIAL_ELEMENTS[NS.HTML][$.COL] = true; +SPECIAL_ELEMENTS[NS.HTML][$.COLGROUP] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DD] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DETAILS] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DIR] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DIV] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DL] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.EMBED] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FIELDSET] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FIGCAPTION] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FIGURE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FOOTER] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FORM] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FRAME] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FRAMESET] = true; +SPECIAL_ELEMENTS[NS.HTML][$.H1] = true; +SPECIAL_ELEMENTS[NS.HTML][$.H2] = true; +SPECIAL_ELEMENTS[NS.HTML][$.H3] = true; +SPECIAL_ELEMENTS[NS.HTML][$.H4] = true; +SPECIAL_ELEMENTS[NS.HTML][$.H5] = true; +SPECIAL_ELEMENTS[NS.HTML][$.H6] = true; +SPECIAL_ELEMENTS[NS.HTML][$.HEAD] = true; +SPECIAL_ELEMENTS[NS.HTML][$.HEADER] = true; +SPECIAL_ELEMENTS[NS.HTML][$.HGROUP] = true; +SPECIAL_ELEMENTS[NS.HTML][$.HR] = true; +SPECIAL_ELEMENTS[NS.HTML][$.HTML] = true; +SPECIAL_ELEMENTS[NS.HTML][$.IFRAME] = true; +SPECIAL_ELEMENTS[NS.HTML][$.IMG] = true; +SPECIAL_ELEMENTS[NS.HTML][$.INPUT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.LI] = true; +SPECIAL_ELEMENTS[NS.HTML][$.LINK] = true; +SPECIAL_ELEMENTS[NS.HTML][$.LISTING] = true; +SPECIAL_ELEMENTS[NS.HTML][$.MAIN] = true; +SPECIAL_ELEMENTS[NS.HTML][$.MARQUEE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.MENU] = true; +SPECIAL_ELEMENTS[NS.HTML][$.META] = true; +SPECIAL_ELEMENTS[NS.HTML][$.NAV] = true; +SPECIAL_ELEMENTS[NS.HTML][$.NOEMBED] = true; +SPECIAL_ELEMENTS[NS.HTML][$.NOFRAMES] = true; +SPECIAL_ELEMENTS[NS.HTML][$.NOSCRIPT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.OBJECT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.OL] = true; +SPECIAL_ELEMENTS[NS.HTML][$.P] = true; +SPECIAL_ELEMENTS[NS.HTML][$.PARAM] = true; +SPECIAL_ELEMENTS[NS.HTML][$.PLAINTEXT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.PRE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.SCRIPT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.SECTION] = true; +SPECIAL_ELEMENTS[NS.HTML][$.SELECT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.SOURCE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.STYLE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.SUMMARY] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TABLE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TBODY] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TD] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TEMPLATE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TEXTAREA] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TFOOT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TH] = true; +SPECIAL_ELEMENTS[NS.HTML][$.THEAD] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TITLE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TR] = true; +SPECIAL_ELEMENTS[NS.HTML][$.TRACK] = true; +SPECIAL_ELEMENTS[NS.HTML][$.UL] = true; +SPECIAL_ELEMENTS[NS.HTML][$.WBR] = true; +SPECIAL_ELEMENTS[NS.HTML][$.XMP] = true; + +SPECIAL_ELEMENTS[NS.MATHML] = {}; +SPECIAL_ELEMENTS[NS.MATHML][$.MI] = true; +SPECIAL_ELEMENTS[NS.MATHML][$.MO] = true; +SPECIAL_ELEMENTS[NS.MATHML][$.MN] = true; +SPECIAL_ELEMENTS[NS.MATHML][$.MS] = true; +SPECIAL_ELEMENTS[NS.MATHML][$.MTEXT] = true; +SPECIAL_ELEMENTS[NS.MATHML][$.ANNOTATION_XML] = true; + +SPECIAL_ELEMENTS[NS.SVG] = {}; +SPECIAL_ELEMENTS[NS.SVG][$.TITLE] = true; +SPECIAL_ELEMENTS[NS.SVG][$.FOREIGN_OBJECT] = true; +SPECIAL_ELEMENTS[NS.SVG][$.DESC] = true; + +},{}],35:[function(require,module,exports){ +'use strict'; + +module.exports = function mergeOptions(defaults, options) { + options = options || {}; + + return [defaults, options].reduce(function (merged, optObj) { + Object.keys(optObj).forEach(function (key) { + merged[key] = optObj[key]; + }); + + return merged; + }, {}); +}; + +},{}],36:[function(require,module,exports){ +'use strict'; + +exports.REPLACEMENT_CHARACTER = '\uFFFD'; + +exports.CODE_POINTS = { + EOF: -1, + NULL: 0x00, + TABULATION: 0x09, + CARRIAGE_RETURN: 0x0D, + LINE_FEED: 0x0A, + FORM_FEED: 0x0C, + SPACE: 0x20, + EXCLAMATION_MARK: 0x21, + QUOTATION_MARK: 0x22, + NUMBER_SIGN: 0x23, + AMPERSAND: 0x26, + APOSTROPHE: 0x27, + HYPHEN_MINUS: 0x2D, + SOLIDUS: 0x2F, + DIGIT_0: 0x30, + DIGIT_9: 0x39, + SEMICOLON: 0x3B, + LESS_THAN_SIGN: 0x3C, + EQUALS_SIGN: 0x3D, + GREATER_THAN_SIGN: 0x3E, + QUESTION_MARK: 0x3F, + LATIN_CAPITAL_A: 0x41, + LATIN_CAPITAL_F: 0x46, + LATIN_CAPITAL_X: 0x58, + LATIN_CAPITAL_Z: 0x5A, + GRAVE_ACCENT: 0x60, + LATIN_SMALL_A: 0x61, + LATIN_SMALL_F: 0x66, + LATIN_SMALL_X: 0x78, + LATIN_SMALL_Z: 0x7A, + REPLACEMENT_CHARACTER: 0xFFFD +}; + +exports.CODE_POINT_SEQUENCES = { + DASH_DASH_STRING: [0x2D, 0x2D], //-- + DOCTYPE_STRING: [0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE + CDATA_START_STRING: [0x5B, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5B], //[CDATA[ + CDATA_END_STRING: [0x5D, 0x5D, 0x3E], //]]> + SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script + PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4C, 0x49, 0x43], //PUBLIC + SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4D] //SYSTEM +}; + +},{}],37:[function(require,module,exports){ +'use strict'; + +var Parser = require('./parser'), + Serializer = require('./serializer'); + +/** @namespace parse5 */ + +/** + * Parses an HTML string. + * @function parse + * @memberof parse5 + * @instance + * @param {string} html - Input HTML string. + * @param {ParserOptions} [options] - Parsing options. + * @returns {ASTNode} document + * @example + * var parse5 = require('parse5'); + * + * var document = parse5.parse('Hi there!'); + */ +exports.parse = function parse(html, options) { + var parser = new Parser(options); + + return parser.parse(html); +}; + +/** + * Parses an HTML fragment. + * @function parseFragment + * @memberof parse5 + * @instance + * @param {ASTNode} [fragmentContext] - Parsing context element. If specified, given fragment + * will be parsed as if it was set to the context element's `innerHTML` property. + * @param {string} html - Input HTML fragment string. + * @param {ParserOptions} [options] - Parsing options. + * @returns {ASTNode} documentFragment + * @example + * var parse5 = require('parse5'); + * + * var documentFragment = parse5.parseFragment('
'); + * + * // Parses the html fragment in the context of the parsed element. + * var trFragment = parser.parseFragment(documentFragment.childNodes[0], ''); + */ +exports.parseFragment = function parseFragment(fragmentContext, html, options) { + if (typeof fragmentContext === 'string') { + options = html; + html = fragmentContext; + fragmentContext = null; + } + + var parser = new Parser(options); + + return parser.parseFragment(html, fragmentContext); +}; + +/** + * Serializes an AST node to an HTML string. + * @function serialize + * @memberof parse5 + * @instance + * @param {ASTNode} node - Node to serialize. + * @param {SerializerOptions} [options] - Serialization options. + * @returns {String} html + * @example + * var parse5 = require('parse5'); + * + * var document = parse5.parse('Hi there!'); + * + * // Serializes a document. + * var html = parse5.serialize(document); + * + * // Serializes the element content. + * var bodyInnerHtml = parse5.serialize(document.childNodes[0].childNodes[1]); + */ +exports.serialize = function (node, options) { + var serializer = new Serializer(node, options); + + return serializer.serialize(); +}; + +/** + * Provides built-in tree adapters that can be used for parsing and serialization. + * @var treeAdapters + * @memberof parse5 + * @instance + * @property {TreeAdapter} default - Default tree format for parse5. + * @property {TreeAdapter} htmlparser2 - Quite popular [htmlparser2](https://github.com/fb55/htmlparser2) tree format + * (e.g. used by [cheerio](https://github.com/MatthewMueller/cheerio) and [jsdom](https://github.com/tmpvar/jsdom)). + * @example + * var parse5 = require('parse5'); + * + * // Uses the default tree adapter for parsing. + * var document = parse5.parse('
', { treeAdapter: parse5.treeAdapters.default }); + * + * // Uses the htmlparser2 tree adapter with the SerializerStream. + * var serializer = new parse5.SerializerStream(node, { treeAdapter: parse5.treeAdapters.htmlparser2 }); + */ +exports.treeAdapters = { + default: require('./tree_adapters/default'), + htmlparser2: require('./tree_adapters/htmlparser2') +}; + + +// Streaming +exports.ParserStream = require('./parser/stream'); +exports.SerializerStream = require('./serializer/stream'); +exports.SAXParser = require('./sax'); + +},{"./parser":41,"./parser/stream":43,"./sax":45,"./serializer":47,"./serializer/stream":48,"./tree_adapters/default":52,"./tree_adapters/htmlparser2":53}],38:[function(require,module,exports){ +'use strict'; + +var OpenElementStack = require('../parser/open_element_stack'), + Tokenizer = require('../tokenizer'), + HTML = require('../common/html'); + + +//Aliases +var $ = HTML.TAG_NAMES; + + +function setEndLocation(element, closingToken, treeAdapter) { + var loc = element.__location; + + if (!loc) + return; + + /** + * @typedef {Object} ElementLocationInfo + * @extends StartTagLocationInfo + * + * @property {StartTagLocationInfo} startTag - Element's start tag location info. + * @property {LocationInfo} endTag - Element's end tag location info. + */ + if (!loc.startTag) { + loc.startTag = { + line: loc.line, + col: loc.col, + startOffset: loc.startOffset, + endOffset: loc.endOffset + }; + if (loc.attrs) + loc.startTag.attrs = loc.attrs; + } + + if (closingToken.location) { + var ctLocation = closingToken.location, + tn = treeAdapter.getTagName(element), + // NOTE: For cases like

- First 'p' closes without a closing tag and + // for cases like - 'p' closes without a closing tag + isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN && + tn === closingToken.tagName; + + if (isClosingEndTag) { + loc.endTag = { + line: ctLocation.line, + col: ctLocation.col, + startOffset: ctLocation.startOffset, + endOffset: ctLocation.endOffset + }; + } + + if (isClosingEndTag) + loc.endOffset = ctLocation.endOffset; + else + loc.endOffset = ctLocation.startOffset; + } +} + + +exports.assign = function (parser) { + //NOTE: obtain Parser proto this way to avoid module circular references + var parserProto = Object.getPrototypeOf(parser), + treeAdapter = parser.treeAdapter, + attachableElementLocation = null, + lastFosterParentingLocation = null, + currentToken = null; + + + //NOTE: patch _bootstrap method + parser._bootstrap = function (document, fragmentContext) { + parserProto._bootstrap.call(this, document, fragmentContext); + + attachableElementLocation = null; + lastFosterParentingLocation = null; + currentToken = null; + + //OpenElementStack + parser.openElements.pop = function () { + setEndLocation(this.current, currentToken, treeAdapter); + OpenElementStack.prototype.pop.call(this); + }; + + parser.openElements.popAllUpToHtmlElement = function () { + for (var i = this.stackTop; i > 0; i--) + setEndLocation(this.items[i], currentToken, treeAdapter); + + OpenElementStack.prototype.popAllUpToHtmlElement.call(this); + }; + + parser.openElements.remove = function (element) { + setEndLocation(element, currentToken, treeAdapter); + OpenElementStack.prototype.remove.call(this, element); + }; + }; + + + //Token processing + parser._processTokenInForeignContent = function (token) { + currentToken = token; + parserProto._processTokenInForeignContent.call(this, token); + }; + + parser._processToken = function (token) { + currentToken = token; + parserProto._processToken.call(this, token); + + //NOTE: and are never popped from the stack, so we need to updated + //their end location explicitly. + if (token.type === Tokenizer.END_TAG_TOKEN && + (token.tagName === $.HTML || + token.tagName === $.BODY && this.openElements.hasInScope($.BODY))) { + for (var i = this.openElements.stackTop; i >= 0; i--) { + var element = this.openElements.items[i]; + + if (this.treeAdapter.getTagName(element) === token.tagName) { + setEndLocation(element, token, treeAdapter); + break; + } + } + } + }; + + + //Doctype + parser._setDocumentType = function (token) { + parserProto._setDocumentType.call(this, token); + + var documentChildren = this.treeAdapter.getChildNodes(this.document), + cnLength = documentChildren.length; + + for (var i = 0; i < cnLength; i++) { + var node = documentChildren[i]; + + if (this.treeAdapter.isDocumentTypeNode(node)) { + node.__location = token.location; + break; + } + } + }; + + + //Elements + parser._attachElementToTree = function (element) { + //NOTE: _attachElementToTree is called from _appendElement, _insertElement and _insertTemplate methods. + //So we will use token location stored in this methods for the element. + element.__location = attachableElementLocation || null; + attachableElementLocation = null; + parserProto._attachElementToTree.call(this, element); + }; + + parser._appendElement = function (token, namespaceURI) { + attachableElementLocation = token.location; + parserProto._appendElement.call(this, token, namespaceURI); + }; + + parser._insertElement = function (token, namespaceURI) { + attachableElementLocation = token.location; + parserProto._insertElement.call(this, token, namespaceURI); + }; + + parser._insertTemplate = function (token) { + attachableElementLocation = token.location; + parserProto._insertTemplate.call(this, token); + + var tmplContent = this.treeAdapter.getTemplateContent(this.openElements.current); + + tmplContent.__location = null; + }; + + parser._insertFakeRootElement = function () { + parserProto._insertFakeRootElement.call(this); + this.openElements.current.__location = null; + }; + + + //Comments + parser._appendCommentNode = function (token, parent) { + parserProto._appendCommentNode.call(this, token, parent); + + var children = this.treeAdapter.getChildNodes(parent), + commentNode = children[children.length - 1]; + + commentNode.__location = token.location; + }; + + + //Text + parser._findFosterParentingLocation = function () { + //NOTE: store last foster parenting location, so we will be able to find inserted text + //in case of foster parenting + lastFosterParentingLocation = parserProto._findFosterParentingLocation.call(this); + return lastFosterParentingLocation; + }; + + parser._insertCharacters = function (token) { + parserProto._insertCharacters.call(this, token); + + var hasFosterParent = this._shouldFosterParentOnInsertion(), + parent = hasFosterParent && lastFosterParentingLocation.parent || + this.openElements.currentTmplContent || + this.openElements.current, + siblings = this.treeAdapter.getChildNodes(parent), + textNodeIdx = hasFosterParent && lastFosterParentingLocation.beforeElement ? + siblings.indexOf(lastFosterParentingLocation.beforeElement) - 1 : + siblings.length - 1, + textNode = siblings[textNodeIdx]; + + //NOTE: if we have location assigned by another token, then just update end position + if (textNode.__location) + textNode.__location.endOffset = token.location.endOffset; + + else + textNode.__location = token.location; + }; +}; + + +},{"../common/html":34,"../parser/open_element_stack":42,"../tokenizer":49}],39:[function(require,module,exports){ +'use strict'; + +var UNICODE = require('../common/unicode'); + +//Aliases +var $ = UNICODE.CODE_POINTS; + + +exports.assign = function (tokenizer) { + //NOTE: obtain Tokenizer proto this way to avoid module circular references + var tokenizerProto = Object.getPrototypeOf(tokenizer), + tokenStartOffset = -1, + tokenCol = -1, + tokenLine = 1, + isEol = false, + lineStartPosStack = [0], + lineStartPos = 0, + col = -1, + line = 1; + + function attachLocationInfo(token) { + /** + * @typedef {Object} LocationInfo + * + * @property {Number} line - One-based line index + * @property {Number} col - One-based column index + * @property {Number} startOffset - Zero-based first character index + * @property {Number} endOffset - Zero-based last character index + */ + token.location = { + line: tokenLine, + col: tokenCol, + startOffset: tokenStartOffset, + endOffset: -1 + }; + } + + //NOTE: patch consumption method to track line/col information + tokenizer._consume = function () { + var cp = tokenizerProto._consume.call(this); + + //NOTE: LF should be in the last column of the line + if (isEol) { + isEol = false; + line++; + lineStartPosStack.push(this.preprocessor.sourcePos); + lineStartPos = this.preprocessor.sourcePos; + } + + if (cp === $.LINE_FEED) + isEol = true; + + col = this.preprocessor.sourcePos - lineStartPos + 1; + + return cp; + }; + + tokenizer._unconsume = function () { + tokenizerProto._unconsume.call(this); + isEol = false; + + while (lineStartPos > this.preprocessor.sourcePos && lineStartPosStack.length > 1) { + lineStartPos = lineStartPosStack.pop(); + line--; + } + + col = this.preprocessor.sourcePos - lineStartPos + 1; + }; + + //NOTE: patch token creation methods and attach location objects + tokenizer._createStartTagToken = function () { + tokenizerProto._createStartTagToken.call(this); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createEndTagToken = function () { + tokenizerProto._createEndTagToken.call(this); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createCommentToken = function () { + tokenizerProto._createCommentToken.call(this); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createDoctypeToken = function (initialName) { + tokenizerProto._createDoctypeToken.call(this, initialName); + attachLocationInfo(this.currentToken); + }; + + tokenizer._createCharacterToken = function (type, ch) { + tokenizerProto._createCharacterToken.call(this, type, ch); + attachLocationInfo(this.currentCharacterToken); + }; + + tokenizer._createAttr = function (attrNameFirstCh) { + tokenizerProto._createAttr.call(this, attrNameFirstCh); + this.currentAttrLocation = { + line: line, + col: col, + startOffset: this.preprocessor.sourcePos, + endOffset: -1 + }; + }; + + tokenizer._leaveAttrName = function (toState) { + tokenizerProto._leaveAttrName.call(this, toState); + this._attachCurrentAttrLocationInfo(); + }; + + tokenizer._leaveAttrValue = function (toState) { + tokenizerProto._leaveAttrValue.call(this, toState); + this._attachCurrentAttrLocationInfo(); + }; + + tokenizer._attachCurrentAttrLocationInfo = function () { + this.currentAttrLocation.endOffset = this.preprocessor.sourcePos; + + if (!this.currentToken.location.attrs) + this.currentToken.location.attrs = {}; + + /** + * @typedef {Object} StartTagLocationInfo + * @extends LocationInfo + * + * @property {Dictionary} attrs - Start tag attributes' location info. + */ + this.currentToken.location.attrs[this.currentAttr.name] = this.currentAttrLocation; + }; + + //NOTE: patch token emission methods to determine end location + tokenizer._emitCurrentToken = function () { + //NOTE: if we have pending character token make it's end location equal to the + //current token's start location. + if (this.currentCharacterToken) + this.currentCharacterToken.location.endOffset = this.currentToken.location.startOffset; + + this.currentToken.location.endOffset = this.preprocessor.sourcePos + 1; + tokenizerProto._emitCurrentToken.call(this); + }; + + tokenizer._emitCurrentCharacterToken = function () { + //NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(), + //then set it's location at the current preprocessor position. + //We don't need to increment preprocessor position, since character token + //emission is always forced by the start of the next character token here. + //So, we already have advanced position. + if (this.currentCharacterToken && this.currentCharacterToken.location.endOffset === -1) + this.currentCharacterToken.location.endOffset = this.preprocessor.sourcePos; + + tokenizerProto._emitCurrentCharacterToken.call(this); + }; + + //NOTE: patch initial states for each mode to obtain token start position + Object.keys(tokenizerProto.MODE) + + .map(function (modeName) { + return tokenizerProto.MODE[modeName]; + }) + + .forEach(function (state) { + tokenizer[state] = function (cp) { + tokenStartOffset = this.preprocessor.sourcePos; + tokenLine = line; + tokenCol = col; + tokenizerProto[state].call(this, cp); + }; + }); +}; + +},{"../common/unicode":36}],40:[function(require,module,exports){ +'use strict'; + +//Const +var NOAH_ARK_CAPACITY = 3; + +//List of formatting elements +var FormattingElementList = module.exports = function (treeAdapter) { + this.length = 0; + this.entries = []; + this.treeAdapter = treeAdapter; + this.bookmark = null; +}; + +//Entry types +FormattingElementList.MARKER_ENTRY = 'MARKER_ENTRY'; +FormattingElementList.ELEMENT_ENTRY = 'ELEMENT_ENTRY'; + +//Noah Ark's condition +//OPTIMIZATION: at first we try to find possible candidates for exclusion using +//lightweight heuristics without thorough attributes check. +FormattingElementList.prototype._getNoahArkConditionCandidates = function (newElement) { + var candidates = []; + + if (this.length >= NOAH_ARK_CAPACITY) { + var neAttrsLength = this.treeAdapter.getAttrList(newElement).length, + neTagName = this.treeAdapter.getTagName(newElement), + neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement); + + for (var i = this.length - 1; i >= 0; i--) { + var entry = this.entries[i]; + + if (entry.type === FormattingElementList.MARKER_ENTRY) + break; + + var element = entry.element, + elementAttrs = this.treeAdapter.getAttrList(element), + isCandidate = this.treeAdapter.getTagName(element) === neTagName && + this.treeAdapter.getNamespaceURI(element) === neNamespaceURI && + elementAttrs.length === neAttrsLength; + + if (isCandidate) + candidates.push({idx: i, attrs: elementAttrs}); + } + } + + return candidates.length < NOAH_ARK_CAPACITY ? [] : candidates; +}; + +FormattingElementList.prototype._ensureNoahArkCondition = function (newElement) { + var candidates = this._getNoahArkConditionCandidates(newElement), + cLength = candidates.length; + + if (cLength) { + var neAttrs = this.treeAdapter.getAttrList(newElement), + neAttrsLength = neAttrs.length, + neAttrsMap = {}; + + //NOTE: build attrs map for the new element so we can perform fast lookups + for (var i = 0; i < neAttrsLength; i++) { + var neAttr = neAttrs[i]; + + neAttrsMap[neAttr.name] = neAttr.value; + } + + for (i = 0; i < neAttrsLength; i++) { + for (var j = 0; j < cLength; j++) { + var cAttr = candidates[j].attrs[i]; + + if (neAttrsMap[cAttr.name] !== cAttr.value) { + candidates.splice(j, 1); + cLength--; + } + + if (candidates.length < NOAH_ARK_CAPACITY) + return; + } + } + + //NOTE: remove bottommost candidates until Noah's Ark condition will not be met + for (i = cLength - 1; i >= NOAH_ARK_CAPACITY - 1; i--) { + this.entries.splice(candidates[i].idx, 1); + this.length--; + } + } +}; + +//Mutations +FormattingElementList.prototype.insertMarker = function () { + this.entries.push({type: FormattingElementList.MARKER_ENTRY}); + this.length++; +}; + +FormattingElementList.prototype.pushElement = function (element, token) { + this._ensureNoahArkCondition(element); + + this.entries.push({ + type: FormattingElementList.ELEMENT_ENTRY, + element: element, + token: token + }); + + this.length++; +}; + +FormattingElementList.prototype.insertElementAfterBookmark = function (element, token) { + var bookmarkIdx = this.length - 1; + + for (; bookmarkIdx >= 0; bookmarkIdx--) { + if (this.entries[bookmarkIdx] === this.bookmark) + break; + } + + this.entries.splice(bookmarkIdx + 1, 0, { + type: FormattingElementList.ELEMENT_ENTRY, + element: element, + token: token + }); + + this.length++; +}; + +FormattingElementList.prototype.removeEntry = function (entry) { + for (var i = this.length - 1; i >= 0; i--) { + if (this.entries[i] === entry) { + this.entries.splice(i, 1); + this.length--; + break; + } + } +}; + +FormattingElementList.prototype.clearToLastMarker = function () { + while (this.length) { + var entry = this.entries.pop(); + + this.length--; + + if (entry.type === FormattingElementList.MARKER_ENTRY) + break; + } +}; + +//Search +FormattingElementList.prototype.getElementEntryInScopeWithTagName = function (tagName) { + for (var i = this.length - 1; i >= 0; i--) { + var entry = this.entries[i]; + + if (entry.type === FormattingElementList.MARKER_ENTRY) + return null; + + if (this.treeAdapter.getTagName(entry.element) === tagName) + return entry; + } + + return null; +}; + +FormattingElementList.prototype.getElementEntry = function (element) { + for (var i = this.length - 1; i >= 0; i--) { + var entry = this.entries[i]; + + if (entry.type === FormattingElementList.ELEMENT_ENTRY && entry.element === element) + return entry; + } + + return null; +}; + +},{}],41:[function(require,module,exports){ +'use strict'; + +var Tokenizer = require('../tokenizer'), + OpenElementStack = require('./open_element_stack'), + FormattingElementList = require('./formatting_element_list'), + locationInfoMixin = require('../location_info/parser_mixin'), + defaultTreeAdapter = require('../tree_adapters/default'), + doctype = require('../common/doctype'), + foreignContent = require('../common/foreign_content'), + mergeOptions = require('../common/merge_options'), + UNICODE = require('../common/unicode'), + HTML = require('../common/html'); + +//Aliases +var $ = HTML.TAG_NAMES, + NS = HTML.NAMESPACES, + ATTRS = HTML.ATTRS; + +/** + * @typedef {Object} ParserOptions + * + * @property {Boolean} [locationInfo=false] - Enables source code location information for the nodes. + * When enabled, each node (except root node) has the `__location` property. In case the node is not an empty element, + * `__location` will be {@link ElementLocationInfo} object, otherwise it's {@link LocationInfo}. + * If the element was implicitly created by the parser it's `__location` property will be `null`. + * + * @property {TreeAdapter} [treeAdapter=parse5.treeAdapters.default] - Specifies the resulting tree format. + */ +var DEFAULT_OPTIONS = { + locationInfo: false, + treeAdapter: defaultTreeAdapter +}; + +//Misc constants +var HIDDEN_INPUT_TYPE = 'hidden'; + +//Adoption agency loops iteration count +var AA_OUTER_LOOP_ITER = 8, + AA_INNER_LOOP_ITER = 3; + +//Insertion modes +var INITIAL_MODE = 'INITIAL_MODE', + BEFORE_HTML_MODE = 'BEFORE_HTML_MODE', + BEFORE_HEAD_MODE = 'BEFORE_HEAD_MODE', + IN_HEAD_MODE = 'IN_HEAD_MODE', + AFTER_HEAD_MODE = 'AFTER_HEAD_MODE', + IN_BODY_MODE = 'IN_BODY_MODE', + TEXT_MODE = 'TEXT_MODE', + IN_TABLE_MODE = 'IN_TABLE_MODE', + IN_TABLE_TEXT_MODE = 'IN_TABLE_TEXT_MODE', + IN_CAPTION_MODE = 'IN_CAPTION_MODE', + IN_COLUMN_GROUP_MODE = 'IN_COLUMN_GROUP_MODE', + IN_TABLE_BODY_MODE = 'IN_TABLE_BODY_MODE', + IN_ROW_MODE = 'IN_ROW_MODE', + IN_CELL_MODE = 'IN_CELL_MODE', + IN_SELECT_MODE = 'IN_SELECT_MODE', + IN_SELECT_IN_TABLE_MODE = 'IN_SELECT_IN_TABLE_MODE', + IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE', + AFTER_BODY_MODE = 'AFTER_BODY_MODE', + IN_FRAMESET_MODE = 'IN_FRAMESET_MODE', + AFTER_FRAMESET_MODE = 'AFTER_FRAMESET_MODE', + AFTER_AFTER_BODY_MODE = 'AFTER_AFTER_BODY_MODE', + AFTER_AFTER_FRAMESET_MODE = 'AFTER_AFTER_FRAMESET_MODE'; + +//Insertion mode reset map +var INSERTION_MODE_RESET_MAP = {}; + +INSERTION_MODE_RESET_MAP[$.TR] = IN_ROW_MODE; +INSERTION_MODE_RESET_MAP[$.TBODY] = +INSERTION_MODE_RESET_MAP[$.THEAD] = +INSERTION_MODE_RESET_MAP[$.TFOOT] = IN_TABLE_BODY_MODE; +INSERTION_MODE_RESET_MAP[$.CAPTION] = IN_CAPTION_MODE; +INSERTION_MODE_RESET_MAP[$.COLGROUP] = IN_COLUMN_GROUP_MODE; +INSERTION_MODE_RESET_MAP[$.TABLE] = IN_TABLE_MODE; +INSERTION_MODE_RESET_MAP[$.BODY] = IN_BODY_MODE; +INSERTION_MODE_RESET_MAP[$.FRAMESET] = IN_FRAMESET_MODE; + +//Template insertion mode switch map +var TEMPLATE_INSERTION_MODE_SWITCH_MAP = {}; + +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.CAPTION] = +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.COLGROUP] = +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TBODY] = +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TFOOT] = +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.THEAD] = IN_TABLE_MODE; +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.COL] = IN_COLUMN_GROUP_MODE; +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TR] = IN_TABLE_BODY_MODE; +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TD] = +TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TH] = IN_ROW_MODE; + +//Token handlers map for insertion modes +var _ = {}; + +_[INITIAL_MODE] = {}; +_[INITIAL_MODE][Tokenizer.CHARACTER_TOKEN] = +_[INITIAL_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInInitialMode; +_[INITIAL_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken; +_[INITIAL_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[INITIAL_MODE][Tokenizer.DOCTYPE_TOKEN] = doctypeInInitialMode; +_[INITIAL_MODE][Tokenizer.START_TAG_TOKEN] = +_[INITIAL_MODE][Tokenizer.END_TAG_TOKEN] = +_[INITIAL_MODE][Tokenizer.EOF_TOKEN] = tokenInInitialMode; + +_[BEFORE_HTML_MODE] = {}; +_[BEFORE_HTML_MODE][Tokenizer.CHARACTER_TOKEN] = +_[BEFORE_HTML_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHtml; +_[BEFORE_HTML_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken; +_[BEFORE_HTML_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[BEFORE_HTML_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[BEFORE_HTML_MODE][Tokenizer.START_TAG_TOKEN] = startTagBeforeHtml; +_[BEFORE_HTML_MODE][Tokenizer.END_TAG_TOKEN] = endTagBeforeHtml; +_[BEFORE_HTML_MODE][Tokenizer.EOF_TOKEN] = tokenBeforeHtml; + +_[BEFORE_HEAD_MODE] = {}; +_[BEFORE_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = +_[BEFORE_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHead; +_[BEFORE_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken; +_[BEFORE_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[BEFORE_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[BEFORE_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagBeforeHead; +_[BEFORE_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagBeforeHead; +_[BEFORE_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenBeforeHead; + +_[IN_HEAD_MODE] = {}; +_[IN_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = +_[IN_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInHead; +_[IN_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[IN_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagInHead; +_[IN_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagInHead; +_[IN_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenInHead; + +_[AFTER_HEAD_MODE] = {}; +_[AFTER_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = +_[AFTER_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterHead; +_[AFTER_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[AFTER_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[AFTER_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterHead; +_[AFTER_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterHead; +_[AFTER_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenAfterHead; + +_[IN_BODY_MODE] = {}; +_[IN_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; +_[IN_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[IN_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagInBody; +_[IN_BODY_MODE][Tokenizer.END_TAG_TOKEN] = endTagInBody; +_[IN_BODY_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[TEXT_MODE] = {}; +_[TEXT_MODE][Tokenizer.CHARACTER_TOKEN] = +_[TEXT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = +_[TEXT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[TEXT_MODE][Tokenizer.COMMENT_TOKEN] = +_[TEXT_MODE][Tokenizer.DOCTYPE_TOKEN] = +_[TEXT_MODE][Tokenizer.START_TAG_TOKEN] = ignoreToken; +_[TEXT_MODE][Tokenizer.END_TAG_TOKEN] = endTagInText; +_[TEXT_MODE][Tokenizer.EOF_TOKEN] = eofInText; + +_[IN_TABLE_MODE] = {}; +_[IN_TABLE_MODE][Tokenizer.CHARACTER_TOKEN] = +_[IN_TABLE_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = +_[IN_TABLE_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable; +_[IN_TABLE_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_TABLE_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_TABLE_MODE][Tokenizer.START_TAG_TOKEN] = startTagInTable; +_[IN_TABLE_MODE][Tokenizer.END_TAG_TOKEN] = endTagInTable; +_[IN_TABLE_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_TABLE_TEXT_MODE] = {}; +_[IN_TABLE_TEXT_MODE][Tokenizer.CHARACTER_TOKEN] = characterInTableText; +_[IN_TABLE_TEXT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_TABLE_TEXT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInTableText; +_[IN_TABLE_TEXT_MODE][Tokenizer.COMMENT_TOKEN] = +_[IN_TABLE_TEXT_MODE][Tokenizer.DOCTYPE_TOKEN] = +_[IN_TABLE_TEXT_MODE][Tokenizer.START_TAG_TOKEN] = +_[IN_TABLE_TEXT_MODE][Tokenizer.END_TAG_TOKEN] = +_[IN_TABLE_TEXT_MODE][Tokenizer.EOF_TOKEN] = tokenInTableText; + +_[IN_CAPTION_MODE] = {}; +_[IN_CAPTION_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; +_[IN_CAPTION_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_CAPTION_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[IN_CAPTION_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_CAPTION_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_CAPTION_MODE][Tokenizer.START_TAG_TOKEN] = startTagInCaption; +_[IN_CAPTION_MODE][Tokenizer.END_TAG_TOKEN] = endTagInCaption; +_[IN_CAPTION_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_COLUMN_GROUP_MODE] = {}; +_[IN_COLUMN_GROUP_MODE][Tokenizer.CHARACTER_TOKEN] = +_[IN_COLUMN_GROUP_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInColumnGroup; +_[IN_COLUMN_GROUP_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[IN_COLUMN_GROUP_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_COLUMN_GROUP_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_COLUMN_GROUP_MODE][Tokenizer.START_TAG_TOKEN] = startTagInColumnGroup; +_[IN_COLUMN_GROUP_MODE][Tokenizer.END_TAG_TOKEN] = endTagInColumnGroup; +_[IN_COLUMN_GROUP_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_TABLE_BODY_MODE] = {}; +_[IN_TABLE_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = +_[IN_TABLE_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = +_[IN_TABLE_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable; +_[IN_TABLE_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_TABLE_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_TABLE_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagInTableBody; +_[IN_TABLE_BODY_MODE][Tokenizer.END_TAG_TOKEN] = endTagInTableBody; +_[IN_TABLE_BODY_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_ROW_MODE] = {}; +_[IN_ROW_MODE][Tokenizer.CHARACTER_TOKEN] = +_[IN_ROW_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = +_[IN_ROW_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable; +_[IN_ROW_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_ROW_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_ROW_MODE][Tokenizer.START_TAG_TOKEN] = startTagInRow; +_[IN_ROW_MODE][Tokenizer.END_TAG_TOKEN] = endTagInRow; +_[IN_ROW_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_CELL_MODE] = {}; +_[IN_CELL_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; +_[IN_CELL_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_CELL_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[IN_CELL_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_CELL_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_CELL_MODE][Tokenizer.START_TAG_TOKEN] = startTagInCell; +_[IN_CELL_MODE][Tokenizer.END_TAG_TOKEN] = endTagInCell; +_[IN_CELL_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_SELECT_MODE] = {}; +_[IN_SELECT_MODE][Tokenizer.CHARACTER_TOKEN] = insertCharacters; +_[IN_SELECT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_SELECT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[IN_SELECT_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_SELECT_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_SELECT_MODE][Tokenizer.START_TAG_TOKEN] = startTagInSelect; +_[IN_SELECT_MODE][Tokenizer.END_TAG_TOKEN] = endTagInSelect; +_[IN_SELECT_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_SELECT_IN_TABLE_MODE] = {}; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.CHARACTER_TOKEN] = insertCharacters; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.START_TAG_TOKEN] = startTagInSelectInTable; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.END_TAG_TOKEN] = endTagInSelectInTable; +_[IN_SELECT_IN_TABLE_MODE][Tokenizer.EOF_TOKEN] = eofInBody; + +_[IN_TEMPLATE_MODE] = {}; +_[IN_TEMPLATE_MODE][Tokenizer.CHARACTER_TOKEN] = characterInBody; +_[IN_TEMPLATE_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_TEMPLATE_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[IN_TEMPLATE_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_TEMPLATE_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_TEMPLATE_MODE][Tokenizer.START_TAG_TOKEN] = startTagInTemplate; +_[IN_TEMPLATE_MODE][Tokenizer.END_TAG_TOKEN] = endTagInTemplate; +_[IN_TEMPLATE_MODE][Tokenizer.EOF_TOKEN] = eofInTemplate; + +_[AFTER_BODY_MODE] = {}; +_[AFTER_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = +_[AFTER_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterBody; +_[AFTER_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[AFTER_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToRootHtmlElement; +_[AFTER_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterBody; +_[AFTER_BODY_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterBody; +_[AFTER_BODY_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + +_[IN_FRAMESET_MODE] = {}; +_[IN_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = +_[IN_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[IN_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagInFrameset; +_[IN_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = endTagInFrameset; +_[IN_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + +_[AFTER_FRAMESET_MODE] = {}; +_[AFTER_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = +_[AFTER_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[AFTER_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[AFTER_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[AFTER_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterFrameset; +_[AFTER_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterFrameset; +_[AFTER_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + +_[AFTER_AFTER_BODY_MODE] = {}; +_[AFTER_AFTER_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = tokenAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToDocument; +_[AFTER_AFTER_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_AFTER_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.END_TAG_TOKEN] = tokenAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + +_[AFTER_AFTER_FRAMESET_MODE] = {}; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToDocument; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterAfterFrameset; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = ignoreToken; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + + +//Parser +var Parser = module.exports = function (options) { + this.options = mergeOptions(DEFAULT_OPTIONS, options); + + this.treeAdapter = this.options.treeAdapter; + this.pendingScript = null; + + if (this.options.locationInfo) + locationInfoMixin.assign(this); +}; + +// API +Parser.prototype.parse = function (html) { + var document = this.treeAdapter.createDocument(); + + this._bootstrap(document, null); + this.tokenizer.write(html, true); + this._runParsingLoop(null, null); + + return document; +}; + +Parser.prototype.parseFragment = function (html, fragmentContext) { + //NOTE: use
Shake it, baby