diff --git a/README.md b/README.md index b9cb85b..aaf6d08 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,65 @@ Scans the DOM for legal citations, finds them, and turns them into HTML links. ``` +## Advanced Usage + +You can control where on the page links are inserted, which websites you allow linking to, which page types (web pages, PDFs, and raw HTML) you allow links to, and how the link `<a>` elements are created by turning off automatic linking and calling the linking function directly. + +Here's a simple example assuming you are using jQuery: + +```html + + +// load the library + +``` + +The arguments to `linkify_citations` are all optional. They are: + +* The DOM element within which to replace links. +* An array of allowed link targets, in order of preference. The default is the complete list of targets provided by the citation library, preferring links to authoritative sources. The values in this array come from the `id` attribute of the [linker modules in the citation library](https://github.com/unitedstates/citation/tree/master/links). +* An array of allowed link page types, in order of preference. The default is `["landing", "pdf", "html"]`. See the documentation for [link rendition types](https://github.com/unitedstates/citation#include-links). +* A function that creates `<a>` elements for the inserted links (see below for example). + +Here's a complete example showing the default arguments explicitly: + +```html + + +// load the library + +``` + ## Development/deployment Pull requests merged into `master` will automatically be deployed to the S3 bucket. To test things out, you might want to try [rawgit](https://rawgit.com). 
diff --git a/linkify-citations.js b/linkify-citations.js index 46f5c6c..f3c767c 100644 --- a/linkify-citations.js +++ b/linkify-citations.js @@ -1,1009 +1,18143 @@ (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o 0) { + throw new Error('Invalid string. Length must be a multiple of 4') + } - // client can apply a filter that pre-processes text before extraction, - // and post-processes citations after extraction - var results; - if (options.filter && Citation.filters[options.filter]) - return Citation.filtered(options.filter, text, options); + // the number of equal signs (place holders) + // if there are two placeholders, than the two characters before it + // represent one byte + // if there is only one, then the three characters before it represent 2 bytes + // this is just a cheap hack to not do indexOf twice + var len = b64.length + placeHolders = '=' === b64.charAt(len - 2) ? 2 : '=' === b64.charAt(len - 1) ? 1 : 0 - // otherwise, do a single pass over the whole text. - else - return Citation.extract(text, options); - }, + // base64 is 4/3 + up to two characters of the original data + arr = new Arr(b64.length * 3 / 4 - placeHolders) - // return an array of matched and filter-mapped cites - filtered: function(name, text, options) { - var results = []; + // if there are placeholders, only get up to the last complete 4 chars + l = placeHolders > 0 ? 
b64.length - 4 : b64.length - var filter = Citation.filters[name]; + var L = 0 - // filter can break up the text into pieces with accompanying metadata - filter.from(text, options[name], function(piece, metadata) { - var response = Citation.extract(piece, options); + function push (v) { + arr[L++] = v + } - // ignores any replaced text, it falls off the edge of the earth + for (i = 0, j = 0; i < l; i += 4, j += 3) { + tmp = (decode(b64.charAt(i)) << 18) | (decode(b64.charAt(i + 1)) << 12) | (decode(b64.charAt(i + 2)) << 6) | decode(b64.charAt(i + 3)) + push((tmp & 0xFF0000) >> 16) + push((tmp & 0xFF00) >> 8) + push(tmp & 0xFF) + } - var filtered = response.citations.map(function(result) { + if (placeHolders === 2) { + tmp = (decode(b64.charAt(i)) << 2) | (decode(b64.charAt(i + 1)) >> 4) + push(tmp & 0xFF) + } else if (placeHolders === 1) { + tmp = (decode(b64.charAt(i)) << 10) | (decode(b64.charAt(i + 1)) << 4) | (decode(b64.charAt(i + 2)) >> 2) + push((tmp >> 8) & 0xFF) + push(tmp & 0xFF) + } - Object.keys(metadata).forEach(function(key) { - result[key] = metadata[key]; - }); + return arr + } - return result; - }); + function uint8ToBase64 (uint8) { + var i, + extraBytes = uint8.length % 3, // if we have 1 byte left, pad 2 bytes + output = "", + temp, length - results = results.concat(filtered); - }); + function encode (num) { + return lookup.charAt(num) + } - // doesn't return replaced text - return {citations: results}; - }, + function tripletToBase64 (num) { + return encode(num >> 18 & 0x3F) + encode(num >> 12 & 0x3F) + encode(num >> 6 & 0x3F) + encode(num & 0x3F) + } + // go through the array every three bytes, we'll deal with trailing stuff later + for (i = 0, length = uint8.length - extraBytes; i < length; i += 3) { + temp = (uint8[i] << 16) + (uint8[i + 1] << 8) + (uint8[i + 2]) + output += tripletToBase64(temp) + } - // run the citators over the text, return an array of matched cites - extract: function(text, options) { - if (!options) options = {}; + // 
pad the end with zeros, but make sure to not forget the extra bytes + switch (extraBytes) { + case 1: + temp = uint8[uint8.length - 1] + output += encode(temp >> 2) + output += encode((temp << 4) & 0x3F) + output += '==' + break + case 2: + temp = (uint8[uint8.length - 2] << 8) + (uint8[uint8.length - 1]) + output += encode(temp >> 10) + output += encode((temp >> 4) & 0x3F) + output += encode((temp << 2) & 0x3F) + output += '=' + break + } - // default: no excerpt - var excerpt = options.excerpt ? parseInt(options.excerpt, 10) : 0; + return output + } - // whether to return parent citations - // default: false - var parents = options.parents || false; + exports.toByteArray = b64ToByteArray + exports.fromByteArray = uint8ToBase64 +}(typeof exports === 'undefined' ? (this.base64js = {}) : exports)) - // default: all types, can be filtered to one, or an array of them - var types = Citation.selectedTypes(options); - if (types.length === 0) return null; +},{}],2:[function(require,module,exports){ +},{}],3:[function(require,module,exports){ +(function (global){ +'use strict'; - // The caller can provide a replace callback to alter every found citation. - // this function will be called with each (found and processed) cite object, - // and should return a string to be put in the cite's place. - // - // The resulting transformed string will be in the returned object as a 'text' field. - // this field will only be present if a replace callback was provided. - // - // providing this callback will also cause matched cites not to return the 'index' field, - // as the replace process will completely screw them up. only use the 'index' field if you - // plan on doing your own replacing. 
- var replace = options.replace; +var buffer = require('buffer'); +var Buffer = buffer.Buffer; +var SlowBuffer = buffer.SlowBuffer; +var MAX_LEN = buffer.kMaxLength || 2147483647; +exports.alloc = function alloc(size, fill, encoding) { + if (typeof Buffer.alloc === 'function') { + return Buffer.alloc(size, fill, encoding); + } + if (typeof encoding === 'number') { + throw new TypeError('encoding must not be number'); + } + if (typeof size !== 'number') { + throw new TypeError('size must be a number'); + } + if (size > MAX_LEN) { + throw new RangeError('size is too large'); + } + var enc = encoding; + var _fill = fill; + if (_fill === undefined) { + enc = undefined; + _fill = 0; + } + var buf = new Buffer(size); + if (typeof _fill === 'string') { + var fillBuf = new Buffer(_fill, enc); + var flen = fillBuf.length; + var i = -1; + while (++i < size) { + buf[i] = fillBuf[i % flen]; + } + } else { + buf.fill(_fill); + } + return buf; +} +exports.allocUnsafe = function allocUnsafe(size) { + if (typeof Buffer.allocUnsafe === 'function') { + return Buffer.allocUnsafe(size); + } + if (typeof size !== 'number') { + throw new TypeError('size must be a number'); + } + if (size > MAX_LEN) { + throw new RangeError('size is too large'); + } + return new Buffer(size); +} +exports.from = function from(value, encodingOrOffset, length) { + if (typeof Buffer.from === 'function' && (!global.Uint8Array || Uint8Array.from !== Buffer.from)) { + return Buffer.from(value, encodingOrOffset, length); + } + if (typeof value === 'number') { + throw new TypeError('"value" argument must not be a number'); + } + if (typeof value === 'string') { + return new Buffer(value, encodingOrOffset); + } + if (typeof ArrayBuffer !== 'undefined' && value instanceof ArrayBuffer) { + var offset = encodingOrOffset; + if (arguments.length === 1) { + return new Buffer(value); + } + if (typeof offset === 'undefined') { + offset = 0; + } + var len = length; + if (typeof len === 'undefined') { + len = 
value.byteLength - offset; + } + if (offset >= value.byteLength) { + throw new RangeError('\'offset\' is out of bounds'); + } + if (len > value.byteLength - offset) { + throw new RangeError('\'length\' is out of bounds'); + } + return new Buffer(value.slice(offset, offset + len)); + } + if (Buffer.isBuffer(value)) { + var out = new Buffer(value.length); + value.copy(out, 0, 0, value.length); + return out; + } + if (value) { + if (Array.isArray(value) || (typeof ArrayBuffer !== 'undefined' && value.buffer instanceof ArrayBuffer) || 'length' in value) { + return new Buffer(value); + } + if (value.type === 'Buffer' && Array.isArray(value.data)) { + return new Buffer(value.data); + } + } - // accumulate the results - var results = []; + throw new TypeError('First argument must be a string, Buffer, ' + 'ArrayBuffer, Array, or array-like object.'); +} +exports.allocUnsafeSlow = function allocUnsafeSlow(size) { + if (typeof Buffer.allocUnsafeSlow === 'function') { + return Buffer.allocUnsafeSlow(size); + } + if (typeof size !== 'number') { + throw new TypeError('size must be a number'); + } + if (size >= MAX_LEN) { + throw new RangeError('size is too large'); + } + return new SlowBuffer(size); +} +}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) +},{"buffer":4}],4:[function(require,module,exports){ +(function (global){ +/*! + * The buffer module from node.js, for the browser. + * + * @author Feross Aboukhadijeh + * @license MIT + */ +/* eslint-disable no-proto */ - // will hold the calculated context-specific patterns we are to run - // over the given text, tracked by index we expect to find them at. - // nextIndex tracks a running index as we loop through patterns. 
- // (citators could just be called indexedPatterns) - var citators = {}; - var nextIndex = 0; +'use strict' - // Go through every regex-based citator and prepare a set of patterns, - // indexed by the order of a matched arguments array. - types.forEach(function(type) { - if (Citation.types[type].type != "regex") return; +var base64 = require('base64-js') +var ieee754 = require('ieee754') +var isArray = require('isarray') - // Calculate the patterns this citator will contribute to the parse. - // (individual parsers can opt to make their parsing context-specific) - var patterns = Citation.types[type].patterns; - if (typeof(patterns) == "function") - patterns = patterns(options[type] || {}); +exports.Buffer = Buffer +exports.SlowBuffer = SlowBuffer +exports.INSPECT_MAX_BYTES = 50 +Buffer.poolSize = 8192 // not used by this implementation - // add each pattern, keeping a running tally of what we would - // expect its primary index to be when found in the master regex. - patterns.forEach(function(pattern) { - pattern.type = type; // will be needed later - citators[nextIndex] = pattern; - nextIndex += pattern.fields.length + 1; - }); - }); +var rootParent = {} - // If there are any regex-based patterns being applied, combine them - // and run a find/replace over the string. - var regexes = Object.keys(citators).map(function(key) {return citators[key].regex}); - if (regexes.length > 0) { +/** + * If `Buffer.TYPED_ARRAY_SUPPORT`: + * === true Use Uint8Array implementation (fastest) + * === false Use Object implementation (most compatible, even IE6) + * + * Browsers that support typed arrays are IE 10+, Firefox 4+, Chrome 7+, Safari 5.1+, + * Opera 11.6+, iOS 4.2+. + * + * Due to various browser bugs, sometimes the Object implementation will be used even + * when the browser supports typed arrays. + * + * Note: + * + * - Firefox 4-29 lacks support for adding new properties to `Uint8Array` instances, + * See: https://bugzilla.mozilla.org/show_bug.cgi?id=695438. 
+ * + * - Safari 5-7 lacks support for changing the `Object.prototype.constructor` property + * on objects. + * + * - Chrome 9-10 is missing the `TypedArray.prototype.subarray` function. + * + * - IE10 has a broken `TypedArray.prototype.subarray` function which returns arrays of + * incorrect length in some situations. - // merge all regexes into one, so that each pattern will begin at a predictable place - var regex = new RegExp("(" + regexes.join(")|(") + ")", "ig"); + * We detect these buggy browsers and set `Buffer.TYPED_ARRAY_SUPPORT` to `false` so they + * get the Object implementation, which is slower but behaves correctly. + */ +Buffer.TYPED_ARRAY_SUPPORT = global.TYPED_ARRAY_SUPPORT !== undefined + ? global.TYPED_ARRAY_SUPPORT + : typedArraySupport() - var replaced = text.replace(regex, function() { - var match = arguments[0]; +function typedArraySupport () { + function Bar () {} + try { + var arr = new Uint8Array(1) + arr.foo = function () { return 42 } + arr.constructor = Bar + return arr.foo() === 42 && // typed array instances can be augmented + arr.constructor === Bar && // constructor can be set + typeof arr.subarray === 'function' && // chrome 9-10 lack `subarray` + arr.subarray(1, 1).byteLength === 0 // ie10 has broken `subarray` + } catch (e) { + return false + } +} - // offset is second-to-last argument - var index = arguments[arguments.length - 2]; +function kMaxLength () { + return Buffer.TYPED_ARRAY_SUPPORT + ? 0x7fffffff + : 0x3fffffff +} - // pull out just the regex-captured matches - var captures = Array.prototype.slice.call(arguments, 1, -2); +/** + * Class: Buffer + * ============= + * + * The Buffer constructor returns instances of `Uint8Array` that are augmented + * with function properties for all the node `Buffer` API functions. We use + * `Uint8Array` so that square bracket notation works as expected -- it returns + * a single octet. + * + * By augmenting the instances, we can avoid modifying the `Uint8Array` + * prototype. 
+ */ +function Buffer (arg) { + if (!(this instanceof Buffer)) { + // Avoid going through an ArgumentsAdaptorTrampoline in the common case. + if (arguments.length > 1) return new Buffer(arg, arguments[1]) + return new Buffer(arg) + } - // find the first matched index in the captures - var matchIndex; - for (matchIndex=0; matchIndex 1 ? arguments[1] : 'utf8') + } - // one match can generate one or many citation results (e.g. ranges) - if (!Array.isArray(cites)) cites = [cites]; + // Unusual. + return fromObject(this, arg) +} - // put together the match-level information - var matchInfo = {type: citator.type}; - matchInfo.match = match.toString(); // match data can be converted to the plain string +function fromNumber (that, length) { + that = allocate(that, length < 0 ? 0 : checked(length) | 0) + if (!Buffer.TYPED_ARRAY_SUPPORT) { + for (var i = 0; i < length; i++) { + that[i] = 0 + } + } + return that +} - // store the matched character offset (if we're replacing we need it to handle - // some multiple citations, but the index will be useless to the caller after - // the replacement) so we wipe it out later. - matchInfo.index = index; +function fromString (that, string, encoding) { + if (typeof encoding !== 'string' || encoding === '') encoding = 'utf8' - // use index to grab surrounding excerpt - if (excerpt > 0) { - var proposedLeft = index - excerpt; - var left = proposedLeft > 0 ? proposedLeft : 0; + // Assumption: byteLength() return value is always < kMaxLength. + var length = byteLength(string, encoding) | 0 + that = allocate(that, length) - var proposedRight = index + matchInfo.match.length + excerpt; - var right = (proposedRight <= text.length) ? 
proposedRight : text.length; + that.write(string, encoding) + return that +} - matchInfo.excerpt = text.substring(left, right); - } +function fromObject (that, object) { + if (Buffer.isBuffer(object)) return fromBuffer(that, object) + if (isArray(object)) return fromArray(that, object) - // if we want parent cites too, make those now - if (parents && Citation.types[type].parents_by) { - cites = Citation._.flatten(cites.map(function(cite) { - return Citation.citeParents(cite, type); - })); - } + if (object == null) { + throw new TypeError('must start with number, buffer, array or string') + } - cites = cites.map(function(cite) { - var result = {}; + if (typeof ArrayBuffer !== 'undefined') { + if (object.buffer instanceof ArrayBuffer) { + return fromTypedArray(that, object) + } + if (object instanceof ArrayBuffer) { + return fromArrayBuffer(that, object) + } + } - // match-level info - Citation._.extend(result, matchInfo); + if (object.length) return fromArrayLike(that, object) - // handle _submatch, which lets the user-level citator override the - // match and index with a sub-part of the whole matched regex - if (cite._submatch) { - result.match = cite._submatch.text; - result.index += cite._submatch.offset; - delete cite._submatch; - } + return fromJsonObject(that, object) +} - // cite-level info, plus ID standardization - result[type] = cite; - result[type].id = Citation.types[type].id(cite); +function fromBuffer (that, buffer) { + var length = checked(buffer.length) | 0 + that = allocate(that, length) + buffer.copy(that, 0, 0, length) + return that +} - results.push(result); +function fromArray (that, array) { + var length = checked(array.length) | 0 + that = allocate(that, length) + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - return result; - }); +// Duplicate of fromArray() to keep fromArray() monomorphic. 
+function fromTypedArray (that, array) { + var length = checked(array.length) | 0 + that = allocate(that, length) + // Truncating the elements is probably not what people expect from typed + // arrays with BYTES_PER_ELEMENT > 1 but it's compatible with the behavior + // of the old Buffer constructor. + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - // If a replace function is given, replace each matched citation by the - // result of calling the replace function with the citation passed as its - // only argument. - // - // Most citators return only a single citation match per regex match, but - // some return multiple citations for strings like "§§ 32-701 through 32-703". +function fromArrayBuffer (that, array) { + if (Buffer.TYPED_ARRAY_SUPPORT) { + // Return an augmented `Uint8Array` instance, for best performance + array.byteLength + that = Buffer._augment(new Uint8Array(array)) + } else { + // Fallback: Return an object instance of the Buffer class + that = fromTypedArray(that, new Uint8Array(array)) + } + return that +} - // Collect the final match string here. - var finalstring = matchInfo.match; +function fromArrayLike (that, array) { + var length = checked(array.length) | 0 + that = allocate(that, length) + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - // Get the replace function. If options.replace is a function use that, - // or if it is an object mapping the citator type to a function use that. - var replace_func = null; - if (typeof(replace) === "function") - replace_func = replace; - else if ((typeof(replace) === "object") && (typeof(replace[type]) === "function")) - replace_func = replace[type]; - else - replace_func = null; +// Deserialize { type: 'Buffer', data: [1,2,3,...] } into a Buffer object. +// Returns a zero-length buffer for inputs that don't conform to the spec. 
+function fromJsonObject (that, object) { + var array + var length = 0 - // If there's a replacement function... - if (replace_func) { - // Process the citations in the order they are returned. Assume they are - // ordered from left to right. - var last_index = 0; - var dx = 0; - for (var i = 0; i < cites.length; i++) { - // Skip citations that overlap with the previous citation (e.g. there - // may be two citations for the same text range.) - if (cites[i].index >= last_index) { - // Execute the replacement function. If the return is truth-y, perform - // a replacement. - var replacement = replace_func(cites[i]); - if (replacement) { - // Replace the substring. - finalstring = finalstring.substring(0, cites[i].index-index+dx) + replacement + finalstring.substring(cites[i].index-index+cites[i].match.length+dx); - - // The replacement text may have a different length than the text - // being replaced. Keep track of the total change in string length - // as we go because we have to adjust future citation replacements's - // indexes so that we make the edit to finalstring in the right place. - dx += replacement.length - cites[i].match.length; + if (object.type === 'Buffer' && isArray(object.data)) { + array = object.data + length = checked(array.length) | 0 + } + that = allocate(that, length) - // And track the end of last citation so we can skip any future citations - // that overlap with this text range. - last_index = cites[i].index + cites[i].match.length; - } - } + for (var i = 0; i < length; i += 1) { + that[i] = array[i] & 255 + } + return that +} - // Per the citation API, delete the index field when doing a replacement. - // After replacements, the index will no longer be useful to the caller - // because the string has been edited. 
- delete cites[i].index; - } - } - return finalstring; - }); - } +if (Buffer.TYPED_ARRAY_SUPPORT) { + Buffer.prototype.__proto__ = Uint8Array.prototype + Buffer.__proto__ = Uint8Array +} else { + // pre-set for values that may exist in the future + Buffer.prototype.length = undefined + Buffer.prototype.parent = undefined +} - // TODO: do for any external cite types, not just "judicial" - if (types.indexOf("judicial") != -1) - results = results.concat(Citation.types.judicial.extract(text)); +function allocate (that, length) { + if (Buffer.TYPED_ARRAY_SUPPORT) { + // Return an augmented `Uint8Array` instance, for best performance + that = Buffer._augment(new Uint8Array(length)) + that.__proto__ = Buffer.prototype + } else { + // Fallback: Return an object instance of the Buffer class + that.length = length + that._isBuffer = true + } - var response = {citations: results}; - if (options.replace) response.text = replaced; + var fromPool = length !== 0 && length <= Buffer.poolSize >>> 1 + if (fromPool) that.parent = rootParent - return response; - }, + return that +} +function checked (length) { + // Note: cannot use `length < kMaxLength` here because that fails when + // length is NaN (which is otherwise coerced to zero.) 
+ if (length >= kMaxLength()) { + throw new RangeError('Attempt to allocate Buffer larger than maximum ' + + 'size: 0x' + kMaxLength().toString(16) + ' bytes') + } + return length | 0 +} - // for a given set of cite-specific details, - // return itself and its parent citations - citeParents: function(citation, type) { - var field = Citation.types[type].parents_by; - var results = []; +function SlowBuffer (subject, encoding) { + if (!(this instanceof SlowBuffer)) return new SlowBuffer(subject, encoding) - for (var i=citation[field].length; i >= 0; i--) { - var parent = Citation._.extend({}, citation); - parent[field] = parent[field].slice(0, i); - results.push(parent); - } - return results; - }, + var buf = new Buffer(subject, encoding) + delete buf.parent + return buf +} - // given an array of captures *beginning* with values the pattern - // knows how to process, turn it into an object with those keys. - matchFor: function(captures, pattern) { - var match = {}; - for (var i=0; i 0) - types = options.types; - } else - types = [options.types]; - } +Buffer.compare = function compare (a, b) { + if (!Buffer.isBuffer(a) || !Buffer.isBuffer(b)) { + throw new TypeError('Arguments must be Buffers') + } - // only allow valid types - if (types) { - types = types.filter(function(type) { - return Object.keys(Citation.types).indexOf(type) != -1; - }); - } else - types = Object.keys(Citation.types); + if (a === b) return 0 - return types; - }, + var x = a.length + var y = b.length - // small replacement for several functions previously served by - // the `underscore` library. 
- _: { - extend: function(obj) { - Array.prototype.slice.call(arguments, 1).forEach(function(source) { - if (source) { - for (var prop in source) - obj[prop] = source[prop]; - } - }); - return obj; - }, + var i = 0 + var len = Math.min(x, y) + while (i < len) { + if (a[i] !== b[i]) break - flatten: function(array) { - var impl = function(input, output) { - input.forEach(function(value) { - if (Array.isArray(value)) - impl(value, output); - else - output.push(value); - }); - return output; - } + ++i + } - return impl(array, []); - } + if (i !== len) { + x = a[i] + y = b[i] } -}; + if (x < y) return -1 + if (y < x) return 1 + return 0 +} +Buffer.isEncoding = function isEncoding (encoding) { + switch (String(encoding).toLowerCase()) { + case 'hex': + case 'utf8': + case 'utf-8': + case 'ascii': + case 'binary': + case 'base64': + case 'raw': + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return true + default: + return false + } +} -// TODO: load only the citation types asked for -if (typeof(require) !== "undefined") { - Citation.types.usc = require("./citations/usc"); - Citation.types.law = require("./citations/law"); - Citation.types.cfr = require("./citations/cfr"); - Citation.types.va_code = require("./citations/va_code"); - Citation.types.dc_code = require("./citations/dc_code"); - Citation.types.dc_register = require("./citations/dc_register"); - Citation.types.dc_law = require("./citations/dc_law"); - Citation.types.dc_stat = require("./citations/dc_stat"); - Citation.types.stat = require("./citations/stat"); - Citation.types.reporter = require("./citations/reporter"); +Buffer.concat = function concat (list, length) { + if (!isArray(list)) throw new TypeError('list argument must be an Array of Buffers.') + if (list.length === 0) { + return new Buffer(0) + } - Citation.filters.lines = require("./filters/lines"); + var i + if (length === undefined) { + length = 0 + for (i = 0; i < list.length; i++) { + length += list[i].length + } + } + + 
var buf = new Buffer(length) + var pos = 0 + for (i = 0; i < list.length; i++) { + var item = list[i] + item.copy(buf, pos) + pos += item.length + } + return buf } -// auto-load in-browser -if (typeof(window) !== "undefined") - window.Citation = Citation; +function byteLength (string, encoding) { + if (typeof string !== 'string') string = '' + string -return Citation; + var len = string.length + if (len === 0) return 0 -})(); + // Use a for loop to avoid recursion + var loweredCase = false + for (;;) { + switch (encoding) { + case 'ascii': + case 'binary': + // Deprecated + case 'raw': + case 'raws': + return len + case 'utf8': + case 'utf-8': + return utf8ToBytes(string).length + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return len * 2 + case 'hex': + return len >>> 1 + case 'base64': + return base64ToBytes(string).length + default: + if (loweredCase) return utf8ToBytes(string).length // assume utf8 + encoding = ('' + encoding).toLowerCase() + loweredCase = true + } + } +} +Buffer.byteLength = byteLength -},{"./citations/cfr":2,"./citations/dc_code":3,"./citations/dc_law":4,"./citations/dc_register":5,"./citations/dc_stat":6,"./citations/law":7,"./citations/reporter":8,"./citations/stat":9,"./citations/usc":10,"./citations/va_code":11,"./filters/lines":12}],2:[function(require,module,exports){ -module.exports = { - type: "regex", +function slowToString (encoding, start, end) { + var loweredCase = false - id: function(data) { - return ["cfr", data.title, (data.section || data.part)] - .concat(data.subsections || []) - .join("/") - }, + start = start | 0 + end = end === undefined || end === Infinity ? this.length : end | 0 - patterns: [ - // done: - // 14 CFR part 25 - // 38 CFR Part 74.2 - // 48 CFR § 9903.201 - // 24 CFR 85.25(h) - // 5 CFR §531.610(f) - // 45 C.F.R. 
3009.4 - // 47 CFR 54.506 (c) - // but not: 47 CFR 54.506 (whatever) - // 5CFR, part 575 + if (!encoding) encoding = 'utf8' + if (start < 0) start = 0 + if (end > this.length) end = this.length + if (end <= start) return '' - // maybe: - // 13 CFR Parts 125 and 134 - // 5CFR, part 575, subpart C - // 23 CFR 650, Subpart A - { - regex: - "(\\d+)\\s?" + - "C\\.?\\s?F\\.?\\s?R\\.?" + - "(?:[\\s,]+(?:§+|parts?))?" + - "\\s*((?:\\d+\\.?\\d*(?:\\s*\\((?:[a-zA-Z\\d]{1,2}|[ixvIXV]+)\\))*)+)", + while (true) { + switch (encoding) { + case 'hex': + return hexSlice(this, start, end) - fields: ['title', 'sections'], + case 'utf8': + case 'utf-8': + return utf8Slice(this, start, end) - processor: function(captures) { - var title = captures.title; - var part, section, subsections; + case 'ascii': + return asciiSlice(this, start, end) - // separate subsections for each section being considered - var split = captures.sections.split(/[\(\)]+/).filter(function(x) {return x;}); - section = split[0].trim(); - subsections = split.splice(1); + case 'binary': + return binarySlice(this, start, end) - if (section.indexOf(".") > 0) - part = section.split(".")[0]; - else { - part = section; - section = null; - subsections = null; // don't include empty array - } + case 'base64': + return base64Slice(this, start, end) - return { - title: title, - part: part, - section: section, - subsections: subsections - }; - } + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return utf16leSlice(this, start, end) + + default: + if (loweredCase) throw new TypeError('Unknown encoding: ' + encoding) + encoding = (encoding + '').toLowerCase() + loweredCase = true } + } +} - // todo: - // parts 121 and 135 of Title 14 of the Code of Federal Regulations - // { - // regex: - // "section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*)" + - // "(?:\\s+of|\\,) title (\\d+)", - // fields: ['section', 'subsections', 'title'], - // processor: function(captures) { - // return { - // title: captures.title, - // 
section: captures.section, - // subsections: captures.subsections.split(/[\(\)]+/).filter(function(x) {return x;}) - // }; - // } - // } - ] -}; +Buffer.prototype.toString = function toString () { + var length = this.length | 0 + if (length === 0) return '' + if (arguments.length === 0) return utf8Slice(this, 0, length) + return slowToString.apply(this, arguments) +} -},{}],3:[function(require,module,exports){ -var base_regex = - "(\\d+A?)" + // title - "\\s?\\-\\s?" + // dash - "([\\w\\d]+(?:\\.?[\\w\\d]+)?)" + // section identifier (letters/numbers/dots) - "((?:\\([^\\)]+\\))*)"; // subsection (any number of adjacent parenthesized subsections) +Buffer.prototype.equals = function equals (b) { + if (!Buffer.isBuffer(b)) throw new TypeError('Argument must be a Buffer') + if (this === b) return true + return Buffer.compare(this, b) === 0 +} -module.exports = { - type: "regex", +Buffer.prototype.inspect = function inspect () { + var str = '' + var max = exports.INSPECT_MAX_BYTES + if (this.length > 0) { + str = this.toString('hex', 0, max).match(/.{2}/g).join(' ') + if (this.length > max) str += ' ... ' + } + return '' +} - // normalize all cites to an ID, with and without subsections - id: function(cite) { - return ["dc-code", cite.title, cite.section] - .concat(cite.subsections) - .join("/"); - }, +Buffer.prototype.compare = function compare (b) { + if (!Buffer.isBuffer(b)) throw new TypeError('Argument must be a Buffer') + if (this === b) return 0 + return Buffer.compare(this, b) +} - // field to calculate parents from - parents_by: "subsections", +Buffer.prototype.indexOf = function indexOf (val, byteOffset) { + if (byteOffset > 0x7fffffff) byteOffset = 0x7fffffff + else if (byteOffset < -0x80000000) byteOffset = -0x80000000 + byteOffset >>= 0 - patterns: function(context) { - // D.C. Official Code 3-1202.04 - // D.C. Official Code § 3-1201.01 - // D.C. Official Code §§ 38-2602(b)(11) - // D.C. Official Code § 3- 1201.01 - // D.C. 
Official Code § 3 -1201.01 - // - // § 32-701 - // § 32-701(4) - // § 3-101.01 - // § 1-603.01(13) - // § 1- 1163.33 - // § 1 -1163.33 - // section 16-2326.01 + if (this.length === 0) return -1 + if (byteOffset >= this.length) return -1 - var prefix_regex = ""; - var section_regex = "(?:sections?|§+)\\s+"; - var sections_regex = "(?:sections|§§)\\s+"; - if (context.source != "dc_code") { - // Require "DC Official Code" but then make the section symbol optional. - prefix_regex = "D\\.?C\\.? (?:Official )?Code\\s+"; - section_regex = "(?:" + section_regex + ")?"; - sections_regex = "(?:" + sections_regex + ")?"; + // Negative offsets start from the end of the buffer + if (byteOffset < 0) byteOffset = Math.max(this.length + byteOffset, 0) + + if (typeof val === 'string') { + if (val.length === 0) return -1 // special case: looking for empty string always fails + return String.prototype.indexOf.call(this, val, byteOffset) + } + if (Buffer.isBuffer(val)) { + return arrayIndexOf(this, val, byteOffset) + } + if (typeof val === 'number') { + if (Buffer.TYPED_ARRAY_SUPPORT && Uint8Array.prototype.indexOf === 'function') { + return Uint8Array.prototype.indexOf.call(this, val, byteOffset) } + return arrayIndexOf(this, [ val ], byteOffset) + } - return [ - // multiple citations - // has precedence over a single citation - // Unlike the single citation, the matched parts are just the title/section/subsection - // and omits "DC Code" and the section symbols (if present) from the matched text. - { - regex: "(" + prefix_regex + sections_regex + ")(" + base_regex + "(?:(?:,|, and|\\s+and|\\s+through|\\s+to)\\s+" + base_regex + ")+)", + function arrayIndexOf (arr, val, byteOffset) { + var foundIndex = -1 + for (var i = 0; byteOffset + i < arr.length; i++) { + if (arr[byteOffset + i] === val[foundIndex === -1 ? 
0 : i - foundIndex]) { + if (foundIndex === -1) foundIndex = i + if (i - foundIndex + 1 === val.length) return byteOffset + foundIndex + } else { + foundIndex = -1 + } + } + return -1 + } - fields: ["prefix", "multicite", "title1", "section1", "subsections1", "title2", "section2", "subsections2"], + throw new TypeError('val must be string, number or Buffer') +} - processor: function(captures) { - var rx = new RegExp(base_regex, "g"); - var matches = new Array(); - var match; - while((match = rx.exec(captures.multicite)) !== null) { - matches.push({ - _submatch: { - text: match[0], - offset: captures.prefix.length + match.index, - }, - title: match[1], - section: match[2], - subsections: split_subsections(match[3]) - }); +// `get` is deprecated +Buffer.prototype.get = function get (offset) { + console.log('.get() is deprecated. Access using array indexes instead.') + return this.readUInt8(offset) +} + +// `set` is deprecated +Buffer.prototype.set = function set (v, offset) { + console.log('.set() is deprecated. 
Access using array indexes instead.') + return this.writeUInt8(v, offset) +} + +function hexWrite (buf, string, offset, length) { + offset = Number(offset) || 0 + var remaining = buf.length - offset + if (!length) { + length = remaining + } else { + length = Number(length) + if (length > remaining) { + length = remaining + } + } + + // must be an even number of digits + var strLen = string.length + if (strLen % 2 !== 0) throw new Error('Invalid hex string') + + if (length > strLen / 2) { + length = strLen / 2 + } + for (var i = 0; i < length; i++) { + var parsed = parseInt(string.substr(i * 2, 2), 16) + if (isNaN(parsed)) throw new Error('Invalid hex string') + buf[offset + i] = parsed + } + return i +} + +function utf8Write (buf, string, offset, length) { + return blitBuffer(utf8ToBytes(string, buf.length - offset), buf, offset, length) +} + +function asciiWrite (buf, string, offset, length) { + return blitBuffer(asciiToBytes(string), buf, offset, length) +} + +function binaryWrite (buf, string, offset, length) { + return asciiWrite(buf, string, offset, length) +} + +function base64Write (buf, string, offset, length) { + return blitBuffer(base64ToBytes(string), buf, offset, length) +} + +function ucs2Write (buf, string, offset, length) { + return blitBuffer(utf16leToBytes(string, buf.length - offset), buf, offset, length) +} + +Buffer.prototype.write = function write (string, offset, length, encoding) { + // Buffer#write(string) + if (offset === undefined) { + encoding = 'utf8' + length = this.length + offset = 0 + // Buffer#write(string, encoding) + } else if (length === undefined && typeof offset === 'string') { + encoding = offset + length = this.length + offset = 0 + // Buffer#write(string, offset[, length][, encoding]) + } else if (isFinite(offset)) { + offset = offset | 0 + if (isFinite(length)) { + length = length | 0 + if (encoding === undefined) encoding = 'utf8' + } else { + encoding = length + length = undefined + } + // legacy write(string, encoding, 
offset, length) - remove in v0.13 + } else { + var swap = encoding + encoding = offset + offset = length | 0 + length = swap + } + + var remaining = this.length - offset + if (length === undefined || length > remaining) length = remaining + + if ((string.length > 0 && (length < 0 || offset < 0)) || offset > this.length) { + throw new RangeError('attempt to write outside buffer bounds') + } + + if (!encoding) encoding = 'utf8' + + var loweredCase = false + for (;;) { + switch (encoding) { + case 'hex': + return hexWrite(this, string, offset, length) + + case 'utf8': + case 'utf-8': + return utf8Write(this, string, offset, length) + + case 'ascii': + return asciiWrite(this, string, offset, length) + + case 'binary': + return binaryWrite(this, string, offset, length) + + case 'base64': + // Warning: maxLength not taken into account in base64Write + return base64Write(this, string, offset, length) + + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return ucs2Write(this, string, offset, length) + + default: + if (loweredCase) throw new TypeError('Unknown encoding: ' + encoding) + encoding = ('' + encoding).toLowerCase() + loweredCase = true + } + } +} + +Buffer.prototype.toJSON = function toJSON () { + return { + type: 'Buffer', + data: Array.prototype.slice.call(this._arr || this, 0) + } +} + +function base64Slice (buf, start, end) { + if (start === 0 && end === buf.length) { + return base64.fromByteArray(buf) + } else { + return base64.fromByteArray(buf.slice(start, end)) + } +} + +function utf8Slice (buf, start, end) { + end = Math.min(buf.length, end) + var res = [] + + var i = start + while (i < end) { + var firstByte = buf[i] + var codePoint = null + var bytesPerSequence = (firstByte > 0xEF) ? 4 + : (firstByte > 0xDF) ? 3 + : (firstByte > 0xBF) ? 
2 + : 1 + + if (i + bytesPerSequence <= end) { + var secondByte, thirdByte, fourthByte, tempCodePoint + + switch (bytesPerSequence) { + case 1: + if (firstByte < 0x80) { + codePoint = firstByte } - return matches; + break + case 2: + secondByte = buf[i + 1] + if ((secondByte & 0xC0) === 0x80) { + tempCodePoint = (firstByte & 0x1F) << 0x6 | (secondByte & 0x3F) + if (tempCodePoint > 0x7F) { + codePoint = tempCodePoint + } + } + break + case 3: + secondByte = buf[i + 1] + thirdByte = buf[i + 2] + if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80) { + tempCodePoint = (firstByte & 0xF) << 0xC | (secondByte & 0x3F) << 0x6 | (thirdByte & 0x3F) + if (tempCodePoint > 0x7FF && (tempCodePoint < 0xD800 || tempCodePoint > 0xDFFF)) { + codePoint = tempCodePoint + } + } + break + case 4: + secondByte = buf[i + 1] + thirdByte = buf[i + 2] + fourthByte = buf[i + 3] + if ((secondByte & 0xC0) === 0x80 && (thirdByte & 0xC0) === 0x80 && (fourthByte & 0xC0) === 0x80) { + tempCodePoint = (firstByte & 0xF) << 0x12 | (secondByte & 0x3F) << 0xC | (thirdByte & 0x3F) << 0x6 | (fourthByte & 0x3F) + if (tempCodePoint > 0xFFFF && tempCodePoint < 0x110000) { + codePoint = tempCodePoint + } + } + } + } + + if (codePoint === null) { + // we did not generate a valid codePoint so insert a + // replacement char (U+FFFD) and advance only 1 byte + codePoint = 0xFFFD + bytesPerSequence = 1 + } else if (codePoint > 0xFFFF) { + // encode to utf16 (surrogate pair dance) + codePoint -= 0x10000 + res.push(codePoint >>> 10 & 0x3FF | 0xD800) + codePoint = 0xDC00 | codePoint & 0x3FF + } + + res.push(codePoint) + i += bytesPerSequence + } + + return decodeCodePointsArray(res) +} + +// Based on http://stackoverflow.com/a/22747272/680742, the browser with +// the lowest limit is Chrome, with 0x10000 args. 
+// We go 1 magnitude less, for safety +var MAX_ARGUMENTS_LENGTH = 0x1000 + +function decodeCodePointsArray (codePoints) { + var len = codePoints.length + if (len <= MAX_ARGUMENTS_LENGTH) { + return String.fromCharCode.apply(String, codePoints) // avoid extra slice() + } + + // Decode in chunks to avoid "call stack size exceeded". + var res = '' + var i = 0 + while (i < len) { + res += String.fromCharCode.apply( + String, + codePoints.slice(i, i += MAX_ARGUMENTS_LENGTH) + ) + } + return res +} + +function asciiSlice (buf, start, end) { + var ret = '' + end = Math.min(buf.length, end) + + for (var i = start; i < end; i++) { + ret += String.fromCharCode(buf[i] & 0x7F) + } + return ret +} + +function binarySlice (buf, start, end) { + var ret = '' + end = Math.min(buf.length, end) + + for (var i = start; i < end; i++) { + ret += String.fromCharCode(buf[i]) + } + return ret +} + +function hexSlice (buf, start, end) { + var len = buf.length + + if (!start || start < 0) start = 0 + if (!end || end < 0 || end > len) end = len + + var out = '' + for (var i = start; i < end; i++) { + out += toHex(buf[i]) + } + return out +} + +function utf16leSlice (buf, start, end) { + var bytes = buf.slice(start, end) + var res = '' + for (var i = 0; i < bytes.length; i += 2) { + res += String.fromCharCode(bytes[i] + bytes[i + 1] * 256) + } + return res +} + +Buffer.prototype.slice = function slice (start, end) { + var len = this.length + start = ~~start + end = end === undefined ? 
len : ~~end + + if (start < 0) { + start += len + if (start < 0) start = 0 + } else if (start > len) { + start = len + } + + if (end < 0) { + end += len + if (end < 0) end = 0 + } else if (end > len) { + end = len + } + + if (end < start) end = start + + var newBuf + if (Buffer.TYPED_ARRAY_SUPPORT) { + newBuf = Buffer._augment(this.subarray(start, end)) + } else { + var sliceLen = end - start + newBuf = new Buffer(sliceLen, undefined) + for (var i = 0; i < sliceLen; i++) { + newBuf[i] = this[i + start] + } + } + + if (newBuf.length) newBuf.parent = this.parent || this + + return newBuf +} + +/* + * Need to make sure that buffer isn't trying to write out of bounds. + */ +function checkOffset (offset, ext, length) { + if ((offset % 1) !== 0 || offset < 0) throw new RangeError('offset is not uint') + if (offset + ext > length) throw new RangeError('Trying to access beyond buffer length') +} + +Buffer.prototype.readUIntLE = function readUIntLE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkOffset(offset, byteLength, this.length) + + var val = this[offset] + var mul = 1 + var i = 0 + while (++i < byteLength && (mul *= 0x100)) { + val += this[offset + i] * mul + } + + return val +} + +Buffer.prototype.readUIntBE = function readUIntBE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) { + checkOffset(offset, byteLength, this.length) + } + + var val = this[offset + --byteLength] + var mul = 1 + while (byteLength > 0 && (mul *= 0x100)) { + val += this[offset + --byteLength] * mul + } + + return val +} + +Buffer.prototype.readUInt8 = function readUInt8 (offset, noAssert) { + if (!noAssert) checkOffset(offset, 1, this.length) + return this[offset] +} + +Buffer.prototype.readUInt16LE = function readUInt16LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + return this[offset] | (this[offset + 1] << 8) +} + +Buffer.prototype.readUInt16BE = 
function readUInt16BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + return (this[offset] << 8) | this[offset + 1] +} + +Buffer.prototype.readUInt32LE = function readUInt32LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return ((this[offset]) | + (this[offset + 1] << 8) | + (this[offset + 2] << 16)) + + (this[offset + 3] * 0x1000000) +} + +Buffer.prototype.readUInt32BE = function readUInt32BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return (this[offset] * 0x1000000) + + ((this[offset + 1] << 16) | + (this[offset + 2] << 8) | + this[offset + 3]) +} + +Buffer.prototype.readIntLE = function readIntLE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkOffset(offset, byteLength, this.length) + + var val = this[offset] + var mul = 1 + var i = 0 + while (++i < byteLength && (mul *= 0x100)) { + val += this[offset + i] * mul + } + mul *= 0x80 + + if (val >= mul) val -= Math.pow(2, 8 * byteLength) + + return val +} + +Buffer.prototype.readIntBE = function readIntBE (offset, byteLength, noAssert) { + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkOffset(offset, byteLength, this.length) + + var i = byteLength + var mul = 1 + var val = this[offset + --i] + while (i > 0 && (mul *= 0x100)) { + val += this[offset + --i] * mul + } + mul *= 0x80 + + if (val >= mul) val -= Math.pow(2, 8 * byteLength) + + return val +} + +Buffer.prototype.readInt8 = function readInt8 (offset, noAssert) { + if (!noAssert) checkOffset(offset, 1, this.length) + if (!(this[offset] & 0x80)) return (this[offset]) + return ((0xff - this[offset] + 1) * -1) +} + +Buffer.prototype.readInt16LE = function readInt16LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + var val = this[offset] | (this[offset + 1] << 8) + return (val & 0x8000) ? 
val | 0xFFFF0000 : val +} + +Buffer.prototype.readInt16BE = function readInt16BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 2, this.length) + var val = this[offset + 1] | (this[offset] << 8) + return (val & 0x8000) ? val | 0xFFFF0000 : val +} + +Buffer.prototype.readInt32LE = function readInt32LE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return (this[offset]) | + (this[offset + 1] << 8) | + (this[offset + 2] << 16) | + (this[offset + 3] << 24) +} + +Buffer.prototype.readInt32BE = function readInt32BE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + + return (this[offset] << 24) | + (this[offset + 1] << 16) | + (this[offset + 2] << 8) | + (this[offset + 3]) +} + +Buffer.prototype.readFloatLE = function readFloatLE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + return ieee754.read(this, offset, true, 23, 4) +} + +Buffer.prototype.readFloatBE = function readFloatBE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 4, this.length) + return ieee754.read(this, offset, false, 23, 4) +} + +Buffer.prototype.readDoubleLE = function readDoubleLE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 8, this.length) + return ieee754.read(this, offset, true, 52, 8) +} + +Buffer.prototype.readDoubleBE = function readDoubleBE (offset, noAssert) { + if (!noAssert) checkOffset(offset, 8, this.length) + return ieee754.read(this, offset, false, 52, 8) +} + +function checkInt (buf, value, offset, ext, max, min) { + if (!Buffer.isBuffer(buf)) throw new TypeError('buffer must be a Buffer instance') + if (value > max || value < min) throw new RangeError('value is out of bounds') + if (offset + ext > buf.length) throw new RangeError('index out of range') +} + +Buffer.prototype.writeUIntLE = function writeUIntLE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkInt(this, value, offset, 
byteLength, Math.pow(2, 8 * byteLength), 0) + + var mul = 1 + var i = 0 + this[offset] = value & 0xFF + while (++i < byteLength && (mul *= 0x100)) { + this[offset + i] = (value / mul) & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeUIntBE = function writeUIntBE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + byteLength = byteLength | 0 + if (!noAssert) checkInt(this, value, offset, byteLength, Math.pow(2, 8 * byteLength), 0) + + var i = byteLength - 1 + var mul = 1 + this[offset + i] = value & 0xFF + while (--i >= 0 && (mul *= 0x100)) { + this[offset + i] = (value / mul) & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeUInt8 = function writeUInt8 (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 1, 0xff, 0) + if (!Buffer.TYPED_ARRAY_SUPPORT) value = Math.floor(value) + this[offset] = (value & 0xff) + return offset + 1 +} + +function objectWriteUInt16 (buf, value, offset, littleEndian) { + if (value < 0) value = 0xffff + value + 1 + for (var i = 0, j = Math.min(buf.length - offset, 2); i < j; i++) { + buf[offset + i] = (value & (0xff << (8 * (littleEndian ? i : 1 - i)))) >>> + (littleEndian ? 
i : 1 - i) * 8 + } +} + +Buffer.prototype.writeUInt16LE = function writeUInt16LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0xffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value & 0xff) + this[offset + 1] = (value >>> 8) + } else { + objectWriteUInt16(this, value, offset, true) + } + return offset + 2 +} + +Buffer.prototype.writeUInt16BE = function writeUInt16BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0xffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 8) + this[offset + 1] = (value & 0xff) + } else { + objectWriteUInt16(this, value, offset, false) + } + return offset + 2 +} + +function objectWriteUInt32 (buf, value, offset, littleEndian) { + if (value < 0) value = 0xffffffff + value + 1 + for (var i = 0, j = Math.min(buf.length - offset, 4); i < j; i++) { + buf[offset + i] = (value >>> (littleEndian ? 
i : 3 - i) * 8) & 0xff + } +} + +Buffer.prototype.writeUInt32LE = function writeUInt32LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0xffffffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset + 3] = (value >>> 24) + this[offset + 2] = (value >>> 16) + this[offset + 1] = (value >>> 8) + this[offset] = (value & 0xff) + } else { + objectWriteUInt32(this, value, offset, true) + } + return offset + 4 +} + +Buffer.prototype.writeUInt32BE = function writeUInt32BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0xffffffff, 0) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 24) + this[offset + 1] = (value >>> 16) + this[offset + 2] = (value >>> 8) + this[offset + 3] = (value & 0xff) + } else { + objectWriteUInt32(this, value, offset, false) + } + return offset + 4 +} + +Buffer.prototype.writeIntLE = function writeIntLE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) { + var limit = Math.pow(2, 8 * byteLength - 1) + + checkInt(this, value, offset, byteLength, limit - 1, -limit) + } + + var i = 0 + var mul = 1 + var sub = value < 0 ? 1 : 0 + this[offset] = value & 0xFF + while (++i < byteLength && (mul *= 0x100)) { + this[offset + i] = ((value / mul) >> 0) - sub & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeIntBE = function writeIntBE (value, offset, byteLength, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) { + var limit = Math.pow(2, 8 * byteLength - 1) + + checkInt(this, value, offset, byteLength, limit - 1, -limit) + } + + var i = byteLength - 1 + var mul = 1 + var sub = value < 0 ? 
1 : 0 + this[offset + i] = value & 0xFF + while (--i >= 0 && (mul *= 0x100)) { + this[offset + i] = ((value / mul) >> 0) - sub & 0xFF + } + + return offset + byteLength +} + +Buffer.prototype.writeInt8 = function writeInt8 (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 1, 0x7f, -0x80) + if (!Buffer.TYPED_ARRAY_SUPPORT) value = Math.floor(value) + if (value < 0) value = 0xff + value + 1 + this[offset] = (value & 0xff) + return offset + 1 +} + +Buffer.prototype.writeInt16LE = function writeInt16LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0x7fff, -0x8000) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value & 0xff) + this[offset + 1] = (value >>> 8) + } else { + objectWriteUInt16(this, value, offset, true) + } + return offset + 2 +} + +Buffer.prototype.writeInt16BE = function writeInt16BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 2, 0x7fff, -0x8000) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 8) + this[offset + 1] = (value & 0xff) + } else { + objectWriteUInt16(this, value, offset, false) + } + return offset + 2 +} + +Buffer.prototype.writeInt32LE = function writeInt32LE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0x7fffffff, -0x80000000) + if (Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value & 0xff) + this[offset + 1] = (value >>> 8) + this[offset + 2] = (value >>> 16) + this[offset + 3] = (value >>> 24) + } else { + objectWriteUInt32(this, value, offset, true) + } + return offset + 4 +} + +Buffer.prototype.writeInt32BE = function writeInt32BE (value, offset, noAssert) { + value = +value + offset = offset | 0 + if (!noAssert) checkInt(this, value, offset, 4, 0x7fffffff, -0x80000000) + if (value < 0) value = 0xffffffff + value + 1 + if 
(Buffer.TYPED_ARRAY_SUPPORT) { + this[offset] = (value >>> 24) + this[offset + 1] = (value >>> 16) + this[offset + 2] = (value >>> 8) + this[offset + 3] = (value & 0xff) + } else { + objectWriteUInt32(this, value, offset, false) + } + return offset + 4 +} + +function checkIEEE754 (buf, value, offset, ext, max, min) { + if (value > max || value < min) throw new RangeError('value is out of bounds') + if (offset + ext > buf.length) throw new RangeError('index out of range') + if (offset < 0) throw new RangeError('index out of range') +} + +function writeFloat (buf, value, offset, littleEndian, noAssert) { + if (!noAssert) { + checkIEEE754(buf, value, offset, 4, 3.4028234663852886e+38, -3.4028234663852886e+38) + } + ieee754.write(buf, value, offset, littleEndian, 23, 4) + return offset + 4 +} + +Buffer.prototype.writeFloatLE = function writeFloatLE (value, offset, noAssert) { + return writeFloat(this, value, offset, true, noAssert) +} + +Buffer.prototype.writeFloatBE = function writeFloatBE (value, offset, noAssert) { + return writeFloat(this, value, offset, false, noAssert) +} + +function writeDouble (buf, value, offset, littleEndian, noAssert) { + if (!noAssert) { + checkIEEE754(buf, value, offset, 8, 1.7976931348623157E+308, -1.7976931348623157E+308) + } + ieee754.write(buf, value, offset, littleEndian, 52, 8) + return offset + 8 +} + +Buffer.prototype.writeDoubleLE = function writeDoubleLE (value, offset, noAssert) { + return writeDouble(this, value, offset, true, noAssert) +} + +Buffer.prototype.writeDoubleBE = function writeDoubleBE (value, offset, noAssert) { + return writeDouble(this, value, offset, false, noAssert) +} + +// copy(targetBuffer, targetStart=0, sourceStart=0, sourceEnd=buffer.length) +Buffer.prototype.copy = function copy (target, targetStart, start, end) { + if (!start) start = 0 + if (!end && end !== 0) end = this.length + if (targetStart >= target.length) targetStart = target.length + if (!targetStart) targetStart = 0 + if (end > 0 && end < 
start) end = start + + // Copy 0 bytes; we're done + if (end === start) return 0 + if (target.length === 0 || this.length === 0) return 0 + + // Fatal error conditions + if (targetStart < 0) { + throw new RangeError('targetStart out of bounds') + } + if (start < 0 || start >= this.length) throw new RangeError('sourceStart out of bounds') + if (end < 0) throw new RangeError('sourceEnd out of bounds') + + // Are we oob? + if (end > this.length) end = this.length + if (target.length - targetStart < end - start) { + end = target.length - targetStart + start + } + + var len = end - start + var i + + if (this === target && start < targetStart && targetStart < end) { + // descending copy from end + for (i = len - 1; i >= 0; i--) { + target[i + targetStart] = this[i + start] + } + } else if (len < 1000 || !Buffer.TYPED_ARRAY_SUPPORT) { + // ascending copy from start + for (i = 0; i < len; i++) { + target[i + targetStart] = this[i + start] + } + } else { + target._set(this.subarray(start, start + len), targetStart) + } + + return len +} + +// fill(value, start=0, end=buffer.length) +Buffer.prototype.fill = function fill (value, start, end) { + if (!value) value = 0 + if (!start) start = 0 + if (!end) end = this.length + + if (end < start) throw new RangeError('end < start') + + // Fill 0 bytes; we're done + if (end === start) return + if (this.length === 0) return + + if (start < 0 || start >= this.length) throw new RangeError('start out of bounds') + if (end < 0 || end > this.length) throw new RangeError('end out of bounds') + + var i + if (typeof value === 'number') { + for (i = start; i < end; i++) { + this[i] = value + } + } else { + var bytes = utf8ToBytes(value.toString()) + var len = bytes.length + for (i = start; i < end; i++) { + this[i] = bytes[i % len] + } + } + + return this +} + +/** + * Creates a new `ArrayBuffer` with the *copied* memory of the buffer instance. + * Added in Node 0.12. Only available in browsers that support ArrayBuffer. 
+ */ +Buffer.prototype.toArrayBuffer = function toArrayBuffer () { + if (typeof Uint8Array !== 'undefined') { + if (Buffer.TYPED_ARRAY_SUPPORT) { + return (new Buffer(this)).buffer + } else { + var buf = new Uint8Array(this.length) + for (var i = 0, len = buf.length; i < len; i += 1) { + buf[i] = this[i] + } + return buf.buffer + } + } else { + throw new TypeError('Buffer.toArrayBuffer not supported in this browser') + } +} + +// HELPER FUNCTIONS +// ================ + +var BP = Buffer.prototype + +/** + * Augment a Uint8Array *instance* (not the Uint8Array class!) with Buffer methods + */ +Buffer._augment = function _augment (arr) { + arr.constructor = Buffer + arr._isBuffer = true + + // save reference to original Uint8Array set method before overwriting + arr._set = arr.set + + // deprecated + arr.get = BP.get + arr.set = BP.set + + arr.write = BP.write + arr.toString = BP.toString + arr.toLocaleString = BP.toString + arr.toJSON = BP.toJSON + arr.equals = BP.equals + arr.compare = BP.compare + arr.indexOf = BP.indexOf + arr.copy = BP.copy + arr.slice = BP.slice + arr.readUIntLE = BP.readUIntLE + arr.readUIntBE = BP.readUIntBE + arr.readUInt8 = BP.readUInt8 + arr.readUInt16LE = BP.readUInt16LE + arr.readUInt16BE = BP.readUInt16BE + arr.readUInt32LE = BP.readUInt32LE + arr.readUInt32BE = BP.readUInt32BE + arr.readIntLE = BP.readIntLE + arr.readIntBE = BP.readIntBE + arr.readInt8 = BP.readInt8 + arr.readInt16LE = BP.readInt16LE + arr.readInt16BE = BP.readInt16BE + arr.readInt32LE = BP.readInt32LE + arr.readInt32BE = BP.readInt32BE + arr.readFloatLE = BP.readFloatLE + arr.readFloatBE = BP.readFloatBE + arr.readDoubleLE = BP.readDoubleLE + arr.readDoubleBE = BP.readDoubleBE + arr.writeUInt8 = BP.writeUInt8 + arr.writeUIntLE = BP.writeUIntLE + arr.writeUIntBE = BP.writeUIntBE + arr.writeUInt16LE = BP.writeUInt16LE + arr.writeUInt16BE = BP.writeUInt16BE + arr.writeUInt32LE = BP.writeUInt32LE + arr.writeUInt32BE = BP.writeUInt32BE + arr.writeIntLE = BP.writeIntLE + 
// Matches every character that is not part of the base64 / base64url alphabets.
var INVALID_BASE64_RE = /[^+\/0-9A-Za-z-_]/g

/**
 * Normalize a base64 string before it is handed to the decoder.
 *
 * Surrounding whitespace is trimmed, characters outside the base64 alphabets
 * are dropped, inputs shorter than two characters become '', and the result
 * is right-padded with '=' up to a multiple of four characters.
 *
 * @param {string} str - Raw base64 text.
 * @returns {string} The cleaned, padded string (possibly '').
 */
function base64clean (str) {
  var cleaned = stringtrim(str).replace(INVALID_BASE64_RE, '')

  // Fewer than two characters cannot encode a byte; treat as empty.
  if (cleaned.length < 2) return ''

  // Append '=' one at a time until the length is a multiple of 4.
  for (; cleaned.length % 4 !== 0;) {
    cleaned += '='
  }
  return cleaned
}

/**
 * Trim leading and trailing whitespace, using the native `trim` when the
 * value provides one and a regex replacement otherwise.
 *
 * @param {string} str
 * @returns {string}
 */
function stringtrim (str) {
  return str.trim ? str.trim() : str.replace(/^\s+|\s+$/g, '')
}

/**
 * Format a number in base 16, prefixing '0' when the number is below 16.
 *
 * @param {number} n
 * @returns {string}
 */
function toHex (n) {
  var digits = n.toString(16)
  return n < 16 ? '0' + digits : digits
}
/**
 * Convert a string to bytes by keeping the low 8 bits of each UTF-16 code unit.
 *
 * @param {string} str
 * @returns {number[]} One byte per code unit.
 */
function asciiToBytes (str) {
  var out = []
  var pos = 0
  while (pos < str.length) {
    // Node's code seems to be doing this and not & 0x7F..
    out.push(str.charCodeAt(pos) & 0xFF)
    pos++
  }
  return out
}

/**
 * Encode a string as little-endian UTF-16 bytes (low byte, then high byte).
 *
 * @param {string} str
 * @param {number} [units] - Byte budget; encoding stops before the code unit
 *   that would take the remaining budget below zero. When omitted, the budget
 *   check never triggers.
 * @returns {number[]}
 */
function utf16leToBytes (str, units) {
  var out = []
  var pos = 0
  while (pos < str.length) {
    units -= 2
    if (units < 0) break

    var unit = str.charCodeAt(pos)
    out.push(unit % 256, unit >> 8)
    pos++
  }
  return out
}

/**
 * Decode a base64 string to bytes, cleaning it with `base64clean` first.
 *
 * @param {string} str
 * @returns {*} Whatever `base64.toByteArray` returns for the cleaned input.
 */
function base64ToBytes (str) {
  var cleaned = base64clean(str)
  return base64.toByteArray(cleaned)
}

/**
 * Copy up to `length` elements from `src` into `dst`, starting at
 * `dst[offset]`, stopping early when either array runs out.
 *
 * @param {ArrayLike<number>} src
 * @param {ArrayLike<number>} dst - Written in place.
 * @param {number} offset - Index in `dst` of the first written element.
 * @param {number} length - Maximum number of elements to copy.
 * @returns {number} How many elements were copied.
 */
function blitBuffer (src, dst, offset, length) {
  var copied = 0
  for (; copied < length; copied++) {
    var dstFull = copied + offset >= dst.length
    var srcDone = copied >= src.length
    if (dstFull || srcDone) break
    dst[copied + offset] = src[copied]
  }
  return copied
}
window : {}) +},{"base64-js":1,"ieee754":56,"isarray":5}],5:[function(require,module,exports){ +var toString = {}.toString; + +module.exports = Array.isArray || function (arr) { + return toString.call(arr) == '[object Array]'; +}; + +},{}],6:[function(require,module,exports){ +/* Citation.js - a legal citation extractor. + * + * Open source, dedicated to the public domain: https://github.com/unitedstates/citation + * + * Originally authored by Eric Mill (@konklone), at the Sunlight Foundation, + * many contributions by https://github.com/unitedstates/citation/graphs/contributors + */ + + +module.exports = (function(Citation) { + +Citation = { + + // will be filled in by individual citation types as available + types: {}, + + // filters that can pre-process text and post-process citations + filters: {}, + + // link sources that add permalink information to citations + links: {}, + + // TODO: document this inline + // check a block of text for citations of a given type - + // return an array of matches, with citation broken out into fields + find: function(text, options) { + if (!options) options = {}; + if (typeof(text) !== "string") return; + + // client can apply a filter that pre-processes text before extraction, + // and post-processes citations after extraction + var results; + if (options.filter && Citation.filters[options.filter]) + return Citation.filtered(options.filter, text, options); + + // otherwise, do a single pass over the whole text. 
+ else + return Citation.extract(text, options); + }, + + // return an array of matched and filter-mapped cites + filtered: function(name, text, options) { + var results = []; + + var filter = Citation.filters[name]; + + // filter can break up the text into pieces with accompanying metadata + filter.from(text, options[name], function(piece, metadata) { + var response = Citation.extract(piece, options); + + // ignores any replaced text, it falls off the edge of the earth + + var filtered = response.citations.map(function(result) { + + Object.keys(metadata).forEach(function(key) { + result[key] = metadata[key]; + }); + + return result; + }); + + results = results.concat(filtered); + }); + + // doesn't return replaced text + return {citations: results}; + }, + + + // run the citators over the text, return an array of matched cites + extract: function(text, options) { + if (!options) options = {}; + + // default: no excerpt + var excerpt = options.excerpt ? parseInt(options.excerpt, 10) : 0; + + // whether to return parent citations + // default: false + var parents = options.parents || false; + + // default: all types, can be filtered to one, or an array of them + var types = Citation.selectedTypes(options); + if (types.length === 0) return null; + + + // The caller can provide a replace callback to alter every found citation. + // this function will be called with each (found and processed) cite object, + // and should return a string to be put in the cite's place. + // + // The resulting transformed string will be in the returned object as a 'text' field. + // this field will only be present if a replace callback was provided. + // + // providing this callback will also cause matched cites not to return the 'index' field, + // as the replace process will completely screw them up. only use the 'index' field if you + // plan on doing your own replacing. 
+ var replace = options.replace; + + // accumulate the results + var results = []; + + + // will hold the calculated context-specific patterns we are to run + // over the given text, tracked by index we expect to find them at. + // nextIndex tracks a running index as we loop through patterns. + // (citators could just be called indexedPatterns) + var citators = {}; + var nextIndex = 0; + + // Go through every regex-based citator and prepare a set of patterns, + // indexed by the order of a matched arguments array. + types.forEach(function(type) { + if (Citation.types[type].type != "regex") return; + + // Calculate the patterns this citator will contribute to the parse. + // (individual parsers can opt to make their parsing context-specific) + var patterns = Citation.types[type].patterns; + if (typeof(patterns) == "function") + patterns = patterns(options[type] || {}); + + // add each pattern, keeping a running tally of what we would + // expect its primary index to be when found in the master regex. + patterns.forEach(function(pattern) { + pattern.type = type; // will be needed later + citators[nextIndex] = pattern; + nextIndex += pattern.fields.length + 1; + }); + }); + + // If there are any regex-based patterns being applied, combine them + // and run a find/replace over the string. 
+ var regexes = Object.keys(citators).map(function(key) {return citators[key].regex}); + if (regexes.length > 0) { + + // merge all regexes into one, so that each pattern will begin at a predictable place + var regex = new RegExp("(" + regexes.join(")|(") + ")", "ig"); + + var replaced = text.replace(regex, function() { + var match = arguments[0]; + + // offset is second-to-last argument + var index = arguments[arguments.length - 2]; + + // pull out just the regex-captured matches + var captures = Array.prototype.slice.call(arguments, 1, -2); + + // find the first matched index in the captures + var matchIndex; + for (matchIndex=0; matchIndex 0) { + var proposedLeft = index - excerpt; + var left = proposedLeft > 0 ? proposedLeft : 0; + + var proposedRight = index + matchInfo.match.length + excerpt; + var right = (proposedRight <= text.length) ? proposedRight : text.length; + + matchInfo.excerpt = text.substring(left, right); + } + + + // if we want parent cites too, make those now + if (parents && Citation.types[type].parents_by) { + cites = Citation._.flatten(cites.map(function(cite) { + return Citation.citeParents(cite, type); + })); + } + + cites = cites.map(function(cite) { + var result = {}; + + // match-level info + Citation._.extend(result, matchInfo); + + // handle _submatch, which lets the user-level citator override the + // match and index with a sub-part of the whole matched regex + if (cite._submatch) { + result.match = cite._submatch.text; + result.index += cite._submatch.offset; + delete cite._submatch; + } + + // since a single text region can match multiple citations, such as when + // a range is given, clarify what this match represents + if ('canonical' in Citation.types[type]) + result.citation = Citation.types[type].canonical(cite); + + // cite-level info, plus ID standardization + result[type] = cite; + result[type].id = Citation.types[type].id(cite); + + // add permalinks if requested and a link source exists for this citation + // type. 
+ if (options.links) + result[type].links = Citation.getLinksForCitation(type, cite); + + results.push(result); + + return result; + }); + + // If a replace function is given, replace each matched citation by the + // result of calling the replace function with the citation passed as its + // only argument. + // + // Most citators return only a single citation match per regex match, but + // some return multiple citations for strings like "§§ 32-701 through 32-703". + + // Collect the final match string here. + var finalstring = matchInfo.match; + + // Get the replace function. If options.replace is a function use that, + // or if it is an object mapping the citator type to a function use that. + var replace_func = null; + if (typeof(replace) === "function") + replace_func = replace; + else if ((typeof(replace) === "object") && (typeof(replace[type]) === "function")) + replace_func = replace[type]; + else + replace_func = null; + + // If there's a replacement function... + if (replace_func) { + // Process the citations in the order they are returned. Assume they are + // ordered from left to right. + var last_index = 0; + var dx = 0; + for (var i = 0; i < cites.length; i++) { + // Skip citations that overlap with the previous citation (e.g. there + // may be two citations for the same text range.) + if (cites[i].index >= last_index) { + // Execute the replacement function. If the return is truth-y, perform + // a replacement. + var replacement = replace_func(cites[i]); + if (replacement) { + // Replace the substring. + finalstring = finalstring.substring(0, cites[i].index-index+dx) + replacement + finalstring.substring(cites[i].index-index+cites[i].match.length+dx); + + // The replacement text may have a different length than the text + // being replaced. Keep track of the total change in string length + // as we go because we have to adjust future citation replacements's + // indexes so that we make the edit to finalstring in the right place. 
+ dx += replacement.length - cites[i].match.length; + + // And track the end of last citation so we can skip any future citations + // that overlap with this text range. + last_index = cites[i].index + cites[i].match.length; + } + } + + // Per the citation API, delete the index field when doing a replacement. + // After replacements, the index will no longer be useful to the caller + // because the string has been edited. + delete cites[i].index; + } + } + return finalstring; + }); + } + + // TODO: do for any external cite types, not just "judicial" + if (types.indexOf("judicial") != -1) + results = results.concat(Citation.types.judicial.extract(text)); + + var response = {citations: results}; + if (options.replace) response.text = replaced; + + return response; + }, + + + // for a given set of cite-specific details, + // return itself and its parent citations + citeParents: function(citation, type) { + var field = Citation.types[type].parents_by; + var results = []; + + for (var i=citation[field].length; i >= 0; i--) { + var parent = Citation._.extend({}, citation); + parent[field] = parent[field].slice(0, i); + results.push(parent); + } + return results; + }, + + // given an array of captures *beginning* with values the pattern + // knows how to process, turn it into an object with those keys. + matchFor: function(captures, pattern) { + var match = {}; + for (var i=0; i 0) + types = options.types; + } else + types = [options.types]; + } + + // only allow valid types + if (types) { + types = types.filter(function(type) { + return Object.keys(Citation.types).indexOf(type) != -1; + }); + } else + types = Object.keys(Citation.types); + + return types; + }, + + getLinksForCitation: function(type, cite) { + // Create a place to store the links. + var links = {}; + + // Check each link source to see if it provides a link for this type + // of citation. 
+ for (var link_source in Citation.links) { + var link_source_module = Citation.links[link_source]; + if (type in link_source_module.citations) { + + // This link source provides link info for this type of citation. + // The function may return null if it doesn't provide a link for + // the particular citation. + var link_info = link_source_module.citations[type](cite); + if (link_info) { + // Add source metadata. + link_info.source = { + name: link_source_module.name, + abbreviation: link_source_module.abbreviation, + link: link_source_module.link, + authoritative: link_source_module.authoritative + }; + + // Add to citation. + links[link_source_module.id] = link_info; + } + } + } + + return links; + }, + + // small replacement for several functions previously served by + // the `underscore` library. + _: { + extend: function(obj) { + Array.prototype.slice.call(arguments, 1).forEach(function(source) { + if (source) { + for (var prop in source) + obj[prop] = source[prop]; + } + }); + return obj; + }, + + flatten: function(array) { + var impl = function(input, output) { + input.forEach(function(value) { + if (Array.isArray(value)) + impl(value, output); + else + output.push(value); + }); + return output; + } + + return impl(array, []); + } + } + +}; + + +// TODO: load only the citation types, filters, and link sources asked for +if (typeof(require) !== "undefined") { + Citation.types.usc = require("./citations/usc"); + Citation.types.law = require("./citations/law"); + Citation.types.cfr = require("./citations/cfr"); + Citation.types.va_code = require("./citations/va_code"); + Citation.types.dc_code = require("./citations/dc_code"); + Citation.types.dc_register = require("./citations/dc_register"); + Citation.types.dc_law = require("./citations/dc_law"); + Citation.types.dc_stat = require("./citations/dc_stat"); + Citation.types.stat = require("./citations/stat"); + Citation.types.reporter = require("./citations/reporter"); + Citation.types.fedreg = 
require("./citations/fedreg"); + Citation.types.usconst = require("./citations/usconst"); + + + Citation.filters.lines = require("./filters/lines"); + Citation.filters.xpath_html = require("./filters/xpath_html"); + Citation.filters.xpath_xml = require("./filters/xpath_xml"); + + Citation.links.cornell_lii = require("./links/cornell_lii"); + Citation.links.courtlistener = require("./links/courtlistener"); + Citation.links.dc_council = require("./links/dc_council"); + Citation.links.govtrack = require("./links/govtrack"); + Citation.links.gpo = require("./links/gpo"); + Citation.links.house = require("./links/house"); + Citation.links.legislink = require("./links/legislink"); + Citation.links.libraryofcongress = require("./links/libraryofcongress"); + Citation.links.nara = require("./links/nara"); + Citation.links.vadecoded = require("./links/vadecoded"); +} + +// auto-load in-browser +if (typeof(window) !== "undefined") + window.Citation = Citation; + +return Citation; + +})(); + +},{"./citations/cfr":7,"./citations/dc_code":8,"./citations/dc_law":9,"./citations/dc_register":10,"./citations/dc_stat":11,"./citations/fedreg":12,"./citations/law":13,"./citations/reporter":14,"./citations/stat":15,"./citations/usc":16,"./citations/usconst":17,"./citations/va_code":18,"./filters/lines":19,"./filters/xpath_html":20,"./filters/xpath_xml":21,"./links/cornell_lii":22,"./links/courtlistener":23,"./links/dc_council":24,"./links/govtrack":25,"./links/gpo":26,"./links/house":27,"./links/legislink":28,"./links/libraryofcongress":29,"./links/nara":30,"./links/vadecoded":31}],7:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(data) { + return ["cfr", data.title, (data.section || data.part)] + .concat(data.subsections || []) + .join("/") + }, + + patterns: [ + // done: + // 14 CFR part 25 + // 38 CFR Part 74.2 + // 48 CFR § 9903.201 + // 24 CFR 85.25(h) + // 5 CFR §531.610(f) + // 45 C.F.R. 
3009.4 + // 47 CFR 54.506 (c) + // but not: 47 CFR 54.506 (whatever) + // 5CFR, part 575 + + // maybe: + // 13 CFR Parts 125 and 134 + // 5CFR, part 575, subpart C + // 23 CFR 650, Subpart A + { + regex: + "(\\d+)\\s?" + // Title number + "C\\.?\\s?F\\.?\\s?R\\.?" + // CFR + "(?:[\\s,]+(?:§+|parts?))?" + // Extra separators (section sign, part) + "\\s*(\\d+(?:(?:[-–—]\\d+)?[a-z]?" + // Part number + "(?:\\.(?:13h[-–—]l|\\d+[-–—]?\\d*\\.5\\d|(?:\\d+T|T|\\d+[-–—]DD[-–—]|\\d+[-–—]WH[-–—]|\\d+[a-z]{1,2}\\d*[-–—])?\\d+)[a-z]{0,2}(?:(?:(?:\\([a-z\\d]{1,2}\\))*[-–—]\\d+)+[a-z]{0,2})?)?" + // Optionally: period and section number + "(?:(?:\\s*\\((?:[a-z\\d]{1,2}|[ixv]+)\\))+)?)?)", // Optionally: subsections, if there was a section number + + fields: ['title', 'sections'], + + processor: function(captures) { + var title = captures.title; + var part, section, subsections; + + // convert all dashes to hyphens, deduplicate hyphens, and look for + // subsections starting after the last hyphen + var hyphen_split = captures.sections.split(/[-–—]+/); + var head, tail; + if (hyphen_split.length > 1) { + head = hyphen_split.slice(0, -1).join("-") + "-"; + tail = hyphen_split[hyphen_split.length - 1]; + } else { + head = ""; + tail = hyphen_split[0]; + } + + // separate subsections for each section being considered + var paren_split = tail.split(/[\(\)]+/).filter(function(x) {return x;}); + section = head + paren_split[0].trim(); + subsections = paren_split.splice(1); + + if (section.indexOf(".") > 0) + part = section.split(".")[0]; + else { + part = section; + section = null; + subsections = null; // don't include empty array + } + + return { + title: title, + part: part, + section: section, + subsections: subsections + }; + } + } + + // todo: + // parts 121 and 135 of Title 14 of the Code of Federal Regulations + // { + // regex: + // "section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*)" + + // "(?:\\s+of|\\,) title (\\d+)", + // fields: ['section', 'subsections', 'title'], + // 
processor: function(captures) { + // return { + // title: captures.title, + // section: captures.section, + // subsections: captures.subsections.split(/[\(\)]+/).filter(function(x) {return x;}) + // }; + // } + // } + ] +}; + +},{}],8:[function(require,module,exports){ +var base_regex = + "(\\d+A?)" + // title + "\\s?\\-\\s?" + // dash + "([\\w\\d]+(?:\\.?[\\w\\d]+)?)" + // section identifier (letters/numbers/dots) + "((?:\\([^\\)]+\\))*)"; // subsection (any number of adjacent parenthesized subsections) + +module.exports = { + type: "regex", + + // normalize all cites to an ID, with and without subsections + id: function(cite) { + return ["dc-code", cite.title, cite.section] + .concat(cite.subsections) + .join("/"); + }, + + // field to calculate parents from + parents_by: "subsections", + + patterns: function(context) { + // D.C. Official Code 3-1202.04 + // D.C. Official Code § 3-1201.01 + // D.C. Official Code §§ 38-2602(b)(11) + // D.C. Official Code § 3- 1201.01 + // D.C. Official Code § 3 -1201.01 + // + // § 32-701 + // § 32-701(4) + // § 3-101.01 + // § 1-603.01(13) + // § 1- 1163.33 + // § 1 -1163.33 + // section 16-2326.01 + + var prefix_regex = ""; + var section_regex = "(?:sections?\\s+|§+\\s*)"; + var sections_regex = "(?:sections\\s+|§§\\s*)"; + if (context.source != "dc_code") { + // Require "DC Official Code" but then make the section symbol optional. + prefix_regex = "D\\.?C\\.? (?:Official )?Code\\s+"; + section_regex = "(?:" + section_regex + ")?"; + sections_regex = "(?:" + sections_regex + ")?"; + } + + return [ + // multiple citations + // has precedence over a single citation + // Unlike the single citation, the matched parts are just the title/section/subsection + // and omits "DC Code" and the section symbols (if present) from the matched text. 
// Break a run of parenthesized subsections, e.g. "(b)(11)", into its parts
// (["b", "11"]). A missing or empty match yields an empty array.
function split_subsections(match) {
  if (!match)
    return [];

  var pieces = match.split(/[\(\)]+/);
  return pieces.filter(function(piece) {return piece});
}
Law 20-17" + // "DC Law 20-17" + // "DC Law 18-135A" + { + regex: + context_regex + "Law\\s+(\\d+)\\s?[-–]+\\s?(\\d+\\w?)", + fields: ["period", "number"], + processor: function(captures) { + return { + period: captures.period, + number: captures.number + }; + } + } + ]; + } +}; + +},{}],10:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(cite) { + return ["dc-register", cite.volume, cite.page].join("/"); + }, + + patterns: [ + // 54 DCR 8014 + { + regex: + "(\\d+)\\s+" + + "DCR" + + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],11:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["dcstat", cite.volume, cite.page].join("/") + }, + + patterns: [ + // "20 DCSTAT 1952" + { + regex: + "(\\d+)\\s+" + + "DCSTAT" + + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],12:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["fedreg", cite.volume, cite.page].join("/") + }, + + + patterns: [ + // "75 Fed. Reg. 
28404" + // "69 FR 22135" + { + regex: + "(\\d+)\\s+" + + "(?:Fed\\.?\\sReg?\\.?|F\\.?R\\.?)" + + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],13:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(cite) { + return ["us-law", cite.type, cite.congress, cite.number] + .concat(cite.sections || []) + .join("/"); + }, + + canonical: function(cite) { + if (!cite.sections || cite.sections.length == 0) + // this style matches GPO at http://www.gpo.gov/fdsys/browse/collection.action?collectionCode=PLAW&browsePath=112&isCollapsed=false&leafLevelBrowse=false&ycord=0 + return (cite.type == "public" ? "Pub. L." : "Pvt. L.") + " " + cite.congress + "-" + cite.number; + else + return "Section " + cite.sections[0] + cite.sections.slice(1).map(function(item) { return "(" + item + ")" }).join("") + + " of " + + (cite.type == "public" ? "Public" : "Private") + " Law " + cite.congress + "-" + cite.number; + }, + + // field to calculate parents from + parents_by: "sections", + + patterns: [ + // "Public Law 111-89" + // "Pub. L. 112-56" + // "Pub. L. No. 110-2" + // "Pub.L. 105-33" + // "Private Law 111-72" + // "Priv. L. No. 98-23" + // "section 552 of Public Law 111-89" + // "section 4402(e)(1) of Public Law 110-2" + { + regex: + "(?:section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*) of )?" + + "(pub(?:lic)?|priv(?:ate)?)\\.?\\s*l(?:aw)?\\.?(?:\\s*No\\.?)?" + + " +(\\d+)[-–]+(\\d+)", + fields: ['section', 'subsections', 'type', 'congress', 'number'], + processor: function(captures) { + var sections = []; + if (captures.section) sections.push(captures.section); + if (captures.subsections) sections = sections.concat(captures.subsections.split(/[\(\)]+/).filter(function(x) {return x})); + + return { + type: captures.type.match(/^priv/i) ? 
"private" : "public", + congress: captures.congress, + number: captures.number, + sections: sections + }; + } + }, + + // "PL 19-4" + // "P.L. 45-78" + // "section 552 of PL 19-4" + // "section 4402(e)(1) of PL 19-4" + { + regex: + "(?:section (\\d+[\\w\\d\-]*)((?:\\([^\\)]+\\))*) of )?" + + "P\\.?L\\.? +(\\d+)[-–](\\d+)", + fields: ['section', 'subsections', 'congress', 'number'], + processor: function(captures) { + sections = []; + if (captures.section) sections.push(captures.section); + if (captures.subsections) sections = sections.concat(captures.subsections.split(/[\(\)]+/).filter(function(x) {return x})); + + return { + type: "public", + congress: captures.congress, + number: captures.number, + sections: sections + }; + } + } + ] +}; + +},{}],14:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["reporter", cite.volume, cite.reporter, cite.page].join("/") + }, + + canonical: function(cite) { + return cite.volume + " " + cite.reporter + " " + cite.page; + }, + + patterns: [ + { + regex: + "(\\d{1,3})\\s" + + "(\\w+(?:\\.\\w+(?:\\.)?)?(?:\\.\\dd)?|U\\.?\\s?S\\.?|F\\. Supp\\.(?:\\s\\dd)?)\\s" + + "(\\d{1,4})", + fields: ['volume', 'reporter', 'page'], + processor: function(match) { + return { + volume: match.volume, + reporter: match.reporter, + page: match.page, + }; + } + } + ] +}; + +},{}],15:[function(require,module,exports){ +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["stat", cite.volume, cite.page].join("/") + }, + + canonical: function(cite) { + return cite.volume + " Stat. " + cite.page; + }, + + patterns: [ + // "117 Stat. 1952" + // "77 STAT. 77" + { + regex: + "(\\d+[\\w]*)\\s+" + + "Stat\\.?" 
+ + "\\s+(\\d+)", + fields: ['volume', 'page'], + processor: function(match) { + return { + volume: match.volume, + page: match.page, + }; + } + } + ] +}; + +},{}],16:[function(require,module,exports){ +module.exports = { + type: "regex", + + id: function(cite) { + return ["usc", cite.title, cite.section] + .concat(cite.subsections || []) + .join("/"); + }, + + canonical: function(cite) { + // title, which also may specify it is an appendix title + var title = cite.title; + var app = ""; + var title_without_app = cite.title.replace(/-app$/, ''); + if (title != title_without_app) app = "App. "; + + // subsections, possibly with a note/et-seq as a leaf which should + // be rendered differently from a normal subsection item + var subsections = cite.subsections.slice(); // clone + var suffix = ""; + var leaf = subsections.length > 0 ? subsections[subsections.length-1] : null; + if (leaf == "note") { + subsections.pop(); + suffix = " note" + } else if (leaf == "et-seq") { + subsections.pop(); + suffix = " et seq" + } + + return title_without_app + " U.S.C. " + app + cite.section + + subsections.map(function(item) { return "(" + item + ")" }).join("") + + suffix; + }, + + // field to calculate parents from + parents_by: "subsections", + + patterns: [ + // "5 USC 552" + // "5 U.S.C. § 552(a)(1)(E)" + // "7 U.S.C. 612c note" + // "29 U.S.C. 1081 et seq" + // "50 U.S.C. App. 595" + // "45 U.S.C. 10a-10c" + // "50 U.S.C. 404o-1(a)" - single section + // "45 U.S.C. 10a(1)-10c(2)" - range + // "50 U.S.C. App. §§ 451--473" - range + { + regex: + "(\\d+)\\s+" + // title + "U\\.?\\s?S\\.?\\s?C\\.?" + + "(?:\\s+(App)\.?)?\\s+" + // appendix + "(?:(§+)\\s*)?" 
+ // symbol + "((?:[-–—]*\\d+[\\w\\d\\-–—]*(?:\\([^\\)]+\\))*)+)" + // sections + "(?:\\s+(note|et\\s+seq))?", // note + + fields: [ + 'title', 'appendix', + 'symbol', 'sections', 'note' + ], + + processor: function(match) { + // a few titles have distinct appendixes + var title = match.title; + if (match.appendix) title += "-app"; + + var sections = match.sections.split(/[-–—]+/); + var match_sections_normalized = match.sections.replace(/[–—]/g, '-'); + + var range = false; + + // two section symbols is unambiguous + if (match.symbol == "§§") // 2 section symbols + range = true; + + // paren before dash is unambiguous + else { + var dash = match_sections_normalized.indexOf("-"); + var paren = match_sections_normalized.indexOf("("); + if (dash > 0 && paren > 0 && paren < dash) + range = true; + } + + // if there's a hyphen and the range is ambiguous, + // also return the original section string as one + if ((sections.length > 1) && !range) + sections.unshift(match_sections_normalized); + + return sections.map(function(section) { + // separate subsections for each section being considered + var split = section.split(/[\(\)]+/).filter(function(x) {return x}); + section = split[0]; + subsections = split.splice(1); + if (match.note) + subsections.push(match.note.replace(" ", "-")); // "note" or "et seq" + + return { + title: title, + section: section, + subsections: subsections + }; + }); + } + }, + + // "section 552 of title 5" + // "section 552, title 5" + // "section 552(a)(1)(E) of title 5" + // "section 404o-1(a) of title 50" + { + regex: + "section (\\d+[\\w\\d\\-–—]*)((?:\\([^\\)]+\\))*)" + + "(?:\\s+of|\\,) title (\\d+)", + + fields: ['section', 'subsections', 'title'], + + processor: function(match) { + return { + title: match.title, + section: match.section.replace(/[–—]/g, '-'), + subsections: match.subsections.split(/[\(\)]+/).filter(function(x) {return x}) + }; + } + }, + + // "Section 14123(a)(2) of 49 U.S.C." + // "Section 14123(a)(2), 49 U.S.C." 
+ { + regex: + "section (\\d+[\\w\\d\\-–—]*)((?:\\([^\\)]+\\))*)" + + "(?:\\s+of|\\,) (\\d+) " + + "U\\.?\\s?S\\.?\\s?C\\.?", + + fields: ['section', 'subsections', 'title'], + + processor: function(match) { + return { + title: match.title, + section: match.section.replace(/[–—]/g, '-'), + subsections: match.subsections.split(/[\(\)]+/).filter(function(x) {return x}) + }; + } + } + ] +}; + +},{}],17:[function(require,module,exports){ +/* Parses citations to the United States Constitution + * + * like: U.S. CONST., art. I, ¶ 8, cl. 17 + * as seen in http://pdfserver.amlaw.com/nlj/3-18-16%20dc%20council%20v%20mayor%20order%20NLJ.pdf + */ + +var arabic_number = parseInt; +var roman_numeral = require('nomar'); + +// All of the sub-parts that might be found in the citation. +var part_types = { + amendment: { abbrev: "Amdt.", regex: "Amdt\\.?|Amend\\.?", numbering: roman_numeral }, + article: { abbrev: "art.", regex: "art\\.?", numbering: roman_numeral }, + section: { abbrev: "§", regex: "§", numbering: arabic_number }, + paragraph: { abbrev: "¶", regex: "¶", numbering: arabic_number }, + clause: { abbrev: "cl.", regex: "cl\\.?", numbering: arabic_number }, +}; + +module.exports = { + type: "regex", + + // normalize all cites to an ID + id: function(cite) { + return ["usconst"].concat((cite.part || []).map(function(part) { + if (!part) return "?"; + return part.type + "-" + part.number; + })).join("/"); + }, + + canonical: function(cite) { + var ret = "U.S. Const."; + for (var i = 0; i < (cite.part || []).length; i++) + if (cite.part[i]) // did this part parse? + ret += ", " + part_types[cite.part[i].type].abbrev + " " + cite.part[i].number_str; + return ret; + }, + + patterns: [ + // "U.S. CONST., art. I, ¶ 8, cl. 17" + { + regex: + "U\\.? ?S\\.? ?C(?:ONST|onst)\\.?" + + "((:?,? ?" 
// Parses one comma-separated piece of a Constitution citation (for example
// "art. I" or "cl. 17") by trying each entry of part_types in turn.
// Returns { type, number_str, number } for the first part type whose
// abbreviation precedes a trailing numeral, or null when none matches.
function process_part(part) {
  for (var kind in part_types) {
    var spec = part_types[kind];
    // abbreviation, optional space, then a roman/arabic numeral at the end
    var pattern = new RegExp("(?:" + spec.regex + ") ?([IVX0-9]+)$", 'i');
    var hit = pattern.exec(part);
    if (!hit) continue;

    return {
      type: kind,
      number_str: hit[1],
      number: spec.numbering(hit[1])
    };
  }

  return null; // somehow didn't match
}
// Walks a parse5 tree depth-first and hands the contents of every text node
// to `extract`, together with an XPath expression locating that node.
//
//   node:         the tree node to visit (read: nodeName, value, childNodes)
//   partialXpath: XPath of `node` itself ('' for the document root)
//   extract:      called as extract(text, {xpath: ...}) once per text node
function recurse(node, partialXpath, extract) {
  if (node.nodeName == "#text") {
    // Pass contents of text nodes to the extractor
    extract(node.value, {xpath: partialXpath});
    return;
  }

  if (node.nodeName == "#comment" || node.nodeName == "#documentType") {
    // Skip doctypes and comments
    // (parse5 treats processing instructions, entities, and notations as
    // comments)
    return;
  }

  // A node without a child list simply has nothing to visit.
  var children = node.childNodes || [];

  // Running count of children seen so far, per node name. This yields the
  // same 1-based XPath position as rescanning all previous siblings for
  // every child, without the quadratic cost on wide nodes.
  var seen = Object.create(null);

  for (var i = 0; i < children.length; i++) {
    var next = children[i];
    var nextName = next.nodeName;
    var index = seen[nextName] = (seen[nextName] || 0) + 1;

    // Incrementally build XPath expressions for each node
    var step = (nextName == "#text") ? "text()" : nextName;
    recurse(next, partialXpath + "/" + step + "[" + index + "]", extract);
  }
}
// Walks a DOM tree (as produced by the XML parser) depth-first and hands the
// value of every text or CDATA node to `extract`, together with an XPath
// expression locating that node.
//
//   node:         the DOM node to visit
//   partialXpath: XPath of `node` itself ('' for the document)
//   extract:      called as extract(text, {xpath: ...}) once per text node
function recurse(node, partialXpath, extract) {
  if (node.nodeType == node.TEXT_NODE || node.nodeType == node.CDATA_SECTION_NODE) {
    extract(node.nodeValue, {xpath: partialXpath});
    return;
  }

  // only elements and the document itself are descended into
  if (node.nodeType != node.ELEMENT_NODE && node.nodeType != node.DOCUMENT_NODE)
    return;

  // Running 1-based positions, kept while iterating instead of rescanning all
  // previous siblings for every child. Text and CDATA nodes share one
  // counter because both are addressed as text(); elements are counted per
  // node name.
  var textCount = 0;
  var elementCounts = Object.create(null);

  for (var i = 0; i < node.childNodes.length; i++) {
    var next = node.childNodes[i];

    if (next.nodeType == next.TEXT_NODE ||
        next.nodeType == next.CDATA_SECTION_NODE) {
      textCount++;
      recurse(next, partialXpath + "/text()[" + textCount + "]", extract);
    } else if (next.nodeType == next.ELEMENT_NODE) {
      var name = next.nodeName;
      elementCounts[name] = (elementCounts[name] || 0) + 1;
      recurse(next, partialXpath + "/" + name + "[" + elementCounts[name] + "]", extract);
    }
    // Any other child (comment, processing instruction, ...) carries no
    // extractable text and is skipped outright, so no stale XPath from a
    // previous sibling is ever passed along.
  }
}
breaks the input tet into, along with any associated metadata, e.g. + the XPath expression associated with each text fragment. + */ + + // An XML/XPath filter. + // + // Parses the text as an XML document, using the "xmldom" parser, and feeds + // each text node into the extractor. Attaches an XPath expression that + // locates the text node as metadata to each cite. Character offsets will + // be relative to the beginning of the text node. + + from: function(text, options, extract) { + // Parse the input text + var parser, doc; + parser = new DOMParser(); + doc = parser.parseFromString(text, "text/xml"); + + // Hand off to recursive function, which will walk the DOM + recurse(doc, '', extract); + } + +}; + +},{"xmldom":80}],22:[function(require,module,exports){ +module.exports = { + id: "cornell_lii", + + name: "Cornell Legal Information Institute", + abbreviation: "Cornell LII", + link: "https://www.law.cornell.edu/uscode/text", + + authoritative: false, + + citations: { + usc: function(cite) { + var title = cite.title.replace(/-app$/, ''); + var is_appendix = cite.title.indexOf("-app") != -1; + + // (for current citations only, i.e. not tied to a publication or effective date) + var subsections = (cite.subsections.slice() || []); // clone + if (subsections.length && subsections[subsections.length-1] == "et-seq") subsections.pop(); // don't include eq-seq in a link + return { + landing: "https://www.law.cornell.edu/uscode/text/" + (title + (is_appendix ? "a" : "")) + + "/" + cite.section + + (subsections.length ? ("#" + subsections.join("_")) : ""), + note: "Link is to most current version of the US Code, as available at law.cornell.edu." 
+ }; + } + } +} + +},{}],23:[function(require,module,exports){ +var form_canonical_cite = require("../citations/reporter").canonical; + +module.exports = { + id: "courtlistener", + + name: "Court Listener", + abbreviation: "CL", + link: "https://www.courtlistener.com", + + authoritative: false, + + citations: { + reporter: function(cite) { + // Create a link to the Court Listener search page for the citation. Citations + // can be ambiguous, and so there is no permalink to a case available without + // querying an API. + // + // The citation is wrapped in quotes in the query to force the CL API to do + // a phrase search (per Solr). Without quotes, a citation search on "410 U.S. 113" + // brings back `410 U.S. 257, 93 S. Ct. 880, 35 L. Ed. 2d 247, 1973 U.S. LEXIS 113` + // and `507 U.S. 410, 113 S. Ct. 1505, 123 L. Ed. 2d 99, 1993 U.S. LEXIS 2401`. + // (They match because "410" "US" and "113" appear somewhere in the whole string.) + // See https://github.com/freelawproject/courtlistener/issues/381, but that's only + // a partial fix because quotes are still needed to ensure the terms appear in + // the right order. 
+ return { + landing: "https://www.courtlistener.com/?citation=" + encodeURIComponent("\"" + form_canonical_cite(cite) + "\"") + }; + } + } +} + +},{"../citations/reporter":14}],24:[function(require,module,exports){ +module.exports = { + id: "dc_council", + + name: "Council of the District of Columbia", + abbreviation: "DC Council", + link: "https://dccode.gov", + + authoritative: true, + + citations: { + dc_law: function(cite) { + return { + landing: "https://beta.code.dccouncil.us/dc/council/laws/" + cite.period + "-" + cite.number + ".html" + }; + }, + dc_code: function(cite) { + return { + landing: "https://beta.code.dccouncil.us/dc/council/code/sections/" + cite.title + "-" + cite.section + ".html" + }; + } + } +}; + +},{}],25:[function(require,module,exports){ +module.exports = { + id: "govtrack", + + name: "GovTrack.us", + abbreviation: "GovTrack.us", + link: "https://www.govtrack.us", + + authoritative: false, + + citations: { + law: function(cite) { + if (cite.congress < 82) return null; + return { + landing: "https://www.govtrack.us/search?q=" + (cite.type=="public"?"Pub":"Priv") + "Law+" + cite.congress + "-" + cite.number + }; + } + } +} + +},{}],26:[function(require,module,exports){ +module.exports = { + id: "usgpo", + + name: "U.S. 
Government Publishing Office", + abbreviation: "US GPO", + link: "https://www.gpo.gov", + + authoritative: true, + + citations: { + cfr: function(cite) { + var gpo_url = "http://api.fdsys.gov/link?collection=cfr&year=mostrecent" + + "&titlenum=" + cite.title + "&partnum=" + cite.part; + if (cite.section) // section, if present, is of the form PART.SECTION, and for the GPO url only include the (inner) section + gpo_url += "§ionnum=" + cite.section.substring(cite.part.length+1) + ""; + + return { + pdf: gpo_url + }; + }, + + fedreg: function(cite) { + return { + pdf: "http://api.fdsys.gov/link?collection=fr&volume=" + cite.volume + "&page=" + cite.page + }; + }, + + law: function(cite) { + if (cite.congress < 104) return null; + return { + pdf: "http://api.fdsys.gov/link?collection=plaw&congress=" + cite.congress + "&lawtype=" + cite.type + "&lawnum=" + cite.number, + mods: "http://api.fdsys.gov/link?collection=plaw&congress=" + cite.congress + "&lawtype=" + cite.type + "&lawnum=" + cite.number + "&link-type=mods" + }; + }, + + stat: function(cite) { + if (cite.volume < 65 || cite.volume > 125) return null; + var usgpo_url = "http://api.fdsys.gov/link?collection=statute&volume=" + cite.volume + "&page=" + cite.page; + return { + pdf: usgpo_url, + mods: usgpo_url + "&link-type=mods" + }; + }, + + usc: function(cite) { + var title = cite.title.replace(/-app$/, ''); + var is_appendix = cite.title.indexOf("-app") != -1; + + var edition; + for (var i = 0; i < us_code_editions.length; i++) { + if (us_code_editions[i].titles == null || us_code_editions[i].titles.indexOf(title) >= 0) { + // This edition contains the title. + edition = us_code_editions[i] + break; + } + } + + if (!edition) return null; + + var url = "http://api.fdsys.gov/link?collection=uscode&year=" + + edition.edition + "&title=" + title + + "§ion=" + cite.section + + "&type=" + (!is_appendix ? 
"usc" : "uscappendix"); + + return { + pdf: url, + html: url + "&link-type=html", + landing: url + "&link-type=contentdetail", + note: edition.edition + " edition." + ((cite.subsections && cite.subsections.length) ? " Sub-section citation is not reflected in the link." : "") + }; + } + } +} + + +// Map published editions of the US Code to the titles they contain. Not all +// published editions have the full US Code. Some are updates. This is per +// http://www.gpo.gov/fdsys/browse/collectionUScode.action?collectionCode=USCODE. +// Most recent first. +var us_code_editions = [ + { edition: '2014', titles: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'] }, + { edition: '2013', titles: null }, // all titles available in this edition +]; + +},{}],27:[function(require,module,exports){ +module.exports = { + id: "house", + + name: "Office of the Law Revision Counsel of the United States House of Representatives", + abbreviation: "House OLRC", + link: "http://uscode.house.gov/", + + authoritative: true, + + citations: { + usc: function(cite) { + var title = cite.title.replace(/-app$/, ''); + var is_appendix = cite.title.indexOf("-app") != -1; + return { + note: "Link is to most current version of the US Code.", + html: "http://uscode.house.gov/view.xhtml?req=(" + encodeURIComponent("title:" + (title + (is_appendix ? "a" : "")) + " section:" + cite.section + " edition:prelim") + ")" + } + } + } +} + +},{}],28:[function(require,module,exports){ +module.exports = { + id: "legislink", + + name: "Legislink", + abbreviation: "Legislink", + link: "http://legislink.org/us", + + authoritative: false, + + citations: { + stat: function(cite) { + var legislink_url = "http://legislink.org/us/stat-" + cite.volume + "-" + cite.page; + + // the format differs depending on the volume, and where it is a simple + // redirect to US GPO (and not hosted content) then we can note that. 
+ if (cite.volume >= 125) { + // hosted content is a mirror of US GPO Public and Private Laws in text format + return { + text: legislink_url + }; + + } else if (cite.volume >= 65) { + // redirect to US GPO (so same content as the usgpo link) + return { + pdf: legislink_url, + note: "Link redirects to US GPO Statutes at Large." + }; + + } else { + // original content + return { + pdf: legislink_url + }; + } + } + } +} + +},{}],29:[function(require,module,exports){ +module.exports = { + id: "libraryofcongress", + + name: "Library of Congress", + abbreviation: "LoC", + link: "https://www.loc.gov", + + authoritative: true, + + citations: { + stat: function(cite) { + // LoC organizes the volumes by Congress and, for some Congresses, by chapter + // number. This is well and good but awful for direct linking of citations + // because we don't know the Congress number from a volume (through the 12th + // volume volumes contained more than one Congress) or the chapter number + // (which is a sequential numbering of public and private laws, I think?). + if (cite.volume >= 65) return null; + return { + landing: "https://www.loc.gov/law/help/statutes-at-large/index.php", + note: "Link is to LoC's general Statutes at Large landing page." + }; + }, + + usconst: function(cite) { + return { + landing: "https://www.congress.gov/constitution-annotated", + pdf: get_conan_link(cite), + note: "Link is to the Constitution Annotated." + } + } + } +} + +// Helper routines to get a direct link to the PDF of the Constitution Annotated +// for the cited section. 
+ +var conan_links = { + "article-1": "9-2.pdf", + "article-2": "9-3.pdf", + "article-3": "9-4.pdf", + "article-4": "9-5.pdf", + "article-5": "9-6.pdf", + "article-6": "9-7.pdf", + "article-7": "9-8.pdf", + "amendment-1": "10-2.pdf", + "amendment-2": "10-3.pdf", + "amendment-3": "10-4.pdf", + "amendment-4": "10-5.pdf", + "amendment-5": "10-6.pdf", + "amendment-6": "10-7.pdf", + "amendment-7": "10-8.pdf", + "amendment-8": "10-9.pdf", + "amendment-9": "10-10.pdf", + "amendment-10": "10-11.pdf", + "amendment-11": "10-12.pdf", + "amendment-12": "10-13.pdf", + "amendment-13": "10-14.pdf", + "amendment-14": "10-15.pdf", + "amendment-15": "10-16.pdf", + "amendment-16": "10-17.pdf", + "amendment-17": "10-18.pdf", + "amendment-18": "10-19.pdf", + "amendment-19": "10-20.pdf", + "amendment-20": "10-21.pdf", + "amendment-21": "10-22.pdf", + "amendment-22": "10-23.pdf", + "amendment-23": "10-24.pdf", + "amendment-24": "10-25.pdf", + "amendment-25": "10-26.pdf", + "amendment-26": "10-27.pdf", + "amendment-27": "10-28.pdf" +} + +function get_conan_link(cite) { + for (var sec in conan_links) { + var id_prefix = "usconst/" + sec; + if (cite.id == id_prefix || cite.id.substring(0, id_prefix.length+1) == (id_prefix+"/")) + return "https://www.congress.gov/content/conan/pdf/GPO-CONAN-REV-2014-" + conan_links[sec]; + } + return null; +} +},{}],30:[function(require,module,exports){ +module.exports = { + id: "nara", + + name: "The National Archives and Records Administration", + abbreviation: "NARA", + link: "http://www.archives.gov", + + authoritative: true, + + citations: { + usconst: function(cite) { + return { + landing: "http://www.archives.gov/exhibits/charters/constitution_transcript.html" + } + } + } +} + +},{}],31:[function(require,module,exports){ +module.exports = { + id: "vadecoded", + + name: "Virginia Decoded", + abbreviation: "VACode.org", + link: "https://vacode.org", + + authoritative: false, + + citations: { + va_code: function(cite) { + return { + landing: 
"https://vacode.org/" + cite.title + "-" + cite.section + "/" + }; + } + } +}; + +},{}],32:[function(require,module,exports){ +'use strict'; + +//Const +var VALID_DOCTYPE_NAME = 'html', + QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd', + QUIRKS_MODE_PUBLIC_ID_PREFIXES = [ + '+//silmaril//dtd html pro v0r11 19970101//en', + '-//advasoft ltd//dtd html 3.0 aswedit + extensions//en', + '-//as//dtd html 3.0 aswedit + extensions//en', + '-//ietf//dtd html 2.0 level 1//en', + '-//ietf//dtd html 2.0 level 2//en', + '-//ietf//dtd html 2.0 strict level 1//en', + '-//ietf//dtd html 2.0 strict level 2//en', + '-//ietf//dtd html 2.0 strict//en', + '-//ietf//dtd html 2.0//en', + '-//ietf//dtd html 2.1e//en', + '-//ietf//dtd html 3.0//en', + '-//ietf//dtd html 3.0//en//', + '-//ietf//dtd html 3.2 final//en', + '-//ietf//dtd html 3.2//en', + '-//ietf//dtd html 3//en', + '-//ietf//dtd html level 0//en', + '-//ietf//dtd html level 0//en//2.0', + '-//ietf//dtd html level 1//en', + '-//ietf//dtd html level 1//en//2.0', + '-//ietf//dtd html level 2//en', + '-//ietf//dtd html level 2//en//2.0', + '-//ietf//dtd html level 3//en', + '-//ietf//dtd html level 3//en//3.0', + '-//ietf//dtd html strict level 0//en', + '-//ietf//dtd html strict level 0//en//2.0', + '-//ietf//dtd html strict level 1//en', + '-//ietf//dtd html strict level 1//en//2.0', + '-//ietf//dtd html strict level 2//en', + '-//ietf//dtd html strict level 2//en//2.0', + '-//ietf//dtd html strict level 3//en', + '-//ietf//dtd html strict level 3//en//3.0', + '-//ietf//dtd html strict//en', + '-//ietf//dtd html strict//en//2.0', + '-//ietf//dtd html strict//en//3.0', + '-//ietf//dtd html//en', + '-//ietf//dtd html//en//2.0', + '-//ietf//dtd html//en//3.0', + '-//metrius//dtd metrius presentational//en', + '-//microsoft//dtd internet explorer 2.0 html strict//en', + '-//microsoft//dtd internet explorer 2.0 html//en', + '-//microsoft//dtd internet explorer 2.0 tables//en', + 
'-//microsoft//dtd internet explorer 3.0 html strict//en', + '-//microsoft//dtd internet explorer 3.0 html//en', + '-//microsoft//dtd internet explorer 3.0 tables//en', + '-//netscape comm. corp.//dtd html//en', + '-//netscape comm. corp.//dtd strict html//en', + '-//o\'reilly and associates//dtd html 2.0//en', + '-//o\'reilly and associates//dtd html extended 1.0//en', + '-//spyglass//dtd html 2.0 extended//en', + '-//sq//dtd html 2.0 hotmetal + extensions//en', + '-//sun microsystems corp.//dtd hotjava html//en', + '-//sun microsystems corp.//dtd hotjava strict html//en', + '-//w3c//dtd html 3 1995-03-24//en', + '-//w3c//dtd html 3.2 draft//en', + '-//w3c//dtd html 3.2 final//en', + '-//w3c//dtd html 3.2//en', + '-//w3c//dtd html 3.2s draft//en', + '-//w3c//dtd html 4.0 frameset//en', + '-//w3c//dtd html 4.0 transitional//en', + '-//w3c//dtd html experimental 19960712//en', + '-//w3c//dtd html experimental 970421//en', + '-//w3c//dtd w3 html//en', + '-//w3o//dtd w3 html 3.0//en', + '-//w3o//dtd w3 html 3.0//en//', + '-//webtechs//dtd mozilla html 2.0//en', + '-//webtechs//dtd mozilla html//en' + ], + QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [ + '-//w3c//dtd html 4.01 frameset//', + '-//w3c//dtd html 4.01 transitional//' + ], + QUIRKS_MODE_PUBLIC_IDS = [ + '-//w3o//dtd w3 html strict 3.0//en//', + '-/w3c/dtd html 4.0 transitional/en', + 'html' + ]; + + +//Utils +function enquoteDoctypeId(id) { + var quote = id.indexOf('"') !== -1 ? 
'\'' : '"'; + + return quote + id + quote; +} + + +//API +exports.isQuirks = function (name, publicId, systemId) { + if (name !== VALID_DOCTYPE_NAME) + return true; + + if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) + return true; + + if (publicId !== null) { + publicId = publicId.toLowerCase(); + + if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) + return true; + + var prefixes = QUIRKS_MODE_PUBLIC_ID_PREFIXES; + + if (systemId === null) + prefixes = prefixes.concat(QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES); + + for (var i = 0; i < prefixes.length; i++) { + if (publicId.indexOf(prefixes[i]) === 0) + return true; + } + } + + return false; +}; + +exports.serializeContent = function (name, publicId, systemId) { + var str = '!DOCTYPE '; + + if (name) + str += name; + + if (publicId !== null) + str += ' PUBLIC ' + enquoteDoctypeId(publicId); + + else if (systemId !== null) + str += ' SYSTEM'; + + if (systemId !== null) + str += ' ' + enquoteDoctypeId(systemId); + + return str; +}; + +},{}],33:[function(require,module,exports){ +'use strict'; + +var Tokenizer = require('../tokenizer'), + HTML = require('./html'); + +//Aliases +var $ = HTML.TAG_NAMES, + NS = HTML.NAMESPACES, + ATTRS = HTML.ATTRS; + + +//MIME types +var MIME_TYPES = { + TEXT_HTML: 'text/html', + APPLICATION_XML: 'application/xhtml+xml' +}; + +//Attributes +var DEFINITION_URL_ATTR = 'definitionurl', + ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL', + SVG_ATTRS_ADJUSTMENT_MAP = { + 'attributename': 'attributeName', + 'attributetype': 'attributeType', + 'basefrequency': 'baseFrequency', + 'baseprofile': 'baseProfile', + 'calcmode': 'calcMode', + 'clippathunits': 'clipPathUnits', + 'diffuseconstant': 'diffuseConstant', + 'edgemode': 'edgeMode', + 'filterunits': 'filterUnits', + 'glyphref': 'glyphRef', + 'gradienttransform': 'gradientTransform', + 'gradientunits': 'gradientUnits', + 'kernelmatrix': 'kernelMatrix', + 'kernelunitlength': 'kernelUnitLength', + 'keypoints': 'keyPoints', + 
'keysplines': 'keySplines', + 'keytimes': 'keyTimes', + 'lengthadjust': 'lengthAdjust', + 'limitingconeangle': 'limitingConeAngle', + 'markerheight': 'markerHeight', + 'markerunits': 'markerUnits', + 'markerwidth': 'markerWidth', + 'maskcontentunits': 'maskContentUnits', + 'maskunits': 'maskUnits', + 'numoctaves': 'numOctaves', + 'pathlength': 'pathLength', + 'patterncontentunits': 'patternContentUnits', + 'patterntransform': 'patternTransform', + 'patternunits': 'patternUnits', + 'pointsatx': 'pointsAtX', + 'pointsaty': 'pointsAtY', + 'pointsatz': 'pointsAtZ', + 'preservealpha': 'preserveAlpha', + 'preserveaspectratio': 'preserveAspectRatio', + 'primitiveunits': 'primitiveUnits', + 'refx': 'refX', + 'refy': 'refY', + 'repeatcount': 'repeatCount', + 'repeatdur': 'repeatDur', + 'requiredextensions': 'requiredExtensions', + 'requiredfeatures': 'requiredFeatures', + 'specularconstant': 'specularConstant', + 'specularexponent': 'specularExponent', + 'spreadmethod': 'spreadMethod', + 'startoffset': 'startOffset', + 'stddeviation': 'stdDeviation', + 'stitchtiles': 'stitchTiles', + 'surfacescale': 'surfaceScale', + 'systemlanguage': 'systemLanguage', + 'tablevalues': 'tableValues', + 'targetx': 'targetX', + 'targety': 'targetY', + 'textlength': 'textLength', + 'viewbox': 'viewBox', + 'viewtarget': 'viewTarget', + 'xchannelselector': 'xChannelSelector', + 'ychannelselector': 'yChannelSelector', + 'zoomandpan': 'zoomAndPan' + }, + XML_ATTRS_ADJUSTMENT_MAP = { + 'xlink:actuate': {prefix: 'xlink', name: 'actuate', namespace: NS.XLINK}, + 'xlink:arcrole': {prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK}, + 'xlink:href': {prefix: 'xlink', name: 'href', namespace: NS.XLINK}, + 'xlink:role': {prefix: 'xlink', name: 'role', namespace: NS.XLINK}, + 'xlink:show': {prefix: 'xlink', name: 'show', namespace: NS.XLINK}, + 'xlink:title': {prefix: 'xlink', name: 'title', namespace: NS.XLINK}, + 'xlink:type': {prefix: 'xlink', name: 'type', namespace: NS.XLINK}, + 'xml:base': 
{prefix: 'xml', name: 'base', namespace: NS.XML}, + 'xml:lang': {prefix: 'xml', name: 'lang', namespace: NS.XML}, + 'xml:space': {prefix: 'xml', name: 'space', namespace: NS.XML}, + 'xmlns': {prefix: '', name: 'xmlns', namespace: NS.XMLNS}, + 'xmlns:xlink': {prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS} + + }; + +//SVG tag names adjustment map +var SVG_TAG_NAMES_ADJUSTMENT_MAP = exports.SVG_TAG_NAMES_ADJUSTMENT_MAP = { + 'altglyph': 'altGlyph', + 'altglyphdef': 'altGlyphDef', + 'altglyphitem': 'altGlyphItem', + 'animatecolor': 'animateColor', + 'animatemotion': 'animateMotion', + 'animatetransform': 'animateTransform', + 'clippath': 'clipPath', + 'feblend': 'feBlend', + 'fecolormatrix': 'feColorMatrix', + 'fecomponenttransfer': 'feComponentTransfer', + 'fecomposite': 'feComposite', + 'feconvolvematrix': 'feConvolveMatrix', + 'fediffuselighting': 'feDiffuseLighting', + 'fedisplacementmap': 'feDisplacementMap', + 'fedistantlight': 'feDistantLight', + 'feflood': 'feFlood', + 'fefunca': 'feFuncA', + 'fefuncb': 'feFuncB', + 'fefuncg': 'feFuncG', + 'fefuncr': 'feFuncR', + 'fegaussianblur': 'feGaussianBlur', + 'feimage': 'feImage', + 'femerge': 'feMerge', + 'femergenode': 'feMergeNode', + 'femorphology': 'feMorphology', + 'feoffset': 'feOffset', + 'fepointlight': 'fePointLight', + 'fespecularlighting': 'feSpecularLighting', + 'fespotlight': 'feSpotLight', + 'fetile': 'feTile', + 'feturbulence': 'feTurbulence', + 'foreignobject': 'foreignObject', + 'glyphref': 'glyphRef', + 'lineargradient': 'linearGradient', + 'radialgradient': 'radialGradient', + 'textpath': 'textPath' +}; + +//Tags that causes exit from foreign content +var EXITS_FOREIGN_CONTENT = {}; + +EXITS_FOREIGN_CONTENT[$.B] = true; +EXITS_FOREIGN_CONTENT[$.BIG] = true; +EXITS_FOREIGN_CONTENT[$.BLOCKQUOTE] = true; +EXITS_FOREIGN_CONTENT[$.BODY] = true; +EXITS_FOREIGN_CONTENT[$.BR] = true; +EXITS_FOREIGN_CONTENT[$.CENTER] = true; +EXITS_FOREIGN_CONTENT[$.CODE] = true; +EXITS_FOREIGN_CONTENT[$.DD] = true; 
+EXITS_FOREIGN_CONTENT[$.DIV] = true; +EXITS_FOREIGN_CONTENT[$.DL] = true; +EXITS_FOREIGN_CONTENT[$.DT] = true; +EXITS_FOREIGN_CONTENT[$.EM] = true; +EXITS_FOREIGN_CONTENT[$.EMBED] = true; +EXITS_FOREIGN_CONTENT[$.H1] = true; +EXITS_FOREIGN_CONTENT[$.H2] = true; +EXITS_FOREIGN_CONTENT[$.H3] = true; +EXITS_FOREIGN_CONTENT[$.H4] = true; +EXITS_FOREIGN_CONTENT[$.H5] = true; +EXITS_FOREIGN_CONTENT[$.H6] = true; +EXITS_FOREIGN_CONTENT[$.HEAD] = true; +EXITS_FOREIGN_CONTENT[$.HR] = true; +EXITS_FOREIGN_CONTENT[$.I] = true; +EXITS_FOREIGN_CONTENT[$.IMG] = true; +EXITS_FOREIGN_CONTENT[$.LI] = true; +EXITS_FOREIGN_CONTENT[$.LISTING] = true; +EXITS_FOREIGN_CONTENT[$.MENU] = true; +EXITS_FOREIGN_CONTENT[$.META] = true; +EXITS_FOREIGN_CONTENT[$.NOBR] = true; +EXITS_FOREIGN_CONTENT[$.OL] = true; +EXITS_FOREIGN_CONTENT[$.P] = true; +EXITS_FOREIGN_CONTENT[$.PRE] = true; +EXITS_FOREIGN_CONTENT[$.RUBY] = true; +EXITS_FOREIGN_CONTENT[$.S] = true; +EXITS_FOREIGN_CONTENT[$.SMALL] = true; +EXITS_FOREIGN_CONTENT[$.SPAN] = true; +EXITS_FOREIGN_CONTENT[$.STRONG] = true; +EXITS_FOREIGN_CONTENT[$.STRIKE] = true; +EXITS_FOREIGN_CONTENT[$.SUB] = true; +EXITS_FOREIGN_CONTENT[$.SUP] = true; +EXITS_FOREIGN_CONTENT[$.TABLE] = true; +EXITS_FOREIGN_CONTENT[$.TT] = true; +EXITS_FOREIGN_CONTENT[$.U] = true; +EXITS_FOREIGN_CONTENT[$.UL] = true; +EXITS_FOREIGN_CONTENT[$.VAR] = true; + +//Check exit from foreign content +exports.causesExit = function (startTagToken) { + var tn = startTagToken.tagName; + var isFontWithAttrs = tn === $.FONT && (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null); + + return isFontWithAttrs ? 
// Tells whether an element is an HTML integration point:
//  * a MathML <annotation-xml> whose first `encoding` attribute is
//    (case-insensitively) "text/html" or "application/xhtml+xml", or
//  * an SVG <foreignObject>, <desc> or <title>.
// tn is the tag name, ns the namespace URI, attrs the attribute list.
function isHtmlIntegrationPoint(tn, ns, attrs) {
    var isAnnotationXml = ns === NS.MATHML && tn === $.ANNOTATION_XML;

    if (isAnnotationXml) {
        for (var idx = 0; idx < attrs.length; idx++) {
            var attr = attrs[idx];

            if (attr.name !== ATTRS.ENCODING)
                continue;

            // the first encoding attribute decides, whatever its value
            var encoding = attr.value.toLowerCase();

            return encoding === MIME_TYPES.TEXT_HTML || encoding === MIME_TYPES.APPLICATION_XML;
        }
    }

    if (ns !== NS.SVG)
        return false;

    return tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE;
}
+}; + +},{"../tokenizer":49,"./html":34}],34:[function(require,module,exports){ +'use strict'; + +var NS = exports.NAMESPACES = { + HTML: 'http://www.w3.org/1999/xhtml', + MATHML: 'http://www.w3.org/1998/Math/MathML', + SVG: 'http://www.w3.org/2000/svg', + XLINK: 'http://www.w3.org/1999/xlink', + XML: 'http://www.w3.org/XML/1998/namespace', + XMLNS: 'http://www.w3.org/2000/xmlns/' +}; + +exports.ATTRS = { + TYPE: 'type', + ACTION: 'action', + ENCODING: 'encoding', + PROMPT: 'prompt', + NAME: 'name', + COLOR: 'color', + FACE: 'face', + SIZE: 'size' +}; + +var $ = exports.TAG_NAMES = { + A: 'a', + ADDRESS: 'address', + ANNOTATION_XML: 'annotation-xml', + APPLET: 'applet', + AREA: 'area', + ARTICLE: 'article', + ASIDE: 'aside', + + B: 'b', + BASE: 'base', + BASEFONT: 'basefont', + BGSOUND: 'bgsound', + BIG: 'big', + BLOCKQUOTE: 'blockquote', + BODY: 'body', + BR: 'br', + BUTTON: 'button', + + CAPTION: 'caption', + CENTER: 'center', + CODE: 'code', + COL: 'col', + COLGROUP: 'colgroup', + + DD: 'dd', + DESC: 'desc', + DETAILS: 'details', + DIALOG: 'dialog', + DIR: 'dir', + DIV: 'div', + DL: 'dl', + DT: 'dt', + + EM: 'em', + EMBED: 'embed', + + FIELDSET: 'fieldset', + FIGCAPTION: 'figcaption', + FIGURE: 'figure', + FONT: 'font', + FOOTER: 'footer', + FOREIGN_OBJECT: 'foreignObject', + FORM: 'form', + FRAME: 'frame', + FRAMESET: 'frameset', + + H1: 'h1', + H2: 'h2', + H3: 'h3', + H4: 'h4', + H5: 'h5', + H6: 'h6', + HEAD: 'head', + HEADER: 'header', + HGROUP: 'hgroup', + HR: 'hr', + HTML: 'html', + + I: 'i', + IMG: 'img', + IMAGE: 'image', + INPUT: 'input', + IFRAME: 'iframe', + + KEYGEN: 'keygen', + + LABEL: 'label', + LI: 'li', + LINK: 'link', + LISTING: 'listing', + + MAIN: 'main', + MALIGNMARK: 'malignmark', + MARQUEE: 'marquee', + MATH: 'math', + MENU: 'menu', + MENUITEM: 'menuitem', + META: 'meta', + MGLYPH: 'mglyph', + MI: 'mi', + MO: 'mo', + MN: 'mn', + MS: 'ms', + MTEXT: 'mtext', + + NAV: 'nav', + NOBR: 'nobr', + NOFRAMES: 'noframes', + NOEMBED: 'noembed', + 
NOSCRIPT: 'noscript', + + OBJECT: 'object', + OL: 'ol', + OPTGROUP: 'optgroup', + OPTION: 'option', + + P: 'p', + PARAM: 'param', + PLAINTEXT: 'plaintext', + PRE: 'pre', + + RB: 'rb', + RP: 'rp', + RT: 'rt', + RTC: 'rtc', + RUBY: 'ruby', + + S: 's', + SCRIPT: 'script', + SECTION: 'section', + SELECT: 'select', + SOURCE: 'source', + SMALL: 'small', + SPAN: 'span', + STRIKE: 'strike', + STRONG: 'strong', + STYLE: 'style', + SUB: 'sub', + SUMMARY: 'summary', + SUP: 'sup', + + TABLE: 'table', + TBODY: 'tbody', + TEMPLATE: 'template', + TEXTAREA: 'textarea', + TFOOT: 'tfoot', + TD: 'td', + TH: 'th', + THEAD: 'thead', + TITLE: 'title', + TR: 'tr', + TRACK: 'track', + TT: 'tt', + + U: 'u', + UL: 'ul', + + SVG: 'svg', + + VAR: 'var', + + WBR: 'wbr', + + XMP: 'xmp' +}; + +var SPECIAL_ELEMENTS = exports.SPECIAL_ELEMENTS = {}; + +SPECIAL_ELEMENTS[NS.HTML] = {}; +SPECIAL_ELEMENTS[NS.HTML][$.ADDRESS] = true; +SPECIAL_ELEMENTS[NS.HTML][$.APPLET] = true; +SPECIAL_ELEMENTS[NS.HTML][$.AREA] = true; +SPECIAL_ELEMENTS[NS.HTML][$.ARTICLE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.ASIDE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BASE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BASEFONT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BGSOUND] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BLOCKQUOTE] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BODY] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BR] = true; +SPECIAL_ELEMENTS[NS.HTML][$.BUTTON] = true; +SPECIAL_ELEMENTS[NS.HTML][$.CAPTION] = true; +SPECIAL_ELEMENTS[NS.HTML][$.CENTER] = true; +SPECIAL_ELEMENTS[NS.HTML][$.COL] = true; +SPECIAL_ELEMENTS[NS.HTML][$.COLGROUP] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DD] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DETAILS] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DIR] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DIV] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DL] = true; +SPECIAL_ELEMENTS[NS.HTML][$.DT] = true; +SPECIAL_ELEMENTS[NS.HTML][$.EMBED] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FIELDSET] = true; +SPECIAL_ELEMENTS[NS.HTML][$.FIGCAPTION] = true; 
//Special elements of the HTML namespace, FIGURE through TD.
//Each listed tag name is flagged `true` in SPECIAL_ELEMENTS[NS.HTML], in the order given.
[
    $.FIGURE, $.FOOTER, $.FORM, $.FRAME, $.FRAMESET,
    $.H1, $.H2, $.H3, $.H4, $.H5, $.H6,
    $.HEAD, $.HEADER, $.HGROUP, $.HR, $.HTML,
    $.IFRAME, $.IMG, $.INPUT,
    $.LI, $.LINK, $.LISTING,
    $.MAIN, $.MARQUEE, $.MENU, $.META,
    $.NAV, $.NOEMBED, $.NOFRAMES, $.NOSCRIPT,
    $.OBJECT, $.OL,
    $.P, $.PARAM, $.PLAINTEXT, $.PRE,
    $.SCRIPT, $.SECTION, $.SELECT, $.SOURCE, $.STYLE, $.SUMMARY,
    $.TABLE, $.TBODY, $.TD
].forEach(function (specialTagName) {
    SPECIAL_ELEMENTS[NS.HTML][specialTagName] = true;
});
//Special elements of the HTML namespace (continued)
SPECIAL_ELEMENTS[NS.HTML][$.TEMPLATE] = true;
SPECIAL_ELEMENTS[NS.HTML][$.TEXTAREA] = true;
SPECIAL_ELEMENTS[NS.HTML][$.TFOOT] = true;
SPECIAL_ELEMENTS[NS.HTML][$.TH] = true;
SPECIAL_ELEMENTS[NS.HTML][$.THEAD] = true;
SPECIAL_ELEMENTS[NS.HTML][$.TITLE] = true;
SPECIAL_ELEMENTS[NS.HTML][$.TR] = true;
SPECIAL_ELEMENTS[NS.HTML][$.TRACK] = true;
SPECIAL_ELEMENTS[NS.HTML][$.UL] = true;
SPECIAL_ELEMENTS[NS.HTML][$.WBR] = true;
SPECIAL_ELEMENTS[NS.HTML][$.XMP] = true;

//Special elements of the MathML namespace
SPECIAL_ELEMENTS[NS.MATHML] = {};
SPECIAL_ELEMENTS[NS.MATHML][$.MI] = true;
SPECIAL_ELEMENTS[NS.MATHML][$.MO] = true;
SPECIAL_ELEMENTS[NS.MATHML][$.MN] = true;
SPECIAL_ELEMENTS[NS.MATHML][$.MS] = true;
SPECIAL_ELEMENTS[NS.MATHML][$.MTEXT] = true;
SPECIAL_ELEMENTS[NS.MATHML][$.ANNOTATION_XML] = true;

//Special elements of the SVG namespace
SPECIAL_ELEMENTS[NS.SVG] = {};
SPECIAL_ELEMENTS[NS.SVG][$.TITLE] = true;
SPECIAL_ELEMENTS[NS.SVG][$.FOREIGN_OBJECT] = true;
SPECIAL_ELEMENTS[NS.SVG][$.DESC] = true;

},{}],35:[function(require,module,exports){
'use strict';

/**
 * Builds a new options object: own enumerable keys of `defaults`, overridden by own
 * enumerable keys of `options`. The copy is shallow and neither argument is modified.
 *
 * @param {Object} defaults - Default option values.
 * @param {Object} [options] - Overrides; any falsy value is treated as an empty object.
 * @returns {Object} Newly created merged object.
 */
module.exports = function mergeOptions(defaults, options) {
    options = options || {};

    //NOTE: reduce over [defaults, options] so later sources win on key clashes
    return [defaults, options].reduce(function (merged, optObj) {
        Object.keys(optObj).forEach(function (key) {
            merged[key] = optObj[key];
        });

        return merged;
    }, {});
};

},{}],36:[function(require,module,exports){
'use strict';

//U+FFFD as a string
exports.REPLACEMENT_CHARACTER = '\uFFFD';

//Named code points (EOF is the out-of-band value -1)
exports.CODE_POINTS = {
    EOF: -1,
    NULL: 0x00,
    TABULATION: 0x09,
    CARRIAGE_RETURN: 0x0D,
    LINE_FEED: 0x0A,
    FORM_FEED: 0x0C,
    SPACE: 0x20,
    EXCLAMATION_MARK: 0x21,
    QUOTATION_MARK: 0x22,
    NUMBER_SIGN: 0x23,
    AMPERSAND: 0x26,
    APOSTROPHE: 0x27,
    HYPHEN_MINUS: 0x2D,
    SOLIDUS: 0x2F,
    DIGIT_0: 0x30,
    DIGIT_9: 0x39,
    SEMICOLON: 0x3B,
    LESS_THAN_SIGN: 0x3C,
    EQUALS_SIGN: 0x3D,
    GREATER_THAN_SIGN: 0x3E,
    QUESTION_MARK: 0x3F,
    LATIN_CAPITAL_A: 0x41,
    LATIN_CAPITAL_F: 0x46,
    LATIN_CAPITAL_X: 0x58,
    LATIN_CAPITAL_Z: 0x5A,
    GRAVE_ACCENT: 0x60,
    LATIN_SMALL_A: 0x61,
    LATIN_SMALL_F: 0x66,
    LATIN_SMALL_X: 0x78,
    LATIN_SMALL_Z: 0x7A,
    REPLACEMENT_CHARACTER: 0xFFFD
};

//Keyword strings as arrays of code points (the trailing comment spells each one out)
exports.CODE_POINT_SEQUENCES = {
    DASH_DASH_STRING: [0x2D, 0x2D], //--
    DOCTYPE_STRING: [0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE
    CDATA_START_STRING: [0x5B, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5B], //[CDATA[
    CDATA_END_STRING: [0x5D, 0x5D, 0x3E], //]]>
    SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script
    PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4C, 0x49, 0x43], //PUBLIC
    SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4D] //SYSTEM
};

},{}],37:[function(require,module,exports){
'use strict';

var Parser = require('./parser'),
    Serializer = require('./serializer');

/** @namespace parse5 */

/**
 * Parses an HTML string.
 * @function parse
 * @memberof parse5
 * @instance
 * @param {string} html - Input HTML string.
 * @param {ParserOptions} [options] - Parsing options.
 * @returns {ASTNode} document
 * @example
 * var parse5 = require('parse5');
 *
 * var document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
 */
exports.parse = function parse(html, options) {
    //NOTE: a fresh Parser is created for every call
    var parser = new Parser(options);

    return parser.parse(html);
};

/**
 * Parses an HTML fragment.
 * @function parseFragment
 * @memberof parse5
 * @instance
 * @param {ASTNode} [fragmentContext] - Parsing context element. If specified, given fragment
 * will be parsed as if it was set to the context element's `innerHTML` property.
 * @param {string} html - Input HTML fragment string.
 * @param {ParserOptions} [options] - Parsing options.
 * @returns {ASTNode} documentFragment
 * @example
 * var parse5 = require('parse5');
 *
 * var documentFragment = parse5.parseFragment('<table></table>');
 *
 * // Parses the html fragment in the context of the parsed <table> element.
 * var trFragment = parse5.parseFragment(documentFragment.childNodes[0], '<tr><td>Shake it, baby</td></tr>');
 */
exports.parseFragment = function parseFragment(fragmentContext, html, options) {
    //NOTE: the context element is optional. When the first argument is a string,
    //the call was (html, options), so shift the arguments one position to the right.
    if (typeof fragmentContext === 'string') {
        options = html;
        html = fragmentContext;
        fragmentContext = null;
    }

    var parser = new Parser(options);

    return parser.parseFragment(html, fragmentContext);
};

/**
 * Serializes an AST node to an HTML string.
 * @function serialize
 * @memberof parse5
 * @instance
 * @param {ASTNode} node - Node to serialize.
 * @param {SerializerOptions} [options] - Serialization options.
 * @returns {String} html
 * @example
 * var parse5 = require('parse5');
 *
 * var document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
 *
 * // Serializes a document.
 * var html = parse5.serialize(document);
 *
 * // Serializes the <body> element content.
 * var bodyInnerHtml = parse5.serialize(document.childNodes[0].childNodes[1]);
 */
exports.serialize = function (node, options) {
    var serializer = new Serializer(node, options);

    return serializer.serialize();
};

/**
 * Provides built-in tree adapters that can be used for parsing and serialization.
 * @var treeAdapters
 * @memberof parse5
 * @instance
 * @property {TreeAdapter} default - Default tree format for parse5.
 * @property {TreeAdapter} htmlparser2 - Quite popular [htmlparser2](https://github.com/fb55/htmlparser2) tree format
 * (e.g. used by [cheerio](https://github.com/MatthewMueller/cheerio) and [jsdom](https://github.com/tmpvar/jsdom)).
 * @example
 * var parse5 = require('parse5');
 *
 * // Uses the default tree adapter for parsing.
 * var document = parse5.parse('<div></div>', { treeAdapter: parse5.treeAdapters.default });
 *
 * // Uses the htmlparser2 tree adapter with the SerializerStream.
 * var serializer = new parse5.SerializerStream(node, { treeAdapter: parse5.treeAdapters.htmlparser2 });
 */
exports.treeAdapters = {
    default: require('./tree_adapters/default'),
    htmlparser2: require('./tree_adapters/htmlparser2')
};


// Streaming
exports.ParserStream = require('./parser/stream');
exports.SerializerStream = require('./serializer/stream');
exports.SAXParser = require('./sax');

},{"./parser":41,"./parser/stream":43,"./sax":45,"./serializer":47,"./serializer/stream":48,"./tree_adapters/default":52,"./tree_adapters/htmlparser2":53}],38:[function(require,module,exports){
'use strict';

var OpenElementStack = require('../parser/open_element_stack'),
    Tokenizer = require('../tokenizer'),
    HTML = require('../common/html');


//Aliases
var $ = HTML.TAG_NAMES;


//Completes `element.__location` when the element is closed by `closingToken`:
//moves the start tag data into `startTag` on first use, records `endTag` when the token
//is the element's own end tag, and updates `endOffset`. Elements without `__location`
//are left untouched.
function setEndLocation(element, closingToken, treeAdapter) {
    var loc = element.__location;

    if (!loc)
        return;

    /**
     * @typedef {Object} ElementLocationInfo
     * @extends StartTagLocationInfo
     *
     * @property {StartTagLocationInfo} startTag - Element's start tag location info.
     * @property {LocationInfo} endTag - Element's end tag location info.
     */
    if (!loc.startTag) {
        //NOTE: until now `loc` described the start tag only, so snapshot it before
        //`endOffset` is overwritten below
        loc.startTag = {
            line: loc.line,
            col: loc.col,
            startOffset: loc.startOffset,
            endOffset: loc.endOffset
        };
        if (loc.attrs)
            loc.startTag.attrs = loc.attrs;
    }

    if (closingToken.location) {
        var ctLocation = closingToken.location,
            tn = treeAdapter.getTagName(element),
            // NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing tag and
            // for cases like <td> <p> </td> - 'p' closes without a closing tag
            isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN &&
                              tn === closingToken.tagName;

        if (isClosingEndTag) {
            loc.endTag = {
                line: ctLocation.line,
                col: ctLocation.col,
                startOffset: ctLocation.startOffset,
                endOffset: ctLocation.endOffset
            };
        }

        //NOTE: an element closed by its own end tag ends where that tag ends. Otherwise
        //it ends where the token that closed it begins.
        if (isClosingEndTag)
            loc.endOffset = ctLocation.endOffset;
        else
            loc.endOffset = ctLocation.startOffset;
    }
}


/**
 * Patches the given parser instance so that created nodes receive a `__location` property.
 * Methods are overridden on the instance itself; each override delegates to the
 * implementation found on the parser's prototype.
 *
 * @param {Object} parser - Parser instance to patch.
 */
exports.assign = function (parser) {
    //NOTE: obtain Parser proto this way to avoid module circular references
    var parserProto = Object.getPrototypeOf(parser),
        treeAdapter = parser.treeAdapter,
        attachableElementLocation = null,
        lastFosterParentingLocation = null,
        currentToken = null;


    //NOTE: patch _bootstrap method
    parser._bootstrap = function (document, fragmentContext) {
        parserProto._bootstrap.call(this, document, fragmentContext);

        //NOTE: reset the per-run state shared by the overrides below
        attachableElementLocation = null;
        lastFosterParentingLocation = null;
        currentToken = null;

        //OpenElementStack
        //NOTE: these are installed on `parser.openElements` after the original _bootstrap
        //has run. Each one records the end location before delegating to the stack's
        //prototype method.
        parser.openElements.pop = function () {
            setEndLocation(this.current, currentToken, treeAdapter);
            OpenElementStack.prototype.pop.call(this);
        };

        parser.openElements.popAllUpToHtmlElement = function () {
            //NOTE: index 0 is skipped on purpose (loop runs while i > 0)
            for (var i = this.stackTop; i > 0; i--)
                setEndLocation(this.items[i], currentToken, treeAdapter);

            OpenElementStack.prototype.popAllUpToHtmlElement.call(this);
        };

        parser.openElements.remove = function (element) {
            setEndLocation(element, currentToken, treeAdapter);
            OpenElementStack.prototype.remove.call(this, element);
        };
    };


    //Token processing
    //NOTE: both overrides remember the token being processed so that the stack hooks
    //above know which token closed an element
    parser._processTokenInForeignContent = function (token) {
        currentToken = token;
        parserProto._processTokenInForeignContent.call(this, token);
    };

    parser._processToken = function (token) {
        currentToken = token;
        parserProto._processToken.call(this, token);

        //NOTE: <html> and <body> are never popped from the stack, so we need to update
        //their end location explicitly.
        if (token.type === Tokenizer.END_TAG_TOKEN &&
            (token.tagName === $.HTML ||
             token.tagName === $.BODY && this.openElements.hasInScope($.BODY))) {
            for (var i = this.openElements.stackTop; i >= 0; i--) {
                var element = this.openElements.items[i];

                if (this.treeAdapter.getTagName(element) === token.tagName) {
                    setEndLocation(element, token, treeAdapter);
                    break;
                }
            }
        }
    };


    //Doctype
    parser._setDocumentType = function (token) {
        parserProto._setDocumentType.call(this, token);

        var documentChildren = this.treeAdapter.getChildNodes(this.document),
            cnLength = documentChildren.length;

        //NOTE: the first doctype node among the document's children gets the token location
        for (var i = 0; i < cnLength; i++) {
            var node = documentChildren[i];

            if (this.treeAdapter.isDocumentTypeNode(node)) {
                node.__location = token.location;
                break;
            }
        }
    };


    //Elements
    parser._attachElementToTree = function (element) {
        //NOTE: _attachElementToTree is called from _appendElement, _insertElement and _insertTemplate methods.
        //So we will use token location stored in these methods for the element.
        element.__location = attachableElementLocation || null;
        attachableElementLocation = null;
        parserProto._attachElementToTree.call(this, element);
    };

    parser._appendElement = function (token, namespaceURI) {
        attachableElementLocation = token.location;
        parserProto._appendElement.call(this, token, namespaceURI);
    };

    parser._insertElement = function (token, namespaceURI) {
        attachableElementLocation = token.location;
        parserProto._insertElement.call(this, token, namespaceURI);
    };

    parser._insertTemplate = function (token) {
        attachableElementLocation = token.location;
        parserProto._insertTemplate.call(this, token);

        var tmplContent = this.treeAdapter.getTemplateContent(this.openElements.current);

        //NOTE: the template content node itself carries no location
        tmplContent.__location = null;
    };

    parser._insertFakeRootElement = function () {
        parserProto._insertFakeRootElement.call(this);
        this.openElements.current.__location = null;
    };


    //Comments
    parser._appendCommentNode = function (token, parent) {
        parserProto._appendCommentNode.call(this, token, parent);

        //NOTE: the comment is taken to be the parent's last child after the append
        var children = this.treeAdapter.getChildNodes(parent),
            commentNode = children[children.length - 1];

        commentNode.__location = token.location;
    };


    //Text
    parser._findFosterParentingLocation = function () {
        //NOTE: store last foster parenting location, so we will be able to find inserted text
        //in case of foster parenting
        lastFosterParentingLocation = parserProto._findFosterParentingLocation.call(this);
        return lastFosterParentingLocation;
    };

    parser._insertCharacters = function (token) {
        parserProto._insertCharacters.call(this, token);

        //NOTE: locate the text node that received the characters. With foster parenting
        //it sits right before `beforeElement` (when there is one), otherwise it is the
        //last child of the insertion parent.
        var hasFosterParent = this._shouldFosterParentOnInsertion(),
            parent = hasFosterParent && lastFosterParentingLocation.parent ||
                     this.openElements.currentTmplContent ||
                     this.openElements.current,
            siblings = this.treeAdapter.getChildNodes(parent),
            textNodeIdx = hasFosterParent && lastFosterParentingLocation.beforeElement ?
                          siblings.indexOf(lastFosterParentingLocation.beforeElement) - 1 :
                          siblings.length - 1,
            textNode = siblings[textNodeIdx];

        //NOTE: if we have location assigned by another token, then just update end position
        if (textNode.__location)
            textNode.__location.endOffset = token.location.endOffset;

        else
            textNode.__location = token.location;
    };
};


},{"../common/html":34,"../parser/open_element_stack":42,"../tokenizer":49}],39:[function(require,module,exports){
'use strict';

var UNICODE = require('../common/unicode');

//Aliases
var $ = UNICODE.CODE_POINTS;


/**
 * Patches the given tokenizer instance so that every emitted token carries a `location`
 * object and start tag tokens also carry per-attribute locations.
 * Methods are overridden on the instance itself; each override delegates to the
 * implementation found on the tokenizer's prototype.
 *
 * @param {Object} tokenizer - Tokenizer instance to patch.
 */
exports.assign = function (tokenizer) {
    //NOTE: obtain Tokenizer proto this way to avoid module circular references
    var tokenizerProto = Object.getPrototypeOf(tokenizer),
        tokenStartOffset = -1,
        tokenCol = -1,
        tokenLine = 1,
        isEol = false,
        lineStartPosStack = [0],
        lineStartPos = 0,
        col = -1,
        line = 1;

    //Attaches a location whose start is the position captured when the current
    //mode's initial state was last entered; `endOffset` is filled in on emission.
    function attachLocationInfo(token) {
        /**
         * @typedef {Object} LocationInfo
         *
         * @property {Number} line - One-based line index
         * @property {Number} col - One-based column index
         * @property {Number} startOffset - Zero-based first character index
         * @property {Number} endOffset - Zero-based last character index
         */
        token.location = {
            line: tokenLine,
            col: tokenCol,
            startOffset: tokenStartOffset,
            endOffset: -1
        };
    }

    //NOTE: patch consumption method to track line/col information
    tokenizer._consume = function () {
        var cp = tokenizerProto._consume.call(this);

        //NOTE: LF should be in the last column of the line
        //(the line counter therefore advances on the character consumed AFTER the LF)
        if (isEol) {
            isEol = false;
            line++;
            lineStartPosStack.push(this.preprocessor.sourcePos);
            lineStartPos = this.preprocessor.sourcePos;
        }

        if (cp === $.LINE_FEED)
            isEol = true;

        col = this.preprocessor.sourcePos - lineStartPos + 1;

        return cp;
    };

    tokenizer._unconsume = function () {
        tokenizerProto._unconsume.call(this);
        isEol = false;

        //NOTE: stepping back across a line start: restore earlier line starts until the
        //current one is not ahead of the source position any more
        while (lineStartPos > this.preprocessor.sourcePos && lineStartPosStack.length > 1) {
            lineStartPos = lineStartPosStack.pop();
            line--;
        }

        col = this.preprocessor.sourcePos - lineStartPos + 1;
    };

    //NOTE: patch token creation methods and attach location objects
    tokenizer._createStartTagToken = function () {
        tokenizerProto._createStartTagToken.call(this);
        attachLocationInfo(this.currentToken);
    };

    tokenizer._createEndTagToken = function () {
        tokenizerProto._createEndTagToken.call(this);
        attachLocationInfo(this.currentToken);
    };

    tokenizer._createCommentToken = function () {
        tokenizerProto._createCommentToken.call(this);
        attachLocationInfo(this.currentToken);
    };

    tokenizer._createDoctypeToken = function (initialName) {
        tokenizerProto._createDoctypeToken.call(this, initialName);
        attachLocationInfo(this.currentToken);
    };

    tokenizer._createCharacterToken = function (type, ch) {
        tokenizerProto._createCharacterToken.call(this, type, ch);
        attachLocationInfo(this.currentCharacterToken);
    };

    tokenizer._createAttr = function (attrNameFirstCh) {
        tokenizerProto._createAttr.call(this, attrNameFirstCh);
        //NOTE: attributes start at the current position, not at the token start
        this.currentAttrLocation = {
            line: line,
            col: col,
            startOffset: this.preprocessor.sourcePos,
            endOffset: -1
        };
    };

    tokenizer._leaveAttrName = function (toState) {
        tokenizerProto._leaveAttrName.call(this, toState);
        this._attachCurrentAttrLocationInfo();
    };

    tokenizer._leaveAttrValue = function (toState) {
        tokenizerProto._leaveAttrValue.call(this, toState);
        this._attachCurrentAttrLocationInfo();
    };

    //Closes the current attribute location at the current source position and stores it
    //on the current token under the attribute's name. It is called after the name and
    //again after the value, so the later call overwrites `endOffset`.
    tokenizer._attachCurrentAttrLocationInfo = function () {
        this.currentAttrLocation.endOffset = this.preprocessor.sourcePos;

        if (!this.currentToken.location.attrs)
            this.currentToken.location.attrs = {};

        /**
         * @typedef {Object} StartTagLocationInfo
         * @extends LocationInfo
         *
         * @property {Dictionary} attrs - Start tag attributes' location info.
         */
        this.currentToken.location.attrs[this.currentAttr.name] = this.currentAttrLocation;
    };

    //NOTE: patch token emission methods to determine end location
    tokenizer._emitCurrentToken = function () {
        //NOTE: if we have a pending character token, make its end location equal to the
        //current token's start location.
        if (this.currentCharacterToken)
            this.currentCharacterToken.location.endOffset = this.currentToken.location.startOffset;

        this.currentToken.location.endOffset = this.preprocessor.sourcePos + 1;
        tokenizerProto._emitCurrentToken.call(this);
    };

    tokenizer._emitCurrentCharacterToken = function () {
        //NOTE: if we have a character token and its location wasn't set in _emitCurrentToken(),
        //then set its location at the current preprocessor position.
        //We don't need to increment preprocessor position, since character token
        //emission is always forced by the start of the next character token here.
        //So, we already have advanced position.
        if (this.currentCharacterToken && this.currentCharacterToken.location.endOffset === -1)
            this.currentCharacterToken.location.endOffset = this.preprocessor.sourcePos;

        tokenizerProto._emitCurrentCharacterToken.call(this);
    };

    //NOTE: patch initial states for each mode to obtain token start position
    Object.keys(tokenizerProto.MODE)

        .map(function (modeName) {
            return tokenizerProto.MODE[modeName];
        })

        .forEach(function (state) {
            //NOTE: `state` is the name of the state method that a mode starts in
            tokenizer[state] = function (cp) {
                tokenStartOffset = this.preprocessor.sourcePos;
                tokenLine = line;
                tokenCol = col;
                tokenizerProto[state].call(this, cp);
            };
        });
};

},{"../common/unicode":36}],40:[function(require,module,exports){
'use strict';

//Const
//Largest number of matching entries the Noah's Ark checks allow after the last marker
var NOAH_ARK_CAPACITY = 3;

//List of formatting elements
//`length` mirrors `entries.length` and is maintained by hand by every mutation method.
var FormattingElementList = module.exports = function (treeAdapter) {
    this.length = 0;
    this.entries = [];
    this.treeAdapter = treeAdapter;
    this.bookmark = null;
};

//Entry types
+FormattingElementList.MARKER_ENTRY = 'MARKER_ENTRY'; +FormattingElementList.ELEMENT_ENTRY = 'ELEMENT_ENTRY'; + +//Noah Ark's condition +//OPTIMIZATION: at first we try to find possible candidates for exclusion using +//lightweight heuristics without thorough attributes check. +FormattingElementList.prototype._getNoahArkConditionCandidates = function (newElement) { + var candidates = []; + + if (this.length >= NOAH_ARK_CAPACITY) { + var neAttrsLength = this.treeAdapter.getAttrList(newElement).length, + neTagName = this.treeAdapter.getTagName(newElement), + neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement); + + for (var i = this.length - 1; i >= 0; i--) { + var entry = this.entries[i]; + + if (entry.type === FormattingElementList.MARKER_ENTRY) + break; + + var element = entry.element, + elementAttrs = this.treeAdapter.getAttrList(element), + isCandidate = this.treeAdapter.getTagName(element) === neTagName && + this.treeAdapter.getNamespaceURI(element) === neNamespaceURI && + elementAttrs.length === neAttrsLength; + + if (isCandidate) + candidates.push({idx: i, attrs: elementAttrs}); + } + } + + return candidates.length < NOAH_ARK_CAPACITY ? 
[] : candidates; +}; + +FormattingElementList.prototype._ensureNoahArkCondition = function (newElement) { + var candidates = this._getNoahArkConditionCandidates(newElement), + cLength = candidates.length; + + if (cLength) { + var neAttrs = this.treeAdapter.getAttrList(newElement), + neAttrsLength = neAttrs.length, + neAttrsMap = {}; + + //NOTE: build attrs map for the new element so we can perform fast lookups + for (var i = 0; i < neAttrsLength; i++) { + var neAttr = neAttrs[i]; + + neAttrsMap[neAttr.name] = neAttr.value; + } + + for (i = 0; i < neAttrsLength; i++) { + for (var j = 0; j < cLength; j++) { + var cAttr = candidates[j].attrs[i]; + + if (neAttrsMap[cAttr.name] !== cAttr.value) { + candidates.splice(j, 1); + cLength--; + } + + if (candidates.length < NOAH_ARK_CAPACITY) + return; + } + } + + //NOTE: remove bottommost candidates until Noah's Ark condition will not be met + for (i = cLength - 1; i >= NOAH_ARK_CAPACITY - 1; i--) { + this.entries.splice(candidates[i].idx, 1); + this.length--; + } + } +}; + +//Mutations +FormattingElementList.prototype.insertMarker = function () { + this.entries.push({type: FormattingElementList.MARKER_ENTRY}); + this.length++; +}; + +FormattingElementList.prototype.pushElement = function (element, token) { + this._ensureNoahArkCondition(element); + + this.entries.push({ + type: FormattingElementList.ELEMENT_ENTRY, + element: element, + token: token + }); + + this.length++; +}; + +FormattingElementList.prototype.insertElementAfterBookmark = function (element, token) { + var bookmarkIdx = this.length - 1; + + for (; bookmarkIdx >= 0; bookmarkIdx--) { + if (this.entries[bookmarkIdx] === this.bookmark) + break; + } + + this.entries.splice(bookmarkIdx + 1, 0, { + type: FormattingElementList.ELEMENT_ENTRY, + element: element, + token: token + }); + + this.length++; +}; + +FormattingElementList.prototype.removeEntry = function (entry) { + for (var i = this.length - 1; i >= 0; i--) { + if (this.entries[i] === entry) { + 
this.entries.splice(i, 1); + this.length--; + break; + } + } +}; + +FormattingElementList.prototype.clearToLastMarker = function () { + while (this.length) { + var entry = this.entries.pop(); + + this.length--; + + if (entry.type === FormattingElementList.MARKER_ENTRY) + break; + } +}; + +//Search +FormattingElementList.prototype.getElementEntryInScopeWithTagName = function (tagName) { + for (var i = this.length - 1; i >= 0; i--) { + var entry = this.entries[i]; + + if (entry.type === FormattingElementList.MARKER_ENTRY) + return null; + + if (this.treeAdapter.getTagName(entry.element) === tagName) + return entry; + } + + return null; +}; + +FormattingElementList.prototype.getElementEntry = function (element) { + for (var i = this.length - 1; i >= 0; i--) { + var entry = this.entries[i]; + + if (entry.type === FormattingElementList.ELEMENT_ENTRY && entry.element === element) + return entry; + } + + return null; +}; + +},{}],41:[function(require,module,exports){ +'use strict'; + +var Tokenizer = require('../tokenizer'), + OpenElementStack = require('./open_element_stack'), + FormattingElementList = require('./formatting_element_list'), + locationInfoMixin = require('../location_info/parser_mixin'), + defaultTreeAdapter = require('../tree_adapters/default'), + doctype = require('../common/doctype'), + foreignContent = require('../common/foreign_content'), + mergeOptions = require('../common/merge_options'), + UNICODE = require('../common/unicode'), + HTML = require('../common/html'); + +//Aliases +var $ = HTML.TAG_NAMES, + NS = HTML.NAMESPACES, + ATTRS = HTML.ATTRS; + +/** + * @typedef {Object} ParserOptions + * + * @property {Boolean} [locationInfo=false] - Enables source code location information for the nodes. + * When enabled, each node (except root node) has the `__location` property. In case the node is not an empty element, + * `__location` will be {@link ElementLocationInfo} object, otherwise it's {@link LocationInfo}. 
+ * If the element was implicitly created by the parser it's `__location` property will be `null`. + * + * @property {TreeAdapter} [treeAdapter=parse5.treeAdapters.default] - Specifies the resulting tree format. + */ +var DEFAULT_OPTIONS = { + locationInfo: false, + treeAdapter: defaultTreeAdapter +}; + +//Misc constants +var HIDDEN_INPUT_TYPE = 'hidden'; + +//Adoption agency loops iteration count +var AA_OUTER_LOOP_ITER = 8, + AA_INNER_LOOP_ITER = 3; + +//Insertion modes +var INITIAL_MODE = 'INITIAL_MODE', + BEFORE_HTML_MODE = 'BEFORE_HTML_MODE', + BEFORE_HEAD_MODE = 'BEFORE_HEAD_MODE', + IN_HEAD_MODE = 'IN_HEAD_MODE', + AFTER_HEAD_MODE = 'AFTER_HEAD_MODE', + IN_BODY_MODE = 'IN_BODY_MODE', + TEXT_MODE = 'TEXT_MODE', + IN_TABLE_MODE = 'IN_TABLE_MODE', + IN_TABLE_TEXT_MODE = 'IN_TABLE_TEXT_MODE', + IN_CAPTION_MODE = 'IN_CAPTION_MODE', + IN_COLUMN_GROUP_MODE = 'IN_COLUMN_GROUP_MODE', + IN_TABLE_BODY_MODE = 'IN_TABLE_BODY_MODE', + IN_ROW_MODE = 'IN_ROW_MODE', + IN_CELL_MODE = 'IN_CELL_MODE', + IN_SELECT_MODE = 'IN_SELECT_MODE', + IN_SELECT_IN_TABLE_MODE = 'IN_SELECT_IN_TABLE_MODE', + IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE', + AFTER_BODY_MODE = 'AFTER_BODY_MODE', + IN_FRAMESET_MODE = 'IN_FRAMESET_MODE', + AFTER_FRAMESET_MODE = 'AFTER_FRAMESET_MODE', + AFTER_AFTER_BODY_MODE = 'AFTER_AFTER_BODY_MODE', + AFTER_AFTER_FRAMESET_MODE = 'AFTER_AFTER_FRAMESET_MODE'; + +//Insertion mode reset map +var INSERTION_MODE_RESET_MAP = {}; + +INSERTION_MODE_RESET_MAP[$.TR] = IN_ROW_MODE; +INSERTION_MODE_RESET_MAP[$.TBODY] = +INSERTION_MODE_RESET_MAP[$.THEAD] = +INSERTION_MODE_RESET_MAP[$.TFOOT] = IN_TABLE_BODY_MODE; +INSERTION_MODE_RESET_MAP[$.CAPTION] = IN_CAPTION_MODE; +INSERTION_MODE_RESET_MAP[$.COLGROUP] = IN_COLUMN_GROUP_MODE; +INSERTION_MODE_RESET_MAP[$.TABLE] = IN_TABLE_MODE; +INSERTION_MODE_RESET_MAP[$.BODY] = IN_BODY_MODE; +INSERTION_MODE_RESET_MAP[$.FRAMESET] = IN_FRAMESET_MODE; + +//Template insertion mode switch map +var TEMPLATE_INSERTION_MODE_SWITCH_MAP = {}; + 
//Fill the template insertion mode switch map: tag name -> insertion mode.
//NOTE: each list is written in the order in which the keys get created.
[$.THEAD, $.TFOOT, $.TBODY, $.COLGROUP, $.CAPTION].forEach(function (tableChildTagName) {
    TEMPLATE_INSERTION_MODE_SWITCH_MAP[tableChildTagName] = IN_TABLE_MODE;
});
TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.COL] = IN_COLUMN_GROUP_MODE;
TEMPLATE_INSERTION_MODE_SWITCH_MAP[$.TR] = IN_TABLE_BODY_MODE;
[$.TH, $.TD].forEach(function (cellTagName) {
    TEMPLATE_INSERTION_MODE_SWITCH_MAP[cellTagName] = IN_ROW_MODE;
});

//Token handlers map for insertion modes
//Shape: _[insertionMode][tokenType] -> handler function.
var _ = {};

//Initial mode
(function (handlers) {
    handlers[Tokenizer.CHARACTER_TOKEN] =
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = tokenInInitialMode;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = doctypeInInitialMode;
    handlers[Tokenizer.START_TAG_TOKEN] =
    handlers[Tokenizer.END_TAG_TOKEN] =
    handlers[Tokenizer.EOF_TOKEN] = tokenInInitialMode;
})(_[INITIAL_MODE] = {});

//Before html mode
(function (handlers) {
    handlers[Tokenizer.CHARACTER_TOKEN] =
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHtml;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagBeforeHtml;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagBeforeHtml;
    handlers[Tokenizer.EOF_TOKEN] = tokenBeforeHtml;
})(_[BEFORE_HTML_MODE] = {});

//Before head mode (character, comment, doctype and start tag handlers)
(function (handlers) {
    handlers[Tokenizer.CHARACTER_TOKEN] =
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHead;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagBeforeHead;
})(_[BEFORE_HEAD_MODE] = {});
//Token handlers: _[<insertion mode>][<Tokenizer token type>] -> handler.
//Filled through a function-local alias; every `_[MODE]` object is still
//stored on `_` under the same key.
(function () {
    var handlers;

    //BEFORE_HEAD_MODE (remaining entries; the table object already exists here)
    handlers = _[BEFORE_HEAD_MODE];
    handlers[Tokenizer.END_TAG_TOKEN] = endTagBeforeHead;
    handlers[Tokenizer.EOF_TOKEN] = tokenBeforeHead;

    //IN_HEAD_MODE
    handlers = _[IN_HEAD_MODE] = {};
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = tokenInHead;
    handlers[Tokenizer.CHARACTER_TOKEN] = tokenInHead;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInHead;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInHead;
    handlers[Tokenizer.EOF_TOKEN] = tokenInHead;

    //AFTER_HEAD_MODE
    handlers = _[AFTER_HEAD_MODE] = {};
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterHead;
    handlers[Tokenizer.CHARACTER_TOKEN] = tokenAfterHead;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagAfterHead;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagAfterHead;
    handlers[Tokenizer.EOF_TOKEN] = tokenAfterHead;

    //IN_BODY_MODE
    handlers = _[IN_BODY_MODE] = {};
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInBody;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInBody;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInBody;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //TEXT_MODE
    handlers = _[TEXT_MODE] = {};
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.START_TAG_TOKEN] = ignoreToken;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.COMMENT_TOKEN] = ignoreToken;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInText;
    handlers[Tokenizer.EOF_TOKEN] = eofInText;

    //IN_TABLE_MODE
    handlers = _[IN_TABLE_MODE] = {};
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInTable;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInTable;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_TABLE_TEXT_MODE
    handlers = _[IN_TABLE_TEXT_MODE] = {};
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInTableText;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInTableText;
    handlers[Tokenizer.EOF_TOKEN] = tokenInTableText;
    handlers[Tokenizer.END_TAG_TOKEN] = tokenInTableText;
    handlers[Tokenizer.START_TAG_TOKEN] = tokenInTableText;
    handlers[Tokenizer.DOCTYPE_TOKEN] = tokenInTableText;
    handlers[Tokenizer.COMMENT_TOKEN] = tokenInTableText;

    //IN_CAPTION_MODE
    handlers = _[IN_CAPTION_MODE] = {};
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInBody;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInCaption;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInCaption;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_COLUMN_GROUP_MODE (the remaining entries are assigned right after this group)
    handlers = _[IN_COLUMN_GROUP_MODE] = {};
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = tokenInColumnGroup;
    handlers[Tokenizer.CHARACTER_TOKEN] = tokenInColumnGroup;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
}());
//Token handlers: _[<insertion mode>][<Tokenizer token type>] -> handler.
//Filled through a function-local alias; every `_[MODE]` object is still
//stored on `_` under the same key.
(function () {
    var handlers;

    //IN_COLUMN_GROUP_MODE (remaining entries; the table object already exists here)
    handlers = _[IN_COLUMN_GROUP_MODE];
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInColumnGroup;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInColumnGroup;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_TABLE_BODY_MODE
    handlers = _[IN_TABLE_BODY_MODE] = {};
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInTableBody;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInTableBody;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_ROW_MODE
    handlers = _[IN_ROW_MODE] = {};
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInTable;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInRow;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInRow;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_CELL_MODE
    handlers = _[IN_CELL_MODE] = {};
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInBody;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInCell;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInCell;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_SELECT_MODE (the remaining entries are assigned right after this group)
    handlers = _[IN_SELECT_MODE] = {};
    handlers[Tokenizer.CHARACTER_TOKEN] = insertCharacters;
}());
//Token handlers: _[<insertion mode>][<Tokenizer token type>] -> handler.
//Filled through a function-local alias; every `_[MODE]` object is still
//stored on `_` under the same key.
(function () {
    var handlers;

    //IN_SELECT_MODE (remaining entries; the table object already exists here)
    handlers = _[IN_SELECT_MODE];
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInSelect;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInSelect;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_SELECT_IN_TABLE_MODE
    handlers = _[IN_SELECT_IN_TABLE_MODE] = {};
    handlers[Tokenizer.CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInSelectInTable;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInSelectInTable;
    handlers[Tokenizer.EOF_TOKEN] = eofInBody;

    //IN_TEMPLATE_MODE
    handlers = _[IN_TEMPLATE_MODE] = {};
    handlers[Tokenizer.CHARACTER_TOKEN] = characterInBody;
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody;
    handlers[Tokenizer.COMMENT_TOKEN] = appendComment;
    handlers[Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
    handlers[Tokenizer.START_TAG_TOKEN] = startTagInTemplate;
    handlers[Tokenizer.END_TAG_TOKEN] = endTagInTemplate;
    handlers[Tokenizer.EOF_TOKEN] = eofInTemplate;

    //AFTER_BODY_MODE (the remaining entries are assigned right after this group)
    handlers = _[AFTER_BODY_MODE] = {};
    handlers[Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterBody;
    handlers[Tokenizer.CHARACTER_TOKEN] = tokenAfterBody;
    handlers[Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody;
    handlers[Tokenizer.COMMENT_TOKEN] = appendCommentToRootHtmlElement;
}());
+_[AFTER_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterBody; +_[AFTER_BODY_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterBody; +_[AFTER_BODY_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + +_[IN_FRAMESET_MODE] = {}; +_[IN_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = +_[IN_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[IN_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[IN_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[IN_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagInFrameset; +_[IN_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = endTagInFrameset; +_[IN_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + +_[AFTER_FRAMESET_MODE] = {}; +_[AFTER_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = +_[AFTER_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[AFTER_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; +_[AFTER_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; +_[AFTER_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterFrameset; +_[AFTER_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterFrameset; +_[AFTER_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + +_[AFTER_AFTER_BODY_MODE] = {}; +_[AFTER_AFTER_BODY_MODE][Tokenizer.CHARACTER_TOKEN] = tokenAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToDocument; +_[AFTER_AFTER_BODY_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_AFTER_BODY_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.END_TAG_TOKEN] = tokenAfterAfterBody; +_[AFTER_AFTER_BODY_MODE][Tokenizer.EOF_TOKEN] = 
stopParsing; + +_[AFTER_AFTER_FRAMESET_MODE] = {}; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.CHARACTER_TOKEN] = +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = ignoreToken; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = whitespaceCharacterInBody; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.COMMENT_TOKEN] = appendCommentToDocument; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterAfterFrameset; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.END_TAG_TOKEN] = ignoreToken; +_[AFTER_AFTER_FRAMESET_MODE][Tokenizer.EOF_TOKEN] = stopParsing; + + +//Parser +var Parser = module.exports = function (options) { + this.options = mergeOptions(DEFAULT_OPTIONS, options); + + this.treeAdapter = this.options.treeAdapter; + this.pendingScript = null; + + if (this.options.locationInfo) + locationInfoMixin.assign(this); +}; + +// API +Parser.prototype.parse = function (html) { + var document = this.treeAdapter.createDocument(); + + this._bootstrap(document, null); + this.tokenizer.write(html, true); + this._runParsingLoop(null, null); + + return document; +}; + +Parser.prototype.parseFragment = function (html, fragmentContext) { + //NOTE: use
Shake it, baby