var util = require('../util'), _ = util.lodash, fileType = require('file-type'), mimeType = require('mime-types'), mimeFormat = require('mime-format'), /** * @private * @const * @type {String} */ E = '', /** * @private * @const * @type {String} */ DOT = '.', /** * @private * @const * @type {String} */ QUESTION_MARK = '?', /** * @private * @const * @type {String} */ DOUBLE_QUOTES = '"', /** * @private * @const * @type {String} */ TOKEN_$1 = '$1', /** * @private * @const * @type {String} */ BINARY = 'binary', /** * @private * @const * @type {String} */ CHARSET_UTF8 = 'utf8', /** * @private * @const * @type {String} */ CONTENT_TYPE_TEXT_PLAIN = 'text/plain', /** * Enum for all the Content Headers * * @private * @const * @enum {String} HEADERS */ HEADERS = { CONTENT_TYPE: 'Content-Type', CONTENT_DISPOSITION: 'Content-Disposition' }, /** * @private * @const * @type {String} */ DEFAULT_RESPONSE_FILENAME = 'response', /** * @private * @type {Boolean} */ supportsBuffer = (typeof Buffer !== undefined) && _.isFunction(Buffer.byteLength), /** * Regexes for extracting and decoding the filename from content-disposition header * * @private * @type {Object} */ regexes = { /** * RegExp for extracting filename from content-disposition header * * RFC 2616 grammar * parameter = token "=" ( token | quoted-string ) * token = 1* * separators = "(" | ")" | "<" | ">" | "@" * | "," | ";" | ":" | "\" | <"> * | "/" | "[" | "]" | "?" | "=" * | "{" | "}" | SP | HT * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) * qdtext = > * quoted-pair = "\" CHAR * CHAR = * TEXT = * LWS = [CRLF] 1*( SP | HT ) * CRLF = CR LF * CR = * LF = * SP = * HT = * CTL = * OCTET = * * egHeader: inline; filename=testResponse.json * egHeader: inline; filename="test Response.json" * Reference: https://github.com/jshttp/content-disposition */ // eslint-disable-next-line max-len fileNameRegex: /;[ \t]*(?:filename)[ \t]*=[ \t]*("(?:[\x20!\x23-\x5b\x5d-\x7e\x80-\xff]|\\[\x20-\x7e])*"|[!#$%&'*+.0-9A-Z^_`a-z|~-]+)[ \t]*/, /** * RegExp for extracting filename* from content-disposition header * * RFC 5987 grammar * parameter = reg-parameter / ext-parameter * ext-parameter = parmname "*" LWSP "=" LWSP ext-value * parmname = 1*attr-char * ext-value = charset "'" [ language ] "'" value-chars ; like RFC 2231's ; (see [RFC2231], Section 7) * charset = "UTF-8" / "ISO-8859-1" / mime-charset * mime-charset = 1*mime-charsetc * mime-charsetc = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "+" / "-" / "^" / "_" / "`" / "{" / "}" / "~" ; as in Section 2.3 of [RFC2978] ; except that the single quote is not included ; SHOULD be registered in the IANA charset registry * language = * value-chars = *( pct-encoded / attr-char ) * pct-encoded = "%" HEXDIG HEXDIG ; see [RFC3986], Section 2.1 * attr-char = ALPHA / DIGIT / "!" / "#" / "$" / "&" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" ; token except ( "*" / "'" / "%" ) * * egHeader: attachment;filename*=utf-8''%E4%BD%A0%E5%A5%BD.txt * Reference: https://github.com/jshttp/content-disposition */ // eslint-disable-next-line max-len encodedFileNameRegex: /;[ \t]*(?:filename\*)[ \t]*=[ \t]*([A-Za-z0-9!#$%&+\-^_`{}~]+)'.*'((?:%[0-9A-Fa-f]{2}|[A-Za-z0-9!#$&+.^_`|~-])+)[ \t]*/, /** * RegExp to match quoted-pair in RFC 2616 * * quoted-pair = "\" CHAR * CHAR = */ quotedPairRegex: /\\([ -~])/g, /** * Regex to match all the hexadecimal number inside encoded string */ hexCharMatchRegex: /%([0-9A-Fa-f]{2})/g, /** * Regex to match non-latin characters */ nonLatinCharMatchRegex: /[^\x20-\x7e\xa0-\xff]/g }, /** * Decodes the hexcode to charCode * * @private * @param {String} str - The matched string part of a hexadecimal number * @param {String} hex - The hexadecimal string which needs to be converted to charCode * @returns {String} - String with decoded hexcode values */ decodeHexcode = function (str, hex) { return String.fromCharCode(parseInt(hex, 16)); }, /** * HashMap for decoding string with supported characterSets * iso-8859-1 * utf-8 * * @private * @type {Object} */ characterDecoders = { /** * Replaces non-latin characters with '?' * * @private * @param {String} val - Input encoded string * @returns {String} - String with latin characters */ 'iso-8859-1' (val) { return val.replace(regexes.nonLatinCharMatchRegex, QUESTION_MARK); }, /** * Decodes the given string with utf-8 character set * * @private * @param {?String} encodedString - Input encoded string * @returns {?String} - String with decoded character with utf-8 */ 'utf-8' (encodedString) { /* istanbul ignore if */ if (!supportsBuffer) { return; } return Buffer.from(encodedString, BINARY).toString(CHARSET_UTF8); } }, /** * Decodes the given filename with given charset * The supported character sets are * iso-8859-1 * utf-8 * * @private * @param {String} encodedFileName - Input encoded file name * @param {String} charset - The character set to be used while decoding * @returns {String} - Returns the decoded filename */ decodeFileName = function (encodedFileName, charset) { /* istanbul ignore if */ if (!encodedFileName) { return; } if (!characterDecoders[charset]) { return; } // decodes the hexadecimal numbers to charCode in encodedFileName and then decodes with given charset return characterDecoders[charset](encodedFileName.replace(regexes.hexCharMatchRegex, decodeHexcode)); }, /** * Takes the content-type header value and performs the mime sniffing with known mime types. * If content-type header is not present, detects the mime type from the response stream or response body * If content-type is not provided and not able to detect, then text/plain is taken as default * * @private * @param {?String} contentType - The value of content type header * @param {Stream|String} response - The response stream or body, for which content-info should be determined * @returns {Object} - mime information from response headers */ getMimeInfo = function (contentType, response) { var normalized, detected, detectedExtension; if (!contentType) { detected = fileType(response); detected && (contentType = detected.mime) && (detectedExtension = detected.ext); } // if contentType is not detected set text/plain as default if (!contentType) { contentType = CONTENT_TYPE_TEXT_PLAIN; } normalized = mimeFormat.lookup(contentType); return { contentType: normalized.source, mimeType: normalized.type, // sanitized mime type base mimeFormat: normalized.format, // format specific to the type returned charset: normalized.charset || CHARSET_UTF8, extension: detectedExtension || mimeType.extension(normalized.source) || E }; }, /** * Parses Content disposition header, and returns file name and extension * * @private * @param {?String} dispositionHeader - Content-disposition Header from the response * @returns {?String} - Returns file name from content disposition header if present */ getFileNameFromDispositionHeader = function (dispositionHeader) { if (!dispositionHeader) { return; } var encodedFileName, fileName; // Get filename* value from the dispositionHeader encodedFileName = regexes.encodedFileNameRegex.exec(dispositionHeader); if (encodedFileName) { fileName = decodeFileName(encodedFileName[2], encodedFileName[1]); } // If filename* is not present or unparseable, then we are checking for filename in header if (!fileName) { fileName = regexes.fileNameRegex.exec(dispositionHeader); fileName && (fileName = fileName[1]); // check if file name is wrapped in double quotes // file name can contain escaped characters if wrapped in quotes if (fileName && fileName[0] === DOUBLE_QUOTES) { // remove quotes and escapes fileName = fileName .substr(1, fileName.length - 2) .replace(regexes.quotedPairRegex, TOKEN_$1); } } return fileName; }; module.exports = { /** * Extracts content related information from response. * Includes response mime information, character set and file name. * * @private * @param {Response} response - response instance * @returns {Response.ResponseContentInfo} - Return contentInfo of the response */ contentInfo (response) { var contentType = response.headers.get(HEADERS.CONTENT_TYPE), contentDisposition = response.headers.get(HEADERS.CONTENT_DISPOSITION), mimeInfo = getMimeInfo(contentType, response.stream || response.body), fileName = getFileNameFromDispositionHeader(contentDisposition), fileExtension = mimeInfo.extension, /** * @typedef Response.ResponseContentInfo * * @property {String} mimeType sanitized mime type * @property {String} mimeFormat format for the identified mime type * @property {String} charset the normalized character set * @property {String} fileExtension extension identified from the mime type * @property {String} fileName file name extracted from disposition header * @property {String} contentType sanitized content-type extracted from header */ contentInfo = {}; // if file name is not present in the content disposition headers, use a default file name if (!fileName) { fileName = DEFAULT_RESPONSE_FILENAME; // add extension to default if present fileExtension && (fileName += (DOT + fileExtension)); } // create a compacted list of content info from mime info and file name mimeInfo.contentType && (contentInfo.contentType = mimeInfo.contentType); mimeInfo.mimeType && (contentInfo.mimeType = mimeInfo.mimeType); mimeInfo.mimeFormat && (contentInfo.mimeFormat = mimeInfo.mimeFormat); mimeInfo.charset && (contentInfo.charset = mimeInfo.charset); fileExtension && (contentInfo.fileExtension = fileExtension); fileName && (contentInfo.fileName = fileName); return contentInfo; }, // regexes are extracted for vulnerability tests regexes };