import { inherit } from './utils.js';
import * as regex from './regex.js';

/** @typedef {import('highlight.js').Mode} Mode */
/** @typedef {import('highlight.js').ModeCallback} ModeCallback */

// Common regexps

/** Pattern that can never match: a word boundary immediately followed by a non-boundary. */
export const MATCH_NOTHING_RE = /\b\B/;
/** Identifier: a letter followed by any number of word characters. */
export const IDENT_RE = '[a-zA-Z]\\w*';
/** Identifier that may also start with an underscore. */
export const UNDERSCORE_IDENT_RE = '[a-zA-Z_]\\w*';
/** Unsigned decimal number with an optional fractional part. */
export const NUMBER_RE = '\\b\\d+(\\.\\d+)?';
export const C_NUMBER_RE = '(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float
export const BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b...
/** Alternation of operator and punctuation tokens (string form, regex-escaped). */
export const RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';

/**
 * Creates a `meta` mode for a shebang line (`#!/...`).
 *
 * When `opts.binary` is given, the begin pattern additionally requires that
 * binary to appear as a whole word somewhere after the shebang prefix. Any
 * other keys in `opts` are passed through to `inherit` and override the
 * defaults below.
 *
 * The caller's `opts` object is left untouched: the derived `begin` is placed
 * on a copy rather than written back onto the argument.
 *
 * @param { Partial<Mode> & {binary?: string | RegExp} } opts
 */
export const SHEBANG = (opts = {}) => {
  const beginShebang = /^#![ ]*\//;

  // Copy before overriding `begin` so a reused options object is not mutated.
  const overrides = opts.binary
    ? Object.assign({}, opts, {
      begin: regex.concat(
        beginShebang,
        /.*\b/,
        opts.binary,
        /\b.*/)
    })
    : opts;

  return inherit({
    scope: 'meta',
    begin: beginShebang,
    end: /$/,
    relevance: 0,
    /** @type {ModeCallback} */
    "on:begin": (m, resp) => {
      // only accept a match located at index 0
      if (m.index !== 0) resp.ignoreMatch();
    }
  }, overrides);
};

// Common modes

/** A backslash followed by any single character (including a newline). */
export const BACKSLASH_ESCAPE = {
  begin: '\\\\[\\s\\S]',
  relevance: 0
};

/** Single-quoted string; a newline inside it is flagged as illegal. */
export const APOS_STRING_MODE = {
  scope: 'string',
  begin: '\'',
  end: '\'',
  illegal: '\\n',
  contains: [BACKSLASH_ESCAPE]
};

/** Double-quoted string; a newline inside it is flagged as illegal. */
export const QUOTE_STRING_MODE = {
  scope: 'string',
  begin: '"',
  end: '"',
  illegal: '\\n',
  contains: [BACKSLASH_ESCAPE]
};

/** Matches a fixed list of common English words and contractions. */
export const PHRASAL_WORDS_MODE = {
  begin: /\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|they|like|more)\b/
};

/**
 * Creates a comment mode
 *
 * The returned mode has scope `comment` (unless overridden by `modeOptions`)
 * and always gets two extra entries appended to its `contains` list: a
 * `doctag` mode for markers such as `TODO:` and a mode matching three
 * English-looking words in a row.
 *
 * @param {string | RegExp} begin
 * @param {string | RegExp} end
 * @param {Mode | {}} [modeOptions]
 * @returns {Partial<Mode>}
 */
export const COMMENT = function(begin, end, modeOptions = {}) {
  const mode = inherit(
    {
      scope: 'comment',
      begin,
      end,
      contains: []
    },
    modeOptions
  );
  // NOTE(review): `inherit` lives in ./utils.js and is not visible here. If it
  // copies properties shallowly, a `contains` array supplied via `modeOptions`
  // is the very array pushed to below - confirm callers never share one.
  mode.contains.push({
    scope: 'doctag',
    // hack to avoid the space from being included. the space is necessary to
    // match here to prevent the plain text rule below from gobbling up doctags
    begin: '[ ]*(?=(TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):)',
    end: /(TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):/,
    excludeBegin: true,
    relevance: 0
  });
  const ENGLISH_WORD = regex.either(
    // list of common 1 and 2 letter words in English
    "I",
    "a",
    "is",
    "so",
    "us",
    "to",
    "at",
    "if",
    "in",
    "it",
    "on",
    // note: this is not an exhaustive list of contractions, just popular ones
    /[A-Za-z]+['](d|ve|re|ll|t|s|n)/, // contractions - can't we'd they're let's, etc
    /[A-Za-z]+[-][a-z]+/, // `no-way`, etc.
    /[A-Za-z][a-z]{2,}/ // allow capitalized words at beginning of sentences
  );
  // looking like plain text, more likely to be a comment
  mode.contains.push(
    {
      // TODO: how to include ", (, ) without breaking grammars that use these for
      // comment delimiters?
      // begin: /[ ]+([()"]?([A-Za-z'-]{3,}|is|a|I|so|us|[tT][oO]|at|if|in|it|on)[.]?[()":]?([.][ ]|[ ]|\))){3}/
      // ---

      // this tries to find sequences of 3 english words in a row (without any
      // "programming" type syntax) this gives us a strong signal that we've
      // TRULY found a comment - vs perhaps scanning with the wrong language.
      // It's possible to find something that LOOKS like the start of the
      // comment - but then if there is no readable text - good chance it is a
      // false match and not a comment.
      //
      // for a visual example please see:
      // https://github.com/highlightjs/highlight.js/issues/2827

      begin: regex.concat(
        /[ ]+/, // necessary to prevent us gobbling up doctags like /* @author Bob Mcgill */
        '(',
        ENGLISH_WORD,
        /[.]?[:]?([.][ ]|[ ])/,
        '){3}') // look for 3 words in a row
    }
  );
  return mode;
};

/** `//` comment running to end of line. */
export const C_LINE_COMMENT_MODE = COMMENT('//', '$');
/** C-style block comment. */
export const C_BLOCK_COMMENT_MODE = COMMENT('/\\*', '\\*/');
/** `#` comment running to end of line. */
export const HASH_COMMENT_MODE = COMMENT('#', '$');

export const NUMBER_MODE = {
  scope: 'number',
  begin: NUMBER_RE,
  relevance: 0
};

export const C_NUMBER_MODE = {
  scope: 'number',
  begin: C_NUMBER_RE,
  relevance: 0
};

export const BINARY_NUMBER_MODE = {
  scope: 'number',
  begin: BINARY_NUMBER_RE,
  relevance: 0
};

/**
 * Regular-expression literal: begins at a `/` that has another `/` later on
 * the same line, and ends at a `/` followed by any of the flags `gimuy`.
 * Backslash escapes and `[...]` character classes are matched inside it.
 */
export const REGEXP_MODE = {
  scope: "regexp",
  begin: /\/(?=[^/\n]*\/)/,
  end: /\/[gimuy]*/,
  contains: [
    BACKSLASH_ESCAPE,
    {
      begin: /\[/,
      end: /\]/,
      relevance: 0,
      contains: [BACKSLASH_ESCAPE]
    }
  ]
};

export const TITLE_MODE = {
  scope: 'title',
  begin: IDENT_RE,
  relevance: 0
};

export const UNDERSCORE_TITLE_MODE = {
  scope: 'title',
  begin: UNDERSCORE_IDENT_RE,
  relevance: 0
};

export const METHOD_GUARD = {
  // excludes method names from keyword processing
  begin: '\\.\\s*' + UNDERSCORE_IDENT_RE,
  relevance: 0
};

/**
 * Adds end same as begin mechanics to a mode
 *
 * Your mode must include at least a single () match group as that first match
 * group is what is used for comparison
 *
 * The callbacks are assigned onto the given `mode` object itself, which is
 * then returned.
 *
 * @param {Partial<Mode>} mode
 */
export const END_SAME_AS_BEGIN = function(mode) {
  return Object.assign(mode,
    {
      /** @type {ModeCallback} */
      'on:begin': (m, resp) => {
        // remember the first capture group of the begin match
        resp.data._beginMatch = m[1];
      },
      /** @type {ModeCallback} */
      'on:end': (m, resp) => {
        // reject an end match whose first capture group differs from begin's
        if (resp.data._beginMatch !== m[1]) resp.ignoreMatch();
      }
    });
};