rittenhop-ghost/versions/5.94.2/node_modules/mensch/lib/lexer.js

693 lines
15 KiB
JavaScript

// Debug switch: when `true`, the lexer logs every character/state pair it
// visits and every token it emits.
var DEBUG = false; // `true` to print debugging info.
// Timing switch: when `true`, each call to `lex()` logs its elapsed time.
var TIMER = false; // `true` to time calls to `lex()` and print the results.
// Logger created by the sibling `./debug` module; 'lex' is presumably the
// label it prints with each message (confirm in ./debug).
var debug = require('./debug')('lex');
// The module exports the `lex` function itself.
exports = module.exports = lex;
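/**
* Convert a CSS string into a flat array of lexical tokens.
*
* Tokens are started by `initializeToken()` and completed by `addToken()`,
* so each one normally carries a `type`, a `start` and an `end` position
* (`{ line, col }`). Depending on the type a token may also carry `text`
* (selectors, comments), `name` and `value` (declarations, @-rules) or
* `prefix` (vendor-prefixed @-rules).
*
* @param {String} css CSS source text to tokenize.
* @returns {Array} The lexical tokens, in source order.
*/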
function lex(css) {
// Lexer state shared by every helper closure below.
var start; // Debug timer start.
var buffer = ''; // Character accumulator
var ch; // Current character
// `column` starts at 0 and `cursor` at -1 so that the first getCh() call,
// which advances both by one, lands on index 0 / column 1.
var column = 0; // Current source column number
var cursor = -1; // Current source cursor position
var depth = 0; // Current nesting depth
var line = 1; // Current source line number
var state = 'before-selector'; // Current state
var stack = [state]; // State stack
var token = {}; // Current token
var tokens = []; // Token accumulator
// Supported @-rules, in roughly descending order of usage probability.
// Each entry is either a bare rule name or an object with a `name` plus
// optional overrides read by the '@' handler in the main loop:
//   - `type`:   token type to report instead of the rule name
//   - `prefix`: vendor prefix copied onto the token
//   - `state`:  lexer state to enter (entries without it enter 'at-group')
// Entries are tried in order and the first one whose name follows the '@'
// wins.
var atRules = [
'media',
'keyframes',
{ name: '-webkit-keyframes', type: 'keyframes', prefix: '-webkit-' },
{ name: '-moz-keyframes', type: 'keyframes', prefix: '-moz-' },
{ name: '-ms-keyframes', type: 'keyframes', prefix: '-ms-' },
{ name: '-o-keyframes', type: 'keyframes', prefix: '-o-' },
'font-face',
{ name: 'import', state: 'before-at-value' },
{ name: 'charset', state: 'before-at-value' },
'supports',
'viewport',
{ name: 'namespace', state: 'before-at-value' },
'document',
{ name: '-moz-document', type: 'document', prefix: '-moz-' },
'page'
];
// -- Functions ------------------------------------------------------------
/**
 * Step the cursor forward by one character and read the character it
 * lands on.
 *
 * @returns {String|undefined} The character under the advanced cursor, or
 *   `undefined` once the cursor has moved past the end of the input.
 */
function getCh() {
  skip();
  var current = css[cursor];
  return current;
}
/**
 * Read a state from the state stack, counting back from the top.
 *
 * With no index (or an index of 0) the current state is returned directly;
 * an index of 1 is the state beneath it, and so on.
 *
 * @param {Number} [index=0] How many entries below the top to look.
 * @returns {String} state
 */
function getState(index) {
  if (!index) {
    return state;
  }
  var top = stack.length - 1;
  return stack[top - index];
}
/**
 * Check whether the input continues with `str` immediately after the
 * cursor, i.e. whether `str` starts exactly one position ahead.
 *
 * @param {String} str The string to look for.
 * @returns {Boolean} Whether the string was found.
 */
function isNextString(str) {
  var from = cursor + 1;
  var candidate = css.substring(from, from + str.length);
  return candidate === str;
}
/**
 * Locate a substring in the input, scanning forward from the current
 * cursor position. The substring may begin anywhere at or after the cursor.
 *
 * The result is an offset relative to the cursor, not an absolute index.
 * A match that starts exactly at the cursor (offset 0) is reported as
 * `false`, the same as no match at all.
 *
 * The search runs in place using `indexOf`'s start position, so the
 * remainder of the input is not copied on every call. It expects the cursor
 * to be on a character of the input (>= 0), which holds whenever it is
 * called from the main loop.
 *
 * @param {String} str The substring to look for.
 * @returns {Number|false} Offset from the cursor, or `false` if not found.
 */
function find(str) {
  var pos = css.indexOf(str, cursor);
  return pos > cursor ? pos - cursor : false;
}
/**
 * Check whether the character directly after the cursor equals `ch`.
 *
 * @param {String} ch Character.
 * @returns {Boolean} Whether the character is next.
 */
function isNextChar(ch) {
  var upcoming = peek(1);
  return upcoming === ch;
}
/**
 * Read a character relative to the cursor without moving it. Positive
 * offsets look ahead, negative offsets look behind.
 *
 * An offset of 0 is treated like a missing offset, so it also reads the
 * next character rather than the current one.
 *
 * @param {Number} [offset=1] Cursor offset.
 * @returns {String|undefined} The character there, or `undefined` when the
 *   position falls outside the input.
 */
function peek(offset) {
  var delta = offset ? offset : 1;
  var target = cursor + delta;
  return css[target];
}
/**
 * Drop the top entry of the state stack and make the entry beneath it the
 * current state. If nothing is left, the current state becomes `undefined`.
 *
 * @returns {String} The removed state.
 */
function popState() {
  var discarded = stack.pop();
  var remaining = stack.length;
  state = remaining > 0 ? stack[remaining - 1] : undefined;
  return discarded;
}
/**
 * Enter a new state: put it on top of the state stack and make it the
 * current state.
 *
 * @param {String} newState The new state.
 * @returns {Number} The new stack length.
 */
function pushState(newState) {
  var size = stack.push(newState);
  state = newState;
  return size;
}
/**
 * Swap the current state for another one without changing the depth of the
 * state stack: the top stack entry is overwritten in place.
 *
 * @param {String} newState The new state.
 * @returns {String} The state that was current before the swap.
 */
function replaceState(newState) {
  var outgoing = state;
  var top = stack.length - 1;
  state = newState;
  stack[top] = newState;
  return outgoing;
}
/**
 * Move the character cursor forward and keep the `line` / `column`
 * counters in step with it. Negative numbers are not supported!
 *
 * A newline is counted at the moment the cursor moves off it: `line` is
 * incremented and `column` restarts at 1 for the character that follows.
 * A multi-character skip accounts for every character from the current
 * cursor position up to (but not including) the landing position, so it
 * produces the same counters as the equivalent run of single steps.
 *
 * @param {Number} [n=1] Number of characters to skip. A missing or zero
 *   value skips a single character.
 */
function skip(n) {
  var count = n || 1;

  if (count === 1) {
    if (css[cursor] === '\n') {
      line++;
      column = 1;
    } else {
      column++;
    }
    cursor++;
    return;
  }

  // Everything the cursor is about to move off, split at line breaks.
  var segments = css.slice(cursor, cursor + count).split('\n');
  var lastSegment = segments[segments.length - 1];

  if (segments.length > 1) {
    line += segments.length - 1;
    column = 1;
  }
  column += lastSegment.length;
  cursor += count;
}
/**
 * Finish the current token and start afresh: stamp the token with the
 * current line/column as its end position, append it to the token list,
 * then clear both the character buffer and the current token.
 */
function addToken() {
  var finished = token;
  finished.end = { line: line, col: column };

  if (DEBUG) {
    debug('addToken:', JSON.stringify(finished, null, 2));
  }

  tokens.push(finished);
  token = {};
  buffer = '';
}
/**
 * Begin a new current token of the given type, recording the current
 * line/column as its start position. Whatever token was current before is
 * discarded.
 *
 * @param {String} type Token type.
 */
function initializeToken(type) {
  var begin = { line: line, col: column };
  token = { type: type, start: begin };
}
// -- Main Loop ------------------------------------------------------------
/*
  The main loop is a state machine that reads in one character at a time,
  and determines what to do based on the current state and character.

  It is implemented as nested `switch` statements: the outer one dispatches
  on the character, the inner ones on the current state. Characters are
  collected in `buffer` until a delimiter closes the current token, at which
  point the relevant token field (`text`, `name` or `value`) is filled in
  from the buffer and the token is emitted with addToken(). Line and column
  tracking happens inside skip(), which getCh() calls for every character.

  The case orders have been mildly optimized based on rough probabilities
  calculated by processing a small sample of real-world CSS. Further
  optimization (such as a dispatch table) shouldn't be necessary since the
  total number of cases is very low.
*/
TIMER && (start = Date.now());

// getCh() returns `undefined` past the end of the input, which ends the loop.
while ((ch = getCh())) {
  DEBUG && debug(ch, getState());

  switch (ch) {
    // Space: kept only where whitespace is part of the text being collected.
    case ' ':
      switch (getState()) {
        case 'selector':
        case 'value':
        case 'value-paren':
        case 'at-group':
        case 'at-value':
        case 'comment':
        case 'double-string':
        case 'single-string':
          buffer += ch;
          break;
      }
      break;

    // Newline, tab, carriage return or form feed
    case '\n':
    case '\t':
    case '\r':
    case '\f':
      switch (getState()) {
        case 'value':
        case 'value-paren':
        case 'at-group':
        case 'comment':
        case 'single-string':
        case 'double-string':
        case 'selector':
          buffer += ch;
          break;

        case 'at-value':
          // Tokenize an @-rule if a semi-colon was omitted.
          if ('\n' === ch) {
            token.value = buffer.trim();
            addToken();
            popState();
          }
          break;
      }
      break;

    case ':':
      switch (getState()) {
        case 'name':
          // End of a property name; its value comes next.
          token.name = buffer.trim();
          buffer = '';
          replaceState('before-value');
          break;

        case 'before-selector':
          // A selector may start with a colon (e.g. a pseudo-class).
          buffer += ch;
          initializeToken('selector');
          pushState('selector');
          break;

        case 'before-value':
          replaceState('value');
          buffer += ch;
          break;

        default:
          buffer += ch;
          break;
      }
      break;

    case ';':
      switch (getState()) {
        case 'name':
        case 'before-value':
        case 'value':
          // Tokenize a declaration; if the value is empty skip the
          // declaration.
          if (buffer.trim().length > 0) {
            token.value = buffer.trim();
            addToken();
          }
          replaceState('before-name');
          break;

        case 'value-paren':
          // Insignificant semi-colon
          buffer += ch;
          break;

        case 'at-value':
          // Tokenize an @-rule
          token.value = buffer.trim();
          addToken();
          popState();
          break;

        case 'before-name':
          // Extraneous semi-colon
          break;

        default:
          buffer += ch;
          break;
      }
      break;

    case '{':
      switch (getState()) {
        case 'selector':
          // If the sequence is `\{` then assume that the brace should be
          // escaped.
          if (peek(-1) === '\\') {
            buffer += ch;
            break;
          }
          // Tokenize a selector
          token.text = buffer.trim();
          addToken();
          replaceState('before-name');
          depth = depth + 1;
          break;

        case 'at-group':
          // Tokenize an @-group
          token.name = buffer.trim();
          // XXX: @-rules are starting to get hairy
          // The body of these three groups holds declarations; every other
          // group holds nested rules. The type must be read before
          // addToken() resets the current token.
          switch (token.type) {
            case 'font-face':
            case 'viewport':
            case 'page':
              pushState('before-name');
              break;
            default:
              pushState('before-selector');
          }
          addToken();
          depth = depth + 1;
          break;

        case 'name':
        case 'at-rule':
          // Tokenize a declaration or an @-rule
          token.name = buffer.trim();
          addToken();
          pushState('before-name');
          depth = depth + 1;
          break;

        case 'comment':
        case 'double-string':
        case 'single-string':
          // Ignore braces in comments and strings
          buffer += ch;
          break;

        case 'before-value':
          replaceState('value');
          buffer += ch;
          break;
      }
      break;

    case '}':
      switch (getState()) {
        case 'before-name':
        case 'name':
        case 'before-value':
        case 'value':
          // If the buffer contains anything, it is a value
          if (buffer) {
            token.value = buffer.trim();
          }
          // If the current token has a name and a value it should be
          // tokenized.
          if (token.name && token.value) {
            addToken();
          }
          // Leave the block
          initializeToken('end');
          addToken();
          popState();
          // We might need to leave again.
          // XXX: What about 3 levels deep?
          if ('at-group' === getState()) {
            initializeToken('at-group-end');
            addToken();
            popState();
          }
          if (depth > 0) {
            depth = depth - 1;
          }
          break;

        case 'at-group':
        case 'before-selector':
        case 'selector':
          // If the sequence is `\}` then assume that the brace should be
          // escaped.
          if (peek(-1) === '\\') {
            buffer += ch;
            break;
          }
          if (depth > 0) {
            // Leave block if in an at-group
            if ('at-group' === getState(1)) {
              initializeToken('at-group-end');
              addToken();
            }
          }
          if (depth > 1) {
            popState();
          }
          if (depth > 0) {
            depth = depth - 1;
          }
          break;

        case 'double-string':
        case 'single-string':
        case 'comment':
          // Ignore braces in comments and strings.
          buffer += ch;
          break;
      }
      break;

    // Strings: a quote opens a string state, and the matching unescaped
    // quote closes it. The quote character itself is always buffered.
    case '"':
    case "'":
      switch (getState()) {
        case 'double-string':
          if ('"' === ch && '\\' !== peek(-1)) {
            popState();
          }
          break;

        case 'single-string':
          if ("'" === ch && '\\' !== peek(-1)) {
            popState();
          }
          break;

        case 'before-at-value':
          replaceState('at-value');
          pushState('"' === ch ? 'double-string' : 'single-string');
          break;

        case 'before-value':
          replaceState('value');
          pushState('"' === ch ? 'double-string' : 'single-string');
          break;

        case 'comment':
          // Ignore strings within comments.
          break;

        default:
          if ('\\' !== peek(-1)) {
            pushState('"' === ch ? 'double-string' : 'single-string');
          }
      }
      buffer += ch;
      break;

    // Comments
    case '/':
      switch (getState()) {
        case 'comment':
        case 'double-string':
        case 'single-string':
          // Ignore
          buffer += ch;
          break;

        case 'before-value':
        case 'selector':
        case 'name':
        case 'value':
          if (isNextChar('*')) {
            // Ignore comments in selectors, properties and values. They are
            // difficult to represent in the AST.
            //
            // Look for the terminator only after the opening `/*`, so that
            // the opener's own `*` cannot pair with a following `/` (as in
            // `/*/ ... */`) and end the comment early.
            var closeAt = css.indexOf('*/', cursor + 2);
            if (closeAt !== -1) {
              // Land on the closing `/`; the next getCh() steps past it.
              skip(closeAt - cursor + 1);
            }
          } else {
            if (getState() === 'before-value') {
              replaceState('value');
            }
            buffer += ch;
          }
          break;

        default:
          if (isNextChar('*')) {
            // Create a comment token
            initializeToken('comment');
            pushState('comment');
            // Step onto the `*` so the comment text starts after `/*`.
            skip();
          } else {
            buffer += ch;
          }
          break;
      }
      break;

    // Comment end or universal selector
    case '*':
      switch (getState()) {
        case 'comment':
          if (isNextChar('/')) {
            // Tokenize a comment
            token.text = buffer; // Don't trim()!
            skip();
            addToken();
            popState();
          } else {
            buffer += ch;
          }
          break;

        case 'before-selector':
          buffer += ch;
          initializeToken('selector');
          pushState('selector');
          break;

        case 'before-value':
          replaceState('value');
          buffer += ch;
          break;

        default:
          buffer += ch;
      }
      break;

    // @-rules
    case '@':
      switch (getState()) {
        case 'comment':
        case 'double-string':
        case 'single-string':
          buffer += ch;
          break;

        case 'before-value':
          replaceState('value');
          buffer += ch;
          break;

        default:
          // Iterate over the supported @-rules and attempt to tokenize one.
          var tokenized = false;
          var name;
          var rule;
          for (var j = 0, len = atRules.length; !tokenized && j < len; ++j) {
            rule = atRules[j];
            // Entries are either a bare name or an object with a `name`.
            name = rule.name || rule;
            if (!isNextString(name)) { continue; }
            tokenized = true;
            initializeToken(name);
            pushState(rule.state || 'at-group');
            skip(name.length);
            if (rule.prefix) {
              token.prefix = rule.prefix;
            }
            if (rule.type) {
              token.type = rule.type;
            }
          }
          if (!tokenized) {
            // No supported @-rule follows: keep the `@` as ordinary text.
            buffer += ch;
          }
          break;
      }
      break;

    // Parentheses are tracked to disambiguate semi-colons, such as within a
    // data URI.
    case '(':
      switch (getState()) {
        case 'value':
          pushState('value-paren');
          break;
        case 'before-value':
          replaceState('value');
          break;
      }
      buffer += ch;
      break;

    case ')':
      switch (getState()) {
        case 'value-paren':
          popState();
          break;
        case 'before-value':
          replaceState('value');
          break;
      }
      buffer += ch;
      break;

    // Any other character starts whatever the current "before-*" state is
    // waiting for, and is buffered either way.
    default:
      switch (getState()) {
        case 'before-selector':
          initializeToken('selector');
          pushState('selector');
          break;
        case 'before-name':
          initializeToken('property');
          replaceState('name');
          break;
        case 'before-value':
          replaceState('value');
          break;
        case 'before-at-value':
          replaceState('at-value');
          break;
      }
      buffer += ch;
      break;
  }
}
// When TIMER is on, report the time elapsed since the `start` stamp taken
// just before the main loop.
TIMER && debug('ran in', (Date.now() - start) + 'ms');
// Hand back every token collected by addToken(), in source order.
return tokens;
}