2021-08-06 21:07:58 +10:00
import { EOL } from "os" ;
2020-06-19 17:16:23 +10:00
/**
 * Named character classes that a pattern can reference as `<name>`.
 *
 * Each value uses the bracketed `[...]` class syntax; the custom-class parser
 * looks the name up here and parses the value starting just after its `[`.
 */
const customCharClasses = {
  tagName: "[a-zA-Z-]",
  attrName: "[a-zA-Z-]",
};
2021-08-06 21:07:58 +10:00
/** Characters that the `\w` pattern escape expands to: space, CR, LF, tab, vertical tab, form feed. */
const whitespaceClass = Array.from(" \r\n\t\v\f");
2020-06-19 17:16:23 +10:00
/**
 * Lists every single-code-unit string from the first code unit of `from`
 * up to and including the first code unit of `to`.
 *
 * @param from String whose first UTF-16 code unit starts the range.
 * @param to String whose first UTF-16 code unit ends the range (inclusive).
 * @returns The characters in ascending order; empty when `to` sorts before
 *   `from` or when either argument is the empty string.
 */
const charRange = (from: string, to: string): string[] => {
  const first = from.charCodeAt(0);
  const last = to.charCodeAt(0);
  // A negative or NaN length is clamped to 0 by Array.from, matching a loop that never runs.
  return Array.from({ length: last - first + 1 }, (_, offset) =>
    String.fromCharCode(first + offset)
  );
};
/**
 * Resolves the character that follows a backslash in a pattern.
 *
 * Supported escapes: `\\`, `\]` and `\<` stand for the literal character;
 * `\w` stands for the whitespace class.
 *
 * @param pat The full pattern string.
 * @param at Index of the character immediately after the backslash.
 * @returns A fresh array of the characters the escape matches.
 * @throws Error if the pattern ends right after the backslash, or if the
 *   escaped character is not one of the supported escapes.
 */
const parsePatternEscape = (pat: string, at: number): string[] => {
  // A trailing backslash has nothing to escape; report it the same way the
  // other parsers report a truncated pattern.
  if (at >= pat.length) {
    throw new Error(`Unexpected end of pattern: ${pat}`);
  }
  const escaped = pat[at];
  switch (escaped) {
    case "\\":
    case "]":
    case "<":
      return [escaped];
    case "w":
      // Copy so callers can never mutate the shared module-level class.
      return [...whitespaceClass];
    default:
      throw new Error(`Unknown pattern escape: ${escaped}`);
  }
};
/**
 * Parses the inside of a bracketed character class.
 *
 * Understands backslash escapes, `x-y` ranges, and literal characters. A `-`
 * that is directly followed by the closing `]` is a literal hyphen, so `[a-]`
 * matches `a` and `-`.
 *
 * @param pat The full pattern string.
 * @param from Index of the first character after the opening `[`.
 * @returns A tuple of the characters in the class and the index of the
 *   closing `]`.
 * @throws Error if the pattern ends before a closing `]` is found, or if an
 *   escape inside the class is invalid.
 */
const parsePatternClass = (pat: string, from: number): [string[], number] => {
  const chars: string[] = [];
  for (let i = from; i < pat.length; i++) {
    const ch = pat[i];
    if (ch === "]") {
      return [chars, i];
    }
    if (ch === "\\") {
      // Skip the backslash; the escape parser consumes the following character.
      chars.push(...parsePatternEscape(pat, ++i));
      continue;
    }
    const rangeEnd = pat[i + 2];
    // Only treat `x-y` as a range when `y` is a real end point: not the end
    // of the pattern and not the class terminator.
    if (pat[i + 1] === "-" && rangeEnd !== undefined && rangeEnd !== "]") {
      chars.push(...charRange(ch, rangeEnd));
      i += 2;
    } else {
      chars.push(ch);
    }
  }
  throw new Error(`Unexpected end of pattern: ${pat}`);
};
2021-08-06 21:07:58 +10:00
/**
 * Parses a `<name>` reference to one of the named custom character classes.
 *
 * @param pat The full pattern string.
 * @param from Index of the first character after the opening `<`.
 * @returns A tuple of the characters in the named class and the index of the
 *   closing `>`.
 * @throws Error if there is no closing `>`, or if the name is not a key of
 *   `customCharClasses`.
 */
const parsePatternCustomClass = (
  pat: string,
  from: number
): [string[], number] => {
  const endIdx = pat.indexOf(">", from);
  if (endIdx === -1) {
    throw new Error(`Unexpected end of pattern: ${pat}`);
  }
  const name = pat.slice(from, endIdx);
  const classes: Readonly<Record<string, string>> = customCharClasses;
  // Own-property check so inherited keys such as "constructor" are rejected
  // instead of being handed to the class parser.
  const definition = Object.prototype.hasOwnProperty.call(classes, name)
    ? classes[name]
    : undefined;
  if (definition === undefined) {
    throw new Error(`Unknown custom character class: ${name}`);
  }
  // Definitions are written as "[...]"; start parsing just after the "[".
  return [parsePatternClass(definition, 1)[0], endIdx];
};
/**
 * Converts a pattern string into a sequence of character classes, one per
 * matched position.
 *
 * - `\x` is resolved by the escape parser.
 * - `[...]` is resolved by the bracketed-class parser.
 * - `<name>` is resolved by the custom-class parser.
 * - Any other character stands for itself.
 *
 * @param pat The pattern to parse.
 * @returns One array of candidate characters per position, in pattern order.
 * @throws Error propagated from the sub-parsers on malformed input.
 */
export const parsePattern = (pat: string): string[][] => {
  const positions: string[][] = [];
  let idx = 0;
  while (idx < pat.length) {
    const ch = pat[idx];
    if (ch === "\\") {
      // Consumes the backslash and the escaped character.
      positions.push(parsePatternEscape(pat, idx + 1));
      idx += 2;
    } else if (ch === "[") {
      const [members, closeIdx] = parsePatternClass(pat, idx + 1);
      positions.push(members);
      idx = closeIdx + 1;
    } else if (ch === "<") {
      const [members, closeIdx] = parsePatternCustomClass(pat, idx + 1);
      positions.push(members);
      idx = closeIdx + 1;
    } else {
      positions.push([ch]);
      idx += 1;
    }
  }
  return positions;
};
/**
 * One node of the trie.
 *
 * `value` is the payload stored at this node, if any. `children` holds the
 * child nodes; the builder indexes it by character code and leaves unused
 * slots as holes.
 */
type Node = {
  value?: string;
  children: Array<Node>;
};
2021-08-06 21:07:58 +10:00
/**
 * Creates a trie node with no children.
 *
 * @param value Optional payload for the node; the `value` key is always
 *   present on the result and is `undefined` when omitted.
 * @returns A new node object with its own empty `children` array.
 */
const createNode = (value?: string) => {
  return { value, children: [] };
};
2020-06-19 17:16:23 +10:00
/**
 * Builds a trie keyed by byte values and renders it as Rust source: one
 * `static` of type `&crate::common::pattern::TrieNode<valueType>` per distinct
 * node, with the root exposed as `pub static <name>`.
 *
 * Values are inserted verbatim into the generated code as `Some(<value>)`, so
 * they must already be valid Rust expressions of `valueType`.
 */
export class TrieBuilder {
  private readonly root: Node = createNode();
  // Generated `static` declarations, in emission order (children before parents).
  private readonly variables: string[] = [];
  private nextId = 0;
  // Maps a node's generated initializer text to the variable already emitted
  // for it, so structurally identical nodes share one static.
  private readonly codeCache = new Map<string, string>();

  /**
   * @param name Rust identifier for the public root static; also the prefix
   *   of every internal `<name>_NODE_<n>` static.
   * @param valueType Rust type used as the `TrieNode` type argument.
   */
  constructor(
    private readonly name: string,
    private readonly valueType: string
  ) {}

  /**
   * Gives every byte value 0-255 that has no child under the root a new leaf
   * carrying `val`. Existing root children are left untouched.
   *
   * @returns This builder, for chaining.
   */
  fillRemaining(val: string): this {
    const { children } = this.root;
    for (let i = 0; i < 256; i++) {
      children[i] = children[i] || createNode(val);
    }
    return this;
  }

  /**
   * Inserts a literal sequence and stores `val` at its final node.
   *
   * @param seq Sequence whose UTF-16 code units are used as bytes.
   * @param val Value to store; overwrites any value already at that node.
   * @returns This builder, for chaining.
   * @throws Error if any code unit of `seq` is greater than 255.
   */
  add(seq: string, val: string): this {
    let cur: Node = this.root;
    for (let i = 0; i < seq.length; i++) {
      const c = seq.charCodeAt(i);
      if (c > 255) throw new Error("Not a byte");
      cur = cur.children[c] = cur.children[c] || createNode();
    }
    cur.value = val;
    return this;
  }

  /**
   * Inserts every sequence matched by a parsed pattern and stores `val` at
   * each resulting end node.
   *
   * @param pattern One array of candidate single characters per position.
   * @param val Value to store at every end node.
   * @returns This builder, for chaining.
   * @throws Error if a candidate is not exactly one code unit long or its
   *   code unit is greater than 255.
   */
  addPattern(pattern: string[][], val: string): this {
    let cur: Node[] = [this.root];
    for (const cls of pattern) {
      // A Set keeps each reached node once even if the class repeats a
      // character, so the frontier cannot blow up with duplicates.
      const next = new Set<Node>();
      for (const ch of cls) {
        if (ch.length !== 1) throw new Error("Not a byte");
        const c = ch.charCodeAt(0);
        if (c > 255) throw new Error("Not a byte");
        for (const n of cur) {
          next.add((n.children[c] = n.children[c] || createNode()));
        }
      }
      cur = Array.from(next);
    }
    for (const n of cur) {
      n.value = val;
    }
    return this;
  }

  // Generate the code for a node's variable name and value, and return the name.
  private generateNode(node: Node): string {
    // Only generate defined children to cut down on size of array, which would otherwise
    // bog down compile time and binary size for large trees with lots of nodes.
    // If array is empty, just use zero.
    const firstIdx =
      node.children.length && node.children.findIndex((v) => v !== undefined);
    const children = Array.from(
      { length: node.children.length - firstIdx },
      (_, i) => {
        const child = node.children[i + firstIdx];
        return child ? `Some(${this.generateNode(child)})` : "None";
      }
    ).join(", ");

    const value = node.value === undefined ? "None" : `Some(${node.value})`;
    const varValue = [
      "&crate::common::pattern::TrieNode {",
      `  offset: ${firstIdx},`,
      `  value: ${value},`,
      `  children: &[${children}],`,
      "}",
    ].join("\n");

    // Reuse the static already emitted for an identical node.
    const existingVarName = this.codeCache.get(varValue);
    if (existingVarName) {
      return existingVarName;
    }

    const name = `${this.name}_NODE_${this.nextId++}`;
    this.variables.push(
      `static ${name}: &crate::common::pattern::TrieNode<${this.valueType}> = ${varValue};`
    );
    this.codeCache.set(varValue, name);
    return name;
  }

  /**
   * Renders the whole trie as Rust source.
   *
   * Safe to call repeatedly: all generation state is reset first, so each
   * call returns the complete output for the current trie.
   *
   * @returns The static declarations separated by blank lines, with the root
   *   declared as `pub static <name>`.
   */
  generate(): string {
    this.variables.length = 0;
    this.nextId = 0;
    // The cache maps initializers to names from the previous run; keeping it
    // would make the root resolve from cache and nothing would be emitted.
    this.codeCache.clear();
    const rootName = this.generateNode(this.root);
    // Make root node public and use proper name.
    return this.variables
      .join(EOL + EOL)
      .replace(`static ${rootName}`, `pub static ${this.name}`);
  }
}