Can anyone suggest a more elegant way of 'tokenizing' C# code for HTML formatting?


(A question about refactoring F# code got me a down vote, but also some interesting and useful answers. And 62 F# questions out of 32,000+ seems pitiful, so I'm going to take the risk of some more disapproval!)



I was trying to post a bit of code on a Blogger blog yesterday, and turned to a site which I had found useful in the past. However, the Blogger editor ate all the style declarations, so that turned out to be a dead end.



So (like any hacker), I thought "How hard can it be?" and rolled my own in <100 lines of F#.



Here is the 'meat' of the code, which turns an input string into a list of 'tokens'. Note that these tokens aren't to be confused with lexing/parsing-style tokens. I did look at those briefly, and though I hardly understood anything, I did understand that they would give me only tokens, whereas I want to keep my original string.



The question is: is there a more elegant way of doing this? I don't like the n re-definitions of s required to remove each token string from the input string, but it's difficult to split the string into potential tokens in advance, because of things like comments, strings and the #region directive (which contains a non-word character).



//Types of tokens we are going to detect.
//Every case except EOF carries the exact source text it was recognised from,
//so concatenating the payloads of a token list reproduces the input string.
type Token =
    | Whitespace of string
    | Comment of string
    | Strng of string
    | Keyword of string
    | Text of string
    | EOF

//Turn a string into a list of recognised tokens.
//  s       - the C# source text to split
//  returns - the tokens in source order (EOF itself is never included)
let tokenize (s:string) =
    //This is the 'parser' - should we look at compiling the regexes in advance?
    //Returns the single token found at the very start of st, or EOF when st is empty.
    let nextToken (st:string) =
        match st with
        | st when Regex.IsMatch(st, "^\s+") -> Whitespace(Regex.Match(st, "^\s+").Value)
        //double slash-style comments; (\n|$) also accepts a comment on the last line with no trailing newline
        | st when Regex.IsMatch(st, "^//.*?\r?(\n|$)") -> Comment(Regex.Match(st, "^//.*?\r?(\n|$)").Value)
        | st when Regex.IsMatch(st, "^/\*(.|[\r?\n])*?\*/") -> Comment(Regex.Match(st, "^/\*(.|[\r?\n])*?\*/").Value) // /* */ style comments http://ostermiller.org/findcomment.html
        | st when Regex.IsMatch(st, @"^@?""([^""\\]|\\.|"""")*""") -> Strng(Regex.Match(st, @"^@?""([^""\\]|\\.|"""")*""").Value) // unescaped = ^@?"([^"\\]|\\.|"")*" http://wordaligned.org/articles/string-literals-and-regular-expressions
        | st when Regex.IsMatch(st, "^#(end)?region") -> Keyword(Regex.Match(st, "^#(end)?region").Value)
        | st when st <> "" ->
            match Regex.Match(st, @"^[^""\s]*").Value with //all text until the next whitespace or quote (this may be wrong)
            //An empty match means st starts with a quote that the string pattern rejected
            //(an unterminated literal). Emit that one character as Text so the caller
            //always consumes input; returning Text "" would make the loop spin forever.
            | "" -> Text(st.Substring(0, 1))
            | x when isKeyword x -> Keyword(x) //isKeyword is defined elsewhere (not shown here)
            | x -> Text(x)
        | _ -> EOF

    //Tail-recursively take the next token and rebuild the rest of the string until nothing is left.
    let tokenEater s =
        let rec loop (s:string) acc =
            let t = nextToken s
            match t with
            | EOF -> List.rev acc //return the accumulator (reversed, because consing built it backwards)
            | Whitespace(x) | Comment(x)
            | Keyword(x) | Text(x) | Strng(x) ->
                loop (s.Remove(0, x.Length)) (t::acc) //drop the consumed prefix; tail recursive
        loop s []

    tokenEater s


(If anyone is really interested, I am happy to post the rest of the code.)



Edit
Using the suggestion from kvb, the central bit now looks like this, much better!



//Returns the single token found at the very start of st, or EOF when st is empty.
//NOTE(review): Matches and IsKeyword are assumed to be partial active patterns
//defined elsewhere in the post (not shown here); Matches is used as
//"pattern -> matched text" - confirm against their definitions.
let nextToken (st:string) =
    match st with
    | Matches "^\s+" s -> Whitespace(s)
    | Matches "^//.*?\r?(\n|$)" s -> Comment(s) //double slash-style comments
    | Matches "^/\*(.|[\r?\n])*?\*/" s -> Comment(s) // /* */ style comments http://ostermiller.org/findcomment.html
    | Matches @"^@?""([^""\\]|\\.|"""")*""" s -> Strng(s) // unescaped regexp = ^@?"([^"\\]|\\.|"")*" http://wordaligned.org/articles/string-literals-and-regular-expressions
    | Matches "^#(end)?region" s -> Keyword(s)
    | Matches @"^[^""\s]+" s -> //all text until the next whitespace or quote (this may be wrong)
        match s with
        | IsKeyword _ -> Keyword(s)
        | _ -> Text(s)
    | "" -> EOF
    //Nothing above matched but input remains (e.g. a quote opening an unterminated
    //string). Emit one character as Text instead of reporting EOF, which would
    //silently discard the rest of the input.
    | _ -> Text(st.Substring(0, 1))


Comments

Popular posts from this blog

list macos calm editors formula editors

how hibernate @any-related annotations?

why does floated <input> control floated component slip over too distant right ie7, nonetheless firefox?