commit 08dacdd263f8b4bc56e5358801ba4979d596f912 Author: Kogia-sima Date: Fri Jun 5 05:39:33 2020 +0900 Add source code diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e234ff5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +target/ +scripts/ +*.log +*.s diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..c9e1ba5 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,253 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ctor" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "difference" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "examples" +version = "0.0.1" +dependencies = [ + "sailfish 0.0.1", + "sailfish-macros 0.0.1", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "integration-tests" +version = "0.0.1" +dependencies = [ + "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "sailfish 0.0.1", + "sailfish-macros 0.0.1", + "trybuild 1.0.28 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "itoa" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+ +[[package]] +name = "output_vt100" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "pretty_assertions" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "ctor 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", + "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "proc-macro2" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quote" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "sailfish" +version = "0.0.1" +dependencies = [ + "itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "sailfish-compiler" +version = "0.0.1" +dependencies = [ + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "sailfish-macros" +version = "0.0.1" +dependencies = [ + 
"proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)", + "sailfish-compiler 0.0.1", +] + +[[package]] +name = "serde" +version = "1.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde_derive 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_derive" +version = "1.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "syn" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "termcolor" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "toml" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "trybuild" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ 
+ "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)", + "termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "toml 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-xid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum ctor 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "cf6b25ee9ac1995c54d7adb2eff8cfffb7260bc774fb63c601ec65467f43cd9d" +"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +"checksum itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" +"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +"checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" +"checksum output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" +"checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427" +"checksum proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)" = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" +"checksum quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea" +"checksum ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +"checksum serde 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)" = "c9124df5b40cbd380080b2cc6ab894c040a3070d995f5c9dc77e18c34a8ae37d" +"checksum serde_derive 1.0.111 (registry+https://github.com/rust-lang/crates.io-index)" = "3f2c3ac8e6ca1e9c80b8be1023940162bf81ae3cffbb1809474152f2ce1eb250" +"checksum serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)" = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2" +"checksum syn 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)" = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2" +"checksum termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" +"checksum toml 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ffc92d160b1eef40665be3a05630d003936a3bc7da7421277846c2613e92c71a" +"checksum trybuild 1.0.28 (registry+https://github.com/rust-lang/crates.io-index)" = "39e3183158b2c8170db33b8b3a90ddc7b5f380d15b50794d22c1fa9c61b47249" +"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +"checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..4ef3f48 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,32 @@ +[workspace] +members = [ + "sailfish", + "sailfish-compiler", + "sailfish-macros", + "examples", + "integration-tests" +] +exclude = [ + "benches" +] + +[profile.dev.package.syn] +opt-level = 0 +debug = false +debug-assertions = false +overflow-checks = false +incremental = true + +[profile.test.package.syn] +opt-level = 0 +debug = false +debug-assertions = false +overflow-checks = false +incremental = true + +[profile.release.package.syn] +opt-level = 0 +debug = false +debug-assertions = false +overflow-checks = false +incremental = true diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..52754f8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ + + The MIT License (MIT) + 
Copyright (c) 2020 Ryohei Machida + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE + OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..6e513c3 --- /dev/null +++ b/README.md @@ -0,0 +1,98 @@ +
+ +![SailFish](./resources/logo.png) + +Simple, small, and extremely fast template engine for Rust + +
+ +## ✨ Features + +- Simple and intuitive syntax inspired by [EJS](https://ejs.co/) +- Relatively small number of dependencies (<15 crates in total) +- Extremely fast (See [benchmarks](./benches)) +- Better error message +- Template rendering is always type-safe because templates are statically compiled. +- Syntax highlighting support ([vscode](./syntax/vscode), [vim](./syntax/vim)) +- Automatically re-compile sources when template file is updated. + +:warning: Currently sailfish is in early-stage development. You can use this library but be aware that there might be some bugs. Also, the API is still unstable, and thus may change frequently. + +## 🐟 Example + +Dependencies: + +```toml +[dependencies] +sailfish = "0.0.1" +sailfish-macros = "0.0.1" +``` + +Template file (templates/hello.stpl): + +```html + + + + <%= content %> + + +``` + +Code: + +```rust +#[macro_use] +extern crate sailfish_macros; // enable derive macro + +use sailfish::TemplateOnce; + +#[derive(TemplateOnce)] +#[template(path = "hello.stpl")] +struct Hello { + content: String +} + +fn main() { + println!("{}", Hello { content: String::from("Hello, world!") }.render_once().unwrap()); +} +``` + +You can find more examples in [examples](./examples) directory. + +## 🐾 Roadmap + +- `Template` trait ([RFC](https://github.com/Kogia-sima/sailfish/blob/master/docs/rfcs/3-template-trait.md)) +- Template inheritance (block, partials, etc.) +- Include other templates without copy +- Whitespace suppressing +- HTML minification +- Filters +- Dynamic template compilation ([RFC](https://github.com/Kogia-sima/sailfish/blob/master/docs/rfcs/1-dynamic-loading.md)) +- `format_templates!(fmt, args..)` macro + +## 👤 Author + +:jp: **Ryohei Machida** + +* Github: [@Kogia-sima](https://github.com/Kogia-sima) + +## 🤝 Contributing + +Contributions, issues and feature requests are welcome! + +Feel free to check [issues page](https://github.com/Kogia-sima/sailfish/issues). 
+ +## Show your support + +Give a ⭐️ if this project helped you! + + +## 📝 License + +Copyright © 2020 [Ryohei Machida](https://github.com/Kogia-sima). + +This project is [MIT](https://github.com/Kogia-sima/sailfish/blob/master/LICENSE) licensed. + +*** +_This README was generated with ❤️ by [readme-md-generator](https://github.com/kefranabg/readme-md-generator)_ diff --git a/docs/rfcs/1-dynamic-loading.md b/docs/rfcs/1-dynamic-loading.md new file mode 100644 index 0000000..c22bce0 --- /dev/null +++ b/docs/rfcs/1-dynamic-loading.md @@ -0,0 +1,210 @@ +# Dynamic template loading + +## Description + +Specify the path to template file at runtime, compile it, and then render with supplied data. + +This operation should be type-safe, and not raise any error after template compilation. + +## `sailfish::dynamic::compile` function API + +#### Signature + +```rust +fn compile>(path: P) -> DynamicTemplate; +``` + +#### Behaviour + +1. Generate Rust code to render templates +2. Compile it as a shared library by calling `cargo build` command. +3. Load the generated shared library. +4. Return the `DynamicTemplate` struct which contains the function pointer to call the template function. + +## `DynamicTemplate::render` method API + +#### Signature + +```rust +impl DynamicTemplate { + fn render(&self, data: &data) -> RenderResult; +} +``` + +#### Behaviour + +1. Serialize the `data` to byte array +2. Create the vtable for memory allocation (See the below section) +3. Pass those objects to the template function pointer. +4. Retrieve the result from function pointer, deserialize it to `Result` and then return it. + +Trait bound makes this code type-safe. + +## Safety for memory allocation + +Since the compiler used for compiling templates at runtime is different from the one used for compiling renderer, we must export allocator functions as vtable and share it. 
+ +```rust +#[repr(C)] +pub struct AllocVtable { + pub alloc: unsafe fn(Layout) -> *mut u8, + pub realloc: unsafe fn(*mut u8, Layout, usize) -> *mut u8, +} + +struct VBuffer { + data: *mut u8, + len: usize, + capacity: usize, + vtable: AllocVtable, +} +``` + +AllocVtable is passed to template function, and then VBuffer is constructed inside template function. + +VBuffer should always use AllocVtable to allocate/reallocate memory. That cannot be achieved with `std::string::String` struct only. We must re-implement the `RawVec` struct. + +## Rust standard library conflict problem + +Rarely, but not never, dynamically compiled templates may use a different version of the standard library. + +This causes undefined behaviour, so we should add `#![no_std]` attribute inside the generated Rust code. + +However, since it is a corner case, it may be better if we provide `no_std=false` option to avoid this behaviour. + +## `TemplateData` trait + +We must ensure that all of the data passed to templates should satisfy the following restrictions. + +- completely immutable +- does not allocate/deallocate memory +- can be serialized to/deserialized from byte array (All data is serialized to byte array, and then decoded inside templates) +- can be defined inside `#![no_std]` crate + +Sailfish provides `TemplateData` trait which satisfies the above restrictions. + +```rust +pub unsafe trait TemplateData { + fn type_name() -> String; + fn definition() -> String; + fn fields() -> &'static [&'static str]; + fn deserialize() -> String; // rust code to deserialize struct + fn serialize(&self, v: &mut Vec<u8>); +} +``` + +This trait can be implemented for the following types + +- String, +- Primitive integers (bool, char, u8, u16, u32, u64, u128, i8, i16, i32, i64, i128, isize, usize) +- [T; N] where T: TemplateData +- (T1, T2, T3, ...) where T1, T2, T3, ... 
: TemplateData +- Option\<T\> where T: TemplateData +- Vec\<T\> where T: TemplateData + +### `#[derive(TemplateData)]` attribute + +In order to pass user-defined data, the user must implement `TemplateData` manually. However, it is dangerous and should be avoided. + +We must export the `derive(TemplateData)` procedural macro to automatically implement this trait. + +This macro should cause an error if the type of any field does not implement `TemplateData`. + +### How template file is transformed (current idea) + +Template file contents are transformed into Rust code when `sailfish::dynamic::compile()` function is called. + +For example, if we have a template + +```html +

<%= msg %>

+``` + +and Rust code + +```rust +struct Message { + msg: String, +} + +let template = compile::("templates/message.stpl").unwrap(); +``` + +then, template will be transformed into the following code. + +```rust +#![no_std] +use sailfish::dynamic::runtime as sfrt; +use sfrt::{VBuffer, AllocVtable, OutputData, SizeHint, RenderResult}; + +struct Message { + msg: String, +} + +fn deserialize(data: &mut &[u8]) -> Message { + // Generated code from TemplateData::deserialize() + let msg = sfrt::deserialize_string(data); + + Message { msg } +} + +#[no_mangle] +pub extern fn sf_message(version: u64, data: *const [u8], data_len: usize, vtable: AllocVtable) -> OutputData { + let inner = move || -> RenderResult { + let mut data = unsafe { std::slice::from_raw_parts(data, data_len) }; + let Message { msg } = deserialize(&mut data); + + let mut buf = VBuffer::from_vtable(vtable); + + static SIZE_HINT = SizeHint::new(); + let size_hint = SIZE_HINT.get(); + buf.reserve(size_hint); + + { + sfrt::render_text!(buf, "

"); + sfrt::render_escaped!(buf, msg); + sfrt::render_text!(buf, "

"); + } + + SIZE_HINT.update(buf.len()) + Ok(buf.into_string()) + }; + + OutputData::from_result(inner()) +} +``` + +## Example usage + +Template: + +```html + + + + <%= name %>: <%= score %> + + +``` + +Rust code: + +```rust +use sailfish::dynamic::compile; +use sailfish_macros::TemplateData; + +#[derive(TemplateData)] +pub struct Team { + name: String, + score: u8 +} + +// compile the template as a callable shared library +let template: DynamicTemplate = compile::("templates/team.stpl").unwrap(); +let data = Team { + name: "Jiangsu".into(), + score: 43 +}; +// render templates with given data +let result: String = unsafe { template.render(data).unwrap() }; +println!("{}", result); +``` diff --git a/docs/rfcs/3-template-trait.md b/docs/rfcs/3-template-trait.md new file mode 100644 index 0000000..6ff4e52 --- /dev/null +++ b/docs/rfcs/3-template-trait.md @@ -0,0 +1,45 @@ +# Template trait + +## Description + +Currently `TemplateOnce::render_once` method consumes the object itself and not useful if you want to re-use the struct. + +`Template` trait helps those situation. `Template` trait has `render()` method, which does not consume the object itself. + +Like `TemplateOnce`, `Template` trait can be implemented using derive macro. + +## Definition + +```rust +pub trait Template { + fn render(&self) -> RenderResult; +} +``` + +Since `RenderError` can be converted into `fmt::Error`, we can now implement `Display` trait for those structs. + +```rust +impl Display for T { + ... +} +``` + +## Disadvantage + +If you derive this trait, you cannot move out the struct fields. For example, the following template + +```html +<% for msg in messages { %>
<%= msg %>
<% } %> +``` + +will be transformed into the Rust code like + +```rust +for msg in self.messages { + render_text!(_ctx, "
"); + render!(_ctx, msg); + render_text!(_ctx, "
"); +} +``` + +which causes a compilation error because `self.messages` cannot be moved. diff --git a/examples/Cargo.toml b/examples/Cargo.toml new file mode 100644 index 0000000..223c585 --- /dev/null +++ b/examples/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "examples" +version = "0.0.1" +authors = ["Ryohei Machida "] +edition = "2018" +publish = false + +[dependencies] +sailfish = { path = "../sailfish" } +sailfish-macros = { path = "../sailfish-macros" } + +[[bin]] +name = "simple" +path = "simple.rs" +test = false diff --git a/examples/simple.rs b/examples/simple.rs new file mode 100644 index 0000000..640b9ae --- /dev/null +++ b/examples/simple.rs @@ -0,0 +1,15 @@ +#[macro_use] +extern crate sailfish_macros; + +use sailfish::TemplateOnce; + +#[derive(TemplateOnce)] +#[template(path = "simple.stpl")] +struct Simple { + messages: Vec, +} + +fn main() { + let messages = vec![String::from("Message 1"), String::from("")]; + println!("{}", Simple { messages }.render_once().unwrap()); +} diff --git a/examples/templates/simple.stpl b/examples/templates/simple.stpl new file mode 100644 index 0000000..9ec4dd9 --- /dev/null +++ b/examples/templates/simple.stpl @@ -0,0 +1,12 @@ + + + + <%# This is a comment %> + <% for (i, msg) in messages.iter().enumerate() { %> + <% if i == 0 { %> +

Hello, world!

+ <% } %> +
<%= *msg %>
+ <% } %> + + diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml new file mode 100644 index 0000000..75693fc --- /dev/null +++ b/integration-tests/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "integration-tests" +version = "0.0.1" +authors = ["Kogia-sima "] +edition = "2018" +publish = false + +[dependencies] +sailfish = { path = "../sailfish" } +sailfish-macros = { path = "../sailfish-macros" } + +[dev-dependencies] +trybuild = "1.0.28" +pretty_assertions = "0.6.1" diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs new file mode 100644 index 0000000..7dc2343 --- /dev/null +++ b/integration-tests/src/lib.rs @@ -0,0 +1,21 @@ +use std::fmt; + +#[derive(PartialEq, Eq)] +pub struct PrettyString<'a>(pub &'a str); + +/// Make diff to display string as multi-line string +impl<'a> fmt::Debug for PrettyString<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.0) + } +} + +#[macro_export] +macro_rules! assert_string_eq { + ($left:expr, $right:expr) => { + pretty_assertions::assert_eq!( + $crate::PrettyString($left), + $crate::PrettyString($right) + ); + }; +} diff --git a/integration-tests/templates/custom_delimiter.out b/integration-tests/templates/custom_delimiter.out new file mode 100644 index 0000000..f607375 --- /dev/null +++ b/integration-tests/templates/custom_delimiter.out @@ -0,0 +1 @@ +
i: 10
diff --git a/integration-tests/templates/custom_delimiter.stpl b/integration-tests/templates/custom_delimiter.stpl new file mode 100644 index 0000000..a5f83df --- /dev/null +++ b/integration-tests/templates/custom_delimiter.stpl @@ -0,0 +1 @@ +<🍣 let i = 10; 🍣>
i: <🍣= i 🍣>
diff --git a/integration-tests/templates/empty.out b/integration-tests/templates/empty.out new file mode 100644 index 0000000..e69de29 diff --git a/integration-tests/templates/empty.stpl b/integration-tests/templates/empty.stpl new file mode 100644 index 0000000..e69de29 diff --git a/integration-tests/templates/json.out b/integration-tests/templates/json.out new file mode 100644 index 0000000..3f4a244 --- /dev/null +++ b/integration-tests/templates/json.out @@ -0,0 +1,4 @@ +{ + "name": "Taro", + "value": 16 +} diff --git a/integration-tests/templates/json.stpl b/integration-tests/templates/json.stpl new file mode 100644 index 0000000..b7c6f5b --- /dev/null +++ b/integration-tests/templates/json.stpl @@ -0,0 +1,4 @@ +{ + "name": "<%= name %>", + "value": <%= value %> +} diff --git a/integration-tests/templates/noescape.out b/integration-tests/templates/noescape.out new file mode 100644 index 0000000..b7b0a10 --- /dev/null +++ b/integration-tests/templates/noescape.out @@ -0,0 +1 @@ +raw:

Hello, World!

diff --git a/integration-tests/templates/noescape.stpl b/integration-tests/templates/noescape.stpl new file mode 100644 index 0000000..2f6ca3f --- /dev/null +++ b/integration-tests/templates/noescape.stpl @@ -0,0 +1 @@ +raw: <%- raw %> diff --git a/integration-tests/tests/basic.rs b/integration-tests/tests/basic.rs new file mode 100644 index 0000000..983eb3d --- /dev/null +++ b/integration-tests/tests/basic.rs @@ -0,0 +1,75 @@ +#[macro_use] +extern crate sailfish_macros; + +use integration_tests::assert_string_eq; +use sailfish::runtime::RenderResult; +use sailfish::TemplateOnce; +use std::path::PathBuf; + +fn assert_render_result(name: &str, result: RenderResult) { + let mut output_file = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + output_file.push("templates"); + output_file.push(name); + output_file.set_extension("out"); + + let expected = std::fs::read_to_string(output_file).unwrap(); + assert_string_eq!(&*result.unwrap(), &*expected); +} + +#[inline] +fn assert_render(name: &str, template: T) { + assert_render_result(name, template.render_once()); +} + +#[derive(TemplateOnce)] +#[template(path = "empty.stpl")] +struct Empty {} + +#[test] +fn empty() { + assert_render("empty", Empty {}); +} + +#[derive(TemplateOnce)] +#[template(path = "noescape.stpl")] +struct Noescape<'a> { + raw: &'a str, +} + +#[test] +fn noescape() { + assert_render( + "noescape", + Noescape { + raw: "

Hello, World!

", + }, + ); +} + +#[derive(TemplateOnce)] +#[template(path = "json.stpl")] +struct Json { + name: String, + value: u16, +} + +#[test] +fn json() { + assert_render( + "json", + Json { + name: String::from("Taro"), + value: 16, + }, + ); +} + +#[derive(TemplateOnce)] +#[template(path = "custom_delimiter.stpl")] +#[template(delimiter = '🍣')] +struct CustomDelimiter; + +#[test] +fn custom_delimiter() { + assert_render("custom_delimiter", CustomDelimiter); +} diff --git a/resources/logo.png b/resources/logo.png new file mode 100644 index 0000000..fa470b3 Binary files /dev/null and b/resources/logo.png differ diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..de982c0 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,6 @@ +max_width = 90 +hard_tabs = false +use_field_init_shorthand = true +edition = "2018" +reorder_imports = true +reorder_modules = true diff --git a/sailfish-compiler/Cargo.toml b/sailfish-compiler/Cargo.toml new file mode 100644 index 0000000..61a1d72 --- /dev/null +++ b/sailfish-compiler/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "sailfish-compiler" +version = "0.0.1" +authors = ["Ryohei Machida "] +description = "Really fast, intuitive template engine for Rust" +homepage = "https://github.com/Kogia-sima/sailfish" +repository = "https://github.com/Kogia-sima/sailfish" +readme = "../README.md" +keywords = ["markup", "template", "html"] +categories = ["template-engine"] +license = "MIT" +workspace = ".." 
+edition = "2018" + +[lib] +name = "sailfish_compiler" +doctest = false + +[features] +default = [] +procmacro = [] + +[dependencies] +memchr = "2.3.3" +quote = { version = "1.0.6", default-features = false } + +[dependencies.syn] +version = "1.0.21" +default-features = false +features = ["parsing", "full", "visit-mut", "printing", "clone-impls"] + +[dependencies.proc-macro2] +version = "1.0.10" +default-features = false +features = ["span-locations"] + +[dev-dependencies] +pretty_assertions = "0.6.1" diff --git a/sailfish-compiler/build.rs b/sailfish-compiler/build.rs new file mode 100644 index 0000000..f328e4d --- /dev/null +++ b/sailfish-compiler/build.rs @@ -0,0 +1 @@ +fn main() {} diff --git a/sailfish-compiler/src/compiler.rs b/sailfish-compiler/src/compiler.rs new file mode 100644 index 0000000..8862b21 --- /dev/null +++ b/sailfish-compiler/src/compiler.rs @@ -0,0 +1,84 @@ +use quote::ToTokens; +use std::fs; +use std::path::{Path, PathBuf}; + +use crate::error::*; +use crate::optimizer::Optimizer; +use crate::parser::Parser; +use crate::resolver::Resolver; +use crate::translator::Translator; +use crate::util::rustfmt_block; + +pub struct Compiler { + delimiter: char, + escape: bool, + cache_dir: PathBuf, +} + +impl Default for Compiler { + fn default() -> Self { + Self { + delimiter: '%', + escape: true, + cache_dir: Path::new(env!("OUT_DIR")).join("cache"), + } + } +} + +impl Compiler { + pub fn new() -> Self { + Self::default() + } + + pub fn delimiter(mut self, new: char) -> Self { + self.delimiter = new; + self + } + + pub fn escape(mut self, new: bool) -> Self { + self.escape = new; + self + } + + pub fn compile_file(&self, input: &Path, output: &Path) -> Result<(), Error> { + // TODO: introduce cache system + + let parser = Parser::new().delimiter(self.delimiter); + let translator = Translator::new().escape(self.escape); + let resolver = Resolver::new(); + let optimizer = Optimizer::new(); + + let compile_file = |input: &Path, output: &Path| -> 
Result<(), Error> { + let content = fs::read_to_string(&*input) + .chain_err(|| format!("Failed to open template file: {:?}", input))?; + + let stream = parser.parse(&*content); + let mut tsource = translator.translate(stream)?; + drop(content); + + resolver.resolve(&mut tsource.ast)?; + optimizer.optimize(&mut tsource.ast); + + if let Some(parent) = output.parent() { + fs::create_dir_all(parent)?; + } + if output.exists() { + fs::remove_file(output)?; + } + + let string = tsource.ast.into_token_stream().to_string(); + fs::write(output, rustfmt_block(&*string).unwrap_or(string))?; + Ok(()) + }; + + compile_file(&*input, &*output) + .chain_err(|| "Failed to compile template.") + .map_err(|mut e| { + e.source = fs::read_to_string(&*input).ok(); + e.source_file = Some(input.to_owned()); + e + })?; + + Ok(()) + } +} diff --git a/sailfish-compiler/src/config.rs b/sailfish-compiler/src/config.rs new file mode 100644 index 0000000..e69de29 diff --git a/sailfish-compiler/src/error.rs b/sailfish-compiler/src/error.rs new file mode 100644 index 0000000..b30730a --- /dev/null +++ b/sailfish-compiler/src/error.rs @@ -0,0 +1,256 @@ +use std::fmt; +use std::fs; +use std::io; +use std::path::PathBuf; + +#[non_exhaustive] +#[derive(Debug)] +pub enum ErrorKind { + FmtError(fmt::Error), + IoError(io::Error), + RustSyntaxError(syn::Error), + ParseError(String), + AnalyzeError(String), + Unimplemented(String), + Other(String), +} + +impl fmt::Display for ErrorKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ErrorKind::FmtError(ref e) => e.fmt(f), + ErrorKind::IoError(ref e) => e.fmt(f), + ErrorKind::RustSyntaxError(ref e) => write!(f, "Rust Syntax Error: {}", e), + ErrorKind::ParseError(ref msg) => write!(f, "Parse error: {}", msg), + ErrorKind::AnalyzeError(ref msg) => write!(f, "Analyzation error: {}", msg), + ErrorKind::Unimplemented(ref msg) => f.write_str(&**msg), + ErrorKind::Other(ref msg) => f.write_str(&**msg), + } + } +} + +macro_rules! 
impl_errorkind_conversion { + ($source:ty, $kind:ident, $conv:expr, [ $($lifetimes:tt),* ]) => { + impl<$($lifetimes),*> From<$source> for ErrorKind { + #[inline] + fn from(other: $source) -> Self { + ErrorKind::$kind($conv(other)) + } + } + }; + ($source:ty, $kind:ident) => { + impl_errorkind_conversion!($source, $kind, std::convert::identity, []); + } +} + +impl_errorkind_conversion!(fmt::Error, FmtError); +impl_errorkind_conversion!(io::Error, IoError); +impl_errorkind_conversion!(syn::Error, RustSyntaxError); +impl_errorkind_conversion!(String, Other); +impl_errorkind_conversion!(&'a str, Other, |s: &str| s.to_owned(), ['a]); + +#[derive(Debug, Default)] +pub struct Error { + pub(crate) source_file: Option, + pub(crate) source: Option, + pub(crate) offset: Option, + pub(crate) chains: Vec, +} + +impl Error { + pub fn from_kind(kind: ErrorKind) -> Self { + Self { + chains: vec![kind], + ..Self::default() + } + } + + pub fn kind(&self) -> &ErrorKind { + self.chains.last().unwrap() + } + + pub fn iter(&self) -> impl Iterator { + self.chains.iter().rev() + } +} + +impl From for Error +where + ErrorKind: From, +{ + fn from(other: T) -> Self { + Self::from_kind(ErrorKind::from(other)) + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let source = match (self.source.as_ref(), self.source_file.as_deref()) { + (Some(s), _) => Some(s.to_owned()), + (None, Some(f)) => fs::read_to_string(f).ok(), + (None, None) => None, + }; + + writeln!(f, "{}", self.chains.last().unwrap())?; + + for e in self.chains.iter().rev().skip(1) { + writeln!(f, "Caused by: {}", e)?; + } + + f.write_str("\n")?; + + if let Some(ref source_file) = self.source_file { + writeln!(f, "file: {}", source_file.display())?; + } + + if let (Some(ref source), Some(offset)) = (source, self.offset) { + let (lineno, colno) = into_line_column(source, offset); + writeln!(f, "position: line {}, column {}\n", lineno, colno)?; + + // TODO: display adjacent lines + let 
/// Converts a byte `offset` into `source` to a 1-based `(line, column)` pair.
///
/// The line number is one plus the number of `\n` bytes that precede
/// `offset`; the column is the 1-based byte distance from the start of that
/// line. Counting newline bytes directly (instead of summing
/// `str::lines()` lengths) keeps the result correct for `\r\n` terminated
/// sources, where `lines()` strips two bytes per line rather than one.
///
/// # Panics
///
/// Panics if `offset` is greater than `source.len()`.
fn into_line_column(source: &str, offset: usize) -> (usize, usize) {
    assert!(
        offset <= source.len(),
        "Internal error: error position offset overflow (error code: 56066)"
    );

    // Work on bytes so that an offset in the middle of a multi-byte character
    // cannot trigger a char-boundary panic.
    let preceding = &source.as_bytes()[..offset];

    let lineno = 1 + preceding.iter().filter(|&&b| b == b'\n').count();
    let line_start = preceding
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);

    (lineno, offset - line_start + 1)
}

/// Returns the number of decimal digits needed to print `n` (`0` has one digit).
///
/// Implemented by repeated division so it cannot overflow, even for
/// `usize::MAX`.
fn count_digits(n: usize) -> usize {
    let mut rest = n / 10;
    let mut digits = 1;

    while rest > 0 {
        rest /= 10;
        digits += 1;
    }

    digits
}
make_error { + ($kind:expr) => { + $crate::Error::from_kind($kind) + }; + ($kind:expr, $($remain:tt)*) => {{ + #[allow(unused_mut)] + let mut err = $crate::Error::from_kind($kind); + make_error!(@opt err $($remain)*); + err + }}; + (@opt $var:ident $key:ident = $value:expr, $($remain:tt)*) => { + $var.$key = Some($value.into()); + make_error!(@opt $var $($remain)*); + }; + (@opt $var:ident $key:ident = $value:expr) => { + $var.$key = Some($value.into()); + }; + (@opt $var:ident $key:ident, $($remain:tt)*) => { + $var.$key = Some($key); + make_error!(@opt $var $($remain)*); + }; + (@opt $var:ident $key:ident) => { + $var.$key = Some($key); + }; + (@opt $var:ident) => {}; +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn display_error() { + let mut err = make_error!( + ErrorKind::AnalyzeError("mismatched types".to_owned()), + source_file = PathBuf::from("apple.rs"), + source = "fn func() {\n 1\n}".to_owned(), + offset = 16usize + ); + err.chains.push(ErrorKind::Other("some error".to_owned())); + assert_eq!( + err.to_string(), + r#"some error +Caused by: Analyzation error: mismatched types + +file: apple.rs +position: line 2, column 5 + + | +2 | 1 + | ^ +"# + ); + } +} diff --git a/sailfish-compiler/src/lib.rs b/sailfish-compiler/src/lib.rs new file mode 100644 index 0000000..145136f --- /dev/null +++ b/sailfish-compiler/src/lib.rs @@ -0,0 +1,18 @@ +#![allow(dead_code)] + +#[macro_use] +mod error; + +mod compiler; +mod optimizer; +mod parser; +mod resolver; +mod translator; +mod util; + +pub use compiler::Compiler; +pub use error::{Error, ErrorKind}; + +#[cfg(feature = "procmacro")] +#[doc(hidden)] +pub mod procmacro; diff --git a/sailfish-compiler/src/optimizer.rs b/sailfish-compiler/src/optimizer.rs new file mode 100644 index 0000000..ceadb46 --- /dev/null +++ b/sailfish-compiler/src/optimizer.rs @@ -0,0 +1,101 @@ +use quote::quote; +use syn::parse::{Parse, ParseStream, Result as ParseResult}; +use 
syn::visit_mut::VisitMut; +use syn::{Block, Expr, ExprMacro, Ident, LitStr, Stmt, Token}; + +struct RenderTextMacroArgument { + context: Ident, + arg: LitStr, +} + +impl Parse for RenderTextMacroArgument { + fn parse(s: ParseStream) -> ParseResult { + let context = s.parse()?; + s.parse::()?; + let arg = s.parse()?; + + Ok(Self { context, arg }) + } +} + +fn get_rendertext_value(i: &ExprMacro) -> Option { + let mut it = i.mac.path.segments.iter(); + + if it.next().map_or(false, |s| s.ident == "sfrt") + && it.next().map_or(false, |s| s.ident == "render_text") + && it.next().is_none() + { + let tokens = i.mac.tokens.clone(); + if let Ok(macro_arg) = syn::parse2::(tokens) { + return Some(macro_arg.arg.value()); + } + } + + None +} + +struct OptmizerImpl {} + +impl VisitMut for OptmizerImpl { + fn visit_expr_mut(&mut self, i: &mut Expr) { + let fl = if let Expr::ForLoop(ref mut fl) = *i { + fl + } else { + syn::visit_mut::visit_expr_mut(self, i); + return; + }; + + syn::visit_mut::visit_block_mut(self, &mut fl.body); + + let (mf, ml) = match (fl.body.stmts.first(), fl.body.stmts.last()) { + ( + Some(Stmt::Semi(Expr::Macro(ref mf), ..)), + Some(Stmt::Semi(Expr::Macro(ref ml), ..)), + ) => (mf, ml), + _ => { + syn::visit_mut::visit_expr_mut(self, i); + return; + } + }; + + let (sf, sl) = match (get_rendertext_value(mf), get_rendertext_value(ml)) { + (Some(sf), Some(sl)) => (sf, sl), + _ => { + syn::visit_mut::visit_expr_mut(self, i); + return; + } + }; + + let sf_len = sf.len(); + let concat = sl + &*sf; + + fl.body.stmts.remove(0); + *fl.body.stmts.last_mut().unwrap() = syn::parse2(quote! { + sfrt::render_text!(_ctx, #concat); + }) + .unwrap(); + + let new_expr = syn::parse2(quote! 
{{ + sfrt::render_text!(_ctx, #sf); + #fl; + unsafe { _ctx.buf.set_len(_ctx.buf.len() - #sf_len); } + }}) + .unwrap(); + + *i = new_expr; + } +} + +pub struct Optimizer {} + +impl Optimizer { + #[inline] + pub fn new() -> Self { + Self {} + } + + #[inline] + pub fn optimize(&self, i: &mut Block) { + OptmizerImpl {}.visit_block_mut(i); + } +} diff --git a/sailfish-compiler/src/parser.rs b/sailfish-compiler/src/parser.rs new file mode 100644 index 0000000..b4d4059 --- /dev/null +++ b/sailfish-compiler/src/parser.rs @@ -0,0 +1,474 @@ +// TODO: Better error message (unbalanced rust delimiter, etc.) + +use memchr::{memchr, memchr2, memchr3}; +use std::convert::TryInto; +use std::rc::Rc; + +use crate::{Error, ErrorKind}; + +macro_rules! unwrap_or_break { + ($val:expr) => { + match $val { + Some(t) => t, + None => break, + } + }; +} + +#[derive(Clone, Debug)] +pub struct Parser { + delimiter: char, +} + +impl Parser { + pub fn new() -> Self { + Self::default() + } + + /// change delimiter + pub fn delimiter(mut self, new: char) -> Self { + self.delimiter = new; + self + } + + /// parse source string + pub fn parse<'a>(&self, source: &'a str) -> ParseStream<'a> { + let block_delimiter = Rc::new(( + format!("<{}", self.delimiter), + format!("{}>", self.delimiter), + )); + + ParseStream { + block_delimiter, + original_source: source, + source, + delimiter: self.delimiter, + } + } +} + +impl Default for Parser { + fn default() -> Self { + Self { delimiter: '%' } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TokenKind { + BufferedCode { escape: bool }, + Code, + Comment, + Text, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Token<'a> { + content: &'a str, + offset: usize, + kind: TokenKind, +} + +impl<'a> Token<'a> { + #[inline] + pub fn new(content: &'a str, offset: usize, kind: TokenKind) -> Token<'a> { + Token { + content, + offset, + kind, + } + } + + #[inline] + pub fn as_str(&self) -> &'a str { + self.content + } + + #[inline] + pub fn 
offset(&self) -> usize { + self.offset + } + + #[inline] + pub fn kind(&self) -> TokenKind { + self.kind + } +} + +#[derive(Clone, Debug)] +pub struct ParseStream<'a> { + block_delimiter: Rc<(String, String)>, + pub(crate) original_source: &'a str, + source: &'a str, + delimiter: char, +} + +impl<'a> ParseStream<'a> { + /// Returns an empty `ParseStream` containing no tokens + pub fn new() -> Self { + Self::default() + } + + pub fn is_empty(&self) -> bool { + self.source.is_empty() + } + + pub fn into_vec(self) -> Result>, Error> { + let mut vec = Vec::new(); + for token in self { + vec.push(token?); + } + + Ok(vec) + } + + pub fn to_vec(&self) -> Result>, Error> { + self.clone().into_vec() + } + + fn error(&self, msg: &str) -> Error { + let offset = self.original_source.len() - self.source.len(); + make_error!( + ErrorKind::ParseError(msg.to_owned()), + source = self.original_source.to_owned(), + offset + ) + } + + fn offset(&self) -> usize { + self.original_source.len() - self.source.len() + } + + fn take_n(&mut self, n: usize) -> &'a str { + let (l, r) = self.source.split_at(n); + self.source = r; + l + } + + fn tokenize_code(&mut self) -> Result, Error> { + debug_assert!(self.source.starts_with(&*self.block_delimiter.0)); + + let mut start = self.block_delimiter.0.len(); + let mut token_kind = TokenKind::Code; + + // read flags + match self.source.as_bytes().get(start).copied() { + Some(b'#') => { + token_kind = TokenKind::Comment; + start += 1; + } + Some(b'=') => { + token_kind = TokenKind::BufferedCode { escape: true }; + start += 1; + } + Some(b'-') => { + token_kind = TokenKind::BufferedCode { escape: false }; + start += 1; + } + _ => {} + } + + // skip whitespaces + for ch in self.source.bytes().skip(start) { + match ch { + b' ' | b'\t' | b'\n'..=b'\r' => { + start += 1; + } + _ => break, + } + } + + if token_kind == TokenKind::Comment { + let pos = self.source[start..] 
+ .find(&*self.block_delimiter.1) + .ok_or_else(|| self.error("Unterminated comment block"))?; + + self.take_n(start); + let token = Token { + content: self.source[..pos].trim_end(), + offset: self.offset(), + kind: token_kind, + }; + + self.take_n(pos + self.block_delimiter.1.len()); + return Ok(token); + } + + // find closing bracket + if let Some(pos) = find_block_end(&self.source[start..], &*self.block_delimiter.1) + { + // closing bracket was found + self.take_n(start); + let s = &self.source[..pos - self.block_delimiter.1.len()].trim_end_matches( + |c| matches!(c, ' ' | '\t' | '\r' | '\u{000B}' | '\u{000C}'), + ); + let token = Token { + content: s, + offset: self.offset(), + kind: token_kind, + }; + self.take_n(pos); + Ok(token) + } else { + Err(self.error("Unterminated code block")) + } + } + + fn tokenize_text(&mut self) -> Result, Error> { + // TODO: allow buffer block inside code block + let offset = self.offset(); + let end = self + .source + .find(&*self.block_delimiter.0) + .unwrap_or(self.source.len()); + let token = Token { + content: self.take_n(end), + offset, + kind: TokenKind::Text, + }; + Ok(token) + } +} + +impl<'a> Default for ParseStream<'a> { + fn default() -> Self { + Self { + block_delimiter: Rc::new(("<%".to_owned(), "%>".to_owned())), + original_source: "", + source: "", + delimiter: '%', + } + } +} + +impl<'a> Iterator for ParseStream<'a> { + type Item = Result, Error>; + + fn next(&mut self) -> Option { + if self.source.is_empty() { + return None; + } + + let token = if self.source.starts_with(&*self.block_delimiter.0) { + if !self.source[self.block_delimiter.0.len()..].starts_with(self.delimiter) { + self.tokenize_code() + } else { + debug_assert_eq!( + &self.source[..self.delimiter.len_utf8() * 2 + 1], + format!("<{0}{0}", self.delimiter) + ); + + // Escape '<%%' token + let token = Token { + content: &self.source[..self.block_delimiter.0.len()], + offset: self.offset(), + kind: TokenKind::Text, + }; + 
self.take_n(self.block_delimiter.0.len() * 2 - 1); + Ok(token) + } + } else { + self.tokenize_text() + }; + + Some(token) + } +} + +impl<'a> TryInto>> for ParseStream<'a> { + type Error = crate::Error; + + fn try_into(self) -> Result>, Error> { + self.into_vec() + } +} + +fn find_block_end(haystack: &str, delimiter: &str) -> Option { + let mut remain = haystack; + + 'outer: while let Some(pos) = + memchr3(b'/', b'\"', delimiter.as_bytes()[0], remain.as_bytes()) + { + let skip_num = match remain.as_bytes()[pos] { + b'/' => match remain.as_bytes().get(pos + 1).copied() { + Some(b'/') => unwrap_or_break!(find_comment_end(&remain[pos..])), + Some(b'*') => unwrap_or_break!(find_block_comment_end(&remain[pos..])), + _ => pos + 1, + }, + b'\"' => { + // check if the literal is a raw string + for (i, byte) in remain[..pos].as_bytes().iter().enumerate().rev() { + match byte { + b'#' => {} + b'r' => { + let skip_num = + unwrap_or_break!(find_raw_string_end(&remain[i..])); + remain = &remain[i + skip_num..]; + continue 'outer; + } + _ => break, + } + } + unwrap_or_break!(find_string_end(&remain[pos..])) + } + _ => { + if remain[pos..].starts_with(delimiter) { + return Some(haystack.len() - remain.len() + pos + delimiter.len()); + } else { + pos + 1 + } + } + }; + + remain = &remain[pos + skip_num..]; + } + + None +} + +fn find_comment_end(haystack: &str) -> Option { + debug_assert!(haystack.starts_with("//")); + memchr(b'\n', haystack.as_bytes()).map(|p| p + 1) +} + +fn find_block_comment_end(haystack: &str) -> Option { + debug_assert!(haystack.starts_with("/*")); + + let mut remain = &haystack[2..]; + let mut depth = 1; + + while let Some(p) = memchr2(b'*', b'/', remain.as_bytes()) { + let c = unsafe { *remain.as_bytes().get_unchecked(p) }; + let next = remain.as_bytes().get(p + 1); + + match (c, next) { + (b'*', Some(b'/')) => { + if depth == 1 { + let offset = haystack.len() - (remain.len() - (p + 2)); + return Some(offset); + } + depth -= 1; + remain = unsafe { 
/// Finds the end of a raw string literal that starts at the beginning of
/// `haystack` (which must start with `r`).
///
/// Returns:
/// * `Some(end)`, the byte offset just past the closing `"#…#`, when
///   `haystack` starts with a complete raw string literal;
/// * `Some(1)` when the leading `r` does not introduce a raw string literal
///   (the caller then skips just that `r`);
/// * `None` when the raw string literal is not terminated.
fn find_raw_string_end(haystack: &str) -> Option<usize> {
    debug_assert!(haystack.starts_with('r'));

    // Number of `#`s between the `r` and the opening quote.
    let hashes = haystack
        .bytes()
        .skip(1)
        .take_while(|&b| b == b'#')
        .count();
    let quote_pos = 1 + hashes;

    // Anything other than `"` here (including end of input) means this is
    // not a raw string literal.
    if haystack.as_bytes().get(quote_pos) != Some(&b'"') {
        return Some(1);
    }

    // The literal ends at a quote followed by the same number of `#`s.
    let mut terminator = String::with_capacity(hashes + 1);
    terminator.push('"');
    terminator.extend(std::iter::repeat('#').take(hashes));

    let body_start = quote_pos + 1;
    haystack[body_start..]
        .find(terminator.as_str())
        .map(|p| body_start + p + terminator.len())
}
+ GenericParamName::Ident(ref i) => i.to_tokens(tokens), + GenericParamName::LifeTime(ref l) => l.to_tokens(tokens), + } + } +} + +// arguments for include_template* macros +#[derive(Default)] +struct DeriveTemplateOptions { + path: Option, + delimiter: Option, + escape: Option, + type_: Option, +} + +impl Parse for DeriveTemplateOptions { + fn parse(outer: ParseStream) -> ParseResult { + let s; + syn::parenthesized!(s in outer); + + let mut options = Self::default(); + let mut found_keys = Vec::new(); + + while !s.is_empty() { + let key = s.parse::()?; + s.parse::()?; + + // check if argument is repeated + if found_keys.iter().any(|e| *e == key) { + return Err(syn::Error::new( + key.span(), + format!("Argument `{}` was repeated.", key), + )); + } + + if key == "path" { + options.path = Some(s.parse::()?); + } else if key == "delimiter" { + options.delimiter = Some(s.parse::()?); + } else if key == "escape" { + options.escape = Some(s.parse::()?); + } else if key == "type" { + options.type_ = Some(s.parse::()?); + } else { + return Err(syn::Error::new( + key.span(), + format!("Unknown option: `{}`", key), + )); + } + + found_keys.push(key); + + // consume comma token + if s.is_empty() { + break; + } else { + s.parse::()?; + } + } + + Ok(options) + } +} + +impl DeriveTemplateOptions { + fn merge(&mut self, other: DeriveTemplateOptions) -> Result<(), syn::Error> { + fn merge_single( + lhs: &mut Option, + rhs: Option, + ) -> Result<(), syn::Error> { + if lhs.is_some() { + if let Some(rhs) = rhs { + Err(syn::Error::new_spanned(rhs, "keyword argument repeated.")) + } else { + Ok(()) + } + } else { + *lhs = rhs; + Ok(()) + } + } + + merge_single(&mut self.path, other.path)?; + merge_single(&mut self.delimiter, other.delimiter)?; + merge_single(&mut self.escape, other.escape)?; + merge_single(&mut self.type_, other.type_)?; + Ok(()) + } +} + +struct TemplateStruct { + options: DeriveTemplateOptions, +} + +fn compile( + input_file: &Path, + output_file: &Path, + options: 
&DeriveTemplateOptions, +) -> Result<(), Error> { + let mut compiler = Compiler::new(); + if let Some(ref delimiter) = options.delimiter { + compiler = compiler.delimiter(delimiter.value()); + } + if let Some(ref escape) = options.escape { + compiler = compiler.escape(escape.value); + } + + compiler.compile_file(input_file, &*output_file) +} + +fn derive_template_impl(tokens: TokenStream) -> Result { + let strct = syn::parse2::(tokens)?; + + let mut all_options = DeriveTemplateOptions::default(); + for attr in strct.attrs { + let opt = syn::parse2::(attr.tokens)?; + all_options.merge(opt)?; + } + + let input_file = match all_options.path { + Some(ref path) => { + let mut input = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").expect( + "Internal error: environmental variable `CARGO_MANIFEST_DIR` is not set.", + )); + input.push("templates"); + input.push(path.value()); + input + } + None => { + return Err(syn::Error::new( + Span::call_site(), + "`path` option must be specified.", + ) + .into()) + } + }; + + let filename = match input_file.file_name() { + Some(f) => f, + None => { + return Err(syn::Error::new( + Span::call_site(), + format!("Invalid file name: {:?}", input_file), + )) + } + }; + + let out_dir = match std::env::var("SAILFISH_OUTPUT_DIR") { + Ok(dir) => { + let p = PathBuf::from(dir); + fs::create_dir_all(&*p).unwrap(); + p.canonicalize().unwrap() + } + Err(_) => PathBuf::from(env!("OUT_DIR")), + }; + let mut output_file = out_dir.clone(); + output_file.push("templates"); + output_file.push(filename); + + compile(&*input_file, &*output_file, &all_options) + .map_err(|e| syn::Error::new(Span::call_site(), e))?; + + let input_file_string = input_file.to_string_lossy(); + let output_file_string = output_file.to_string_lossy(); + + // Generate tokens + + let name = strct.ident; + + let field_names: Punctuated = match strct.fields { + Fields::Named(fields) => fields + .named + .into_iter() + .map(|f| { + f.ident.expect( + "Internal error: Failed to get 
field name (error code: 73621)", + ) + }) + .collect(), + Fields::Unit => Punctuated::new(), + _ => { + return Err(syn::Error::new( + Span::call_site(), + "You cannot derive `Template` or `TemplateOnce` for tuple struct", + )); + } + }; + + let (impl_generics, ty_generics, where_clause) = strct.generics.split_for_impl(); + + let tokens = quote! { + impl #impl_generics sailfish::TemplateOnce for #name #ty_generics #where_clause { + fn render_once(self) -> sailfish::runtime::RenderResult { + include_bytes!(#input_file_string); + + use sailfish::runtime as sfrt; + use sfrt::Render as _; + + static SIZE_HINT: sfrt::SizeHint = sfrt::SizeHint::new(); + let _size_hint = SIZE_HINT.get(); + let mut _ctx = sfrt::Context { + buf: sfrt::Buffer::with_capacity(_size_hint) + }; + + let #name { #field_names } = self; + include!(#output_file_string); + + SIZE_HINT.update(_ctx.buf.len()); + _ctx.into_result() + } + } + }; + + Ok(tokens) +} + +pub fn derive_template(tokens: TokenStream) -> TokenStream { + derive_template_impl(tokens).unwrap_or_else(|e| e.to_compile_error()) +} diff --git a/sailfish-compiler/src/resolver.rs b/sailfish-compiler/src/resolver.rs new file mode 100644 index 0000000..b30c4b7 --- /dev/null +++ b/sailfish-compiler/src/resolver.rs @@ -0,0 +1,19 @@ +use syn::Block; + +use crate::error::*; + +#[derive(Clone, Debug, Default)] +pub struct Resolver {} + +impl Resolver { + #[inline] + pub fn new() -> Self { + Self {} + } + + #[inline] + pub fn resolve(&self, _ast: &mut Block) -> Result<(), Error> { + // not implemented yet + Ok(()) + } +} diff --git a/sailfish-compiler/src/translator.rs b/sailfish-compiler/src/translator.rs new file mode 100644 index 0000000..79291fd --- /dev/null +++ b/sailfish-compiler/src/translator.rs @@ -0,0 +1,223 @@ +use proc_macro2::Span; + +use crate::error::*; +use crate::parser::{ParseStream, Token, TokenKind}; + +use syn::Block; + +#[derive(Clone)] +pub struct SourceMapEntry { + pub original: usize, + pub new: usize, + pub length: usize, 
+} + +#[derive(Default)] +pub struct SourceMap { + entries: Vec, +} + +impl SourceMap { + #[inline] + pub fn entries(&self) -> &[SourceMapEntry] { + &*self.entries + } + + pub fn reverse_mapping(&self, offset: usize) -> Option { + // find entry which satisfies entry.new <= offset < entry.new + entry.length + let idx = self + .entries + .iter() + .position(|entry| offset < entry.new + entry.length && entry.new <= offset)?; + + let entry = &self.entries[idx]; + debug_assert!(entry.new <= offset); + debug_assert!(offset < entry.new + entry.length); + + Some(entry.original + offset - entry.new) + } +} + +pub struct TranslatedSource { + pub ast: Block, + pub source_map: SourceMap, +} + +// translate tokens into Rust code +#[derive(Clone, Debug, Default)] +pub struct Translator { + escape: bool, +} + +impl Translator { + #[inline] + pub fn new() -> Self { + Self { escape: true } + } + + #[inline] + pub fn escape(mut self, new: bool) -> Self { + self.escape = new; + self + } + + pub fn translate<'a>( + &self, + token_iter: ParseStream<'a>, + ) -> Result { + let original_source = token_iter.original_source; + + let mut source = String::with_capacity(original_source.len()); + source.push_str("{\n"); + let mut ps = SourceBuilder { + escape: self.escape, + source, + source_map: SourceMap::default(), + }; + ps.feed_tokens(&*token_iter.into_vec()?); + + Ok(ps.finalize()?) + } +} + +struct SourceBuilder { + escape: bool, + source: String, + source_map: SourceMap, +} + +impl SourceBuilder { + fn write_token<'a>(&mut self, token: &Token<'a>) { + let entry = SourceMapEntry { + original: token.offset(), + new: self.source.len(), + length: token.as_str().len(), + }; + self.source_map.entries.push(entry); + self.source.push_str(token.as_str()); + } + + fn write_code<'a>(&mut self, token: &Token<'a>) { + // TODO: automatically add missing tokens (e.g. 
';', '{') + self.write_token(token); + self.source.push_str("\n"); + } + + fn write_text<'a>(&mut self, token: &Token<'a>) { + use std::fmt::Write; + + self.source.push_str("sfrt::render_text!(_ctx, "); + + // write text token with Debug::fmt + write!(self.source, "{:?}", token.as_str()).unwrap(); + + self.source.push_str(");\n"); + } + + fn write_buffered_code<'a>(&mut self, token: &Token<'a>, escape: bool) { + let method = if self.escape && escape { + "render_escaped" + } else { + "render" + }; + + self.source.push_str("sfrt::"); + self.source.push_str(method); + self.source.push_str("!(_ctx, "); + self.write_token(token); + self.source.push_str(");\n"); + } + + pub fn feed_tokens(&mut self, token_iter: &[Token]) { + let mut it = token_iter.iter().peekable(); + while let Some(token) = it.next() { + match token.kind() { + TokenKind::Code => self.write_code(&token), + TokenKind::Comment => {} + TokenKind::BufferedCode { escape } => { + self.write_buffered_code(&token, escape) + } + TokenKind::Text => { + // concatenate repeated text token + let offset = token.offset(); + let mut concatenated = String::new(); + concatenated.push_str(token.as_str()); + + while let Some(next_token) = it.peek() { + match next_token.kind() { + TokenKind::Text => { + concatenated.push_str(next_token.as_str()); + it.next(); + } + TokenKind::Comment => { + it.next(); + } + _ => break, + } + } + + let new_token = Token::new(&*concatenated, offset, TokenKind::Text); + self.write_text(&new_token); + } + } + } + } + + pub fn finalize(mut self) -> Result { + self.source.push_str("\n}"); + match syn::parse_str::(&*self.source) { + Ok(ast) => Ok(TranslatedSource { + ast, + source_map: self.source_map, + }), + Err(synerr) => { + let span = synerr.span(); + let original_offset = into_offset(&*self.source, span) + .and_then(|o| self.source_map.reverse_mapping(o)); + + let mut err = + make_error!(ErrorKind::RustSyntaxError(synerr), source = self.source); + + err.offset = original_offset; + + Err(err) 
/// Format block expression using `rustfmt` command
///
/// `source` is wrapped as the body of a dummy `fn render()` so that `rustfmt`
/// accepts it, piped to `rustfmt` on stdin, and the formatted output (from the
/// first `{` onwards) is returned.
///
/// # Errors
///
/// Returns an error when `rustfmt` cannot be spawned, when writing to its
/// stdin fails, when it exits unsuccessfully, or when its output is not valid
/// UTF-8 or contains no `{`.
pub fn rustfmt_block(source: &str) -> io::Result<String> {
    let mut new_source = String::with_capacity(source.len() + 11);
    new_source.push_str("fn render()");
    new_source.push_str(source);

    let mut child = Command::new("rustfmt")
        .args(&["--emit", "stdout", "--color", "never", "--quiet"])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()?;

    let write_result = match child.stdin.as_mut() {
        Some(stdin) => stdin.write_all(new_source.as_bytes()),
        None => Err(io::Error::from(io::ErrorKind::BrokenPipe)),
    };
    if let Err(e) = write_result {
        // Best effort: do not leave an unreaped child process behind.
        let _ = child.kill();
        let _ = child.wait();
        return Err(e);
    }

    // `wait_with_output` closes the child's stdin before waiting.
    let output = child.wait_with_output()?;

    if !output.status.success() {
        return Err(io::Error::new(
            io::ErrorKind::Other,
            "rustfmt command failed",
        ));
    }

    // The bytes come from an external process: validate them instead of
    // assuming UTF-8.
    let mut s = String::from_utf8(output.stdout)
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

    // Strip the dummy `fn render()` header, keeping only the block.
    let brace_offset = s.find('{').ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            "rustfmt output does not contain a block",
        )
    })?;
    s.replace_range(..brace_offset, "");
    Ok(s)
}
b/sailfish-macros/Cargo.toml new file mode 100644 index 0000000..696ea8c --- /dev/null +++ b/sailfish-macros/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "sailfish-macros" +version = "0.0.1" +authors = ["Ryohei Machida "] +description = "Really fast, intuitive template engine for Rust" +homepage = "https://github.com/Kogia-sima/sailfish" +repository = "https://github.com/Kogia-sima/sailfish" +readme = "../README.md" +keywords = ["markup", "template", "html"] +categories = ["template-engine"] +license = "MIT" +workspace = ".." +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +name = "sailfish_macros" +proc-macro = true +test = false +doctest = false + +[dependencies] +sailfish-compiler = { path = "../sailfish-compiler", version = "0.0.1", features = ["procmacro"] } +proc-macro2 = "1.0.17" diff --git a/sailfish-macros/src/lib.rs b/sailfish-macros/src/lib.rs new file mode 100644 index 0000000..b8625b1 --- /dev/null +++ b/sailfish-macros/src/lib.rs @@ -0,0 +1,18 @@ +extern crate proc_macro; + +use proc_macro::TokenStream; + +#[proc_macro_derive(TemplateOnce, attributes(template))] +pub fn derive_template_once(tokens: TokenStream) -> TokenStream { + let input = proc_macro2::TokenStream::from(tokens); + let output = sailfish_compiler::procmacro::derive_template(input); + TokenStream::from(output) +} + +/// WIP +#[proc_macro_derive(Template, attributes(template))] +pub fn derive_template(tokens: TokenStream) -> TokenStream { + let input = proc_macro2::TokenStream::from(tokens); + let output = sailfish_compiler::procmacro::derive_template(input); + TokenStream::from(output) +} diff --git a/sailfish/Cargo.toml b/sailfish/Cargo.toml new file mode 100644 index 0000000..048f477 --- /dev/null +++ b/sailfish/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "sailfish" +version = "0.0.1" +authors = ["Ryohei Machida "] +description = "Really fast, intuitive template engine for Rust" +homepage = 
"https://github.com/Kogia-sima/sailfish" +repository = "https://github.com/Kogia-sima/sailfish" +readme = "../README.md" +keywords = ["markup", "template", "html"] +categories = ["template-engine"] +license = "MIT" +workspace = ".." +edition = "2018" + +[dependencies] +itoa = "0.4.5" +ryu = "1.0.4" diff --git a/sailfish/src/lib.rs b/sailfish/src/lib.rs new file mode 100644 index 0000000..9f773aa --- /dev/null +++ b/sailfish/src/lib.rs @@ -0,0 +1,12 @@ +pub mod runtime; + +pub use runtime::{RenderError, RenderResult}; + +pub trait TemplateOnce { + fn render_once(self) -> runtime::RenderResult; +} + +/// WIP +pub trait Template { + fn render(self) -> runtime::RenderResult; +} diff --git a/sailfish/src/runtime/buffer.rs b/sailfish/src/runtime/buffer.rs new file mode 100644 index 0000000..bbeac60 --- /dev/null +++ b/sailfish/src/runtime/buffer.rs @@ -0,0 +1,129 @@ +use std::fmt; +use std::ops::{Add, AddAssign}; + +#[derive(Clone, Debug)] +pub struct Buffer { + inner: String, +} + +impl Buffer { + #[inline] + pub const fn new() -> Buffer { + Self { + inner: String::new(), + } + } + + #[inline] + pub fn with_capacity(n: usize) -> Buffer { + Self { + inner: String::with_capacity(n), + } + } + + #[inline] + pub fn as_str(&self) -> &str { + &*self.inner + } + + #[inline] + pub fn len(&self) -> usize { + self.inner.len() + } + + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + #[inline] + pub unsafe fn set_len(&mut self, new: usize) { + self.inner.as_mut_vec().set_len(new); + } + + #[inline] + pub fn reserve(&mut self, n: usize) { + if n > self.inner.capacity() - self.inner.len() { + self.inner.reserve(n); + } + } + + #[inline] + pub fn clear(&mut self) { + // unsafe { self.inner.set_len(0) }; + self.inner.clear(); + } + + #[inline] + pub fn into_string(self) -> String { + self.inner + } + + #[inline] + pub fn write_str(&mut self, data: &str) { + let inner_len = self.inner.len(); + let size = data.len(); + if size > self.inner.capacity() - 
self.inner.len() { + self.inner.reserve(size); + } + unsafe { + let p = self.inner.as_mut_ptr().add(self.inner.len()); + std::ptr::copy_nonoverlapping(data.as_ptr(), p, size); + self.inner.as_mut_vec().set_len(inner_len + size); + } + } + + #[inline] + pub fn write_char(&mut self, data: char) { + // TODO: do not use standard library + self.inner.push(data); + } +} + +impl fmt::Write for Buffer { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + Buffer::write_str(self, s); + Ok(()) + } +} + +impl From for Buffer { + #[inline] + fn from(other: String) -> Buffer { + Buffer { inner: other } + } +} + +impl From<&str> for Buffer { + #[inline] + fn from(other: &str) -> Buffer { + Buffer { + inner: other.to_owned(), + } + } +} + +impl Add<&str> for Buffer { + type Output = Buffer; + + #[inline] + fn add(mut self, other: &str) -> Buffer { + self.write_str(other); + self + } +} + +impl AddAssign<&str> for Buffer { + #[inline] + fn add_assign(&mut self, other: &str) { + self.write_str(other) + } +} + +impl Default for Buffer { + #[inline] + fn default() -> Buffer { + Buffer::new() + } +} diff --git a/sailfish/src/runtime/escape/avx2.rs b/sailfish/src/runtime/escape/avx2.rs new file mode 100644 index 0000000..b714dc8 --- /dev/null +++ b/sailfish/src/runtime/escape/avx2.rs @@ -0,0 +1,90 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +use std::slice; + +use super::{naive, sse2}; +use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT}; + +const VECTOR_BYTES: usize = std::mem::size_of::<__m256i>(); +const VECTOR_ALIGN: usize = VECTOR_BYTES - 1; + +#[target_feature(enable = "avx2")] +pub unsafe fn escape(writer: &mut F, bytes: &[u8]) { + let len = bytes.len(); + let mut start_ptr = bytes.as_ptr(); + let end_ptr = start_ptr.add(len); + + if len < VECTOR_BYTES { + if len < 16 { + naive::escape(writer, start_ptr, start_ptr, end_ptr); + } else { + sse2::escape(writer, bytes); + } + return; + } + + let 
v_independent1 = _mm256_set1_epi8(4); + let v_independent2 = _mm256_set1_epi8(2); + let v_key1 = _mm256_set1_epi8(0x26); + let v_key2 = _mm256_set1_epi8(0x3e); + + let maskgen = |x: __m256i| -> i32 { + _mm256_movemask_epi8(_mm256_or_si256( + _mm256_cmpeq_epi8(_mm256_or_si256(x, v_independent1), v_key1), + _mm256_cmpeq_epi8(_mm256_or_si256(x, v_independent2), v_key2), + )) + }; + + let mut ptr = start_ptr; + let aligned_ptr = ptr.add(VECTOR_BYTES - (start_ptr as usize & VECTOR_ALIGN)); + + { + let mut mask = maskgen(_mm256_loadu_si256(ptr as *const __m256i)); + loop { + let trailing_zeros = mask.trailing_zeros() as usize; + let ptr2 = ptr.add(trailing_zeros); + if ptr2 >= aligned_ptr { + break; + } + + let c = ESCAPE_LUT[*ptr2 as usize] as usize; + debug_assert!(c < ESCAPED_LEN); + if start_ptr < ptr2 { + let slc = + slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); + writer(std::str::from_utf8_unchecked(slc)); + } + writer(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); + mask ^= 1 << trailing_zeros; + } + } + + ptr = aligned_ptr; + let mut next_ptr = ptr.add(VECTOR_BYTES); + + while next_ptr <= end_ptr { + let mut mask = maskgen(_mm256_load_si256(ptr as *const __m256i)); + while mask != 0 { + let trailing_zeros = mask.trailing_zeros() as usize; + let ptr2 = ptr.add(trailing_zeros); + let c = ESCAPE_LUT[*ptr2 as usize] as usize; + debug_assert!(c < ESCAPED_LEN); + if start_ptr < ptr2 { + let slc = + slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); + writer(std::str::from_utf8_unchecked(slc)); + } + writer(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); + mask ^= 1 << trailing_zeros; + } + + ptr = next_ptr; + next_ptr = next_ptr.add(VECTOR_BYTES); + } + + sse2::escape_aligned(writer, start_ptr, ptr, end_ptr); +} diff --git a/sailfish/src/runtime/escape/fallback.rs b/sailfish/src/runtime/escape/fallback.rs new file mode 100644 index 0000000..9e75372 --- /dev/null +++ b/sailfish/src/runtime/escape/fallback.rs @@ 
-0,0 +1,79 @@ +use super::naive; + +#[cfg(target_pointer_width = "16")] +const USIZE_BYTES: usize = 2; + +#[cfg(target_pointer_width = "32")] +const USIZE_BYTES: usize = 4; + +#[cfg(target_pointer_width = "64")] +const USIZE_BYTES: usize = 8; + +const USIZE_ALIGN: usize = USIZE_BYTES - 1; + +#[inline(always)] +fn contains_zero_byte(x: usize) -> bool { + const LO_U64: u64 = 0x0101010101010101; + const HI_U64: u64 = 0x8080808080808080; + const LO_USIZE: usize = LO_U64 as usize; + const HI_USIZE: usize = HI_U64 as usize; + + x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0 +} + +#[inline] +fn contains_key(x: usize) -> bool { + const INDEPENDENTS1: usize = 0x0404040404040404_u64 as usize; + const INDEPENDENTS2: usize = 0x0202020202020202_u64 as usize; + const KEY1: usize = 0x2626262626262626_u64 as usize; + const KEY2: usize = 0x3e3e3e3e3e3e3e3e_u64 as usize; + + let y1 = x | INDEPENDENTS1; + let y2 = x | INDEPENDENTS2; + let z1 = y1.wrapping_sub(KEY1); + let z2 = y2.wrapping_sub(KEY2); + contains_zero_byte(z1) || contains_zero_byte(z2) +} + +pub unsafe fn escape(writer: &mut F, bytes: &[u8]) { + let len = bytes.len(); + let mut start_ptr = bytes.as_ptr(); + let end_ptr = start_ptr.add(len); + + if bytes.len() < USIZE_BYTES { + naive::escape(writer, start_ptr, start_ptr, end_ptr); + return; + } + + let ptr = start_ptr; + let aligned_ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & USIZE_ALIGN)); + debug_assert_eq!(aligned_ptr as usize % USIZE_BYTES, 0); + debug_assert!(aligned_ptr <= end_ptr); + + let chunk = (ptr as *const usize).read_unaligned(); + if contains_key(chunk) { + start_ptr = naive::proceed(writer, start_ptr, ptr, aligned_ptr); + } + + escape_aligned(writer, start_ptr, aligned_ptr, end_ptr); +} + +pub unsafe fn escape_aligned( + writer: &mut F, + mut start_ptr: *const u8, + mut ptr: *const u8, + end_ptr: *const u8, +) { + while ptr.add(USIZE_BYTES) <= end_ptr { + debug_assert_eq!((ptr as usize) % USIZE_BYTES, 0); + + let chunk = *(ptr as *const usize); + if 
contains_key(chunk) { + start_ptr = naive::proceed(writer, start_ptr, ptr, ptr.add(USIZE_BYTES)) + } + ptr = ptr.add(USIZE_BYTES); + } + debug_assert!(ptr <= end_ptr); + debug_assert!(start_ptr <= ptr); + naive::escape(writer, start_ptr, ptr, end_ptr); +} diff --git a/sailfish/src/runtime/escape/mod.rs b/sailfish/src/runtime/escape/mod.rs new file mode 100644 index 0000000..016835a --- /dev/null +++ b/sailfish/src/runtime/escape/mod.rs @@ -0,0 +1,147 @@ +mod avx2; +mod fallback; +mod naive; +mod sse2; + +use std::ptr; + +use super::buffer::Buffer; + +static ESCAPE_LUT: [u8; 256] = [ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 2, 9, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, +]; + +const ESCAPED: [&'static str; 4] = [""", "&", "<", ">"]; +const ESCAPED_LEN: usize = 4; + +#[inline] +pub fn escape_with(mut writer: F, feed: &str) { + unsafe { + #[cfg(target_feature = "avx2")] + { + avx2::escape(&mut writer, feed.as_bytes()); + } + + #[cfg(not(target_feature = "avx2"))] + { + if is_x86_feature_detected!("avx2") { + avx2::escape(&mut writer, feed.as_bytes()); + } else if is_x86_feature_detected!("sse2") { + sse2::escape(&mut writer, feed.as_bytes()); + } else { + fallback::escape(&mut writer, feed.as_bytes()); + } + } + } +} + +#[doc(hidden)] +pub fn escape_to_buf(feed: &str, buf: &mut 
Buffer) { + escape_with(|e| buf.write_str(e), feed); +} + +#[inline] +pub fn escape_to_string(feed: &str, s: &mut String) { + unsafe { + let mut buf = Buffer::from(ptr::read(s)); + escape_to_buf(feed, &mut buf); + ptr::write(s, buf.into_string()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn escape(feed: &str) -> String { + let mut buf = Buffer::new(); + escape_to_buf(feed, &mut buf); + buf.into_string() + } + + #[test] + fn noescape() { + assert_eq!(escape(""), ""); + assert_eq!( + escape("abcdefghijklmnopqrstrvwxyz"), + "abcdefghijklmnopqrstrvwxyz" + ); + assert_eq!(escape("!#$%()*+,-.:;=?_^"), "!#$%()*+,-.:;=?_^"); + assert_eq!( + escape("漒字はエスケープしγͺγ„γ―γšγ γ‚ˆ"), + "漒字はエスケープしγͺγ„γ―γšγ γ‚ˆ" + ); + } + + #[test] + fn escape_short() { + assert_eq!(escape("<"), "<"); + assert_eq!(escape("\"&<>"), ""&<>"); + assert_eq!( + escape("{\"title\": \"This is a JSON!\"}"), + "{"title": "This is a JSON!"}" + ); + assert_eq!( + escape("

Hello, world

"), + "<html><body><h1>Hello, world</h1>\ + </body></html>" + ); + } + + #[test] + #[rustfmt::skip] + fn escape_long() { + assert_eq!( + escape(r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqve/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###), + r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqv<za&~r^]"{t4by2t`<q4bfo^&!so5/~(nxk:7l\;#0w41u~w3i$g|>e/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""### + ); + } + + #[test] + fn random() { + const ASCII_CHARS: &'static [u8] = br##"abcdefghijklmnopqrstuvwxyz0123456789-^\@[;:],./\!"#$%&'()~=~|`{+*}<>?_"##; + let mut state = 88172645463325252u64; + let mut data = Vec::with_capacity(100); + let mut buf1 = Buffer::new(); + let mut buf2 = Buffer::new(); + + for len in 0..100 { + data.clear(); + for _ in 0..len { + // xorshift + state ^= state << 13; + state ^= state >> 7; + state ^= state << 17; + + let idx = state as usize % ASCII_CHARS.len(); + data.push(ASCII_CHARS[idx]); + } + + let s = unsafe { std::str::from_utf8_unchecked(&*data) }; + + 
buf1.clear(); + buf2.clear(); + + unsafe { + escape_to_buf(&*s, &mut buf1); + naive::escape( + &mut |s| buf2.write_str(s), + s.as_ptr(), + s.as_ptr(), + s.as_ptr().add(s.len()), + ); + } + + assert_eq!(buf1.as_str(), buf2.as_str()); + } + } +} diff --git a/sailfish/src/runtime/escape/naive.rs b/sailfish/src/runtime/escape/naive.rs new file mode 100644 index 0000000..a2b513f --- /dev/null +++ b/sailfish/src/runtime/escape/naive.rs @@ -0,0 +1,46 @@ +use core::slice; + +use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT}; + +#[inline] +pub(super) unsafe fn escape( + writer: &mut F, + mut start_ptr: *const u8, + ptr: *const u8, + end_ptr: *const u8, +) { + start_ptr = proceed(writer, start_ptr, ptr, end_ptr); + + if end_ptr > start_ptr { + let slc = slice::from_raw_parts(start_ptr, end_ptr as usize - start_ptr as usize); + writer(std::str::from_utf8_unchecked(slc)); + } +} + +#[inline] +pub(super) unsafe fn proceed( + writer: &mut F, + mut start_ptr: *const u8, + mut ptr: *const u8, + end_ptr: *const u8, +) -> *const u8 { + while ptr < end_ptr { + debug_assert!(start_ptr <= ptr); + let idx = ESCAPE_LUT[*ptr as usize] as usize; + debug_assert!(idx <= 9); + if idx < ESCAPED_LEN { + if ptr > start_ptr { + let slc = + slice::from_raw_parts(start_ptr, ptr as usize - start_ptr as usize); + writer(std::str::from_utf8_unchecked(slc)); + } + writer(*ESCAPED.get_unchecked(idx)); + start_ptr = ptr.add(1); + } + ptr = ptr.add(1); + } + + debug_assert_eq!(ptr, end_ptr); + debug_assert!(start_ptr <= ptr); + return start_ptr; +} diff --git a/sailfish/src/runtime/escape/sse2.rs b/sailfish/src/runtime/escape/sse2.rs new file mode 100644 index 0000000..681fe7f --- /dev/null +++ b/sailfish/src/runtime/escape/sse2.rs @@ -0,0 +1,132 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +use std::slice; + +use super::naive; +use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT}; + +const VECTOR_BYTES: usize = std::mem::size_of::<__m128i>(); 
+const VECTOR_ALIGN: usize = VECTOR_BYTES - 1; + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn escape(writer: &mut F, bytes: &[u8]) { + let len = bytes.len(); + let mut start_ptr = bytes.as_ptr(); + let end_ptr = start_ptr.add(len); + + if bytes.len() < VECTOR_BYTES { + naive::escape(writer, start_ptr, start_ptr, end_ptr); + return; + } + + let v_independent1 = _mm_set1_epi8(4); + let v_independent2 = _mm_set1_epi8(2); + let v_key1 = _mm_set1_epi8(0x26); + let v_key2 = _mm_set1_epi8(0x3e); + + let maskgen = |x: __m128i| -> i32 { + _mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8(_mm_or_si128(x, v_independent1), v_key1), + _mm_cmpeq_epi8(_mm_or_si128(x, v_independent2), v_key2), + )) + }; + + let mut ptr = start_ptr; + let aligned_ptr = ptr.add(VECTOR_BYTES - (start_ptr as usize & VECTOR_ALIGN)); + + { + let mut mask = maskgen(_mm_loadu_si128(ptr as *const __m128i)); + loop { + let trailing_zeros = mask.trailing_zeros() as usize; + let ptr2 = ptr.add(trailing_zeros); + if ptr2 >= aligned_ptr { + break; + } + + let c = ESCAPE_LUT[*ptr2 as usize] as usize; + debug_assert!(c < ESCAPED_LEN); + if start_ptr < ptr2 { + let slc = + slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); + writer(std::str::from_utf8_unchecked(slc)); + } + writer(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); + mask ^= 1 << trailing_zeros; + } + } + + ptr = aligned_ptr; + escape_aligned(writer, start_ptr, ptr, end_ptr); +} + +pub unsafe fn escape_aligned( + writer: &mut F, + mut start_ptr: *const u8, + mut ptr: *const u8, + end_ptr: *const u8, +) { + let mut next_ptr = ptr.add(VECTOR_BYTES); + let v_independent1 = _mm_set1_epi8(4); + let v_independent2 = _mm_set1_epi8(2); + let v_key1 = _mm_set1_epi8(0x26); + let v_key2 = _mm_set1_epi8(0x3e); + + let maskgen = |x: __m128i| -> i32 { + _mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8(_mm_or_si128(x, v_independent1), v_key1), + _mm_cmpeq_epi8(_mm_or_si128(x, v_independent2), v_key2), + )) + }; + + while 
next_ptr <= end_ptr { + debug_assert_eq!((ptr as usize) % VECTOR_BYTES, 0); + let mut mask = maskgen(_mm_load_si128(ptr as *const __m128i)); + while mask != 0 { + let trailing_zeros = mask.trailing_zeros() as usize; + let ptr2 = ptr.add(trailing_zeros); + let c = ESCAPE_LUT[*ptr2 as usize] as usize; + debug_assert!(c < ESCAPED_LEN); + if start_ptr < ptr2 { + let slc = + slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); + writer(std::str::from_utf8_unchecked(slc)); + } + writer(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); + mask ^= 1 << trailing_zeros; + } + + ptr = next_ptr; + next_ptr = next_ptr.add(VECTOR_BYTES); + } + + next_ptr = ptr.add(8); + if next_ptr <= end_ptr { + debug_assert_eq!((ptr as usize) % VECTOR_BYTES, 0); + let mut mask = maskgen(_mm_loadl_epi64(ptr as *const __m128i)); + while mask != 0 { + let trailing_zeros = mask.trailing_zeros() as usize; + let ptr2 = ptr.add(trailing_zeros); + let c = ESCAPE_LUT[*ptr2 as usize] as usize; + debug_assert!(c < ESCAPED_LEN); + if start_ptr < ptr2 { + let slc = + slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); + writer(std::str::from_utf8_unchecked(slc)); + } + writer(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); + mask ^= 1 << trailing_zeros; + } + + ptr = next_ptr; + } + + debug_assert!(ptr <= end_ptr); + debug_assert!(start_ptr <= ptr); + naive::escape(writer, start_ptr, ptr, end_ptr); +} diff --git a/sailfish/src/runtime/macros.rs b/sailfish/src/runtime/macros.rs new file mode 100644 index 0000000..9968585 --- /dev/null +++ b/sailfish/src/runtime/macros.rs @@ -0,0 +1,29 @@ +#[macro_export] +#[doc(hidden)] +macro_rules! render { + ($ctx:ident, $value:expr) => { + (&($value)).render(&mut $ctx.buf)? + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! render_escaped { + ($ctx:ident, $value:expr) => { + (&($value)).render_escaped(&mut $ctx.buf)? + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
render_text { + ($ctx:ident, $value:expr) => { + $ctx.buf.write_str($value) + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! render_noop { + ($ctx:ident, $value:expr) => {}; +} diff --git a/sailfish/src/runtime/mod.rs b/sailfish/src/runtime/mod.rs new file mode 100644 index 0000000..ceaede0 --- /dev/null +++ b/sailfish/src/runtime/mod.rs @@ -0,0 +1,54 @@ +mod buffer; +pub mod escape; +mod macros; +mod render; +mod size_hint; + +pub use buffer::*; +pub use render::*; +pub use size_hint::*; + +use std::fmt; + +#[doc(hidden)] +pub use crate::{render, render_escaped, render_noop, render_text}; + +/// The error type which is returned from template function +#[derive(Clone, Debug)] +pub struct RenderError { + // currently RenderError simply wraps the fmt::Error + inner: fmt::Error, +} + +impl fmt::Display for RenderError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.inner.fmt(f) + } +} + +impl std::error::Error for RenderError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(&self.inner) + } +} + +impl From for RenderError { + #[inline] + fn from(other: fmt::Error) -> Self { + Self { inner: other } + } +} + +pub type RenderResult = Result; + +pub struct Context { + #[doc(hidden)] + pub buf: Buffer, +} + +impl Context { + #[inline] + pub fn into_result(self) -> RenderResult { + Ok(self.buf.into_string()) + } +} diff --git a/sailfish/src/runtime/render.rs b/sailfish/src/runtime/render.rs new file mode 100644 index 0000000..b30ff47 --- /dev/null +++ b/sailfish/src/runtime/render.rs @@ -0,0 +1,207 @@ +use std::fmt::{self, Display}; +use std::path::{Path, PathBuf}; + +use super::buffer::Buffer; +use super::escape; + +pub trait Render { + fn render(&self, b: &mut Buffer) -> fmt::Result; + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + let mut tmp = Buffer::new(); + self.render(&mut tmp)?; + b.write_str(tmp.as_str()); + Ok(()) + } +} + +/// Autoref-based stable specialization +/// +/// Explanation 
can be found [here](https://github.com/dtolnay/case-studies/blob/master/autoref-specialization/README.md) +impl Render for &T { + fn render(&self, b: &mut Buffer) -> fmt::Result { + fmt::write(b, format_args!("{}", self)) + } + + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + struct Wrapper<'a>(&'a mut Buffer); + + impl<'a> fmt::Write for Wrapper<'a> { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + escape::escape_to_buf(s, self.0); + Ok(()) + } + } + + fmt::write(&mut Wrapper(b), format_args!("{}", self)) + } +} + +impl Render for str { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + b.write_str(self); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + escape::escape_to_buf(self, b); + Ok(()) + } +} + +impl<'a> Render for &'a str { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + b.write_str(self); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + // escape string + escape::escape_to_buf(self, b); + Ok(()) + } +} + +impl Render for String { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + b.write_str(self); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + // escape string + escape::escape_to_buf(self, b); + Ok(()) + } +} + +impl Render for char { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + b.write_char(*self); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + match *self { + '\"' => b.write_str("""), + '&' => b.write_str("&"), + '<' => b.write_str("<"), + '>' => b.write_str(">"), + _ => b.write_char(*self), + } + Ok(()) + } +} + +impl<'a> Render for &'a Path { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + // TODO: speed up on Windows using OsStrExt + b.write_str(&*self.to_string_lossy()); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + 
escape::escape_to_buf(&*self.to_string_lossy(), b); + Ok(()) + } +} + +impl Render for PathBuf { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + b.write_str(&*self.to_string_lossy()); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + // escape string + escape::escape_to_buf(&*self.to_string_lossy(), b); + + Ok(()) + } +} + +// impl Render for [u8] { +// #[inline] +// fn render(&self, b: &mut Buffer) -> fmt::Result { +// b.write_bytes(self); +// Ok(()) +// } +// } +// +// impl<'a> Render for &'a [u8] { +// #[inline] +// fn render(&self, b: &mut Buffer) -> fmt::Result { +// b.write_bytes(self); +// Ok(()) +// } +// } +// +// impl Render for Vec { +// #[inline] +// fn render(&self, b: &mut Buffer) -> fmt::Result { +// b.write_bytes(&**self); +// Ok(()) +// } +// } + +macro_rules! render_int { + ($($int:ty),*) => { + $( + impl Render for $int { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + let mut buffer = itoa::Buffer::new(); + let s = buffer.format(*self); + b.write_str(s); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + // write_str without escape + self.render(b) + } + } + )* + } +} + +render_int!(u8, u16, u32, u64, i8, i16, i32, i64, usize, isize); + +macro_rules! 
render_float { + ($($float:ty),*) => { + $( + impl Render for $float { + #[inline] + fn render(&self, b: &mut Buffer) -> fmt::Result { + let mut buffer = ryu::Buffer::new(); + let s = buffer.format(*self); + b.write_str(s); + Ok(()) + } + + #[inline] + fn render_escaped(&self, b: &mut Buffer) -> fmt::Result { + // escape string + self.render(b) + } + } + )* + } +} + +render_float!(f32, f64); diff --git a/sailfish/src/runtime/size_hint.rs b/sailfish/src/runtime/size_hint.rs new file mode 100644 index 0000000..d9e45d3 --- /dev/null +++ b/sailfish/src/runtime/size_hint.rs @@ -0,0 +1,32 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +#[derive(Debug, Default)] +pub struct SizeHint { + value: AtomicUsize, +} + +impl SizeHint { + pub const fn new() -> SizeHint { + SizeHint { + value: AtomicUsize::new(0), + } + } + + /// Get the current value + #[inline] + pub fn get(&self) -> usize { + self.value.load(Ordering::Acquire) + } + + /// Update size hint based on given value. + /// + /// There is no guarantee that the value of get() after calling update() is same + /// as the value passed on update() + #[inline] + pub fn update(&self, mut value: usize) { + value = value + value / 8; + if self.get() < value { + self.value.store(value, Ordering::Release); + } + } +} diff --git a/syntax/vim/ftdetect/sailfish.vim b/syntax/vim/ftdetect/sailfish.vim new file mode 100644 index 0000000..04a0884 --- /dev/null +++ b/syntax/vim/ftdetect/sailfish.vim @@ -0,0 +1,6 @@ +" Detect sailfish template files and set filetype +" Maintainer: Ryohei Machida +" URL: http://github.com/Kogia-sima/sailfish +" License: MIT + +autocmd BufNewFile,BufRead *.stpl set filetype=sailfish diff --git a/syntax/vim/indent/sailfish.vim b/syntax/vim/indent/sailfish.vim new file mode 100644 index 0000000..5b44d88 --- /dev/null +++ b/syntax/vim/indent/sailfish.vim @@ -0,0 +1,12 @@ +" Vim indent file +" Language: Sailfish template language +" Maintainer: Ryohei Machida +" Last Change: 2020 May 29 + +" Only load 
this indent file when no other was loaded. +if exists("b:did_indent") + finish +endif + +" Use HTML formatting rules. +runtime! indent/html.vim diff --git a/syntax/vim/syntax/sailfish.vim b/syntax/vim/syntax/sailfish.vim new file mode 100644 index 0000000..f13dbf6 --- /dev/null +++ b/syntax/vim/syntax/sailfish.vim @@ -0,0 +1,17 @@ +runtime! syntax/html.vim +unlet b:current_syntax + +syn include @rustSyntax syntax/rust.vim + +syn region sailfishCodeBlock matchgroup=sailfishTag start=/<%/ keepend end=/%>/ contains=@rustSyntax +syn region sailfishBufferBlock matchgroup=sailfishTag start=/<%=/ keepend end=/%>/ contains=@rustSyntax +syn region sailfishCommentBlock start=/<%#/ end=/%>/ + +" Redefine htmlTag so that it can contain sailfishBufferBlock +syn clear htmlTag +syn region htmlTag start=+<[^/%]+ end=+>+ fold contains=htmlTagN,htmlString,htmlArg,htmlValue,htmlTagError,htmlEvent,htmlCssDefinition,@htmlPreproc,@htmlArgCluster,sailfishBufferBlock + +hi default link sailfishTag htmlTag +hi default link sailfishCommentBlock htmlComment + +let b:current_syntax = "sailfish" diff --git a/syntax/vscode/.gitignore b/syntax/vscode/.gitignore new file mode 100644 index 0000000..e3a9959 --- /dev/null +++ b/syntax/vscode/.gitignore @@ -0,0 +1 @@ +/test.stpl diff --git a/syntax/vscode/.vscode/launch.json b/syntax/vscode/.vscode/launch.json new file mode 100644 index 0000000..7bc18a4 --- /dev/null +++ b/syntax/vscode/.vscode/launch.json @@ -0,0 +1,18 @@ +// A launch configuration that launches the extension inside a new window +// Use IntelliSense to learn about possible attributes. +// Hover to view descriptions of existing attributes. 
+// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Extension", + "type": "extensionHost", + "request": "launch", + "runtimeExecutable": "${execPath}", + "args": [ + "--extensionDevelopmentPath=${workspaceFolder}" + ] + } + ] +} \ No newline at end of file diff --git a/syntax/vscode/.vscodeignore b/syntax/vscode/.vscodeignore new file mode 100644 index 0000000..f369b5e --- /dev/null +++ b/syntax/vscode/.vscodeignore @@ -0,0 +1,4 @@ +.vscode/** +.vscode-test/** +.gitignore +vsc-extension-quickstart.md diff --git a/syntax/vscode/CHANGELOG.md b/syntax/vscode/CHANGELOG.md new file mode 100644 index 0000000..6b5b8ad --- /dev/null +++ b/syntax/vscode/CHANGELOG.md @@ -0,0 +1,9 @@ +# Change Log + +All notable changes to the "vscode-sailfish" extension will be documented in this file. + +Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. + +## [Unreleased] + +- Initial release \ No newline at end of file diff --git a/syntax/vscode/README.md b/syntax/vscode/README.md new file mode 100644 index 0000000..1cf2bfb --- /dev/null +++ b/syntax/vscode/README.md @@ -0,0 +1,13 @@ +# Syntax Highlighting for Sailfish Templates in VSCode + +This directory contains the syntax highlighting extension for sailfish templates in Visual Studio Code. 
+ +## Features + +- Full Rust syntax highlighting rules inside code blocks +- Auto-closing brackets for code blocks +- Folding for comment blocks + +## Screenshots + +![screenshot](./screenshot.png) diff --git a/syntax/vscode/language-configuration.json b/syntax/vscode/language-configuration.json new file mode 100644 index 0000000..cca85c8 --- /dev/null +++ b/syntax/vscode/language-configuration.json @@ -0,0 +1,36 @@ +{ + "comments": { + "blockComment": [ "<%#", "%>" ] + }, + "brackets": [ + ["<%#", "%>"], + ["<!--", "-->"], + ["<", ">"], + ["{", "}"], + ["(", ")"] + ], + "autoClosingPairs": [ + { "open": "<%", "close": "%>"}, + { "open": "{", "close": "}"}, + { "open": "[", "close": "]"}, + { "open": "(", "close": ")" }, + { "open": "'", "close": "'" }, + { "open": "\"", "close": "\"" }, + { "open": "<!--", "close": "-->", "notIn": [ "comment", "string" ]} + ], + "surroundingPairs": [ + { "open": "<%", "close": "%>" }, + { "open": "'", "close": "'" }, + { "open": "\"", "close": "\"" }, + { "open": "{", "close": "}"}, + { "open": "[", "close": "]"}, + { "open": "(", "close": ")" }, + { "open": "<", "close": ">" } + ], + "folding": { + "markers": { + "start": "^\\s*<%#\\s*#region\\b.*%>", + "end": "^\\s*<%#\\s*#endregion\\b.*%>" + } + } +} diff --git a/syntax/vscode/package.json b/syntax/vscode/package.json new file mode 100644 index 0000000..3dbac8a --- /dev/null +++ b/syntax/vscode/package.json @@ -0,0 +1,27 @@ +{ + "name": "vscode-sailfish", + "displayName": "vscode-sailfish", + "description": "Syntax highlighting for sailfish templates in VSCode", + "version": "0.1.0", + "author": "Ryohei Machida ", + "license": "MIT", + "engines": { + "vscode": "^1.45.0" + }, + "categories": [ + "Programming Languages" + ], + "contributes": { + "languages": [{ + "id": "sailfish", + "aliases": ["sailfish"], + "extensions": [".stpl", ".html.stpl"], + "configuration": "./language-configuration.json" + }], + "grammars": [{ + "language": "sailfish", + "scopeName": "source.sailfish", + "path": 
"./syntaxes/sailfish.tmLanguage.json" + }] + } +} diff --git a/syntax/vscode/screenshot.png b/syntax/vscode/screenshot.png new file mode 100644 index 0000000..af25cf8 Binary files /dev/null and b/syntax/vscode/screenshot.png differ diff --git a/syntax/vscode/syntaxes/sailfish.tmLanguage.json b/syntax/vscode/syntaxes/sailfish.tmLanguage.json new file mode 100644 index 0000000..11e36f1 --- /dev/null +++ b/syntax/vscode/syntaxes/sailfish.tmLanguage.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "name": "sailfish", + "patterns": [ + { + "include": "#commentblock" + }, + { + "include": "#codeblock" + }, + { + "include": "text.html.basic" + } + ], + "repository": { + "commentblock": { + "patterns": [{ + "name": "comment.block.embedded.html", + "begin": "<(%|\\?)#", + "end": "(%|\\?)>", + "captures": { + "0": { + "name": "punctuation.definition.comment.html" + } + } + }] + }, + "codeblock": { + "patterns": [{ + "name": "source.rust.embedded.html", + "begin": "<(%|\\?)(=|-)?", + "beginCaptures": { + "0": { + "name": "punctuation.definition.tag.begin.html" + } + }, + "end": "(%|\\?)>", + "endCaptures": { + "0": { + "name": "punctuation.definition.tag.end.html" + } + }, + "patterns": [{ + "include": "source.rust" + }] + }] + } + }, + "scopeName": "source.sailfish" +}