From 01ba06707b4d9d057ecd83f94c1ba2e59ab295a7 Mon Sep 17 00:00:00 2001 From: Newton Ni Date: Mon, 2 Oct 2023 12:31:14 -0500 Subject: [PATCH] Use timestamps for change detection This commit changes the caching mechanism to use the timestamp of all included templates instead of hashing the contents of the root template. The primary downside is that every build now has to parse, transform, and resolve the input templates instead of just reading and hashing the data--I didn't measure how much more expensive this is, but it seems unavoidable if we need information about child templates. Maybe a hacky regex-based approach could work as an alternative to a complete parse. On the bright side, this approach rebuilds parent templates correctly when a child template has changed, and doesn't require garbage collection for old compiled template files. --- sailfish-compiler/src/compiler.rs | 45 ++++----- sailfish-compiler/src/procmacro.rs | 149 +++++++++++++++++++---------- sailfish-compiler/src/util.rs | 11 +-- 3 files changed, 127 insertions(+), 78 deletions(-) diff --git a/sailfish-compiler/src/compiler.rs b/sailfish-compiler/src/compiler.rs index f870b7f..69dbb2a 100644 --- a/sailfish-compiler/src/compiler.rs +++ b/sailfish-compiler/src/compiler.rs @@ -12,7 +12,7 @@ use crate::optimizer::Optimizer; use crate::parser::Parser; use crate::resolver::Resolver; use crate::translator::{TranslatedSource, Translator}; -use crate::util::{copy_filetimes, read_to_string, rustfmt_block}; +use crate::util::{read_to_string, rustfmt_block}; #[derive(Default)] pub struct Compiler { @@ -42,34 +42,35 @@ impl Compiler { translator.translate(stream) } - pub fn compile_file( + pub fn resolve_file( &self, input: &Path, - output: &Path, - ) -> Result { - // TODO: introduce cache system - - let input = input - .canonicalize() - .map_err(|_| format!("Template file not found: {:?}", input))?; - + ) -> Result<(TranslatedSource, CompilationReport), Error> { let include_handler = Arc::new(|child_file: &Path| -> Result<_, Error> { Ok(self.translate_file_contents(&*child_file)?.ast) }); let resolver = Resolver::new().include_handler(include_handler); + let mut tsource = self.translate_file_contents(input)?; + let mut report = CompilationReport { deps: Vec::new() }; + + let r = resolver.resolve(input, &mut tsource.ast)?; + report.deps = r.deps; + Ok((tsource, report)) + } + + pub fn compile_file( + &self, + input: &Path, + tsource: TranslatedSource, + output: &Path, + ) -> Result<(), Error> { let analyzer = Analyzer::new(); let optimizer = Optimizer::new().rm_whitespace(self.config.rm_whitespace); - let compile_file = |input: &Path, + let compile_file = |mut tsource: TranslatedSource, output: &Path| - -> Result { - let mut tsource = self.translate_file_contents(input)?; - let mut report = CompilationReport { deps: Vec::new() }; - - let r = resolver.resolve(&*input, &mut tsource.ast)?; - report.deps = r.deps; - + -> Result<(), Error> { analyzer.analyze(&mut tsource.ast)?; optimizer.optimize(&mut tsource.ast); @@ -86,14 +87,10 @@ impl Compiler { .chain_err(|| format!("Failed to write artifact into {:?}", output))?; drop(f); - // FIXME: This is a silly hack to prevent output file from being tracking by - // cargo. Another better solution should be considered. - let _ = copy_filetimes(input, output); - - Ok(report) + Ok(()) }; - compile_file(&*input, &*output) + compile_file(tsource, output) .chain_err(|| "Failed to compile template.") .map_err(|mut e| { e.source = fs::read_to_string(&*input).ok(); diff --git a/sailfish-compiler/src/procmacro.rs b/sailfish-compiler/src/procmacro.rs index e3a1a55..409bcbe 100644 --- a/sailfish-compiler/src/procmacro.rs +++ b/sailfish-compiler/src/procmacro.rs @@ -3,6 +3,7 @@ use quote::quote; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; use std::io::Write; +use std::iter; use std::path::{Path, PathBuf}; use std::time::Duration; use std::{env, thread}; @@ -10,9 +11,10 @@ use syn::parse::{ParseStream, Parser, Result as ParseResult}; use syn::punctuated::Punctuated; use syn::{Fields, Ident, ItemStruct, LitBool, LitChar, LitStr, Token}; -use crate::compiler::{CompilationReport, Compiler}; +use crate::compiler::Compiler; use crate::config::Config; use crate::error::*; +use crate::util::filetime; // options for `template` attributes #[derive(Default)] @@ -116,10 +118,7 @@ fn filename_hash(path: &Path, config: &Config) -> String { path_with_hash.push('-'); } - let input_bytes = std::fs::read(path).unwrap(); - let mut hasher = DefaultHasher::new(); - input_bytes.hash(&mut hasher); config.hash(&mut hasher); let hash = hasher.finish(); let _ = write!(path_with_hash, "{:016x}", hash); @@ -127,11 +126,10 @@ fn filename_hash(path: &Path, config: &Config) -> String { path_with_hash } -fn compile( - input_file: &Path, - output_file: &Path, +fn with_compiler Result>( config: Config, -) -> Result { + apply: F, +) -> Result { struct FallbackScope {} impl FallbackScope { @@ -155,7 +153,7 @@ fn compile( let compiler = Compiler::with_config(config); let _scope = FallbackScope::new(); - compiler.compile_file(input_file, &*output_file) + apply(compiler) } fn derive_template_impl(tokens: TokenStream) -> Result { @@ -199,14 +197,14 @@ fn derive_template_impl(tokens: TokenStream) -> Result let path = all_options.path.as_ref().ok_or_else(|| { syn::Error::new(Span::call_site(), "`path` option must be specified.") })?; - resolve_template_file(&*path.value(), &*config.template_dirs).ok_or_else( - || { + resolve_template_file(&*path.value(), &*config.template_dirs) + .and_then(|path| path.canonicalize().ok()) + .ok_or_else(|| { syn::Error::new( path.span(), format!("Template file {:?} not found", path.value()), ) - }, - )? + })? }; merge_config_options(&mut config, &all_options); @@ -221,52 +219,107 @@ fn derive_template_impl(tokens: TokenStream) -> Result std::fs::create_dir_all(&output_file.parent().unwrap()).unwrap(); - const DEPS_END_MARKER: &str = "=--end-of-deps--="; - let dep_file = output_file.with_extension("deps"); - // This makes sure max 1 process creates a new file, "create_new" check+create is an // atomic operation. Cargo sometimes runs multiple macro invocations for the same // file in parallel, so that's important to prevent a race condition. - let dep_file_status = std::fs::OpenOptions::new() - .write(true) - .create_new(true) - .open(&dep_file); + struct Lock<'path> { + path: &'path Path, + } - let deps = match dep_file_status { - Ok(mut file) => { - // Successfully created new .deps file. Now template needs to be compiled. - let report = compile(&*input_file, &*output_file, config) - .map_err(|e| syn::Error::new(Span::call_site(), e))?; - - for dep in &report.deps { - writeln!(file, "{}", dep.to_str().unwrap()).unwrap(); - } - writeln!(file, "{}", DEPS_END_MARKER).unwrap(); - - report.deps + impl<'path> Lock<'path> { + fn new(path: &'path Path) -> std::io::Result { + std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(path) + .map(|_| Lock { path }) } - Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { - // .deps file exists, template is already (currently being?) compiled. - let mut load_attempts = 0; - loop { - let dep_file_content = std::fs::read_to_string(&dep_file).unwrap(); - let mut lines_reversed = dep_file_content.rsplit_terminator('\n'); - if lines_reversed.next() == Some(DEPS_END_MARKER) { - // .deps file is complete, so we can continue. - break lines_reversed.map(PathBuf::from).collect(); + } + + impl<'path> Drop for Lock<'path> { + fn drop(&mut self) { + std::fs::remove_file(self.path) + .expect("Failed to clean up lock file {}. Delete it manually, or run `cargo clean`."); + } + } + + let deps = with_compiler(config, |compiler| { + let dep_path = output_file.with_extension("deps"); + let lock_path = output_file.with_extension("lock"); + let lock = Lock::new(&lock_path); + match lock { + Ok(lock) => { + let (tsource, report) = compiler.resolve_file(&input_file)?; + + let output_filetime = filetime(&output_file); + let input_filetime = iter::once(&input_file) + .chain(&report.deps) + .map(|path| filetime(path)) + .max() + .expect("Iterator contains at least `input_file`"); + + // Recompile template if any included templates were changed + // since the last time we compiled. + if input_filetime > output_filetime { + compiler.compile_file(&input_file, tsource, &output_file)?; + + // Write access to `dep_path` is serialized by `lock`. + let mut dep_file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&dep_path) + .unwrap_or_else(|e| { + panic!("Failed to open {:?}: {}", dep_path, e) + }); + + // Write out dependencies for concurrent processes to reuse. + for dep in &report.deps { + writeln!(&mut dep_file, "{}", dep.to_str().unwrap()).unwrap(); + } + + // Prevent output file from being tracked by Cargo. Without this hack, + // every change to a template causes two recompilations: + // + // 1. Change a template at timestamp t. + // 2. Cargo detects template change due to `include_bytes!` macro below. + // 3. Sailfish compiler generates an output file with a later timestamp t'. + // 4. Build finishes with timestamp t. + // 5. Next cargo build detects output file with timestamp t' > t and rebuilds. + // 6. Sailfish compiler does not regenerate output due to timestamp logic above. + // 7. Build finishes with timestamp t'. + let _ = filetime::set_file_times( + &output_file, + input_filetime, + input_filetime, + ); } - // .deps file exists, but appears incomplete. Wait a bit and try again. - load_attempts += 1; - if load_attempts > 100 { - panic!("file {:?} is incomplete. Try deleting it.", dep_file); + drop(lock); + Ok(report.deps) + } + // Lock file exists, template is already (currently being?) compiled. + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { + let mut load_attempts = 0; + while lock_path.exists() { + load_attempts += 1; + if load_attempts > 100 { + panic!("Lock file {:?} is stuck. Try deleting it.", lock_path); + } + thread::sleep(Duration::from_millis(10)); } - thread::sleep(Duration::from_millis(10)); + Ok(std::fs::read_to_string(&dep_path) + .unwrap() + .trim() + .lines() + .map(PathBuf::from) + .collect()) } + Err(e) => panic!("{:?}: {}. Maybe try `cargo clean`?", lock_path, e), } - Err(e) => panic!("{:?}: {}. Maybe try `cargo clean`?", dep_file, e), - }; + }) + .map_err(|e| syn::Error::new(Span::call_site(), e))?; let input_file_string = input_file .to_str() diff --git a/sailfish-compiler/src/util.rs b/sailfish-compiler/src/util.rs index 0a123f7..8e25e69 100644 --- a/sailfish-compiler/src/util.rs +++ b/sailfish-compiler/src/util.rs @@ -1,4 +1,3 @@ -use filetime::FileTime; use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; @@ -78,10 +77,10 @@ pub fn rustfmt_block(source: &str) -> io::Result { } } -pub fn copy_filetimes(input: &Path, output: &Path) -> io::Result<()> { - let mtime = fs::metadata(input) +#[cfg(feature = "procmacro")] +pub fn filetime(input: &Path) -> filetime::FileTime { + use filetime::FileTime; + fs::metadata(input) .and_then(|metadata| metadata.modified()) - .map_or(FileTime::zero(), |time| FileTime::from_system_time(time)); - - filetime::set_file_times(output, mtime, mtime) + .map_or(FileTime::zero(), |time| FileTime::from_system_time(time)) }