Use timestamps for change detection

This commit changes the caching mechanism to use the timestamp of
all included templates instead of hashing the contents of the
root template.

The primary downside is that every build now has to parse, transform,
and resolve the input templates instead of just reading and hashing
the data — I didn't measure how much more expensive this is, but it
seems unavoidable if we need information about child templates.
Maybe a hacky regex-based approach could work as an alternative to
a complete parse.

On the bright side, this approach rebuilds parent templates correctly
when a child template has changed, and doesn't require garbage collection
for old compiled template files.
This commit is contained in:
Newton Ni 2023-10-02 12:31:14 -05:00
parent a93f66f7ea
commit 01ba06707b
3 changed files with 127 additions and 78 deletions

View File

@ -12,7 +12,7 @@ use crate::optimizer::Optimizer;
use crate::parser::Parser;
use crate::resolver::Resolver;
use crate::translator::{TranslatedSource, Translator};
use crate::util::{copy_filetimes, read_to_string, rustfmt_block};
use crate::util::{read_to_string, rustfmt_block};
#[derive(Default)]
pub struct Compiler {
@ -42,34 +42,35 @@ impl Compiler {
translator.translate(stream)
}
pub fn compile_file(
pub fn resolve_file(
&self,
input: &Path,
output: &Path,
) -> Result<CompilationReport, Error> {
// TODO: introduce cache system
let input = input
.canonicalize()
.map_err(|_| format!("Template file not found: {:?}", input))?;
) -> Result<(TranslatedSource, CompilationReport), Error> {
let include_handler = Arc::new(|child_file: &Path| -> Result<_, Error> {
Ok(self.translate_file_contents(&*child_file)?.ast)
});
let resolver = Resolver::new().include_handler(include_handler);
let mut tsource = self.translate_file_contents(input)?;
let mut report = CompilationReport { deps: Vec::new() };
let r = resolver.resolve(input, &mut tsource.ast)?;
report.deps = r.deps;
Ok((tsource, report))
}
pub fn compile_file(
&self,
input: &Path,
tsource: TranslatedSource,
output: &Path,
) -> Result<(), Error> {
let analyzer = Analyzer::new();
let optimizer = Optimizer::new().rm_whitespace(self.config.rm_whitespace);
let compile_file = |input: &Path,
let compile_file = |mut tsource: TranslatedSource,
output: &Path|
-> Result<CompilationReport, Error> {
let mut tsource = self.translate_file_contents(input)?;
let mut report = CompilationReport { deps: Vec::new() };
let r = resolver.resolve(&*input, &mut tsource.ast)?;
report.deps = r.deps;
-> Result<(), Error> {
analyzer.analyze(&mut tsource.ast)?;
optimizer.optimize(&mut tsource.ast);
@ -86,14 +87,10 @@ impl Compiler {
.chain_err(|| format!("Failed to write artifact into {:?}", output))?;
drop(f);
// FIXME: This is a silly hack to prevent output file from being tracked by
// cargo. Another better solution should be considered.
let _ = copy_filetimes(input, output);
Ok(report)
Ok(())
};
compile_file(&*input, &*output)
compile_file(tsource, output)
.chain_err(|| "Failed to compile template.")
.map_err(|mut e| {
e.source = fs::read_to_string(&*input).ok();

View File

@ -3,6 +3,7 @@ use quote::quote;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::io::Write;
use std::iter;
use std::path::{Path, PathBuf};
use std::time::Duration;
use std::{env, thread};
@ -10,9 +11,10 @@ use syn::parse::{ParseStream, Parser, Result as ParseResult};
use syn::punctuated::Punctuated;
use syn::{Fields, Ident, ItemStruct, LitBool, LitChar, LitStr, Token};
use crate::compiler::{CompilationReport, Compiler};
use crate::compiler::Compiler;
use crate::config::Config;
use crate::error::*;
use crate::util::filetime;
// options for `template` attributes
#[derive(Default)]
@ -116,10 +118,7 @@ fn filename_hash(path: &Path, config: &Config) -> String {
path_with_hash.push('-');
}
let input_bytes = std::fs::read(path).unwrap();
let mut hasher = DefaultHasher::new();
input_bytes.hash(&mut hasher);
config.hash(&mut hasher);
let hash = hasher.finish();
let _ = write!(path_with_hash, "{:016x}", hash);
@ -127,11 +126,10 @@ fn filename_hash(path: &Path, config: &Config) -> String {
path_with_hash
}
fn compile(
input_file: &Path,
output_file: &Path,
fn with_compiler<T, F: FnOnce(Compiler) -> Result<T, Error>>(
config: Config,
) -> Result<CompilationReport, Error> {
apply: F,
) -> Result<T, Error> {
struct FallbackScope {}
impl FallbackScope {
@ -155,7 +153,7 @@ fn compile(
let compiler = Compiler::with_config(config);
let _scope = FallbackScope::new();
compiler.compile_file(input_file, &*output_file)
apply(compiler)
}
fn derive_template_impl(tokens: TokenStream) -> Result<TokenStream, syn::Error> {
@ -199,14 +197,14 @@ fn derive_template_impl(tokens: TokenStream) -> Result<TokenStream, syn::Error>
let path = all_options.path.as_ref().ok_or_else(|| {
syn::Error::new(Span::call_site(), "`path` option must be specified.")
})?;
resolve_template_file(&*path.value(), &*config.template_dirs).ok_or_else(
|| {
resolve_template_file(&*path.value(), &*config.template_dirs)
.and_then(|path| path.canonicalize().ok())
.ok_or_else(|| {
syn::Error::new(
path.span(),
format!("Template file {:?} not found", path.value()),
)
},
)?
})?
};
merge_config_options(&mut config, &all_options);
@ -221,52 +219,107 @@ fn derive_template_impl(tokens: TokenStream) -> Result<TokenStream, syn::Error>
std::fs::create_dir_all(&output_file.parent().unwrap()).unwrap();
const DEPS_END_MARKER: &str = "=--end-of-deps--=";
let dep_file = output_file.with_extension("deps");
// This makes sure max 1 process creates a new file, "create_new" check+create is an
// atomic operation. Cargo sometimes runs multiple macro invocations for the same
// file in parallel, so that's important to prevent a race condition.
let dep_file_status = std::fs::OpenOptions::new()
.write(true)
.create_new(true)
.open(&dep_file);
// On-disk lock used to serialize template compilation across concurrent
// proc-macro invocations: `Lock::new` atomically creates the file at `path`
// (via `create_new`, an atomic check+create), and dropping the `Lock`
// removes the file to release the lock.
struct Lock<'path> {
    // Location of the lock file; borrowed for the lifetime of the lock.
    path: &'path Path,
}
let deps = match dep_file_status {
Ok(mut file) => {
// Successfully created new .deps file. Now template needs to be compiled.
let report = compile(&*input_file, &*output_file, config)
.map_err(|e| syn::Error::new(Span::call_site(), e))?;
for dep in &report.deps {
writeln!(file, "{}", dep.to_str().unwrap()).unwrap();
}
writeln!(file, "{}", DEPS_END_MARKER).unwrap();
report.deps
impl<'path> Lock<'path> {
fn new(path: &'path Path) -> std::io::Result<Self> {
std::fs::OpenOptions::new()
.write(true)
.create_new(true)
.open(path)
.map(|_| Lock { path })
}
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
// .deps file exists, template is already (currently being?) compiled.
let mut load_attempts = 0;
loop {
let dep_file_content = std::fs::read_to_string(&dep_file).unwrap();
let mut lines_reversed = dep_file_content.rsplit_terminator('\n');
if lines_reversed.next() == Some(DEPS_END_MARKER) {
// .deps file is complete, so we can continue.
break lines_reversed.map(PathBuf::from).collect();
}
impl<'path> Drop for Lock<'path> {
    /// Releases the lock by deleting the lock file.
    ///
    /// Panics if the file cannot be removed, because a stale lock file would
    /// stall every subsequent build of this template until it is deleted
    /// manually (or via `cargo clean`).
    fn drop(&mut self) {
        // BUG FIX: `expect` takes a plain `&str` and does not interpolate
        // format arguments, so the original message printed a literal `{}`
        // instead of the lock-file path. Build the message with `panic!`
        // so the offending path (and the I/O error) are actually shown.
        std::fs::remove_file(self.path).unwrap_or_else(|e| {
            panic!(
                "Failed to clean up lock file {:?}: {}. Delete it manually, or run `cargo clean`.",
                self.path, e
            )
        });
    }
}
let deps = with_compiler(config, |compiler| {
let dep_path = output_file.with_extension("deps");
let lock_path = output_file.with_extension("lock");
let lock = Lock::new(&lock_path);
match lock {
Ok(lock) => {
let (tsource, report) = compiler.resolve_file(&input_file)?;
let output_filetime = filetime(&output_file);
let input_filetime = iter::once(&input_file)
.chain(&report.deps)
.map(|path| filetime(path))
.max()
.expect("Iterator contains at least `input_file`");
// Recompile template if any included templates were changed
// since the last time we compiled.
if input_filetime > output_filetime {
compiler.compile_file(&input_file, tsource, &output_file)?;
// Write access to `dep_path` is serialized by `lock`.
let mut dep_file = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&dep_path)
.unwrap_or_else(|e| {
panic!("Failed to open {:?}: {}", dep_path, e)
});
// Write out dependencies for concurrent processes to reuse.
for dep in &report.deps {
writeln!(&mut dep_file, "{}", dep.to_str().unwrap()).unwrap();
}
// Prevent output file from being tracked by Cargo. Without this hack,
// every change to a template causes two recompilations:
//
// 1. Change a template at timestamp t.
// 2. Cargo detects template change due to `include_bytes!` macro below.
// 3. Sailfish compiler generates an output file with a later timestamp t'.
// 4. Build finishes with timestamp t.
// 5. Next cargo build detects output file with timestamp t' > t and rebuilds.
// 6. Sailfish compiler does not regenerate output due to timestamp logic above.
// 7. Build finishes with timestamp t'.
let _ = filetime::set_file_times(
&output_file,
input_filetime,
input_filetime,
);
}
// .deps file exists, but appears incomplete. Wait a bit and try again.
load_attempts += 1;
if load_attempts > 100 {
panic!("file {:?} is incomplete. Try deleting it.", dep_file);
drop(lock);
Ok(report.deps)
}
// Lock file exists, template is already (currently being?) compiled.
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
let mut load_attempts = 0;
while lock_path.exists() {
load_attempts += 1;
if load_attempts > 100 {
panic!("Lock file {:?} is stuck. Try deleting it.", lock_path);
}
thread::sleep(Duration::from_millis(10));
}
thread::sleep(Duration::from_millis(10));
Ok(std::fs::read_to_string(&dep_path)
.unwrap()
.trim()
.lines()
.map(PathBuf::from)
.collect())
}
Err(e) => panic!("{:?}: {}. Maybe try `cargo clean`?", lock_path, e),
}
Err(e) => panic!("{:?}: {}. Maybe try `cargo clean`?", dep_file, e),
};
})
.map_err(|e| syn::Error::new(Span::call_site(), e))?;
let input_file_string = input_file
.to_str()

View File

@ -1,4 +1,3 @@
use filetime::FileTime;
use std::fs;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
@ -78,10 +77,10 @@ pub fn rustfmt_block(source: &str) -> io::Result<String> {
}
}
pub fn copy_filetimes(input: &Path, output: &Path) -> io::Result<()> {
let mtime = fs::metadata(input)
#[cfg(feature = "procmacro")]
pub fn filetime(input: &Path) -> filetime::FileTime {
use filetime::FileTime;
fs::metadata(input)
.and_then(|metadata| metadata.modified())
.map_or(FileTime::zero(), |time| FileTime::from_system_time(time));
filetime::set_file_times(output, mtime, mtime)
.map_or(FileTime::zero(), |time| FileTime::from_system_time(time))
}