#[macro_use] extern crate log; #[macro_use] extern crate lazy_static; use regex::Regex; use std::fs::{File, OpenOptions}; use std::io::{BufReader, Write as ioWrite, StdoutLock, StdinLock}; use std::io; use std::str::FromStr; use std::convert::TryFrom; use std::io::BufRead; use std::ops::{Add, Mul, MulAssign, AddAssign, SubAssign, Sub}; use std::fmt::{self, Display}; use serde::export::fmt::Debug; use core::fmt::Write as fmtWrite; const LOG_LEVELS: [&str; 5] = ["error", "warn", "info", "debug", "trace"]; const SPAMMY_LIBS: [&str; 5] = ["tokio_reactor", "hyper", "reqwest", "mio", "want"]; fn main() { let argv = clap::App::new("srtune") .version(env!("CARGO_PKG_VERSION")) .about("Modify a .srt file to match a video. Input and output can be a file or stream, \ so you pipe multiple invocations to create more complex operations. However, a single \ invocation should suffice in most cases.\n\ \n\ Times are specified with colons and always include seconds (HH:MM:SS, MM:SS, 0:SS). \ Decimal point can be either period or comma, so times can be copied directly from the \ .srt file. Numbers without colons are assumed to be subtitle indices.\n\ \n\ The tool can be used iteratively, adjusting the invocation until the generated \ subtitle file matches the audio track. As such, times accepted by its parameters \ are, by default, the ones seen in the output file (after shifts and moving), while \ indices are those from the input file.\n\ \n\ Indices are normally not renumbered, so the output file can be used as a reference \ for both times and indices. The flag '--renumber' will give each output entry a new \ sequential number. Please note that, once renumbered, the indices in the output file \ should no longer be used in the command invocation, as there can be a mismatch. They \ can be used when piped into a new process, of course. ") .arg(clap::Arg::with_name("input") .value_name("INFILE") .help("Input file, leave out for stdin"), ) .arg(clap::Arg::with_name("output") .short("o") .long("output") .value_name("OUTFILE") .help("Output file, defaults to stdout"), ) // .arg(clap::Arg::with_name("drop") // .short("d") // .long("drop") // .value_name("ITEMS") // .help("Drop one or multiple entries (separated by comma). Entries can be TIME \ // or INDEX. Two entries can be joined by '..' to specify a range. Use '..ENTRY'\ // or 'ENTRY..' to drop all in one direction. Ranges are inclusive."), // ) .arg(clap::Arg::with_name("move") .short("m") .long("move") .value_name("OFFSET") .help("Move all subtitles in time (e.g 12:00.15 or -0:44)"), ) .arg(clap::Arg::with_name("automove") .short("M") .long("automove") .value_name("ENTRY=VIDEOTIME") .multiple(true) .help("Move subtitles starting at a given time or index to align with \ a matching audio track time. This argument can be given multiple times. \ Some subtitles may be dropped if they fall outside the timeline after \ the move."), ) .arg(clap::Arg::with_name("scale") .short("s") .long("scale") .value_name("RATIO") .help("Scale all subtitle times and durations to compensate for bitrate \ differences. 1 means identity, 1.1 makes all times 10% longer. Scaling is \ relative to the first emitted subtitle; align it with '--move'. This option \ has no effect if '--autoscale' is used."), ) .arg(clap::Arg::with_name("autoscale") .short("S") .long("autoscale") .value_name("SUBTIME=VIDEOTIME") .help("Calculate scaling based on a perceived difference. The scaling is \ related to the first emitted subtitle; align it with '--move'. \ This overrides '--scale'."), ) .arg(clap::Arg::with_name("durscale") .short("d") .long("durscale") .value_name("RATIO") .help("Scale durations, can be combined with '--scale' or '--autoscale'. The \ given value will always be multiplied by the absolute time scale. 1 means \ identity, 1.1 makes all times 10% longer."), ) .arg(clap::Arg::with_name("renumber") .short("r") .long("renumber") .help("Renumber all emitted entries with sequential 1-based numbers."), ) .arg(clap::Arg::with_name("v").short("v").multiple(true).help( "Increase the logging verbosity; can be used multiple times", )) .get_matches(); let mut log_level = "info".to_owned(); if argv.is_present("v") { // bump verbosity if -v's are present let pos = LOG_LEVELS .iter() .position(|x| x == &log_level) .unwrap(); log_level = match LOG_LEVELS .iter() .nth(pos + argv.occurrences_of("v") as usize) { Some(new_level) => new_level.to_string(), None => "trace".to_owned(), }; } //println!("LEVEL={}", log_level); // init logging let env = env_logger::Env::default().default_filter_or(log_level); let mut builder = env_logger::Builder::from_env(env); let lib_level = log::LevelFilter::Info; for lib in &SPAMMY_LIBS { builder.filter_module(lib, lib_level); } builder.init(); let shift = match argv.value_of("move") { Some(s) => { SubDuration::try_from(s).expect("Bad --move format") } None => SubDuration(0f64) }; let scale = match argv.value_of("scale") { Some(s) => { s.parse().expect("Bad --scale format") } None => 1f64 }; let durscale = match argv.value_of("durscale") { Some(s) => { s.parse().expect("Bad --durscale format") } None => 1f64 }; // always also shrink durations let autoscale = match argv.value_of("autoscale") { Some(s) => { let halves : Vec<&str> = s.split("=").collect(); if halves.len() != 2 { panic!("Bad --autoscale format, should be SUBTIME=VIDEOTIME") } let (first, second) = (halves[0], halves[1]); if !first.contains(':') || !second.contains(':') { panic!("'--autoscale' requires two times"); } let subtime = SubDuration::try_from(first).expect("Bad --autoscale time format").as_instant(); let vidtime = SubDuration::try_from(second).expect("Bad --autoscale time format").as_instant(); Some((subtime, vidtime)) } None => None }; let mut automove = Vec::::new(); let mut automove_indices = vec![]; match argv.values_of("automove") { Some(ss) => { for s in ss { let halves: Vec<&str> = s.split("=").collect(); if halves.len() != 2 { panic!("Bad --automove format, should be ENTRY=VIDEOTIME") } let (first, second) = (halves[0], halves[1]); if !second.contains(':') { panic!("'--automove' requires time after '='"); } let vidtime = SubDuration::try_from(second).expect("Bad --automove format").as_instant(); if first.contains(':') { let subtime = SubDuration::try_from(first).expect("Bad --automove format").as_instant(); automove.push(AutoMoveTag::ByTime(subtime, vidtime)); } else { let index : u32 = first.parse().expect("Bad --automove format"); if automove_indices.contains(&index) { panic!("Index {} already used in automove.", index); } automove_indices.push(index); automove.push(AutoMoveTag::ByIndex(index, vidtime)); } } } None => (/* no automoves */) } debug!("Automove: {:?}", automove); let inf = argv.value_of("input"); let outf = argv.value_of("output"); let stdin = io::stdin(); let stdout = io::stdout(); let renumber = argv.is_present("renumber"); let opts = TransformOpts { renumber, autoscale, durscale, scale, shift, automove, }; let lines_iterator: Box = match inf { None => { Box::new(StdinSubsInput::new(stdin.lock())) } Some(f) => { let file = File::open(f).expect(&format!("Could not open file: {:?}", f)); Box::new(FileSubsInput::new(file)) } }; let outfile: Box = match outf { None => { Box::new(StdoutSubsOutput::new(stdout.lock())) } Some(f) => { let file = OpenOptions::new() .create(true) .truncate(true) .write(true) .open(f) .expect(&format!("Could not open file: {:?}", f)); Box::new(FileSubsOutput::new(file)) } }; transform_subtitles(lines_iterator, outfile, opts); } //region SubInput trait SubsInput : Iterator {} struct StdinSubsInput<'a> { inner : io::Lines> } impl<'a> Iterator for StdinSubsInput<'a> { type Item = String; fn next(&mut self) -> Option { self.inner.next().map_or(None, Result::ok) } } impl<'a> StdinSubsInput<'a> { pub fn new(lock : StdinLock<'a>) -> Self { Self { inner : lock.lines() } } } impl<'a> SubsInput for StdinSubsInput<'a> {} struct FileSubsInput { inner : io::Lines> } impl FileSubsInput { pub fn new(file : File) -> Self { Self { inner : BufReader::new(file).lines() } } } impl Iterator for FileSubsInput { type Item = String; fn next(&mut self) -> Option { self.inner.next().map_or(None, Result::ok) } } impl SubsInput for FileSubsInput {} //endregion //region SubOutput trait SubsOutput { fn emit(&mut self, subtitle : Subtitle); } struct StdoutSubsOutput<'a> { inner : StdoutLock<'a> } impl<'a> StdoutSubsOutput<'a> { pub fn new(inner : StdoutLock<'a>) -> Self { Self { inner } } } impl<'a> SubsOutput for StdoutSubsOutput<'a> { fn emit(&mut self, subtitle: Subtitle) { self.inner.write(subtitle.to_string().as_bytes()).expect("failed to write"); } } struct FileSubsOutput { inner : File } impl FileSubsOutput { pub fn new(inner : File) -> Self { Self { inner } } } impl SubsOutput for FileSubsOutput { fn emit(&mut self, subtitle: Subtitle) { self.inner.write(subtitle.to_string().as_bytes()).expect("failed to write"); } } //endregion #[derive(Debug)] struct TransformOpts { renumber: bool, autoscale: Option<(SubInstant, SubInstant)>, durscale: f64, scale: f64, shift: SubDuration, automove: Vec, } #[derive(Debug)] enum AutoMoveTag { ByTime(SubInstant, SubInstant), ByIndex(u32, SubInstant) } #[derive(Debug,Default,Clone,Copy)] struct IterState { start_time : Option, renumber_i : u32, timeline_head : SubInstant, } fn transform_subtitles<'a>(mut lines : Box, mut outfile : Box, mut opts : TransformOpts) { debug!("Opts: {:#?}", opts); let mut istate = IterState::default(); let mut linebuf : Vec = vec![]; 'lines: while let Some(x) = lines.next() { let mut x = x.trim(); if x.starts_with('\u{feff}') { debug!("Stripping BOM mark"); x = &x[3..]; } let x = x.trim(); if x.is_empty() { continue; } // 236 // 00:18:01,755 --> 00:18:03,774 // (掃除機の音) // う~ん…。 match u32::from_str(x) { Ok(num) => { // println!("Entry {}", num); let datesrow = lines.next().unwrap(); if datesrow.contains(" --> ") { let mut halves = datesrow.split(" --> "); let (first, second) = (halves.next().unwrap(), halves.next().unwrap()); let sub_start = SubInstant::try_from(first).unwrap(); let sub_end = SubInstant::try_from(second).unwrap(); linebuf.clear(); 'text: while let Some(x) = lines.next() { if x.is_empty() { break 'text; // space between the entries } linebuf.push(x); } let mut subtitle = Subtitle { num, start: sub_start, dur: sub_end - sub_start, text: linebuf.join("\n"), }; if istate.start_time.is_none() { debug!("Scaling anchored at {} (#{})", sub_start, num); istate.start_time = Some(sub_start); if let Some((mut subt, mut vidt)) = opts.autoscale { debug!("Autoscale: VT {} -> ST {}", vidt, subt); subt -= sub_start; vidt -= sub_start + opts.shift; if subt.0 <= 0f64 { panic!("Error in autoscale, start time is negative or zero."); } if vidt.0 <= 0f64 { panic!("Error in autoscale, end time is negative or zero."); } debug!(" relative to #{}, after \"move\": VT {} -> ST {}", num, vidt, subt); opts.scale = vidt.0 / subt.0; debug!("Resolved scale as {}", opts.scale); } opts.durscale *= opts.scale; debug!("Duration scaling is {}", opts.durscale); } if opts.scale != 1f64 { let scaled = subtitle.start.scale(istate.start_time.unwrap(), opts.scale); trace!("Scale #{} ({}) -> {}", num, subtitle.start, scaled); subtitle.start = scaled; } subtitle.dur *= opts.durscale; // TODO prevent durations overlap (will need to buffer one entry) let would_be_shifted_start = subtitle.start + opts.shift; // TODO use drain_filter when stable let mut to_drop = vec![]; for (i, amove) in opts.automove.iter().enumerate() { match amove { AutoMoveTag::ByIndex(idx, ref vidt) => { if num >= *idx { let dif = *vidt - would_be_shifted_start; debug!("Move by index #{} starts at #{} ({}) -> {}, diff {}", *idx, num, subtitle.start, *vidt, dif); opts.shift += dif; to_drop.push(i); } else if would_be_shifted_start > *vidt { warn!("Discarding out-of-order entry #{} @ {} (timeline head is {})", num, subtitle.start, istate.timeline_head); continue 'lines; } } AutoMoveTag::ByTime(ref subt, ref vidt) => { if would_be_shifted_start >= *subt { let dif = *vidt - *subt; debug!("Move by time {} starts at #{} ({}) -> {}, diff {}", subt, num, subtitle.start, *vidt, dif); opts.shift += dif; to_drop.push(i); } } } } for i in &to_drop { let x = opts.automove.swap_remove(*i); trace!("Clean up: {:?}", x); } if !to_drop.is_empty() { debug!("New shift: {}, Timeline head: {}", opts.shift, istate.timeline_head); } if opts.shift.0 != 0f64 { let shifted = subtitle.start + opts.shift; trace!("Shift #{} ({}) by {} -> {}", num, subtitle.start, opts.shift, shifted); subtitle.start = shifted; } if subtitle.start <= istate.timeline_head { warn!("Discarding out-of-order entry #{} @ {} (timeline head is {})", num, subtitle.start, istate.timeline_head); continue 'lines; } if subtitle.start.is_negative() { warn!("Discarding negative time entry #{} @ {:.3}s", num, sub_start); continue 'lines; } istate.timeline_head = subtitle.start; if opts.renumber { istate.renumber_i += 1; subtitle.num = istate.renumber_i; } outfile.emit(subtitle); } } Err(e) => { error!("couldnt parse >{}<: {}", x, e); for b in x.as_bytes() { error!("{:#02x} - {}", b, b); } error!("\n"); } } } } //region Time types #[derive(Default, Copy, Clone, PartialEq, PartialOrd)] struct SubInstant(f64); impl Debug for SubInstant { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "Time({})", self) } } impl SubInstant { fn is_negative(&self) -> bool { self.0 < 0f64 } } #[derive(Default, Copy, Clone, PartialEq, PartialOrd)] struct SubDuration(f64); impl Debug for SubDuration { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "Duration({})", self) } } impl Display for SubDuration { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { let sign = self.0.signum(); let mut secs = self.0.abs(); let hours = (secs / 3600f64).floor(); secs -= hours * 3600f64; let mins = (secs / 60f64).floor(); secs -= mins * 60f64; let msecs = ((secs % 1f64) * 1000f64).round(); secs = secs.floor(); if sign.is_sign_negative() { f.write_char('-')?; } if hours > 0f64 { write!(f, "{:02}:", hours)?; } if hours > 0f64 || mins > 0f64 { write!(f, "{:02}:{:02},{:03}", mins, secs, msecs) } else { write!(f, "{},{:03}", secs, msecs) } } } impl Add for SubInstant { type Output = SubInstant; fn add(self, rhs: SubDuration) -> Self::Output { SubInstant(self.0 + rhs.0) } } impl Sub for SubInstant { type Output = SubInstant; fn sub(self, rhs: SubDuration) -> Self::Output { SubInstant(self.0 - rhs.0) } } impl Sub for SubInstant { type Output = SubDuration; fn sub(self, rhs: SubInstant) -> Self::Output { SubDuration(self.0 - rhs.0) } } impl Mul for SubDuration { type Output = SubDuration; fn mul(self, rhs: f64) -> Self::Output { SubDuration(self.0 * rhs) } } impl MulAssign for SubDuration { fn mul_assign(&mut self, rhs: f64) { self.0 *= rhs; } } impl AddAssign for SubDuration { fn add_assign(&mut self, rhs: f64) { self.0 += rhs; } } impl SubInstant { /// Scale by a factor with a custom start time pub fn scale(&self, start: SubInstant, factor: f64) -> SubInstant { SubInstant(start.0 + (self.0 - start.0) * factor) } } impl AddAssign for SubInstant { fn add_assign(&mut self, rhs: f64) { self.0 += rhs; } } impl AddAssign for SubInstant { fn add_assign(&mut self, rhs: SubInstant) { self.0 += rhs.0; } } impl SubAssign for SubInstant { fn sub_assign(&mut self, rhs: SubInstant) { self.0 -= rhs.0; } } impl SubAssign for SubInstant { fn sub_assign(&mut self, rhs: SubDuration) { self.0 -= rhs.0; } } impl SubAssign for SubDuration { fn sub_assign(&mut self, rhs: SubDuration) { self.0 -= rhs.0; } } impl AddAssign for SubDuration { fn add_assign(&mut self, rhs: SubDuration) { self.0 += rhs.0; } } impl AddAssign for SubInstant { fn add_assign(&mut self, rhs: SubDuration) { self.0 += rhs.0; } } impl Display for SubInstant { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { let sign = self.0.signum(); let mut secs = self.0.abs(); let hours = (secs / 3600f64).floor(); secs -= hours * 3600f64; let minutes = (secs / 60f64).floor(); secs -= minutes * 60f64; let msecs = ((secs % 1f64) * 1000f64).round(); write!(f, "{}{:02}:{:02}:{:02},{:03}", if sign.is_sign_negative() { "-" } else { "" }, hours, minutes, secs.floor(), msecs) } } //endregion #[derive(Clone, Debug)] struct Subtitle { num: u32, start: SubInstant, dur: SubDuration, text: String, } impl Display for Subtitle { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "{}\n{} --> {}\n{}\n\n", self.num, self.start, self.start + self.dur, self.text ) } } impl TryFrom<&str> for SubInstant { type Error = failure::Error; fn try_from(value: &str) -> Result { lazy_static! { static ref DATE_RE: Regex = Regex::new(r"^(-)?(?P\d+):(?P\d+):(?P\d+(:?[,.]\d+)?)$").unwrap(); } match DATE_RE.captures(value) { Some(caps) => { let minus = if caps.get(1).is_some() { -1f64 } else { 1f64 }; let h = &caps["h"]; let m = &caps["m"]; let s = caps["s"].replace(",", "."); Ok(SubInstant(minus * (f64::from_str(h).unwrap() * 3600f64 + f64::from_str(m).unwrap() * 60f64 + f64::from_str(&s).unwrap()))) } None => Err(failure::format_err!("Error parsing time: {}", value)) } } } impl SubDuration { pub fn as_instant(&self) -> SubInstant { SubInstant(self.0) } } impl TryFrom<&str> for SubDuration { type Error = failure::Error; fn try_from(mut value: &str) -> Result { lazy_static! { static ref TIME_RE: Regex = Regex::new(r"^(?U)(?:(?P\d+):)?(?:(?P\d+):)?(?P\d+(?:[.,]\d+)?)$").unwrap(); } let negative = value.starts_with('-'); if negative { value = &value[1..]; } if value.starts_with(':') { // prefixed colon when someone is lazy to type 0: value = &value[1..]; } match TIME_RE.captures(value) { Some(caps) => { let minus = if negative { -1f64 } else { 1f64 }; let h = caps.name("h").map_or(0f64, |m| f64::from_str(m.as_str()).unwrap()); let m = caps.name("m").map_or(0f64, |m| f64::from_str(m.as_str()).unwrap()); let s = caps.name("s").map_or(0f64, |m| f64::from_str(&m.as_str().replace(",", ".")).unwrap()); Ok(SubDuration(minus * (h * 3600f64 + m * 60f64 + s))) } None => { Err(failure::format_err!("Error parsing time: {}", value)) } } } } #[test] fn test_parse_duration() { // this is used for user input on the command line let bad = SubDuration(-1f64); assert_eq!(SubDuration::try_from(":45678").unwrap_or(bad), SubDuration(45678f64), "integer secs with colon prefix"); assert_eq!(SubDuration::try_from("-:45678").unwrap_or(bad), SubDuration(-45678f64), "neg integer secs with colon prefix"); assert_eq!(SubDuration::try_from("45678").unwrap_or(bad), SubDuration(45678f64), "integer secs"); assert_eq!(SubDuration::try_from("1.23").unwrap_or(bad), SubDuration(1.23f64), "float secs with period"); assert_eq!(SubDuration::try_from("-1.23").unwrap_or(bad), SubDuration(-1.23f64), "MINUS float secs with period"); assert_eq!(SubDuration::try_from("1,23").unwrap_or(bad), SubDuration(1.23f64), "float secs with comma"); assert_eq!(SubDuration::try_from("2:1.15").unwrap_or(bad), SubDuration(121.15f64), "m:s.frac"); assert_eq!(SubDuration::try_from("2:01.15").unwrap_or(bad), SubDuration(121.15f64), "m:0s.frac"); assert_eq!(SubDuration::try_from("02:01.15").unwrap_or(bad), SubDuration(121.15f64), "0m:0s.frac"); assert_eq!(SubDuration::try_from("02:01,15").unwrap_or(bad), SubDuration(121.15f64), "0m:0s,frac"); assert_eq!(SubDuration::try_from("1:02:01,15").unwrap_or(bad), SubDuration(3721.15f64), "h:0m:0s,frac"); assert_eq!(SubDuration::try_from("1:02:01,15").unwrap_or(bad), SubDuration(3721.15f64), "h:0m:0s.frac"); assert_eq!(SubDuration::try_from("01:02:01,15").unwrap_or(bad), SubDuration(3721.15f64), "0h:0m:0s,frac"); assert_eq!(SubDuration::try_from("-01:02:01,15").unwrap_or(bad), SubDuration(-3721.15f64), "-0h:0m:0s,frac"); assert_eq!(SubDuration::try_from("18:01,755").unwrap_or(bad), SubDuration(1081.755f64)); assert_eq!(SubDuration::try_from("-18:01,755").unwrap_or(bad), SubDuration(-1081.755f64)); assert_eq!(SubDuration::try_from("-18:01.7").unwrap_or(bad), SubDuration(-1081.7f64)); assert_eq!(SubDuration::try_from("-12:18:01.7").unwrap_or(bad), SubDuration(-44281.7f64)); assert_eq!(SubDuration::try_from("0,000").unwrap_or(bad), SubDuration(0f64)); assert_eq!(SubDuration::try_from("-0,000").unwrap_or(bad), SubDuration(-0f64)); } #[test] fn test_parse_instant() { let bad = SubInstant(-1f64); assert_eq!(SubInstant::try_from("00:18:01,755").unwrap_or(bad), SubInstant(1081.755f64)); assert_eq!(SubInstant::try_from("00:18:01.755").unwrap_or(bad), SubInstant(1081.755f64)); assert_eq!(SubInstant::try_from("00:18:01.7").unwrap_or(bad), SubInstant(1081.7f64)); assert_eq!(SubInstant::try_from("0:18:1.7").unwrap_or(bad), SubInstant(1081.7f64)); assert_eq!(SubInstant::try_from("00:00:00,000").unwrap_or(bad), SubInstant(0f64)); assert_eq!(SubInstant::try_from("-01:00:00,000").unwrap_or(bad), SubInstant(-3600f64)); } #[test] fn test_stringify_instant() { assert_eq!(SubInstant::try_from("00:18:01,755").unwrap().to_string(), "00:18:01,755"); assert_eq!(SubInstant::try_from("-00:18:01,755").unwrap().to_string(), "-00:18:01,755"); assert_eq!(SubInstant::try_from("-00:18:01.7").unwrap().to_string(), "-00:18:01,700"); assert_eq!(SubInstant::try_from("00:00:00,000").unwrap().to_string(), "00:00:00,000"); assert_eq!(SubInstant::try_from("-00:00:00,000").unwrap().to_string(), "-00:00:00,000"); } #[test] fn test_stringify_duration() { assert_eq!(SubDuration::try_from("18:01,755").unwrap().to_string(), "18:01,755"); assert_eq!(SubDuration::try_from("-18:01,755").unwrap().to_string(), "-18:01,755"); assert_eq!(SubDuration::try_from("-18:01.7").unwrap().to_string(), "-18:01,700"); assert_eq!(SubDuration::try_from("-12:18:01.7").unwrap().to_string(), "-12:18:01,700"); assert_eq!(SubDuration::try_from("0,000").unwrap().to_string(), "0,000"); assert_eq!(SubDuration::try_from("-0,000").unwrap().to_string(), "-0,000"); }