#! /usr/bin/awk -f

# Merge multiple .mpsub files into a single .mpsub.

# Copyright 2007 Alexandre Oliva <lxoliva@fsfla.org>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, a copy can be downloaded from
# http://www.gnu.org/copyleft/gpl.html, or by writing to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.

# This script arranges for subtitles that would have been displayed by
# each separate input to be displayed for the same extent of time,
# along with whatever other inputs would have displayed at that time.

# partsep, unless it is empty (the default), is printed after each
# chunk of text.  When it is empty and targetlines is non-zero, blank
# lines are added after the first part, as many as needed to make the
# displayed text targetlines lines long, so that the first part is
# set as far apart from the rest as possible within this limit.
# maxlinelen can be set to warn about long lines, which would require
# line breaking and thus throw the line count off.

# Default settings.  They are assigned unconditionally, so a value
# given with -v (which awk applies before BEGIN runs) is overwritten
# here; a var=value operand placed among the input file names is
# applied after BEGIN and therefore does take effect.
BEGIN {
    # Text printed after each part's chunk.  Empty selects blank-line
    # padding instead (see targetlines).
    partsep = "";
    # Number of lines each merged entry is padded to; 0 disables
    # padding.
    targetlines = 6;
    # Padding line.  This can't be a regular blank, so it's U+2000
    # (EN QUAD), written as the octal escapes of its UTF-8 bytes so
    # that the invisible character cannot silently become a plain
    # space when the script is edited or transcoded.
    blankline = "\342\200\200";
    # Input text lines longer than this many characters are reported
    # on stderr.
    maxlinelen = 50;
    # Thresholds, in milliseconds, below which a merged wait (skip) or
    # duration (len) is reported on stderr; 0 disables the check.
    checkminskip = 0;
    checkminlen = 10;
}
# A "FORMAT=TIME" header line opens a new input part.
$0 == "FORMAT=TIME" {
    # The separator belongs to the part that just ended.
    sep[part] = partsep;
    part += 1;
    # Use up one index for the header itself.  In well-formed input no
    # text is stored under it (the timing-line rule bumps idx again
    # before any text arrives), so its empty str[] entry is what stops
    # the END loop at the end of the previous part.
    idx += 1;
    # The new part's first entry will be stored at the following index.
    curidx[part] = 1 + idx;
    next;
}
# An empty line separates entries: flag the next record that reaches
# the timing-line rule as a timing line.
length ($0) == 0 {
    nextistime = 1;
    next;
}
# Timing line: the first record after an empty line.  It starts a new
# entry; its first two fields are stored in skip[] and len[] as whole
# milliseconds, rounded to nearest.
nextistime {
    nextistime = 0;
    ++idx;
    skip[idx] = int (1000 * $1 + 0.5);
    len[idx] = int (1000 * $2 + 0.5);
    next;
}
# Text line: append it to the current entry, joining lines with
# newlines and counting them in lines[].
!nextistime {
    # A line consisting solely of "-" is stored (and reported) as "/*".
    if ($0 == "-")
	$0 = "/*";
    str[idx] = ((str[idx] == "") ? "" : (str[idx] "\n")) $0;
    lines[idx] += 1;
    # Over-long lines are only reported; they are still kept as is.
    if (length ($0) > maxlinelen)
	print FILENAME ":" FNR ": line too long: " $0 > "/dev/stderr";
    next;
}
# Render a millisecond count as seconds.  print would convert a
# non-integer number with OFMT ("%.6g" by default), rounding e.g.
# 1234.567 to 1234.57; "%.10g" keeps the milliseconds while yielding
# the same text as before for every value that was already exact.
function mpsub_seconds(ms) {
    return sprintf ("%.10g", ms / 1000);
}

# Write the merged stream.  Each part has a cursor, curidx[part], on
# its current entry; an entry with empty text means the part is
# exhausted.  skip[] is the time still to wait before an entry starts
# and len[] the time it still has to be shown, both in milliseconds.
# Every round of the outer loop emits one output entry.
END {
    nparts = part;
    print "FORMAT=TIME";
    print "";
    for (;;) {
	# Pass 1: find the shortest pending wait (minskip), the moment
	# the set of shown entries next changes (nextev: a shown entry
	# ends or another part's entry starts), and the number of text
	# lines of the entries starting at minskip (lin).
	minskip = -1;
	for (part = 1; part <= nparts; part++) {
	    idx = curidx[part];
	    if (str[idx] == "")
		continue;
	    if (minskip == -1) {
		minskip = skip[idx];
		nextev = minskip + len[idx];
		lin = lines[idx];
	    } else if (skip[idx] < minskip) {
		# This entry starts first; what was the earliest start
		# so far becomes the next event, unless this entry ends
		# even before that.
		nextev = minskip;
		minskip = skip[idx];
		lin = lines[idx];
		if (minskip + len[idx] < nextev)
		    nextev = minskip + len[idx];
	    } else if (skip[idx] == minskip) {
		lin += lines[idx];
		if (minskip + len[idx] < nextev)
		    nextev = minskip + len[idx];
	    } else if (skip[idx] > minskip) {
		if (skip[idx] < nextev)
		    nextev = skip[idx];
	    }
	}
	# No part has anything left.
	if (minskip == -1)
	    break;
	minlen = nextev - minskip;
	# Neither minskip nor minlen changes below, so test once.
	tooshort = ((checkminskip && minskip && minskip < checkminskip) \
		    || (checkminlen && minlen < checkminlen));
	if (tooshort)
	    print "Too short intervals: skip " mpsub_seconds(minskip) \
		" len " mpsub_seconds(minlen) " :" > "/dev/stderr";
	print mpsub_seconds(minskip), mpsub_seconds(minlen);
	# Pass 2: print the text of every entry shown in this interval
	# and account for the time the interval consumes.
	for (part = 1; part <= nparts; part++) {
	    idx = curidx[part];
	    if (str[idx] != "") {
		if (skip[idx] == minskip) {
		    # Shown now.  Whatever is left of it starts with no
		    # wait in the next round; once nothing is left, move
		    # on to the part's next entry.
		    skip[idx] -= minskip;
		    len[idx] -= minlen;
		    if (tooshort)
			print str[idx] > "/dev/stderr";
		    print str[idx];
		    if (len[idx] == 0)
			curidx[part]++;
		} else {
		    # Still waiting: minskip + minlen (= nextev) of its
		    # wait has now elapsed.
		    skip[idx] -= nextev;
		    if (skip[idx] < 0) {
			print "Something wrong here! part " part " len " len[idx] > "/dev/stderr";
			print str[idx] > "/dev/stderr";
		    }
		}
	    }
	    # After each part comes either its separator or, after the
	    # first part only, enough blank lines to reach targetlines.
	    if (sep[part] != "")
		print sep[part];
	    else if (targetlines && part == 1)
		while (lin < targetlines) {
		    lin++;
		    print blankline;
		}
	}
	print "";
    }
}
