#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# cruft_blame.py: given a file and a portage logfile, guess which package owns
# the file by comparing mtime with start and end times of merges
# 
# Copyright © 2004 Ed Catmur <ed@catmur.co.uk>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

import sys, os, os.path, stat
import time
import string, re, textwrap

sys.path = ["/usr/lib/portage/pym"]+sys.path
import portage

import locale
locale.setlocale(locale.LC_ALL, '')

import itertools

def memoize(f):
	"""Modify a function to speed it up by remembering its return value for
	each argument list."""
	cache = {}
	def fm(*args):
		if not cache.has_key(args):
			cache[args] = apply(f, args)
		return cache[args]
	return fm

def imemoize(f):
	"""Modify a generator to return a common iterator for each argument
	list, caching the initial segment of the iterated list."""
	cache = {}
	def fm(*args):
		if not cache.has_key(args):
			cache[args] = ([], apply(f, args))
		queue = cache[args][0]
		source = cache[args][1]
		def ifm():
			counter = 0
			while True:
				for element in queue[counter:]:
					yield element
				counter = len(queue)
				try:
					queue.append(source.next())
				except StopIteration:
					break
		return ifm()
	return fm

def rFile(filename):
	"""Read a file backwards."""
	def inner():
		f = file(filename)
		f.seek(0, 2)
		pos = f.tell()
		if pos > 0:
			buffer = ''
			for i in range(pos - (pos % 1000), -1, -1000):
				f.seek(i)
				buffer = f.read(1000) + buffer
				lines = buffer.split('\n')
				for x in lines[:0:-1]:
					yield x
				buffer = lines[0]
			yield buffer
	t = inner()
	try:
		s = t.next()
	except StopIteration:
		return
	if s != '':
		yield s
	for s in t:
		yield s + '\n'

class LogFile:
	def defaultParser(iter):
		for line in iter:
			try:
				stamp, entry = line.split(':', 1)
				yield int(stamp), entry
			except (ValueError, IndexError):
				pass
	defaultParser = staticmethod(imemoize(defaultParser))

	def __init__(self, filename, reversed = False, parser = None):
		if parser is None:
			parser = self.defaultParser
		self.reversed = reversed
		def lines(selector = None):
			l = parser((reversed and rFile or file)(filename))
			if selector is None:
				return l
			else:
				return selector(l)
		self.lines = imemoize(lines)
	
	def reselector(*args):
		def inner(lines):
			for stamp, entry in lines:
				m = tuple([myre.match(entry) for myre in args])
				if m != (None,) * len(args):
					yield (stamp, ) + m
		return imemoize(inner)
	reselector = staticmethod(memoize(reselector))
	
	def intervals(self, entries):
		"""Return intervals (tok, start_stamp, end_stamp) ordered by 
		stamp furthest from initial end of log, gleaned from the stream 
		processor entries yielding (stamp, start_tok, end_tok) tuples 
		ordered by stamp."""
		near_stamp = {}
		for stamp, s_tok, e_tok in self.lines(entries):
			try:
				if self.reversed and s_tok:
					yield s_tok, stamp, near_stamp[s_tok]
					del near_stamp[s_tok]
				elif self.reversed and e_tok:
					near_stamp[e_tok] = stamp
				elif not self.reversed and s_tok:
					near_stamp[s_tok] = stamp
				elif not self.reversed and e_tok:
					yield e_tok, near_stamp[e_tok], stamp
					del near_stamp[e_tok]
			except KeyError:
				pass
	intervals = imemoize(intervals)

def ctime(secs):
	return time.strftime("%x %X", time.localtime(secs))

class EmergeLog(LogFile):
	start_re = re.compile("  >>> emerge \(\d+ of \d+\) (.+) to ")
	end_re = re.compile("  ::: completed emerge \(\d+ of \d+\) (.+) to ")

	def __init__(self, elog = "/var/log/emerge.log"):
		LogFile.__init__(self, elog, True)
		try:
			self.logstart = LogFile(elog).lines().next()[0]
		except IndexError:
			raise "Log file %s not readable. Stop." % elog
		
	def entries(lines):
		for stamp, start_m, end_m in LogFile.reselector(
				EmergeLog.start_re, EmergeLog.end_re)(lines):
			if start_m:
				yield stamp, start_m.group(1), None
			elif end_m:
				yield stamp, None, end_m.group(1)
	entries = staticmethod(imemoize(entries))

	def intervals(self):
		return LogFile.intervals(self, self.entries)

	def owners(self, mtime, limit = 86400):
		"""Returns (start, end, cpvr) tuples for packages whose start 
		and end of merging encompass mtime, most recent end first"""
		if mtime < self.logstart:
			raise IndexError, "%d is before start of log" % mtime
		for cpvr, start, end in self.intervals():
			if start + limit < mtime:
				return
			elif start <= mtime and end >= mtime:
				yield (start, end, cpvr)

def blame_file(log, filename):
	if not os.path.exists(filename):
		print "'%s' does not exist." % filename
		return
	filename = os.path.abspath(filename)
	try:
		stats = os.lstat(filename)
		if stat.S_ISLNK(stats.st_mode):
			print textwrap.fill("'%s' is a symbolic link to '%s'. This can be caused by a symbolic link being created in pkg_postinst() or by a library version symlink being omitted, and later being created by ldconfig. If you can determine why this symlink exists and it is reproducible, please report it." % (filename, os.path.realpath(filename)))
		if stats.st_mtime < log.logstart:
			print textwrap.fill("'%s' has mtime of %s, which is before emerge log starts. Sorry." % (filename, ctime(stats.st_mtime)))
			return
		print "%s: %s" % (filename, ctime(stats.st_mtime))
		f_owners = list(log.owners(stats.st_mtime))
		if not f_owners:
			print textwrap.fill("Cannot find any possible owners. Possible reasons are that the owner was merged using ebuild instead of emerge, that it is generated or cached data, that it is a config file, or that you created it. If you believe this is reproducible, please report it.")
			return
		print "possible owners: "
		for start, end, cpvr in f_owners:
			print "%-40s%20s%20s" % (cpvr + ':', ctime(start), ctime(end))
			if portage.db["/"]["vartree"].dbapi.match("=" + cpvr):
				print "\tis currently installed."
				continue
			cat, pkgname, version, rev = portage.catpkgsplit(cpvr)
			mylist = portage.db["/"]["vartree"].dbapi.match("%s/%s" % (cat, pkgname))
			if not mylist:
				print "\tpackage is not currently installed."
				continue
			best = portage.best(mylist)
			print "\tlatest installed version is: %s" % best
	except OSError:
		print "Cannot stat '%s'.\n" % filename
	except IndexError:
		pass

if __name__ == "__main__":
	log = EmergeLog()
	for filename in sys.argv[1:]:
		blame_file(log, filename)
