#!/usr/bin/env python3

# Parse timelog files, filter the lines and output interesting data
# Copyright (C) 2014 Tobias Bengfort <tobias.bengfort@posteo.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse
import os
from datetime import datetime, timedelta

DT_FORMAT = '%Y-%m-%d %H:%M'


def pairwise(items):
	"""Yield overlapping pairs of consecutive elements from `items`.

	Backport of itertools.pairwise from python 3.10. Like the original it
	accepts any iterable (lists, iterators, generators, ...), not only
	sequences that support len() and indexing.

	pairwise('abc') yields ('a', 'b'), ('b', 'c'). Fewer than two elements
	yield nothing.
	"""
	iterator = iter(items)
	try:
		prev = next(iterator)
	except StopIteration:
		# empty input: there are no pairs
		return
	for item in iterator:
		yield prev, item
		prev = item


class LazyMap:
	"""Read-only sequence view that applies `fn` to items of `src` on demand.

	`fn` is called at most once per index; the result is cached so repeated
	lookups are cheap. `src` must support len() and integer indexing.
	"""

	def __init__(self, src, fn):
		self._src = src
		self._fn = fn
		# maps normalized (non-negative) index -> fn(src[index])
		self._cache = {}

	def __len__(self):
		return len(self._src)

	def __getitem__(self, key):
		"""Return fn(src[key]), computing and caching it on first access.

		Negative indices count from the end. Raises IndexError if the index
		is out of range.
		"""
		if isinstance(key, int) and key < 0:
			# normalize so that e.g. -1 and len - 1 share one cache slot
			key += len(self)
			if key < 0:
				# still negative: without this check the source would
				# silently wrap around a second time and return a wrong item
				raise IndexError('LazyMap index out of range')
		if key not in self._cache:
			self._cache[key] = self._fn(self._src[key])
		return self._cache[key]


def parse_line(line):
	"""Turn one timelog line into a dict with the keys 'dt' and 'comment'.

	The line is cut at the first ': '. The part before it is parsed as a
	datetime using DT_FORMAT, the rest is kept verbatim as the comment.
	Raises ValueError if the separator is missing or the timestamp does
	not match DT_FORMAT.
	"""
	timestamp, comment = line.split(': ', 1)
	entry = {'dt': datetime.strptime(timestamp, DT_FORMAT)}
	entry['comment'] = comment
	return entry


def datetime_add(dt, years=0, months=0, weeks=0, days=0, hours=0, minutes=0, seconds=0, microseconds=0):  # noqa
	"""Return `dt` shifted by the given amounts.

	Weeks, days, hours, minutes, seconds and microseconds are applied first
	as a plain timedelta. Months and years are then applied as calendar
	shifts; negative values and month overflow into other years are
	handled. If the resulting month is shorter than the current
	day-of-month (e.g. Jan 31 + 1 month, Feb 29 + 1 year), the day is
	clamped to the last day of that month instead of raising ValueError.

	The result is built from the date and time fields only, so it is a
	naive datetime.
	"""
	from calendar import monthrange

	dt += timedelta(
		weeks=weeks,
		days=days,
		hours=hours,
		minutes=minutes,
		seconds=seconds,
		microseconds=microseconds,
	)

	# work with zero-based months so divmod yields the year carry directly
	carry, month_index = divmod(dt.month - 1 + months, 12)
	month = month_index + 1
	year = dt.year + years + carry

	# clamp the day to the length of the target month
	day = min(dt.day, monthrange(year, month)[1])

	return datetime(
		year, month, day, dt.hour, dt.minute, dt.second, dt.microsecond
	)


def timedelta2str(delta):
	"""Format a timedelta as 'HHH:MM' (hours padded to width 3).

	Seconds are truncated. Negative durations are rendered as the
	magnitude with a leading '-', e.g. minus one and a half hours gives
	'- 1:30'. Formatting the floored hours and minutes directly would
	print '- 2:30' for that value.
	"""
	seconds = delta.total_seconds()
	# format the magnitude; floor division on a negative value would round
	# away from zero and corrupt both fields
	total_minutes = int(abs(seconds) // 60)
	hours, minutes = divmod(total_minutes, 60)
	if seconds < 0 and total_minutes > 0:
		# sign takes the first column, as '{: =3}' does for negative numbers
		return '-{:2d}:{:02d}'.format(hours, minutes)
	return '{:3d}:{:02d}'.format(hours, minutes)


class Query:
	"""Narrow a chronologically sorted timelog down to a window of entries.

	The window is the half-open index range [start, stop) into `timelog`.
	`timelog` must support len() and integer indexing and yield dicts with
	a 'dt' key. Entries are located by binary search, so only a
	logarithmic number of them is looked up per filter.
	"""

	def __init__(self, timelog):
		self.start = 0
		self.stop = len(timelog)
		self.timelog = timelog

	def split(self, dt, after):
		"""Cut the window at `dt`.

		If `after` is true, entries up to and including `dt` are dropped
		from the front of the window. Otherwise entries later than `dt`
		are dropped from the end. An entry exactly at `dt` therefore always
		counts as "before".

		An empty window, or one that lies entirely on the discarded side,
		yields an empty window.
		"""
		# find the first index in [start, stop) whose timestamp is > dt
		low = self.start
		high = self.stop
		while low < high:
			mid = (low + high) // 2
			if self.timelog[mid]['dt'] <= dt:
				low = mid + 1
			else:
				high = mid

		if after:
			self.start = low
		else:
			self.stop = low

	def before(self, dt):
		"""Keep only entries up to and including `dt`."""
		self.split(dt, False)

	def after(self, dt):
		"""Keep only entries later than `dt`."""
		self.split(dt, True)

	def day(self, offset=0):
		"""Restrict to today, or to the day `offset` days from today."""
		now = datetime.now()
		start = datetime(now.year, now.month, now.day)
		self.after(datetime_add(start, days=offset))
		self.before(datetime_add(start, days=offset + 1))

	def week(self, offset=0):
		"""Restrict to this week, or to the week `offset` weeks from now.

		weekday() is 0 on Mondays, so weeks start on Monday.
		"""
		now = datetime.now()
		start = datetime(now.year, now.month, now.day)
		self.after(datetime_add(start, weeks=offset, days=-now.weekday()))
		self.before(datetime_add(start, weeks=offset + 1, days=-now.weekday()))

	def month(self, offset=0):
		"""Restrict to this month, or to the month `offset` months from now."""
		now = datetime.now()
		start = datetime(now.year, now.month, 1)
		self.after(datetime_add(start, months=offset))
		self.before(datetime_add(start, months=offset + 1))

	def year(self, offset=0):
		"""Restrict to this year, or to the year `offset` years from now."""
		now = datetime.now()
		start = datetime(now.year, 1, 1)
		self.after(datetime_add(start, years=offset))
		self.before(datetime_add(start, years=offset + 1))

	def all(self):
		"""Return the entries inside the current window as a list."""
		return [self.timelog[i] for i in range(self.start, self.stop)]


class Extractor:
	"""Aggregate durations from a list of parsed timelog entries.

	The duration of an entry is the time elapsed since the entry before
	it. Entries whose comment contains '**' are not counted.
	"""

	def __init__(self, data):
		self.data = data

	def sum(self):
		"""Return the total duration of all counted entries as a timedelta."""
		total = timedelta()
		for prev, entry in pairwise(self.data):
			if '**' in entry['comment']:
				continue
			total = total + (entry['dt'] - prev['dt'])
		return total

	def by_date(self):
		"""Yield (first timestamp, last timestamp, counted total) per date.

		Dates are yielded in the order in which they first appear in the
		data.
		"""
		grouped = {}
		for entry in self.data:
			grouped.setdefault(entry['dt'].date(), []).append(entry)
		for entries in grouped.values():
			first = entries[0]['dt']
			last = entries[-1]['dt']
			yield first, last, Extractor(entries).sum()

	def by_comment(self):
		"""Return a dict mapping each counted comment to its total duration."""
		totals = {}
		for prev, entry in pairwise(self.data):
			comment = entry['comment']
			if '**' in comment:
				continue
			elapsed = entry['dt'] - prev['dt']
			totals[comment] = totals.get(comment, timedelta()) + elapsed
		return totals


class ExpectedHoursPer:
	"""Expected working hours for various periods.

	All figures are derived from the class constants below.
	"""

	WORKDAYS_PER_WEEK = 5
	WORKHOURS_PER_WEEK = 35
	HOLIDAYS_PER_YEAR = 9
	VACATION_DAYS_PER_YEAR = 30

	@classmethod
	def _workdays_per_year(cls):
		"""Return the (fractional) number of days worked per year."""
		non_holidays = 365 - cls.HOLIDAYS_PER_YEAR
		weekdays = non_holidays * cls.WORKDAYS_PER_WEEK / 7
		return weekdays - cls.VACATION_DAYS_PER_YEAR

	@classmethod
	def day(cls):
		"""Return the hours expected on a single workday, truncated to int."""
		hours = cls.WORKHOURS_PER_WEEK / cls.WORKDAYS_PER_WEEK
		return int(hours)

	@classmethod
	def week(cls):
		"""Return the hours expected per week as an int."""
		return int(cls.WORKHOURS_PER_WEEK)

	@classmethod
	def year(cls):
		"""Return the hours expected per year, truncated to int."""
		hours = cls.day() * cls._workdays_per_year()
		return int(hours)

	@classmethod
	def month(cls):
		"""Return one twelfth of the yearly hours, truncated to int."""
		hours = cls.year() / 12
		return int(hours)

	@classmethod
	def days(cls, n):
		"""Return the hours expected for a span of `n` days.

		The hourly rate per day is interpolated between the workday rate
		and the yearly average per calendar day. The weight of the yearly
		average is 0 for n = 1 and tends to 1 as n grows.
		"""
		from math import exp
		workday_rate = cls.day()
		calendar_day_rate = cls.year() / 365
		ratio = exp(-n / 7) / exp(-1 / 7)
		weight = 2 / (1 + ratio) - 1
		rate = (1 - weight) * workday_rate + weight * calendar_day_rate
		return rate * n


if __name__ == '__main__':
	# Command line entry point: load the timelog, narrow it down to the
	# requested period and print either potential issues (--check), a
	# timesheet (--timesheet) or a summary by comment plus the total.
	parser = argparse.ArgumentParser(
		description='extract interesting data from timelogs')
	parser.add_argument('--file',
		default=os.path.expanduser('~/.gtimelog/timelog.txt'))
	parser.add_argument('-d', '--day', nargs='?', const=0, type=int,
		help='show entries from today or DAY days ago')
	parser.add_argument('-w', '--week', nargs='?', const=0, type=int,
		help='show entries from this week or WEEK weeks ago')
	parser.add_argument('-m', '--month', nargs='?', const=0, type=int,
		help='show entries from this month or MONTH months ago')
	parser.add_argument('-y', '--year', nargs='?', const=0, type=int,
		help='show entries from this year or YEAR years ago')
	parser.add_argument('-c', '--check', action='store_true',
		help='find potential issues')
	parser.add_argument('-s', '--timesheet', action='store_true',
		help='output as timesheet CSV')
	args = parser.parse_args()

	# load data from file; blank lines are skipped and the remaining lines
	# are only parsed when they are first accessed
	with open(args.file) as fh:
		lines = [line.strip() for line in fh if line.strip()]
	data = LazyMap(lines, parse_line)

	# filter; the first period option given wins (day, week, month, year)
	q = Query(data)
	if args.day is not None:
		q.day(offset=-args.day)
		expected = ExpectedHoursPer.day()
	elif args.week is not None:
		q.week(offset=-args.week)
		expected = ExpectedHoursPer.week()
	elif args.month is not None:
		q.month(offset=-args.month)
		expected = ExpectedHoursPer.month()
	elif args.year is not None:
		q.year(offset=-args.year)
		expected = ExpectedHoursPer.year()
	else:
		# no period given: base the expectation on the time span covered
		# by the whole log
		if lines:
			span = data[-1]['dt'] - data[0]['dt']
		else:
			# an empty log covers no time at all; indexing it would raise
			span = timedelta()
		expected = ExpectedHoursPer.days(span.total_seconds() / 3600 / 24)

	if args.check:
		# report entries that go back in time and counted entries that
		# are suspiciously long
		for prev, entry in pairwise(q.all()):
			delta = entry['dt'] - prev['dt']
			if delta < timedelta(0) or (
				'**' not in entry['comment'] and delta > timedelta(hours=12)
			):
				print(entry['dt'].strftime(DT_FORMAT) + ': ' + entry['comment'])
	else:
		extractor = Extractor(q.all())

		if args.timesheet:
			# one tab separated row per date: date, start, end, break
			for start, end, total in extractor.by_date():
				date = start.strftime('%Y-%m-%d')
				s = start.strftime('%H:%M')
				e = end.strftime('%H:%M')
				# break = time between first and last entry that was not counted
				_break = ((end - start) - total).total_seconds()
				b = '{:02d}:{:02d}'.format(int(_break / 60 / 60), int(_break / 60 % 60))
				print('{}\t{}\t{}\t{}'.format(date, s, e, b))
		else:
			# output by comment, shortest duration first
			by_comment = extractor.by_comment()
			if by_comment:
				length = max(len(k) for k in by_comment)
				for comment, delta in sorted(by_comment.items(), key=lambda a: a[1]):
					padding = ' ' * (length + 1 - len(comment))
					print(comment + padding + timedelta2str(delta))
				print()

			# output total workhours
			done = int(extractor.sum().total_seconds() / 3600)
			print('Total workhours done: %i (%i extra)' % (done, done - expected))