#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Biggest change in a file hierarchy
**********************************

big is a demonstration of how archery, by bringing linear algebra to
dicts, makes « group by »/« aggregations » easier in real life.

What it does
************

It scans all dirs from a starting point according to a predicate (is it a
terminal dir or just a dir) and, unlike ncdu or gdu, computes
the biggest growth in size according to a threshold (day, month, week, year)
and gives the results sorted by biggest change (top 5)

How it works?
*************

./big.py path_to_scan
will scan all dirs in a descendant way WITHOUT recursion.

PS: because I like SIGINFO on BSD, Ctrl+C prints the current progress,
and hitting it more than 3 times within 2 seconds stops the program
"""

import os
from functools import reduce
from os.path import abspath
from os import scandir
from json import dumps
from sys import argv, stdout, stderr
from archery import mdict
from time import time
from signal import signal, SIGINT, SIGUSR1, alarm, SIGALRM
import FreeSimpleGUI as sg

# Number of recent Ctrl+C hits; raised by sigint(), lowered by reset_interrupt().
interrupted = 0
# Progress text that sigint() echoes to stderr; the scanners keep it current.
sig_msg = ""
def sigint(*whatever):
    """SIGINT handler: report progress, exit only on repeated Ctrl+C.

    Writes the current ``sig_msg`` and a hint to stderr.  When the hit
    counter ``interrupted`` has already reached 3 the program exits;
    otherwise the counter is incremented and a 2 second alarm is armed (this
    file registers ``reset_interrupt`` for SIGALRM, which decrements it).

    Args:
        *whatever: the (signum, frame) pair passed by ``signal``; ignored.

    Raises:
        SystemExit: when called while ``interrupted`` is 3 or more.
    """
    global interrupted, sig_msg
    stderr.write(sig_msg)
    stderr.write("Ctrl + C hit. Info given, rehit ctrl+C to stop the program ....\n")

    stderr.flush()
    if interrupted >= 3:
        stderr.write("Ctrl + C hit more than 3 times in 2 second, EXITING ....\n")
        # The builtin ``exit`` is injected by the ``site`` module for
        # interactive use and is absent under ``python -S``; raise directly.
        raise SystemExit
    # Clear the interruption in 2 seconds; a new alarm replaces a pending one.
    alarm(2)
    interrupted += 1

def reset_interrupt(*whatever):
    """SIGALRM handler: forgive one Ctrl+C hit.

    The counter is lowered by one rather than reset to zero, which keeps the
    exit gesture a little more sensitive.

    Args:
        *whatever: the (signum, frame) pair passed by ``signal``; ignored.
    """
    global interrupted
    interrupted = interrupted - 1


# Ctrl+C reports progress instead of killing the program outright.
signal(SIGINT, sigint)
# The alarm armed by sigint() ends up here and lowers the hit counter.
signal(SIGALRM, reset_interrupt)

def pp(d):
    """Pretty-print *d* on stdout as JSON indented by four spaces."""
    print(dumps(d, indent=4))


# Root of the scan; replaced by the first command-line argument when given.
path = '.'


def dbg(d):
    """Debug hook, currently silenced: hand *d* back instead of printing it."""
    return d


def usage():
    """Print the module docstring as the help text."""
    print(__doc__)
# The first command-line argument, when present, is the directory to scan.
if len(argv) > 1:
    path = argv[1]
else:
    # No argument: show the help and let the user confirm the default root.
    usage()
    print("using default path = %r" % path)
    input("Press Enter to continue...")

# Directories recorded by the scanners (filled in by the scan functions).
terminal = set()

def dot(prog):
    """Advance the console progress indicator for step number *prog*.

    Every 20th step redraws a one-character spinner ("/", "|", "-", "\\"),
    every 100th step (except step 0) writes "\b.." instead, and every 8000
    steps the line is ended with a newline.  The streams are flushed only
    when something was written, or at step 0.

    Args:
        prog: integer step counter maintained by the caller.
    """
    spinner_frames = ("/", "|", "-", '\\')
    steps_per_row = 80 * 100

    needs_flush = prog == 0
    if needs_flush:
        stdout.flush()
        stderr.flush()

    if prog % 100 == 0 and prog != 0:
        needs_flush = True
        stdout.write("\b..")
    elif prog % 20 == 0:
        needs_flush = True
        stdout.write('\b')
        stdout.write(spinner_frames[(prog % 80) // 20])

    position_in_row = prog % steps_per_row
    if position_in_row == 79 * 100:
        needs_flush = True
        stdout.write("\b")
    if position_in_row == 0 and prog > 0:
        needs_flush = True
        stdout.write("\n")

    if needs_flush:
        stdout.flush()
        stderr.flush()
    
# Directories that are never descended into.
exclude = {"/home/.ecryptfs","/proc", "/dev", "/sys", "/run"}
print("#### exclude list ####")
pp(list(exclude))

# Work stack of directories still to visit, seeded with the scan root.
to_scan = [abspath(path)]
# Every directory already queued, so that none is queued twice.
seen = set(to_scan)
ignored = []
# Error message -> number of occurrences, reported at the end of the run.
errors = mdict()
# True: measure only directories without sub-directories; False: list all.
take_only_leaves = True

def scan_for_leaves(to_scan):
    """Walk the tree without recursion and measure every leaf directory.

    A leaf is a directory that contains no sub-directory (symlinks are not
    followed).  For each leaf the sizes of its regular files are accumulated
    with mdict addition: always under "infinite", and under each freshness
    key ("hour", "day", "week", "month", "year") when the file's
    ``st_ctime`` is more recent than that window.  A "ratio" entry holds
    those sums divided by the "infinite" total.

    Args:
        to_scan: list of directory paths used as the work stack; it is
            consumed in place.

    Returns:
        The mdict mapping each leaf path to its growth table.  The table is
        also pretty-printed with ``pp`` before returning.

    Side effects:
        Adds queued directories to ``seen`` and leaves to ``terminal``,
        counts filesystem errors in ``errors`` and keeps ``sig_msg`` (the
        text shown on Ctrl+C) up to date.
    """
    global seen, terminal, errors, sig_msg
    res = mdict()
    prog = 0
    # Reference instant for file ages, taken once so all leaves are comparable.
    now = time()
    threshold_as_seconds = (
        ("hour",                3600),
        ("day" ,         24.0 * 3600),
        ("week" ,   7 * (24.0 * 3600)),
        ("month" , 30 * (24.0 * 3600)),
        ("year" , 365 * (24.0 * 3600)),
        ("infinite" , time()),
    )
    # Same keys as the thresholds, all counters at zero.
    threshold_init_growth = mdict(threshold_as_seconds) * 0
    print()
    print("*"*80)
    print("scanning for dirs without dirs in them (leaves)")
    print("*"*80)

    while to_scan:
        current = to_scan.pop()
        sig_msg = "\nscanning %s\n" % current
        if current in exclude:
            stdout.write("\b\nskipping %r\n" % current)
            continue
        dot(prog)
        prog += 1

        files = []
        has_subdir = False
        try:
            with scandir(current) as ld:
                # Single pass: queue every sub-directory and remember the
                # regular files in case the directory turns out to be a leaf.
                for entry in ld:
                    try:
                        if entry.is_dir(follow_symlinks=False) and not entry.is_symlink():
                            if entry.path not in seen:
                                to_scan.append(entry.path)
                                seen.add(entry.path)
                            has_subdir = True
                        elif entry.is_file(follow_symlinks=False):
                            files.append(entry)
                    except OSError as e:
                        print(e)
                        print(entry.name)
                        # Unknown entry type: do not treat the parent as a leaf.
                        has_subdir = True
        except OSError as e:
            errors += mdict({ str(e) : 1 })
            continue

        if has_subdir:
            continue

        terminal.add(current)
        growth = mdict(threshold_init_growth.copy())
        for it, entry in enumerate(files, 1):
            sig_msg = "\nscanning (%dth entry) in %s\n" % (it, current)
            try:
                stat = entry.stat(follow_symlinks=False)
            except OSError as e:
                errors += mdict({ str(e) : 1 })
                continue
            size = stat.st_size
            # NOTE(review): the age is based on st_ctime although the original
            # variable was named mtime -- confirm which timestamp is intended.
            age = now - stat.st_ctime
            growth += mdict(infinite=size)
            for fresh_name, fresh_value in threshold_as_seconds:
                # "infinite" was already counted above; skip it here so the
                # total is not doubled.
                if fresh_name != "infinite" and age < fresh_value:
                    growth += mdict({fresh_name :  size * 1.0 })
        growth["ratio"] = 1.0 * growth / (growth.get("infinite",1.0) or 1 )
        res[current] = growth

    print("\b")
    print("found %d leaves" % len(terminal))
    print()
    pp(res)
    return res


def scan_them_all(to_scan):
    """Walk the tree without recursion and record every listable directory.

    Each directory popped from the work stack is listed with ``scandir``;
    its sub-directories (symlinks are not followed) are queued unless already
    seen, and the directory itself is added to ``terminal`` once its listing
    succeeded.

    Args:
        to_scan: list of directory paths used as the work stack; it is
            consumed in place.

    Side effects:
        Adds queued directories to ``seen`` and scanned ones to
        ``terminal``; counts directories that cannot be listed in ``errors``.
    """
    global seen, terminal, errors
    prog = 0
    print()
    print("*"*80)
    print("scanning for ALL dirs")
    print("*"*80)
    while to_scan:
        current = to_scan.pop()
        if current in exclude:
            stdout.write("\b\nskipping %r\n" % current)
            continue
        dot(prog)
        prog += 1
        try:
            with scandir(current) as ld:
                # Report the directory being listed, not the global scan root.
                dbg("scanning %r" % current)
                for entry in ld:
                    try:
                        if entry.is_dir(follow_symlinks=False) and not entry.is_symlink():
                            if entry.path not in seen:
                                to_scan.append(entry.path)
                            seen.add(entry.path)
                    except OSError as e:
                        print(e)
                        print(entry.name)
                dbg("on a des enfant")
                terminal.add(current)
        except OSError as e:
            errors += mdict({ str(e) : 1 })

    print("\b")
    print("found %d dir " % len(terminal))
    print()

# Pick the traversal: leaves only, or every directory.
scanner = scan_for_leaves if take_only_leaves else scan_them_all
# Keep the per-directory table when the scanner returns one; a scanner that
# returns nothing (None) leaves the report below with an empty table.
res = scanner(to_scan) or mdict()

print("*" * 80)
print("computing increase in size on the last day, week, month, year")
print("*" * 80)
now = time()
# Freshness windows in seconds; "infinite" stands for "any age".
threshold_as_seconds = (
    ("hour",                3600),
    ("day" ,         24.0 * 3600),
    ("week" ,   7 * (24.0 * 3600)),
    ("month" , 30 * (24.0 * 3600)),
    ("year" , 365 * (24.0 * 3600)),
    ("infinite" , time()),
)


def as_human(size):
    """Format a byte count with a binary unit, truncated to an integer.

    The value is shifted right by 10 bits per unit step until it drops below
    1024; anything of 1024 tb or more stays expressed in tb.

    Args:
        size: number of bytes; converted with ``int()`` first.

    Returns:
        A string such as "0 b", "1 kb" or "5 gb".
    """
    size = int(size)
    units = ("b", "kb", "mb", "gb", 'tb')
    largest = len(units) - 1
    for power, unit in enumerate(units):
        scaled = size >> (power * 10)
        # The last unit absorbs every value too large for the earlier ones.
        if scaled < 1 << 10 or power == largest:
            return "%d %s" % (scaled, unit)


print("\b")
print("treated %d dirs " % len(terminal))

arcs = ()
# One fill colour per pie slice, in drawing order.
colors = ("pink", "red", "orange", "yellow", "green", "cyan", "blue", "purple")
# Drawing surface whose coordinate system matches the canvas pixel size.
graph = sg.Graph(
    canvas_size=(1980, 1000),
    graph_bottom_left=(0, 0),
    graph_top_right=(1980, 1000),
)
window = sg.Window("pie chart", [[graph]], size=(1980, 1000), finalize=True)
def in80(s):
    """Return *s* unchanged when shorter than 40 characters, otherwise its
    first and last 20 characters joined by "..."."""
    return s if len(s) < 40 else s[:20] + "..." + s[-20:]


# One console report and one pie chart per freshness window; `off` is the
# column index of the chart on the canvas.
for off, freshness in enumerate(
    [nv[0] for nv in threshold_as_seconds if nv[0] not in {'infinite', 'ratio'}]
):
    print()
    print("*" * 80)
    print("biggest change by the %s" % freshness)
    print("*" * 80)

    # (path, bytes changed within the window), biggest first, zeros dropped.
    growth = sorted(
        [(p, res[p][freshness]) for p in res.keys() if res[p][freshness] > 0],
        key=lambda kv_pair: kv_pair[1],
        reverse=True,
    )
    # Share of the window's total growth for the entries that get a slice.
    ratios = mdict(growth[:len(colors)]) / (sum([x[1] for x in growth]) or 1)
    graph.draw_text(freshness, (175+int(off*350),950),color= "black",font="Helvetica 40", text_location=sg.TEXT_LOCATION_CENTER)
    current_angle = 0
    for slice_color, ratio in zip(colors, ratios.values()):
        graph.draw_arc((25+350*off,500), (350*(off+1)-25, 850),ratio*360, current_angle, "pieslice", fill_color=slice_color)
        current_angle += ratio*360

    top_growth = dict(growth[:len(colors)])
    # Console lines show the full path, quoted once.
    reportme = "\n".join(["%8s : %r" % (as_human(int(v)), k) for k, v in top_growth.items()])
    # The chart legend shortens long paths so that it fits under the pie.
    reportme2 = "\n".join(["%8s : %r" % (as_human(int(v)), in80(k)) for k, v in top_growth.items()])
    graph.draw_text(reportme2, (int(off*350)+250, 125 +  250 * (off%2)),color= "black",font="Helvetica 14",text_location=sg.TEXT_LOCATION_CENTER )
    window.refresh()
    print(reportme)
    print("-" * 80)
    print( "%8s : reste" % as_human(sum([ x[1] for x in growth[len(colors):]])))
    print("-" * 80)

# Expand "~" here instead of relying on Tcl to do it for the file name.
graph.widget.postscript(file=os.path.expanduser("~/big.ps"))

print("#### errors ####")
pp(errors)
# Keep the chart on screen until the window is closed.  Reading with a
# timeout pumps the GUI events (a bare sleep loop leaves the window frozen)
# and hands control back to Python every second.
while True:
    event, _values = window.read(timeout=1000)
    if event == sg.WIN_CLOSED:
        break
window.close()
