import collections
import glob
import json
import os
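
# Merge standalone operator records (operators/*.json) into the DMFR feed
# files (feeds/*.dmfr.json) that define their associated feeds. Expected
# input shapes, inferred from the fields read below (a sketch, not a schema;
# the ids are hypothetical):
#   feeds/*.dmfr.json -> {"feeds": [{"id": "f-abc-example", ...}, ...]}
#   operators/*.json  -> {"onestop_id": "o-abc-example",
#                         "associated_feeds": [{"feed_onestop_id": "f-abc-example",
#                                               "gtfs_agency_id": "1"}, ...]}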

# Pass 1: map each feed's Onestop ID to the feed file that defines it.
feed_files = {}
for fn in glob.glob("feeds/*.dmfr.json"):
    with open(fn, encoding="utf-8") as f:
        data = json.load(f)
    for feed in data["feeds"]:
        feed_files[feed["id"]] = fn
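
# At this point feed_files maps feed id -> defining file, e.g. (hypothetical):
#   {"f-abc-example": "feeds/abc.dmfr.json"}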

# Pass 2: classify each operator by how its associated feeds map onto files.
operators = {}
operator_file_matches = collections.defaultdict(set)  # file -> operators with one file, several feeds
operator_file_matches_single_feed = collections.defaultdict(set)  # file -> operators with one file, one feed
operator_multiple_files = collections.defaultdict(set)  # operator -> files its feeds span
operator_no_file = set()  # operators with no associated feeds
operator_no_feed = set()  # operators referencing missing or unknown feed ids
for fn in glob.glob("operators/*.json"):
    with open(fn, encoding="utf-8") as f:
        operator = json.load(f)
    osid = operator.get("onestop_id")
    operators[osid] = operator
    fsids = set(i.get("feed_onestop_id") for i in operator.get("associated_feeds", []))
    files = set(feed_files.get(i) for i in fsids)
    if None in files or None in fsids:
        # a referenced feed id is missing or not defined in any feed file
        operator_no_feed.add(osid)
    elif len(files) > 1:
        # the operator's feeds are spread across multiple feed files
        operator_multiple_files[osid].update(files)
    elif len(files) == 1 and len(fsids) == 1:
        # operator appears in exactly one feed file and has a single feed
        operator_file_matches_single_feed[next(iter(files))].add(osid)
    elif len(files) == 1:
        # operator appears in exactly one feed file, with multiple feeds
        operator_file_matches[next(iter(files))].add(osid)
    else:
        # the operator lists no associated feeds at all
        operator_no_file.add(osid)
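
# For example (hypothetical ids): an operator "o-abc-example" whose single
# associated feed "f-abc-example" is defined in feeds/abc.dmfr.json ends up in
# operator_file_matches_single_feed["feeds/abc.dmfr.json"].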

def filter_empty(d):
    """Return a copy of d without falsy values (None, "", [], {}, 0)."""
    return {k: v for k, v in d.items() if v}
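
# Example (hypothetical values):
#   filter_empty({"onestop_id": "o-abc-example", "website": "", "tags": None})
#   -> {"onestop_id": "o-abc-example"}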


# Pass 3: rewrite each feed file that has mergeable operators.
single_feed_items = set(operator_file_matches_single_feed.keys()) | set(operator_file_matches.keys())
for feed_path in single_feed_items:
    osids = operator_file_matches_single_feed.get(feed_path, set())
    if osids:
        print("single file and feed match:", feed_path, osids)

    with open(feed_path, encoding="utf-8") as f:
        data = json.load(f)

    # iterate feeds in file order so the rewritten file preserves ordering
    for feed in data.get("feeds", []):
        fsid = feed["id"]
        for osid in osids:
            operator = operators[osid]
            fsids = [i.get("feed_onestop_id") for i in operator["associated_feeds"]]
            if fsid not in fsids:
                continue
            if len(set(fsids)) != 1:
                raise Exception("operator %s is associated with more than one unique feed" % osid)
            # keep only the gtfs_agency_id crosswalk entries; the parent feed
            # is implied by nesting the operator under it
            oifs = [{"gtfs_agency_id": i.get("gtfs_agency_id")} for i in operator["associated_feeds"] if i.get("gtfs_agency_id")]
            operator["associated_feeds"] = oifs
            feed.setdefault("operators", []).append(filter_empty(operator))
            # the operator now lives inside the feed file; drop the standalone record
            os.unlink(os.path.join("operators", osid + ".json"))
    
    filematches = operator_file_matches.get(feed_path)
    if filematches:
        print("single file match:", feed_path, filematches)
        # operators with one file but several feeds are written to the file's
        # top-level "operators" key rather than nested under a single feed
        data["operators"] = [filter_empty(operators[i]) for i in filematches]
        for i in filematches:
            os.unlink(os.path.join("operators", i + ".json"))

    with open(feed_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
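
# After this pass, a merged feed entry looks roughly like (hypothetical ids):
#   {"id": "f-abc-example", "operators": [{"onestop_id": "o-abc-example",
#    "associated_feeds": [{"gtfs_agency_id": "1"}]}]}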

# Report the operators that could not be merged automatically.
for k, v in operator_multiple_files.items():
    print("multiple matches:", k, v)

for osid in operator_no_file:
    print("no file:", osid)

for osid in operator_no_feed:
    print("no feed:", osid)