#include "commands/load.h"

#include "commands/parser.h"
#include "gtfs_types.h"
#include "helpers/calculations.h"
#include "helpers/console.h"
#include "helpers/exception.h"
#include "helpers/progress_writer.h"
#include "helpers/string_functions.h"

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <functional>
#include <iterator>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

namespace gtfsplanner {
// Constructs the load command; the raw parameters are handed unchanged to the Command base.
Load_cmd::Load_cmd(std::vector<std::string> const& parameters)
    : Command(parameters)
{
}

void Load_cmd::sanitize()
{
    auto const& params = get_parameters();
    if (params.size() != 1)
    {
        error("Unexpected parameters for load command. Only the path from which to load is to be "
              "given.");
    }
}

// Returns the value of the column named `key` from one tokenized data row.
//
// key        - column name as it appears in the header line of the file
// column_map - maps column names to their position in a row
// data       - the tokens of the row to read from
//
// An empty string is returned when the column is not part of the file at all, or when the
// row has fewer tokens than the header announced (e.g. a truncated or malformed line), so a
// short row never causes an out-of-bounds read.
std::string extract_column(std::string const& key,
                           std::unordered_map<std::string, size_t> const& column_map,
                           std::vector<std::string> const& data)
{
    auto const iter = column_map.find(key);
    if (iter == column_map.end())
    {
        // the file does not provide this column
        return {};
    }
    if (iter->second >= data.size())
    {
        // the row is shorter than the header, treat the value as not given
        return {};
    }
    return data[iter->second];
}

void parse_agency(std::unordered_map<std::string, size_t> const& column_map,
                  std::vector<std::string> const& agency_data,
                  gtfs::Dataset& dataset)
{
    gtfs::Agency agency {extract_column("agency_id", column_map, agency_data),
                         extract_column("agency_name", column_map, agency_data),
                         extract_column("agency_url", column_map, agency_data),
                         extract_column("agency_timezone", column_map, agency_data)};
    dataset.agencies.push_back(agency);
}

void parse_stops(std::unordered_map<std::string, size_t> const& column_map,
                 std::vector<std::string> const& stop_data,
                 gtfs::Dataset& dataset)
{
    gtfs::Stop stop {extract_column("stop_id", column_map, stop_data),
                     extract_column("stop_name", column_map, stop_data),
                     to<double>(extract_column("stop_lat", column_map, stop_data)),
                     to<double>(extract_column("stop_lon", column_map, stop_data))};
    dataset.stops.push_back(stop);
}

void parse_routes(std::unordered_map<std::string, size_t> const& column_map,
                  std::vector<std::string> const& route_data,
                  gtfs::Dataset& dataset)
{
    gtfs::Route route {extract_column("route_id", column_map, route_data),
                       extract_column("agency_id", column_map, route_data),
                       extract_column("route_short_name", column_map, route_data),
                       extract_column("route_long_name", column_map, route_data),
                       static_cast<gtfs::Route_type>(
                           to<uint32_t>(extract_column("route_type", column_map, route_data)))};
    dataset.routes.push_back(route);
}

void parse_trips(std::unordered_map<std::string, size_t> const& column_map,
                 std::vector<std::string> const& trip_data,
                 gtfs::Dataset& dataset)
{
    gtfs::Trip trip {extract_column("route_id", column_map, trip_data),
                     extract_column("service_id", column_map, trip_data),
                     extract_column("trip_id", column_map, trip_data),
                     static_cast<gtfs::Direction>(
                         to<uint32_t>(extract_column("direction_id", column_map, trip_data)))};
    dataset.trips.push_back(trip);
}

void parse_stop_times(std::unordered_map<std::string, size_t> const& column_map,
                      std::vector<std::string> const& stop_time_data,
                      gtfs::Dataset& dataset)
{
    auto pickup_type = extract_column("pickup_type", column_map, stop_time_data);
    auto drop_off_type = extract_column("drop_off_type", column_map, stop_time_data);
    gtfs::Stop_time stop_time {
        extract_column("trip_id", column_map, stop_time_data),
        to<Time>(extract_column("arrival_time", column_map, stop_time_data)),
        to<Time>(extract_column("departure_time", column_map, stop_time_data)),
        extract_column("stop_id", column_map, stop_time_data),
        to<uint32_t>(extract_column("stop_sequence", column_map, stop_time_data)),
        (pickup_type.empty()) ? gtfs::Access::YES
                              : static_cast<gtfs::Access>(to<uint32_t>(pickup_type)),
        (drop_off_type.empty()) ? gtfs::Access::YES
                                : static_cast<gtfs::Access>(to<uint32_t>(drop_off_type))};
    dataset.stop_times.push_back(stop_time);
}

void parse_calendar(std::unordered_map<std::string, size_t> const& column_map,
                    std::vector<std::string> const& calendar_data,
                    gtfs::Dataset& dataset)
{
    gtfs::Calendar calendar {
        extract_column("service_id", column_map, calendar_data),
        to<bool>(extract_column("monday", column_map, calendar_data)),
        to<bool>(extract_column("tuesday", column_map, calendar_data)),
        to<bool>(extract_column("wednesday", column_map, calendar_data)),
        to<bool>(extract_column("thursday", column_map, calendar_data)),
        to<bool>(extract_column("friday", column_map, calendar_data)),
        to<bool>(extract_column("saturday", column_map, calendar_data)),
        to<bool>(extract_column("sunday", column_map, calendar_data)),
        to<Date>(extract_column("start_date", column_map, calendar_data)),
        to<Date>(extract_column("end_date", column_map, calendar_data)),
    };
    dataset.calendars.push_back(calendar);
}

void parse_calendar_dates(std::unordered_map<std::string, size_t> const& column_map,
                          std::vector<std::string> const& calendar_date_data,
                          gtfs::Dataset& dataset)
{
    gtfs::Calendar_date calendar_date {
        extract_column("service_id", column_map, calendar_date_data),
        to<Date>(extract_column("date", column_map, calendar_date_data)),
        static_cast<gtfs::Service_exception>(
            to<uint32_t>(extract_column("exception_type", column_map, calendar_date_data)))};
    dataset.calendar_dates.push_back(calendar_date);

    // ensure there is an entry in the calendar that fits together so that the ids are in use consistently
    // a service_id used in calendar_datas must also exist in calendar, otherwise merging will go wrong
    auto iter = std::find_if(dataset.calendars.begin(), dataset.calendars.end(),
                             [&calendar_date](gtfs::Calendar const& calendar) {
                                 return calendar.service_id == calendar_date.service_id;
                                 ;
                             });
    if (iter == dataset.calendars.end())
    {
        gtfs::Calendar calendar {calendar_date.service_id,
                                 false,
                                 false,
                                 false,
                                 false,
                                 false,
                                 false,
                                 false,
                                 calendar_date.date,
                                 calendar_date.date};
        dataset.calendars.push_back(calendar);
    }
}

void parse_transfers(std::unordered_map<std::string, size_t> const& column_map,
                     std::vector<std::string> const& transfer_data,
                     gtfs::Dataset& dataset)
{
    auto transfer_type = extract_column("transfer_type", column_map, transfer_data);
    gtfs::Transfer transfer {
        extract_column("from_stop_id", column_map, transfer_data),
        extract_column("to_stop_id", column_map, transfer_data),
        (transfer_type.empty()) ? gtfs::Transfer_type::TRANSFER_PROPOSED
                                : static_cast<gtfs::Transfer_type>(to<uint32_t>(transfer_type)),
        to<uint32_t>(extract_column("exception_type", column_map, transfer_data))};
    dataset.transfers.push_back(transfer);
}

// Returns the size of the given file in bytes, determined by seeking to the end of the file
// and reading the stream position. If the file cannot be opened the position query fails
// and -1 is returned.
std::streamoff get_file_size(std::string const& inputname)
{
    std::ifstream stream;
    stream.open(inputname, std::ios::binary);
    stream.seekg(0, std::ios::end);
    std::streamoff const size = stream.tellg();
    return size;
}

// Reads one GTFS text file line by line and passes every data row to `handler`.
//
// folder   - directory containing the dataset
// filename - name of the file inside that directory
// dataset  - dataset passed through to the handler
// handler  - called once per data row with the column map built from the header line, the
//            tokens of the row and the dataset
//
// Throws std::runtime_error if the file cannot be opened; exceptions thrown by the handler
// are not caught here.
//
// A UTF-8 byte order mark in front of the header and carriage returns at line ends (files
// written with CRLF line endings) are removed before tokenizing, so that they do not become
// part of the first/last column name or value. Lines that are empty afterwards are skipped.
void read_gtfs_file(std::string const& folder,
                    std::string const& filename,
                    gtfs::Dataset& dataset,
                    std::function<void(std::unordered_map<std::string, size_t> const&,
                                       std::vector<std::string> const&,
                                       gtfs::Dataset&)> const& handler)
{
    std::string inputname = folder + '/' + filename;

    std::streamoff file_size = get_file_size(inputname);

    std::ifstream input(inputname);
    if (!input.is_open())
    {
        throw std::runtime_error("Cannot open " + inputname);
    }

    std::unordered_map<std::string, size_t> column_map;
    std::string line;

    // the first line provides the format of the remaining input, i.e. which column is what
    std::getline(input, line);
    std::string prefix = "Reading " + inputname;
    Progress_writer progress(prefix, file_size);
    // progress is counted in bytes read, so use the size of the line as it is in the file
    progress.update(line.size() + 1);
    if (line.compare(0, 3, "\xEF\xBB\xBF") == 0)
    {
        line.erase(0, 3);
    }
    if (!line.empty() && line.back() == '\r')
    {
        line.pop_back();
    }
    {
        auto columns = tokenize(line, ',');
        for (auto i = 0U; i < columns.size(); i++)
        {
            auto const& token = columns[i];
            column_map[token] = i;
        }
    }

    while (std::getline(input, line))
    {
        auto const bytes_read = line.size() + 1;
        if (!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }
        if (line.empty())
        {
            // blank line (typically at the end of the file), there is no row to handle
            progress.update(bytes_read);
            continue;
        }
        auto tokens = tokenize_with_quotes(line, ',');
        handler(column_map, tokens, dataset);
        progress.update(bytes_read);
    }
}

// Removes duplicate stations from the dataset.
//
// Apparently there are multiple entries for stations with different stop_id, but the same
// name and (nearly) the same coordinates. Two stops are treated as the same station if the
// name is equal and calculate_distance() between them is below 100. The first stop seen is
// kept; the ids of the dropped stops are replaced by the id of the kept stop in
// dataset.stop_times and in dataset.transfers so that no reference to a removed stop
// remains.
void merge_stations(gtfs::Dataset& dataset)
{
    std::vector<gtfs::Stop> merged_stops;
    std::unordered_map<std::string, std::string> replaced_ids; // replace id first with id second
    {
        Progress_writer progress("Preparing station merge", dataset.stops.size());
        merged_stops.reserve(dataset.stops.size());

        // helper structure to speed up looking for stations, keyed by station name
        std::unordered_map<std::string, gtfs::Stop> stop_lookup;
        for (auto const& stop : dataset.stops)
        {
            if (replaced_ids.find(stop.id) == replaced_ids.end())
            {
                // this station has not yet been handled
                auto it = stop_lookup.find(stop.name);
                // treat stations as equal if the name is equal and the distance is < 100
                if (it != stop_lookup.end()
                    && calculate_distance(it->second.lon, it->second.lat, stop.lon, stop.lat) < 100)
                {
                    // we really already have the station stored, create new id replacement entry
                    replaced_ids.insert(std::make_pair(stop.id, it->second.id));
                }
                else
                {
                    // we do not yet have the station, add it and update the lookup datastructures
                    merged_stops.push_back(stop);
                    stop_lookup.insert(std::make_pair(stop.name, stop));
                }
            }
            progress.update(1);
        }
    }

    std::ostringstream oss;
    oss << "Removed " << dataset.stops.size() - merged_stops.size() << " out of "
        << dataset.stops.size() << " stations" << '\n';
    Console::write(oss.str());
    // the local vector is not needed any more, hand it over instead of copying it
    dataset.stops = std::move(merged_stops);

    {
        Progress_writer progress("Updating stop times according to merged stations",
                                 dataset.stop_times.size());
        for (auto& stop_time : dataset.stop_times)
        {
            auto iter = replaced_ids.find(stop_time.stop_id);
            if (iter != replaced_ids.end())
            {
                stop_time.stop_id = iter->second;
            }
            progress.update(1);
        }
    }

    // transfers loaded from transfers.txt reference stops by id as well, both ends have to
    // follow the replacement
    for (auto& transfer : dataset.transfers)
    {
        auto from = replaced_ids.find(transfer.from_stop_id);
        if (from != replaced_ids.end())
        {
            transfer.from_stop_id = from->second;
        }
        auto to_stop = replaced_ids.find(transfer.to_stop_id);
        if (to_stop != replaced_ids.end())
        {
            transfer.to_stop_id = to_stop->second;
        }
    }
}

// Creates transfers between all pairs of stops that are close to each other and appends
// them to dataset.transfers, which is sorted by (from_stop_id, to_stop_id) afterwards.
//
// Every pair of stops with calculate_distance() below 350 gets a transfer in both
// directions. 350 is an experimental value (difference between Munich Hbf and the outside
// train stations).
void interpolate_transfers(gtfs::Dataset& dataset)
{
    Progress_writer progress("Interpolating transfers between stations", dataset.stops.size());

    using station_pair = std::pair<std::string, std::string>;
    // distance for every ordered pair of stations that is close enough for a transfer
    std::unordered_map<station_pair, uint32_t, pair_hash> distances;

    auto const stop_count = dataset.stops.size();
    for (size_t first = 0; first < stop_count; ++first)
    {
        auto const& origin = dataset.stops[first];
        for (size_t second = first + 1; second < stop_count; ++second)
        {
            auto const& target = dataset.stops[second];
            auto const distance =
                calculate_distance(origin.lon, origin.lat, target.lon, target.lat);
            if (distance < 350)
            {
                auto const stored = static_cast<uint32_t>(distance);
                // a transfer works in both directions, so store both orderings
                distances.emplace(station_pair(origin.id, target.id), stored);
                distances.emplace(station_pair(target.id, origin.id), stored);
            }
        }
        progress.update(1);
    }

    dataset.transfers.reserve(distances.size());
    for (auto const& entry : distances)
    {
        // the minimal transfer time is calculated based on distance: per full 100 distance
        // units 3 min are assumed (to somehow incorporate stairs, lifts, complex connecting
        // pathways etc)
        uint32_t const hundreds = entry.second / 100;
        dataset.transfers.push_back({entry.first.first, entry.first.second,
                                     gtfs::Transfer_type::TRANSFER_PROPOSED, 180 * hundreds});
    }

    // sort the result by the ids to allow faster search later on
    auto const by_stop_ids = [](gtfs::Transfer const& lhs, gtfs::Transfer const& rhs) {
        if (lhs.from_stop_id == rhs.from_stop_id)
        {
            return lhs.to_stop_id < rhs.to_stop_id;
        }
        return lhs.from_stop_id < rhs.from_stop_id;
    };
    std::sort(dataset.transfers.begin(), dataset.transfers.end(), by_stop_ids);
}

// Rebuilds `index` so that it maps the id of every element in `data` to the position of the
// element. If an id occurs more than once the first position is kept.
// `type` is only used for the text of the progress output.
template <typename T>
void build_index(std::string const& type,
                 std::vector<T> const& data,
                 std::unordered_map<std::string, size_t>& index)
{
    std::string prefix = "Building index on " + type;
    Progress_writer progress(prefix, data.size());

    index.clear();
    size_t position = 0;
    for (auto const& element : data)
    {
        index.emplace(element.id, position);
        ++position;
        progress.update(1);
    }
}

// Rebuilds all lookup structures of the dataset: the id -> position indices for routes,
// trips and stops, and for every trip the list of its stop times.
void build_indices(gtfs::Dataset& dataset)
{
    build_index("routes", dataset.routes, dataset.route_id_to_idx);
    build_index("trips", dataset.trips, dataset.trip_id_to_idx);
    build_index("stops", dataset.stops, dataset.stop_id_to_idx);

    // start from scratch, the cross references are recreated completely below
    for (auto trip = dataset.trips.begin(); trip != dataset.trips.end(); ++trip)
    {
        trip->stop_time_indices.clear();
    }

    // add cross references to all trips, from trip to stop_time, in order:
    // stop_time_indices[stop_sequence] holds the position of the stop time
    Progress_writer progress("Indexing stops on trips", dataset.stop_times.size());
    for (auto position = 0U; position < dataset.stop_times.size(); ++position)
    {
        auto const& current = dataset.stop_times[position];
        auto owner = dataset.trip_id_to_idx.find(current.trip_id);
        check_iter(owner, dataset.trip_id_to_idx);

        auto& indices = dataset.trips[owner->second].stop_time_indices;
        auto const sequence = current.stop_sequence;
        // grow the list on demand so that the sequence number is a valid position
        if (indices.size() <= sequence)
        {
            indices.resize(sequence + 1);
        }
        indices[sequence] = position;

        progress.update(1);
    }
}

// Loads a complete GTFS dataset from the given folder.
//
// agency, stops, routes, trips and stop_times are mandatory; a failure while reading one of
// them propagates to the caller. calendar, calendar_dates and transfers are optional: if
// reading one of them fails with a std::exception, the corresponding part of the dataset is
// cleared and loading continues. Afterwards duplicate stations are merged, transfers are
// interpolated if none were loaded, and the indices are built.
gtfs::Dataset load_gtfs(std::string const& folder)
{
    using Row_handler = void (*)(std::unordered_map<std::string, size_t> const&,
                                 std::vector<std::string> const&, gtfs::Dataset&);

    gtfs::Dataset dataset;

    // mandatory
    read_gtfs_file(folder, "agency.txt", dataset, parse_agency);
    read_gtfs_file(folder, "stops.txt", dataset, parse_stops);
    read_gtfs_file(folder, "routes.txt", dataset, parse_routes);
    read_gtfs_file(folder, "trips.txt", dataset, parse_trips);
    read_gtfs_file(folder, "stop_times.txt", dataset, parse_stop_times);

    // "somewhat" mandatory parts: tells the caller whether reading worked instead of throwing
    auto const try_read = [&folder, &dataset](char const* filename, Row_handler handler) {
        try
        {
            read_gtfs_file(folder, filename, dataset, handler);
        }
        catch (std::exception const&)
        {
            return false;
        }
        return true;
    };

    if (!try_read("calendar.txt", parse_calendar))
    {
        // if something happens we just don't have a calendar
        dataset.calendars.clear();
    }
    if (!try_read("calendar_dates.txt", parse_calendar_dates))
    {
        // if something happens we don't have calendar exceptions
        dataset.calendar_dates.clear();
    }
    if (!try_read("transfers.txt", parse_transfers))
    {
        // if something happens we don't have transfers from the file
        dataset.transfers.clear();
    }

    // remove duplicate data
    merge_stations(dataset);
    if (dataset.transfers.empty())
    {
        interpolate_transfers(dataset);
    }

    build_indices(dataset);

    return dataset;
}

// Returns the set of ids of all elements in `data`; `f` extracts the id from one element.
template <typename T, typename F>
std::unordered_set<std::string> collect_ids(std::vector<T> const& data, F f)
{
    std::unordered_set<std::string> collected;
    for (auto it = data.begin(); it != data.end(); ++it)
    {
        collected.insert(f(*it));
    }
    return collected;
}

// Generates a numeric id that is contained neither in old_ids nor in current_ids (so that it
// can replace an id in current_ids without compromising uniqueness) and that has not been
// handed out before. The number behind the returned id is recorded in created_ids.
// Throws Data_error if no free id below 1000000 is found.
std::string generate_new_id(std::unordered_set<std::string> const& old_ids,
                            std::unordered_set<std::string> const& current_ids,
                            std::unordered_set<size_t>& created_ids)
{
    // start with the highest value in created_ids, anything lower will not work anyway
    size_t candidate = 1U;
    auto const highest = std::max_element(created_ids.begin(), created_ids.end());
    if (highest != created_ids.end())
    {
        candidate = *highest;
    }

    for (; candidate < 1000000; ++candidate)
    {
        auto const text = std::to_string(candidate);
        bool const is_free = old_ids.count(text) == 0 && current_ids.count(text) == 0
                             && created_ids.count(candidate) == 0;
        if (is_free)
        {
            created_ids.insert(candidate);
            return text;
        }
    }
    throw Data_error("Could not generate unique id!");
}

// Determines which ids in new_data collide with ids in data and generates a replacement for
// each of them. The result maps the colliding id to its freshly generated replacement;
// neither of the two inputs is modified. `f` extracts the id from an element and progress
// is advanced by one step per element of new_data.
template <typename T, typename F>
std::unordered_map<std::string, std::string> make_unique_ids(std::vector<T> const& data,
                                                             std::vector<T> const& new_data,
                                                             F f,
                                                             Progress_writer& progress)
{
    auto const existing_ids = collect_ids(data, f);
    auto const incoming_ids = collect_ids(new_data, f);
    std::unordered_set<size_t> created_ids;

    std::unordered_map<std::string, std::string> replacements;
    for (auto const& candidate : new_data)
    {
        auto const current_id = f(candidate);
        if (existing_ids.count(current_id) != 0)
        {
            // id already exists, create a new one and add it to the mapping
            auto const fresh_id = generate_new_id(existing_ids, incoming_ids, created_ids);
            replacements.emplace(current_id, fresh_id);
        }
        progress.update(1);
    }
    return replacements;
}

// Applies an id mapping to all elements of `data`: `f` reads the id of an element, and if
// the mapping contains that id, `g` is called with the element and the replacement id.
// progress is advanced by one step per element.
template <typename T, typename F, typename G>
void update_ids(std::vector<T>& data,
                std::unordered_map<std::string, std::string> const& mapping,
                F f,
                G g,
                Progress_writer& progress)
{
    for (auto element = data.begin(); element != data.end(); ++element)
    {
        auto const replacement = mapping.find(f(*element));
        if (replacement != mapping.end())
        {
            g(*element, replacement->second);
        }
        progress.update(1);
    }
}

// Finds the elements of new_data that already exist in data. `f(entry, new_entry)` decides
// whether two elements are equal and `g` extracts the id of an element. The result maps the
// id of each duplicate in new_data to the id of the first equal element in data.
// progress is advanced by one step per element of new_data.
template <typename T, typename F, typename G>
std::unordered_map<std::string, std::string> find_duplicates(
    std::vector<T> const& data, std::vector<T> const& new_data, F f, G g, Progress_writer& progress)
{
    std::unordered_map<std::string, std::string> duplicates;

    for (auto const& candidate : new_data)
    {
        // linear search for the first existing element that equals the candidate
        auto match = data.begin();
        while (match != data.end() && !f(*match, candidate))
        {
            ++match;
        }
        if (match != data.end())
        {
            duplicates.insert(std::make_pair(g(candidate), g(*match)));
        }
        progress.update(1);
    }

    return duplicates;
}

// Appends to `data` all elements of new_data whose id (extracted by `f`) is not a key of
// `mapping`, i.e. all elements that were not identified as duplicates.
// progress is advanced by one step per element of new_data.
template <typename T, typename F>
void merge(std::vector<T>& data,
           std::vector<T> const& new_data,
           std::unordered_map<std::string, std::string> const& mapping,
           F f,
           Progress_writer& progress)
{
    for (auto const& candidate : new_data)
    {
        bool const is_duplicate = mapping.count(f(candidate)) != 0;
        if (!is_duplicate)
        {
            data.push_back(candidate);
        }
        progress.update(1);
    }
}

// Predicate that tells whether an element equal to the given one is contained in a vector.
// The vector and the progress writer are referenced, not copied, so the predicate always
// sees the current content of the vector. `F` is called as f(entry, new_entry) and decides
// whether two elements are equal. Every call of the predicate advances progress by one step.
template <typename T, typename F>
class exists
{
public:
    exists(std::vector<T> const& data, F f, Progress_writer& progress)
        : m_existing(data), m_equal(f), m_progress(progress)
    {
    }

    bool operator()(T const& new_entry) const
    {
        m_progress.update(1);
        for (auto const& entry : m_existing)
        {
            if (m_equal(entry, new_entry))
            {
                return true;
            }
        }
        return false;
    }

private:
    std::vector<T> const& m_existing;
    F m_equal;
    Progress_writer& m_progress;
};

// Appends to `data` all elements of new_data for which `data` does not yet contain an equal
// element; `f(entry, new_entry)` decides about equality. Elements appended earlier in the
// same call are taken into account, so duplicates within new_data are added only once.
// progress is advanced by one step per element of new_data.
template <typename T, typename F>
void merge(std::vector<T>& data, std::vector<T> const& new_data, F f, Progress_writer& progress)
{
    exists<T, F> already_present(data, f, progress);
    for (auto candidate = new_data.begin(); candidate != new_data.end(); ++candidate)
    {
        if (!already_present(*candidate))
        {
            data.push_back(*candidate);
        }
    }
}


// Merges new_dataset into dataset.
//
// dataset     - the dataset to merge into; it receives all entries of new_dataset that it
//               does not contain yet
// new_dataset - the dataset to take the entries from; its ids and cross references are
//               rewritten in place during the merge, so it must not be relied upon afterwards
//
// The merge works in two phases:
//  1. Ids of agencies, routes, services (calendars) and stops in new_dataset that collide
//     with ids used in dataset are replaced by newly generated ids, and all cross references
//     inside new_dataset are updated accordingly.
//  2. For each of these types, entries of new_dataset that equal an existing entry of dataset
//     are detected, the remaining entries are appended, and references to the duplicates are
//     redirected to the existing entry. Trips, stop times, transfers and calendar dates are
//     then appended unless an equal entry exists already.
//
// The order of the steps matters: references are updated with a mapping before that mapping
// is replaced by the result of the duplicate search.
void merge_datasets(gtfs::Dataset& dataset, gtfs::Dataset& new_dataset)
{
    if (dataset.agencies.empty() && dataset.calendars.empty() && dataset.calendar_dates.empty()
        && dataset.routes.empty() && dataset.stops.empty() && dataset.stop_times.empty()
        && dataset.transfers.empty() && dataset.trips.empty())
    {
        // the target is completely empty, so there is nothing to merge: take over the new dataset
        dataset = new_dataset;
        return;
    }
    // the first part changes ids so that all ids in new_dataset are different to what is used in dataset
    // that way everything can be adapted or taken over without running into potentially existing entries
    auto get_agency_id = [](gtfs::Agency const& a) { return a.id; };
    auto get_route_id = [](gtfs::Route const& a) { return a.id; };
    auto get_trip_id = [](gtfs::Trip const& a) { return a.id; };
    auto get_calendar_service_id = [](gtfs::Calendar const& a) { return a.service_id; };
    auto get_stop_id = [](gtfs::Stop const& a) { return a.id; };

    // the factors are counted based on how often each container is passed to one of the helpers
    // further down in this function (each pass advances the progress once per element), to get
    // the count for 100% of steps; they have to be adapted when calls are added or removed
    uint64_t total_steps = 4U * new_dataset.agencies.size();
    total_steps += 6U * new_dataset.routes.size();
    total_steps += 5U * new_dataset.trips.size();
    total_steps += 4U * new_dataset.calendars.size();
    total_steps += 3U * new_dataset.calendar_dates.size();
    total_steps += 4U * new_dataset.stops.size();
    total_steps += 3U * new_dataset.stop_times.size();
    total_steps += 5U * new_dataset.transfers.size();

    Progress_writer progress("Merging datasets", total_steps);

    // phase 1: find colliding ids and generate replacements (colliding id -> new id)
    auto agency_mapping =
        make_unique_ids(dataset.agencies, new_dataset.agencies, get_agency_id, progress);
    auto route_mapping =
        make_unique_ids(dataset.routes, new_dataset.routes, get_route_id, progress);
    auto service_mapping = make_unique_ids(dataset.calendars, new_dataset.calendars,
                                           get_calendar_service_id, progress);
    auto stop_mapping = make_unique_ids(dataset.stops, new_dataset.stops, get_stop_id, progress);

    // update agencies and cross references to agencies
    update_ids(
        new_dataset.agencies, agency_mapping, get_agency_id,
        [](gtfs::Agency& a, std::string new_id) { a.id = std::move(new_id); }, progress);
    update_ids(
        new_dataset.routes, agency_mapping, [](gtfs::Route const& a) { return a.agency_id; },
        [](gtfs::Route& a, std::string const& new_id) { a.agency_id = new_id; }, progress);

    // same for routes, which are referenced by trips
    update_ids(
        new_dataset.routes, route_mapping, get_route_id,
        [](gtfs::Route& a, std::string const& new_id) { a.id = new_id; }, progress);
    update_ids(
        new_dataset.trips, route_mapping, [](gtfs::Trip const& a) { return a.route_id; },
        [](gtfs::Trip& a, std::string const& new_id) { a.route_id = new_id; }, progress);

    // stops, which are referenced by stop times and by both ends of a transfer
    update_ids(
        new_dataset.stops, stop_mapping, get_stop_id,
        [](gtfs::Stop& a, std::string const& new_id) { a.id = new_id; }, progress);
    update_ids(
        new_dataset.stop_times, stop_mapping, [](gtfs::Stop_time const& a) { return a.stop_id; },
        [](gtfs::Stop_time& a, std::string const& new_id) { a.stop_id = new_id; }, progress);
    update_ids(
        new_dataset.transfers, stop_mapping, [](gtfs::Transfer const& a) { return a.from_stop_id; },
        [](gtfs::Transfer& a, std::string const& new_id) { a.from_stop_id = new_id; }, progress);
    update_ids(
        new_dataset.transfers, stop_mapping, [](gtfs::Transfer const& a) { return a.to_stop_id; },
        [](gtfs::Transfer& a, std::string const& new_id) { a.to_stop_id = new_id; }, progress);

    // services, which are referenced by calendar dates and trips
    update_ids(
        new_dataset.calendars, service_mapping, get_calendar_service_id,
        [](gtfs::Calendar& a, std::string const& new_id) { a.service_id = new_id; }, progress);
    update_ids(
        new_dataset.calendar_dates, service_mapping,
        [](gtfs::Calendar_date const& a) { return a.service_id; },
        [](gtfs::Calendar_date& a, std::string const& new_id) { a.service_id = new_id; }, progress);
    update_ids(
        new_dataset.trips, service_mapping, [](gtfs::Trip const& a) { return a.service_id; },
        [](gtfs::Trip& a, std::string const& new_id) { a.service_id = new_id; }, progress);

    // phase 2: now that all ids are unique the actual merge can be started, including eliminating duplicates
    // from here on the mappings are reused with a different meaning: id of the duplicate in
    // new_dataset -> id of the equal entry in dataset

    // agencies are equal if name, timezone and url are equal
    agency_mapping = find_duplicates(
        dataset.agencies, new_dataset.agencies,
        [](gtfs::Agency const& agency, gtfs::Agency const& new_agency) {
            return new_agency.name == agency.name && new_agency.timezone == agency.timezone
                   && new_agency.url == agency.url;
        },
        get_agency_id, progress);
    merge(dataset.agencies, new_dataset.agencies, agency_mapping, get_agency_id, progress);
    update_ids(
        new_dataset.routes, agency_mapping, [](gtfs::Route const& a) { return a.agency_id; },
        [](gtfs::Route& a, std::string const& new_id) { a.agency_id = new_id; }, progress);

    // routes are equal if agency, both names and the type are equal; the agency ids of the new
    // routes have been redirected to the merged agencies right above
    route_mapping = find_duplicates(
        dataset.routes, new_dataset.routes,
        [](gtfs::Route const& route, gtfs::Route const& new_route) {
            return route.agency_id == new_route.agency_id && route.long_name == new_route.long_name
                   && route.short_name == new_route.short_name && route.type == new_route.type;
        },
        get_route_id, progress);
    merge(dataset.routes, new_dataset.routes, route_mapping, get_route_id, progress);
    update_ids(
        new_dataset.trips, route_mapping, [](gtfs::Trip const& a) { return a.route_id; },
        [](gtfs::Trip& a, std::string const& new_id) { a.route_id = new_id; }, progress);

    // stops are equal if the name is equal and the distance is below 100
    stop_mapping = find_duplicates(
        dataset.stops, new_dataset.stops,
        [](gtfs::Stop const& stop, gtfs::Stop const& new_stop) {
            return stop.name == new_stop.name
                   && calculate_distance(stop.lon, stop.lat, new_stop.lon, new_stop.lat) < 100;
        },
        get_stop_id, progress);
    merge(dataset.stops, new_dataset.stops, stop_mapping, get_stop_id, progress);
    update_ids(
        new_dataset.stop_times, stop_mapping, [](gtfs::Stop_time const& a) { return a.stop_id; },
        [](gtfs::Stop_time& a, std::string const& new_id) { a.stop_id = new_id; }, progress);
    update_ids(
        new_dataset.transfers, stop_mapping, [](gtfs::Transfer const& a) { return a.from_stop_id; },
        [](gtfs::Transfer& a, std::string const& new_id) { a.from_stop_id = new_id; }, progress);
    update_ids(
        new_dataset.transfers, stop_mapping, [](gtfs::Transfer const& a) { return a.to_stop_id; },
        [](gtfs::Transfer& a, std::string const& new_id) { a.to_stop_id = new_id; }, progress);

    // calendars are equal if start date, end date and all weekday flags are equal
    // NOTE(review): the calendar_dates belonging to a service are not part of this comparison,
    // so two services that differ only in their exceptions are treated as duplicates - confirm
    // that this is intended
    service_mapping = find_duplicates(
        dataset.calendars, new_dataset.calendars,
        [](gtfs::Calendar const& calendar, gtfs::Calendar const& new_calendar) {
            return calendar.start_date == new_calendar.start_date
                   && calendar.end_date == new_calendar.end_date
                   && calendar.monday == new_calendar.monday
                   && calendar.tuesday == new_calendar.tuesday
                   && calendar.wednesday == new_calendar.wednesday
                   && calendar.thursday == new_calendar.thursday
                   && calendar.friday == new_calendar.friday
                   && calendar.saturday == new_calendar.saturday
                   && calendar.sunday == new_calendar.sunday;
        },
        get_calendar_service_id, progress);
    merge(dataset.calendars, new_dataset.calendars, service_mapping, get_calendar_service_id,
          progress);
    update_ids(
        new_dataset.calendar_dates, service_mapping,
        [](gtfs::Calendar_date const& a) { return a.service_id; },
        [](gtfs::Calendar_date& a, std::string const& new_id) { a.service_id = new_id; }, progress);
    update_ids(
        new_dataset.trips, service_mapping, [](gtfs::Trip const& a) { return a.service_id; },
        [](gtfs::Trip& a, std::string const& new_id) { a.service_id = new_id; }, progress);

    // trips, stop_times, transfers and calendar dates have no id, so here we can simply merge by checking for duplicates
    // in fact, trips have an id, but it must not be changed as it describes the actual trip as in ICE 0815 - 0815 is the id, this
    // must not be merged or changed as it is user visible
    // these merges compare every new entry with all entries already present in dataset
    merge(
        dataset.trips, new_dataset.trips,
        [](gtfs::Trip const& trip, gtfs::Trip const& new_trip) {
            return trip.direction_id == new_trip.direction_id && trip.id == new_trip.id
                   && trip.route_id == new_trip.route_id && trip.service_id == new_trip.service_id;
        },
        progress);
    merge(
        dataset.stop_times, new_dataset.stop_times,
        [](gtfs::Stop_time const& stop_time, gtfs::Stop_time const& new_stop_time) {
            return stop_time.arrival_time == new_stop_time.arrival_time
                   && stop_time.departure_time == new_stop_time.departure_time
                   && stop_time.drop_off_type == new_stop_time.drop_off_type
                   && stop_time.pickup_type == new_stop_time.pickup_type
                   && stop_time.stop_id == new_stop_time.stop_id
                   && stop_time.stop_sequence == new_stop_time.stop_sequence
                   && stop_time.trip_id == new_stop_time.trip_id;
        },
        progress);
    merge(
        dataset.transfers, new_dataset.transfers,
        [](gtfs::Transfer const& transfer, gtfs::Transfer const& new_transfer) {
            return transfer.from_stop_id == new_transfer.from_stop_id
                   && transfer.min_transfer_time_sec == new_transfer.min_transfer_time_sec
                   && transfer.to_stop_id == new_transfer.to_stop_id
                   && transfer.transfer_type == new_transfer.transfer_type;
        },
        progress);
    merge(
        dataset.calendar_dates, new_dataset.calendar_dates,
        [](gtfs::Calendar_date const& date, gtfs::Calendar_date const& new_date) {
            return date.date == new_date.date && date.exception_type == new_date.exception_type
                   && date.service_id == new_date.service_id;
        },
        progress);
}

// Executes the load command: loads the dataset from the path given as the last parameter,
// merges it into the dataset passed in and rebuilds the indices of the merged result.
void Load_cmd::execute(gtfs::Dataset& dataset)
{
    auto const& path = get_parameters().back();
    auto loaded = load_gtfs(path);

    // merge the new dataset into the existing dataset; this changes the positions of the
    // entries, so the indices have to be rebuilt afterwards
    merge_datasets(dataset, loaded);
    build_indices(dataset);
}

// Writes the usage line and a short description of the load command to the console.
void Load_cmd::help()
{
    Console::write("Usage: load <path>\n");
    Console::write(
        "Loads a gtfs dataset existing of a couple of txt files in CSV format "
        "from the given path. "
        "For the format please check the gtfs documentation.\n");
}
} // namespace gtfsplanner
