diff --git a/README.md b/README.md index f1a761c..f3cd6d4 100644 --- a/README.md +++ b/README.md @@ -39,3 +39,25 @@ make ## Usage For comprehensive help, use `dooked --help` + +### DNS history tracking + +When a previous JSON scan is passed back as input, dooked now carries DNS +record history forward: + +- `first-seen`: first scan time for a domain/type/value tuple +- `last-seen`: most recent scan time where that tuple was observed +- `seen`: number of scans where that tuple was observed +- `currently_seen`: whether the tuple appeared in the latest scan + +This keeps rotating DNS records in the output after they disappear from the +latest scan, so load-balanced targets can be reviewed without losing older +addresses immediately. + +Additional comparison flags: + +- `--fs`: report records first seen in the current scan +- `--ls N`: report records missing from the current scan that have not been + seen in at least `N` days +- `--lsd MM/DD/YYYY`: report missing records last seen before a US-formatted + date. `MM/DD/YYYY HH:MM:SS` is also accepted. diff --git a/dooked/include/cli_preprocessor.hpp b/dooked/include/cli_preprocessor.hpp index 43fa1ba..e8c41a1 100644 --- a/dooked/include/cli_preprocessor.hpp +++ b/dooked/include/cli_preprocessor.hpp @@ -2,6 +2,7 @@ #include "dns/dns_resolver.hpp" #include "utils/io_utils.hpp" +#include #include // maximum sockets to open regardless of the number of threads @@ -24,7 +25,10 @@ struct cli_args_t { int post_http_request{}; int thread_count{}; int content_length{-1}; + int last_seen_days{-1}; bool include_date{false}; + bool show_first_seen{false}; + std::string last_seen_date{}; }; struct runtime_args_t { @@ -36,6 +40,11 @@ struct runtime_args_t { http_process_e http_request_time_{}; int thread_count{}; int content_length{-1}; + int last_seen_days{-1}; + bool show_first_seen{false}; + std::string last_seen_date{}; + std::string scan_time{}; + std::time_t scan_time_epoch{}; }; void run_program(cli_args_t const &cli_args); diff --git a/dooked/include/utils/io_utils.hpp b/dooked/include/utils/io_utils.hpp index 829b09e..26e98cb 100644 --- a/dooked/include/utils/io_utils.hpp +++ b/dooked/include/utils/io_utils.hpp @@ -26,14 +26,46 @@ void trim(std::string &); struct json_data_t { std::string domain_name{}; std::string rdata{}; + std::string first_seen{}; + std::string last_seen{}; int ttl{}; int http_code{}; int content_length{}; + int seen{}; + bool currently_seen{true}; dns_record_type_e type{}; static json_data_t serialize(std::string const &d, int const len, int const http_code, json::object_t &json_object) { + auto const string_value = [&json_object](char const *first_key, + char const *second_key) { + auto first_iter = json_object.find(first_key); + if (first_iter != json_object.end() && first_iter->second.is_string()) { + return first_iter->second.get(); + } + auto second_iter = json_object.find(second_key); + if (second_iter != json_object.end() && second_iter->second.is_string()) { + return second_iter->second.get(); + } + return json::string_t{}; + }; + auto const int_value = [&json_object](char const *key, int const fallback) { + auto iter = json_object.find(key); + if (iter != json_object.end() && iter->second.is_number_integer()) { + return static_cast(iter->second.get()); + } + return fallback; + }; + auto const bool_value = [&json_object](char const *key, + bool const fallback) { + auto iter = json_object.find(key); + if (iter != json_object.end() && iter->second.is_boolean()) { + return iter->second.get(); + } + return fallback; + }; + json_data_t data{}; data.domain_name = d; data.type = @@ -42,6 +74,10 @@ struct json_data_t { data.ttl = json_object["ttl"].get(); data.content_length = len; data.http_code = http_code; + data.first_seen = string_value("first-seen", "first_seen"); + data.last_seen = string_value("last-seen", "last_seen"); + data.seen = int_value("seen", data.last_seen.empty() ? 0 : 1); + data.currently_seen = bool_value("currently_seen", true); return data; } }; @@ -82,6 +118,7 @@ void write_json_result_impl(map_container_t const &result_map, json::object_t res_object; res_object["program"] = "dooked"; + res_object["scanned_at"] = rt_args.scan_time; res_object["result"] = std::move(list); (*rt_args.output_file) << json(res_object).dump(2) << "\n"; rt_args.output_file->close(); diff --git a/dooked/include/utils/probe_result.hpp b/dooked/include/utils/probe_result.hpp index 07211c6..18825f3 100644 --- a/dooked/include/utils/probe_result.hpp +++ b/dooked/include/utils/probe_result.hpp @@ -12,6 +12,10 @@ struct probe_result_t { std::string rdata{}; dns_record_type_e type{}; // RR TYPE (2 octets) std::uint32_t ttl{}; // time to live(4 octets) + std::string first_seen{}; + std::string last_seen{}; + int seen{}; + bool currently_seen{true}; friend bool operator==(probe_result_t const &a, probe_result_t const &b) { return case_insensitive_compare(a.rdata, b.rdata) && (a.type == b.type); diff --git a/dooked/include/utils/random_utils.hpp b/dooked/include/utils/random_utils.hpp index 0e87be2..88c2f69 100644 --- a/dooked/include/utils/random_utils.hpp +++ b/dooked/include/utils/random_utils.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace dooked { diff --git a/dooked/source/cli_preprocessor.cpp b/dooked/source/cli_preprocessor.cpp index c08d7fb..25e753d 100644 --- a/dooked/source/cli_preprocessor.cpp +++ b/dooked/source/cli_preprocessor.cpp @@ -4,10 +4,16 @@ #include "utils/exceptions.hpp" #include "utils/random_utils.hpp" #include "utils/string_utils.hpp" +#include #include #include +#include +#include +#include #include #include +#include +#include // defined (and assigned to) in main.cpp extern bool silent; @@ -17,6 +23,204 @@ namespace dooked { namespace net = boost::asio; using namespace fmt::v7::literals; +using record_key_t = std::tuple; + +std::string lower_copy(std::string value) { + std::transform(value.begin(), value.end(), value.begin(), + [](unsigned char c) { return (char)std::tolower(c); }); + return value; +} + +record_key_t record_key(std::string const &domain, dns_record_type_e const type, + std::string const &rdata) { + return {lower_copy(domain), type, lower_copy(rdata)}; +} + +std::string inherited_first_seen(json_data_t const &record, + std::string const &scan_time) { + if (!record.first_seen.empty()) { + return record.first_seen; + } + if (!record.last_seen.empty()) { + return record.last_seen; + } + return scan_time; +} + +int next_seen_count(json_data_t const &record) { + if (record.seen > 0) { + return record.seen + 1; + } + return record.last_seen.empty() ? 1 : 2; +} + +std::map +index_previous_records(std::optional> const &records) { + std::map result; + if (!records) { + return result; + } + for (auto const &record : *records) { + result[record_key(record.domain_name, record.type, record.rdata)] = record; + } + return result; +} + +std::set +apply_record_history(map_container_t &result_map, + std::optional> const &records, + std::string const &scan_time) { + auto const previous_index = index_previous_records(records); + std::set current_keys; + + for (auto &result_pair : result_map.result()) { + auto const &domain = result_pair.first; + for (auto &record : result_pair.second.dns_result_list_) { + auto const key = record_key(domain, record.type, record.rdata); + current_keys.insert(key); + record.currently_seen = true; + + auto const previous_iter = previous_index.find(key); + if (previous_iter == previous_index.end()) { + record.first_seen = scan_time; + record.last_seen = scan_time; + record.seen = 1; + continue; + } + + auto const &previous = previous_iter->second; + record.first_seen = inherited_first_seen(previous, scan_time); + record.last_seen = scan_time; + record.seen = next_seen_count(previous); + } + } + return current_keys; +} + +void preserve_unseen_history(map_container_t &result_map, + std::optional> const &records, + std::set const ¤t_keys) { + if (!records) { + return; + } + + std::set preserved_keys; + for (auto const &previous : *records) { + auto const key = record_key(previous.domain_name, previous.type, + previous.rdata); + if (current_keys.find(key) != current_keys.end() || + preserved_keys.find(key) != preserved_keys.end()) { + continue; + } + + preserved_keys.insert(key); + probe_result_t stale_record{}; + stale_record.rdata = previous.rdata; + stale_record.type = previous.type; + stale_record.ttl = (std::uint32_t)previous.ttl; + stale_record.first_seen = inherited_first_seen(previous, previous.last_seen); + stale_record.last_seen = previous.last_seen; + stale_record.seen = previous.seen; + stale_record.currently_seen = false; + result_map.result()[previous.domain_name].dns_result_list_.push_back( + std::move(stale_record)); + } +} + +std::vector +active_previous_records(std::vector const &records) { + std::vector active; + active.reserve(records.size()); + std::copy_if(records.cbegin(), records.cend(), std::back_inserter(active), + [](auto const &record) { return record.currently_seen; }); + return active; +} + +std::time_t parse_time(std::string const &value, char const *format) { + std::tm parsed{}; + parsed.tm_isdst = -1; + std::istringstream stream{value}; + stream >> std::get_time(&parsed, format); + if (stream.fail()) { + return (std::time_t)-1; + } + return std::mktime(&parsed); +} + +std::time_t parse_last_seen_time(std::string const &value) { + if (value.empty()) { + return (std::time_t)-1; + } + auto parsed = parse_time(value, "%Y-%m-%d %H:%M:%S"); + if (parsed != (std::time_t)-1) { + return parsed; + } + parsed = parse_time(value, "%m/%d/%Y %H:%M:%S"); + if (parsed != (std::time_t)-1) { + return parsed; + } + return parse_time(value, "%m/%d/%Y"); +} + +void report_last_seen_record(std::string const &domain, + probe_result_t const &record) { + spdlog::warn("[LAST-SEEN][{}][{}] `{}` last seen `{}` (seen {} times)", + domain, dns_record_type_to_str(record.type), record.rdata, + record.last_seen, record.seen); +} + +void report_history_alerts(map_container_t const &result_map, + runtime_args_t const &rt_args) { + bool const needs_last_seen_days = rt_args.last_seen_days >= 0; + bool const needs_last_seen_date = !rt_args.last_seen_date.empty(); + if (!rt_args.show_first_seen && !needs_last_seen_days && + !needs_last_seen_date) { + return; + } + + spdlog::set_pattern("[%^CHECK%$] %v"); + + std::time_t days_cutoff = (std::time_t)-1; + if (needs_last_seen_days) { + auto const day_seconds = (std::time_t)rt_args.last_seen_days * 24 * 60 * 60; + days_cutoff = rt_args.scan_time_epoch - day_seconds; + } + + std::time_t date_cutoff = (std::time_t)-1; + if (needs_last_seen_date) { + date_cutoff = parse_last_seen_time(rt_args.last_seen_date); + if (date_cutoff == (std::time_t)-1) { + spdlog::error("unable to parse --lsd date `{}`", rt_args.last_seen_date); + } + } + + for (auto const &result_pair : result_map.cresult()) { + auto const &domain = result_pair.first; + for (auto const &record : result_pair.second.dns_result_list_) { + if (rt_args.show_first_seen && record.currently_seen && + record.seen == 1) { + spdlog::info("[FIRST-SEEN][{}][{}] `{}` first seen `{}`", domain, + dns_record_type_to_str(record.type), record.rdata, + record.first_seen); + } + + if (record.currently_seen || record.last_seen.empty()) { + continue; + } + + auto const record_time = parse_last_seen_time(record.last_seen); + if (record_time == (std::time_t)-1) { + continue; + } + if (needs_last_seen_days && record_time <= days_cutoff) { + report_last_seen_record(domain, record); + } else if (date_cutoff != (std::time_t)-1 && + record_time < date_cutoff) { + report_last_seen_record(domain, record); + } + } + } +} void compare_http_result(int const base_cl, json_data_t const &prev_http_result, http_response_t const ¤t_result) { @@ -350,17 +554,16 @@ void start_name_checking(runtime_args_t &&rt_args) { } thread_pool->join(); } - if (!silent) { - spdlog::info("Writing JSON output"); - } - write_json_result(result_map, rt_args); + auto const current_keys = + apply_record_history(result_map, rt_args.previous_data, rt_args.scan_time); // compare old with new result -- only if we had previous record if (rt_args.previous_data) { auto &previous_data = *rt_args.previous_data; + auto active_previous_data = active_previous_records(previous_data); // sort the (domain)names in (alphabetical, record type) tuple order - std::sort(previous_data.begin(), previous_data.end(), + std::sort(active_previous_data.begin(), active_previous_data.end(), [](json_data_t const &a, json_data_t const &b) { return std::tie(a.domain_name, a.type) < std::tie(b.domain_name, b.type); @@ -373,9 +576,15 @@ void start_name_checking(runtime_args_t &&rt_args) { return std::tie(a.type, a.rdata) < std::tie(b.type, b.rdata); }); } - return compare_results(*rt_args.previous_data, result_map, - rt_args.content_length); + compare_results(active_previous_data, result_map, rt_args.content_length); + preserve_unseen_history(result_map, rt_args.previous_data, current_keys); } + + report_history_alerts(result_map, rt_args); + if (!silent) { + spdlog::info("Writing JSON output"); + } + write_json_result(result_map, rt_args); } void run_program(cli_args_t const &cli_args) { @@ -477,6 +686,14 @@ void run_program(cli_args_t const &cli_args) { static_cast(cli_args.post_http_request); rt_args.thread_count = cli_args.thread_count; rt_args.content_length = cli_args.content_length; + rt_args.last_seen_days = cli_args.last_seen_days; + rt_args.last_seen_date = cli_args.last_seen_date; + rt_args.show_first_seen = cli_args.show_first_seen; + rt_args.scan_time_epoch = std::time(nullptr); + if (!timet_to_string(rt_args.scan_time, rt_args.scan_time_epoch, + "%Y-%m-%d %H:%M:%S")) { + rt_args.scan_time = "unknown"; + } return start_name_checking(std::move(rt_args)); } diff --git a/dooked/source/http/requests_handler.cpp b/dooked/source/http/requests_handler.cpp index d21a592..2fd0c90 100644 --- a/dooked/source/http/requests_handler.cpp +++ b/dooked/source/http/requests_handler.cpp @@ -10,6 +10,11 @@ extern bool silent; namespace dooked { +template +std::string header_value_to_string(StringView const &value) { + return {value.data(), value.size()}; +} + http_request_handler_t::http_request_handler_t(net::io_context &io_context, std::string domain_name) : io_{io_context}, domain_{std::move(domain_name)} {} @@ -139,7 +144,8 @@ void http_request_handler_t::on_data_received( if (status_code_simple == 2) { response_int = response_type_e::ok; } else if (status_code_simple == 3) { // redirected - response_string = (*response_)[http::field::location].to_string(); + response_string = + header_value_to_string((*response_)[http::field::location]); if (response_string.empty()) { response_int = response_type_e::unknown_response; } else { @@ -171,7 +177,8 @@ void http_request_handler_t::on_data_received( int content_length{}; if (response_->has_content_length()) { try { - auto const cl_str = (*response_)[http::field::content_length].to_string(); + auto const cl_str = + header_value_to_string((*response_)[http::field::content_length]); content_length = std::stoi(cl_str); } catch (std::exception const &) { } @@ -365,7 +372,8 @@ void https_request_handler_t::on_data_received( if (status_code_simple == 2) { response_int = response_type_e::ok; } else if (status_code_simple == 3) { // redirected - response_string = (*response_)[http::field::location].to_string(); + response_string = + header_value_to_string((*response_)[http::field::location]); if (response_string.empty()) { response_int = response_type_e::unknown_response; } else { @@ -392,7 +400,8 @@ void https_request_handler_t::on_data_received( int content_length = 0; if (response_->has_content_length()) { try { - auto const cl_str = (*response_)[http::field::content_length].to_string(); + auto const cl_str = + header_value_to_string((*response_)[http::field::content_length]); content_length = std::stoi(cl_str); } catch (std::exception const &) { } diff --git a/dooked/source/main.cpp b/dooked/source/main.cpp index cf29460..e3ff40f 100644 --- a/dooked/source/main.cpp +++ b/dooked/source/main.cpp @@ -41,6 +41,14 @@ int main(int argc, char **argv) { "defers http request until after all DNS requests have been completed"); app.add_flag("--compare-cl", compare_cl, "compare content-length of HTTP requests"); + app.add_flag("--fs", cli_args.show_first_seen, + "show DNS records that are being seen for the first time"); + app.add_option("--ls", cli_args.last_seen_days, + "show DNS records missing from this run that have not been " + "seen in at least N days"); + app.add_option("--lsd", cli_args.last_seen_date, + "show missing DNS records last seen before a US date " + "(MM/DD/YYYY or MM/DD/YYYY HH:MM:SS)"); app.add_flag("--nbc", no_bytes_count, "in case `content-length` is missing in an HTTP header field," diff --git a/dooked/source/utils/io_utils.cpp b/dooked/source/utils/io_utils.cpp index a1bd5d3..657dd87 100644 --- a/dooked/source/utils/io_utils.cpp +++ b/dooked/source/utils/io_utils.cpp @@ -5,7 +5,11 @@ namespace dooked { void to_json(json &j, probe_result_t const &record) { j = json{{"ttl", record.ttl}, {"type", dns_record_type_to_str(record.type)}, - {"info", record.rdata}}; + {"info", record.rdata}, + {"first-seen", record.first_seen}, + {"last-seen", record.last_seen}, + {"seen", record.seen}, + {"currently_seen", record.currently_seen}}; } bool is_text_file(std::string const &file_extension) {