From 7f22918675bbac5012d88f6ac97921066414b03f Mon Sep 17 00:00:00 2001 From: Vas Crabb Date: Sat, 25 Jun 2016 20:56:46 +1000 Subject: [PATCH] Supply modified time for files in archives [Vas Crabb] --- src/lib/util/un7z.cpp | 106 +++++++++++++++++++----- src/lib/util/unzip.cpp | 173 ++++++++++++++++++++++++++++++++++----- src/lib/util/unzip.h | 2 + src/lib/util/zippath.cpp | 10 ++- 4 files changed, 247 insertions(+), 44 deletions(-) diff --git a/src/lib/util/un7z.cpp b/src/lib/util/un7z.cpp index 75377d4093a..4fee8d156d8 100644 --- a/src/lib/util/un7z.cpp +++ b/src/lib/util/un7z.cpp @@ -23,10 +23,13 @@ #include #include #include +#include #include #include #include +#include #include +#include #include #include @@ -145,11 +148,14 @@ public: bool current_is_directory() const { return m_curr_is_dir; } const std::string &current_name() const { return m_curr_name; } std::uint64_t current_uncompressed_length() const { return m_curr_length; } + virtual std::chrono::system_clock::time_point current_last_modified() const { return m_curr_modified; } std::uint32_t current_crc() const { return m_curr_crc; } archive_file::error decompress(void *buffer, std::uint32_t length); private: + typedef std::chrono::duration<std::uint64_t, std::ratio<1, 10000000> > ntfs_duration; + m7z_file_impl(const m7z_file_impl &) = delete; m7z_file_impl(m7z_file_impl &&) = delete; m7z_file_impl &operator=(const m7z_file_impl &) = delete; @@ -163,34 +169,39 @@ private: bool matchname, bool partialpath); void make_utf8_name(int index); + void set_curr_modified(); - static constexpr std::size_t CACHE_SIZE = 8; - static std::array s_cache; - static std::mutex s_cache_mutex; + static ntfs_duration calculate_ntfs_offset(); - const std::string m_filename; // copy of _7Z filename (for caching) + static constexpr std::size_t CACHE_SIZE = 8; + static const ntfs_duration s_ntfs_offset; + static std::array s_cache; + static std::mutex s_cache_mutex; - int m_curr_file_idx; // current file index - bool m_curr_is_dir; // current file is directory - 
std::string m_curr_name; // current file name - std::uint64_t m_curr_length; // current file uncompressed length - std::uint32_t m_curr_crc; // current file crc + const std::string m_filename; // copy of _7Z filename (for caching) - std::vector m_utf16_buf; - std::vector m_uchar_buf; - std::vector m_utf8_buf; + int m_curr_file_idx; // current file index + bool m_curr_is_dir; // current file is directory + std::string m_curr_name; // current file name + std::uint64_t m_curr_length; // current file uncompressed length + std::chrono::system_clock::time_point m_curr_modified; // current file modification time + std::uint32_t m_curr_crc; // current file crc - CFileInStream m_archive_stream; - CLookToRead m_look_stream; - CSzArEx m_db; - ISzAlloc m_alloc_imp; - ISzAlloc m_alloc_temp_imp; - bool m_inited; + std::vector m_utf16_buf; + std::vector m_uchar_buf; + std::vector m_utf8_buf; + + CFileInStream m_archive_stream; + CLookToRead m_look_stream; + CSzArEx m_db; + ISzAlloc m_alloc_imp; + ISzAlloc m_alloc_temp_imp; + bool m_inited; // cached stuff for solid blocks - UInt32 m_block_index; - Byte * m_out_buffer; - std::size_t m_out_buffer_size; + UInt32 m_block_index; + Byte * m_out_buffer; + std::size_t m_out_buffer_size; }; @@ -220,6 +231,7 @@ public: virtual bool current_is_directory() const override { return m_impl->current_is_directory(); } virtual const std::string &current_name() const override { return m_impl->current_name(); } virtual std::uint64_t current_uncompressed_length() const override { return m_impl->current_uncompressed_length(); } + virtual std::chrono::system_clock::time_point current_last_modified() const override { return m_impl->current_last_modified(); } virtual std::uint32_t current_crc() const override { return m_impl->current_crc(); } virtual error decompress(void *buffer, std::uint32_t length) override { return m_impl->decompress(buffer, length); } @@ -234,6 +246,7 @@ private: GLOBAL VARIABLES 
***************************************************************************/ +const m7z_file_impl::ntfs_duration m7z_file_impl::s_ntfs_offset(calculate_ntfs_offset()); std::array m7z_file_impl::s_cache; std::mutex m7z_file_impl::s_cache_mutex; @@ -277,6 +290,7 @@ m7z_file_impl::m7z_file_impl(const std::string &filename) , m_curr_is_dir(false) , m_curr_name() , m_curr_length(0) + , m_curr_modified() , m_curr_crc(0) , m_utf16_buf(128) , m_uchar_buf(128) @@ -447,6 +461,7 @@ int m7z_file_impl::search( m_curr_is_dir = is_dir; m_curr_name = &m_utf8_buf[0]; m_curr_length = size; + set_curr_modified(); m_curr_crc = crc; return i; @@ -499,6 +514,55 @@ void m7z_file_impl::make_utf8_name(int index) m_utf8_buf.resize(out_pos); } + +void m7z_file_impl::set_curr_modified() +{ + if (SzBitWithVals_Check(&m_db.MTime, m_curr_file_idx)) + { + CNtfsFileTime const &file_time(m_db.MTime.Vals[m_curr_file_idx]); + ntfs_duration const ticks((std::uint64_t(file_time.High) << 32) | std::uint64_t(file_time.Low)); + m_curr_modified = std::chrono::system_clock::from_time_t(0) + (ticks - s_ntfs_offset); + } + else + { + // FIXME: what do we do about a lack of time? 
+ } +} + + +m7z_file_impl::ntfs_duration m7z_file_impl::calculate_ntfs_offset() +{ + constexpr auto days_in_year(365); + constexpr auto days_in_four_years((days_in_year * 4) + 1); + constexpr auto days_in_century((days_in_four_years * 25) - 1); + constexpr auto days_in_four_centuries((days_in_century * 4) + 1); + + constexpr ntfs_duration day(std::chrono::hours(24)); + constexpr ntfs_duration year(day * days_in_year); + constexpr ntfs_duration four_years(day * days_in_four_years); + constexpr ntfs_duration century(day * days_in_century); + constexpr ntfs_duration four_centuries(day * days_in_four_centuries); + + std::time_t const zero(0); + std::tm const epoch(*std::gmtime(&zero)); + + ntfs_duration result(day * epoch.tm_yday); + result += std::chrono::hours(epoch.tm_hour); + result += std::chrono::minutes(epoch.tm_min); + result += std::chrono::seconds(epoch.tm_sec); + + int years(1900 - 1601 + epoch.tm_year); + result += four_centuries * (years / 400); + years %= 400; + result += century * (years / 100); + years %= 100; + result += four_years * (years / 4); + years %= 4; + result += year * years; + + return result; +} + } // anonymous namespace diff --git a/src/lib/util/unzip.cpp b/src/lib/util/unzip.cpp index 021d9602dae..a7f3295551d 100644 --- a/src/lib/util/unzip.cpp +++ b/src/lib/util/unzip.cpp @@ -14,18 +14,21 @@ #include "hashing.h" #include "osdcore.h" -#include -#include -#include -#include -#include -#include -#include -#include +#include "lzma/C/LzmaDec.h" #include -#include "lzma/C/LzmaDec.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace util { @@ -164,11 +167,14 @@ public: bool current_is_directory() const { return m_curr_is_dir; } const std::string &current_name() const { return m_header.file_name; } std::uint64_t current_uncompressed_length() const { return m_header.uncompressed_length; } + std::chrono::system_clock::time_point current_last_modified() const { return 
m_header.modified; } std::uint32_t current_crc() const { return m_header.crc; } archive_file::error decompress(void *buffer, std::uint32_t length); private: + typedef std::chrono::duration<std::uint64_t, std::ratio<1, 10000000> > ntfs_duration; + zip_file_impl(const zip_file_impl &) = delete; zip_file_impl(zip_file_impl &&) = delete; zip_file_impl &operator=(const zip_file_impl &) = delete; @@ -195,6 +201,24 @@ private: return archive_file::error::NONE; } + static std::chrono::system_clock::time_point decode_dos_time(std::uint16_t date, std::uint16_t time) + { + // FIXME: work out why this doesn't always work + // negative tm_isdst should automatically determine whether DST is in effect for the date, + // but on Windows apparently it doesn't, so you get time offsets + std::tm datetime; + datetime.tm_sec = (time << 1) & 0x003e; + datetime.tm_min = (time >> 5) & 0x003f; + datetime.tm_hour = (time >> 11) & 0x001f; + datetime.tm_mday = (date >> 0) & 0x001f; + datetime.tm_mon = ((date >> 5) & 0x000f) - 1; + datetime.tm_year = ((date >> 9) & 0x007f) + 80; + datetime.tm_wday = 0; + datetime.tm_yday = 0; + datetime.tm_isdst = -1; + return std::chrono::system_clock::from_time_t(std::mktime(&datetime)); + } + // ZIP file parsing archive_file::error read_ecd(); archive_file::error get_compressed_data_offset(std::uint64_t &offset); @@ -204,18 +228,22 @@ private: archive_file::error decompress_data_type_8(std::uint64_t offset, void *buffer, std::uint32_t length); archive_file::error decompress_data_type_14(std::uint64_t offset, void *buffer, std::uint32_t length); + // precalculation + static ntfs_duration calculate_ntfs_offset(); + struct file_header { - std::uint16_t version_created; // version made by - std::uint16_t version_needed; // version needed to extract - std::uint16_t bit_flag; // general purpose bit flag - std::uint16_t compression; // compression method - std::uint32_t crc; // crc-32 - std::uint64_t compressed_length; // compressed size - std::uint64_t uncompressed_length; // uncompressed size - 
std::uint32_t start_disk_number; // disk number start - std::uint64_t local_header_offset; // relative offset of local header - std::string file_name; // file name + std::uint16_t version_created; // version made by + std::uint16_t version_needed; // version needed to extract + std::uint16_t bit_flag; // general purpose bit flag + std::uint16_t compression; // compression method + std::chrono::system_clock::time_point modified; // last mod file date/time + std::uint32_t crc; // crc-32 + std::uint64_t compressed_length; // compressed size + std::uint64_t uncompressed_length; // uncompressed size + std::uint32_t start_disk_number; // disk number start + std::uint64_t local_header_offset; // relative offset of local header + std::string file_name; // file name }; // contains extracted end of central directory information @@ -231,6 +259,7 @@ private: static constexpr std::size_t DECOMPRESS_BUFSIZE = 16384; static constexpr std::size_t CACHE_SIZE = 8; // number of open files to cache + static const ntfs_duration s_ntfs_offset; static std::array s_cache; static std::mutex s_cache_mutex; @@ -274,6 +303,7 @@ public: virtual bool current_is_directory() const override { return m_impl->current_is_directory(); } virtual const std::string &current_name() const override { return m_impl->current_name(); } virtual std::uint64_t current_uncompressed_length() const override { return m_impl->current_uncompressed_length(); } + virtual std::chrono::system_clock::time_point current_last_modified() const override { return m_impl->current_last_modified(); } virtual std::uint32_t current_crc() const override { return m_impl->current_crc(); } virtual error decompress(void *buffer, std::uint32_t length) override { return m_impl->decompress(buffer, length); } @@ -540,6 +570,59 @@ private: }; +class ntfs_tag_reader : private reader_base +{ +public: + ntfs_tag_reader(void const *buf, std::size_t len) : reader_base(buf), m_length(len) { } + + std::uint16_t tag() const { return read_word(0x00); } + 
std::uint16_t size() const { return read_word(0x02); } + void const * data() const { return m_buffer + 0x04; } + ntfs_tag_reader next() const { return ntfs_tag_reader(m_buffer + total_length(), m_length - total_length()); } + + bool length_sufficient() const { return (m_length >= minimum_length()) && (m_length >= total_length()); } + + std::size_t total_length() const { return minimum_length() + size(); } + static std::size_t minimum_length() { return 0x04; } + +private: + std::size_t m_length; +}; + + +class ntfs_reader : private reader_base +{ +public: + ntfs_reader(extra_field_reader const &field) : reader_base(field.data()), m_length(field.data_size()) { } + + std::uint32_t reserved() const { return read_dword(0x00); } + ntfs_tag_reader tag1() const { return ntfs_tag_reader(m_buffer + 0x04, m_length - 4); } + + std::size_t total_length() const { return m_length; } + static std::size_t minimum_length() { return 0x08; } + +private: + std::size_t m_length; +}; + + +class ntfs_times_reader : private reader_base +{ +public: + ntfs_times_reader(ntfs_tag_reader const &tag) : reader_base(tag.data()) { } + + std::uint64_t mtime() const { return read_qword(0x00); } + std::uint64_t atime() const { return read_qword(0x08); } + std::uint64_t ctime() const { return read_qword(0x10); } + + std::size_t total_length() const { return minimum_length(); } + static std::size_t minimum_length() { return 0x18; } + +private: + std::size_t m_length; +}; + + class general_flag_reader { public: @@ -566,7 +649,7 @@ private: GLOBAL VARIABLES ***************************************************************************/ -/** @brief The zip cache[ zip cache size]. 
*/ +const zip_file_impl::ntfs_duration zip_file_impl::s_ntfs_offset(calculate_ntfs_offset()); std::array zip_file_impl::s_cache; std::mutex zip_file_impl::s_cache_mutex; @@ -631,6 +714,7 @@ int zip_file_impl::search(std::uint32_t search_crc, const std::string &search_fi m_header.version_needed = reader.version_needed(); m_header.bit_flag = reader.general_flag(); m_header.compression = reader.compression_method(); + m_header.modified = decode_dos_time(reader.modified_date(), reader.modified_time()); m_header.crc = reader.crc32(); m_header.compressed_length = reader.compressed_size(); m_header.uncompressed_length = reader.uncompressed_size(); @@ -676,6 +760,21 @@ int zip_file_impl::search(std::uint32_t search_crc, const std::string &search_fi } } } + + // look for NTFS extra field + if ((extra.header_id() == 0x000a) && (extra.data_size() >= ntfs_reader::minimum_length())) + { + ntfs_reader const ntfs(extra); + for (auto tag = ntfs.tag1(); tag.length_sufficient(); tag = tag.next()) + { + if ((tag.tag() == 0x0001) && (tag.size() >= ntfs_times_reader::minimum_length())) + { + ntfs_times_reader const times(tag); + ntfs_duration const ticks(times.mtime()); + m_header.modified = std::chrono::system_clock::from_time_t(0) + (ticks - s_ntfs_offset); + } + } + } } // FIXME: if (!is_utf8) convert filename to UTF8 (assume CP437 or something) @@ -1312,6 +1411,40 @@ archive_file::error zip_file_impl::decompress_data_type_14(std::uint64_t offset, } } + +zip_file_impl::ntfs_duration zip_file_impl::calculate_ntfs_offset() +{ + constexpr auto days_in_year(365); + constexpr auto days_in_four_years((days_in_year * 4) + 1); + constexpr auto days_in_century((days_in_four_years * 25) - 1); + constexpr auto days_in_four_centuries((days_in_century * 4) + 1); + + constexpr ntfs_duration day(std::chrono::hours(24)); + constexpr ntfs_duration year(day * days_in_year); + constexpr ntfs_duration four_years(day * days_in_four_years); + constexpr ntfs_duration century(day * days_in_century); + 
constexpr ntfs_duration four_centuries(day * days_in_four_centuries); + + std::time_t const zero(0); + std::tm const epoch(*std::gmtime(&zero)); + + ntfs_duration result(day * epoch.tm_yday); + result += std::chrono::hours(epoch.tm_hour); + result += std::chrono::minutes(epoch.tm_min); + result += std::chrono::seconds(epoch.tm_sec); + + int years(1900 - 1601 + epoch.tm_year); + result += four_centuries * (years / 400); + years %= 400; + result += century * (years / 100); + years %= 100; + result += four_years * (years / 4); + years %= 4; + result += year * years; + + return result; +} + } // anonymous namespace diff --git a/src/lib/util/unzip.h b/src/lib/util/unzip.h index b521119c54b..a1388cd0b4c 100644 --- a/src/lib/util/unzip.h +++ b/src/lib/util/unzip.h @@ -15,6 +15,7 @@ #include "osdcore.h" +#include #include #include #include @@ -77,6 +78,7 @@ public: virtual bool current_is_directory() const = 0; virtual const std::string &current_name() const = 0; virtual std::uint64_t current_uncompressed_length() const = 0; + virtual std::chrono::system_clock::time_point current_last_modified() const = 0; virtual std::uint32_t current_crc() const = 0; // decompress the most recently found file in the ZIP diff --git a/src/lib/util/zippath.cpp b/src/lib/util/zippath.cpp index 3322c1152cc..6cecdb371e5 100644 --- a/src/lib/util/zippath.cpp +++ b/src/lib/util/zippath.cpp @@ -918,9 +918,10 @@ const osd::directory::entry *zippath_readdir(zippath_directory *directory) { /* first thing's first - return parent directory */ directory->returned_parent = true; - memset(&directory->returned_entry, 0, sizeof(directory->returned_entry)); directory->returned_entry.name = ".."; directory->returned_entry.type = osd::directory::entry::entry_type::DIR; + directory->returned_entry.size = 0; // FIXME: what would stat say? + // FIXME: modified time? 
result = &directory->returned_entry; } else if (directory->directory) @@ -938,6 +939,8 @@ const osd::directory::entry *zippath_readdir(zippath_directory *directory) /* copy; but change the entry type */ directory->returned_entry = *result; directory->returned_entry.type = osd::directory::entry::entry_type::DIR; + directory->returned_entry.size = 0; // FIXME: what would stat say? + // FIXME: modified time? result = &directory->returned_entry; } } @@ -981,19 +984,20 @@ const osd::directory::entry *zippath_readdir(zippath_directory *directory) directory->returned_dirlist.emplace_front(relpath, separator - relpath); /* ...and return it */ - memset(&directory->returned_entry, 0, sizeof(directory->returned_entry)); directory->returned_entry.name = directory->returned_dirlist.front().c_str(); directory->returned_entry.type = osd::directory::entry::entry_type::DIR; + directory->returned_entry.size = 0; // FIXME: what would stat say? + // FIXME: modified time? result = &directory->returned_entry; } } else { /* a real file */ - memset(&directory->returned_entry, 0, sizeof(directory->returned_entry)); directory->returned_entry.name = relpath; directory->returned_entry.type = osd::directory::entry::entry_type::FILE; directory->returned_entry.size = directory->zipfile->current_uncompressed_length(); + directory->returned_entry.last_modified = directory->zipfile->current_last_modified(); result = &directory->returned_entry; } }