Supply modified time for files in archives [Vas Crabb]

This commit is contained in:
Vas Crabb 2016-06-25 20:56:46 +10:00
parent 0b37361fdf
commit 7f22918675
4 changed files with 247 additions and 44 deletions

View File

@ -23,10 +23,13 @@
#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <mutex>
#include <ratio>
#include <utility>
#include <vector>
@ -145,11 +148,14 @@ public:
bool current_is_directory() const { return m_curr_is_dir; }
const std::string &current_name() const { return m_curr_name; }
std::uint64_t current_uncompressed_length() const { return m_curr_length; }
// Returns the current file's modification time (the value held in m_curr_modified).
// NOTE(review): this is the only accessor in this group declared virtual - confirm whether that is intentional.
virtual std::chrono::system_clock::time_point current_last_modified() const { return m_curr_modified; }
std::uint32_t current_crc() const { return m_curr_crc; }
archive_file::error decompress(void *buffer, std::uint32_t length);
private:
typedef std::chrono::duration<std::uint64_t, std::ratio<1, 10000000> > ntfs_duration;
m7z_file_impl(const m7z_file_impl &) = delete;
m7z_file_impl(m7z_file_impl &&) = delete;
m7z_file_impl &operator=(const m7z_file_impl &) = delete;
@ -163,34 +169,39 @@ private:
bool matchname,
bool partialpath);
void make_utf8_name(int index);
void set_curr_modified();
static constexpr std::size_t CACHE_SIZE = 8;
static std::array<ptr, CACHE_SIZE> s_cache;
static std::mutex s_cache_mutex;
static ntfs_duration calculate_ntfs_offset();
const std::string m_filename; // copy of _7Z filename (for caching)
static constexpr std::size_t CACHE_SIZE = 8;
static const ntfs_duration s_ntfs_offset;
static std::array<ptr, CACHE_SIZE> s_cache;
static std::mutex s_cache_mutex;
int m_curr_file_idx; // current file index
bool m_curr_is_dir; // current file is directory
std::string m_curr_name; // current file name
std::uint64_t m_curr_length; // current file uncompressed length
std::uint32_t m_curr_crc; // current file crc
const std::string m_filename; // copy of _7Z filename (for caching)
std::vector<UInt16> m_utf16_buf;
std::vector<unicode_char> m_uchar_buf;
std::vector<char> m_utf8_buf;
int m_curr_file_idx; // current file index
bool m_curr_is_dir; // current file is directory
std::string m_curr_name; // current file name
std::uint64_t m_curr_length; // current file uncompressed length
std::chrono::system_clock::time_point m_curr_modified; // current file modification time
std::uint32_t m_curr_crc; // current file crc
CFileInStream m_archive_stream;
CLookToRead m_look_stream;
CSzArEx m_db;
ISzAlloc m_alloc_imp;
ISzAlloc m_alloc_temp_imp;
bool m_inited;
std::vector<UInt16> m_utf16_buf;
std::vector<unicode_char> m_uchar_buf;
std::vector<char> m_utf8_buf;
CFileInStream m_archive_stream;
CLookToRead m_look_stream;
CSzArEx m_db;
ISzAlloc m_alloc_imp;
ISzAlloc m_alloc_temp_imp;
bool m_inited;
// cached stuff for solid blocks
UInt32 m_block_index;
Byte * m_out_buffer;
std::size_t m_out_buffer_size;
UInt32 m_block_index;
Byte * m_out_buffer;
std::size_t m_out_buffer_size;
};
@ -220,6 +231,7 @@ public:
virtual bool current_is_directory() const override { return m_impl->current_is_directory(); }
virtual const std::string &current_name() const override { return m_impl->current_name(); }
virtual std::uint64_t current_uncompressed_length() const override { return m_impl->current_uncompressed_length(); }
virtual std::chrono::system_clock::time_point current_last_modified() const override { return m_impl->current_last_modified(); }
virtual std::uint32_t current_crc() const override { return m_impl->current_crc(); }
virtual error decompress(void *buffer, std::uint32_t length) override { return m_impl->decompress(buffer, length); }
@ -234,6 +246,7 @@ private:
GLOBAL VARIABLES
***************************************************************************/
const m7z_file_impl::ntfs_duration m7z_file_impl::s_ntfs_offset(calculate_ntfs_offset());
std::array<m7z_file_impl::ptr, m7z_file_impl::CACHE_SIZE> m7z_file_impl::s_cache;
std::mutex m7z_file_impl::s_cache_mutex;
@ -277,6 +290,7 @@ m7z_file_impl::m7z_file_impl(const std::string &filename)
, m_curr_is_dir(false)
, m_curr_name()
, m_curr_length(0)
, m_curr_modified()
, m_curr_crc(0)
, m_utf16_buf(128)
, m_uchar_buf(128)
@ -447,6 +461,7 @@ int m7z_file_impl::search(
m_curr_is_dir = is_dir;
m_curr_name = &m_utf8_buf[0];
m_curr_length = size;
set_curr_modified();
m_curr_crc = crc;
return i;
@ -499,6 +514,55 @@ void m7z_file_impl::make_utf8_name(int index)
m_utf8_buf.resize(out_pos);
}
// Refreshes m_curr_modified from the archive database entry at m_curr_file_idx.
//
// When the entry carries an MTime value, its 64-bit tick count (High:Low) is
// taken as an ntfs_duration, rebased by s_ntfs_offset and added to the
// system_clock time point for time_t zero.  When it carries none, the member
// is reset to a default-constructed time point so that the timestamp of a
// previously selected entry is never reported for this one.
void m7z_file_impl::set_curr_modified()
{
	if (SzBitWithVals_Check(&m_db.MTime, m_curr_file_idx))
	{
		CNtfsFileTime const &file_time(m_db.MTime.Vals[m_curr_file_idx]);
		ntfs_duration const ticks((std::uint64_t(file_time.High) << 32) | std::uint64_t(file_time.Low));
		m_curr_modified = std::chrono::system_clock::from_time_t(0) + (ticks - s_ntfs_offset);
	}
	else
	{
		// FIXME: what do we do about a lack of time?
		// Until a better policy exists, at least do not keep the previous entry's timestamp.
		m_curr_modified = std::chrono::system_clock::time_point();
	}
}
// Computes the interval between 1601-01-01 00:00:00 and the calendar time that
// std::gmtime reports for time_t zero, expressed as an ntfs_duration.
m7z_file_impl::ntfs_duration m7z_file_impl::calculate_ntfs_offset()
{
	// lengths of the calendar periods, in days
	constexpr int year_days(365);
	constexpr int quad_days((year_days * 4) + 1);       // four years contain one leap day
	constexpr int century_days((quad_days * 25) - 1);   // a century drops one leap day
	constexpr int cycle_days((century_days * 4) + 1);   // four centuries get one back
	constexpr ntfs_duration one_day(std::chrono::hours(24));

	// break time_t zero down into UTC calendar fields
	std::time_t const origin(0);
	std::tm const utc(*std::gmtime(&origin));

	// portion of the offset that falls inside the epoch's own year
	ntfs_duration offset(one_day * utc.tm_yday);
	offset += std::chrono::hours(utc.tm_hour);
	offset += std::chrono::minutes(utc.tm_min);
	offset += std::chrono::seconds(utc.tm_sec);

	// whole years from 1601 up to the epoch year, consumed largest period first
	struct calendar_span { int years; int days; };
	calendar_span const spans[] = {
			{ 400, cycle_days },
			{ 100, century_days },
			{ 4, quad_days },
			{ 1, year_days } };
	int remaining(1900 - 1601 + utc.tm_year);
	for (calendar_span const &span : spans)
	{
		offset += (one_day * span.days) * (remaining / span.years);
		remaining %= span.years;
	}

	return offset;
}
} // anonymous namespace

View File

@ -14,18 +14,21 @@
#include "hashing.h"
#include "osdcore.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <mutex>
#include <utility>
#include <vector>
#include "lzma/C/LzmaDec.h"
#include <zlib.h>
#include "lzma/C/LzmaDec.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <mutex>
#include <ratio>
#include <utility>
#include <vector>
namespace util {
@ -164,11 +167,14 @@ public:
bool current_is_directory() const { return m_curr_is_dir; }
const std::string &current_name() const { return m_header.file_name; }
std::uint64_t current_uncompressed_length() const { return m_header.uncompressed_length; }
std::chrono::system_clock::time_point current_last_modified() const { return m_header.modified; }
std::uint32_t current_crc() const { return m_header.crc; }
archive_file::error decompress(void *buffer, std::uint32_t length);
private:
typedef std::chrono::duration<std::uint64_t, std::ratio<1, 10000000> > ntfs_duration;
zip_file_impl(const zip_file_impl &) = delete;
zip_file_impl(zip_file_impl &&) = delete;
zip_file_impl &operator=(const zip_file_impl &) = delete;
@ -195,6 +201,24 @@ private:
return archive_file::error::NONE;
}
// Converts an MS-DOS date/time pair to a system_clock time point.
//
// Field layout decoded here:
//   time: bits 0-4 seconds / 2, bits 5-10 minutes, bits 11-15 hours
//   date: bits 0-4 day of month, bits 5-8 month (1-based), bits 9-15 years since 1980
//
// The broken-down fields are handed to std::mktime, so they are interpreted
// as local time.
static std::chrono::system_clock::time_point decode_dos_time(std::uint16_t date, std::uint16_t time)
{
	// FIXME: work out why this doesn't always work
	// negative tm_isdst should automatically determine whether DST is in effect for the date,
	// but on Windows apparently it doesn't, so you get time offsets

	// value-initialise so that any implementation-specific members of std::tm
	// are not left indeterminate when the structure is passed to std::mktime
	std::tm datetime = std::tm();
	datetime.tm_sec = (time << 1) & 0x003e;
	datetime.tm_min = (time >> 5) & 0x003f;
	datetime.tm_hour = (time >> 11) & 0x001f;
	datetime.tm_mday = (date >> 0) & 0x001f;
	datetime.tm_mon = ((date >> 5) & 0x000f) - 1;   // std::tm months are zero-based
	datetime.tm_year = ((date >> 9) & 0x007f) + 80; // std::tm years count from 1900
	datetime.tm_wday = 0;
	datetime.tm_yday = 0;
	datetime.tm_isdst = -1; // ask the C library to work out whether DST applies
	return std::chrono::system_clock::from_time_t(std::mktime(&datetime));
}
// ZIP file parsing
archive_file::error read_ecd();
archive_file::error get_compressed_data_offset(std::uint64_t &offset);
@ -204,18 +228,22 @@ private:
archive_file::error decompress_data_type_8(std::uint64_t offset, void *buffer, std::uint32_t length);
archive_file::error decompress_data_type_14(std::uint64_t offset, void *buffer, std::uint32_t length);
// precalculation
static ntfs_duration calculate_ntfs_offset();
struct file_header
{
std::uint16_t version_created; // version made by
std::uint16_t version_needed; // version needed to extract
std::uint16_t bit_flag; // general purpose bit flag
std::uint16_t compression; // compression method
std::uint32_t crc; // crc-32
std::uint64_t compressed_length; // compressed size
std::uint64_t uncompressed_length; // uncompressed size
std::uint32_t start_disk_number; // disk number start
std::uint64_t local_header_offset; // relative offset of local header
std::string file_name; // file name
std::uint16_t version_created; // version made by
std::uint16_t version_needed; // version needed to extract
std::uint16_t bit_flag; // general purpose bit flag
std::uint16_t compression; // compression method
std::chrono::system_clock::time_point modified; // last mod file date/time
std::uint32_t crc; // crc-32
std::uint64_t compressed_length; // compressed size
std::uint64_t uncompressed_length; // uncompressed size
std::uint32_t start_disk_number; // disk number start
std::uint64_t local_header_offset; // relative offset of local header
std::string file_name; // file name
};
// contains extracted end of central directory information
@ -231,6 +259,7 @@ private:
static constexpr std::size_t DECOMPRESS_BUFSIZE = 16384;
static constexpr std::size_t CACHE_SIZE = 8; // number of open files to cache
static const ntfs_duration s_ntfs_offset;
static std::array<ptr, CACHE_SIZE> s_cache;
static std::mutex s_cache_mutex;
@ -274,6 +303,7 @@ public:
virtual bool current_is_directory() const override { return m_impl->current_is_directory(); }
virtual const std::string &current_name() const override { return m_impl->current_name(); }
virtual std::uint64_t current_uncompressed_length() const override { return m_impl->current_uncompressed_length(); }
virtual std::chrono::system_clock::time_point current_last_modified() const override { return m_impl->current_last_modified(); }
virtual std::uint32_t current_crc() const override { return m_impl->current_crc(); }
virtual error decompress(void *buffer, std::uint32_t length) override { return m_impl->decompress(buffer, length); }
@ -540,6 +570,59 @@ private:
};
// View over one tag record inside an NTFS extra field: a four-byte header
// (tag word at 0x00, size word at 0x02) followed by the tag's data.
// m_length is the number of bytes available from the start of this record.
class ntfs_tag_reader : private reader_base
{
public:
	ntfs_tag_reader(void const *buf, std::size_t len)
		: reader_base(buf)
		, m_length(len)
	{
	}

	// header fields
	std::uint16_t tag() const { return read_word(0x00); }
	std::uint16_t size() const { return read_word(0x02); }

	// data begins directly after the header
	void const *data() const { return m_buffer + 0x04; }

	// reader positioned on the record that follows this one
	ntfs_tag_reader next() const
	{
		std::size_t const consumed(total_length());
		return ntfs_tag_reader(m_buffer + consumed, m_length - consumed);
	}

	// true when the available bytes cover the header and the declared data;
	// the header is checked first so size() is not read from a short buffer
	bool length_sufficient() const
	{
		if (m_length < minimum_length())
			return false;
		return m_length >= total_length();
	}

	std::size_t total_length() const { return minimum_length() + size(); }
	static std::size_t minimum_length() { return 0x04; }

private:
	std::size_t m_length;
};
// View over the data of an NTFS extra field: a dword at offset 0x00
// (exposed as reserved()) followed by tag records starting at offset 0x04.
class ntfs_reader : private reader_base
{
public:
	ntfs_reader(extra_field_reader const &field)
		: reader_base(field.data())
		, m_length(field.data_size())
	{
	}

	std::uint32_t reserved() const { return read_dword(0x00); }

	// reader for the first tag record; it is given the bytes left after the leading dword
	ntfs_tag_reader tag1() const
	{
		return ntfs_tag_reader(m_buffer + 0x04, m_length - 4);
	}

	std::size_t total_length() const { return m_length; }
	static std::size_t minimum_length() { return 0x08; }

private:
	std::size_t m_length;
};
// Reads the three timestamps held in the data of an NTFS extra field tag:
// qwords at offsets 0x00 (mtime), 0x08 (atime) and 0x10 (ctime).
// No length is tracked here; callers are expected to confirm that the tag
// holds at least minimum_length() bytes before reading.
class ntfs_times_reader : private reader_base
{
public:
	ntfs_times_reader(ntfs_tag_reader const &tag) : reader_base(tag.data()) { }

	std::uint64_t mtime() const { return read_qword(0x00); }
	std::uint64_t atime() const { return read_qword(0x08); }
	std::uint64_t ctime() const { return read_qword(0x10); }

	// the record has a fixed size, so total and minimum lengths coincide
	std::size_t total_length() const { return minimum_length(); }
	static std::size_t minimum_length() { return 0x18; }
};
class general_flag_reader
{
public:
@ -566,7 +649,7 @@ private:
GLOBAL VARIABLES
***************************************************************************/
/** @brief The zip cache[ zip cache size]. */
const zip_file_impl::ntfs_duration zip_file_impl::s_ntfs_offset(calculate_ntfs_offset());
std::array<zip_file_impl::ptr, zip_file_impl::CACHE_SIZE> zip_file_impl::s_cache;
std::mutex zip_file_impl::s_cache_mutex;
@ -631,6 +714,7 @@ int zip_file_impl::search(std::uint32_t search_crc, const std::string &search_fi
m_header.version_needed = reader.version_needed();
m_header.bit_flag = reader.general_flag();
m_header.compression = reader.compression_method();
m_header.modified = decode_dos_time(reader.modified_date(), reader.modified_time());
m_header.crc = reader.crc32();
m_header.compressed_length = reader.compressed_size();
m_header.uncompressed_length = reader.uncompressed_size();
@ -676,6 +760,21 @@ int zip_file_impl::search(std::uint32_t search_crc, const std::string &search_fi
}
}
}
// look for NTFS extra field
if ((extra.header_id() == 0x000a) && (extra.data_size() >= ntfs_reader::minimum_length()))
{
ntfs_reader const ntfs(extra);
for (auto tag = ntfs.tag1(); tag.length_sufficient(); tag = tag.next())
{
if ((tag.tag() == 0x0001) && (tag.size() >= ntfs_times_reader::minimum_length()))
{
ntfs_times_reader const times(tag);
ntfs_duration const ticks(times.mtime());
m_header.modified = std::chrono::system_clock::from_time_t(0) + (ticks - s_ntfs_offset);
}
}
}
}
// FIXME: if (!is_utf8) convert filename to UTF8 (assume CP437 or something)
@ -1312,6 +1411,40 @@ archive_file::error zip_file_impl::decompress_data_type_14(std::uint64_t offset,
}
}
// Computes the interval between 1601-01-01 00:00:00 and the calendar time that
// std::gmtime reports for time_t zero, expressed as an ntfs_duration.
zip_file_impl::ntfs_duration zip_file_impl::calculate_ntfs_offset()
{
	// lengths of the calendar periods, in days
	constexpr int year_days(365);
	constexpr int quad_days((year_days * 4) + 1);       // four years contain one leap day
	constexpr int century_days((quad_days * 25) - 1);   // a century drops one leap day
	constexpr int cycle_days((century_days * 4) + 1);   // four centuries get one back
	constexpr ntfs_duration one_day(std::chrono::hours(24));

	// break time_t zero down into UTC calendar fields
	std::time_t const origin(0);
	std::tm const utc(*std::gmtime(&origin));

	// portion of the offset that falls inside the epoch's own year
	ntfs_duration offset(one_day * utc.tm_yday);
	offset += std::chrono::hours(utc.tm_hour);
	offset += std::chrono::minutes(utc.tm_min);
	offset += std::chrono::seconds(utc.tm_sec);

	// whole years from 1601 up to the epoch year, consumed largest period first
	struct calendar_span { int years; int days; };
	calendar_span const spans[] = {
			{ 400, cycle_days },
			{ 100, century_days },
			{ 4, quad_days },
			{ 1, year_days } };
	int remaining(1900 - 1601 + utc.tm_year);
	for (calendar_span const &span : spans)
	{
		offset += (one_day * span.days) * (remaining / span.years);
		remaining %= span.years;
	}

	return offset;
}
} // anonymous namespace

View File

@ -15,6 +15,7 @@
#include "osdcore.h"
#include <chrono>
#include <cstdint>
#include <memory>
#include <string>
@ -77,6 +78,7 @@ public:
virtual bool current_is_directory() const = 0;
virtual const std::string &current_name() const = 0;
virtual std::uint64_t current_uncompressed_length() const = 0;
virtual std::chrono::system_clock::time_point current_last_modified() const = 0;
virtual std::uint32_t current_crc() const = 0;
// decompress the most recently found file in the ZIP

View File

@ -918,9 +918,10 @@ const osd::directory::entry *zippath_readdir(zippath_directory *directory)
{
/* first thing's first - return parent directory */
directory->returned_parent = true;
memset(&directory->returned_entry, 0, sizeof(directory->returned_entry));
directory->returned_entry.name = "..";
directory->returned_entry.type = osd::directory::entry::entry_type::DIR;
directory->returned_entry.size = 0; // FIXME: what would stat say?
// FIXME: modified time?
result = &directory->returned_entry;
}
else if (directory->directory)
@ -938,6 +939,8 @@ const osd::directory::entry *zippath_readdir(zippath_directory *directory)
/* copy; but change the entry type */
directory->returned_entry = *result;
directory->returned_entry.type = osd::directory::entry::entry_type::DIR;
directory->returned_entry.size = 0; // FIXME: what would stat say?
// FIXME: modified time?
result = &directory->returned_entry;
}
}
@ -981,19 +984,20 @@ const osd::directory::entry *zippath_readdir(zippath_directory *directory)
directory->returned_dirlist.emplace_front(relpath, separator - relpath);
/* ...and return it */
memset(&directory->returned_entry, 0, sizeof(directory->returned_entry));
directory->returned_entry.name = directory->returned_dirlist.front().c_str();
directory->returned_entry.type = osd::directory::entry::entry_type::DIR;
directory->returned_entry.size = 0; // FIXME: what would stat say?
// FIXME: modified time?
result = &directory->returned_entry;
}
}
else
{
/* a real file */
memset(&directory->returned_entry, 0, sizeof(directory->returned_entry));
directory->returned_entry.name = relpath;
directory->returned_entry.type = osd::directory::entry::entry_type::FILE;
directory->returned_entry.size = directory->zipfile->current_uncompressed_length();
directory->returned_entry.last_modified = directory->zipfile->current_last_modified();
result = &directory->returned_entry;
}
}