Implementation of the SZE archive
Format of the SZE archive
<archive> :== "SZ" { <single_file> }
<single_file> :== "{" <filename> "|" <size> "}" <file_content>
<filename> :== <file_name>"."<file_extension>
<size> :== number of bytes in <file_content>
<file_content> :== series of bytes representing file
<archive> :== "SZ" { <single_file> }
<single_file> :== "{" <filename> "|" <size> "}" <file_content>
<filename> :== <file_name>"."<file_extension>
<size> :== number of bytes in <file_content>
<file_content> :== series of bytes representing file
Not all methods are discussed here. See sze-archive.cc for the full implementation.
The SZE archive emulates a real archive format. It is not read-only: the user can add files to it. If the user adds a file whose name contains invalid symbols, we should show an error. As usual, let's start with the definitions of the classes required to implement the SZE archive.
/// Handler for the SZE archive format.
///
/// Implements both IInArchive and IOutArchive. The files of the archive are
/// kept in memory in `items_`.
class SzeInArchive : public CMyUnknownImp,
                     public IInArchive,
                     public IOutArchive {
  Z7_IFACES_IMP_UNK_2(IInArchive, IOutArchive);

 private:
  /// One archived file: its path inside the archive and its raw bytes.
  struct File {
    std::string path;
    std::vector<char> content;
  };

  /// Serializes `items_` to `outStream`.
  void WriteFilesToOutStream(ISequentialOutStream* outStream);

  /// Rebuilds `items_` from the per-item update information supplied by
  /// `updateCallback` for `numItems` items.
  void UpdateItemsInMem(UInt32 numItems,
                        IArchiveUpdateCallback* updateCallback);

  // Zero-initialized so the member never holds an indeterminate value.
  // NOTE(review): not referenced by the code visible here; presumably the
  // total size of the archive - confirm against sze-archive.cc.
  UInt32 all_size_ = 0;

  /// Files of the archive, in archive order.
  std::vector<File> items_;
};
/// Handler for the SZE archive format.
///
/// Implements both IInArchive and IOutArchive. The files of the archive are
/// kept in memory in `items_`.
class SzeInArchive : public CMyUnknownImp,
                     public IInArchive,
                     public IOutArchive {
  Z7_IFACES_IMP_UNK_2(IInArchive, IOutArchive);

 private:
  /// One archived file: its path inside the archive and its raw bytes.
  struct File {
    std::string path;
    std::vector<char> content;
  };

  /// Serializes `items_` to `outStream`.
  void WriteFilesToOutStream(ISequentialOutStream* outStream);

  /// Rebuilds `items_` from the per-item update information supplied by
  /// `updateCallback` for `numItems` items.
  void UpdateItemsInMem(UInt32 numItems,
                        IArchiveUpdateCallback* updateCallback);

  // Zero-initialized so the member never holds an indeterminate value.
  // NOTE(review): not referenced by the code visible here; presumably the
  // total size of the archive - confirm against sze-archive.cc.
  UInt32 all_size_ = 0;

  /// Files of the archive, in archive order.
  std::vector<File> items_;
};
The archive format implements both the IInArchive and IOutArchive interfaces. We use items_
to store the files of the archive in memory. We do not persist the file stream; we dispose of it after we have read all files into memory.
IInArchive::Open
/// IInArchive::Open - loads every entry of an SZE stream into `items_`.
///
/// Expected layout: the signature "SZ" followed by zero or more entries of
/// the form `{<path>|<decimal size>}<size bytes of content>`. Bytes between
/// entries that are not '{' are skipped.
///
/// @param stream                 Source stream handed to ArchiveReader.
/// @param maxCheckStartPosition  Unused.
/// @param openCallback           Unused.
/// @return S_OK on success; S_FALSE when the signature does not match or an
///         entry is malformed or truncated; E_FAIL if an exception (for
///         example an allocation failure) is raised while parsing. On any
///         failure `items_` is left empty.
HRESULT SzeInArchive::Open(IInStream* stream,
                           const UInt64* maxCheckStartPosition,
                           IArchiveOpenCallback* openCallback) noexcept {
  items_.clear();
  try {
    ArchiveReader archive_reader(stream);
    auto curr_it = archive_reader.begin();
    // Expressed through operator!= only, the single comparison the reader's
    // iterator is known to support.
    const auto at_end = [&]() { return !(curr_it != archive_reader.end()); };

    // Validate the signature: BOTH bytes must be present and must match.
    if (at_end() || *curr_it != 'S') {
      return S_FALSE;
    }
    ++curr_it;
    if (at_end() || *curr_it != 'Z') {
      return S_FALSE;
    }
    ++curr_it;

    std::vector<File> parsed;
    while (!at_end()) {
      if (*curr_it != '{') {
        ++curr_it;
        continue;
      }

      File file;

      // Read the item path: everything up to the '|' separator.
      for (++curr_it;; ++curr_it) {
        if (at_end()) {
          return S_FALSE;  // truncated header
        }
        const auto ch = *curr_it;
        if (ch == '|') {
          break;
        }
        file.path.push_back(static_cast<char>(ch));
      }

      // Read the item size: a non-empty run of decimal digits up to '}'.
      const std::size_t max_size = static_cast<std::size_t>(-1);
      std::size_t size = 0;
      bool has_digit = false;
      for (++curr_it;; ++curr_it) {
        if (at_end()) {
          return S_FALSE;  // truncated header
        }
        const auto ch = *curr_it;
        if (ch == '}') {
          break;
        }
        if (ch < '0' || ch > '9') {
          return S_FALSE;  // sign, whitespace or garbage in the size field
        }
        const std::size_t digit = static_cast<std::size_t>(ch - '0');
        if (size > (max_size - digit) / 10) {
          return S_FALSE;  // size does not fit into size_t
        }
        size = size * 10 + digit;
        has_digit = true;
      }
      if (!has_digit) {
        return S_FALSE;
      }

      // Read the item content. Bytes are appended one by one instead of
      // resizing up front, so a bogus size in a corrupt header cannot force
      // a huge allocation before the stream runs out.
      for (std::size_t i = 0; i < size; ++i) {
        ++curr_it;
        if (at_end()) {
          return S_FALSE;  // content shorter than the declared size
        }
        file.content.push_back(static_cast<char>(*curr_it));
      }

      parsed.push_back(std::move(file));
      ++curr_it;  // step past the last byte consumed by this entry
    }

    items_ = std::move(parsed);
    return S_OK;
  } catch (...) {
    // The function is noexcept: never let an exception escape.
    return E_FAIL;
  }
}
/// IInArchive::Open - loads every entry of an SZE stream into `items_`.
///
/// Expected layout: the signature "SZ" followed by zero or more entries of
/// the form `{<path>|<decimal size>}<size bytes of content>`. Bytes between
/// entries that are not '{' are skipped.
///
/// @param stream                 Source stream handed to ArchiveReader.
/// @param maxCheckStartPosition  Unused.
/// @param openCallback           Unused.
/// @return S_OK on success; S_FALSE when the signature does not match or an
///         entry is malformed or truncated; E_FAIL if an exception (for
///         example an allocation failure) is raised while parsing. On any
///         failure `items_` is left empty.
HRESULT SzeInArchive::Open(IInStream* stream,
                           const UInt64* maxCheckStartPosition,
                           IArchiveOpenCallback* openCallback) noexcept {
  items_.clear();
  try {
    ArchiveReader archive_reader(stream);
    auto curr_it = archive_reader.begin();
    // Expressed through operator!= only, the single comparison the reader's
    // iterator is known to support.
    const auto at_end = [&]() { return !(curr_it != archive_reader.end()); };

    // Validate the signature: BOTH bytes must be present and must match.
    if (at_end() || *curr_it != 'S') {
      return S_FALSE;
    }
    ++curr_it;
    if (at_end() || *curr_it != 'Z') {
      return S_FALSE;
    }
    ++curr_it;

    std::vector<File> parsed;
    while (!at_end()) {
      if (*curr_it != '{') {
        ++curr_it;
        continue;
      }

      File file;

      // Read the item path: everything up to the '|' separator.
      for (++curr_it;; ++curr_it) {
        if (at_end()) {
          return S_FALSE;  // truncated header
        }
        const auto ch = *curr_it;
        if (ch == '|') {
          break;
        }
        file.path.push_back(static_cast<char>(ch));
      }

      // Read the item size: a non-empty run of decimal digits up to '}'.
      const std::size_t max_size = static_cast<std::size_t>(-1);
      std::size_t size = 0;
      bool has_digit = false;
      for (++curr_it;; ++curr_it) {
        if (at_end()) {
          return S_FALSE;  // truncated header
        }
        const auto ch = *curr_it;
        if (ch == '}') {
          break;
        }
        if (ch < '0' || ch > '9') {
          return S_FALSE;  // sign, whitespace or garbage in the size field
        }
        const std::size_t digit = static_cast<std::size_t>(ch - '0');
        if (size > (max_size - digit) / 10) {
          return S_FALSE;  // size does not fit into size_t
        }
        size = size * 10 + digit;
        has_digit = true;
      }
      if (!has_digit) {
        return S_FALSE;
      }

      // Read the item content. Bytes are appended one by one instead of
      // resizing up front, so a bogus size in a corrupt header cannot force
      // a huge allocation before the stream runs out.
      for (std::size_t i = 0; i < size; ++i) {
        ++curr_it;
        if (at_end()) {
          return S_FALSE;  // content shorter than the declared size
        }
        file.content.push_back(static_cast<char>(*curr_it));
      }

      parsed.push_back(std::move(file));
      ++curr_it;  // step past the last byte consumed by this entry
    }

    items_ = std::move(parsed);
    return S_OK;
  } catch (...) {
    // The function is noexcept: never let an exception escape.
    return E_FAIL;
  }
}
This code is responsible for parsing archive format and validating its signature.
IInArchive::Extract
/// IInArchive::Extract - writes the content of the requested items to the
/// streams provided by `extractCallback`.
///
/// @param indices          Indices into `items_` of the items to extract;
///                         not read when all items are requested.
/// @param numItems         Number of entries in `indices`, or (UInt32)-1 to
///                         request every item of the archive.
/// @param testMode         When non-zero nothing is extracted and S_OK is
///                         returned immediately.
/// @param extractCallback  Supplies the output stream for each item and
///                         receives the per-item operation result.
/// @return S_OK on success; E_INVALIDARG for a missing index list or an
///         out-of-range index; E_FAIL when the output stream stops accepting
///         data; otherwise the first failing HRESULT reported by the
///         callback or the output stream.
HRESULT SzeInArchive::Extract(
    const UInt32* indices, UInt32 numItems, Int32 testMode,
    IArchiveExtractCallback* extractCallback) noexcept {
  if (testMode) {
    return S_OK;
  }

  // 7-Zip passes numItems == (UInt32)-1 to mean "all items"; no index list
  // is supplied in that mode.
  const bool all_items = numItems == static_cast<UInt32>(-1);
  if (all_items) {
    numItems = static_cast<UInt32>(items_.size());
  } else if (numItems != 0 && indices == nullptr) {
    return E_INVALIDARG;
  }

  const Int32 ask_mode = NArchive::NExtract::NAskMode::kExtract;
  for (UInt32 i = 0; i < numItems; ++i) {
    const UInt32 index = all_items ? i : indices[i];
    if (index >= items_.size()) {
      return E_INVALIDARG;
    }

    CMyComPtr<ISequentialOutStream> stream;
    HRESULT res = extractCallback->GetStream(index, &stream, ask_mode);
    if (res != S_OK) {
      return res;
    }
    if (!stream) {
      continue;  // the callback chose not to receive this item
    }

    res = extractCallback->PrepareOperation(ask_mode);
    if (res != S_OK) {
      return res;
    }

    // Write() may accept fewer bytes than requested, so loop until the whole
    // item has been written.
    const char* data = items_[index].content.data();
    std::size_t remaining = items_[index].content.size();
    while (remaining > 0) {
      const std::size_t max_chunk = static_cast<std::size_t>(1) << 30;
      const UInt32 chunk =
          static_cast<UInt32>(remaining < max_chunk ? remaining : max_chunk);
      UInt32 processed = 0;
      res = stream->Write(data, chunk, &processed);
      if (res != S_OK) {
        return res;
      }
      if (processed == 0) {
        return E_FAIL;  // the stream stopped accepting data
      }
      data += processed;
      remaining -= processed;
    }

    res = extractCallback->SetOperationResult(
        NArchive::NExtract::NOperationResult::kOK);
    if (res != S_OK) {
      return res;
    }
  }
  return S_OK;
}
/// IInArchive::Extract - writes the content of the requested items to the
/// streams provided by `extractCallback`.
///
/// @param indices          Indices into `items_` of the items to extract;
///                         not read when all items are requested.
/// @param numItems         Number of entries in `indices`, or (UInt32)-1 to
///                         request every item of the archive.
/// @param testMode         When non-zero nothing is extracted and S_OK is
///                         returned immediately.
/// @param extractCallback  Supplies the output stream for each item and
///                         receives the per-item operation result.
/// @return S_OK on success; E_INVALIDARG for a missing index list or an
///         out-of-range index; E_FAIL when the output stream stops accepting
///         data; otherwise the first failing HRESULT reported by the
///         callback or the output stream.
HRESULT SzeInArchive::Extract(
    const UInt32* indices, UInt32 numItems, Int32 testMode,
    IArchiveExtractCallback* extractCallback) noexcept {
  if (testMode) {
    return S_OK;
  }

  // 7-Zip passes numItems == (UInt32)-1 to mean "all items"; no index list
  // is supplied in that mode.
  const bool all_items = numItems == static_cast<UInt32>(-1);
  if (all_items) {
    numItems = static_cast<UInt32>(items_.size());
  } else if (numItems != 0 && indices == nullptr) {
    return E_INVALIDARG;
  }

  const Int32 ask_mode = NArchive::NExtract::NAskMode::kExtract;
  for (UInt32 i = 0; i < numItems; ++i) {
    const UInt32 index = all_items ? i : indices[i];
    if (index >= items_.size()) {
      return E_INVALIDARG;
    }

    CMyComPtr<ISequentialOutStream> stream;
    HRESULT res = extractCallback->GetStream(index, &stream, ask_mode);
    if (res != S_OK) {
      return res;
    }
    if (!stream) {
      continue;  // the callback chose not to receive this item
    }

    res = extractCallback->PrepareOperation(ask_mode);
    if (res != S_OK) {
      return res;
    }

    // Write() may accept fewer bytes than requested, so loop until the whole
    // item has been written.
    const char* data = items_[index].content.data();
    std::size_t remaining = items_[index].content.size();
    while (remaining > 0) {
      const std::size_t max_chunk = static_cast<std::size_t>(1) << 30;
      const UInt32 chunk =
          static_cast<UInt32>(remaining < max_chunk ? remaining : max_chunk);
      UInt32 processed = 0;
      res = stream->Write(data, chunk, &processed);
      if (res != S_OK) {
        return res;
      }
      if (processed == 0) {
        return E_FAIL;  // the stream stopped accepting data
      }
      data += processed;
      remaining -= processed;
    }

    res = extractCallback->SetOperationResult(
        NArchive::NExtract::NOperationResult::kOK);
    if (res != S_OK) {
      return res;
    }
  }
  return S_OK;
}
It looks similar to the implementation of the SZ archive, except items_
is used to extract files.
IOutArchive::UpdateItems
/// IOutArchive::UpdateItems - applies the requested changes to the in-memory
/// item list and then serializes the whole archive.
///
/// @param outStream       Destination of the rewritten archive.
/// @param numItems        Number of items the updated archive will contain.
/// @param updateCallback  Source of the per-item update information.
/// @return Always S_OK; both helpers return void, so no failure is reported.
HRESULT SzeInArchive::UpdateItems(
    ISequentialOutStream* outStream, UInt32 numItems,
    IArchiveUpdateCallback* updateCallback) noexcept {
  // Step 1: rebuild items_ according to the callback.
  this->UpdateItemsInMem(numItems, updateCallback);

  // Step 2: write the rebuilt items_ to the output stream.
  this->WriteFilesToOutStream(outStream);

  return S_OK;
}
/// IOutArchive::UpdateItems - applies the requested changes to the in-memory
/// item list and then serializes the whole archive.
///
/// @param outStream       Destination of the rewritten archive.
/// @param numItems        Number of items the updated archive will contain.
/// @param updateCallback  Source of the per-item update information.
/// @return Always S_OK; both helpers return void, so no failure is reported.
HRESULT SzeInArchive::UpdateItems(
    ISequentialOutStream* outStream, UInt32 numItems,
    IArchiveUpdateCallback* updateCallback) noexcept {
  // Step 1: rebuild items_ according to the callback.
  this->UpdateItemsInMem(numItems, updateCallback);

  // Step 2: write the rebuilt items_ to the output stream.
  this->WriteFilesToOutStream(outStream);

  return S_OK;
}
The implementation splits into 2 steps:
- Updating the
items_
array in memory. We should cover 3 cases:
- item(s) added;
- item(s) removed;
- item(s) renamed;
- Writing the
items_
array to the output stream
UpdateItemsInMem
/// Rebuilds `items_` from the update information supplied by
/// `updateCallback`.
///
/// For each of the `numItems` requested items the callback reports whether
/// the item originates from the current archive (`indexInArchive`) and
/// whether its data and/or properties are new. Items are assembled in a
/// fresh vector that replaces `items_` at the end; existing items that are
/// not requested are thereby dropped.
///
/// Error policy (the function returns void, so failures cannot be reported):
///  - an item whose update info cannot be obtained, or whose
///    `indexInArchive` is out of range, is skipped;
///  - if new data cannot be read, an existing item is kept unchanged and a
///    brand-new item is skipped instead of being stored empty and nameless;
///  - if the new path cannot be obtained, the current path is kept.
///
/// @param numItems        Number of items in the updated archive.
/// @param updateCallback  Source of the per-item update information.
void SzeInArchive::UpdateItemsInMem(
    UInt32 numItems, IArchiveUpdateCallback* updateCallback) {
  // Value used by the callback for "item is not in the current archive".
  const UInt32 kNoIndex = static_cast<UInt32>(-1);

  std::vector<File> new_items;
  for (UInt32 i = 0; i < numItems; i++) {
    Int32 newData = 0;
    Int32 newProps = 0;
    UInt32 indexInArchive = kNoIndex;
    if (FAILED(updateCallback->GetUpdateItemInfo(i, &newData, &newProps,
                                                 &indexInArchive))) {
      continue;  // the out-values are not trustworthy
    }

    const bool from_archive = indexInArchive != kNoIndex;
    File item;
    if (from_archive) {
      if (indexInArchive >= items_.size()) {
        continue;  // refers to an item that does not exist
      }
      item = items_[indexInArchive];
    }

    if (newData) {
      CMyComPtr<ISequentialInStream> in_stream;
      const HRESULT res = updateCallback->GetStream(i, &in_stream);
      if (FAILED(res) || !in_stream) {
        if (from_archive) {
          new_items.push_back(std::move(item));  // keep the old version
        }
        continue;
      }
      ArchiveReader reader(in_stream);
      item.content.clear();
      for (byte b : reader) {
        item.content.push_back(static_cast<char>(b));
      }
    }

    if (newProps) {
      PROPVARIANT variant_path{};
      const HRESULT res =
          updateCallback->GetProperty(i, kpidPath, &variant_path);
      if (variant_path.vt == VT_BSTR) {
        if (!FAILED(res) && variant_path.bstrVal != nullptr) {
          item.path = std::string(utils::Ws2s(variant_path.bstrVal));
        }
        // The callback allocated the BSTR; release it so it does not leak.
        SysFreeString(variant_path.bstrVal);
      }
    }

    new_items.push_back(std::move(item));
  }
  items_ = std::move(new_items);
}
/// Rebuilds `items_` from the update information supplied by
/// `updateCallback`.
///
/// For each of the `numItems` requested items the callback reports whether
/// the item originates from the current archive (`indexInArchive`) and
/// whether its data and/or properties are new. Items are assembled in a
/// fresh vector that replaces `items_` at the end; existing items that are
/// not requested are thereby dropped.
///
/// Error policy (the function returns void, so failures cannot be reported):
///  - an item whose update info cannot be obtained, or whose
///    `indexInArchive` is out of range, is skipped;
///  - if new data cannot be read, an existing item is kept unchanged and a
///    brand-new item is skipped instead of being stored empty and nameless;
///  - if the new path cannot be obtained, the current path is kept.
///
/// @param numItems        Number of items in the updated archive.
/// @param updateCallback  Source of the per-item update information.
void SzeInArchive::UpdateItemsInMem(
    UInt32 numItems, IArchiveUpdateCallback* updateCallback) {
  // Value used by the callback for "item is not in the current archive".
  const UInt32 kNoIndex = static_cast<UInt32>(-1);

  std::vector<File> new_items;
  for (UInt32 i = 0; i < numItems; i++) {
    Int32 newData = 0;
    Int32 newProps = 0;
    UInt32 indexInArchive = kNoIndex;
    if (FAILED(updateCallback->GetUpdateItemInfo(i, &newData, &newProps,
                                                 &indexInArchive))) {
      continue;  // the out-values are not trustworthy
    }

    const bool from_archive = indexInArchive != kNoIndex;
    File item;
    if (from_archive) {
      if (indexInArchive >= items_.size()) {
        continue;  // refers to an item that does not exist
      }
      item = items_[indexInArchive];
    }

    if (newData) {
      CMyComPtr<ISequentialInStream> in_stream;
      const HRESULT res = updateCallback->GetStream(i, &in_stream);
      if (FAILED(res) || !in_stream) {
        if (from_archive) {
          new_items.push_back(std::move(item));  // keep the old version
        }
        continue;
      }
      ArchiveReader reader(in_stream);
      item.content.clear();
      for (byte b : reader) {
        item.content.push_back(static_cast<char>(b));
      }
    }

    if (newProps) {
      PROPVARIANT variant_path{};
      const HRESULT res =
          updateCallback->GetProperty(i, kpidPath, &variant_path);
      if (variant_path.vt == VT_BSTR) {
        if (!FAILED(res) && variant_path.bstrVal != nullptr) {
          item.path = std::string(utils::Ws2s(variant_path.bstrVal));
        }
        // The callback allocated the BSTR; release it so it does not leak.
        SysFreeString(variant_path.bstrVal);
      }
    }

    new_items.push_back(std::move(item));
  }
  items_ = std::move(new_items);
}
This code updates items_
collection by creating a new vector new_items
of items and then swapping it with items_
at the end.
WriteFilesToOutStream
/// Serializes `items_` to `outStream` in the SZE format: the signature "SZ"
/// followed, for every file, by the header `{<path>|<size>}` and the file
/// content.
///
/// Every piece is written with a loop because a single Write() call is
/// allowed to accept fewer bytes than requested. Serialization stops at the
/// first failed write; the function returns void, so the failure cannot be
/// reported to the caller.
///
/// @param outStream  Destination stream.
void SzeInArchive::WriteFilesToOutStream(ISequentialOutStream* outStream) {
  // Writes exactly `size` bytes; returns false as soon as the stream reports
  // an error or stops accepting data.
  const auto write_all = [outStream](const char* data, std::size_t size) {
    // Upper bound per call, so sizes above the UInt32 range are written in
    // several chunks instead of being truncated by a cast.
    const std::size_t max_chunk = static_cast<std::size_t>(1) << 30;
    while (size > 0) {
      const UInt32 chunk =
          static_cast<UInt32>(size < max_chunk ? size : max_chunk);
      UInt32 processed = 0;
      if (outStream->Write(data, chunk, &processed) != S_OK ||
          processed == 0) {
        return false;
      }
      data += processed;
      size -= processed;
    }
    return true;
  };

  if (!write_all("SZ", 2)) {
    return;
  }
  for (const auto& file : items_) {
    // Assemble the whole header first so it goes out in one piece.
    std::string header = "{";
    header += file.path;
    header += "|";
    header += std::to_string(file.content.size());
    header += "}";
    if (!write_all(header.data(), header.size()) ||
        !write_all(file.content.data(), file.content.size())) {
      return;
    }
  }
}
/// Serializes `items_` to `outStream` in the SZE format: the signature "SZ"
/// followed, for every file, by the header `{<path>|<size>}` and the file
/// content.
///
/// Every piece is written with a loop because a single Write() call is
/// allowed to accept fewer bytes than requested. Serialization stops at the
/// first failed write; the function returns void, so the failure cannot be
/// reported to the caller.
///
/// @param outStream  Destination stream.
void SzeInArchive::WriteFilesToOutStream(ISequentialOutStream* outStream) {
  // Writes exactly `size` bytes; returns false as soon as the stream reports
  // an error or stops accepting data.
  const auto write_all = [outStream](const char* data, std::size_t size) {
    // Upper bound per call, so sizes above the UInt32 range are written in
    // several chunks instead of being truncated by a cast.
    const std::size_t max_chunk = static_cast<std::size_t>(1) << 30;
    while (size > 0) {
      const UInt32 chunk =
          static_cast<UInt32>(size < max_chunk ? size : max_chunk);
      UInt32 processed = 0;
      if (outStream->Write(data, chunk, &processed) != S_OK ||
          processed == 0) {
        return false;
      }
      data += processed;
      size -= processed;
    }
    return true;
  };

  if (!write_all("SZ", 2)) {
    return;
  }
  for (const auto& file : items_) {
    // Assemble the whole header first so it goes out in one piece.
    std::string header = "{";
    header += file.path;
    header += "|";
    header += std::to_string(file.content.size());
    header += "}";
    if (!write_all(header.data(), header.size()) ||
        !write_all(file.content.data(), file.content.size())) {
      return;
    }
  }
}
This code serializes in-mem items_
back to the output stream.
IInArchive::GetPropertyInfo
/// IInArchive::GetPropertyInfo - describes a property exposed by the handler.
///
/// The same description is returned for every `index`: property id
/// kpidSize, value type VT_UI8, and a newly allocated name string "Sample".
///
/// @param index    Not consulted.
/// @param name     Receives the string allocated with SysAllocString.
/// @param propID   Receives kpidSize.
/// @param varType  Receives VT_UI8.
/// @return Always S_OK.
HRESULT SzeInArchive::GetPropertyInfo(UInt32 index, BSTR* name, PROPID* propID,
                                      VARTYPE* varType) noexcept {
  // The three out-parameters are independent of each other.
  *varType = VT_UI8;
  *propID = kpidSize;
  *name = SysAllocString(L"Sample");
  return S_OK;
}
/// IInArchive::GetPropertyInfo - describes a property exposed by the handler.
///
/// The same description is returned for every `index`: property id
/// kpidSize, value type VT_UI8, and a newly allocated name string "Sample".
///
/// @param index    Not consulted.
/// @param name     Receives the string allocated with SysAllocString.
/// @param propID   Receives kpidSize.
/// @param varType  Receives VT_UI8.
/// @return Always S_OK.
HRESULT SzeInArchive::GetPropertyInfo(UInt32 index, BSTR* name, PROPID* propID,
                                      VARTYPE* varType) noexcept {
  // The three out-parameters are independent of each other.
  *varType = VT_UI8;
  *propID = kpidSize;
  *name = SysAllocString(L"Sample");
  return S_OK;
}
It is an optional method to implement. I just added this sample to show how to display properties. As a result, we can see "Size" as a new column in the 7z File Manager.
At the end archive will look like this: