Skip to content

Implementation of the SZE archive

Format of the SZE archive

bnf
<archive> ::= "SZ" <files>
<files> ::= "" | <single_file> <files>
<single_file> ::= "{" <filename> "|" <size> "}" <file_content>
<filename> ::= <file_name> "." <file_extension>
<size> ::= number of bytes in <file_content>, written as decimal digits
<file_content> ::= series of bytes representing file
<archive> ::= "SZ" <files>
<files> ::= "" | <single_file> <files>
<single_file> ::= "{" <filename> "|" <size> "}" <file_content>
<filename> ::= <file_name> "." <file_extension>
<size> ::= number of bytes in <file_content>, written as decimal digits
<file_content> ::= series of bytes representing file

Not all methods are discussed here. See sze-archive.cc for the full implementation.

The SZE archive emulates a real archive format. It is not read-only: the user can add files to it. If the user adds a file whose name contains invalid symbols, we should show an error. As usual, let's start with the definitions of the classes required to implement the SZE archive.

C++
// Archive handler for the SZE format. It implements both the reading
// (IInArchive) and the updating (IOutArchive) interfaces and keeps every
// archived file fully in memory in items_.
class SzeInArchive : public CMyUnknownImp,
                     public IInArchive,
                     public IOutArchive {
  Z7_IFACES_IMP_UNK_2(IInArchive, IOutArchive);

 private:
  // One archived file held in memory.
  struct File {
    std::string path;           // item path as stored in the archive header
    std::vector<char> content;  // raw bytes of the file
  };

  // Serializes items_ to outStream.
  void WriteFilesToOutStream(ISequentialOutStream* outStream);
  // Rebuilds items_ from the update information supplied by updateCallback.
  void UpdateItemsInMem(UInt32 numItems,
                        IArchiveUpdateCallback* updateCallback);

  // Zero-initialized so the member never holds an indeterminate value.
  // NOTE(review): not referenced by the methods shown here; presumably the
  // total size of all items -- confirm against sze-archive.cc.
  UInt32 all_size_ = 0;
  // Files of the currently opened archive, in archive order.
  std::vector<File> items_;
};
// Archive handler for the SZE format. It implements both the reading
// (IInArchive) and the updating (IOutArchive) interfaces and keeps every
// archived file fully in memory in items_.
class SzeInArchive : public CMyUnknownImp,
                     public IInArchive,
                     public IOutArchive {
  Z7_IFACES_IMP_UNK_2(IInArchive, IOutArchive);

 private:
  // One archived file held in memory.
  struct File {
    std::string path;           // item path as stored in the archive header
    std::vector<char> content;  // raw bytes of the file
  };

  // Serializes items_ to outStream.
  void WriteFilesToOutStream(ISequentialOutStream* outStream);
  // Rebuilds items_ from the update information supplied by updateCallback.
  void UpdateItemsInMem(UInt32 numItems,
                        IArchiveUpdateCallback* updateCallback);

  // Zero-initialized so the member never holds an indeterminate value.
  // NOTE(review): not referenced by the methods shown here; presumably the
  // total size of all items -- confirm against sze-archive.cc.
  UInt32 all_size_ = 0;
  // Files of the currently opened archive, in archive order.
  std::vector<File> items_;
};

The archive format implements both the IInArchive and IOutArchive interfaces. We use items_ to store the files of the archive in memory. We do not keep the file stream around; it is released after all files have been read into memory.

IInArchive::Open

C++
// Parses an SZE archive from `stream` and loads all of its files into items_.
//
// Expected layout: the two signature bytes "SZ", followed by any number of
// records of the form "{<path>|<decimal size>}<size bytes of content>".
//
// Returns S_OK on success, S_FALSE when the signature is wrong or a record is
// malformed/truncated, and E_FAIL when an exception (e.g. out of memory) is
// raised while parsing. On any failure items_ is left empty.
// maxCheckStartPosition and openCallback are not used.
HRESULT SzeInArchive::Open(IInStream* stream,
                           const UInt64* maxCheckStartPosition,
                           IArchiveOpenCallback* openCallback) noexcept {
  items_.clear();

  // The method is noexcept, but the containers used below may throw; convert
  // any exception into an error code instead of terminating the host process.
  try {
    ArchiveReader archive_reader(stream);
    auto curr_it = archive_reader.begin();

    // True while the cursor still points at a readable byte.
    auto has_data = [&]() -> bool { return curr_it != archive_reader.end(); };
    // Moves the cursor one byte forward; false when that hits the end.
    auto advance = [&]() -> bool {
      ++curr_it;
      return has_data();
    };

    // validate signature: both bytes must be present and both must match
    if (!has_data() || *curr_it != 'S') {
      return S_FALSE;
    }
    if (!advance() || *curr_it != 'Z') {
      return S_FALSE;
    }

    // Sizes are handed to the stream API as UInt32 elsewhere in this class,
    // so larger declared sizes are rejected.
    const UInt64 kMaxItemSize = 0xFFFFFFFFu;

    std::vector<File> parsed;
    while (has_data()) {
      if (*curr_it == '{') {
        File file;

        // read item path (everything up to the '|' separator)
        for (;;) {
          if (!advance()) {
            return S_FALSE;  // truncated header
          }
          if (*curr_it == '|') {
            break;
          }
          file.path.push_back(*curr_it);
        }

        // read item size: decimal digits only, terminated by '}'
        UInt64 size = 0;
        bool has_digits = false;
        for (;;) {
          if (!advance()) {
            return S_FALSE;  // truncated header
          }
          const auto c = *curr_it;
          if (c == '}') {
            break;
          }
          if (c < '0' || c > '9') {
            return S_FALSE;  // sign, whitespace or garbage in the size field
          }
          size = size * 10 + static_cast<UInt64>(c - '0');
          if (size > kMaxItemSize) {
            return S_FALSE;
          }
          has_digits = true;
        }
        if (!has_digits) {
          return S_FALSE;
        }

        // read item content; grow as bytes actually arrive so that a bogus
        // declared size cannot force a huge up-front allocation
        for (UInt64 i = 0; i < size; ++i) {
          if (!advance()) {
            return S_FALSE;  // content shorter than declared
          }
          file.content.push_back(static_cast<char>(*curr_it));
        }

        parsed.push_back(std::move(file));
      }

      ++curr_it;
    }

    items_ = std::move(parsed);
    return S_OK;
  } catch (...) {
    return E_FAIL;
  }
}
// Parses an SZE archive from `stream` and loads all of its files into items_.
//
// Expected layout: the two signature bytes "SZ", followed by any number of
// records of the form "{<path>|<decimal size>}<size bytes of content>".
//
// Returns S_OK on success, S_FALSE when the signature is wrong or a record is
// malformed/truncated, and E_FAIL when an exception (e.g. out of memory) is
// raised while parsing. On any failure items_ is left empty.
// maxCheckStartPosition and openCallback are not used.
HRESULT SzeInArchive::Open(IInStream* stream,
                           const UInt64* maxCheckStartPosition,
                           IArchiveOpenCallback* openCallback) noexcept {
  items_.clear();

  // The method is noexcept, but the containers used below may throw; convert
  // any exception into an error code instead of terminating the host process.
  try {
    ArchiveReader archive_reader(stream);
    auto curr_it = archive_reader.begin();

    // True while the cursor still points at a readable byte.
    auto has_data = [&]() -> bool { return curr_it != archive_reader.end(); };
    // Moves the cursor one byte forward; false when that hits the end.
    auto advance = [&]() -> bool {
      ++curr_it;
      return has_data();
    };

    // validate signature: both bytes must be present and both must match
    if (!has_data() || *curr_it != 'S') {
      return S_FALSE;
    }
    if (!advance() || *curr_it != 'Z') {
      return S_FALSE;
    }

    // Sizes are handed to the stream API as UInt32 elsewhere in this class,
    // so larger declared sizes are rejected.
    const UInt64 kMaxItemSize = 0xFFFFFFFFu;

    std::vector<File> parsed;
    while (has_data()) {
      if (*curr_it == '{') {
        File file;

        // read item path (everything up to the '|' separator)
        for (;;) {
          if (!advance()) {
            return S_FALSE;  // truncated header
          }
          if (*curr_it == '|') {
            break;
          }
          file.path.push_back(*curr_it);
        }

        // read item size: decimal digits only, terminated by '}'
        UInt64 size = 0;
        bool has_digits = false;
        for (;;) {
          if (!advance()) {
            return S_FALSE;  // truncated header
          }
          const auto c = *curr_it;
          if (c == '}') {
            break;
          }
          if (c < '0' || c > '9') {
            return S_FALSE;  // sign, whitespace or garbage in the size field
          }
          size = size * 10 + static_cast<UInt64>(c - '0');
          if (size > kMaxItemSize) {
            return S_FALSE;
          }
          has_digits = true;
        }
        if (!has_digits) {
          return S_FALSE;
        }

        // read item content; grow as bytes actually arrive so that a bogus
        // declared size cannot force a huge up-front allocation
        for (UInt64 i = 0; i < size; ++i) {
          if (!advance()) {
            return S_FALSE;  // content shorter than declared
          }
          file.content.push_back(static_cast<char>(*curr_it));
        }

        parsed.push_back(std::move(file));
      }

      ++curr_it;
    }

    items_ = std::move(parsed);
    return S_OK;
  } catch (...) {
    return E_FAIL;
  }
}

This code is responsible for parsing archive format and validating its signature.

IInArchive::Extract

C++
// Writes the content of the requested items to the streams provided by
// extractCallback.
//
// indices/numItems select the items to extract. numItems == (UInt32)-1 is
// treated as "every item" (the 7-Zip convention), in which case indices is not
// read. In test mode nothing is written and S_OK is returned immediately.
//
// Returns S_OK on success, E_INVALIDARG for a missing callback/index array or
// an out-of-range index, the callback's or stream's error code when one of
// them fails, and E_FAIL when the stream stops accepting data.
HRESULT SzeInArchive::Extract(
    const UInt32* indices, UInt32 numItems, Int32 testMode,
    IArchiveExtractCallback* extractCallback) noexcept {
  if (testMode) {
    return S_OK;
  }
  if (extractCallback == nullptr) {
    return E_INVALIDARG;
  }

  const bool all_items = (numItems == static_cast<UInt32>(-1));
  if (all_items) {
    numItems = static_cast<UInt32>(items_.size());
  } else if (numItems != 0 && indices == nullptr) {
    return E_INVALIDARG;
  }

  for (UInt32 n = 0; n < numItems; ++n) {
    const UInt32 index = all_items ? n : indices[n];
    if (index >= items_.size()) {
      return E_INVALIDARG;
    }

    CMyComPtr<ISequentialOutStream> stream;
    HRESULT res = extractCallback->GetStream(
        index, &stream, NArchive::NExtract::NAskMode::kExtract);
    if (res != S_OK) {
      return res;
    }
    if (!stream) {
      continue;  // the callback supplied no stream: this item is skipped
    }

    res = extractCallback->PrepareOperation(
        NArchive::NExtract::NAskMode::kExtract);
    if (res != S_OK) {
      return res;
    }

    // Write may accept fewer bytes than requested, so keep going until the
    // whole buffer has been consumed.
    const std::vector<char>& content = items_[index].content;
    size_t written = 0;
    while (written < content.size()) {
      const size_t left = content.size() - written;
      const UInt32 chunk =
          left > 0xFFFFFFFFu ? 0xFFFFFFFFu : static_cast<UInt32>(left);
      UInt32 processed = 0;
      res = stream->Write(content.data() + written, chunk, &processed);
      if (res != S_OK) {
        return res;
      }
      if (processed == 0) {
        return E_FAIL;  // no progress: avoid spinning forever
      }
      written += processed;
    }

    // Drop our reference before reporting the result so the callback can
    // finalize the output.
    stream.Release();
    res = extractCallback->SetOperationResult(
        NArchive::NExtract::NOperationResult::kOK);
    if (res != S_OK) {
      return res;
    }
  }

  return S_OK;
}
// Writes the content of the requested items to the streams provided by
// extractCallback.
//
// indices/numItems select the items to extract. numItems == (UInt32)-1 is
// treated as "every item" (the 7-Zip convention), in which case indices is not
// read. In test mode nothing is written and S_OK is returned immediately.
//
// Returns S_OK on success, E_INVALIDARG for a missing callback/index array or
// an out-of-range index, the callback's or stream's error code when one of
// them fails, and E_FAIL when the stream stops accepting data.
HRESULT SzeInArchive::Extract(
    const UInt32* indices, UInt32 numItems, Int32 testMode,
    IArchiveExtractCallback* extractCallback) noexcept {
  if (testMode) {
    return S_OK;
  }
  if (extractCallback == nullptr) {
    return E_INVALIDARG;
  }

  const bool all_items = (numItems == static_cast<UInt32>(-1));
  if (all_items) {
    numItems = static_cast<UInt32>(items_.size());
  } else if (numItems != 0 && indices == nullptr) {
    return E_INVALIDARG;
  }

  for (UInt32 n = 0; n < numItems; ++n) {
    const UInt32 index = all_items ? n : indices[n];
    if (index >= items_.size()) {
      return E_INVALIDARG;
    }

    CMyComPtr<ISequentialOutStream> stream;
    HRESULT res = extractCallback->GetStream(
        index, &stream, NArchive::NExtract::NAskMode::kExtract);
    if (res != S_OK) {
      return res;
    }
    if (!stream) {
      continue;  // the callback supplied no stream: this item is skipped
    }

    res = extractCallback->PrepareOperation(
        NArchive::NExtract::NAskMode::kExtract);
    if (res != S_OK) {
      return res;
    }

    // Write may accept fewer bytes than requested, so keep going until the
    // whole buffer has been consumed.
    const std::vector<char>& content = items_[index].content;
    size_t written = 0;
    while (written < content.size()) {
      const size_t left = content.size() - written;
      const UInt32 chunk =
          left > 0xFFFFFFFFu ? 0xFFFFFFFFu : static_cast<UInt32>(left);
      UInt32 processed = 0;
      res = stream->Write(content.data() + written, chunk, &processed);
      if (res != S_OK) {
        return res;
      }
      if (processed == 0) {
        return E_FAIL;  // no progress: avoid spinning forever
      }
      written += processed;
    }

    // Drop our reference before reporting the result so the callback can
    // finalize the output.
    stream.Release();
    res = extractCallback->SetOperationResult(
        NArchive::NExtract::NOperationResult::kOK);
    if (res != S_OK) {
      return res;
    }
  }

  return S_OK;
}

It looks similar to the implementation of the SZ archive, except items_ is used to extract files.

IOutArchive::UpdateItems

C++
// Applies the update described by updateCallback to the in-memory item list
// and then serializes the resulting archive to outStream.
//
// Returns S_OK on completion, E_INVALIDARG when a required pointer is null,
// and E_FAIL when an exception is raised while updating or writing.
HRESULT SzeInArchive::UpdateItems(
    ISequentialOutStream* outStream, UInt32 numItems,
    IArchiveUpdateCallback* updateCallback) noexcept {
  if (outStream == nullptr || (numItems != 0 && updateCallback == nullptr)) {
    return E_INVALIDARG;
  }

  // Both helpers allocate and may throw; this method is noexcept, so turn an
  // exception into an error code instead of terminating the host process.
  try {
    UpdateItemsInMem(numItems, updateCallback);
    WriteFilesToOutStream(outStream);
  } catch (...) {
    return E_FAIL;
  }
  return S_OK;
}
// Applies the update described by updateCallback to the in-memory item list
// and then serializes the resulting archive to outStream.
//
// Returns S_OK on completion, E_INVALIDARG when a required pointer is null,
// and E_FAIL when an exception is raised while updating or writing.
HRESULT SzeInArchive::UpdateItems(
    ISequentialOutStream* outStream, UInt32 numItems,
    IArchiveUpdateCallback* updateCallback) noexcept {
  if (outStream == nullptr || (numItems != 0 && updateCallback == nullptr)) {
    return E_INVALIDARG;
  }

  // Both helpers allocate and may throw; this method is noexcept, so turn an
  // exception into an error code instead of terminating the host process.
  try {
    UpdateItemsInMem(numItems, updateCallback);
    WriteFilesToOutStream(outStream);
  } catch (...) {
    return E_FAIL;
  }
  return S_OK;
}

Implementation splits into 2 steps:

  1. Updating the items_ array in memory. We should cover 3 cases:
    • item(s) added;
    • item(s) removed;
    • item(s) renamed;
  2. Writing the items_ array to the output stream

UpdateItemsInMem

C++
// Rebuilds items_ from the update description provided by updateCallback.
//
// For each of the numItems entries the callback reports whether the entry
// maps to an existing archive item (indexInArchive) or is new
// (indexInArchive == (UInt32)-1), and whether its data and/or properties
// changed. Existing items are copied, new data is read from the callback's
// stream, and a changed path is taken from the kpidPath property. Items that
// are not mentioned by the callback are dropped, which covers removal.
//
// If the callback cannot describe an entry, the update is abandoned and
// items_ is left as it was.
void SzeInArchive::UpdateItemsInMem(
    UInt32 numItems, IArchiveUpdateCallback* updateCallback) {
  const UInt32 kNoIndex = static_cast<UInt32>(-1);

  std::vector<File> new_items;
  for (UInt32 i = 0; i < numItems; i++) {
    // Initialized so they are never read indeterminate if the callback fails.
    Int32 newData = 0;
    Int32 newProps = 0;
    UInt32 indexInArchive = kNoIndex;
    HRESULT res = updateCallback->GetUpdateItemInfo(i, &newData, &newProps,
                                                    &indexInArchive);
    if (FAILED(res)) {
      return;  // cannot tell what to do with this entry: keep items_ as is
    }

    const bool is_new = (indexInArchive == kNoIndex);
    File item;
    if (!is_new) {
      if (indexInArchive >= items_.size()) {
        continue;  // index does not refer to an item we hold
      }
      item = items_[indexInArchive];
    }

    if (newData) {
      CMyComPtr<ISequentialInStream> in_stream;
      res = updateCallback->GetStream(i, &in_stream);
      if (res != S_OK || !in_stream) {
        // No data available. An existing item is kept unchanged; a new item
        // is skipped rather than stored as an empty, nameless entry.
        if (!is_new) {
          new_items.push_back(std::move(item));
        }
        continue;
      }

      ArchiveReader reader(in_stream);
      item.content.clear();
      for (byte b : reader) {
        item.content.push_back(b);
      }
    }

    if (newProps) {
      PROPVARIANT variant_path{};
      res = updateCallback->GetProperty(i, kpidPath, &variant_path);
      // Only trust bstrVal when the variant really carries a string.
      if (variant_path.vt == VT_BSTR) {
        if (res == S_OK && variant_path.bstrVal != nullptr) {
          item.path = std::string(utils::Ws2s(variant_path.bstrVal));
        }
        // The string is owned by us after GetProperty; release it.
        SysFreeString(variant_path.bstrVal);
      }
    }

    new_items.push_back(std::move(item));
  }

  items_ = std::move(new_items);
}
// Rebuilds items_ from the update description provided by updateCallback.
//
// For each of the numItems entries the callback reports whether the entry
// maps to an existing archive item (indexInArchive) or is new
// (indexInArchive == (UInt32)-1), and whether its data and/or properties
// changed. Existing items are copied, new data is read from the callback's
// stream, and a changed path is taken from the kpidPath property. Items that
// are not mentioned by the callback are dropped, which covers removal.
//
// If the callback cannot describe an entry, the update is abandoned and
// items_ is left as it was.
void SzeInArchive::UpdateItemsInMem(
    UInt32 numItems, IArchiveUpdateCallback* updateCallback) {
  const UInt32 kNoIndex = static_cast<UInt32>(-1);

  std::vector<File> new_items;
  for (UInt32 i = 0; i < numItems; i++) {
    // Initialized so they are never read indeterminate if the callback fails.
    Int32 newData = 0;
    Int32 newProps = 0;
    UInt32 indexInArchive = kNoIndex;
    HRESULT res = updateCallback->GetUpdateItemInfo(i, &newData, &newProps,
                                                    &indexInArchive);
    if (FAILED(res)) {
      return;  // cannot tell what to do with this entry: keep items_ as is
    }

    const bool is_new = (indexInArchive == kNoIndex);
    File item;
    if (!is_new) {
      if (indexInArchive >= items_.size()) {
        continue;  // index does not refer to an item we hold
      }
      item = items_[indexInArchive];
    }

    if (newData) {
      CMyComPtr<ISequentialInStream> in_stream;
      res = updateCallback->GetStream(i, &in_stream);
      if (res != S_OK || !in_stream) {
        // No data available. An existing item is kept unchanged; a new item
        // is skipped rather than stored as an empty, nameless entry.
        if (!is_new) {
          new_items.push_back(std::move(item));
        }
        continue;
      }

      ArchiveReader reader(in_stream);
      item.content.clear();
      for (byte b : reader) {
        item.content.push_back(b);
      }
    }

    if (newProps) {
      PROPVARIANT variant_path{};
      res = updateCallback->GetProperty(i, kpidPath, &variant_path);
      // Only trust bstrVal when the variant really carries a string.
      if (variant_path.vt == VT_BSTR) {
        if (res == S_OK && variant_path.bstrVal != nullptr) {
          item.path = std::string(utils::Ws2s(variant_path.bstrVal));
        }
        // The string is owned by us after GetProperty; release it.
        SysFreeString(variant_path.bstrVal);
      }
    }

    new_items.push_back(std::move(item));
  }

  items_ = std::move(new_items);
}

This code updates the items_ collection by building a new vector, new_items, and then moving it into items_ at the end.

WriteFilesToOutStream

C++
// Serializes the in-memory archive to outStream: the "SZ" signature followed
// by one "{<path>|<size>}<content>" record per entry of items_.
//
// Writing stops at the first failed write; the function returns void, so the
// failure cannot be reported to the caller from here.
void SzeInArchive::WriteFilesToOutStream(ISequentialOutStream* outStream) {
  if (outStream == nullptr) {
    return;
  }

  // Writes the whole buffer, repeating the call when the stream accepts only
  // part of it. Returns false when the stream fails or makes no progress.
  auto write_all = [outStream](const char* data, size_t size) -> bool {
    while (size > 0) {
      const UInt32 chunk =
          size > 0xFFFFFFFFu ? 0xFFFFFFFFu : static_cast<UInt32>(size);
      UInt32 processed = 0;
      if (outStream->Write(data, chunk, &processed) != S_OK ||
          processed == 0) {
        return false;
      }
      data += processed;
      size -= processed;
    }
    return true;
  };

  if (!write_all("SZ", 2)) {
    return;
  }
  for (const auto& file : items_) {
    const std::string header =
        "{" + file.path + "|" + std::to_string(file.content.size()) + "}";
    if (!write_all(header.data(), header.size())) {
      return;
    }
    if (!write_all(file.content.data(), file.content.size())) {
      return;
    }
  }
}
// Serializes the in-memory archive to outStream: the "SZ" signature followed
// by one "{<path>|<size>}<content>" record per entry of items_.
//
// Writing stops at the first failed write; the function returns void, so the
// failure cannot be reported to the caller from here.
void SzeInArchive::WriteFilesToOutStream(ISequentialOutStream* outStream) {
  if (outStream == nullptr) {
    return;
  }

  // Writes the whole buffer, repeating the call when the stream accepts only
  // part of it. Returns false when the stream fails or makes no progress.
  auto write_all = [outStream](const char* data, size_t size) -> bool {
    while (size > 0) {
      const UInt32 chunk =
          size > 0xFFFFFFFFu ? 0xFFFFFFFFu : static_cast<UInt32>(size);
      UInt32 processed = 0;
      if (outStream->Write(data, chunk, &processed) != S_OK ||
          processed == 0) {
        return false;
      }
      data += processed;
      size -= processed;
    }
    return true;
  };

  if (!write_all("SZ", 2)) {
    return;
  }
  for (const auto& file : items_) {
    const std::string header =
        "{" + file.path + "|" + std::to_string(file.content.size()) + "}";
    if (!write_all(header.data(), header.size())) {
      return;
    }
    if (!write_all(file.content.data(), file.content.size())) {
      return;
    }
  }
}

This code serializes in-mem items_ back to the output stream.

IInArchive::GetPropertyInfo

C++
// Describes an item property exposed by the archive. The same descriptor is
// returned regardless of `index`: property id kpidSize, type VT_UI8, and a
// newly allocated name string "Sample" stored in *name.
HRESULT SzeInArchive::GetPropertyInfo(UInt32 index, BSTR* name, PROPID* propID,
                                      VARTYPE* varType) noexcept {
  // Identity and type of the property.
  *varType = VT_UI8;
  *propID = kpidSize;

  // Name string allocated for the caller.
  BSTR property_name = SysAllocString(L"Sample");
  *name = property_name;

  return S_OK;
}
// Describes an item property exposed by the archive. The same descriptor is
// returned regardless of `index`: property id kpidSize, type VT_UI8, and a
// newly allocated name string "Sample" stored in *name.
HRESULT SzeInArchive::GetPropertyInfo(UInt32 index, BSTR* name, PROPID* propID,
                                      VARTYPE* varType) noexcept {
  // Identity and type of the property.
  *varType = VT_UI8;
  *propID = kpidSize;

  // Name string allocated for the caller.
  BSTR property_name = SysAllocString(L"Sample");
  *name = property_name;

  return S_OK;
}

This method is optional to implement. I added this sample only to show how to display properties. As a result, we can see "Size" as a new column in the 7-Zip File Manager.

At the end archive will look like this:

An image