feat: pigz#254
Conversation
Prefer a tar-to-pigz pipeline when packing integrated test baselines so gzip compression can use the available CPU cores. Keep the existing Python gztar path as a fallback when tar or pigz is unavailable, and preserve the existing .tar.gz archive format.
There was a problem hiding this comment.
Pull request overview
This PR introduces an optional fast-path for creating baseline tarballs using external tar + parallel gzip (pigz), while keeping the existing Python shutil.make_archive(..., format="gztar") behavior as a fallback. It also expands the default restart-check exclusion patterns to ignore additional HDF5 paths that are likely unstable across runs.
Changes:
- Add a
tar | pigzpipeline for baseline archive creation, falling back to Pythongztarif tools are unavailable or the pipeline fails. - Add a helper to determine an appropriate CPU thread count for
pigz. - Update
restart_check.pydefault exclusion regex list to includedNdXanddetJ.
Reviewed changes
Copilot reviewed 2 out of 2 changed files in this pull request and generated 1 comment.
| File | Description |
|---|---|
| geos-ats/src/geos/ats/helpers/restart_check.py | Extends default HDF5 exclusion patterns used during restart comparisons. |
| geos-ats/src/geos/ats/baseline_io.py | Adds an external tar + pigz archiving path to speed up baseline archive creation with a fallback to Python’s gztar. |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| try: | ||
| with open( archive_path, 'wb' ) as output: | ||
| tar_process = subprocess.Popen( [ tar_bin, '-C', baseline_path, '-cf', '-', '.' ], | ||
| stdout=subprocess.PIPE ) | ||
| if tar_process.stdout is None: | ||
| raise RuntimeError( 'failed to capture tar output' ) | ||
| pigz_process = subprocess.Popen( [ pigz_bin, '-9', '-p', threads ], | ||
| stdin=tar_process.stdout, | ||
| stdout=output ) | ||
| tar_process.stdout.close() | ||
|
|
||
| pigz_status = pigz_process.wait() | ||
| tar_status = tar_process.wait() | ||
|
|
||
| if tar_status != 0 or pigz_status != 0: | ||
| try: | ||
| os.remove( archive_path ) | ||
| except FileNotFoundError: | ||
| pass | ||
| raise RuntimeError( f'tar exited with {tar_status}; pigz exited with {pigz_status}' ) | ||
|
|
||
| except Exception as e: | ||
| logger.warning( 'Parallel baseline archive creation failed; using Python gztar archiver' ) | ||
| logger.warning( repr( e ) ) | ||
| return False |
jafranc
left a comment
There was a problem hiding this comment.
Great perf booster ! Thanks ! 🙏
Thanks for label logic clean-up too.
| pigz_status = pigz_process.wait() | ||
| tar_status = tar_process.wait() | ||
|
|
||
| if tar_status != 0 or pigz_status != 0: | ||
| try: | ||
| os.remove( archive_path ) | ||
| except FileNotFoundError: | ||
| pass | ||
| raise RuntimeError( f'tar exited with {tar_status}; pigz exited with {pigz_status}' ) | ||
|
|
||
| except Exception as e: | ||
| logger.warning( 'Parallel baseline archive creation failed; using Python gztar archiver' ) | ||
| logger.warning( repr( e ) ) | ||
| return False |
There was a problem hiding this comment.
| pigz_status = pigz_process.wait() | |
| tar_status = tar_process.wait() | |
| if tar_status != 0 or pigz_status != 0: | |
| try: | |
| os.remove( archive_path ) | |
| except FileNotFoundError: | |
| pass | |
| raise RuntimeError( f'tar exited with {tar_status}; pigz exited with {pigz_status}' ) | |
| except Exception as e: | |
| logger.warning( 'Parallel baseline archive creation failed; using Python gztar archiver' ) | |
| logger.warning( repr( e ) ) | |
| return False | |
| pigz_status = pigz_process.wait() | |
| if pigz_status != 0: | |
| tar_process.kill() | |
| tar_process.wait() | |
| raise RuntimeError(f'pigz exited with {pigz_status}') | |
| tar_status = tar_process.wait() | |
| if tar_status != 0: | |
| raise RuntimeError(f'tar exited with {tar_status}') | |
| except Exception as e: | |
| for proc in (pigz_process, tar_process): | |
| if proc is not None and proc.poll() is None: | |
| try: | |
| proc.kill() | |
| except ProcessLookupError: | |
| pass | |
| proc.wait() | |
| try: | |
| os.remove(archive_path) | |
| except FileNotFoundError: | |
| pass | |
| logger.warning('Parallel baseline archive creation failed; using Python gztar archiver') | |
| logger.warning(repr(e)) | |
| return False |
There was a problem hiding this comment.
Suggest adding a zombie process clean-up if fail in this context
Uh oh!
There was an error while loading. Please reload this page.