Skip to content

Save git info #72

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 20, 2021
119 changes: 105 additions & 14 deletions labscript/labscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import sys
import subprocess
import keyword
import threading
from inspect import getcallargs
from functools import wraps

Expand All @@ -31,6 +32,8 @@

import labscript_utils.h5_lock, h5py
import labscript_utils.properties
from labscript_utils.labconfig import LabConfig
from labscript_utils.filewatcher import FileWatcher

# This imports the default Qt library that other labscript suite code will
# import as well, since it all uses qtutils. By having a Qt library already
Expand Down Expand Up @@ -69,7 +72,10 @@
startupinfo.dwFlags |= 1 #subprocess.STARTF_USESHOWWINDOW # This variable isn't defined, but apparently it's equal to one.
else:
startupinfo = None


# Extract settings from labconfig. Instantiate LabConfig once and read both
# options from it, rather than constructing (and re-parsing the config file
# for) a separate LabConfig instance per option.
_labconfig = LabConfig()
# Whether to save Mercurial repository info into shot files (on by default).
_SAVE_HG_INFO = _labconfig.getboolean('labscript', 'save_hg_info', fallback=True)
# Whether to save git repository info into shot files (off by default).
_SAVE_GIT_INFO = _labconfig.getboolean('labscript', 'save_git_info', fallback=False)

class config(object):
suppress_mild_warnings = True
Expand Down Expand Up @@ -2173,8 +2179,90 @@ def generate_connection_table(hdf5_file):
else:
master_pseudoclock_name = compiler.master_pseudoclock.name
dataset.attrs['master_pseudoclock'] = master_pseudoclock_name



# Create a dictionary for caching results from vcs commands. The keys will be
# the paths to files that are saved during save_labscripts(). The values will be
# a list of tuples of the form (command, info, err); see the "Returns" section
# of the _run_vcs_commands() docstring for more info. Also create a FileWatcher
# instance for tracking when vcs results need updating. The callback will
# replace the outdated cache entry with a new list of updated vcs commands and
# outputs.
_vcs_cache = {}
_vcs_cache_rlock = threading.RLock()
def _file_watcher_callback(name, info, event):
    """Refresh the cached VCS output for a watched file.

    Invoked by the FileWatcher thread when a watched file changes, and also
    called directly by save_labscripts() to populate a new cache entry.

    Args:
        name (str): Path of the file whose cache entry should be refreshed.
        info: Unused; present to match the FileWatcher callback signature.
        event: Unused; present to match the FileWatcher callback signature.
    """
    # NOTE: the lock is deliberately held while the (potentially slow)
    # subprocess calls run. If this callback fires, the cached data is stale,
    # so save_labscripts() should block on the lock until the cache has been
    # refreshed rather than writing outdated VCS info to the shot file.
    with _vcs_cache_rlock:
        _vcs_cache[name] = _run_vcs_commands(name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the lock is now being held for however long it takes to query the subprocess information. You can reduce lock contention by doing something like temp = _run_vcs_commands(name) outside of the lock and then only holding the lock when you place that into the _vcs_cache dictionary.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Believe it or not I actually originally set things up as you're suggesting in both of these comments with Lock in place of RLock, but then changed it. For the lock here in _file_watcher_callback(), if the _file_watcher thread called _file_watcher_callback() then at least some of the cache data is out of date, so it is actually better for it to block the save_labscripts() thread from writing cached results to drive until the cache is updated.


_file_watcher = FileWatcher(_file_watcher_callback)

def _run_vcs_commands(path):
    """Run some VCS commands on a file and return their output.

    The function is used to gather up version control system information so
    that it can be stored in the hdf5 files of shots. This is for convenience
    and complements the full copy of the file already included in the shot
    file.

    Whether hg and git commands are run is controlled by the `save_hg_info`
    and `save_git_info` options in the `[labscript]` section of the labconfig.

    Args:
        path (str): The path with file name and extension of the file on which
            the commands will be run. The working directory will be set to the
            directory containing the specified file.

    Returns:
        results (list of (tuple, str, str)): A list of tuples, each
            containing information related to one vcs command of the form
            (command, info, err). The first entry in that tuple is itself a
            tuple of strings which was passed to subprocess.Popen() in order
            to run the command. Then info is a string that contains the text
            printed to stdout by that command, and err contains the text
            printed to stderr by the command.
    """
    module_directory, module_filename = os.path.split(path)

    # Assemble the full (command, working_directory) pairs to execute,
    # honouring the labconfig-controlled compiler settings.
    pending = []
    if compiler.save_hg_info:
        for hg_args in (['log', '--limit', '1'], ['status'], ['diff']):
            full_command = tuple(['hg'] + hg_args + [module_filename])
            pending.append((full_command, module_directory))
    if compiler.save_git_info:
        git_arg_lists = (
            ['branch', '--show-current'],
            ['describe', '--tags', '--always', 'HEAD'],
            ['rev-parse', 'HEAD'],
            ['diff', 'HEAD', module_filename],
        )
        for git_args in git_arg_lists:
            pending.append((tuple(['git'] + git_args), module_directory))

    # Launch every command before reading any output, so the subprocesses run
    # concurrently rather than one at a time.
    running = [
        (
            full_command,
            subprocess.Popen(
                full_command,
                cwd=working_directory,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                startupinfo=startupinfo,
            ),
        )
        for full_command, working_directory in pending
    ]

    # Collect and decode the stdout/stderr of each command, preserving the
    # order in which the commands were issued.
    results = []
    for full_command, process in running:
        stdout_bytes, stderr_bytes = process.communicate()
        results.append(
            (full_command, stdout_bytes.decode('utf-8'), stderr_bytes.decode('utf-8'))
        )
    return results

def save_labscripts(hdf5_file):
if compiler.labscript_file is not None:
script_text = open(compiler.labscript_file).read()
Expand All @@ -2198,18 +2286,19 @@ def save_labscripts(hdf5_file):
# Doesn't seem to want to double count files if you just import the contents of a file within a module
continue
hdf5_file.create_dataset(save_path, data=open(path).read())
if compiler.save_hg_info:
hg_commands = [['log', '--limit', '1'], ['status'], ['diff']]
for command in hg_commands:
process = subprocess.Popen(['hg'] + command + [os.path.split(path)[1]], cwd=os.path.split(path)[0],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, startupinfo=startupinfo)
info, err = process.communicate()
if info or err:
hdf5_file[save_path].attrs['hg ' + str(command[0])] = info.decode('utf-8') + '\n' + err.decode('utf-8')
with _vcs_cache_rlock:
if path not in _vcs_cache:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly here, you could do:

with _vcs_cache_rlock:
    condition = path not in _vcs_cache
if condition:
    #add to filewatch, and call callback
with _vcs_cache_rlock:
    for command, info, err in _vcs_cache[path]:

to reduce lock contention. This actually might mean I was wrong about needing the RLock over a plain Lock (but whatever...RLocks are safer in the long run).

Copy link
Contributor Author

@zakv zakv Oct 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I did it this way I thought I was just making things more complicated than necessary. As soon as the lock is released after the first with statement, it is almost immediately re-acquired either by the explicit callback call or the following with statement.

But now that you brought it up again, I think you're right that this is better. Freeing up the lock even briefly gives the _file_watcher thread an opportunity to run _file_watcher_callback() between those statements so it could potentially update the cache last minute right before last with statement there. I'll make this change.

I also considered reducing lock contention even further by locking just to do file_cache_entry = _vcs_cache[path].copy() then freeing the lock during for loop which is slowed by writing to disk. I think that would work fine as long as nothing in the list returned by _vcs_cache[path] is mutable and edited by both threads. Currently the list only has tuples of strings so that should be ok unless something changes. Alternatively copy.deepcopy() could be used to play it safe. Thoughts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commit 4716ceb implements your suggestion, but doesn't move the for loop out of the locked section as mentioned above.

# Add file to watch list and create its entry in the cache.
_file_watcher.add_file(path)
_file_watcher_callback(path, None, None)
# Save the cached vcs output to the file.
for command, info, err in _vcs_cache[path]:
attribute_str = command[0] + ' ' + command[1]
hdf5_file[save_path].attrs[attribute_str] = (info + '\n' + err)
except ImportError:
pass
except WindowsError if os.name == 'nt' else None:
sys.stderr.write('Warning: Cannot save Mercurial data for imported scripts. Check that the hg command can be run from the command line.\n')
sys.stderr.write('Warning: Cannot save version control data for imported scripts. Check that the hg and/or git command can be run from the command line.\n')


def write_device_properties(hdf5_file):
Expand Down Expand Up @@ -2539,7 +2628,8 @@ def labscript_cleanup():
compiler.wait_delay = 0
compiler.time_markers = {}
compiler._PrimaryBLACS = None
compiler.save_hg_info = True
compiler.save_hg_info = _SAVE_HG_INFO
compiler.save_git_info = _SAVE_GIT_INFO
compiler.shot_properties = {}

class compiler(object):
Expand All @@ -2559,7 +2649,8 @@ class compiler(object):
wait_delay = 0
time_markers = {}
_PrimaryBLACS = None
save_hg_info = True
save_hg_info = _SAVE_HG_INFO
save_git_info = _SAVE_GIT_INFO
shot_properties = {}

# safety measure in case cleanup is called before init
Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy