Skip to content

Commit 22826e0

Browse files
PostgresNode::pid is improved (#199)
* PostgresNode::pid is improved - We do multiple attempts to read pid file. - We process a case when we see that node is stopped between test and read. - We process a case when pid-file is empty. * PostgresNode::pid is updated Assert is added. * execute_utility2 is updated (ignore_errors) - New parameters "ignore_errors" is added. Default value is False. - Asserts are added. * PostgresNode::_try_shutdown is rewrited (normalization) * PostgresNode::pid uses the data from "pg_ctl status" output. * PostgresNode::_try_shutdown is correct (return None) This method returns nothing (None).
1 parent de432ed commit 22826e0

File tree

2 files changed

+211
-40
lines changed

2 files changed

+211
-40
lines changed

testgres/consts.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,7 @@
3535

3636
# logical replication settings
3737
LOGICAL_REPL_MAX_CATCHUP_ATTEMPTS = 60
38+
39+
PG_CTL__STATUS__OK = 0
40+
PG_CTL__STATUS__NODE_IS_STOPPED = 3
41+
PG_CTL__STATUS__BAD_DATADIR = 4

testgres/node.py

Lines changed: 207 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@
4949
RECOVERY_CONF_FILE, \
5050
PG_LOG_FILE, \
5151
UTILS_LOG_FILE, \
52-
PG_PID_FILE
52+
PG_CTL__STATUS__OK, \
53+
PG_CTL__STATUS__NODE_IS_STOPPED, \
54+
PG_CTL__STATUS__BAD_DATADIR \
5355

5456
from .consts import \
5557
MAX_LOGICAL_REPLICATION_WORKERS, \
@@ -208,14 +210,136 @@ def pid(self):
208210
Return postmaster's PID if node is running, else 0.
209211
"""
210212

211-
if self.status():
212-
pid_file = os.path.join(self.data_dir, PG_PID_FILE)
213-
lines = self.os_ops.readlines(pid_file)
214-
pid = int(lines[0]) if lines else None
215-
return pid
213+
self__data_dir = self.data_dir
216214

217-
# for clarity
218-
return 0
215+
_params = [
216+
self._get_bin_path('pg_ctl'),
217+
"-D", self__data_dir,
218+
"status"
219+
] # yapf: disable
220+
221+
status_code, out, error = execute_utility2(
222+
self.os_ops,
223+
_params,
224+
self.utils_log_file,
225+
verbose=True,
226+
ignore_errors=True)
227+
228+
assert type(status_code) == int # noqa: E721
229+
assert type(out) == str # noqa: E721
230+
assert type(error) == str # noqa: E721
231+
232+
# -----------------
233+
if status_code == PG_CTL__STATUS__NODE_IS_STOPPED:
234+
return 0
235+
236+
# -----------------
237+
if status_code == PG_CTL__STATUS__BAD_DATADIR:
238+
return 0
239+
240+
# -----------------
241+
if status_code != PG_CTL__STATUS__OK:
242+
errMsg = "Getting of a node status [data_dir is {0}] failed.".format(self__data_dir)
243+
244+
raise ExecUtilException(
245+
message=errMsg,
246+
command=_params,
247+
exit_code=status_code,
248+
out=out,
249+
error=error,
250+
)
251+
252+
# -----------------
253+
assert status_code == PG_CTL__STATUS__OK
254+
255+
if out == "":
256+
__class__._throw_error__pg_ctl_returns_an_empty_string(
257+
_params
258+
)
259+
260+
C_PID_PREFIX = "(PID: "
261+
262+
i = out.find(C_PID_PREFIX)
263+
264+
if i == -1:
265+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
266+
out,
267+
_params
268+
)
269+
270+
assert i > 0
271+
assert i < len(out)
272+
assert len(C_PID_PREFIX) <= len(out)
273+
assert i <= len(out) - len(C_PID_PREFIX)
274+
275+
i += len(C_PID_PREFIX)
276+
start_pid_s = i
277+
278+
while True:
279+
if i == len(out):
280+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
281+
out,
282+
_params
283+
)
284+
285+
ch = out[i]
286+
287+
if ch == ")":
288+
break
289+
290+
if ch.isdigit():
291+
i += 1
292+
continue
293+
294+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
295+
out,
296+
_params
297+
)
298+
assert False
299+
300+
if i == start_pid_s:
301+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
302+
out,
303+
_params
304+
)
305+
306+
# TODO: Let's verify a length of pid string.
307+
308+
pid = int(out[start_pid_s:i])
309+
310+
if pid == 0:
311+
__class__._throw_error__pg_ctl_returns_a_zero_pid(
312+
out,
313+
_params
314+
)
315+
316+
assert pid != 0
317+
return pid
318+
319+
@staticmethod
320+
def _throw_error__pg_ctl_returns_an_empty_string(_params):
321+
errLines = []
322+
errLines.append("Utility pg_ctl returns empty string.")
323+
errLines.append("Command line is {0}".format(_params))
324+
raise RuntimeError("\n".join(errLines))
325+
326+
@staticmethod
327+
def _throw_error__pg_ctl_returns_an_unexpected_string(out, _params):
328+
errLines = []
329+
errLines.append("Utility pg_ctl returns an unexpected string:")
330+
errLines.append(out)
331+
errLines.append("------------")
332+
errLines.append("Command line is {0}".format(_params))
333+
raise RuntimeError("\n".join(errLines))
334+
335+
@staticmethod
336+
def _throw_error__pg_ctl_returns_a_zero_pid(out, _params):
337+
errLines = []
338+
errLines.append("Utility pg_ctl returns a zero pid. Output string is:")
339+
errLines.append(out)
340+
errLines.append("------------")
341+
errLines.append("Command line is {0}".format(_params))
342+
raise RuntimeError("\n".join(errLines))
219343

220344
@property
221345
def auxiliary_pids(self):
@@ -338,41 +462,84 @@ def version(self):
338462
return self._pg_version
339463

340464
def _try_shutdown(self, max_attempts, with_force=False):
465+
assert type(max_attempts) == int # noqa: E721
466+
assert type(with_force) == bool # noqa: E721
467+
assert max_attempts > 0
468+
341469
attempts = 0
470+
471+
# try stopping server N times
472+
while attempts < max_attempts:
473+
attempts += 1
474+
try:
475+
self.stop()
476+
except ExecUtilException:
477+
continue # one more time
478+
except Exception:
479+
eprint('cannot stop node {}'.format(self.name))
480+
break
481+
482+
return # OK
483+
484+
# If force stopping is enabled and PID is valid
485+
if not with_force:
486+
return
487+
342488
node_pid = self.pid
489+
assert node_pid is not None
490+
assert type(node_pid) == int # noqa: E721
343491

344-
if node_pid > 0:
345-
# try stopping server N times
346-
while attempts < max_attempts:
347-
try:
348-
self.stop()
349-
break # OK
350-
except ExecUtilException:
351-
pass # one more time
352-
except Exception:
353-
eprint('cannot stop node {}'.format(self.name))
354-
break
355-
356-
attempts += 1
357-
358-
# If force stopping is enabled and PID is valid
359-
if with_force and node_pid != 0:
360-
# If we couldn't stop the node
361-
p_status_output = self.os_ops.exec_command(cmd=f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8')
362-
if self.status() != NodeStatus.Stopped and p_status_output and str(node_pid) in p_status_output:
363-
try:
364-
eprint(f'Force stopping node {self.name} with PID {node_pid}')
365-
self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
366-
except Exception:
367-
# The node has already stopped
368-
pass
369-
370-
# Check that node stopped - print only column pid without headers
371-
p_status_output = self.os_ops.exec_command(f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8')
372-
if p_status_output and str(node_pid) in p_status_output:
373-
eprint(f'Failed to stop node {self.name}.')
374-
else:
375-
eprint(f'Node {self.name} has been stopped successfully.')
492+
if node_pid == 0:
493+
return
494+
495+
# TODO: [2025-02-28] It is really the old ugly code. We have to rewrite it!
496+
497+
ps_command = ['ps', '-o', 'pid=', '-p', str(node_pid)]
498+
499+
ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8')
500+
assert type(ps_output) == str # noqa: E721
501+
502+
if ps_output == "":
503+
return
504+
505+
if ps_output != str(node_pid):
506+
__class__._throw_bugcheck__unexpected_result_of_ps(
507+
ps_output,
508+
ps_command)
509+
510+
try:
511+
eprint('Force stopping node {0} with PID {1}'.format(self.name, node_pid))
512+
self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
513+
except Exception:
514+
# The node has already stopped
515+
pass
516+
517+
# Check that node stopped - print only column pid without headers
518+
ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8')
519+
assert type(ps_output) == str # noqa: E721
520+
521+
if ps_output == "":
522+
eprint('Node {0} has been stopped successfully.'.format(self.name))
523+
return
524+
525+
if ps_output == str(node_pid):
526+
eprint('Failed to stop node {0}.'.format(self.name))
527+
return
528+
529+
__class__._throw_bugcheck__unexpected_result_of_ps(
530+
ps_output,
531+
ps_command)
532+
533+
@staticmethod
534+
def _throw_bugcheck__unexpected_result_of_ps(result, cmd):
535+
assert type(result) == str # noqa: E721
536+
assert type(cmd) == list # noqa: E721
537+
errLines = []
538+
errLines.append("[BUG CHECK] Unexpected result of command ps:")
539+
errLines.append(result)
540+
errLines.append("-----")
541+
errLines.append("Command line is {0}".format(cmd))
542+
raise RuntimeError("\n".join(errLines))
376543

377544
def _assign_master(self, master):
378545
"""NOTE: this is a private method!"""

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy