# bioconda_utils/utils.py
"""
Utility Functions and Classes
This module collects small pieces of code used throughout :py:mod:`bioconda_utils`.
"""
import asyncio
import contextlib
import datetime
import fnmatch
import glob
import logging
import os
import subprocess as sp
import sys
import shutil
import json
import queue
import warnings
from threading import Event, Thread
from pathlib import PurePath
from collections import Counter, defaultdict, namedtuple
from collections.abc import Iterable
from itertools import product, chain, groupby, zip_longest
from functools import partial
from typing import Sequence, Collection, List, Dict, Any, Union
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool
from urllib3 import Retry
from github import Github
import pkg_resources
import pandas as pd
import tqdm as _tqdm
import aiohttp
import backoff
import yaml
import jinja2
from jinja2 import Environment, PackageLoader
# FIXME(upstream): For conda>=4.7.0 initialize_logging is (erroneously) called
# by conda.core.index.get_index which messes up our logging.
# => Prevent custom conda logging init before importing anything conda-related.
import conda.gateways.logging
conda.gateways.logging.initialize_logging = lambda: None
from conda_build import api
from conda.exports import VersionOrder
from boa.cli.mambabuild import prepare as insert_mambabuild
from jsonschema import validate
from colorlog import ColoredFormatter
from boltons.funcutils import FunctionBuilder
logger = logging.getLogger(__name__)
class TqdmHandler(logging.StreamHandler):
"""Tqdm aware logging StreamHandler
Passes all log writes through tqdm to allow progress bars and log
    messages to coexist without clobbering the terminal
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# initialise internal tqdm lock so that we can use tqdm.write
_tqdm.tqdm(disable=True, total=0)
def emit(self, record):
_tqdm.tqdm.write(self.format(record))
def tqdm(*args, **kwargs):
"""Wrapper around TQDM handling disable
Logging is disabled if:
- ``TERM`` is set to ``dumb``
- ``CIRCLECI`` is set to ``true``
    - the effective log level of the logger is higher than the given ``loglevel``
Args:
loglevel: logging loglevel (the number, so logging.INFO)
logger: local logger (in case it has different effective log level)
"""
term_ok = (sys.stderr.isatty()
and os.environ.get("TERM", "") != "dumb"
and os.environ.get("CIRCLECI", "") != "true")
    # pop() the extra keywords so they are not passed on to tqdm itself
    loglevel_ok = (kwargs.pop('logger', logger).getEffectiveLevel()
                   <= kwargs.pop('loglevel', logging.INFO))
kwargs['disable'] = not (term_ok and loglevel_ok)
return _tqdm.tqdm(*args, **kwargs)
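# Usage sketch (illustrative, not part of the original module; ``recipes`` is a
# hypothetical iterable): the wrapper is a drop-in replacement for tqdm.tqdm
# that also accepts the ``loglevel`` and ``logger`` keywords documented above:
#
#     for recipe in tqdm(recipes, desc="rendering", loglevel=logging.DEBUG):
#         ...  # bar renders only on a real terminal when DEBUG is effective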
def ensure_list(obj):
"""Wraps **obj** in a list if necessary
>>> ensure_list("one")
["one"]
>>> ensure_list(["one", "two"])
["one", "two"]
"""
if isinstance(obj, Sequence) and not isinstance(obj, str):
return obj
return [obj]
def wraps(func):
"""Custom wraps() function for decorators
    This one differs from functools.wraps and boltons.funcutils.wraps in
that it allows *adding* keyword arguments to the function signature.
    >>> def decorator(func):
    ...     @wraps(func)
    ...     def wrapper(*args, extra_param=None, **kwargs):
    ...         print("Called with extra_param=%s" % extra_param)
    ...         func(*args, **kwargs)
    ...     return wrapper
    ...
    >>> @decorator
    ... def test(arg1, arg2, arg3='default'):
    ...     pass
    ...
    >>> test('val1', 'val2', extra_param='xyz')
    Called with extra_param=xyz
"""
fb = FunctionBuilder.from_func(func)
def wrapper_wrapper(wrapper_func):
fb_wrapper = FunctionBuilder.from_func(wrapper_func)
fb.kwonlyargs += fb_wrapper.kwonlyargs
fb.kwonlydefaults.update(fb_wrapper.kwonlydefaults)
fb.body = 'return _call(%s)' % fb.get_invocation_str()
execdict = dict(_call=wrapper_func, _func=func)
fully_wrapped = fb.get_func(execdict)
fully_wrapped.__wrapped__ = func
return fully_wrapped
return wrapper_wrapper
class LogFuncFilter:
"""Logging filter capping the number of messages emitted from given function
Arguments:
func: The function for which to filter log messages
        trunc_msg: The message to emit when logging is truncated, informing the
            user that messages will from now on be hidden.
        max_lines: Maximum number of log messages allowed to pass
        consecutive: If true, the filter applies to consecutive messages and
            resets if a message from a different source is encountered.
Fixme:
The implementation assumes that **func** uses a logger initialized with
``getLogger(__name__)``.
"""
def __init__(self, func, trunc_msg: str = None, max_lines: int = 0,
consecutive: bool = True) -> None:
self.func = func
self.max_lines = max_lines + 1
self.cur_max_lines = max_lines + 1
self.consecutive = consecutive
self.trunc_msg = trunc_msg
def filter(self, record: logging.LogRecord) -> bool:
if record.name == self.func.__module__ and record.funcName == self.func.__name__:
if self.cur_max_lines > 1:
self.cur_max_lines -= 1
return True
if self.cur_max_lines == 1 and self.trunc_msg:
self.cur_max_lines -= 1
record.msg = self.trunc_msg
return True
return False
if self.consecutive:
self.cur_max_lines = self.max_lines
return True
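# Usage sketch (illustrative): cap log output from ``run`` at 40 lines, then
# emit a truncation notice -- mirroring what setup_logger() below does when
# ``log_command_max_lines`` is given:
#
#     handler = logging.StreamHandler()
#     handler.addFilter(LogFuncFilter(run, "Command output truncated", 40))
#     logging.getLogger().addHandler(handler)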
class LoggingSourceRenameFilter:
"""Logging filter for abbreviating module name in logs
Maps ``bioconda_utils`` to ``BIOCONDA`` and for everything else
to just the top level package uppercased.
"""
def filter(self, record: logging.LogRecord) -> bool:
if record.name.startswith("bioconda_utils"):
record.name = "BIOCONDA"
else:
record.name = record.name.split('.')[0].upper()
return True
def setup_logger(name: str = 'bioconda_utils', loglevel: Union[str, int] = logging.INFO,
logfile: str = None, logfile_level: Union[str, int] = logging.DEBUG,
log_command_max_lines = None,
prefix: str = "BIOCONDA ",
msgfmt: str = ("%(asctime)s "
"%(log_color)s%(name)s %(levelname)s%(reset)s "
"%(message)s"),
datefmt: str ="%H:%M:%S") -> logging.Logger:
"""Set up logging for bioconda-utils
Args:
name: Module name for which to get a logger (``__name__``)
loglevel: Log level, can be name or int level
logfile: File to log to as well
logfile_level: Log level for file logging
prefix: Prefix to add to our log messages
msgfmt: Format for messages
datefmt: Format for dates
Returns:
A new logger
"""
new_logger = logging.getLogger(name)
root_logger = logging.getLogger()
if root_logger.hasHandlers():
root_logger.handlers.clear()
if logfile:
if isinstance(logfile_level, str):
logfile_level = getattr(logging, logfile_level.upper())
log_file_handler = logging.FileHandler(logfile)
log_file_handler.setLevel(logfile_level)
log_file_formatter = logging.Formatter(
msgfmt.replace("%(log_color)s", "").replace("%(reset)s", "").format(prefix=prefix),
datefmt=None,
)
log_file_handler.setFormatter(log_file_formatter)
root_logger.addHandler(log_file_handler)
else:
logfile_level = logging.FATAL
if isinstance(loglevel, str):
loglevel = getattr(logging, loglevel.upper())
# Base logger is set to the lowest of console or file logging
root_logger.setLevel(min(loglevel, logfile_level))
# Console logging is passed through TqdmHandler so that the progress bar does not
# get broken by log lines emitted.
log_stream_handler = TqdmHandler()
if loglevel:
log_stream_handler.setLevel(loglevel)
log_stream_handler.setFormatter(ColoredFormatter(
msgfmt.format(prefix=prefix),
datefmt=datefmt,
reset=True,
log_colors={
'DEBUG': 'cyan',
'INFO': 'green',
'WARNING': 'yellow',
'ERROR': 'red',
'CRITICAL': 'red',
}))
log_stream_handler.addFilter(LoggingSourceRenameFilter())
root_logger.addHandler(log_stream_handler)
# Add filter for `utils.run` to truncate after n lines emitted.
# We do this here rather than in `utils.run` so that it can be configured
# from the CLI more easily
if log_command_max_lines is not None:
log_filter = LogFuncFilter(run, "Command output truncated", log_command_max_lines)
log_stream_handler.addFilter(log_filter)
return new_logger
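# Usage sketch (illustrative; argument values here are examples only): colored
# console logging at INFO plus a more verbose file log:
#
#     log = setup_logger('bioconda_utils', loglevel='INFO',
#                        logfile='bioconda.log', logfile_level='DEBUG')
#     log.info("shown on console and written to bioconda.log")
#     log.debug("file only: the console handler is set to INFO")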
def ellipsize_recipes(recipes: Collection[str], recipe_folder: str,
n: int = 5, m: int = 50) -> str:
"""Logging helper showing recipe list
Args:
recipes: List of recipes
recipe_folder: Folder name to strip from recipes.
n: Show at most this number of recipes, with "..." if more are found.
m: Don't show anything if more recipes than this
(pointless to show first 5 of 5000)
Returns:
A string like " (htslib, samtools, ...)" or ""
"""
if not recipes or len(recipes) > m:
return ""
if len(recipes) > n:
if not isinstance(recipes, Sequence):
recipes = list(recipes)
recipes = recipes[:n]
append = ", ..."
else:
append = ""
return ' ('+', '.join(recipe.replace(recipe_folder,'').lstrip('/')
for recipe in recipes) + append + ')'
class JinjaSilentUndefined(jinja2.Undefined):
def _fail_with_undefined_error(self, *args, **kwargs):
return ""
__add__ = __radd__ = __mul__ = __rmul__ = __div__ = __rdiv__ = \
__truediv__ = __rtruediv__ = __floordiv__ = __rfloordiv__ = \
__mod__ = __rmod__ = __pos__ = __neg__ = __call__ = \
__getitem__ = __lt__ = __le__ = __gt__ = __ge__ = __int__ = \
__float__ = __complex__ = __pow__ = __rpow__ = \
_fail_with_undefined_error
jinja = Environment(
loader=PackageLoader('bioconda_utils', 'templates'),
trim_blocks=True,
lstrip_blocks=True
)
jinja_silent_undef = Environment(
undefined=JinjaSilentUndefined
)
# Patterns of allowed environment variables that are allowed to be passed to
# conda-build.
ENV_VAR_WHITELIST = [
'PATH',
'LC_*',
'LANG',
'MACOSX_DEPLOYMENT_TARGET',
'HTTPS_PROXY','HTTP_PROXY', 'https_proxy', 'http_proxy',
]
# Of those that make it through the whitelist, remove these specific ones
ENV_VAR_BLACKLIST = [
]
# Of those, also remove these when we're running in a docker container
ENV_VAR_DOCKER_BLACKLIST = [
'PATH',
]
def get_free_space():
"""Return free space in MB on disk"""
s = os.statvfs(os.getcwd())
return s.f_frsize * s.f_bavail / (1024 ** 2)
def allowed_env_var(s, docker=False):
for pattern in ENV_VAR_WHITELIST:
if fnmatch.fnmatch(s, pattern):
for bpattern in ENV_VAR_BLACKLIST:
if fnmatch.fnmatch(s, bpattern):
return False
if docker:
for dpattern in ENV_VAR_DOCKER_BLACKLIST:
if fnmatch.fnmatch(s, dpattern):
return False
return True
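# Worked examples (illustrative) of how the globs above are applied:
#
#     allowed_env_var('LC_ALL')              # True  (matches 'LC_*')
#     allowed_env_var('PATH')                # True  (whitelisted)
#     allowed_env_var('PATH', docker=True)   # False (docker blacklist)
#     allowed_env_var('SECRET_TOKEN')        # falsy (no whitelist match)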
def bin_for(name='conda'):
if 'CONDA_ROOT' in os.environ:
return os.path.join(os.environ['CONDA_ROOT'], 'bin', name)
return name
@contextlib.contextmanager
def temp_env(env):
"""
Context manager to temporarily set os.environ.
Used to send values in **env** to processes that only read the os.environ,
for example when filling in meta.yaml with jinja2 template variables.
All values are converted to string before sending to os.environ
"""
env = dict(env)
orig = os.environ.copy()
_env = {k: str(v) for k, v in env.items()}
os.environ.update(_env)
try:
yield
finally:
os.environ.clear()
os.environ.update(orig)
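# Usage sketch (illustrative; the variable names are hypothetical): expose
# values to code that reads os.environ directly, e.g. jinja2 template filling:
#
#     with temp_env({'GITHUB_TOKEN': token, 'CONDA_PY': 38}):
#         ...  # os.environ['CONDA_PY'] == '38' here (values are stringified)
#     # the original environment is restored on exit, even after exceptions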
@contextlib.contextmanager
def sandboxed_env(env):
"""
Context manager to temporarily set os.environ, only allowing env vars from
the existing `os.environ` or the provided **env** that match
ENV_VAR_WHITELIST globs.
"""
env = dict(env)
orig = os.environ.copy()
_env = {k: v for k, v in orig.items() if allowed_env_var(k)}
_env.update({k: str(v) for k, v in env.items() if allowed_env_var(k)})
os.environ = _env
try:
yield
finally:
os.environ.clear()
os.environ.update(orig)
def load_all_meta(recipe, config=None, finalize=True):
"""
For each environment, yield the rendered meta.yaml.
Parameters
----------
finalize : bool
If True, do a full conda-build render. Determines exact package builds
of build/host dependencies. It involves costly dependency resolution
via conda and also download of those packages (to inspect possible
run_exports). For fast-running tasks like linting, set to False.
"""
insert_mambabuild()
if config is None:
config = load_conda_build_config()
# `bypass_env_check=True` prevents evaluating (=environment solving) the
# package versions used for `pin_compatible` and the like.
# To avoid adding a separate `bypass_env_check` alongside every `finalize`
# parameter, just assume we do not want to bypass if `finalize is True`.
metas = [
meta
for (meta, _, _) in api.render(
recipe,
config=config,
finalize=False,
bypass_env_check=True,
)
]
# Render again if we want the finalized version.
# Rendering the non-finalized version beforehand lets us filter out
# variants that get skipped. (E.g., with a global `numpy 1.16` pin for
# py==27 the env check fails when evaluating `pin_compatible('numpy')` for
# recipes that use a pinned `numpy` and also require `numpy>=1.17` but
    # actually skip py==27. Filtering out that variant beforehand avoids this.)
if finalize:
metas = [
meta
for non_finalized_meta in metas
for (meta, _, _) in api.render(
recipe,
config=config,
variants=non_finalized_meta.config.variant,
finalize=True,
bypass_env_check=False,
)
]
return metas
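# Usage sketch (illustrative; the recipe path is hypothetical): render cheaply
# for linting, fully for building:
#
#     metas = load_all_meta('recipes/samtools', finalize=False)    # fast
#     names = {meta.name() for meta in metas}
#     finalized = load_all_meta('recipes/samtools', finalize=True) # slow, exact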
def load_meta_fast(recipe: str, env=None):
"""
    Given a recipe directory, find the meta.yaml file, render it with Jinja2,
    and parse it into a dict.
Args:
recipe: Path to recipe (directory containing the meta.yaml file)
env: Optional variables to expand
Returns:
        Tuple of the rendered meta dict and the recipe path
"""
if not env:
env = {}
try:
pth = os.path.join(recipe, 'meta.yaml')
template = jinja_silent_undef.from_string(open(pth, 'r', encoding='utf-8').read())
meta = yaml.safe_load(template.render(env))
return (meta, recipe)
    except Exception as exc:
        raise ValueError('Problem inspecting {0}'.format(recipe)) from exc
def load_conda_build_config(platform=None, trim_skip=True):
"""
Load conda build config while considering global pinnings from conda-forge.
"""
config = api.Config(
no_download_source=True,
set_build_id=False)
# get environment root
env_root = PurePath(shutil.which("bioconda-utils")).parents[1]
# set path to pinnings from conda forge package
config.exclusive_config_files = [
os.path.join(env_root, "conda_build_config.yaml"),
os.path.join(
os.path.dirname(__file__),
'bioconda_utils-conda_build_config.yaml'),
]
for cfg in chain(config.exclusive_config_files, config.variant_config_files or []):
assert os.path.exists(cfg), ('error: {0} does not exist'.format(cfg))
if platform:
config.platform = platform
config.trim_skip = trim_skip
return config
CondaBuildConfigFile = namedtuple('CondaBuildConfigFile', (
'arg', # either '-e' or '-m'
'path',
))
def get_conda_build_config_files(config=None):
if config is None:
config = load_conda_build_config()
# TODO: open PR upstream for conda-build to support multiple exclusive_config_files
for file_path in (config.exclusive_config_files or []):
yield CondaBuildConfigFile('-e', file_path)
for file_path in (config.variant_config_files or []):
yield CondaBuildConfigFile('-m', file_path)
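# Usage sketch (illustrative): flatten the yielded (arg, path) pairs into
# command line options for a conda-build invocation:
#
#     cfg_args = [part
#                 for cfg in get_conda_build_config_files()
#                 for part in (cfg.arg, cfg.path)]
#     # e.g. ['-e', '.../conda_build_config.yaml',
#     #       '-e', '.../bioconda_utils-conda_build_config.yaml']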
def load_first_metadata(recipe, config=None, finalize=True):
"""
Returns just the first of possibly many metadata files. Used for when you
need to do things like check a package name or version number (which are
not expected to change between variants).
If the recipe will be skipped, then returns None
Parameters
----------
finalize : bool
If True, do a full conda-build render. Determines exact package builds
of build/host dependencies. It involves costly dependency resolution
via conda and also download of those packages (to inspect possible
run_exports). For fast-running tasks like linting, set to False.
"""
metas = load_all_meta(recipe, config, finalize=finalize)
if len(metas) > 0:
return metas[0]
@contextlib.contextmanager
def temp_os(platform):
"""
Context manager to temporarily set sys.platform.
"""
original = sys.platform
sys.platform = platform
try:
yield
finally:
sys.platform = original
def run(cmds: List[str], env: Dict[str, str]=None, mask: List[str]=None, live: bool=True,
mylogger: logging.Logger=logger, loglevel: int=logging.INFO,
**kwargs: Dict[Any, Any]) -> sp.CompletedProcess:
"""
Run a command (with logging, masking, etc)
- Explicitly decodes stdout to avoid UnicodeDecodeErrors that can occur when
using the ``universal_newlines=True`` argument in the standard
subprocess.run.
- Masks secrets
    - Passes live output to `logging`
Arguments:
        cmds: List of command and arguments
env: Optional environment for command
mask: List of terms to mask (secrets)
live: Whether output should be sent to log
kwargs: Additional arguments to `subprocess.Popen`
Returns:
CompletedProcess object
Raises:
subprocess.CalledProcessError if the process failed
FileNotFoundError if the command could not be found
"""
logq = queue.Queue()
def pushqueue(out, pipe):
"""Reads from a pipe and pushes into a queue, pushing "None" to
indicate closed pipe"""
for line in iter(pipe.readline, b''):
out.put((pipe, line))
out.put(None) # End-of-data-token
def do_mask(arg: str) -> str:
"""Masks secrets in **arg**"""
if mask is None:
# caller has not considered masking, hide the entire command
# for security reasons
return '<hidden>'
if mask is False:
# masking has been deactivated
return arg
for mitem in mask:
arg = arg.replace(mitem, '<hidden>')
return arg
mylogger.log(loglevel, "(COMMAND) %s", ' '.join(do_mask(arg) for arg in cmds))
# bufsize=4 result of manual experimentation. Changing it can
# drop performance drastically.
with sp.Popen(cmds, stdout=sp.PIPE, stderr=sp.PIPE,
close_fds=True, env=env, bufsize=4, **kwargs) as proc:
# Start threads reading stdout/stderr and pushing it into queue q
out_thread = Thread(target=pushqueue, args=(logq, proc.stdout))
err_thread = Thread(target=pushqueue, args=(logq, proc.stderr))
out_thread.daemon = True # Do not wait for these threads to terminate
err_thread.daemon = True
out_thread.start()
err_thread.start()
output_lines = []
try:
for _ in range(2): # Run until we've got both `None` tokens
for pipe, line in iter(logq.get, None):
line = do_mask(line.decode(errors='replace').rstrip())
output_lines.append(line)
if live:
if pipe == proc.stdout:
prefix = "OUT"
else:
prefix = "ERR"
mylogger.log(loglevel, "(%s) %s", prefix, line)
except Exception:
proc.kill()
proc.wait()
raise
output = "\n".join(output_lines)
if isinstance(cmds, str):
masked_cmds = do_mask(cmds)
else:
masked_cmds = [do_mask(c) for c in cmds]
if proc.poll() is None:
mylogger.log(loglevel, 'Command closed STDOUT/STDERR but is still running')
waitfor = 30
waittimes = 5
for attempt in range(waittimes):
mylogger.log(loglevel, "Waiting %s seconds (%i/%i)", waitfor, attempt+1, waittimes)
try:
proc.wait(timeout=waitfor)
                    break
except sp.TimeoutExpired:
pass
else:
mylogger.log(loglevel, "Terminating process")
proc.kill()
proc.wait()
returncode = proc.poll()
if returncode:
logger.error('COMMAND FAILED (exited with %s): %s', returncode, ' '.join(masked_cmds))
if not live:
logger.error('STDOUT+STDERR:\n%s', output)
raise sp.CalledProcessError(returncode, masked_cmds, output=output)
        # NB: CompletedProcess takes (args, returncode, stdout=...), in that order
        return sp.CompletedProcess(masked_cmds, returncode, stdout=output)
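# Usage sketch (illustrative; ``url`` and ``token`` are hypothetical): stream
# command output to the log while masking secrets:
#
#     proc = run(['git', 'clone', url], env=os.environ, mask=[token])
#     # occurrences of `token` in the logged command and output are replaced
#     # by '<hidden>'; pass mask=False to disable masking entirely.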
def envstr(env):
env = dict(env)
return ';'.join(['='.join([i, str(j)]) for i, j in sorted(env.items())])
def flatten_dict(mapping):
    for key, values in mapping.items():
if isinstance(values, str) or not isinstance(values, Iterable):
values = [values]
yield [(key, value) for value in values]
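# Worked example (illustrative): flatten_dict() prepares a mapping for
# itertools.product, which EnvMatrix below uses to enumerate combinations:
#
#     list(product(*flatten_dict({'CONDA_PY': ['2.7', '3.5'],
#                                 'CONDA_NPY': '110'})))
#     # -> [(('CONDA_PY', '2.7'), ('CONDA_NPY', '110')),
#     #     (('CONDA_PY', '3.5'), ('CONDA_NPY', '110'))]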
class EnvMatrix:
"""
Intended to be initialized with a YAML file and iterated over to yield all
combinations of environments.
YAML file has the following format::
CONDA_PY:
- "2.7"
- "3.5"
CONDA_BOOST: "1.60"
CONDA_PERL: "5.22.0"
CONDA_NPY: "110"
CONDA_NCURSES: "5.9"
CONDA_GSL: "1.16"
"""
def __init__(self, env):
"""
Parameters
----------
env : str or dict
If str, assume it's a path to a YAML-format filename and load it
into a dict. If a dict is provided, use it directly.
"""
if isinstance(env, str):
with open(env) as f:
self.env = yaml.safe_load(f)
else:
self.env = env
for key, val in self.env.items():
if key != "CONDA_PY" and not isinstance(val, str):
raise ValueError(
"All versions except CONDA_PY must be strings.")
def __iter__(self):
"""
Given the YAML::
CONDA_PY:
- "2.7"
- "3.5"
CONDA_BOOST: "1.60"
CONDA_NPY: "110"
We get the following sets of env vars::
[('CONDA_BOOST', '1.60'), ('CONDA_PY', '2.7'), ('CONDA_NPY', '110')]
[('CONDA_BOOST', '1.60'), ('CONDA_PY', '3.5'), ('CONDA_NPY', '110')]
A copy of the entire os.environ dict is updated and yielded for each of
these sets.
"""
for env in product(*flatten_dict(self.env)):
yield env
def get_deps(recipe=None, meta=None, build=True):
"""
Generator of dependencies for a single recipe
Only names (not versions) of dependencies are yielded.
If the variant/version matrix yields multiple instances of the metadata,
the union of these dependencies is returned.
Parameters
----------
    recipe : str
        Path to the recipe directory.
    meta : MetaData
        Alternatively, a parsed conda_build.metadata.MetaData instance.
build : bool
If True yield build dependencies, if False yield run dependencies.
"""
if recipe is not None:
assert isinstance(recipe, str)
metadata = load_all_meta(recipe, finalize=False)
elif meta is not None:
metadata = [meta]
else:
raise ValueError("Either meta or recipe has to be specified.")
all_deps = set()
for meta in metadata:
if build:
deps = meta.get_value('requirements/build', [])
else:
deps = meta.get_value('requirements/run', [])
all_deps.update(dep.split()[0] for dep in deps)
return all_deps
_max_threads = 1
def set_max_threads(n):
global _max_threads
_max_threads = n
def threads_to_use():
"""Returns the number of cores we are allowed to run on"""
if hasattr(os, 'sched_getaffinity'):
cores = len(os.sched_getaffinity(0))
else:
cores = os.cpu_count()
return min(_max_threads, cores)
def parallel_iter(func, items, desc, *args, **kwargs):
pfunc = partial(func, *args, **kwargs)
with Pool(threads_to_use()) as pool:
yield from tqdm(
pool.imap_unordered(pfunc, items),
desc=desc,
total=len(items)
)
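# Usage sketch (illustrative; ``lint_one`` and ``recipes`` are hypothetical):
#
#     set_max_threads(4)
#     for result in parallel_iter(lint_one, recipes, "Linting"):
#         ...  # results arrive in completion order (imap_unordered)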
def get_recipes(recipe_folder, package="*", exclude=None):
"""
Generator of recipes.
Finds (possibly nested) directories containing a ``meta.yaml`` file.
Parameters
----------
recipe_folder : str
Top-level dir of the recipes
package : str or iterable
Pattern or patterns to restrict the results.
"""
if isinstance(package, str):
package = [package]
if isinstance(exclude, str):
exclude = [exclude]
if exclude is None:
exclude = []
for p in package:
logger.debug("get_recipes(%s, package='%s'): %s",
recipe_folder, package, p)
path = os.path.join(recipe_folder, p)
for new_dir in glob.glob(path):
meta_yaml_found_or_excluded = False
for dir_path, dir_names, file_names in os.walk(new_dir):
if any(fnmatch.fnmatch(dir_path[len(recipe_folder):], pat) for pat in exclude):
meta_yaml_found_or_excluded = True
continue
if "meta.yaml" in file_names:
meta_yaml_found_or_excluded = True
yield dir_path
if not meta_yaml_found_or_excluded and os.path.isdir(new_dir):
                logger.warning(
"No meta.yaml found in %s."
" If you want to ignore this directory, add it to the blacklist.",
new_dir
)
yield new_dir
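# Usage sketch (illustrative; paths are hypothetical): iterate all recipe
# directories below ``recipes/``, skipping an excluded subtree:
#
#     for recipe_dir in get_recipes('recipes', package='*',
#                                   exclude=['*deprecated*']):
#         print(recipe_dir)   # e.g. 'recipes/samtools'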
def get_latest_recipes(recipe_folder, config, package="*"):
"""
Generator of recipes.
Finds (possibly nested) directories containing a ``meta.yaml`` file and returns
the latest version of each recipe.
Parameters
----------
recipe_folder : str
Top-level dir of the recipes
config : dict or filename
package : str or iterable
Pattern or patterns to restrict the results.
"""
def toplevel(x):
return x.replace(
recipe_folder, '').strip(os.path.sep).split(os.path.sep)[0]
config = load_config(config)
recipes = sorted(get_recipes(recipe_folder, package), key=toplevel)
for package, group in groupby(recipes, key=toplevel):
group = list(group)
if len(group) == 1:
yield group[0]
else:
def get_version(p):
meta_path = os.path.join(p, 'meta.yaml')
meta = load_first_metadata(meta_path, finalize=False)
version = meta.get_value('package/version')
return VersionOrder(version)
sorted_versions = sorted(group, key=get_version)
if sorted_versions:
yield sorted_versions[-1]
class DivergentBuildsError(Exception):
pass
def _string_or_float_to_integer_python(s):
"""
conda-build 2.0.4 expects CONDA_PY values to be integers (e.g., 27, 35) but
older versions were OK with strings or even floats.
To avoid editing existing config files, we support those values here.
"""
try:
s = float(s)
if s < 10: # it'll be a looong time before we hit Python 10.0
s = int(s * 10)
else:
s = int(s)
except ValueError:
raise ValueError("{} is an unrecognized Python version".format(s))
return s
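# Worked examples (illustrative) of the conversion above:
#
#     _string_or_float_to_integer_python('2.7')   # -> 27
#     _string_or_float_to_integer_python(3.5)     # -> 35
#     _string_or_float_to_integer_python('35')    # -> 35 (already >= 10)
#     _string_or_float_to_integer_python('py3')   # -> ValueError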
def built_package_paths(recipe):
"""
Returns the path to which a recipe would be built.
Does not necessarily exist; equivalent to ``conda build --output recipename``
but without the subprocess.
"""
config = load_conda_build_config()
# NB: Setting bypass_env_check disables ``pin_compatible`` parsing, which
# these days does not change the package build string, so should be fine.
paths = api.get_output_file_paths(recipe, config=config, bypass_env_check=True)
return paths
def last_commit_to_master():
"""
Identifies the day of the last commit to master branch.
"""
if not shutil.which('git'):
raise ValueError("git not found")
p = sp.run(
'git log master --date=iso | grep "^Date:" | head -n1',
shell=True, stdout=sp.PIPE, check=True
)
date = datetime.datetime.strptime(
p.stdout[:-1].decode().split()[1],
'%Y-%m-%d')
return date
def file_from_commit(commit, filename):
"""
Returns the contents of a file at a particular commit as a string.
Parameters
----------
commit : commit-like string
filename : str
"""
if commit == 'HEAD':
return open(filename).read()
p = run(['git', 'show', '{0}:{1}'.format(commit, filename)], mask=False,
loglevel=0)
return str(p.stdout)
def newly_unblacklisted(config_file, recipe_folder, git_range):
"""
Returns the set of recipes that were blacklisted in master branch but have
since been removed from the blacklist. Considers the contents of all
blacklists in the current config file and all blacklists in the same config
file in master branch.
Parameters
----------
config_file : str
Needs filename (and not dict) because we check what the contents of the
config file were in the master branch.
recipe_folder : str
Path to recipe dir, needed by get_blacklist
git_range : str or list
        May be a str or a single-item list. ``'HEAD'``, ``['HEAD']``, and
        ``['master', 'HEAD']`` all compare the current changes to master. If
        other commits are specified, those are used directly via ``git show``.
"""
# 'HEAD' becomes ['HEAD'] and then ['master', 'HEAD'].
# ['HEAD'] becomes ['master', 'HEAD']
# ['HEAD~~', 'HEAD'] stays the same
if isinstance(git_range, str):
git_range = [git_range]
if len(git_range) == 1:
git_range = ['master', git_range[0]]
# Get the set of previously blacklisted recipes by reading the original
# config file and then all the original blacklists it had listed
previous = set()
orig_config = file_from_commit(git_range[0], config_file)
for bl in yaml.safe_load(orig_config)['blacklists']:
with open('.tmp.blacklist', 'w', encoding='utf8') as fout: