File Coverage

File:lib/Pipeline/Simple.pm
Coverage:81.0%

linestmtbrancondsubpodtimecode
1#-----------------------------------------------------------------
2# Pipeline::Simple
3# Author: Heikki Lehvaslaiho <heikki.lehvaslaiho@gmail.com>
4# For copyright and disclaimer see Pipeline::Simple.pod.
5#
6# Lightweight workflow manager
7## no critic
8package Pipeline::Simple;
9# ABSTRACT: Simple workflow manager
10
11
2
2
2
495
3
32
use strict;
12
2
2
2
6
6
40
use warnings;
13
2
2
2
392
11190
14
use autodie;
14## use critic
15
16
2
2
2
54134
4
61
use Carp;
17
2
2
2
7
3
90
use File::Basename;
18
2
2
2
591
3035
74
use File::Copy;
19
2
2
2
831
11090
12
use XML::Simple;
20
2
2
2
100
4
72
use Data::Dumper;
21
2
2
2
9792
61057
15
use Log::Log4perl qw(get_logger :levels :no_extra_logdie_message);
22
23
24#-----------------------------------------------------------------
25# Global variables
26#-----------------------------------------------------------------
27
28my $logger_level = {
29    '-1' => $WARN,
30    '0' => $INFO,
31    '1' => $DEBUG,
32};
33
34#-----------------------------------------------------------------
35# new
36#-----------------------------------------------------------------
37sub new {
38
3
1
12
    my ($class, @args) = @_;
39
40    # create an object
41
3
40
    my $self = bless {}, ref ($class) || $class;
42
43    # set all @args into this object with 'set' values
44
3
15
    my (%args) = (@args == 1 ? (value => $args[0]) : @args);
45
46    # do dir() first so that we know where to write the log
47
3
11
    $self->dir($args{'dir'}) if defined $args{'dir'};
48
49    # start logging
50
3
8
    $self->_configure_logging;
51
52
3
16
    foreach my $key (keys %args) {
53
8
18
        next if $key eq 'config'; # this needs to be evaluated last
54
7
15
        next if $key eq 'dir'; # done this
55        ## no critic
56
2
2
2
290
4
4120
        no strict 'refs';
57        ## use critic
58
6
16
        $self->$key($args{$key});
59    }
60    # delayed to find out verbosity level
61
3
7
    $self->logger->info("Logging started");
62
63    # this argument needs to be done last
64
3
31
    $self->config($args{'config'}) if defined $args{'config'};
65
66    # look into dir() if config not given
67
3
14
    $self->config($self->dir. '/config.xml')
68        if not $self->{config} and defined $self->dir and -e $self->dir. '/config.xml';
69
70    # die if no config found
71
3
14
    $self->logger->fatal("pipeline config file not provided or not found in pwd")
72        if not $self->{config} and not $self->debug;
73
74    # done
75
3
29
    return $self;
76}
77
78
79#-----------------------------------------------------------------
80# Configure the logger
81#-----------------------------------------------------------------
82
83sub _configure_logging {
84
3
6
    my $self = shift;
85
86
3
6
    my $logger_config = q(
87      log4perl.category.Pipeline = INFO, Screen
88        log4perl.appender.Screen = Log::Log4perl::Appender::Screen
89        log4perl.appender.Screen.stderr = 1
90        log4perl.appender.Screen.layout = Log::Log4perl::Layout::SimpleLayout
91    );
92
93
3
22
    Log::Log4perl->init_once( \$logger_config );
94
3
4005
    my $logger = Log::Log4perl->get_logger("Pipeline");
95
96
3
55
    if ($self->dir) {
97
1
2
        my $to_file = Log::Log4perl::Appender->new
98            ("Log::Log4perl::Appender::File",
99             name => 'Log',
100             filename => $self->dir. '/pipeline.log',
101             mode => 'append');
102
1
1515
        my $pattern = '%d [%r] %p %L | %m%n';
103
1
9
        my $layout = Log::Log4perl::Layout::PatternLayout->new ($pattern);
104
1
383
        $to_file->layout ($layout);
105
106
1
9
        $logger->add_appender($to_file);
107    }
108
109
3
470
    $logger->level( $INFO );
110
111
3
1324
    $self->logger($logger);
112
113}
114
115
116
117#-----------------------------------------------------------------
118#
119#-----------------------------------------------------------------
120
121sub verbose {
122
4
1
13
    my ($self, $value) = @_;
123
4
11
    if (defined $value) {
124
3
6
        $self->{_verbose} = $value;
125
126        # verbose = -1 0 1
127        # log level = WARN INFO DEBUG
128
129
3
6
        $self->logger->level( $logger_level->{$value} );
130    }
131
4
3446
    return $self->{_verbose};
132}
133
134sub id {
135
66
1
135
    my ($self, $value) = @_;
136
66
170
    if (defined $value) {
137
11
24
        $self->{_id} = $value;
138    }
139
66
288
    return $self->{_id};
140}
141
142sub description {
143
7
1
19
    my ($self, $value) = @_;
144
7
66
    if (defined $value) {
145
3
8
        $self->{_description} = $value;
146    }
147
7
33
    return $self->{_description};
148}
149
150sub name {
151
12
1
28
    my ($self, $value) = @_;
152
12
31
    if (defined $value) {
153
3
8
        $self->{_name} = $value;
154    }
155
12
85
    return $self->{_name};
156}
157
158sub path {
159
2
1
5
    my ($self, $value) = @_;
160
2
5
    if (defined $value) {
161
1
2
        $self->{_path} = $value;
162    }
163
2
11
    return $self->{_path};
164}
165
166sub next_id {
167
1
1
2
    my ($self, $value) = @_;
168
1
3
    if (defined $value) {
169
1
3
        $self->{_next_id} = $value;
170    }
171
1
6
    return $self->{_next_id};
172}
173
174
175sub input {
176
1
1
3
    my ($self, $value) = @_;
177
1
3
    if (defined $value) {
178
1
2
        $self->{_input} = $value;
179    }
180
1
6
    return $self->{_input};
181}
182
183
184sub itype {
185
3
1
6
    my ($self, $value) = @_;
186
3
8
    if (defined $value) {
187
1
2
        $self->{_itype} = $value;
188    }
189
3
16
    return $self->{_itype};
190}
191
192sub add {
193
1
1
3
    my ($self, $value) = @_;
194
1
4
    if (defined $value) {
195
1
2
        $self->{_add} = $value;
196    }
197
1
7
    return $self->{_add};
198}
199
200sub start {
201
2
1
7
    my ($self, $value) = @_;
202
2
8
    if (defined $value) {
203
1
6
        $self->{_start} = $value;
204    }
205
2
11
    return $self->{_start};
206}
207
208
209sub stop {
210
1
1
4
    my ($self, $value) = @_;
211
1
4
    if (defined $value) {
212
1
3
        $self->{_stop} = $value;
213    }
214
1
7
    return $self->{_stop};
215}
216
217
218sub debug {
219
4
1
8
    my ($self, $value) = @_;
220
4
10
    if (defined $value) {
221
2
3
        $self->{_debug} = $value;
222    }
223
4
12
    return $self->{_debug};
224}
225
226sub logger {
227
28
1
77
    my ($self, $value) = @_;
228
28
85
    if (defined $value) {
229
3
7
        $self->{_logger} = $value;
230    }
231
28
224
    return $self->{_logger};
232}
233
234sub config {
235
2
1
5
    my ($self, $config) = @_;
236
237
2
6
    if ($config) {
238
2
4
        $self->logger->info("Using config file: ". $config);
239
2
2
3644
15
        my $pwd = `pwd`; chomp $pwd;
240
2
53
        $self->logger->debug("pwd: $pwd");
241
2
41
        die unless -e $config;
242        # copy the pipeline config
243
244
2
10
        if ($self->dir and not -e $self->dir."/config.xml") {
245            #print "--->", `pwd`, "\n";
246
1
3
            copy $config, $self->dir."/config.xml";
247
1
192
            $self->logger->debug("Config [$config] file copied to: ".
248                                  $self->dir."/config.xml");
249        }
250
251
2
16
        $self->{config} = XMLin($self->dir."/config.xml", KeyAttr => {step => 'id'});
252
253        # set pipeline start parameters
254
2
69640
        $self->id('s0');
255
2
13
        $self->name($self->{config}->{name} || '');
256
2
18
        $self->description($self->{config}->{description} || '');
257
258        # go through all steps once
259
2
4
        my $nexts; # hashref for finding start point(s)
260
2
2
5
16
        for my $id (sort keys %{$self->{config}->{step}}) {
261
8
18
            my $step = $self->{config}->{step}->{$id};
262
263            # bless all steps into Pipeline objects
264
8
29
            bless $step, ref($self);
265
266            #print "ERROR: $id already exists\n" if defined $self->step($id);
267            # create the list of all steps to be used by each_step()
268
8
17
            $step->id($id);
269
8
8
13
17
            push @{$self->{steps}}, $step;
270
271            #turn a next hashref into an arrayref, (fixing XML::Simple complication)
272
8
25
            unless ( ref($step->{next}) eq 'ARRAY' ) {
273
6
12
                my $next = $step->{next};
274
6
12
                delete $step->{next};
275
6
6
7
13
                push @{$step->{next}}, $next;
276            }
277
278            # a step without a parent is a starting point
279
8
8
14
16
            foreach my $next (@{$step->{next}}) {
280
10
33
                $nexts->{$next->{id}}++ if $next->{id};
281            }
282        }
283
284        # store starting points
285
2
11
        foreach my $step ($self->each_step) {
286
8
2
17
6
            push @{$self->{next}}, { id => $step->id}
287               unless $nexts->{$step->id}
288        }
289
290        #run needs to fail if starting input values are not set!
291
292        # insert the startup value into the appropriate starting step
293        # unless we are reading old config
294
2
10
        if ($self->itype and $self->input) { # only if new starting input value has been given
295
0
0
            my $real_start_id;
296
0
0
            for my $step_id ( $self->each_next) {
297
0
0
                my $step = $self->step($step_id);
298
299                # if input type is right, insert the value
300                # note only one of the each type can be used
301
0
0
0
0
                foreach my $arg (@{$step->{arg}}) {
302                    #print Dumper $arg;
303
0
0
                    next unless $arg->{key} eq 'in' and
304                                defined $arg->{type} and
305                                $arg->{type} eq $self->itype;
306                    #print Dumper $self->itype, $step->id, $arg;
307
0
0
                    $arg->{value} = $self->input;
308                    #print Dumper $arg;
309
0
0
                    $real_start_id = $step_id;
310                }
311            }
312
0
0
            $self->{next} = undef;
313
0
0
0
0
            push @{$self->{next}}, { id => $real_start_id};
314        }
315    }
316
2
17
    return $self->{config};
317}
318
319#-----------------------------------------------------------------
320#
321#-----------------------------------------------------------------
322sub dir {
323
18
1
39
    my ($self, $dir) = @_;
324
18
47
    if ($dir) {
325
2
40
        mkdir $dir unless -e $dir and -d $dir;
326
2
115
        croak "Can not create project directory $dir"
327            unless -e $dir and -d $dir;
328
2
6
        $self->{_dir} = $dir;
329    }
330
18
144
    $self->{_dir};
331}
332
333#-----------------------------------------------------------------
334#
335#-----------------------------------------------------------------
336sub step {
337
8
1
16
    my ($self) = shift;
338
8
20
    my $id = shift;
339
8
23
    return $self->{config}->{step}->{$id};
340}
341
342sub each_next {
343
32
28
40
32
1
66
79
108
77
    map { $_->{id} } grep { $_->{id} } @{shift->{next}};
344}
345
346sub each_step {
347
6
6
1
46
32
    @{shift->{steps}};
348}
349
350
351
352sub run {
353
1
1
2
    my ($self) = shift;
354
1
3
    unless ($self->dir) {
355
0
0
        $self->logger->fatal("Need an output directory to run()");
356
0
0
        croak "Need an output directory to run()";
357    }
358
359    ###
360    # check for input file and warn if not found
361
362
1
15
    chdir $self->{_dir};
363
364    #
365    # Determine where in the pipeline to start
366    #
367
368
1
55
    my @steps; # array of next execution points
369
370    # User has given a starting point id
371
1
5
    if ($self->start) {
372
0
0
        push @steps, $self->start;
373
0
0
        $self->logger->info("Starting at [". $self->start. "]" );
374    }
375    # determine where the execution of the pipeline was interrupted
376    else {
377
1
3
        open my $LOG, '<', $self->dir. "/pipeline.log"
378            or $self->logger->fatal("Can't open ". $self->dir.
379                                    "/pipeline.log for reading: $!");
380
1
63
        my $in_execution;
381
382        # take only the previous run
383
1
2
        my @log;
384
1
25
        while (<$LOG>) {
385
0
0
            push @log, $_;
386
0
0
            @log = () if /Run started/;
387            #print scalar @log, "\n";
388        }
389# print "========================\n";
390# print "@log";
391
1
3
        for (@log) {
392
0
0
            next unless /\[(\d+)\]/;
393
0
0
            undef $in_execution; # start of a new run
394
0
0
            next unless /\| (Running|Finished) +\[(\w+)\]/;
395
0
0
            $in_execution->{$2}++ if $1 eq 'Running';
396
0
0
            delete $in_execution->{$2} if $1 eq 'Finished';
397           # print Dumper $in_execution;
398        }
399
400
1
5
        @steps = sort keys %$in_execution;
401
1
13
        if (not @steps and scalar @log > 2) {
402
0
0
            $self->logger->warn("Pipeline is already finished. ".
403                                "Drop -config and define the start step to rerun" );
404
0
0
            exit 0;
405        }
406        elsif (@steps) {
407
0
0
            $self->logger->info("Continuing at ". $steps[0] );
408        } else {
409            # start from beginning
410
1
4
            @steps = $self->each_next;
411
1
5
            $self->logger->info("Starting at [". $steps[0] . "]");
412        }
413    }
414
415    #
416    # Execute one step at a time
417    #
418
419
1
18
    $self->logger->info("Run started");
420
421
1
9
    while (my $step_id = shift @steps) {
422
4
10
        $self->logger->debug("steps: [". join (", ", @steps). "]");
423
4
41
        my $step = $self->step($step_id);
424
4
9
        croak "ERROR: Step [$step_id] does not exist" unless $step;
425        # check that we got an object
426
427        # check that the input file exists
428
4
4
8
15
        foreach my $arg (@{$step->{arg}}) {
429
11
35
            next unless $arg->{key} eq 'in';
430
4
31
            next unless $arg->{type} =~ /file|dir/ ;
431        }
432
433
4
12
        my $command = $step->render;
434
4
9
        $self->logger->info("Running [". $step->id . "] $command" );
435
4
7031
        `$command`;
436
4
58
        $self->logger->info("Finished [". $step->id . "]" );
437
438        # Add next step(s) to the execution queue unless
439        # the user has asked to stop here
440
4
61
        if ( defined $self->{_stop} and $step->id eq $self->{_stop} ) {
441
0
0
            $self->logger->info("Stopping at [". $step->id . "]" );
442        } else {
443
4
15
            push @steps, $step->each_next
444        }
445
446    }
447
1
15
    1;
448}
449
450
451#-----------------------------------------------------------------
452# Render a step into a command line string
453#-----------------------------------------------------------------
454
455sub render {
456
20
1
51
    my ($step, $display) = @_;
457
458# $step ||= $self;
459# print "\n"; print Dumper $step; print "\n";
460
461
20
34
    my $str;
462    # path to program
463
20
51
    if (defined $step->{path}) {
464
0
0
        $str .= $step->{path};
465
0
0
        $str .= '/' unless substr($str, -1, 1) eq '/' ;
466    }
467    # program name
468
20
67
    $str .= $step->{name} || '';
469
470    # arguments
471
20
43
    my $endstr = '';
472
20
20
29
59
    foreach my $arg (@{$step->{arg}}) {
473
474
48
233
        if (defined $arg->{type} and $arg->{type} eq 'unnamed') {
475            #$str .= ' "'. $arg->{value}. '"';
476
6
13
            $str .= ' '. $arg->{value};
477
6
13
            next;
478        }
479
480
42
184
        if (defined $arg->{type} and $arg->{type} eq 'redir') {
481
30
88
            if ($arg->{key} eq 'in') {
482
12
31
                $endstr .= " < ". $arg->{value};
483            }
484            elsif ($arg->{key} eq 'out') {
485
18
42
                $endstr .= " > ". $arg->{value};
486            } else {
487
0
0
                croak "Unknown key ". $arg->{key};
488            }
489
30
61
            next;
490        }
491
492
12
29
        if (defined $arg->{value}) {
493
0
0
            $str .= " -". $arg->{key}. "=". $arg->{value};
494        } else {
495
12
33
            $str .= " -". $arg->{key};
496        }
497
498    }
499
20
42
    $str .= $endstr;
500
501
20
62
    $str =~ s/(['"])/\\$1/g if $display;
502
503
20
96
    return $str;
504}
505
506sub stringify {
507
1
1
3
    my ($self) = @_;
508
509
1
2
    my @res;
510    # add checks for duplicated ids
511
512    # add check for a next pointer that leads nowhere
513
514
1
4
    my @steps = $self->each_next;
515
1
2
    my $outputs; #hashref for storing input and output filenames
516
1
4
    while (my $step_id = shift @steps) {
517
4
10
        my $step = $self->step($step_id);
518
4
7
        push @res, $step->id, "\n";
519
4
11
        push @res, "\t", $step->render('4display'), " # ";
520
4
3
11
7
        map { push @res, "->", $_, " " } $step->each_next;
521
522
4
9
        push @steps, $step->each_next;
523
524
4
4
7
14
        foreach my $arg (@{$step->{arg}}) {
525
11
49
            if ($arg->{key} eq 'out') {
526
4
7
                for ($step->each_next) {
527
3
17
                    push @res, "\n\t", "WARNING: Output file [".
528                        $arg->{value}."] is read by [",
529                        $outputs->{$arg->{value}}, "] and [$_]"
530                    if $outputs->{$arg->{value}};
531
532
3
9
                    $outputs->{$arg->{value}} = $_;
533                }
534            }
535            elsif ($arg->{key} eq 'in' and $arg->{type} ne 'redir') {
536
1
10
                my $prev_step_id = $outputs->{$arg->{value}} || '';
537
1
3
                push @res, "\n\t". "ERROR: Output from the previous step is not [".
538                    ($arg->{value} || ''). "]"
539                    if $prev_step_id ne $step->id and $prev_step_id eq $self->id;
540            }
541            # test for steps not refencesed by other steps (missing next tag)
542        }
543
4
18
        push @res, "\n";
544    }
545
1
14
    return join '', @res;
546}
547
548
549sub graphviz {
550
2
1
5
    my $self = shift;
551
2
5
    my $function = shift;
552
553
2
637
    require GraphViz;
554
2
40793
    my $g= GraphViz->new;
555
556
2
76
    my $end;
557
2
9
    $g->add_node($self->id,
558                 label => $self->id. " : ".
559                 $self->render('4display'), rank => 'top');
560
2
2
85
11
    map { $g->add_edge('s0' => $_) } $self->each_next;
561
2
6306
    if ($self->description) {
562
2
6
        $g->add_node('desc', label => $self->description,
563                     shape => 'box', rank => 'top');
564
2
91
        $g->add_edge('s0' => 'desc');
565    }
566
567
2
36
    foreach my $step ($self->each_step) {
568
8
200
        $g->add_node($step->id, label => $step->id. " : ". ($step->name||'') );
569
8
269
        if ($step->each_next) {
570
4
6
10
91
            map { $g->add_edge($step->id => $_, label => $step->render('display') ) }
571                $step->each_next;
572        } else {
573
4
8
            $end++;
574
4
20
            $g->add_node($end, label => ' ');
575
4
137
            $g->add_edge($step->id => $end, label => $step->render('display') );
576        }
577
578    }
579
2
46
    return $g->as_dot;
580
581}
582
5831;