Extraction
Extract data from CPAN
# Basic extraction: target the CPAN "recent" page, submit the search form
# with the query "perl", and dump whatever the item template captures.
# NOTE(review): the trailing ->() presumably triggers the actual fetch;
# confirm against the scraping library's documentation.
url("http://search.cpan.org/recent")->();

# Fill in the form named "f" and submit it.
submit_form(
    form_name => "f",
    fields    => { query => "perl" },
);

# Everything between the item markers is captured under the name "p".
template("<!--item-->[% p %]<!--end item-->");
extract;

# Show the extraction results.
print Dumper extresult;
Extract data from CPAN after some HTML cleanup
# Extraction preceded by HTML cleanup: submit the search form, cut the
# document down to the region between the <!--results--> markers, then
# apply the item template and dump the results.
#
# Each statement sits on its own line on purpose: a trailing "#" comment
# runs to the end of the physical line, so on a single line it would
# comment out every statement that follows it.
url("http://search.cpan.org/recent")->();
submit_form(
    form_name => "f",
    fields    => { query => "perl" },
);

# The substitution is passed as a string (q(...)); it keeps only the text
# captured between the result markers.
preproc(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s));
print document->as_string;    # print content to STDOUT

# Everything between the item markers is captured under the name "p".
template("<!--item-->[% p %]<!--end item-->");
extract;

# Show the extraction results.
print Dumper extresult;
Clean up HTML, extract data, and refine results
# Full pipeline: clean up the HTML, extract items, then refine each
# extracted record before dumping the results.
#
# Each statement sits on its own line on purpose: a trailing "#" comment
# runs to the end of the physical line, so on a single line it would
# comment out every statement that follows it.
url("http://search.cpan.org/recent")->();
submit_form(
    form_name => "f",
    fields    => { query => "perl" },
);

# The substitution is passed as a string (q(...)); it keeps only the text
# captured between the result markers.
preproc(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s));
print $$_;    # print content to STDOUT

# Everything between the item markers is captured under the name "rec".
template("<!--item-->[% rec %]<!--end item-->");
extract;

# Post-process the "rec" field of the extracted data.
postproc(q($_->{rec} =~ s/<.+?>//g));    # Strip HTML tags

# Show the refined extraction results.
print Dumper extresult;
Use filtering syntax
# Same cleanup / extract / refine flow, written as a chain of filters.
fetch("http://search.cpan.org/recent");
submit_form(
    form_name => "f",
    fields    => { query => "perl" },
);

# One stage per line, joined with "|".
# NOTE(review): "|" is presumably overloaded by the scraping library to
# behave like a pipeline; confirm against its documentation.
$_
    | _doc_filter(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s))
    | _template("<!--item-->[% rec %]<!--end item-->")
    | _result_filter(q($_->{rec} =~ s/<.+?>//g));

# Dereference $_ as an array and dump it.
print Dumper \@$_;
Invoke handler for extracted results
# Filter chain followed by a handler call instead of an explicit print.
fetch("http://search.cpan.org/recent");
submit_form(
    form_name => "f",
    fields    => { query => "perl" },
);

# One stage per line, joined with "|". The middle stage here is a bare
# template string rather than a _template(...) call.
# NOTE(review): presumably a plain string in the chain is treated as a
# template; confirm against the scraping library's documentation.
$_
    | _doc_filter(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s))
    | "<!--item-->[% rec %]<!--end item-->"
    | _result_filter(q($_->{rec} =~ s/<.+?>//g));

# Name the handler that should receive the extracted results.
invoke_handler('Data::Dumper');
You can also put extracted results straight into relational databases.
# Pass the extracted results to database-backed handler classes.
# The module names appear to be placeholders for your own classes built
# on Class::DBI or on DBIx::Class's CDBICompat layer.
invoke_handler('Some::Module::based::on::Class::DBI');
invoke_handler('Some::Module::based::on::DBIx::Class::CDBICompat');