Extraction
Extract data from CPAN
# Example: open the CPAN "recent" page, submit its search form, and dump
# whatever the extraction template captures.
# NOTE(review): url, submit_form, template, extract and extresult are not
# defined in this snippet; presumably they are exported by the scraping
# library this synopsis documents -- confirm against its reference.

# Name the page to work on. The trailing ->() calls the value that url()
# returns as a code reference (presumably this performs the fetch -- confirm).
url("http://search.cpan.org/recent")->();
# Submit the form named "f" with its "query" field set to "perl".
submit_form(
form_name => "f",
fields => {
query => "perl"
});
# Extraction template: the [% p %] placeholder sits between the
# <!--item--> and <!--end item--> markers (presumably each match is
# captured under the key "p" -- confirm).
template("<!--item-->[% p %]<!--end item-->");
# Run the extraction; called with no arguments, so it presumably works on
# the current document and template.
extract;
# Dump what extresult returns (presumably the extracted records).
print Dumper extresult;
Extract data from CPAN after some HTML cleanup
# Example: same search as a basic extraction, but the page is trimmed down
# to its results section before the template is applied.
# NOTE(review): the functions used here are not defined in this snippet;
# presumably they come from the scraping library this synopsis documents.

# Name the page to work on; ->() calls the value url() returns
# (presumably this performs the fetch -- confirm).
url("http://search.cpan.org/recent")->();
# Submit the form named "f" with its "query" field set to "perl".
submit_form(
form_name => "f",
fields => {
query => "perl"
});
# The clean-up code is passed as a single-quoted string (q(...)), so it is
# not run here; presumably preproc evaluates it against the page content.
# The substitution replaces the whole text with the part captured between
# <!--results--> and <!--end results-->. /s lets "." match newlines, and the
# pattern needs at least one character before and after the two markers.
preproc(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s));
print document->as_string; # print content to STDOUT
# Extraction template with the [% p %] placeholder between the item markers.
template("<!--item-->[% p %]<!--end item-->");
# Run the extraction (no arguments; presumably uses the cleaned-up document).
extract;
# Dump what extresult returns (presumably the extracted records).
print Dumper extresult;
HTML cleanup, extract data, and refine results
# Example: trim the page to its results section, extract records, then
# post-process each record before dumping.
# NOTE(review): the functions used here are not defined in this snippet;
# presumably they come from the scraping library this synopsis documents.

# Name the page to work on; ->() calls the value url() returns
# (presumably this performs the fetch -- confirm).
url("http://search.cpan.org/recent")->();
# Submit the form named "f" with its "query" field set to "perl".
submit_form(
form_name => "f",
fields => {
query => "perl"
});
# Clean-up code passed as a string: replaces the whole text with the part
# captured between <!--results--> and <!--end results--> (/s lets "." match
# newlines). Presumably preproc evaluates it against the page content.
preproc(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s));
# $$_ dereferences $_ as a scalar reference; presumably $_ holds the
# library's current object and this yields the document text -- confirm.
print $$_; # print content to STDOUT
# Extraction template; the placeholder here is named "rec", which is the
# key the post-processing code below reads.
template("<!--item-->[% rec %]<!--end item-->");
# Run the extraction (no arguments; presumably uses the cleaned-up document).
extract;
# NOTE(review): the pattern has no /s modifier, so "." does not match a
# newline and a tag that spans several lines is left in place.
postproc(q($_->{rec} =~ s/<.+?>//g)); # Strip HTML tags
# Dump what extresult returns (presumably the refined records).
print Dumper extresult;
Use filtering syntax
# Example: the clean-up / extract / refine sequence written as a chain of
# filters joined with "|".
# NOTE(review): fetch, submit_form and the _-prefixed filter functions are
# not defined in this snippet; presumably they come from the scraping
# library this synopsis documents.

# Load the page (presumably a one-call equivalent of url(...)->() -- confirm).
fetch("http://search.cpan.org/recent");
# Submit the form named "f" with its "query" field set to "perl".
submit_form(
form_name => "f",
fields => {
query => "perl"
});
# One statement spanning three lines. NOTE(review): for this to act as a
# pipeline, "|" must be overloaded on whatever $_ holds; with plain scalars
# it would be Perl's bitwise OR -- confirm against the library.
# Stage 1: code string that replaces the text with the part captured
# between <!--results--> and <!--end results-->.
$_ | _doc_filter(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s))
# Stage 2: extraction template with the "rec" placeholder.
| _template("<!--item-->[% rec %]<!--end item-->")
# Stage 3: code string that removes <...> sequences from each record's
# "rec" field (no /s, so tags spanning a newline are not matched).
| _result_filter(q($_->{rec} =~ s/<.+?>//g));
# @$_ dereferences $_ as an array and \ takes a reference to it for Dumper;
# presumably the array view of $_ is the list of extracted records.
print Dumper \@$_;
Invoke handler for extracted results
# Example: a "|" filter chain whose results are passed to a named handler
# instead of being printed directly.
# NOTE(review): fetch, submit_form, the _-prefixed filter functions and
# invoke_handler are not defined in this snippet; presumably they come from
# the scraping library this synopsis documents.

# Load the page (presumably a one-call equivalent of url(...)->() -- confirm).
fetch("http://search.cpan.org/recent");
# Submit the form named "f" with its "query" field set to "perl".
submit_form(
form_name => "f",
fields => {
query => "perl"
});
# One statement spanning three lines. NOTE(review): "|" must be overloaded
# on whatever $_ holds for this to act as a pipeline -- confirm.
# Stage 1: code string that replaces the text with the part captured
# between <!--results--> and <!--end results-->.
$_ | _doc_filter(q(s/\A.+<!--results-->(.+)<!--end results-->.+\Z/$1/s))
# Stage 2: the template is given as a bare string rather than wrapped in a
# function call (presumably the library treats a plain string in the chain
# as an extraction template -- confirm).
| "<!--item-->[% rec %]<!--end item-->"
# Stage 3: code string that removes <...> sequences from each record's
# "rec" field (no /s, so tags spanning a newline are not matched).
| _result_filter(q($_->{rec} =~ s/<.+?>//g));
# Hand the results to the handler named by the string 'Data::Dumper'
# (presumably this dumps the extracted records -- confirm).
invoke_handler('Data::Dumper');