Ignore:
Timestamp:
Oct 7, 2010, 1:05:25 AM (11 years ago)
Author:
teymour
Message:

Parsing des députés prenant en compte le nouveau design de l'AN

File:
1 edited

Legend:

Unmodified
Added
Removed
  • cpc/trunk/project/batch/depute/parse_depute.pl

    r1239 r1700  
    24 24     $depute{'Sexe'} = ($depute{'Nom'} =~ /M[ml]/) ? 'F' : 'H';
    25 25     $depute{'Nom'} =~ s/^\s*M\S+\s//;
    26        while($t = $p->get_tag('u', '/div')) {
       26     while($t = $p->get_tag('span', '/div')) {
    27 27         return if ($t->[0] eq '/div');
    28            $txt = $p->get_text('/u');
       28         $txt = $p->get_text('/span');
    29 29         $txt =~ /^(\S+)\s*/;
    30 30         $e = $1;
     
    48 48 sub contact {
    49 49     $p = shift;
    50        while($p->get_tag('u', '/div')) {
       50     while($p->get_tag('span', '/div')) {
    51 51         last if ($t->[0] =~ /^\//);
    52            $_ = $p->get_text('/u');
       52         $_ = $p->get_text('/span');
    53 53         if (/Mél/) {
    54 54             $_ = $p->get_text('/li');
     
    83 83 sub mandat {
    84 84     $p = shift;
    85        while ($t = $p->get_tag('u', '/div')) {
       85     while ($t = $p->get_tag('span', '/div')) {
    86 86         last if ($t->[0] =~ /^\//);
    87            $_ = $p->get_text('/u');
       87         $_ = $p->get_text('/span');
    88 88         if (/Mandat|Commission|Mission/) {
    89 89             $text = $p->get_text('ul');
     
    151 151 }
    152 152
    153     while($t = $p->get_tag("h1", "img")) {
        153 while($t = $p->get_tag("h2", "img")) {
    154 154     if ($t->[0] eq 'img') {
    155 155         if (! $depute{'photo'} && $t->[1]{'src'} =~ /photo/) {
     
    162 162         next;
    163 163     }
    164         $_ = $p->get_text('/h1');
        164     $_ = $p->get_text('/h2');
    165 165     if (/Informations générales/) {
    166 166         infosgene($p);
Note: See TracChangeset for help on using the changeset viewer.