Changeset 2533


Ignore:
Timestamp:
Dec 4, 2011, 3:59:39 PM (9 years ago)
Author:
komodo
Message:

compute latest pour amdmts et docs

Location:
cpc/branches/senat/project/batch
Files:
1 added
2 edited

Legend:

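Unmodified (old and new line numbers both shown, run together before the code, e.g. "511fi" = old line 5, new line 11, code "fi")
Added (only the new line number shown, directly followed by the code)
Removed (only the old line number shown, then a space, then the code)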
  • cpc/branches/senat/project/batch/documents/compute_latest.sh

    r2196 r2533  
    11#!/bin/bash
    22
    3 if ! test -e out/ ; then
    4         mkdir out
     3mkdir -p out
     4
     5if [ -e to_parse.list ]; then
     6  for file in `cat to_parse.list`; do
     7    echo $file
     8    output=`echo $file | sed 's/\(pjl\|ppl\|ppr\|rap\|rga\|tas\)\//out\//'`
     9    perl parse_doc.pl $file > $output
     10  done
    511fi
    612
    7 for file in `perl download_docs.pl`; do
    8   echo $file
    9   file2=`echo $file | sed 's/^\(pjl\|ppl\|ppr\|rap\|rga\|tas\)\//out\//'`
    10   perl parse_doc.pl $file > $file2
    11 done;
    12 
    13 
  • cpc/branches/senat/project/batch/download_elements_dossiers.pl

    r2526 r2533  
    77use File::Path qw(make_path);
    88
    9 #Annee des dossiers à télécharger
    10 $year = shift;
    11 $since_hour = shift || 24;
    12 $verbose = shift || 0;
    13 
    149$lastyear = localtime(time);
    1510my @month = `date +%m`;
     
    1712$lastyear-- if ($month[0] < 10);
    1813
    19 $year = $lastyear if (!$year);
     14#Annee des dossiers à télécharger
     15$year = shift || $lastyear;
     16$since_hour = shift || 24;
     17$verbose = shift || 0;
     18
    2019$yearzero = $year;
    2120if (! $year =~ /^\d{4}$/) {
     
    2928$a = WWW::Mechanize->new();
    3029$aif = WWW::Mechanize->new();
    31 $aif->add_header('If-Modified-Since' => scalar(localtime(time()-3600*$since_hour)));
     30$aif->add_header('If-Modified-Since' => scalar(localtime(time()-3600*$since_hour))) if ($since_hour > 0);
     31
     32open LISTAMD, ">:utf8", "amendements/to_parse.list";
     33open LISTDOC, ">:utf8", "documents/to_parse.list";
    3234
    3335sub download_one {
     
    5759  $thecontent = $aif->content;
    5860  if (!$thecontent) {
    59         $aif->back();
    60         return ;
     61    $aif->back();
     62    return ;
    6163  }
    6264
    63   print "    $dir\t\t->\t\t$htmfile\n";
     65  if ($dir =~ /amendements/) {
     66    if ($dir =~ /html/) {
     67      print LISTAMD "html/$htmfile\n";
     68    } else {
     69# gestion des pdfs?
     70    }
     71  } else {
     72    $ssdir = $dir;
     73    $ssdir =~ s/documents\///;
     74    print LISTDOC "$ssdir/$htmfile\n";
     75  }
    6476  open FILE, ">:utf8", "$dir/$htmfile";
    6577  if ($thecontent =~ s/iso-8859-1/utf-8/gi) {
     
    167179explore_page("http://www.senat.fr/rapports/rapports-groupe-amitie.html");
    168180
     181close LISTAMD;
     182close LISTDOC;
Note: See TracChangeset for help on using the changeset viewer.