- Timestamp: Oct 29, 2011, 10:50:36 PM (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
cpc/branches/senat/project/lib/task/tagSeanceTask.class.php
r2320 r2427 14 14 protected function count($array, $excludeS = 0) { 15 15 foreach($array as $i) { 16 $i = preg_replace('/\([^\)]+\)/', '', $i['intervention']); 17 $i = strip_tags($i); 16 $i = strip_tags($i['intervention']); 17 $i = preg_replace('/<[^>]+>/', '', $i); 18 $i = html_entity_decode(str_replace(' ', ' ', htmlentities($i, ENT_COMPAT, 'UTF-8')), ENT_COMPAT, 'UTF-8'); 19 $i = preg_replace('/\([^\)]+\)/', '', $i); 18 20 $i = preg_replace('/œ/', 'oe', $i); 19 foreach(preg_split('/[\s\,\;\.\:\_\(\)\&\#\<\>\']+/i', $i) as $w) { 21 $i = str_replace(array(',',';','.',':','_','(',')','&','#','<','>','\'','"','«','»',' -','- ','?','!'), ' ', $i); 22 $i = preg_replace('/\s+/', ' ', $i); 23 foreach(explode(" ", $i) as $w) { 20 24 if (!preg_match('/^[A-Z]+$/', $w)) 21 25 $w = strtolower($w); 22 if (strlen($w)> 1&& preg_match('/[a-z]/i', $w)) {26 if (strlen($w)>2 && preg_match('/[a-z]/i', $w)) { 23 27 // $s = soundex($w); 24 28 $s = $w; … … 60 64 $tot = count($words); 61 65 62 $exclude = array(' lecture'=>1, 'séance'=>1, 'alinéa'=>1, 'résolution'=>1, 'adoption'=>1, 'collègue'=>1, 'cher'=>1, 'collègues'=>1, 'chers'=>1,'bis'=>1, '1er'=>1, 'rectifié'=>1, 'question'=>1, 'rédactionnel'=>1, 'scrutin'=>1, 'exposer'=>1, 'identiques'=>1, 'identique'=>1, 'commission'=>1, 'adopte'=>1, 'rejette' => 1, 'additionnel' => 1, 'tendant' => 1, 'examiné' => 1, 'examine' => 1, 'rejeté'=> 1, 'avis' => 1, 'suivant'=>1, 'estimé'=>1, 'déclaré'=>1);66 $exclude = array('vice-président' => 1, 'restant' => 1, 'mixte' => 1, 'paritaire' => 1, 'rapporteure' => 1, 'rapporteur' => 1, 'députée' => 1, 'député' => 1, 'sénateur' => 1, 'sénatrice' => 1, 'présidente' => 1, 'président' => 1, 'rédaction' => 1, 'issue' => 1, 'spéciale' => 1, 'adopté' => 1, 'lecture'=>1, 'séance'=>1, 'alinéa'=>1, 'résolution'=>1, 'adoption'=>1, 'collègue'=>1, 'cher'=>1, 'collègues'=>1, 'chers'=>1,'bis'=>1, '1er'=>1, 'rectifié'=>1, 'question'=>1, 'rédactionnel'=>1, 'scrutin'=>1, 'exposer'=>1, 'identiques'=>1, 'identique'=>1, 
'commission'=>1, 'adopte'=>1, 'rejette' => 1, 'additionnel' => 1, 'tendant' => 1, 'examiné' => 1, 'examine' => 1, 'rejeté'=> 1, 'avis' => 1, 'suivant'=>1, 'estimé'=>1, 'déclaré'=>1); 63 67 $include = array('télévision' => 1, 'dimanche'=>1, 'internet'=>1, 'outre-mer'=>1, 'logement'=>1, 'militaire'=>1, 'taxe'=>1, 'médecin'=>1, 'hôpital'=>1); 64 $exclude_sentences = array(' garde des sceaux'=>1, 'haut-commissaire' => 1, 'monsieur' => 1, 'madame'=>1);68 $exclude_sentences = array('commission spéciale' => 1, 'garde des sceaux'=>1, 'haut-commissaire' => 1, 'monsieur' => 1, 'madame'=>1, 'mixte paritaire' => 1, 'commission mixte' => 1, 'commission mixte paritaire' => 1); 65 69 66 70 foreach(array_keys($words) as $k) { … … 82 86 83 87 $qs = Doctrine::getTable('Seance')->createQuery()->select('id')->where('tagged IS NULL')->orderBy('date DESC'); 88 //$qs = Doctrine::getTable('Seance')->createQuery()->select('id')->where('id = ?', '1859'); 84 89 85 90 foreach($qs->fetchArray() as $s) { … … 88 93 //Recherche toutes les interventions pour cette séance 89 94 $q = Doctrine_Query::create(); 90 $q->select('intervention, id, parlementaire_id')->from('Intervention i')->where('seance_id = ?', $s['id'])->andWhere('( i.parlementaire_id IS NOT NULL OR i.personnalite_id IS NOT NULL )') ;//->andWhere('(i.fonction IS NULL OR i.fonction NOT LIKE ? )', 'président%');95 $q->select('intervention, id, parlementaire_id')->from('Intervention i')->where('seance_id = ?', $s['id'])->andWhere('( i.parlementaire_id IS NOT NULL OR i.personnalite_id IS NOT NULL )')->andWhere('(i.fonction IS NULL OR i.fonction NOT LIKE ? 
)', 'président%'); 91 96 92 97 $array = $q->fetchArray(); … … 101 106 //Pour les mots le plus populaires non exclus on les gardes 102 107 foreach(array_keys($words) as $k) { 108 $k = trim($k); 103 109 if (!isset($exclude[$k])) { 104 110 $cpt++; … … 115 121 foreach ($array as $inter) { 116 122 $i = null; 123 $inter['intervention'] = html_entity_decode(str_replace(' ', ' ', htmlentities($inter['intervention'], ENT_COMPAT, 'UTF-8')), ENT_COMPAT, 'UTF-8'); 124 117 125 foreach (array_keys($tags) as $tag) { 118 if (preg_match('/([^\s\,\.\:\>\;\(\)]*[^\,\.\:\>\;\(\)]{6}'.$tag.'[^\s\,\.\:\<\&\(\)]*)/i', $inter['intervention'], $match)) { 119 $sent = strtolower($match[1]); 126 $srctag = preg_replace('/\//', '\/', $tag); 127 if (preg_match('/([^\s,\.:>;\(\)«»]*[^,\.:>;\(\)«»]{7}'.$srctag.'[^\s,\.:<\(\)«»]*)/i', $inter['intervention'], $match)) { 128 $sent = trim(strtolower($match[1])); 129 if (!isset($sentences[$sent])) 130 $sentences[$sent] = 1; 131 else 132 $sentences[$sent]++; 133 $sent2word[$sent] = $tag; 134 } 135 if (preg_match('/([^\s,\.:>;\(\)«»]*'.$srctag.'[^,\.:<\(\)«»]{7}[^\s,\.:<\(\)«»]*)/i', $inter['intervention'], $match)) { 136 $sent = trim(strtolower($match[1])); 120 137 if (!isset($sentences[$sent])) 121 138 $sentences[$sent] = 1; … … 124 141 $sent2word[$sent] = $tag; 125 142 } 126 if (preg_match('/([^\s\,\.\:\>\;\)\)]*'.$tag.'[^\,\.\:\<\&\(\)]{6}[^\s\,\.\:\<\&\(\)]*)/i', $inter['intervention'], $match)) {127 $sent = strtolower($match[1]);128 if (!isset($sentences[$sent]))129 $sentences[$sent] = 1;130 else131 $sentences[$sent]++;132 $sent2word[$sent] = $tag;133 }134 143 } 135 144 } … … 141 150 if (count($sentences)) { 142 151 foreach (array_keys($sentences) as $sent) { 143 144 152 if (preg_match("/^($debut_bani)/i", $sent) || preg_match("/($debut_bani)$/i", $sent) || preg_match('/\d|amendement|rapporteur|commision|collègue/i', $sent) ) 145 153 continue; … … 157 165 $ok = 1; 158 166 foreach($exclude_sentences as $excl_sent) { 159 if 
(preg_match('/'.$excl_sent.'/ ', $sent)) {167 if (preg_match('/'.$excl_sent.'/i', $sent)) { 160 168 $ok = 0; 161 169 break; … … 173 181 unset($sent2word); 174 182 183 foreach ($tags as $t => $n) print htmlentities($t, ENT_COMPAT, 'UTF-8')."\n"; 175 184 print_r($tags); 176 185 … … 184 193 $i = null; 185 194 foreach (array_keys($tags) as $tag) { 186 if (preg_match('/'.$tag.'/i', $inter['intervention'])) { 195 $tag = trim($tag); 196 if (preg_match('/'.preg_replace('/\//', '\/', $tag).'/i', $inter['intervention'])) { 187 197 if (!$i) 188 198 $i = Doctrine::getTable('Intervention')->find($inter['id']);
Note: See TracChangeset for help on using the changeset viewer.