Changeset 3060
- Timestamp: Jan 5, 2013, 1:53:12 PM (8 years ago)
- File: 1 edited
Legend:
- Unmodified (no marker; both old and new line numbers shown)
- Added (marked "+"; new line number only)
- Removed (marked "-"; old line number only)

cpc/trunk/project/lib/task/tagSeanceTask.class.php
r3059  r3060
   12     12     }
   13     13
   14          - protected function count($interventions, $excludeS = 0, $minsize = 1) {
          14   + protected function wordize($interventions, $excludeS = 0, $minsize = 1) {
   15     15     foreach($interventions as $i) {
   16     16     $i = preg_replace('/\([^\)]+\)/', '', $i);
    …      …
   68     68     $exclude_sentences = array('vice-président' => 1, 'sceaux'=>1, 'commissaire' => 1, 'monsieur' => 1, 'madame'=>1, 'professeur' => 1, 'amendement' => 1, 'règlement' => 1, 'rectificative' => 1, 'rapporteur' => 1);
   69     69
          70   + //On exclue les mots les plus populaires (en plus des stopwords)
   70     71     foreach(array_keys($words) as $k) {
   71     72     if (!isset($include[$k]))
    …      …
   76     77     }
   77     78     unset($words);
          79   +
          80   + //Exclusion des noms des parlementaires
   78     81     $q = Doctrine_Query::create();
   79     82     $q->select('nom as intervention')->from('Parlementaire o');
   80     83     $interventions = $q->fetchArray();
   81          - $words = $this-> count($interventions, 0, $minsize);
          84   + $words = $this->wordize($interventions, 0, $minsize);
   82     85     foreach(array_keys($words) as $k) {
   83     86     $exclude[$k] = 1;
    …      …
   87     90     $qs = Doctrine::getTable('Seance')->createQuery()->select('id')->where('tagged IS NULL');
   88     91
          92   + //Pour chacune des séances
   89     93     foreach($qs->fetchArray() as $s) {
   90     94     echo "Seance ".$s['id']." 
                 ..";
    …      …
   99    103     continue;
  100    104     }
  101          - $words = $this-> count($interventions, 1, $minsize);
         105   + $words = $this->wordize($interventions, 1, $minsize);
  102    106     $cpt = 0;
  103    107     $tot = count($words);
  104    108     $tags = array();
  105          - //Pour les mots le plus populaires non exclus on les garde s
         109   + //Pour les mots le plus populaires non exclus on les garde
  106    110     foreach(array_keys($words) as $k) {
  107          - if (!isset($exclude[$k]) && !preg_match('/-((il|elle)s |on|ci|le|[nv]ous)$/', $k)) {
         111   + if (!isset($exclude[$k]) && !preg_match('/-((il|elle)s?|on|ci|le|[nv]ous)$/', $k)) {
  108    112     $cpt++;
  109    113     $pc = $words[$k]*100/$tot;
    …      …
  116    120     $sentences = null;
  117    121     $sent2word = null;
  118          - //On cherche des groupes de mots commu ms à partir des tags trouvés
         122   + //On cherche des groupes de mots communs à partir des tags trouvés
  119    123     foreach ($interventions as $inter) {
  120    124     $i = null;
Note: See TracChangeset for help on using the changeset viewer.