1 | <?php |
---|
2 | |
---|
/**
 * Command-line task (namespace "tag", name "Seance") that derives tags from
 * the text of the interventions of every Seance whose "tagged" column is NULL
 * and attaches them to the matching Intervention records.
 *
 * Outline of execute():
 *  1. build an exclusion list from the most frequent words of all
 *     interventions and from every word found in Parlementaire names;
 *  2. for each untagged seance, keep its most frequent non-excluded words;
 *  3. look for longer phrases surrounding those words and promote the
 *     frequent ones to tags;
 *  4. add each tag to every intervention whose text matches it.
 */
class tagSeanceTask extends sfBaseTask
{
  /**
   * First word form recorded for each counted key; filled by count().
   * Keys are currently the words themselves.
   *
   * @var array
   */
  protected $sound = array();

  /**
   * Declares the task namespace, name, description and its two options.
   */
  protected function configure()
  {
    $this->namespace = 'tag';
    $this->name = 'Seance';
    $this->briefDescription = 'Tag Seance';
    $this->addOption('env', null, sfCommandOption::PARAMETER_OPTIONAL, 'Changes the environment this task is run in', 'test');
    $this->addOption('app', null, sfCommandOption::PARAMETER_OPTIONAL, 'Changes the application this task is run in', 'frontend');
  }

  /**
   * Counts word occurrences in a list of rows.
   *
   * Each row's 'intervention' text has parenthesised segments removed, tags
   * stripped and 'œ' replaced by 'oe', then is split on whitespace and a
   * fixed set of punctuation characters. A word is lower-cased unless it is
   * made only of A-Z, and is counted when it is longer than one byte and
   * contains at least one ASCII letter.
   *
   * After counting, the count of every key ending in "s" is added to the key
   * without that "s" when the latter exists.
   *
   * @param array $array    rows, each with an 'intervention' key holding text
   * @param mixed $excludeS when truthy, a merged plural key is removed from the result
   *
   * @return array word => number of occurrences, sorted by descending count
   */
  protected function count($array, $excludeS = 0)
  {
    // Must exist even when no row yields a word: array_keys()/arsort() below
    // fail on an undefined variable.
    $words = array();

    foreach ($array as $row) {
      $text = preg_replace('/\([^\)]+\)/', '', $row['intervention']);
      $text = strip_tags($text);
      $text = preg_replace('/œ/', 'oe', $text);

      foreach (preg_split('/[\s\,\;\.\:\_\(\)\&\#\<\>\']+/i', $text) as $w) {
        // Words written entirely in capitals keep their case.
        if (!preg_match('/^[A-Z]+$/', $w)) {
          $w = strtolower($w);
        }
        if (strlen($w) <= 1 || !preg_match('/[a-z]/i', $w)) {
          continue;
        }

        // The key is the word itself (a soundex() key was tried and disabled).
        $s = $w;
        if (isset($words[$s])) {
          $words[$s]++;
        } else {
          $words[$s] = 1;
        }
        if (!isset($this->sound[$s])) {
          $this->sound[$s] = $w;
        }
      }
    }

    // Fold "xxxs" into "xxx" when both exist. array_keys() is a snapshot, so
    // removing entries while looping is safe.
    foreach (array_keys($words) as $k) {
      if (!preg_match('/s$/', $k)) {
        continue;
      }
      $ks = preg_replace('/s$/', '', $k);
      if (!isset($words[$ks])) {
        continue;
      }
      $words[$ks] += $words[$k];
      if ($excludeS) {
        unset($words[$k]);
      }
    }

    arsort($words);

    return $words;
  }

  /**
   * Runs the tagging over every Seance whose "tagged" column is NULL.
   *
   * Progress and the tags selected for each seance are written to standard
   * output. A seance is flagged tagged = 1 only when at least one of its
   * interventions received a tag.
   *
   * @param array $arguments task arguments (not read here)
   * @param array $options   task options (not read here)
   */
  protected function execute($arguments = array(), $options = array())
  {
    // The variable is never read; presumably constructing the manager is
    // what sets up the Doctrine connections - confirm before removing.
    $manager = new sfDatabaseManager($this->configuration);

    // --- 1. Global exclusion list -------------------------------------
    $q = Doctrine_Query::create();
    $q->select('intervention')->from('Intervention i')->where('i.parlementaire_id IS NOT NULL');
    echo "count:\n\t";
    echo $q->count()."\n";
    $words = $this->count($q->fetchArray());
    // $tot is the number of distinct words, not the number of occurrences.
    $tot = count($words);

    $exclude = array(
      'lecture' => 1, 'séance' => 1, 'alinéa' => 1, 'résolution' => 1, 'adoption' => 1,
      'collègue' => 1, 'cher' => 1, 'collègues' => 1, 'chers' => 1, 'bis' => 1,
      '1er' => 1, 'rectifié' => 1, 'question' => 1, 'rédactionnel' => 1, 'scrutin' => 1,
      'exposer' => 1, 'identiques' => 1, 'identique' => 1, 'commission' => 1, 'adopte' => 1,
      'rejette' => 1, 'additionnel' => 1, 'tendant' => 1, 'examiné' => 1, 'examine' => 1,
      'rejeté' => 1, 'avis' => 1, 'suivant' => 1, 'estimé' => 1, 'déclaré' => 1,
    );
    $include = array(
      'télévision' => 1, 'dimanche' => 1, 'internet' => 1, 'outre-mer' => 1, 'logement' => 1,
      'militaire' => 1, 'taxe' => 1, 'médecin' => 1, 'hôpital' => 1,
    );
    $exclude_sentences = array('garde des sceaux' => 1, 'haut-commissaire' => 1, 'monsieur' => 1, 'madame' => 1);

    // Words are sorted by descending count: exclude the most frequent ones
    // (unless whitelisted) until the ratio drops below 3. The word that
    // crosses the threshold is still excluded before the loop stops.
    foreach ($words as $k => $n) {
      if (!isset($include[$k])) {
        $exclude[$k] = 1;
      }
      $pc = $n * 100 / $tot;
      echo $k.': '.$pc."\n";
      if ($pc < 3) {
        break;
      }
    }
    unset($words);

    // Every word appearing in a Parlementaire name is excluded as well.
    $q = Doctrine_Query::create();
    $q->select('nom as intervention')->from('Parlementaire o');
    foreach (array_keys($this->count($q->fetchArray())) as $k) {
      $exclude[$k] = 1;
    }

    // --- 2. Untagged seances, most recent first -----------------------
    $qs = Doctrine::getTable('Seance')->createQuery()->select('id')->where('tagged IS NULL')->orderBy('date DESC');

    foreach ($qs->fetchArray() as $s) {
      echo "Seance ".$s['id']." ..";

      // Fetch every intervention of this seance made by a parlementaire or a personnalite.
      // (A further filter on i.fonction NOT LIKE 'président%' was disabled in the original code.)
      $q = Doctrine_Query::create();
      $q->select('intervention, id, parlementaire_id')->from('Intervention i')->where('seance_id = ?', $s['id'])->andWhere('( i.parlementaire_id IS NOT NULL OR i.personnalite_id IS NOT NULL )');

      $array = $q->fetchArray();
      if (!count($array)) {
        echo " pas d'intervention trouvée\n";
        continue;
      }

      $words = $this->count($array, 1);
      $tot = count($words);
      $tags = array();

      // Keep the most popular words that are not excluded; the value stored
      // is the tag length, used later to try longer tags first.
      foreach ($words as $k => $n) {
        if (isset($exclude[$k])) {
          continue;
        }
        if ($n * 100 / $tot < 0.8) {
          break;
        }
        $tags[$k] = strlen($k);
      }

      // --- 3. Phrases built around the selected tags ------------------
      // Both containers must be arrays even when nothing matches.
      $sentences = array();
      $sent2word = array();
      foreach ($array as $inter) {
        foreach (array_keys($tags) as $tag) {
          // A tag is plain text: quote it so that characters such as "/",
          // "?" or "+" cannot break or alter the pattern.
          $quoted = preg_quote($tag, '/');
          $patterns = array(
            // at least 6 characters of context before the tag
            '/([^\s\,\.\:\>\;\(\)]*[^\,\.\:\>\;\(\)]{6}'.$quoted.'[^\s\,\.\:\<\&\(\)]*)/i',
            // at least 6 characters of context after the tag
            '/([^\s\,\.\:\>\;\)\)]*'.$quoted.'[^\,\.\:\<\&\(\)]{6}[^\s\,\.\:\<\&\(\)]*)/i',
          );
          foreach ($patterns as $pattern) {
            if (!preg_match($pattern, $inter['intervention'], $match)) {
              continue;
            }
            $sent = strtolower($match[1]);
            if (isset($sentences[$sent])) {
              $sentences[$sent]++;
            } else {
              $sentences[$sent] = 1;
            }
            $sent2word[$sent] = $tag;
          }
        }
      }

      // Phrases that are popular enough become tags; when a phrase accounts
      // for more than 70% of the uses of its source tag, that tag is dropped.
      $debut_bani = 'à|de|la|ainsi|ensuite';
      foreach (array_keys($sentences) as $sent) {
        // "commiss?ion" also covers the misspelt "commision" the original
        // pattern looked for.
        if (preg_match("/^($debut_bani)/i", $sent) || preg_match("/($debut_bani)$/i", $sent) || preg_match('/\d|amendement|rapporteur|commiss?ion|collègue/i', $sent)) {
          continue;
        }

        // NOTE(review): $sent has been lower-cased above, so this pattern
        // (and the [A-Z]+ alternative below) cannot match; it looks like it
        // was meant to run on the original-case text - confirm the intent.
        if (preg_match('/^[A-Z][a-z]/', $sent)) {
          unset($tags[$sent2word[$sent]]);
          continue;
        }

        if (preg_match('/^([a-z]{2} |[A-Z]+)/', $sent) || preg_match('/ [a-z]$/i', $sent)) {
          continue;
        }

        $source = $sent2word[$sent];
        $sourceCount = $words[$source];
        $share = $sentences[$sent] * 100 / $sourceCount;
        if (($sentences[$sent] * 100 / $tot > 0.8 || $share > 70) && $sourceCount > 5) {
          $ok = 1;
          // The phrases are the KEYS of $exclude_sentences (values are all 1).
          foreach (array_keys($exclude_sentences) as $excl_sent) {
            if (strpos($sent, $excl_sent) !== false) {
              $ok = 0;
              break;
            }
          }
          if ($ok) {
            $tags[$sent] = strlen($sent);
          }
          if ($share > 70) {
            unset($tags[$source]);
          }
        }
      }
      unset($words, $sentences, $sent2word);

      print_r($tags);

      // --- 4. Attach the tags to the interventions --------------------
      // Longest tags first.
      arsort($tags);
      // Build each (quoted) pattern once per seance instead of once per intervention.
      $tagPatterns = array();
      foreach (array_keys($tags) as $tag) {
        $tagPatterns[$tag] = '/'.preg_quote($tag, '/').'/i';
      }

      $tagged = 0;
      foreach ($array as $inter) {
        // Only interventions attached to a parlementaire receive tags.
        if (!$inter['parlementaire_id']) {
          continue;
        }

        $i = null;
        foreach ($tagPatterns as $tag => $pattern) {
          if (!preg_match($pattern, $inter['intervention'])) {
            continue;
          }
          if (!$i) {
            // The record is loaded lazily, on the first matching tag.
            $i = Doctrine::getTable('Intervention')->find($inter['id']);
            if (!$i) {
              // Record no longer available: nothing to tag.
              break;
            }
          }
          $i->addTag($tag);
        }
        if ($i) {
          $tagged = 1;
          $i->save();
        }
      }

      if ($tagged == 1) {
        $seance = Doctrine::getTable('Seance')->find($s['id']);
        $seance->tagged = 1;
        $seance->save();
      }
      unset($tags, $tagPatterns, $array);

      echo " done.";
      if ($tagged == 0) {
        echo " WARNING: No tag found !";
      }
      echo "\n";
    }
  }
}
---|