From e32874258168f95a6dc1c50eb877b35c546cfeb3 Mon Sep 17 00:00:00 2001
From: Bruno Nocera Zanette <brunonzanette@gmail.com>
Date: Tue, 17 Sep 2013 15:22:53 -0300
Subject: [PATCH] Refs #1216 Add support for writing multiple dump files

This commit adds support for writing one dump file per user/group.
Each of these files contains only the information about a single user or group.

Signed-off-by: Bruno Nocera Zanette <brunonzanette@gmail.com>
---
 lib/json/group_section.py | 122 ++++++++++++++++++++++++--------------
 lib/json/user_section.py  |  82 +++++++++++++++++--------
 lib/xml/group_section.py  |  98 +++++++++++++++++++-----------
 lib/xml/user_section.py   |  83 +++++++++++++++++---------
 opendata_json_version.py  |  17 ++++--
 opendata_xml_version.py   |  17 ++++--
 6 files changed, 280 insertions(+), 139 deletions(-)

diff --git a/lib/json/group_section.py b/lib/json/group_section.py
index 187bdeb..3c477eb 100644
--- a/lib/json/group_section.py
+++ b/lib/json/group_section.py
@@ -381,7 +381,57 @@ def write_groupevents_subsection (db, json, group_guid):
 #--------------------------------------------------------------------#
 
 #--------------------------------------------------------------------#
-def write_groups_section (db, dir_results):
+def write_groups_section (db, json,
+    guid, title, desc, owner_id, owner_name, owner_username, time):
+    """Write the JSON fields describing one group through the wrt helpers.
+
+    The caller writes the braces that enclose these fields; "owner_id"
+    is accepted but not used in this function.
+    """
+    # 45 = select * from elgg_metastrings where string='briefdescription';
+    short_desc = wrt.post_content(db, guid, 45)
+
+    # "cid": value built by wrt.urlparticipa from the group's guid.
+    group_url = wrt.urlparticipa('groups/profile/', str(guid))
+    wrt.write_tag(json, 2, "cid", group_url, ",")
+
+    # Owner sub-object: "uid" built from the owner's username, then the name.
+    owner_url = wrt.urlparticipa('profile/', owner_username)
+    wrt.write_open_tag(json, 2, "proprietario", "{")
+    wrt.write_tag(json, 3, "uid", owner_url, ",")
+    wrt.write_tag(json, 3, "nome", owner_name, "")
+    wrt.write_close_tag(json, 2, "}", True)
+
+    # Basic group information.
+    wrt.write_tag(json, 2, "titulo", title, ",")
+    wrt.write_tag(json, 2, "data", wrt.datestr(time), ",")
+    wrt.write_tag(json, 2, "descricao", desc, ",")
+
+    is_public = wrt.groupaccess_permission(db, guid) == 'public'
+
+    # The separator after this field is only passed when subsections follow.
+    wrt.write_tag(json, 2, "breveDescricao", short_desc,
+        "," if is_public else "")
+
+    # Non-public groups get nothing beyond the fields written above.
+    if not is_public:
+        return
+
+    # Member list first, then every kind of content posted on the group.
+    write_groupmembers_subsection(db, json, guid)
+    write_groupfiles_subsection(db, json, guid)
+    write_groupforumtopics_subsection(db, json, guid)
+    write_groupbookmarks_subsection(db, json, guid)
+    write_grouppages_subsection(db, json, guid)
+    write_groupvideos_subsection(db, json, guid)
+    write_groupevents_subsection(db, json, guid)
+#--------------------------------------------------------------------#
+
+#--------------------------------------------------------------------#
+def write_singlefile_groups_section (db, dir_results):
+
+    groups_info = db.cursor()
+    groups_info.execute(qry.qry_groups_info)
 
     json_filename=dir_results+wrt.date_today()+"_comunidades"+".json"
     json = wrt.open_json_file(json_filename)
@@ -389,56 +439,16 @@ def write_groups_section (db, dir_results):
     wrt.write_open_tag(json,0,"","{")
     wrt.write_open_tag(json,0,"comunidades","[")
     
-    groups_info = db.cursor()
-    groups_info.execute(qry.qry_groups_info)
-    
     row=0
     for (guid, title, desc, owner_id, owner_name, owner_username, time)\
         in groups_info:
             
         row=row+1
-            
-        # 45 = select * from elgg_metastrings where string='briefdescription';
-        brief_desc=wrt.post_content(db,guid, 45)
         
         wrt.write_open_tag(json,1,"","{")
-        
-        prefix='groups/profile/'
-        group_attr=wrt.urlparticipa(prefix,str(guid))
-        wrt.write_tag(json,2,"cid",group_attr,",")
-
-        # Write all group's information
-        prefix='profile/'
-        owner_attr=wrt.urlparticipa(prefix,owner_username)
-        
-        wrt.write_open_tag(json,2,"proprietario","{")
-        wrt.write_tag(json,3,"uid",owner_attr,",")
-        wrt.write_tag(json,3,"nome",owner_name,"")
-        wrt.write_close_tag(json,2,"}",True)
-                
-        wrt.write_tag(json,2,"titulo",title,",")
-        wrt.write_tag(json,2,"data",wrt.datestr(time),",")
-        wrt.write_tag(json,2,"descricao",desc,",")
-        
-        if wrt.groupaccess_permission(db, guid) == 'public':
-            comma=","
-        else:
-            comma=""
-            
-        wrt.write_tag(json,2,"breveDescricao",brief_desc,comma)
-                                            
-        if wrt.groupaccess_permission(db, guid) == 'public':
             
-            # Write a list of group member's name
-            write_groupmembers_subsection(db, json, guid)
-        
-            # Write a list, and all the info, of all posts made on the group.
-            write_groupfiles_subsection(db, json, guid)
-            write_groupforumtopics_subsection(db, json, guid)
-            write_groupbookmarks_subsection(db, json, guid)
-            write_grouppages_subsection(db, json, guid)
-            write_groupvideos_subsection(db, json, guid)
-            write_groupevents_subsection(db, json, guid)
+        write_groups_section(db,json,\
+            guid,title,desc,owner_id,owner_name,owner_username,time)
             
         wrt.write_close_tag(json,1,"}",(row < groups_info.rowcount))
         
@@ -450,4 +460,30 @@ def write_groups_section (db, dir_results):
     json.close()
 #--------------------------------------------------------------------#
 
+#--------------------------------------------------------------------#
+def write_multifile_groups_section (db, dir_results):
+    """Write one JSON file per group: <dir_results>/groups/<guid>.json.
+
+    Each file wraps that group's fields in a single "comunidade" object.
+    """
+    groups_info = db.cursor()
+    groups_info.execute(qry.qry_groups_info)
+
+    for (guid, title, desc, owner_id, owner_name, owner_username, time)\
+        in groups_info:
+        json_filename=dir_results+'/groups/'+str(guid)+'.json'
+        json = wrt.open_json_file(json_filename)
+
+        wrt.write_open_tag(json,0,"","{")
+        # Singular of the "comunidades" key used by the single-file dump.
+        wrt.write_open_tag(json,1,"comunidade","{")
+        write_groups_section(db,json,
+            guid,title,desc,owner_id,owner_name,owner_username,time)
+        wrt.write_close_tag(json,1,"}",False)
+        wrt.write_close_tag(json,0,"}",False)
+        json.close()
+
+    groups_info.close()
+#--------------------------------------------------------------------#
+
 ######################################################################
diff --git a/lib/json/user_section.py b/lib/json/user_section.py
index e96987d..b5de824 100644
--- a/lib/json/user_section.py
+++ b/lib/json/user_section.py
@@ -369,46 +369,54 @@ def write_userevents_subsection (db, json, user_guid):
     user_events.close()
 #--------------------------------------------------------------------#
 
+#--------------------------------------------------------------------#
+def write_users_section (db, json,
+    guid, name, username):
+    """Write the JSON fields describing one user through the wrt helpers."""
+
+    # Identification: the value built by wrt.urlparticipa, then the name.
+    profile_ref = wrt.urlparticipa('profile/', username)
+    wrt.write_tag(json, 2, "uid", profile_ref, ",")
+    wrt.write_tag(json, 2, "nome", name, ",")
+
+    # Friends, then groups, then each kind of post made by the user.
+    # The writers run in this fixed order, all against the same handle.
+    subsection_writers = (
+        write_userfriends_subsection,
+        write_usergroups_subsection,
+        write_userfiles_subsection,
+        write_userblogs_subsection,
+        write_userbookmarks_subsection,
+        write_userpages_subsection,
+        write_uservideos_subsection,
+        write_userevents_subsection,
+    )
+    for write_subsection in subsection_writers:
+        write_subsection(db, json, guid)
+#--------------------------------------------------------------------#
+
 #--------------------------------------------------------------------#    
-def write_users_section (db, dir_results):
+def write_singlefile_users_section (db, dir_results):
     
+    users_info = db.cursor()
+    users_info.execute(qry.qry_users_info)
+
     json_filename=dir_results+wrt.date_today()+"_usuarios"+".json"
     json = wrt.open_json_file(json_filename)
     
     wrt.write_open_tag(json,0,"","{")
     wrt.write_open_tag(json,0,"usuarios","[")
     
-    users_info = db.cursor()
-    users_info.execute(qry.qry_users_info)
-    
     row=0
     for (guid, name, username)\
         in users_info:
             
         row=row+1
         
-        prefix='profile/'
-        user_attr=wrt.urlparticipa(prefix,username)
-        
         wrt.write_open_tag(json,1,"","{")
         
-        # Write all user's information
-        wrt.write_tag(json,2,"uid",user_attr,",")
-        wrt.write_tag(json,2,"nome",name,",")
-            
-        # Write a list of user friend's names
-        write_userfriends_subsection(db, json, guid)
-        
-        # Write a list of all groups that the user owns or belongs
-        write_usergroups_subsection(db, json, guid)
-        
-        # Write a list, and all the info, of all posts made by the user
-        write_userfiles_subsection(db, json, guid)
-        write_userblogs_subsection(db, json, guid)
-        write_userbookmarks_subsection(db, json, guid)
-        write_userpages_subsection(db, json, guid)
-        write_uservideos_subsection(db, json, guid)
-        write_userevents_subsection(db, json, guid)
+        write_users_section(db,json,\
+            guid,name,username)
         
         wrt.write_close_tag(json,1,"}",(row < users_info.rowcount))
     
@@ -420,4 +428,30 @@ def write_users_section (db, dir_results):
     json.close()
 #--------------------------------------------------------------------#
 
+#--------------------------------------------------------------------#    
+def write_multifile_users_section (db, dir_results):
+    """Write one JSON file per user: <dir_results>/users/<guid>.json."""
+
+    cursor = db.cursor()
+    cursor.execute(qry.qry_users_info)
+
+    for user_row in cursor:
+        guid, name, username = user_row
+
+        # One output file per row, named after the user's guid.
+        out = wrt.open_json_file(dir_results+'/users/'+str(guid)+'.json')
+
+        # Outer "{" first, then a "usuario" object holding the user's fields.
+        wrt.write_open_tag(out, 0, "", "{")
+        wrt.write_open_tag(out, 1, "usuario", "{")
+        write_users_section(db, out, guid, name, username)
+        wrt.write_close_tag(out, 1, "}", False)
+        wrt.write_close_tag(out, 0, "}", False)
+
+        out.close()
+
+    # Every row has been written: release the cursor.
+    cursor.close()
+#--------------------------------------------------------------------#
+
 ######################################################################
diff --git a/lib/xml/group_section.py b/lib/xml/group_section.py
index 917ef37..0fc6150 100644
--- a/lib/xml/group_section.py
+++ b/lib/xml/group_section.py
@@ -300,50 +300,58 @@ def write_groupevents_subsection (db, xml, group_guid):
 #--------------------------------------------------------------------#
 
 #--------------------------------------------------------------------#
-def write_groups_section (db, dir_results):
+def write_groups_section(db, xml,
+    guid, title, desc, owner_id, owner_name, owner_username, time):
+    """Write one complete "comunidade" element through the wrt helpers.
+
+    "owner_id" is accepted but not used in this function.
+    """
+    # 45 = select * from elgg_metastrings where string='briefdescription';
+    short_desc = wrt.post_content(db, guid, 45)
+    # Open the element; its attribute is built from the group's guid.
+    group_url = wrt.urlparticipa('groups/profile/', str(guid))
+    wrt.write_open_tag(xml, 1, "comunidade", wrt.cidstr(group_url))
+
+    # Owner (with its own attribute), then the group's basic information.
+    owner_url = wrt.urlparticipa('profile/', owner_username)
+    wrt.write_tag(xml, 2, "proprietario", owner_name, wrt.uidstr(owner_url))
+    wrt.write_tag(xml, 2, "titulo", title, '')
+    wrt.write_tag(xml, 2, "data", wrt.datestr(time), '')
+    wrt.write_tag(xml, 2, "descricao", wrt.cdata(desc), '')
+    wrt.write_tag(xml, 2, "breve_descricao", wrt.cdata(short_desc), '')
+
+    # Members and posted content are only written for public groups.
+    if wrt.groupaccess_permission(db, guid) == 'public':
+        for write_subsection in (
+                write_groupmembers_subsection,
+                write_groupfiles_subsection,
+                write_groupforumtopics_subsection,
+                write_groupbookmarks_subsection,
+                write_grouppages_subsection,
+                write_groupvideos_subsection,
+                write_groupevents_subsection):
+            write_subsection(db, xml, guid)
+
+    wrt.write_close_tag(xml, 1, "comunidade")
+#--------------------------------------------------------------------#
+
+#--------------------------------------------------------------------#
+def write_singlefile_groups_section (db, dir_results):
     
+    groups_info = db.cursor()
+    groups_info.execute(qry.qry_groups_info)
+
     xml_filename=dir_results+wrt.date_today()+"_comunidades"+".xml"
     xml = wrt.open_xml_file(xml_filename)
 
     wrt.write_open_tag(xml,0,"comunidades",'')
     
-    groups_info = db.cursor()
-    groups_info.execute(qry.qry_groups_info)
-    
     for (guid, title, desc, owner_id, owner_name, owner_username, time)\
         in groups_info:
         
-        # 45 = select * from elgg_metastrings where string='briefdescription';
-        brief_desc=wrt.post_content(db,guid, 45)
-        
-        prefix='groups/profile/'
-        group_attr=wrt.cidstr(wrt.urlparticipa(prefix,str(guid)))
-        wrt.write_open_tag(xml,1,"comunidade",group_attr)
-
-        # Write all group's information
-        prefix='profile/'
-        owner_attr=wrt.uidstr(wrt.urlparticipa(prefix,owner_username))
-        wrt.write_tag(xml,2,"proprietario",owner_name,owner_attr)
-        wrt.write_tag(xml,2,"titulo",title,'')
-        wrt.write_tag(xml,2,"data",wrt.datestr(time),'')
-        wrt.write_tag(xml,2,"descricao",wrt.cdata(desc),'')
-        wrt.write_tag(xml,2,"breve_descricao",wrt.cdata(brief_desc),'')
-                                    
-        if wrt.groupaccess_permission(db, guid) == 'public':
-            
-            # Write a list of group member's name
-            write_groupmembers_subsection(db, xml, guid)
-        
-            # Write a list, and all the info, of all posts made on the group.
-            write_groupfiles_subsection(db, xml, guid)
-            write_groupforumtopics_subsection(db, xml, guid)
-            write_groupbookmarks_subsection(db, xml, guid)
-            write_grouppages_subsection(db, xml, guid)
-            write_groupvideos_subsection(db, xml, guid)
-            write_groupevents_subsection(db, xml, guid)
-        
-        wrt.write_close_tag(xml,1,"comunidade")
-        
+        write_groups_section(db,xml,\
+            guid,title,desc,owner_id,owner_name,owner_username,time)
+    
     wrt.write_close_tag(xml,0,"comunidades")
     
     groups_info.close()
@@ -351,4 +359,24 @@ def write_groups_section (db, dir_results):
     xml.close()
 #--------------------------------------------------------------------#
 
+#--------------------------------------------------------------------#
+def write_multifile_groups_section (db, dir_results):
+    """Write one XML file per group: <dir_results>/groups/<guid>.xml."""
+
+    cursor = db.cursor()
+    cursor.execute(qry.qry_groups_info)
+
+    for group_row in cursor:
+        (guid, title, desc,
+         owner_id, owner_name, owner_username, time) = group_row
+
+        # Each file receives only what is written for this one group.
+        out = wrt.open_xml_file(dir_results+'/groups/'+str(guid)+'.xml')
+        write_groups_section(db, out, guid, title, desc,
+            owner_id, owner_name, owner_username, time)
+        out.close()
+
+    cursor.close()
+#--------------------------------------------------------------------#
+
 ######################################################################
diff --git a/lib/xml/user_section.py b/lib/xml/user_section.py
index 48a702b..9144649 100644
--- a/lib/xml/user_section.py
+++ b/lib/xml/user_section.py
@@ -303,40 +303,49 @@ def write_userevents_subsection (db, xml, user_guid):
 #--------------------------------------------------------------------#
 
 #--------------------------------------------------------------------#    
-def write_users_section (db, dir_results):
+def write_users_section (db, xml,
+    guid, name, username):
+    """Write one complete "usuario" element through the wrt helpers."""
+
+    # Open the element; its attribute is derived from the username.
+    profile_url = wrt.urlparticipa('profile/', username)
+    wrt.write_open_tag(xml, 1, "usuario", wrt.uidstr(profile_url))
+
+    wrt.write_tag(xml, 2, "nome", name, '')
+
+    # Friends, then groups, then each kind of post made by the user.
+    subsection_writers = (
+        write_userfriends_subsection,
+        write_usergroups_subsection,
+        write_userfiles_subsection,
+        write_userblogs_subsection,
+        write_userbookmarks_subsection,
+        write_userpages_subsection,
+        write_uservideos_subsection,
+        write_userevents_subsection,
+    )
+    for write_subsection in subsection_writers:
+        write_subsection(db, xml, guid)
+
+    wrt.write_close_tag(xml, 1, "usuario")
+#--------------------------------------------------------------------#    
+
+#--------------------------------------------------------------------#    
+def write_singlefile_users_section (db, dir_results):
+   
+    users_info = db.cursor()
+    users_info.execute(qry.qry_users_info)
     
     xml_filename=dir_results+wrt.date_today()+"_usuarios"+".xml"
     xml = wrt.open_xml_file(xml_filename)
 
     wrt.write_open_tag(xml,0,"usuarios",'')
     
-    users_info = db.cursor()
-    users_info.execute(qry.qry_users_info)
-    
-    for (guid, name, username) in users_info:
-        
-        prefix='profile/'
-        user_attr=wrt.uidstr(wrt.urlparticipa(prefix,username))
-        wrt.write_open_tag(xml,1,"usuario",user_attr)
-        
-        # Write all user's information
-        wrt.write_tag(xml,2,"nome",name,'')
+    for (guid, name, username)\
+        in users_info:
             
-        # Write a list of user friend's names
-        write_userfriends_subsection(db, xml, guid)
-        
-        # Write a list of all groups that the user owns or belongs
-        write_usergroups_subsection(db, xml, guid)
-        
-        # Write a list, and all the info, of all posts made by the user
-        write_userfiles_subsection(db, xml, guid)
-        write_userblogs_subsection(db, xml, guid)
-        write_userbookmarks_subsection(db, xml, guid)
-        write_userpages_subsection(db, xml, guid)
-        write_uservideos_subsection(db, xml, guid)
-        write_userevents_subsection(db, xml, guid)
-        
-        wrt.write_close_tag(xml,1,"usuario")
+        write_users_section(db,xml,\
+            guid,name,username)        
     
     wrt.write_close_tag(xml,0,"usuarios")
     
@@ -345,4 +354,24 @@ def write_users_section (db, dir_results):
     xml.close()
 #--------------------------------------------------------------------#
 
+#--------------------------------------------------------------------#    
+def write_multifile_users_section (db, dir_results):
+    """Write one XML file per user: <dir_results>/users/<guid>.xml."""
+
+    cursor = db.cursor()
+    cursor.execute(qry.qry_users_info)
+
+    for user_row in cursor:
+        guid, name, username = user_row
+
+        # One output file per row, named after the user's guid.
+        out = wrt.open_xml_file(dir_results+'/users/'+str(guid)+'.xml')
+
+        write_users_section(db, out, guid, name, username)
+        out.close()
+
+    # Every row has been written: release the cursor.
+    cursor.close()
+#--------------------------------------------------------------------#
+
 ######################################################################
diff --git a/opendata_json_version.py b/opendata_json_version.py
index 6768425..12ca510 100644
--- a/opendata_json_version.py
+++ b/opendata_json_version.py
@@ -24,8 +24,11 @@
 import MySQLdb
 import datetime
 
-from lib.json.user_section import write_users_section 
-from lib.json.group_section import write_groups_section 
+from lib.json.user_section import write_singlefile_users_section
+from lib.json.group_section import write_singlefile_groups_section
+
+from lib.json.user_section import write_multifile_users_section
+from lib.json.group_section import write_multifile_groups_section
 
 def main():
     
@@ -42,9 +45,13 @@ def main():
     # Get the execution start time information
     time_script_start=datetime.datetime.now()
     
-    # Call functions to write JSON files
-    write_users_section(db,dir_results)
-    write_groups_section(db,dir_results)
+    # Call functions to write Single Dump JSON file
+    write_singlefile_users_section(db,dir_results)
+    write_singlefile_groups_section(db,dir_results)    
+    
+    # Call functions to write Multiple Dumps JSON files
+    write_multifile_users_section(db,dir_results)
+    write_multifile_groups_section(db,dir_results)
     
     # Calculate and Print script time duration
     script_duration=datetime.datetime.now()-time_script_start
diff --git a/opendata_xml_version.py b/opendata_xml_version.py
index 18c434a..f15645a 100644
--- a/opendata_xml_version.py
+++ b/opendata_xml_version.py
@@ -24,8 +24,11 @@
 import MySQLdb
 import datetime
 
-from lib.xml.user_section import write_users_section 
-from lib.xml.group_section import write_groups_section 
+from lib.xml.user_section import write_singlefile_users_section
+from lib.xml.group_section import write_singlefile_groups_section
+
+from lib.xml.user_section import write_multifile_users_section
+from lib.xml.group_section import write_multifile_groups_section
 
 def main():
     
@@ -42,9 +45,13 @@ def main():
     # Get the execution start time information
     time_script_start=datetime.datetime.now()
     
-    # Call functions to write XML files
-    write_users_section(db,dir_results)
-    write_groups_section(db,dir_results)
+    # Call functions to write Single Dump XML file
+    write_singlefile_users_section(db,dir_results)
+    write_singlefile_groups_section(db,dir_results)
+    
+    # Call functions to write Multiple Dump XML files
+    write_multifile_users_section(db,dir_results)
+    write_multifile_groups_section(db,dir_results)
     
     # Calculate and Print script time duration
     script_duration=datetime.datetime.now()-time_script_start
-- 
GitLab