Python: Pinging Links with ProcessPoolExecutor and MySQL











I have a huge database of links that I have to ping to see if they are still alive. I'm using ProcessPoolExecutor to check as many links per second as possible.



I'm checking each webpage with a Python script and updating a MySQL database after each check. My problem comes from the MySQL update: when I run the script without the MySQL update, everything works perfectly, but as soon as I try to update the database with the last time each link was checked, the script blocks during the first update. I work in batches of 100 links because I would run out of memory if I fetched them all at once. Here's my script:



import concurrent.futures
import time
import mysql.connector
from urllib.request import *
import urllib.request
from bs4 import BeautifulSoup

MainDB = mysql.connector.connect(
    host="**************",
    user="********",
    passwd="***********",
    database="**********"
)
Maincursor = MainDB.cursor(buffered=True)

def Dead(Link):
    # Mark the link as dead and record when it was last checked
    Date = time.strftime('%Y-%m-%d %H:%M:%S')
    print(Date, ':', Link, 'is Dead')
    try:
        SQLInsert = "UPDATE ******** SET Alive=%s, LastTimeSeen=%s WHERE Link = %s"
        DataInsert = (0, Date, Link)
        Maincursor.execute(SQLInsert, DataInsert)
        MainDB.commit()
    except mysql.connector.Error as err:
        print("Error Updating Dead :", err)
        MainDB.rollback()

def Alive(Link):
    # Mark the link as alive and record when it was last checked
    Date = time.strftime('%Y-%m-%d %H:%M:%S')
    try:
        SQLInsert = "UPDATE ******** SET Alive=%s, LastTimeSeen=%s WHERE Link = %s"
        DataInsert = (1, Date, Link)
        Maincursor.execute(SQLInsert, DataInsert)
        MainDB.commit()
    except mysql.connector.Error as err:
        print("Error Updating Alive :", err)
        MainDB.rollback()

def load_link(Link):
    # Return 1 if the URL can be fetched, 0 on any urllib error
    try:
        html_offer = urlopen(Link)
    except urllib.error.HTTPError:
        return 0
    except urllib.error.ContentTooShortError:
        return 0
    except urllib.error.URLError:
        return 0
    else:
        return 1

while True:
    SQL = "SELECT COUNT(ID) FROM *****"
    try:
        Maincursor.execute(SQL)
        MainDB.commit()
    except mysql.connector.Error as err:
        print("Error Getting Count :", err)
    Found = Maincursor.fetchone()
    if Found[0] > 0:
        # Work in batches of 100 links to keep memory usage bounded
        for x in range(0, Found[0], 100):
            SQL = "SELECT Link FROM ***** LIMIT %s,100"
            try:
                Maincursor.execute(SQL, (x,))
                MainDB.commit()
            except mysql.connector.Error as err:
                print("Error Selecting 100 Rows :", err)
            Found = Maincursor.rowcount
            if Found > 0:
                Identified = Maincursor.fetchall()
                Links = []              # was "x =", which also shadowed the loop variable
                for item in Identified:
                    Links.extend(item)
                with concurrent.futures.ProcessPoolExecutor() as executor:
                    for Link, alive in zip(Links, executor.map(load_link, Links)):
                        if alive == 1:
                            Alive(Link)  # was Alive(url); url was undefined
                        else:
                            Dead(Link)


I tried a MySQL connection pool and different multiprocessing approaches, but every time the MySQL database can't keep up with the volume of queries. I also tried opening a new MySQL connection inside the Dead / Alive functions, but I hit the same problem. Why does my script stop after the first update?
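
For reference, the "new connection per update" variant I mentioned looked roughly like this. This is a sketch, not the exact code: only Dead is shown (Alive is symmetric), the db/cursor names are illustrative, and credentials and table names are masked as in the main script.

def Dead(Link):
    # Sketch of the per-update connection variant (Alive is symmetric)
    Date = time.strftime('%Y-%m-%d %H:%M:%S')
    print(Date, ':', Link, 'is Dead')
    db = mysql.connector.connect(  # short-lived connection, one per update
        host="**************",
        user="********",
        passwd="***********",
        database="**********"
    )
    cursor = db.cursor()
    try:
        cursor.execute(
            "UPDATE ******** SET Alive=%s, LastTimeSeen=%s WHERE Link = %s",
            (0, Date, Link)
        )
        db.commit()
    except mysql.connector.Error as err:
        print("Error Updating Dead :", err)
        db.rollback()
    finally:
        cursor.close()
        db.close()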










      python mysql python-multiprocessing python-multithreading






asked Nov 9 at 10:51 by Jean Alarma (163); edited Nov 9 at 11:04




























