Scrapes facebook's pages front end with no limitations & provides a feature to turn data into structured JSON or CSV
MIT License
git clone https://github.com/shaikhsajid1111/facebook_page_scraper
python3 setup.py install
pip3 install facebook-page-scraper
#import Facebook_scraper class from facebook_page_scraper
from facebook_page_scraper import Facebook_scraper
#instantiate the Facebook_scraper class
page_or_group_name = "Meta"
posts_count = 10
browser = "firefox"
proxy = "IP:PORT" #if proxy requires authentication then user:password@IP:PORT
timeout = 600 #600 seconds
headless = True
# get env password
fb_password = os.getenv('fb_password')
fb_email = os.getenv('fb_email')
# indicates if the Facebook target is a FB group or FB page
isGroup= False
meta_ai = Facebook_scraper(page_or_group_name, posts_count, browser, proxy=proxy, timeout=timeout, headless=headless, isGroup=isGroup)
Using logged-in scraping methods may result in the permanent suspension of your account. Proceed with caution, as violating a platform's terms of service can lead to severe consequences. Exercise discretion and adhere to ethical practices when collecting data through scraping. The library/provider assumes no responsibility for any consequences resulting from the misuse of scraping methods.
#call the scrap_to_json() method
json_data = meta_ai.scrap_to_json()
print(json_data)
Output:
{
"2024182624425347": {
"name": "Meta AI",
"shares": 0,
"reactions": {
"likes": 154,
"loves": 19,
"wow": 0,
"cares": 0,
"sad": 0,
"angry": 0,
"haha": 0
},
"reaction_count": 173,
"comments": 2,
"content": "We’ve built data2vec, the first general high-performance self-supervised algorithm for speech, vision, and text. We applied it to different modalities and found it matches or outperforms the best self-supervised algorithms. We hope this brings us closer to a world where computers can learn to solve many different tasks without supervision. Learn more and get the code: https://ai.facebook.com/…/the-first-high-performance-self-s…",
"posted_on": "2022-01-20T22:43:35",
"video": [],
"image": [
"https://scontent-bom1-2.xx.fbcdn.net/v/t39.30808-6/s480x480/272147088_2024182621092014_6532581039236849529_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=8024bb&_nc_ohc=j4_1PAndJTIAX82OLNq&_nc_ht=scontent-bom1-2.xx&oh=00_AT9us__TvC9eYBqRyQEwEtYSit9r2UKYg0gFoRK7Efrhyw&oe=61F17B71"
],
"post_url": "https://www.facebook.com/MetaAI/photos/a.360372474139712/2024182624425347/?type=3&__xts__%5B0%5D=68.ARBoSaQ-pAC_ApucZNHZ6R-BI3YUSjH4sXsfdZRQ2zZFOwgWGhjt6dmg0VOcmGCLhSFyXpecOY9g1A94vrzU_T-GtYFagqDkJjHuhoyPW2vnkn7fvfzx-ql7fsBYxL5DgQVSsiC1cPoycdCvHmi6BV5Sc4fKADdgDhdFvVvr-ttzXG1ng2DbLzU-XfSes7SAnrPs-gxjODPKJ7AdqkqkSQJ4HrsLgxMgcLFdCsE6feWL7rXjptVWegMVMthhJNVqO0JHu986XBfKKqB60aBFvyAzTSEwJD6o72GtnyzQ-BcH7JxmLtb2_A&__tn__=-R"
}, ...
}
{
"id": {
"name": string,
"shares": integer,
"reactions": {
"likes": integer,
"loves": integer,
"wow": integer,
"cares": integer,
"sad": integer,
"angry": integer,
"haha": integer
},
"reaction_count": integer,
"comments": integer,
"content": string,
"video" : list,
"image" : list,
"posted_on": datetime, //string containing datetime in ISO 8601
"post_url": string
}
}
#call scrap_to_csv(filename,directory) method
filename = "data_file" #file name without CSV extension,where data will be saved
directory = "E:\data" #directory where CSV file will be saved
meta_ai.scrap_to_csv(filename, directory)
content of data_file.csv
:
id,name,shares,likes,loves,wow,cares,sad,angry,haha,reactions_count,comments,content,posted_on,video,image,post_url
2024182624425347,Meta AI,0,154,19,0,0,0,0,0,173,2,"We’ve built data2vec, the first general high-performance self-supervised algorithm for speech, vision, and text. We applied it to different modalities and found it matches or outperforms the best self-supervised algorithms. We hope this brings us closer to a world where computers can learn to solve many different tasks without supervision. Learn more and get the code: https://ai.facebook.com/…/the-first-high-performance-self-s…",2022-01-20T22:43:35,,https://scontent-bom1-2.xx.fbcdn.net/v/t39.30808-6/s480x480/272147088_2024182621092014_6532581039236849529_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=8024bb&_nc_ohc=j4_1PAndJTIAX82OLNq&_nc_ht=scontent-bom1-2.xx&oh=00_AT9us__TvC9eYBqRyQEwEtYSit9r2UKYg0gFoRK7Efrhyw&oe=61F17B71,https://www.facebook.com/MetaAI/photos/a.360372474139712/2024182624425347/?type=3&__xts__%5B0%5D=68.ARAse4eiZmZQDOZumNZEDR0tQkE5B6g50K6S66JJPccb-KaWJWg6Yz4v19BQFSZRMd04MeBmV24VqvqMB3oyjAwMDJUtpmgkMiITtSP8HOgy8QEx_vFlq1j-UEImZkzeEgSAJYINndnR5aSQn0GUwL54L3x2BsxEqL1lElL7SnHfTVvIFUDyNfAqUWIsXrkI8X5KjoDchUj7aHRga1HB5EE0x60dZcHogUMb1sJDRmKCcx8xisRgk5XzdZKCQDDdEkUqN-Ch9_NYTMtxlchz1KfR0w9wRt8y9l7E7BNhfLrmm4qyxo-ZpA&__tn__=-R
...