Skip to content

Reactors info isn't scraping #1121

Open
@muhammad-faizan087

Description

@muhammad-faizan087

Below is the code, which produces an output file containing all the data about a particular post. However, I'm getting a null value for reactors. I tried updating the module and setting up waits, but nothing changes.

import sys
import json
import time
import argparse
import logging
import facebook_scraper as fs
import requests

Set up logging for debugging

logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s - %(levelname)s - %(message)s",
handlers=[logging.StreamHandler()],
)

logging.debug("Starting the Facebook scraper script...")

# Load custom headers from mbasicHeaders.json; fall back to an empty dict
# when the file is missing or malformed so the scraper can still run.
headers = {}
try:
    with open(
        "/home/buyfans/domains/buyfans.pl/public_html/scraper2024-2/new_venv/bin/mbasicHeaders.json",
        "r",
    ) as file:
        headers = json.load(file)
    logging.debug("Headers loaded successfully.")
except FileNotFoundError:
    logging.error("mbasicHeaders.json file not found. Proceeding without headers.")
except json.JSONDecodeError as e:
    logging.error(f"Error decoding JSON in headers: {e}")

# Parse the required command-line arguments: post URL, output path, cookies file.
parser = argparse.ArgumentParser()
parser.add_argument("-pid", "--post-id", help="Post ID (URL)", required=True)
parser.add_argument("-f", "--output-file", help="Output file", required=True)
parser.add_argument("-c", "--cookies", help="Cookies file", required=True)
args = parser.parse_args()

logging.debug(f"Post ID (URL): {args.post_id}")
logging.debug(f"Output file: {args.output_file}")
logging.debug(f"Cookies file: {args.cookies}")

# Function to handle retries in case of connection issues or failures.
def fetch_post_with_retries(post_url, options, cookies, headers, retries=3, delay=5):
    """Scrape a single Facebook post, retrying on transient network errors.

    Args:
        post_url: URL (or ID) of the post to scrape.
        options: Options dict forwarded to facebook_scraper.get_posts.
        cookies: Cookies file path forwarded to facebook_scraper.get_posts.
        headers: Custom request headers.  NOTE(review): currently unused --
            they are never forwarded to facebook_scraper, which may be why
            extra data such as reactors comes back null; confirm whether the
            installed facebook_scraper exposes a way to set headers
            (e.g. fs.set_headers) and wire this argument in.
        retries: Number of attempts before giving up (default 3).
        delay: Seconds to sleep between failed attempts (default 5).

    Returns:
        The scraped post dict, or None when the post yields no data or
        every retry fails.
    """
    for attempt in range(retries):
        try:
            logging.debug(f"Starting scraping for post URL: {post_url}")

            # Scrape the post using facebook_scraper; get_posts returns a
            # generator, so take its first item.
            gen = fs.get_posts(post_urls=[post_url], options=options, cookies=cookies)
            post_data = next(gen)
            logging.debug(f"Successfully scraped data: {post_data}")
            return post_data
        except requests.exceptions.RequestException as e:
            logging.error(
                f"Error fetching post: {e}, retrying ({attempt + 1}/{retries})..."
            )
            time.sleep(delay)  # Wait before retrying
        except StopIteration:
            # Generator was empty: the post produced no data at all.
            logging.error(f"No data found for the post URL: {post_url}")
            return None
    logging.error(f"Failed to fetch the post after {retries} retries.")
    return None

# Options to ensure we retrieve complete data from facebook_scraper.
options = {
    "reactors": True,  # Fetch reactors (people who reacted)
    "reactions": True,  # Fetch reactions data (like, love, etc.)
    "comments": True,  # Fetch comments
    "comments_full": True,  # Fetch the full comment thread
    "allow_extra_requests": True,  # Enable additional requests for more data (shares, etc.)
}

# Entry point: fetch the post (with retries) and dump the result as JSON.
try:
    post_data = fetch_post_with_retries(args.post_id, options, args.cookies, headers)

    if post_data:
        # Open the output file and write the scraped data in JSON format.
        with open(args.output_file, "w") as json_file:
            logging.debug(f"Writing data to {args.output_file}")
            # default=str stringifies non-JSON-serializable values
            # (e.g. datetimes returned by the scraper).
            json.dump(post_data, json_file, default=str, indent=2)
        logging.info(f"Post data saved to {args.output_file}")
    else:
        logging.error(f"Failed to scrape the post: {args.post_id}")
except Exception as e:
    # Top-level boundary: log anything unexpected instead of crashing silently.
    logging.error(f"An unexpected error occurred: {e}")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions